{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8918060856847287, "eval_steps": 1000, "global_step": 50000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.7836121713694573e-05, "grad_norm": 323.2822570800781, "learning_rate": 8.919015340706387e-09, "loss": 10.7735, "step": 1 }, { "epoch": 3.567224342738915e-05, "grad_norm": 210.89393615722656, "learning_rate": 1.7838030681412774e-08, "loss": 7.1321, "step": 2 }, { "epoch": 5.350836514108372e-05, "grad_norm": 200.89666748046875, "learning_rate": 2.6757046022119158e-08, "loss": 9.5803, "step": 3 }, { "epoch": 7.13444868547783e-05, "grad_norm": 125.82511138916016, "learning_rate": 3.567606136282555e-08, "loss": 7.126, "step": 4 }, { "epoch": 8.918060856847287e-05, "grad_norm": 337.4522705078125, "learning_rate": 4.459507670353193e-08, "loss": 8.6375, "step": 5 }, { "epoch": 0.00010701673028216744, "grad_norm": 210.69891357421875, "learning_rate": 5.3514092044238315e-08, "loss": 7.1199, "step": 6 }, { "epoch": 0.00012485285199586202, "grad_norm": 94.18712615966797, "learning_rate": 6.243310738494472e-08, "loss": 6.4352, "step": 7 }, { "epoch": 0.0001426889737095566, "grad_norm": 122.2567367553711, "learning_rate": 7.13521227256511e-08, "loss": 6.3767, "step": 8 }, { "epoch": 0.00016052509542325118, "grad_norm": 133.38861083984375, "learning_rate": 8.027113806635748e-08, "loss": 6.6332, "step": 9 }, { "epoch": 0.00017836121713694575, "grad_norm": 99.66352844238281, "learning_rate": 8.919015340706386e-08, "loss": 6.2766, "step": 10 }, { "epoch": 0.00019619733885064031, "grad_norm": 104.68462371826172, "learning_rate": 9.810916874777025e-08, "loss": 7.0613, "step": 11 }, { "epoch": 0.00021403346056433488, "grad_norm": 234.64990234375, "learning_rate": 1.0702818408847663e-07, "loss": 7.3977, "step": 12 }, { "epoch": 0.00023186958227802948, "grad_norm": 235.16844177246094, "learning_rate": 1.1594719942918301e-07, "loss": 9.2356, "step": 13 }, { "epoch": 0.00024970570399172404, "grad_norm": 119.6419448852539, "learning_rate": 1.2486621476988943e-07, "loss": 6.9626, "step": 14 }, { "epoch": 0.00026754182570541864, "grad_norm": 182.8326416015625, "learning_rate": 1.337852301105958e-07, "loss": 7.3532, "step": 15 }, { "epoch": 0.0002853779474191132, "grad_norm": 187.87606811523438, "learning_rate": 1.427042454513022e-07, "loss": 6.831, "step": 16 }, { "epoch": 0.00030321406913280777, "grad_norm": 163.6082000732422, "learning_rate": 1.5162326079200857e-07, "loss": 7.0306, "step": 17 }, { "epoch": 0.00032105019084650236, "grad_norm": 155.1985321044922, "learning_rate": 1.6054227613271495e-07, "loss": 7.0035, "step": 18 }, { "epoch": 0.0003388863125601969, "grad_norm": 214.25897216796875, "learning_rate": 1.6946129147342133e-07, "loss": 7.3007, "step": 19 }, { "epoch": 0.0003567224342738915, "grad_norm": 285.1042785644531, "learning_rate": 1.7838030681412771e-07, "loss": 6.9673, "step": 20 }, { "epoch": 0.00037455855598758604, "grad_norm": 67.51636505126953, "learning_rate": 1.872993221548341e-07, "loss": 6.5743, "step": 21 }, { "epoch": 0.00039239467770128063, "grad_norm": 198.23965454101562, "learning_rate": 1.962183374955405e-07, "loss": 8.0284, "step": 22 }, { "epoch": 0.0004102307994149752, "grad_norm": 210.1754150390625, "learning_rate": 2.0513735283624688e-07, "loss": 6.7753, "step": 23 }, { "epoch": 0.00042806692112866976, "grad_norm": 200.4541778564453, "learning_rate": 2.1405636817695326e-07, "loss": 8.327, "step": 24 }, { "epoch": 0.00044590304284236436, "grad_norm": 268.79669189453125, "learning_rate": 2.2297538351765964e-07, "loss": 6.9922, "step": 25 }, { "epoch": 0.00046373916455605895, "grad_norm": 78.06359100341797, "learning_rate": 2.3189439885836602e-07, "loss": 6.7067, "step": 26 }, { "epoch": 0.0004815752862697535, "grad_norm": 310.5227355957031, "learning_rate": 2.4081341419907243e-07, "loss": 7.325, "step": 27 }, { "epoch": 0.0004994114079834481, "grad_norm": 240.86062622070312, "learning_rate": 2.4973242953977886e-07, "loss": 6.1034, "step": 28 }, { "epoch": 0.0005172475296971427, "grad_norm": 219.43072509765625, "learning_rate": 2.5865144488048524e-07, "loss": 7.0325, "step": 29 }, { "epoch": 0.0005350836514108373, "grad_norm": 442.5327453613281, "learning_rate": 2.675704602211916e-07, "loss": 6.8672, "step": 30 }, { "epoch": 0.0005529197731245318, "grad_norm": 151.15321350097656, "learning_rate": 2.76489475561898e-07, "loss": 7.2757, "step": 31 }, { "epoch": 0.0005707558948382264, "grad_norm": 91.259765625, "learning_rate": 2.854084909026044e-07, "loss": 6.224, "step": 32 }, { "epoch": 0.0005885920165519209, "grad_norm": 104.54844665527344, "learning_rate": 2.9432750624331076e-07, "loss": 6.3518, "step": 33 }, { "epoch": 0.0006064281382656155, "grad_norm": 99.20610809326172, "learning_rate": 3.0324652158401715e-07, "loss": 5.9676, "step": 34 }, { "epoch": 0.0006242642599793101, "grad_norm": 91.99747467041016, "learning_rate": 3.121655369247235e-07, "loss": 5.7934, "step": 35 }, { "epoch": 0.0006421003816930047, "grad_norm": 108.061767578125, "learning_rate": 3.210845522654299e-07, "loss": 5.8156, "step": 36 }, { "epoch": 0.0006599365034066992, "grad_norm": 252.54554748535156, "learning_rate": 3.300035676061363e-07, "loss": 6.7174, "step": 37 }, { "epoch": 0.0006777726251203938, "grad_norm": 89.84114837646484, "learning_rate": 3.3892258294684267e-07, "loss": 5.8271, "step": 38 }, { "epoch": 0.0006956087468340884, "grad_norm": 54.38611602783203, "learning_rate": 3.4784159828754905e-07, "loss": 5.9315, "step": 39 }, { "epoch": 0.000713444868547783, "grad_norm": 258.0915832519531, "learning_rate": 3.5676061362825543e-07, "loss": 5.937, "step": 40 }, { "epoch": 0.0007312809902614776, "grad_norm": 64.23783111572266, "learning_rate": 3.656796289689618e-07, "loss": 5.6457, "step": 41 }, { "epoch": 0.0007491171119751721, "grad_norm": 81.00452423095703, "learning_rate": 3.745986443096682e-07, "loss": 5.6441, "step": 42 }, { "epoch": 0.0007669532336888667, "grad_norm": 73.77316284179688, "learning_rate": 3.835176596503746e-07, "loss": 5.4766, "step": 43 }, { "epoch": 0.0007847893554025613, "grad_norm": 101.67465209960938, "learning_rate": 3.92436674991081e-07, "loss": 5.1605, "step": 44 }, { "epoch": 0.0008026254771162559, "grad_norm": 64.17410278320312, "learning_rate": 4.0135569033178733e-07, "loss": 4.8799, "step": 45 }, { "epoch": 0.0008204615988299504, "grad_norm": 77.47195434570312, "learning_rate": 4.1027470567249376e-07, "loss": 4.9629, "step": 46 }, { "epoch": 0.000838297720543645, "grad_norm": 90.45448303222656, "learning_rate": 4.191937210132002e-07, "loss": 4.929, "step": 47 }, { "epoch": 0.0008561338422573395, "grad_norm": 58.99647521972656, "learning_rate": 4.281127363539065e-07, "loss": 4.5549, "step": 48 }, { "epoch": 0.0008739699639710341, "grad_norm": 233.77806091308594, "learning_rate": 4.3703175169461296e-07, "loss": 5.069, "step": 49 }, { "epoch": 0.0008918060856847287, "grad_norm": 92.89978790283203, "learning_rate": 4.459507670353193e-07, "loss": 4.781, "step": 50 }, { "epoch": 0.0009096422073984233, "grad_norm": 192.8018798828125, "learning_rate": 4.548697823760257e-07, "loss": 4.6452, "step": 51 }, { "epoch": 0.0009274783291121179, "grad_norm": 77.0093765258789, "learning_rate": 4.6378879771673205e-07, "loss": 4.4629, "step": 52 }, { "epoch": 0.0009453144508258124, "grad_norm": 53.103431701660156, "learning_rate": 4.727078130574385e-07, "loss": 4.396, "step": 53 }, { "epoch": 0.000963150572539507, "grad_norm": 73.26648712158203, "learning_rate": 4.816268283981449e-07, "loss": 4.5091, "step": 54 }, { "epoch": 0.0009809866942532017, "grad_norm": 65.50182342529297, "learning_rate": 4.905458437388512e-07, "loss": 4.0329, "step": 55 }, { "epoch": 0.0009988228159668962, "grad_norm": 122.55022430419922, "learning_rate": 4.994648590795577e-07, "loss": 4.1873, "step": 56 }, { "epoch": 0.0010166589376805907, "grad_norm": 107.62737274169922, "learning_rate": 5.08383874420264e-07, "loss": 4.4333, "step": 57 }, { "epoch": 0.0010344950593942854, "grad_norm": 51.7789306640625, "learning_rate": 5.173028897609705e-07, "loss": 3.8496, "step": 58 }, { "epoch": 0.0010523311811079798, "grad_norm": 75.96542358398438, "learning_rate": 5.262219051016768e-07, "loss": 3.9539, "step": 59 }, { "epoch": 0.0010701673028216745, "grad_norm": 56.87948989868164, "learning_rate": 5.351409204423832e-07, "loss": 3.6712, "step": 60 }, { "epoch": 0.001088003424535369, "grad_norm": 135.2625274658203, "learning_rate": 5.440599357830895e-07, "loss": 3.334, "step": 61 }, { "epoch": 0.0011058395462490635, "grad_norm": 53.386959075927734, "learning_rate": 5.52978951123796e-07, "loss": 3.4197, "step": 62 }, { "epoch": 0.0011236756679627582, "grad_norm": 71.22798156738281, "learning_rate": 5.618979664645024e-07, "loss": 3.606, "step": 63 }, { "epoch": 0.0011415117896764527, "grad_norm": 54.5213737487793, "learning_rate": 5.708169818052088e-07, "loss": 3.4468, "step": 64 }, { "epoch": 0.0011593479113901474, "grad_norm": 186.6767120361328, "learning_rate": 5.797359971459151e-07, "loss": 4.5372, "step": 65 }, { "epoch": 0.0011771840331038419, "grad_norm": 99.92565155029297, "learning_rate": 5.886550124866215e-07, "loss": 3.4468, "step": 66 }, { "epoch": 0.0011950201548175364, "grad_norm": 36.229618072509766, "learning_rate": 5.975740278273279e-07, "loss": 2.7097, "step": 67 }, { "epoch": 0.001212856276531231, "grad_norm": 44.98109817504883, "learning_rate": 6.064930431680343e-07, "loss": 3.1559, "step": 68 }, { "epoch": 0.0012306923982449256, "grad_norm": 48.70180892944336, "learning_rate": 6.154120585087407e-07, "loss": 3.0656, "step": 69 }, { "epoch": 0.0012485285199586203, "grad_norm": 61.8568229675293, "learning_rate": 6.24331073849447e-07, "loss": 3.0177, "step": 70 }, { "epoch": 0.0012663646416723147, "grad_norm": 43.668888092041016, "learning_rate": 6.332500891901534e-07, "loss": 3.2232, "step": 71 }, { "epoch": 0.0012842007633860095, "grad_norm": 80.41217803955078, "learning_rate": 6.421691045308598e-07, "loss": 3.0677, "step": 72 }, { "epoch": 0.001302036885099704, "grad_norm": 44.075828552246094, "learning_rate": 6.510881198715662e-07, "loss": 2.6257, "step": 73 }, { "epoch": 0.0013198730068133984, "grad_norm": 47.72416687011719, "learning_rate": 6.600071352122726e-07, "loss": 2.7277, "step": 74 }, { "epoch": 0.0013377091285270931, "grad_norm": 44.380043029785156, "learning_rate": 6.68926150552979e-07, "loss": 2.7056, "step": 75 }, { "epoch": 0.0013555452502407876, "grad_norm": 36.327720642089844, "learning_rate": 6.778451658936853e-07, "loss": 2.6033, "step": 76 }, { "epoch": 0.0013733813719544823, "grad_norm": 29.031856536865234, "learning_rate": 6.867641812343917e-07, "loss": 2.5391, "step": 77 }, { "epoch": 0.0013912174936681768, "grad_norm": 20.884868621826172, "learning_rate": 6.956831965750981e-07, "loss": 2.3389, "step": 78 }, { "epoch": 0.0014090536153818713, "grad_norm": 65.40373992919922, "learning_rate": 7.046022119158046e-07, "loss": 2.7997, "step": 79 }, { "epoch": 0.001426889737095566, "grad_norm": 66.30245208740234, "learning_rate": 7.135212272565109e-07, "loss": 2.8164, "step": 80 }, { "epoch": 0.0014447258588092605, "grad_norm": 32.38147735595703, "learning_rate": 7.224402425972173e-07, "loss": 2.2865, "step": 81 }, { "epoch": 0.0014625619805229552, "grad_norm": 18.060773849487305, "learning_rate": 7.313592579379236e-07, "loss": 2.156, "step": 82 }, { "epoch": 0.0014803981022366497, "grad_norm": 34.35832977294922, "learning_rate": 7.402782732786301e-07, "loss": 2.5437, "step": 83 }, { "epoch": 0.0014982342239503441, "grad_norm": 23.287845611572266, "learning_rate": 7.491972886193364e-07, "loss": 2.2245, "step": 84 }, { "epoch": 0.0015160703456640388, "grad_norm": 54.39329147338867, "learning_rate": 7.581163039600429e-07, "loss": 2.161, "step": 85 }, { "epoch": 0.0015339064673777333, "grad_norm": 18.95912742614746, "learning_rate": 7.670353193007492e-07, "loss": 2.156, "step": 86 }, { "epoch": 0.001551742589091428, "grad_norm": 16.43122673034668, "learning_rate": 7.759543346414556e-07, "loss": 1.9135, "step": 87 }, { "epoch": 0.0015695787108051225, "grad_norm": 29.445796966552734, "learning_rate": 7.84873349982162e-07, "loss": 1.9606, "step": 88 }, { "epoch": 0.001587414832518817, "grad_norm": 15.478200912475586, "learning_rate": 7.937923653228685e-07, "loss": 1.9479, "step": 89 }, { "epoch": 0.0016052509542325117, "grad_norm": 19.078706741333008, "learning_rate": 8.027113806635747e-07, "loss": 2.0803, "step": 90 }, { "epoch": 0.0016230870759462062, "grad_norm": 74.88615417480469, "learning_rate": 8.116303960042811e-07, "loss": 1.9216, "step": 91 }, { "epoch": 0.001640923197659901, "grad_norm": 10.065621376037598, "learning_rate": 8.205494113449875e-07, "loss": 1.785, "step": 92 }, { "epoch": 0.0016587593193735954, "grad_norm": 16.853424072265625, "learning_rate": 8.29468426685694e-07, "loss": 1.7712, "step": 93 }, { "epoch": 0.00167659544108729, "grad_norm": 14.692481994628906, "learning_rate": 8.383874420264004e-07, "loss": 1.7445, "step": 94 }, { "epoch": 0.0016944315628009846, "grad_norm": 10.976871490478516, "learning_rate": 8.473064573671067e-07, "loss": 1.6523, "step": 95 }, { "epoch": 0.001712267684514679, "grad_norm": 10.271615982055664, "learning_rate": 8.56225472707813e-07, "loss": 1.6485, "step": 96 }, { "epoch": 0.0017301038062283738, "grad_norm": 10.25522232055664, "learning_rate": 8.651444880485195e-07, "loss": 1.7156, "step": 97 }, { "epoch": 0.0017479399279420682, "grad_norm": 14.337623596191406, "learning_rate": 8.740635033892259e-07, "loss": 1.6553, "step": 98 }, { "epoch": 0.001765776049655763, "grad_norm": 12.784684181213379, "learning_rate": 8.829825187299322e-07, "loss": 1.6241, "step": 99 }, { "epoch": 0.0017836121713694574, "grad_norm": 12.90092945098877, "learning_rate": 8.919015340706386e-07, "loss": 1.6651, "step": 100 }, { "epoch": 0.001801448293083152, "grad_norm": 37.45865249633789, "learning_rate": 9.008205494113451e-07, "loss": 1.6166, "step": 101 }, { "epoch": 0.0018192844147968466, "grad_norm": 10.109075546264648, "learning_rate": 9.097395647520514e-07, "loss": 1.6241, "step": 102 }, { "epoch": 0.001837120536510541, "grad_norm": 10.034185409545898, "learning_rate": 9.186585800927579e-07, "loss": 1.5487, "step": 103 }, { "epoch": 0.0018549566582242358, "grad_norm": 12.023565292358398, "learning_rate": 9.275775954334641e-07, "loss": 1.5102, "step": 104 }, { "epoch": 0.0018727927799379303, "grad_norm": 46.07319641113281, "learning_rate": 9.364966107741706e-07, "loss": 1.5761, "step": 105 }, { "epoch": 0.0018906289016516248, "grad_norm": 7.093059539794922, "learning_rate": 9.45415626114877e-07, "loss": 1.4614, "step": 106 }, { "epoch": 0.0019084650233653195, "grad_norm": 11.888785362243652, "learning_rate": 9.543346414555833e-07, "loss": 1.4124, "step": 107 }, { "epoch": 0.001926301145079014, "grad_norm": 6.246786594390869, "learning_rate": 9.632536567962897e-07, "loss": 1.5451, "step": 108 }, { "epoch": 0.0019441372667927087, "grad_norm": 107.81812286376953, "learning_rate": 9.72172672136996e-07, "loss": 1.461, "step": 109 }, { "epoch": 0.0019619733885064034, "grad_norm": 6.224181175231934, "learning_rate": 9.810916874777025e-07, "loss": 1.2912, "step": 110 }, { "epoch": 0.001979809510220098, "grad_norm": 4.665727615356445, "learning_rate": 9.900107028184089e-07, "loss": 1.3029, "step": 111 }, { "epoch": 0.0019976456319337923, "grad_norm": 9.664981842041016, "learning_rate": 9.989297181591155e-07, "loss": 1.343, "step": 112 }, { "epoch": 0.002015481753647487, "grad_norm": 9.630330085754395, "learning_rate": 1.0078487334998216e-06, "loss": 1.299, "step": 113 }, { "epoch": 0.0020333178753611813, "grad_norm": 10.348453521728516, "learning_rate": 1.016767748840528e-06, "loss": 1.4833, "step": 114 }, { "epoch": 0.0020511539970748762, "grad_norm": 4.746984958648682, "learning_rate": 1.0256867641812344e-06, "loss": 1.3208, "step": 115 }, { "epoch": 0.0020689901187885707, "grad_norm": 34.37080383300781, "learning_rate": 1.034605779521941e-06, "loss": 1.4292, "step": 116 }, { "epoch": 0.002086826240502265, "grad_norm": 21.08136558532715, "learning_rate": 1.0435247948626474e-06, "loss": 1.4235, "step": 117 }, { "epoch": 0.0021046623622159597, "grad_norm": 8.585554122924805, "learning_rate": 1.0524438102033535e-06, "loss": 1.2834, "step": 118 }, { "epoch": 0.002122498483929654, "grad_norm": 24.306896209716797, "learning_rate": 1.06136282554406e-06, "loss": 1.2809, "step": 119 }, { "epoch": 0.002140334605643349, "grad_norm": 7.346068382263184, "learning_rate": 1.0702818408847665e-06, "loss": 1.5063, "step": 120 }, { "epoch": 0.0021581707273570436, "grad_norm": 5.411838054656982, "learning_rate": 1.0792008562254729e-06, "loss": 1.2126, "step": 121 }, { "epoch": 0.002176006849070738, "grad_norm": 4.67111873626709, "learning_rate": 1.088119871566179e-06, "loss": 1.3172, "step": 122 }, { "epoch": 0.0021938429707844325, "grad_norm": 6.796841621398926, "learning_rate": 1.0970388869068854e-06, "loss": 1.2651, "step": 123 }, { "epoch": 0.002211679092498127, "grad_norm": 5.288766384124756, "learning_rate": 1.105957902247592e-06, "loss": 1.3635, "step": 124 }, { "epoch": 0.002229515214211822, "grad_norm": 4.216982841491699, "learning_rate": 1.1148769175882984e-06, "loss": 1.1853, "step": 125 }, { "epoch": 0.0022473513359255164, "grad_norm": 10.145190238952637, "learning_rate": 1.1237959329290048e-06, "loss": 1.2562, "step": 126 }, { "epoch": 0.002265187457639211, "grad_norm": 5.38865327835083, "learning_rate": 1.132714948269711e-06, "loss": 1.2521, "step": 127 }, { "epoch": 0.0022830235793529054, "grad_norm": 4.854418754577637, "learning_rate": 1.1416339636104175e-06, "loss": 1.2694, "step": 128 }, { "epoch": 0.0023008597010666, "grad_norm": 3.615403175354004, "learning_rate": 1.150552978951124e-06, "loss": 1.2194, "step": 129 }, { "epoch": 0.002318695822780295, "grad_norm": 4.837342739105225, "learning_rate": 1.1594719942918303e-06, "loss": 1.2039, "step": 130 }, { "epoch": 0.0023365319444939893, "grad_norm": 2.977159023284912, "learning_rate": 1.1683910096325365e-06, "loss": 1.113, "step": 131 }, { "epoch": 0.0023543680662076838, "grad_norm": 9.740659713745117, "learning_rate": 1.177310024973243e-06, "loss": 1.1635, "step": 132 }, { "epoch": 0.0023722041879213783, "grad_norm": 5.769473552703857, "learning_rate": 1.1862290403139494e-06, "loss": 1.1302, "step": 133 }, { "epoch": 0.0023900403096350727, "grad_norm": 4.656841278076172, "learning_rate": 1.1951480556546558e-06, "loss": 1.1217, "step": 134 }, { "epoch": 0.0024078764313487677, "grad_norm": 3.355837821960449, "learning_rate": 1.2040670709953622e-06, "loss": 1.0861, "step": 135 }, { "epoch": 0.002425712553062462, "grad_norm": 5.188744068145752, "learning_rate": 1.2129860863360686e-06, "loss": 1.1827, "step": 136 }, { "epoch": 0.0024435486747761566, "grad_norm": 4.206075668334961, "learning_rate": 1.221905101676775e-06, "loss": 1.0765, "step": 137 }, { "epoch": 0.002461384796489851, "grad_norm": 4.9574079513549805, "learning_rate": 1.2308241170174813e-06, "loss": 1.2729, "step": 138 }, { "epoch": 0.0024792209182035456, "grad_norm": 28.436365127563477, "learning_rate": 1.2397431323581877e-06, "loss": 1.1768, "step": 139 }, { "epoch": 0.0024970570399172405, "grad_norm": 11.312703132629395, "learning_rate": 1.248662147698894e-06, "loss": 1.2622, "step": 140 }, { "epoch": 0.002514893161630935, "grad_norm": 5.597165107727051, "learning_rate": 1.2575811630396005e-06, "loss": 1.0988, "step": 141 }, { "epoch": 0.0025327292833446295, "grad_norm": 4.262165546417236, "learning_rate": 1.2665001783803069e-06, "loss": 0.9773, "step": 142 }, { "epoch": 0.002550565405058324, "grad_norm": 3.6943838596343994, "learning_rate": 1.2754191937210132e-06, "loss": 1.1566, "step": 143 }, { "epoch": 0.002568401526772019, "grad_norm": 3.584569215774536, "learning_rate": 1.2843382090617196e-06, "loss": 1.1361, "step": 144 }, { "epoch": 0.0025862376484857134, "grad_norm": 3.577634334564209, "learning_rate": 1.293257224402426e-06, "loss": 1.0374, "step": 145 }, { "epoch": 0.002604073770199408, "grad_norm": 4.231927394866943, "learning_rate": 1.3021762397431324e-06, "loss": 1.0556, "step": 146 }, { "epoch": 0.0026219098919131024, "grad_norm": 4.495650291442871, "learning_rate": 1.3110952550838388e-06, "loss": 1.1813, "step": 147 }, { "epoch": 0.002639746013626797, "grad_norm": 2.9533169269561768, "learning_rate": 1.3200142704245451e-06, "loss": 1.0133, "step": 148 }, { "epoch": 0.0026575821353404918, "grad_norm": 5.6773858070373535, "learning_rate": 1.3289332857652517e-06, "loss": 1.134, "step": 149 }, { "epoch": 0.0026754182570541863, "grad_norm": 4.191747665405273, "learning_rate": 1.337852301105958e-06, "loss": 1.0288, "step": 150 }, { "epoch": 0.0026932543787678807, "grad_norm": 5.146604061126709, "learning_rate": 1.3467713164466643e-06, "loss": 0.9912, "step": 151 }, { "epoch": 0.0027110905004815752, "grad_norm": 4.744831562042236, "learning_rate": 1.3556903317873707e-06, "loss": 0.9719, "step": 152 }, { "epoch": 0.0027289266221952697, "grad_norm": 3.7088944911956787, "learning_rate": 1.3646093471280773e-06, "loss": 1.0839, "step": 153 }, { "epoch": 0.0027467627439089646, "grad_norm": 2.484596014022827, "learning_rate": 1.3735283624687834e-06, "loss": 0.9486, "step": 154 }, { "epoch": 0.002764598865622659, "grad_norm": 3.3776695728302, "learning_rate": 1.3824473778094898e-06, "loss": 1.0543, "step": 155 }, { "epoch": 0.0027824349873363536, "grad_norm": 3.0981509685516357, "learning_rate": 1.3913663931501962e-06, "loss": 1.0516, "step": 156 }, { "epoch": 0.002800271109050048, "grad_norm": 2.3308029174804688, "learning_rate": 1.4002854084909028e-06, "loss": 1.1079, "step": 157 }, { "epoch": 0.0028181072307637426, "grad_norm": 6.98223352432251, "learning_rate": 1.4092044238316092e-06, "loss": 0.9549, "step": 158 }, { "epoch": 0.0028359433524774375, "grad_norm": 2.6161375045776367, "learning_rate": 1.4181234391723153e-06, "loss": 0.9373, "step": 159 }, { "epoch": 0.002853779474191132, "grad_norm": 6.54719352722168, "learning_rate": 1.4270424545130217e-06, "loss": 1.1123, "step": 160 }, { "epoch": 0.0028716155959048265, "grad_norm": 5.496429443359375, "learning_rate": 1.4359614698537283e-06, "loss": 1.1083, "step": 161 }, { "epoch": 0.002889451717618521, "grad_norm": 2.4545984268188477, "learning_rate": 1.4448804851944347e-06, "loss": 0.9655, "step": 162 }, { "epoch": 0.0029072878393322154, "grad_norm": 3.0574238300323486, "learning_rate": 1.453799500535141e-06, "loss": 1.0865, "step": 163 }, { "epoch": 0.0029251239610459103, "grad_norm": 2.148712158203125, "learning_rate": 1.4627185158758472e-06, "loss": 1.0007, "step": 164 }, { "epoch": 0.002942960082759605, "grad_norm": 3.361168384552002, "learning_rate": 1.4716375312165538e-06, "loss": 0.973, "step": 165 }, { "epoch": 0.0029607962044732993, "grad_norm": 2.2146620750427246, "learning_rate": 1.4805565465572602e-06, "loss": 0.9946, "step": 166 }, { "epoch": 0.002978632326186994, "grad_norm": 2.1582112312316895, "learning_rate": 1.4894755618979666e-06, "loss": 0.9073, "step": 167 }, { "epoch": 0.0029964684479006883, "grad_norm": 2.935107469558716, "learning_rate": 1.4983945772386728e-06, "loss": 0.9513, "step": 168 }, { "epoch": 0.003014304569614383, "grad_norm": 4.1764960289001465, "learning_rate": 1.5073135925793793e-06, "loss": 0.9448, "step": 169 }, { "epoch": 0.0030321406913280777, "grad_norm": 2.118518352508545, "learning_rate": 1.5162326079200857e-06, "loss": 0.9539, "step": 170 }, { "epoch": 0.003049976813041772, "grad_norm": 1.9466509819030762, "learning_rate": 1.5251516232607921e-06, "loss": 0.9445, "step": 171 }, { "epoch": 0.0030678129347554667, "grad_norm": 1.8523297309875488, "learning_rate": 1.5340706386014985e-06, "loss": 0.9683, "step": 172 }, { "epoch": 0.003085649056469161, "grad_norm": 2.822516441345215, "learning_rate": 1.5429896539422049e-06, "loss": 0.9493, "step": 173 }, { "epoch": 0.003103485178182856, "grad_norm": 2.1008737087249756, "learning_rate": 1.5519086692829112e-06, "loss": 1.0008, "step": 174 }, { "epoch": 0.0031213212998965506, "grad_norm": 2.113912582397461, "learning_rate": 1.5608276846236176e-06, "loss": 0.8648, "step": 175 }, { "epoch": 0.003139157421610245, "grad_norm": 5.489522457122803, "learning_rate": 1.569746699964324e-06, "loss": 0.8341, "step": 176 }, { "epoch": 0.0031569935433239395, "grad_norm": 4.361125946044922, "learning_rate": 1.5786657153050304e-06, "loss": 0.8544, "step": 177 }, { "epoch": 0.003174829665037634, "grad_norm": 2.187650442123413, "learning_rate": 1.587584730645737e-06, "loss": 0.9864, "step": 178 }, { "epoch": 0.003192665786751329, "grad_norm": 2.0201544761657715, "learning_rate": 1.5965037459864434e-06, "loss": 0.8719, "step": 179 }, { "epoch": 0.0032105019084650234, "grad_norm": 2.9684455394744873, "learning_rate": 1.6054227613271493e-06, "loss": 0.8732, "step": 180 }, { "epoch": 0.003228338030178718, "grad_norm": 3.0871806144714355, "learning_rate": 1.614341776667856e-06, "loss": 0.9683, "step": 181 }, { "epoch": 0.0032461741518924124, "grad_norm": 2.1151089668273926, "learning_rate": 1.6232607920085623e-06, "loss": 0.8258, "step": 182 }, { "epoch": 0.0032640102736061073, "grad_norm": 2.6509292125701904, "learning_rate": 1.6321798073492687e-06, "loss": 0.7964, "step": 183 }, { "epoch": 0.003281846395319802, "grad_norm": 2.232733726501465, "learning_rate": 1.641098822689975e-06, "loss": 0.9056, "step": 184 }, { "epoch": 0.0032996825170334963, "grad_norm": 2.4667141437530518, "learning_rate": 1.6500178380306814e-06, "loss": 0.9627, "step": 185 }, { "epoch": 0.0033175186387471908, "grad_norm": 2.943213939666748, "learning_rate": 1.658936853371388e-06, "loss": 0.9321, "step": 186 }, { "epoch": 0.0033353547604608852, "grad_norm": 2.2956197261810303, "learning_rate": 1.6678558687120944e-06, "loss": 0.8541, "step": 187 }, { "epoch": 0.00335319088217458, "grad_norm": 2.2546980381011963, "learning_rate": 1.6767748840528008e-06, "loss": 0.8759, "step": 188 }, { "epoch": 0.0033710270038882746, "grad_norm": 2.286296844482422, "learning_rate": 1.685693899393507e-06, "loss": 0.8312, "step": 189 }, { "epoch": 0.003388863125601969, "grad_norm": 1.9116014242172241, "learning_rate": 1.6946129147342133e-06, "loss": 0.7941, "step": 190 }, { "epoch": 0.0034066992473156636, "grad_norm": 2.01637864112854, "learning_rate": 1.7035319300749197e-06, "loss": 0.8663, "step": 191 }, { "epoch": 0.003424535369029358, "grad_norm": 1.940755844116211, "learning_rate": 1.712450945415626e-06, "loss": 0.7687, "step": 192 }, { "epoch": 0.003442371490743053, "grad_norm": 1.4954153299331665, "learning_rate": 1.7213699607563327e-06, "loss": 0.7115, "step": 193 }, { "epoch": 0.0034602076124567475, "grad_norm": 2.2623729705810547, "learning_rate": 1.730288976097039e-06, "loss": 0.8145, "step": 194 }, { "epoch": 0.003478043734170442, "grad_norm": 3.0854616165161133, "learning_rate": 1.7392079914377454e-06, "loss": 0.9151, "step": 195 }, { "epoch": 0.0034958798558841365, "grad_norm": 2.6582725048065186, "learning_rate": 1.7481270067784518e-06, "loss": 0.8509, "step": 196 }, { "epoch": 0.003513715977597831, "grad_norm": 2.0415894985198975, "learning_rate": 1.7570460221191582e-06, "loss": 0.9229, "step": 197 }, { "epoch": 0.003531552099311526, "grad_norm": 2.0389983654022217, "learning_rate": 1.7659650374598644e-06, "loss": 0.786, "step": 198 }, { "epoch": 0.0035493882210252204, "grad_norm": 2.3008618354797363, "learning_rate": 1.7748840528005708e-06, "loss": 0.7101, "step": 199 }, { "epoch": 0.003567224342738915, "grad_norm": 2.06827712059021, "learning_rate": 1.7838030681412771e-06, "loss": 0.8509, "step": 200 }, { "epoch": 0.0035850604644526093, "grad_norm": 2.1113038063049316, "learning_rate": 1.7927220834819837e-06, "loss": 0.8245, "step": 201 }, { "epoch": 0.003602896586166304, "grad_norm": 1.9879345893859863, "learning_rate": 1.8016410988226901e-06, "loss": 0.7701, "step": 202 }, { "epoch": 0.0036207327078799987, "grad_norm": 2.397202968597412, "learning_rate": 1.8105601141633965e-06, "loss": 0.8071, "step": 203 }, { "epoch": 0.0036385688295936932, "grad_norm": 2.3976638317108154, "learning_rate": 1.8194791295041029e-06, "loss": 0.7202, "step": 204 }, { "epoch": 0.0036564049513073877, "grad_norm": 2.4036102294921875, "learning_rate": 1.8283981448448093e-06, "loss": 0.8314, "step": 205 }, { "epoch": 0.003674241073021082, "grad_norm": 2.960175037384033, "learning_rate": 1.8373171601855158e-06, "loss": 0.7731, "step": 206 }, { "epoch": 0.0036920771947347767, "grad_norm": 1.6719032526016235, "learning_rate": 1.8462361755262218e-06, "loss": 0.8398, "step": 207 }, { "epoch": 0.0037099133164484716, "grad_norm": 1.6310625076293945, "learning_rate": 1.8551551908669282e-06, "loss": 0.7694, "step": 208 }, { "epoch": 0.003727749438162166, "grad_norm": 2.1567938327789307, "learning_rate": 1.8640742062076348e-06, "loss": 0.9077, "step": 209 }, { "epoch": 0.0037455855598758606, "grad_norm": 2.805018186569214, "learning_rate": 1.8729932215483412e-06, "loss": 0.8192, "step": 210 }, { "epoch": 0.003763421681589555, "grad_norm": 4.236129283905029, "learning_rate": 1.8819122368890475e-06, "loss": 0.8155, "step": 211 }, { "epoch": 0.0037812578033032495, "grad_norm": 2.8584091663360596, "learning_rate": 1.890831252229754e-06, "loss": 0.7163, "step": 212 }, { "epoch": 0.0037990939250169445, "grad_norm": 3.4499547481536865, "learning_rate": 1.8997502675704603e-06, "loss": 0.7294, "step": 213 }, { "epoch": 0.003816930046730639, "grad_norm": 1.8830771446228027, "learning_rate": 1.9086692829111667e-06, "loss": 0.7697, "step": 214 }, { "epoch": 0.0038347661684443334, "grad_norm": 2.0773870944976807, "learning_rate": 1.917588298251873e-06, "loss": 0.7256, "step": 215 }, { "epoch": 0.003852602290158028, "grad_norm": 3.0219199657440186, "learning_rate": 1.9265073135925794e-06, "loss": 0.6671, "step": 216 }, { "epoch": 0.0038704384118717224, "grad_norm": 1.672107458114624, "learning_rate": 1.935426328933286e-06, "loss": 0.7603, "step": 217 }, { "epoch": 0.0038882745335854173, "grad_norm": 2.161341428756714, "learning_rate": 1.944345344273992e-06, "loss": 0.8354, "step": 218 }, { "epoch": 0.003906110655299112, "grad_norm": 2.58734393119812, "learning_rate": 1.9532643596146986e-06, "loss": 0.7265, "step": 219 }, { "epoch": 0.003923946777012807, "grad_norm": 2.354545831680298, "learning_rate": 1.962183374955405e-06, "loss": 0.8154, "step": 220 }, { "epoch": 0.003941782898726501, "grad_norm": 1.9903712272644043, "learning_rate": 1.9711023902961113e-06, "loss": 0.8039, "step": 221 }, { "epoch": 0.003959619020440196, "grad_norm": 1.8442986011505127, "learning_rate": 1.9800214056368177e-06, "loss": 0.6834, "step": 222 }, { "epoch": 0.00397745514215389, "grad_norm": 2.1357171535491943, "learning_rate": 1.988940420977524e-06, "loss": 0.718, "step": 223 }, { "epoch": 0.003995291263867585, "grad_norm": 2.547299861907959, "learning_rate": 1.997859436318231e-06, "loss": 0.7586, "step": 224 }, { "epoch": 0.00401312738558128, "grad_norm": 1.9040706157684326, "learning_rate": 2.0067784516589373e-06, "loss": 0.8541, "step": 225 }, { "epoch": 0.004030963507294974, "grad_norm": 1.9173662662506104, "learning_rate": 2.0156974669996432e-06, "loss": 0.6373, "step": 226 }, { "epoch": 0.0040487996290086686, "grad_norm": 2.092672109603882, "learning_rate": 2.0246164823403496e-06, "loss": 0.7019, "step": 227 }, { "epoch": 0.004066635750722363, "grad_norm": 1.8871886730194092, "learning_rate": 2.033535497681056e-06, "loss": 0.7077, "step": 228 }, { "epoch": 0.0040844718724360575, "grad_norm": 1.6429773569107056, "learning_rate": 2.0424545130217624e-06, "loss": 0.6934, "step": 229 }, { "epoch": 0.0041023079941497525, "grad_norm": 2.3653886318206787, "learning_rate": 2.0513735283624688e-06, "loss": 0.7352, "step": 230 }, { "epoch": 0.0041201441158634465, "grad_norm": 1.4735819101333618, "learning_rate": 2.060292543703175e-06, "loss": 0.583, "step": 231 }, { "epoch": 0.004137980237577141, "grad_norm": 1.8437955379486084, "learning_rate": 2.069211559043882e-06, "loss": 0.6417, "step": 232 }, { "epoch": 0.0041558163592908355, "grad_norm": 1.959718108177185, "learning_rate": 2.0781305743845883e-06, "loss": 0.7783, "step": 233 }, { "epoch": 0.00417365248100453, "grad_norm": 2.854862928390503, "learning_rate": 2.0870495897252947e-06, "loss": 0.8445, "step": 234 }, { "epoch": 0.004191488602718225, "grad_norm": 1.6057507991790771, "learning_rate": 2.0959686050660007e-06, "loss": 0.743, "step": 235 }, { "epoch": 0.004209324724431919, "grad_norm": 1.6600632667541504, "learning_rate": 2.104887620406707e-06, "loss": 0.7014, "step": 236 }, { "epoch": 0.004227160846145614, "grad_norm": 1.6484214067459106, "learning_rate": 2.1138066357474134e-06, "loss": 0.7308, "step": 237 }, { "epoch": 0.004244996967859308, "grad_norm": 1.508714199066162, "learning_rate": 2.12272565108812e-06, "loss": 0.6619, "step": 238 }, { "epoch": 0.004262833089573003, "grad_norm": 1.8737293481826782, "learning_rate": 2.131644666428826e-06, "loss": 0.7105, "step": 239 }, { "epoch": 0.004280669211286698, "grad_norm": 2.0244300365448, "learning_rate": 2.140563681769533e-06, "loss": 0.6374, "step": 240 }, { "epoch": 0.004298505333000392, "grad_norm": 1.944330096244812, "learning_rate": 2.1494826971102394e-06, "loss": 0.6326, "step": 241 }, { "epoch": 0.004316341454714087, "grad_norm": 2.075991153717041, "learning_rate": 2.1584017124509458e-06, "loss": 0.7177, "step": 242 }, { "epoch": 0.004334177576427781, "grad_norm": 1.7687220573425293, "learning_rate": 2.167320727791652e-06, "loss": 0.5802, "step": 243 }, { "epoch": 0.004352013698141476, "grad_norm": 1.4437633752822876, "learning_rate": 2.176239743132358e-06, "loss": 0.5867, "step": 244 }, { "epoch": 0.004369849819855171, "grad_norm": 1.7106932401657104, "learning_rate": 2.1851587584730645e-06, "loss": 0.6817, "step": 245 }, { "epoch": 0.004387685941568865, "grad_norm": 1.6352781057357788, "learning_rate": 2.194077773813771e-06, "loss": 0.6019, "step": 246 }, { "epoch": 0.00440552206328256, "grad_norm": 1.7784632444381714, "learning_rate": 2.2029967891544772e-06, "loss": 0.6686, "step": 247 }, { "epoch": 0.004423358184996254, "grad_norm": 1.570668339729309, "learning_rate": 2.211915804495184e-06, "loss": 0.5983, "step": 248 }, { "epoch": 0.004441194306709949, "grad_norm": 2.818324089050293, "learning_rate": 2.2208348198358904e-06, "loss": 0.6419, "step": 249 }, { "epoch": 0.004459030428423644, "grad_norm": 1.8933470249176025, "learning_rate": 2.229753835176597e-06, "loss": 0.7321, "step": 250 }, { "epoch": 0.004476866550137338, "grad_norm": 2.2469024658203125, "learning_rate": 2.238672850517303e-06, "loss": 0.6625, "step": 251 }, { "epoch": 0.004494702671851033, "grad_norm": 2.026979684829712, "learning_rate": 2.2475918658580096e-06, "loss": 0.7948, "step": 252 }, { "epoch": 0.004512538793564727, "grad_norm": 1.6033849716186523, "learning_rate": 2.2565108811987155e-06, "loss": 0.5719, "step": 253 }, { "epoch": 0.004530374915278422, "grad_norm": 1.6025912761688232, "learning_rate": 2.265429896539422e-06, "loss": 0.5667, "step": 254 }, { "epoch": 0.004548211036992117, "grad_norm": 1.8745354413986206, "learning_rate": 2.2743489118801283e-06, "loss": 0.578, "step": 255 }, { "epoch": 0.004566047158705811, "grad_norm": 1.65565824508667, "learning_rate": 2.283267927220835e-06, "loss": 0.6516, "step": 256 }, { "epoch": 0.004583883280419506, "grad_norm": 2.23405385017395, "learning_rate": 2.2921869425615415e-06, "loss": 0.6216, "step": 257 }, { "epoch": 0.0046017194021332, "grad_norm": 1.5478591918945312, "learning_rate": 2.301105957902248e-06, "loss": 0.5952, "step": 258 }, { "epoch": 0.004619555523846895, "grad_norm": 1.6051733493804932, "learning_rate": 2.3100249732429542e-06, "loss": 0.5946, "step": 259 }, { "epoch": 0.00463739164556059, "grad_norm": 2.2132322788238525, "learning_rate": 2.3189439885836606e-06, "loss": 0.6655, "step": 260 }, { "epoch": 0.004655227767274284, "grad_norm": 1.4680333137512207, "learning_rate": 2.327863003924367e-06, "loss": 0.5921, "step": 261 }, { "epoch": 0.004673063888987979, "grad_norm": 1.6973994970321655, "learning_rate": 2.336782019265073e-06, "loss": 0.6678, "step": 262 }, { "epoch": 0.004690900010701673, "grad_norm": 1.3919556140899658, "learning_rate": 2.3457010346057793e-06, "loss": 0.6527, "step": 263 }, { "epoch": 0.0047087361324153676, "grad_norm": 1.5178241729736328, "learning_rate": 2.354620049946486e-06, "loss": 0.6835, "step": 264 }, { "epoch": 0.0047265722541290625, "grad_norm": 2.493825912475586, "learning_rate": 2.3635390652871925e-06, "loss": 0.6242, "step": 265 }, { "epoch": 0.0047444083758427565, "grad_norm": 1.703263759613037, "learning_rate": 2.372458080627899e-06, "loss": 0.5809, "step": 266 }, { "epoch": 0.0047622444975564514, "grad_norm": 1.5449261665344238, "learning_rate": 2.3813770959686053e-06, "loss": 0.5676, "step": 267 }, { "epoch": 0.0047800806192701455, "grad_norm": 1.3614991903305054, "learning_rate": 2.3902961113093116e-06, "loss": 0.5449, "step": 268 }, { "epoch": 0.00479791674098384, "grad_norm": 1.0881032943725586, "learning_rate": 2.399215126650018e-06, "loss": 0.5325, "step": 269 }, { "epoch": 0.004815752862697535, "grad_norm": 2.1187965869903564, "learning_rate": 2.4081341419907244e-06, "loss": 0.5731, "step": 270 }, { "epoch": 0.004833588984411229, "grad_norm": 1.9353604316711426, "learning_rate": 2.4170531573314308e-06, "loss": 0.5831, "step": 271 }, { "epoch": 0.004851425106124924, "grad_norm": 1.7603288888931274, "learning_rate": 2.425972172672137e-06, "loss": 0.6485, "step": 272 }, { "epoch": 0.004869261227838618, "grad_norm": 1.321537733078003, "learning_rate": 2.4348911880128435e-06, "loss": 0.6277, "step": 273 }, { "epoch": 0.004887097349552313, "grad_norm": 2.3718101978302, "learning_rate": 2.44381020335355e-06, "loss": 0.6817, "step": 274 }, { "epoch": 0.004904933471266008, "grad_norm": 1.0946693420410156, "learning_rate": 2.4527292186942563e-06, "loss": 0.5611, "step": 275 }, { "epoch": 0.004922769592979702, "grad_norm": 1.7237051725387573, "learning_rate": 2.4616482340349627e-06, "loss": 0.6627, "step": 276 }, { "epoch": 0.004940605714693397, "grad_norm": 2.576864242553711, "learning_rate": 2.470567249375669e-06, "loss": 0.695, "step": 277 }, { "epoch": 0.004958441836407091, "grad_norm": 1.3089861869812012, "learning_rate": 2.4794862647163754e-06, "loss": 0.5667, "step": 278 }, { "epoch": 0.004976277958120786, "grad_norm": 1.6324995756149292, "learning_rate": 2.488405280057082e-06, "loss": 0.6064, "step": 279 }, { "epoch": 0.004994114079834481, "grad_norm": 1.286194920539856, "learning_rate": 2.497324295397788e-06, "loss": 0.6176, "step": 280 }, { "epoch": 0.005011950201548175, "grad_norm": 1.4862664937973022, "learning_rate": 2.5062433107384946e-06, "loss": 0.5522, "step": 281 }, { "epoch": 0.00502978632326187, "grad_norm": 1.4922723770141602, "learning_rate": 2.515162326079201e-06, "loss": 0.5496, "step": 282 }, { "epoch": 0.005047622444975564, "grad_norm": 1.2583637237548828, "learning_rate": 2.5240813414199073e-06, "loss": 0.5268, "step": 283 }, { "epoch": 0.005065458566689259, "grad_norm": 1.5133681297302246, "learning_rate": 2.5330003567606137e-06, "loss": 0.5266, "step": 284 }, { "epoch": 0.005083294688402954, "grad_norm": 1.3877657651901245, "learning_rate": 2.54191937210132e-06, "loss": 0.6455, "step": 285 }, { "epoch": 0.005101130810116648, "grad_norm": 2.116668462753296, "learning_rate": 2.5508383874420265e-06, "loss": 0.7159, "step": 286 }, { "epoch": 0.005118966931830343, "grad_norm": 1.7665098905563354, "learning_rate": 2.559757402782733e-06, "loss": 0.6775, "step": 287 }, { "epoch": 0.005136803053544038, "grad_norm": 1.3720237016677856, "learning_rate": 2.5686764181234392e-06, "loss": 0.6835, "step": 288 }, { "epoch": 0.005154639175257732, "grad_norm": 1.302917242050171, "learning_rate": 2.5775954334641456e-06, "loss": 0.5711, "step": 289 }, { "epoch": 0.005172475296971427, "grad_norm": 1.238458275794983, "learning_rate": 2.586514448804852e-06, "loss": 0.5214, "step": 290 }, { "epoch": 0.005190311418685121, "grad_norm": 1.5740962028503418, "learning_rate": 2.5954334641455584e-06, "loss": 0.5021, "step": 291 }, { "epoch": 0.005208147540398816, "grad_norm": 2.9247536659240723, "learning_rate": 2.6043524794862648e-06, "loss": 0.6231, "step": 292 }, { "epoch": 0.005225983662112511, "grad_norm": 1.5866360664367676, "learning_rate": 2.613271494826971e-06, "loss": 0.5794, "step": 293 }, { "epoch": 0.005243819783826205, "grad_norm": 2.219635248184204, "learning_rate": 2.6221905101676775e-06, "loss": 0.6279, "step": 294 }, { "epoch": 0.0052616559055399, "grad_norm": 1.5627270936965942, "learning_rate": 2.631109525508384e-06, "loss": 0.6574, "step": 295 }, { "epoch": 0.005279492027253594, "grad_norm": 1.6502068042755127, "learning_rate": 2.6400285408490903e-06, "loss": 0.5, "step": 296 }, { "epoch": 0.005297328148967289, "grad_norm": 1.1381497383117676, "learning_rate": 2.6489475561897967e-06, "loss": 0.5804, "step": 297 }, { "epoch": 0.0053151642706809835, "grad_norm": 2.142641067504883, "learning_rate": 2.6578665715305035e-06, "loss": 0.5662, "step": 298 }, { "epoch": 0.005333000392394678, "grad_norm": 1.1981918811798096, "learning_rate": 2.6667855868712094e-06, "loss": 0.5862, "step": 299 }, { "epoch": 0.0053508365141083725, "grad_norm": 1.5230048894882202, "learning_rate": 2.675704602211916e-06, "loss": 0.5038, "step": 300 }, { "epoch": 0.0053686726358220666, "grad_norm": 1.82878577709198, "learning_rate": 2.684623617552622e-06, "loss": 0.4595, "step": 301 }, { "epoch": 0.0053865087575357615, "grad_norm": 1.7125508785247803, "learning_rate": 2.6935426328933286e-06, "loss": 0.5876, "step": 302 }, { "epoch": 0.005404344879249456, "grad_norm": 1.678423285484314, "learning_rate": 2.702461648234035e-06, "loss": 0.627, "step": 303 }, { "epoch": 0.0054221810009631504, "grad_norm": 2.2996435165405273, "learning_rate": 2.7113806635747413e-06, "loss": 0.6109, "step": 304 }, { "epoch": 0.005440017122676845, "grad_norm": 3.5018179416656494, "learning_rate": 2.720299678915448e-06, "loss": 0.6813, "step": 305 }, { "epoch": 0.005457853244390539, "grad_norm": 1.7643263339996338, "learning_rate": 2.7292186942561545e-06, "loss": 0.4911, "step": 306 }, { "epoch": 0.005475689366104234, "grad_norm": 1.4608845710754395, "learning_rate": 2.738137709596861e-06, "loss": 0.5692, "step": 307 }, { "epoch": 0.005493525487817929, "grad_norm": 1.527064561843872, "learning_rate": 2.747056724937567e-06, "loss": 0.5854, "step": 308 }, { "epoch": 0.005511361609531623, "grad_norm": 1.1883187294006348, "learning_rate": 2.7559757402782732e-06, "loss": 0.4484, "step": 309 }, { "epoch": 0.005529197731245318, "grad_norm": 1.9517115354537964, "learning_rate": 2.7648947556189796e-06, "loss": 0.5218, "step": 310 }, { "epoch": 0.005547033852959012, "grad_norm": 1.6211800575256348, "learning_rate": 2.773813770959686e-06, "loss": 0.6561, "step": 311 }, { "epoch": 0.005564869974672707, "grad_norm": 1.3129562139511108, "learning_rate": 2.7827327863003924e-06, "loss": 0.5958, "step": 312 }, { "epoch": 0.005582706096386402, "grad_norm": 1.5863853693008423, "learning_rate": 2.791651801641099e-06, "loss": 0.5121, "step": 313 }, { "epoch": 0.005600542218100096, "grad_norm": 1.2407137155532837, "learning_rate": 2.8005708169818056e-06, "loss": 0.5192, "step": 314 }, { "epoch": 0.005618378339813791, "grad_norm": 1.194892406463623, "learning_rate": 2.809489832322512e-06, "loss": 0.5311, "step": 315 }, { "epoch": 0.005636214461527485, "grad_norm": 1.25631844997406, "learning_rate": 2.8184088476632183e-06, "loss": 0.591, "step": 316 }, { "epoch": 0.00565405058324118, "grad_norm": 1.6166402101516724, "learning_rate": 2.8273278630039247e-06, "loss": 0.5415, "step": 317 }, { "epoch": 0.005671886704954875, "grad_norm": 1.407192587852478, "learning_rate": 2.8362468783446307e-06, "loss": 0.5413, "step": 318 }, { "epoch": 0.005689722826668569, "grad_norm": 1.281314492225647, "learning_rate": 2.845165893685337e-06, "loss": 0.4938, "step": 319 }, { "epoch": 0.005707558948382264, "grad_norm": 1.1050326824188232, "learning_rate": 2.8540849090260434e-06, "loss": 0.5446, "step": 320 }, { "epoch": 0.005725395070095958, "grad_norm": 1.2858392000198364, "learning_rate": 2.8630039243667502e-06, "loss": 0.5782, "step": 321 }, { "epoch": 0.005743231191809653, "grad_norm": 1.362825870513916, "learning_rate": 2.8719229397074566e-06, "loss": 0.5673, "step": 322 }, { "epoch": 0.005761067313523348, "grad_norm": 2.028330087661743, "learning_rate": 2.880841955048163e-06, "loss": 0.6749, "step": 323 }, { "epoch": 0.005778903435237042, "grad_norm": 1.6358400583267212, "learning_rate": 2.8897609703888694e-06, "loss": 0.586, "step": 324 }, { "epoch": 0.005796739556950737, "grad_norm": 1.5424126386642456, "learning_rate": 2.8986799857295757e-06, "loss": 0.4897, "step": 325 }, { "epoch": 0.005814575678664431, "grad_norm": 1.7203178405761719, "learning_rate": 2.907599001070282e-06, "loss": 0.5751, "step": 326 }, { "epoch": 0.005832411800378126, "grad_norm": 1.8745055198669434, "learning_rate": 2.916518016410988e-06, "loss": 0.512, "step": 327 }, { "epoch": 0.005850247922091821, "grad_norm": 1.4206955432891846, "learning_rate": 2.9254370317516945e-06, "loss": 0.519, "step": 328 }, { "epoch": 0.005868084043805515, "grad_norm": 1.4659600257873535, "learning_rate": 2.9343560470924013e-06, "loss": 0.5376, "step": 329 }, { "epoch": 0.00588592016551921, "grad_norm": 1.2667752504348755, "learning_rate": 2.9432750624331076e-06, "loss": 0.4819, "step": 330 }, { "epoch": 0.005903756287232904, "grad_norm": 1.199530839920044, "learning_rate": 2.952194077773814e-06, "loss": 0.5012, "step": 331 }, { "epoch": 0.005921592408946599, "grad_norm": 1.3267625570297241, "learning_rate": 2.9611130931145204e-06, "loss": 0.4084, "step": 332 }, { "epoch": 0.0059394285306602936, "grad_norm": 1.6121457815170288, "learning_rate": 2.970032108455227e-06, "loss": 0.4773, "step": 333 }, { "epoch": 0.005957264652373988, "grad_norm": 1.4322900772094727, "learning_rate": 2.978951123795933e-06, "loss": 0.4992, "step": 334 }, { "epoch": 0.0059751007740876825, "grad_norm": 1.4893734455108643, "learning_rate": 2.9878701391366396e-06, "loss": 0.4496, "step": 335 }, { "epoch": 0.005992936895801377, "grad_norm": 1.2784552574157715, "learning_rate": 2.9967891544773455e-06, "loss": 0.5306, "step": 336 }, { "epoch": 0.0060107730175150715, "grad_norm": 2.11749529838562, "learning_rate": 3.0057081698180523e-06, "loss": 0.6142, "step": 337 }, { "epoch": 0.006028609139228766, "grad_norm": 3.343623399734497, "learning_rate": 3.0146271851587587e-06, "loss": 0.4995, "step": 338 }, { "epoch": 0.0060464452609424605, "grad_norm": 1.6374093294143677, "learning_rate": 3.023546200499465e-06, "loss": 0.5425, "step": 339 }, { "epoch": 0.006064281382656155, "grad_norm": 1.5746098756790161, "learning_rate": 3.0324652158401715e-06, "loss": 0.5573, "step": 340 }, { "epoch": 0.0060821175043698494, "grad_norm": 1.7472163438796997, "learning_rate": 3.041384231180878e-06, "loss": 0.5513, "step": 341 }, { "epoch": 0.006099953626083544, "grad_norm": 1.0055726766586304, "learning_rate": 3.0503032465215842e-06, "loss": 0.4851, "step": 342 }, { "epoch": 0.006117789747797239, "grad_norm": 1.4767462015151978, "learning_rate": 3.0592222618622906e-06, "loss": 0.4739, "step": 343 }, { "epoch": 0.006135625869510933, "grad_norm": 1.3134757280349731, "learning_rate": 3.068141277202997e-06, "loss": 0.5837, "step": 344 }, { "epoch": 0.006153461991224628, "grad_norm": 1.0430761575698853, "learning_rate": 3.0770602925437034e-06, "loss": 0.5398, "step": 345 }, { "epoch": 0.006171298112938322, "grad_norm": 1.345146894454956, "learning_rate": 3.0859793078844097e-06, "loss": 0.4762, "step": 346 }, { "epoch": 0.006189134234652017, "grad_norm": 1.6122509241104126, "learning_rate": 3.094898323225116e-06, "loss": 0.479, "step": 347 }, { "epoch": 0.006206970356365712, "grad_norm": 1.1161093711853027, "learning_rate": 3.1038173385658225e-06, "loss": 0.5598, "step": 348 }, { "epoch": 0.006224806478079406, "grad_norm": 1.5138565301895142, "learning_rate": 3.112736353906529e-06, "loss": 0.4709, "step": 349 }, { "epoch": 0.006242642599793101, "grad_norm": 1.3539141416549683, "learning_rate": 3.1216553692472353e-06, "loss": 0.4499, "step": 350 }, { "epoch": 0.006260478721506795, "grad_norm": 1.2136832475662231, "learning_rate": 3.1305743845879412e-06, "loss": 0.4589, "step": 351 }, { "epoch": 0.00627831484322049, "grad_norm": 1.1060885190963745, "learning_rate": 3.139493399928648e-06, "loss": 0.5172, "step": 352 }, { "epoch": 0.006296150964934185, "grad_norm": 1.2628618478775024, "learning_rate": 3.1484124152693544e-06, "loss": 0.5603, "step": 353 }, { "epoch": 0.006313987086647879, "grad_norm": 1.2681447267532349, "learning_rate": 3.1573314306100608e-06, "loss": 0.5185, "step": 354 }, { "epoch": 0.006331823208361574, "grad_norm": 1.3920072317123413, "learning_rate": 3.166250445950767e-06, "loss": 0.4208, "step": 355 }, { "epoch": 0.006349659330075268, "grad_norm": 1.335545301437378, "learning_rate": 3.175169461291474e-06, "loss": 0.4835, "step": 356 }, { "epoch": 0.006367495451788963, "grad_norm": 1.1903812885284424, "learning_rate": 3.18408847663218e-06, "loss": 0.4654, "step": 357 }, { "epoch": 0.006385331573502658, "grad_norm": 1.6971962451934814, "learning_rate": 3.1930074919728867e-06, "loss": 0.6008, "step": 358 }, { "epoch": 0.006403167695216352, "grad_norm": 1.1480566263198853, "learning_rate": 3.2019265073135927e-06, "loss": 0.4019, "step": 359 }, { "epoch": 0.006421003816930047, "grad_norm": 2.0387771129608154, "learning_rate": 3.2108455226542986e-06, "loss": 0.5006, "step": 360 }, { "epoch": 0.006438839938643741, "grad_norm": 1.2148463726043701, "learning_rate": 3.2197645379950054e-06, "loss": 0.4913, "step": 361 }, { "epoch": 0.006456676060357436, "grad_norm": 2.1562607288360596, "learning_rate": 3.228683553335712e-06, "loss": 0.514, "step": 362 }, { "epoch": 0.006474512182071131, "grad_norm": 1.2904720306396484, "learning_rate": 3.2376025686764186e-06, "loss": 0.4266, "step": 363 }, { "epoch": 0.006492348303784825, "grad_norm": 1.3285740613937378, "learning_rate": 3.2465215840171246e-06, "loss": 0.4309, "step": 364 }, { "epoch": 0.00651018442549852, "grad_norm": 1.137514591217041, "learning_rate": 3.2554405993578314e-06, "loss": 0.5002, "step": 365 }, { "epoch": 0.006528020547212215, "grad_norm": 1.4586100578308105, "learning_rate": 3.2643596146985373e-06, "loss": 0.5156, "step": 366 }, { "epoch": 0.006545856668925909, "grad_norm": 1.1244300603866577, "learning_rate": 3.273278630039244e-06, "loss": 0.4338, "step": 367 }, { "epoch": 0.006563692790639604, "grad_norm": 1.8239957094192505, "learning_rate": 3.28219764537995e-06, "loss": 0.4752, "step": 368 }, { "epoch": 0.006581528912353298, "grad_norm": 1.3585281372070312, "learning_rate": 3.2911166607206565e-06, "loss": 0.5186, "step": 369 }, { "epoch": 0.0065993650340669925, "grad_norm": 1.4697260856628418, "learning_rate": 3.300035676061363e-06, "loss": 0.53, "step": 370 }, { "epoch": 0.0066172011557806875, "grad_norm": 1.2938551902770996, "learning_rate": 3.3089546914020692e-06, "loss": 0.4337, "step": 371 }, { "epoch": 0.0066350372774943815, "grad_norm": 1.4707707166671753, "learning_rate": 3.317873706742776e-06, "loss": 0.482, "step": 372 }, { "epoch": 0.0066528733992080764, "grad_norm": 2.7919442653656006, "learning_rate": 3.326792722083482e-06, "loss": 0.4809, "step": 373 }, { "epoch": 0.0066707095209217705, "grad_norm": 1.4272215366363525, "learning_rate": 3.335711737424189e-06, "loss": 0.5062, "step": 374 }, { "epoch": 0.006688545642635465, "grad_norm": 1.4810606241226196, "learning_rate": 3.3446307527648948e-06, "loss": 0.5378, "step": 375 }, { "epoch": 0.00670638176434916, "grad_norm": 1.3353627920150757, "learning_rate": 3.3535497681056016e-06, "loss": 0.5338, "step": 376 }, { "epoch": 0.006724217886062854, "grad_norm": 1.8348220586776733, "learning_rate": 3.3624687834463075e-06, "loss": 0.4637, "step": 377 }, { "epoch": 0.006742054007776549, "grad_norm": 1.79783034324646, "learning_rate": 3.371387798787014e-06, "loss": 0.5172, "step": 378 }, { "epoch": 0.006759890129490243, "grad_norm": 1.573477864265442, "learning_rate": 3.3803068141277207e-06, "loss": 0.4414, "step": 379 }, { "epoch": 0.006777726251203938, "grad_norm": 1.387686848640442, "learning_rate": 3.3892258294684267e-06, "loss": 0.4532, "step": 380 }, { "epoch": 0.006795562372917633, "grad_norm": 1.5073461532592773, "learning_rate": 3.3981448448091335e-06, "loss": 0.5559, "step": 381 }, { "epoch": 0.006813398494631327, "grad_norm": 1.1114751100540161, "learning_rate": 3.4070638601498394e-06, "loss": 0.4802, "step": 382 }, { "epoch": 0.006831234616345022, "grad_norm": 1.208899974822998, "learning_rate": 3.4159828754905462e-06, "loss": 0.4944, "step": 383 }, { "epoch": 0.006849070738058716, "grad_norm": 1.5076521635055542, "learning_rate": 3.424901890831252e-06, "loss": 0.4995, "step": 384 }, { "epoch": 0.006866906859772411, "grad_norm": 1.2716889381408691, "learning_rate": 3.433820906171959e-06, "loss": 0.4668, "step": 385 }, { "epoch": 0.006884742981486106, "grad_norm": 1.4431092739105225, "learning_rate": 3.4427399215126654e-06, "loss": 0.4506, "step": 386 }, { "epoch": 0.0069025791031998, "grad_norm": 1.7871001958847046, "learning_rate": 3.4516589368533713e-06, "loss": 0.4809, "step": 387 }, { "epoch": 0.006920415224913495, "grad_norm": 1.1170417070388794, "learning_rate": 3.460577952194078e-06, "loss": 0.4382, "step": 388 }, { "epoch": 0.006938251346627189, "grad_norm": 2.742556571960449, "learning_rate": 3.469496967534784e-06, "loss": 0.4807, "step": 389 }, { "epoch": 0.006956087468340884, "grad_norm": 1.1922411918640137, "learning_rate": 3.478415982875491e-06, "loss": 0.4868, "step": 390 }, { "epoch": 0.006973923590054579, "grad_norm": 1.179193139076233, "learning_rate": 3.487334998216197e-06, "loss": 0.4649, "step": 391 }, { "epoch": 0.006991759711768273, "grad_norm": 1.147597074508667, "learning_rate": 3.4962540135569037e-06, "loss": 0.4213, "step": 392 }, { "epoch": 0.007009595833481968, "grad_norm": 1.1630451679229736, "learning_rate": 3.5051730288976096e-06, "loss": 0.4022, "step": 393 }, { "epoch": 0.007027431955195662, "grad_norm": 1.588491439819336, "learning_rate": 3.5140920442383164e-06, "loss": 0.5299, "step": 394 }, { "epoch": 0.007045268076909357, "grad_norm": 2.2154128551483154, "learning_rate": 3.523011059579023e-06, "loss": 0.5224, "step": 395 }, { "epoch": 0.007063104198623052, "grad_norm": 2.1236326694488525, "learning_rate": 3.5319300749197288e-06, "loss": 0.5057, "step": 396 }, { "epoch": 0.007080940320336746, "grad_norm": 1.2421132326126099, "learning_rate": 3.5408490902604356e-06, "loss": 0.5127, "step": 397 }, { "epoch": 0.007098776442050441, "grad_norm": 1.6268608570098877, "learning_rate": 3.5497681056011415e-06, "loss": 0.5657, "step": 398 }, { "epoch": 0.007116612563764135, "grad_norm": 1.184870719909668, "learning_rate": 3.5586871209418483e-06, "loss": 0.4631, "step": 399 }, { "epoch": 0.00713444868547783, "grad_norm": 1.7901984453201294, "learning_rate": 3.5676061362825543e-06, "loss": 0.4324, "step": 400 }, { "epoch": 0.007152284807191525, "grad_norm": 1.1787022352218628, "learning_rate": 3.576525151623261e-06, "loss": 0.4364, "step": 401 }, { "epoch": 0.007170120928905219, "grad_norm": 2.1964609622955322, "learning_rate": 3.5854441669639675e-06, "loss": 0.6286, "step": 402 }, { "epoch": 0.007187957050618914, "grad_norm": 5.040952205657959, "learning_rate": 3.594363182304674e-06, "loss": 0.4476, "step": 403 }, { "epoch": 0.007205793172332608, "grad_norm": 1.3974846601486206, "learning_rate": 3.6032821976453802e-06, "loss": 0.5002, "step": 404 }, { "epoch": 0.007223629294046303, "grad_norm": 1.2034550905227661, "learning_rate": 3.612201212986086e-06, "loss": 0.4535, "step": 405 }, { "epoch": 0.0072414654157599975, "grad_norm": 1.8403637409210205, "learning_rate": 3.621120228326793e-06, "loss": 0.5211, "step": 406 }, { "epoch": 0.0072593015374736915, "grad_norm": 2.155287504196167, "learning_rate": 3.630039243667499e-06, "loss": 0.5379, "step": 407 }, { "epoch": 0.0072771376591873865, "grad_norm": 1.310027003288269, "learning_rate": 3.6389582590082057e-06, "loss": 0.5387, "step": 408 }, { "epoch": 0.0072949737809010805, "grad_norm": 1.2275962829589844, "learning_rate": 3.6478772743489117e-06, "loss": 0.4531, "step": 409 }, { "epoch": 0.007312809902614775, "grad_norm": 1.025327444076538, "learning_rate": 3.6567962896896185e-06, "loss": 0.4605, "step": 410 }, { "epoch": 0.00733064602432847, "grad_norm": 1.4332187175750732, "learning_rate": 3.665715305030325e-06, "loss": 0.6221, "step": 411 }, { "epoch": 0.007348482146042164, "grad_norm": 1.6840649843215942, "learning_rate": 3.6746343203710317e-06, "loss": 0.4743, "step": 412 }, { "epoch": 0.007366318267755859, "grad_norm": 1.555158019065857, "learning_rate": 3.6835533357117376e-06, "loss": 0.4867, "step": 413 }, { "epoch": 0.007384154389469553, "grad_norm": 1.8823628425598145, "learning_rate": 3.6924723510524436e-06, "loss": 0.3919, "step": 414 }, { "epoch": 0.007401990511183248, "grad_norm": 1.327907681465149, "learning_rate": 3.7013913663931504e-06, "loss": 0.4655, "step": 415 }, { "epoch": 0.007419826632896943, "grad_norm": 1.3640567064285278, "learning_rate": 3.7103103817338564e-06, "loss": 0.4552, "step": 416 }, { "epoch": 0.007437662754610637, "grad_norm": 1.4085242748260498, "learning_rate": 3.719229397074563e-06, "loss": 0.5038, "step": 417 }, { "epoch": 0.007455498876324332, "grad_norm": 1.4482516050338745, "learning_rate": 3.7281484124152695e-06, "loss": 0.5309, "step": 418 }, { "epoch": 0.007473334998038026, "grad_norm": 1.718651533126831, "learning_rate": 3.737067427755976e-06, "loss": 0.4717, "step": 419 }, { "epoch": 0.007491171119751721, "grad_norm": 1.246842622756958, "learning_rate": 3.7459864430966823e-06, "loss": 0.5153, "step": 420 }, { "epoch": 0.007509007241465416, "grad_norm": 1.363585114479065, "learning_rate": 3.754905458437389e-06, "loss": 0.4304, "step": 421 }, { "epoch": 0.00752684336317911, "grad_norm": 1.3058159351348877, "learning_rate": 3.763824473778095e-06, "loss": 0.5269, "step": 422 }, { "epoch": 0.007544679484892805, "grad_norm": 1.8864489793777466, "learning_rate": 3.772743489118801e-06, "loss": 0.4435, "step": 423 }, { "epoch": 0.007562515606606499, "grad_norm": 3.019227981567383, "learning_rate": 3.781662504459508e-06, "loss": 0.4389, "step": 424 }, { "epoch": 0.007580351728320194, "grad_norm": 1.8623355627059937, "learning_rate": 3.7905815198002138e-06, "loss": 0.5844, "step": 425 }, { "epoch": 0.007598187850033889, "grad_norm": 3.843503713607788, "learning_rate": 3.7995005351409206e-06, "loss": 0.3918, "step": 426 }, { "epoch": 0.007616023971747583, "grad_norm": 1.4885210990905762, "learning_rate": 3.808419550481627e-06, "loss": 0.5026, "step": 427 }, { "epoch": 0.007633860093461278, "grad_norm": 1.3066977262496948, "learning_rate": 3.817338565822333e-06, "loss": 0.4086, "step": 428 }, { "epoch": 0.007651696215174972, "grad_norm": 1.1006132364273071, "learning_rate": 3.82625758116304e-06, "loss": 0.4779, "step": 429 }, { "epoch": 0.007669532336888667, "grad_norm": 1.1822632551193237, "learning_rate": 3.835176596503746e-06, "loss": 0.4337, "step": 430 }, { "epoch": 0.007687368458602362, "grad_norm": 1.4812755584716797, "learning_rate": 3.8440956118444525e-06, "loss": 0.5076, "step": 431 }, { "epoch": 0.007705204580316056, "grad_norm": 1.482459545135498, "learning_rate": 3.853014627185159e-06, "loss": 0.3985, "step": 432 }, { "epoch": 0.007723040702029751, "grad_norm": 1.344508171081543, "learning_rate": 3.861933642525865e-06, "loss": 0.4249, "step": 433 }, { "epoch": 0.007740876823743445, "grad_norm": 2.2285561561584473, "learning_rate": 3.870852657866572e-06, "loss": 0.5626, "step": 434 }, { "epoch": 0.00775871294545714, "grad_norm": 1.119579553604126, "learning_rate": 3.879771673207278e-06, "loss": 0.4266, "step": 435 }, { "epoch": 0.007776549067170835, "grad_norm": 3.160632610321045, "learning_rate": 3.888690688547984e-06, "loss": 0.5444, "step": 436 }, { "epoch": 0.007794385188884529, "grad_norm": 1.4120960235595703, "learning_rate": 3.897609703888691e-06, "loss": 0.4857, "step": 437 }, { "epoch": 0.007812221310598224, "grad_norm": 1.0629377365112305, "learning_rate": 3.906528719229397e-06, "loss": 0.5042, "step": 438 }, { "epoch": 0.007830057432311919, "grad_norm": 1.1580361127853394, "learning_rate": 3.9154477345701035e-06, "loss": 0.4843, "step": 439 }, { "epoch": 0.007847893554025613, "grad_norm": 1.8523987531661987, "learning_rate": 3.92436674991081e-06, "loss": 0.4615, "step": 440 }, { "epoch": 0.007865729675739307, "grad_norm": 1.5833494663238525, "learning_rate": 3.933285765251517e-06, "loss": 0.5434, "step": 441 }, { "epoch": 0.007883565797453002, "grad_norm": 2.201798677444458, "learning_rate": 3.942204780592223e-06, "loss": 0.478, "step": 442 }, { "epoch": 0.007901401919166696, "grad_norm": 3.5636396408081055, "learning_rate": 3.951123795932929e-06, "loss": 0.5597, "step": 443 }, { "epoch": 0.007919238040880391, "grad_norm": 1.3248926401138306, "learning_rate": 3.9600428112736354e-06, "loss": 0.5368, "step": 444 }, { "epoch": 0.007937074162594086, "grad_norm": 1.5661994218826294, "learning_rate": 3.968961826614342e-06, "loss": 0.514, "step": 445 }, { "epoch": 0.00795491028430778, "grad_norm": 1.6248027086257935, "learning_rate": 3.977880841955048e-06, "loss": 0.4774, "step": 446 }, { "epoch": 0.007972746406021474, "grad_norm": 1.3313586711883545, "learning_rate": 3.986799857295755e-06, "loss": 0.5059, "step": 447 }, { "epoch": 0.00799058252773517, "grad_norm": 1.2870584726333618, "learning_rate": 3.995718872636462e-06, "loss": 0.5068, "step": 448 }, { "epoch": 0.008008418649448864, "grad_norm": 1.2870802879333496, "learning_rate": 4.004637887977167e-06, "loss": 0.4465, "step": 449 }, { "epoch": 0.00802625477116256, "grad_norm": 1.390815019607544, "learning_rate": 4.0135569033178746e-06, "loss": 0.5422, "step": 450 }, { "epoch": 0.008044090892876252, "grad_norm": 1.075946569442749, "learning_rate": 4.02247591865858e-06, "loss": 0.44, "step": 451 }, { "epoch": 0.008061927014589947, "grad_norm": 0.9873358011245728, "learning_rate": 4.0313949339992865e-06, "loss": 0.4567, "step": 452 }, { "epoch": 0.008079763136303642, "grad_norm": 1.2685288190841675, "learning_rate": 4.040313949339993e-06, "loss": 0.5163, "step": 453 }, { "epoch": 0.008097599258017337, "grad_norm": 1.3385316133499146, "learning_rate": 4.049232964680699e-06, "loss": 0.4355, "step": 454 }, { "epoch": 0.008115435379731032, "grad_norm": 1.371286392211914, "learning_rate": 4.058151980021406e-06, "loss": 0.4362, "step": 455 }, { "epoch": 0.008133271501444725, "grad_norm": 0.9873465299606323, "learning_rate": 4.067070995362112e-06, "loss": 0.4447, "step": 456 }, { "epoch": 0.00815110762315842, "grad_norm": 6.998069763183594, "learning_rate": 4.075990010702819e-06, "loss": 0.5213, "step": 457 }, { "epoch": 0.008168943744872115, "grad_norm": 1.1350767612457275, "learning_rate": 4.084909026043525e-06, "loss": 0.4208, "step": 458 }, { "epoch": 0.00818677986658581, "grad_norm": 3.4948246479034424, "learning_rate": 4.093828041384232e-06, "loss": 0.5256, "step": 459 }, { "epoch": 0.008204615988299505, "grad_norm": 0.9854333400726318, "learning_rate": 4.1027470567249375e-06, "loss": 0.4685, "step": 460 }, { "epoch": 0.008222452110013198, "grad_norm": 1.5371170043945312, "learning_rate": 4.111666072065644e-06, "loss": 0.4657, "step": 461 }, { "epoch": 0.008240288231726893, "grad_norm": 1.978710412979126, "learning_rate": 4.12058508740635e-06, "loss": 0.4877, "step": 462 }, { "epoch": 0.008258124353440588, "grad_norm": 1.7735456228256226, "learning_rate": 4.129504102747057e-06, "loss": 0.5351, "step": 463 }, { "epoch": 0.008275960475154283, "grad_norm": 1.8886734247207642, "learning_rate": 4.138423118087764e-06, "loss": 0.4245, "step": 464 }, { "epoch": 0.008293796596867978, "grad_norm": 1.1141098737716675, "learning_rate": 4.1473421334284694e-06, "loss": 0.49, "step": 465 }, { "epoch": 0.008311632718581671, "grad_norm": 1.384822130203247, "learning_rate": 4.156261148769177e-06, "loss": 0.472, "step": 466 }, { "epoch": 0.008329468840295366, "grad_norm": 1.2739297151565552, "learning_rate": 4.165180164109882e-06, "loss": 0.4554, "step": 467 }, { "epoch": 0.00834730496200906, "grad_norm": 2.041304111480713, "learning_rate": 4.174099179450589e-06, "loss": 0.4724, "step": 468 }, { "epoch": 0.008365141083722756, "grad_norm": 1.703652262687683, "learning_rate": 4.183018194791295e-06, "loss": 0.4973, "step": 469 }, { "epoch": 0.00838297720543645, "grad_norm": 1.402593970298767, "learning_rate": 4.191937210132001e-06, "loss": 0.4945, "step": 470 }, { "epoch": 0.008400813327150144, "grad_norm": 2.099918842315674, "learning_rate": 4.200856225472708e-06, "loss": 0.4303, "step": 471 }, { "epoch": 0.008418649448863839, "grad_norm": 0.8471234440803528, "learning_rate": 4.209775240813414e-06, "loss": 0.3937, "step": 472 }, { "epoch": 0.008436485570577534, "grad_norm": 1.1729215383529663, "learning_rate": 4.218694256154121e-06, "loss": 0.4453, "step": 473 }, { "epoch": 0.008454321692291229, "grad_norm": 2.4997663497924805, "learning_rate": 4.227613271494827e-06, "loss": 0.4948, "step": 474 }, { "epoch": 0.008472157814004923, "grad_norm": 1.68740975856781, "learning_rate": 4.236532286835534e-06, "loss": 0.5015, "step": 475 }, { "epoch": 0.008489993935718617, "grad_norm": 1.1653437614440918, "learning_rate": 4.24545130217624e-06, "loss": 0.4326, "step": 476 }, { "epoch": 0.008507830057432312, "grad_norm": 1.3887028694152832, "learning_rate": 4.254370317516947e-06, "loss": 0.4663, "step": 477 }, { "epoch": 0.008525666179146007, "grad_norm": 1.160707950592041, "learning_rate": 4.263289332857652e-06, "loss": 0.4161, "step": 478 }, { "epoch": 0.008543502300859701, "grad_norm": 1.1170053482055664, "learning_rate": 4.272208348198359e-06, "loss": 0.451, "step": 479 }, { "epoch": 0.008561338422573396, "grad_norm": 1.5061770677566528, "learning_rate": 4.281127363539066e-06, "loss": 0.5079, "step": 480 }, { "epoch": 0.00857917454428709, "grad_norm": 3.0044429302215576, "learning_rate": 4.2900463788797715e-06, "loss": 0.4474, "step": 481 }, { "epoch": 0.008597010666000784, "grad_norm": 1.0271059274673462, "learning_rate": 4.298965394220479e-06, "loss": 0.3791, "step": 482 }, { "epoch": 0.00861484678771448, "grad_norm": 1.7308597564697266, "learning_rate": 4.307884409561184e-06, "loss": 0.5119, "step": 483 }, { "epoch": 0.008632682909428174, "grad_norm": 1.008509874343872, "learning_rate": 4.3168034249018915e-06, "loss": 0.4223, "step": 484 }, { "epoch": 0.00865051903114187, "grad_norm": 1.2962771654129028, "learning_rate": 4.325722440242597e-06, "loss": 0.5098, "step": 485 }, { "epoch": 0.008668355152855562, "grad_norm": 1.059078335762024, "learning_rate": 4.334641455583304e-06, "loss": 0.3876, "step": 486 }, { "epoch": 0.008686191274569257, "grad_norm": 1.275657296180725, "learning_rate": 4.34356047092401e-06, "loss": 0.4581, "step": 487 }, { "epoch": 0.008704027396282952, "grad_norm": 1.208746075630188, "learning_rate": 4.352479486264716e-06, "loss": 0.4419, "step": 488 }, { "epoch": 0.008721863517996647, "grad_norm": 1.7713290452957153, "learning_rate": 4.361398501605423e-06, "loss": 0.5964, "step": 489 }, { "epoch": 0.008739699639710342, "grad_norm": 1.4434576034545898, "learning_rate": 4.370317516946129e-06, "loss": 0.4817, "step": 490 }, { "epoch": 0.008757535761424035, "grad_norm": 1.8831814527511597, "learning_rate": 4.379236532286836e-06, "loss": 0.5045, "step": 491 }, { "epoch": 0.00877537188313773, "grad_norm": 2.0850300788879395, "learning_rate": 4.388155547627542e-06, "loss": 0.5111, "step": 492 }, { "epoch": 0.008793208004851425, "grad_norm": 1.17160964012146, "learning_rate": 4.397074562968249e-06, "loss": 0.4362, "step": 493 }, { "epoch": 0.00881104412656512, "grad_norm": 2.0882840156555176, "learning_rate": 4.4059935783089545e-06, "loss": 0.3787, "step": 494 }, { "epoch": 0.008828880248278815, "grad_norm": 4.528671741485596, "learning_rate": 4.414912593649662e-06, "loss": 0.4315, "step": 495 }, { "epoch": 0.008846716369992508, "grad_norm": 1.4445877075195312, "learning_rate": 4.423831608990368e-06, "loss": 0.3801, "step": 496 }, { "epoch": 0.008864552491706203, "grad_norm": 1.9293122291564941, "learning_rate": 4.432750624331074e-06, "loss": 0.4549, "step": 497 }, { "epoch": 0.008882388613419898, "grad_norm": 1.4013196229934692, "learning_rate": 4.441669639671781e-06, "loss": 0.5211, "step": 498 }, { "epoch": 0.008900224735133593, "grad_norm": 1.7515922784805298, "learning_rate": 4.450588655012486e-06, "loss": 0.4308, "step": 499 }, { "epoch": 0.008918060856847288, "grad_norm": 3.94881272315979, "learning_rate": 4.459507670353194e-06, "loss": 0.5709, "step": 500 }, { "epoch": 0.008935896978560981, "grad_norm": 3.567277431488037, "learning_rate": 4.468426685693899e-06, "loss": 0.4617, "step": 501 }, { "epoch": 0.008953733100274676, "grad_norm": 1.5492817163467407, "learning_rate": 4.477345701034606e-06, "loss": 0.4814, "step": 502 }, { "epoch": 0.00897156922198837, "grad_norm": 0.9941692352294922, "learning_rate": 4.486264716375312e-06, "loss": 0.4129, "step": 503 }, { "epoch": 0.008989405343702066, "grad_norm": 1.9810105562210083, "learning_rate": 4.495183731716019e-06, "loss": 0.4271, "step": 504 }, { "epoch": 0.00900724146541576, "grad_norm": 4.075789451599121, "learning_rate": 4.5041027470567255e-06, "loss": 0.5615, "step": 505 }, { "epoch": 0.009025077587129454, "grad_norm": 0.9861582517623901, "learning_rate": 4.513021762397431e-06, "loss": 0.4656, "step": 506 }, { "epoch": 0.009042913708843149, "grad_norm": 1.3125861883163452, "learning_rate": 4.521940777738138e-06, "loss": 0.477, "step": 507 }, { "epoch": 0.009060749830556844, "grad_norm": 1.2995643615722656, "learning_rate": 4.530859793078844e-06, "loss": 0.4401, "step": 508 }, { "epoch": 0.009078585952270539, "grad_norm": 1.8894366025924683, "learning_rate": 4.539778808419551e-06, "loss": 0.4924, "step": 509 }, { "epoch": 0.009096422073984234, "grad_norm": 1.2428226470947266, "learning_rate": 4.5486978237602565e-06, "loss": 0.4792, "step": 510 }, { "epoch": 0.009114258195697927, "grad_norm": 1.9830949306488037, "learning_rate": 4.557616839100964e-06, "loss": 0.4324, "step": 511 }, { "epoch": 0.009132094317411622, "grad_norm": 1.3322826623916626, "learning_rate": 4.56653585444167e-06, "loss": 0.5436, "step": 512 }, { "epoch": 0.009149930439125317, "grad_norm": 1.3594346046447754, "learning_rate": 4.5754548697823765e-06, "loss": 0.4621, "step": 513 }, { "epoch": 0.009167766560839011, "grad_norm": 1.4227405786514282, "learning_rate": 4.584373885123083e-06, "loss": 0.4373, "step": 514 }, { "epoch": 0.009185602682552706, "grad_norm": 1.657763123512268, "learning_rate": 4.5932929004637884e-06, "loss": 0.4845, "step": 515 }, { "epoch": 0.0092034388042664, "grad_norm": 1.3375740051269531, "learning_rate": 4.602211915804496e-06, "loss": 0.4677, "step": 516 }, { "epoch": 0.009221274925980094, "grad_norm": 1.4107847213745117, "learning_rate": 4.611130931145201e-06, "loss": 0.448, "step": 517 }, { "epoch": 0.00923911104769379, "grad_norm": 1.0197649002075195, "learning_rate": 4.6200499464859084e-06, "loss": 0.4242, "step": 518 }, { "epoch": 0.009256947169407484, "grad_norm": 1.2332050800323486, "learning_rate": 4.628968961826614e-06, "loss": 0.4631, "step": 519 }, { "epoch": 0.00927478329112118, "grad_norm": 1.6185851097106934, "learning_rate": 4.637887977167321e-06, "loss": 0.471, "step": 520 }, { "epoch": 0.009292619412834872, "grad_norm": 1.300220012664795, "learning_rate": 4.6468069925080276e-06, "loss": 0.4296, "step": 521 }, { "epoch": 0.009310455534548567, "grad_norm": 1.1333004236221313, "learning_rate": 4.655726007848734e-06, "loss": 0.4188, "step": 522 }, { "epoch": 0.009328291656262262, "grad_norm": 1.6971864700317383, "learning_rate": 4.66464502318944e-06, "loss": 0.5606, "step": 523 }, { "epoch": 0.009346127777975957, "grad_norm": 1.00901198387146, "learning_rate": 4.673564038530146e-06, "loss": 0.4002, "step": 524 }, { "epoch": 0.009363963899689652, "grad_norm": 1.1150277853012085, "learning_rate": 4.682483053870853e-06, "loss": 0.4832, "step": 525 }, { "epoch": 0.009381800021403345, "grad_norm": 1.2056350708007812, "learning_rate": 4.691402069211559e-06, "loss": 0.4359, "step": 526 }, { "epoch": 0.00939963614311704, "grad_norm": 1.6105479001998901, "learning_rate": 4.700321084552266e-06, "loss": 0.4294, "step": 527 }, { "epoch": 0.009417472264830735, "grad_norm": 1.207774043083191, "learning_rate": 4.709240099892972e-06, "loss": 0.4171, "step": 528 }, { "epoch": 0.00943530838654443, "grad_norm": 1.2855324745178223, "learning_rate": 4.718159115233679e-06, "loss": 0.4268, "step": 529 }, { "epoch": 0.009453144508258125, "grad_norm": 1.6374174356460571, "learning_rate": 4.727078130574385e-06, "loss": 0.445, "step": 530 }, { "epoch": 0.009470980629971818, "grad_norm": 1.2425190210342407, "learning_rate": 4.735997145915091e-06, "loss": 0.4469, "step": 531 }, { "epoch": 0.009488816751685513, "grad_norm": 1.3253897428512573, "learning_rate": 4.744916161255798e-06, "loss": 0.4303, "step": 532 }, { "epoch": 0.009506652873399208, "grad_norm": 1.1075959205627441, "learning_rate": 4.753835176596504e-06, "loss": 0.486, "step": 533 }, { "epoch": 0.009524488995112903, "grad_norm": 1.1825791597366333, "learning_rate": 4.7627541919372105e-06, "loss": 0.3865, "step": 534 }, { "epoch": 0.009542325116826598, "grad_norm": 1.493388295173645, "learning_rate": 4.771673207277916e-06, "loss": 0.5005, "step": 535 }, { "epoch": 0.009560161238540291, "grad_norm": 1.1791242361068726, "learning_rate": 4.780592222618623e-06, "loss": 0.4891, "step": 536 }, { "epoch": 0.009577997360253986, "grad_norm": 1.343740701675415, "learning_rate": 4.78951123795933e-06, "loss": 0.4821, "step": 537 }, { "epoch": 0.00959583348196768, "grad_norm": 1.3518675565719604, "learning_rate": 4.798430253300036e-06, "loss": 0.4596, "step": 538 }, { "epoch": 0.009613669603681376, "grad_norm": 1.4564203023910522, "learning_rate": 4.807349268640742e-06, "loss": 0.4375, "step": 539 }, { "epoch": 0.00963150572539507, "grad_norm": 3.196500062942505, "learning_rate": 4.816268283981449e-06, "loss": 0.4146, "step": 540 }, { "epoch": 0.009649341847108764, "grad_norm": 1.102216124534607, "learning_rate": 4.825187299322155e-06, "loss": 0.4836, "step": 541 }, { "epoch": 0.009667177968822459, "grad_norm": 1.8648408651351929, "learning_rate": 4.8341063146628616e-06, "loss": 0.511, "step": 542 }, { "epoch": 0.009685014090536154, "grad_norm": 0.9851669669151306, "learning_rate": 4.843025330003568e-06, "loss": 0.3768, "step": 543 }, { "epoch": 0.009702850212249849, "grad_norm": 1.006560206413269, "learning_rate": 4.851944345344274e-06, "loss": 0.4066, "step": 544 }, { "epoch": 0.009720686333963544, "grad_norm": 1.0856674909591675, "learning_rate": 4.860863360684981e-06, "loss": 0.3364, "step": 545 }, { "epoch": 0.009738522455677237, "grad_norm": 1.352399230003357, "learning_rate": 4.869782376025687e-06, "loss": 0.4577, "step": 546 }, { "epoch": 0.009756358577390932, "grad_norm": 1.6652281284332275, "learning_rate": 4.8787013913663935e-06, "loss": 0.413, "step": 547 }, { "epoch": 0.009774194699104627, "grad_norm": 1.9173638820648193, "learning_rate": 4.8876204067071e-06, "loss": 0.4688, "step": 548 }, { "epoch": 0.009792030820818321, "grad_norm": 0.9917348623275757, "learning_rate": 4.896539422047806e-06, "loss": 0.425, "step": 549 }, { "epoch": 0.009809866942532016, "grad_norm": 1.6308515071868896, "learning_rate": 4.905458437388513e-06, "loss": 0.4273, "step": 550 }, { "epoch": 0.00982770306424571, "grad_norm": 1.0930702686309814, "learning_rate": 4.914377452729219e-06, "loss": 0.4444, "step": 551 }, { "epoch": 0.009845539185959404, "grad_norm": 1.4137258529663086, "learning_rate": 4.923296468069925e-06, "loss": 0.442, "step": 552 }, { "epoch": 0.0098633753076731, "grad_norm": 1.506640076637268, "learning_rate": 4.932215483410632e-06, "loss": 0.4724, "step": 553 }, { "epoch": 0.009881211429386794, "grad_norm": 1.3620201349258423, "learning_rate": 4.941134498751338e-06, "loss": 0.4142, "step": 554 }, { "epoch": 0.00989904755110049, "grad_norm": 1.3157777786254883, "learning_rate": 4.9500535140920445e-06, "loss": 0.4635, "step": 555 }, { "epoch": 0.009916883672814182, "grad_norm": 1.209380030632019, "learning_rate": 4.958972529432751e-06, "loss": 0.4792, "step": 556 }, { "epoch": 0.009934719794527877, "grad_norm": 1.1213328838348389, "learning_rate": 4.967891544773457e-06, "loss": 0.4473, "step": 557 }, { "epoch": 0.009952555916241572, "grad_norm": 1.7144397497177124, "learning_rate": 4.976810560114164e-06, "loss": 0.496, "step": 558 }, { "epoch": 0.009970392037955267, "grad_norm": 1.9864416122436523, "learning_rate": 4.98572957545487e-06, "loss": 0.5314, "step": 559 }, { "epoch": 0.009988228159668962, "grad_norm": 1.0490071773529053, "learning_rate": 4.994648590795576e-06, "loss": 0.4173, "step": 560 }, { "epoch": 0.010006064281382655, "grad_norm": 1.5442557334899902, "learning_rate": 5.003567606136283e-06, "loss": 0.4887, "step": 561 }, { "epoch": 0.01002390040309635, "grad_norm": 0.9276285171508789, "learning_rate": 5.012486621476989e-06, "loss": 0.4398, "step": 562 }, { "epoch": 0.010041736524810045, "grad_norm": 1.0979938507080078, "learning_rate": 5.0214056368176956e-06, "loss": 0.4143, "step": 563 }, { "epoch": 0.01005957264652374, "grad_norm": 2.0221452713012695, "learning_rate": 5.030324652158402e-06, "loss": 0.4373, "step": 564 }, { "epoch": 0.010077408768237435, "grad_norm": 2.4793107509613037, "learning_rate": 5.039243667499108e-06, "loss": 0.4251, "step": 565 }, { "epoch": 0.010095244889951128, "grad_norm": 1.1153239011764526, "learning_rate": 5.048162682839815e-06, "loss": 0.4382, "step": 566 }, { "epoch": 0.010113081011664823, "grad_norm": 1.1595510244369507, "learning_rate": 5.057081698180521e-06, "loss": 0.4315, "step": 567 }, { "epoch": 0.010130917133378518, "grad_norm": 1.217896580696106, "learning_rate": 5.0660007135212275e-06, "loss": 0.4296, "step": 568 }, { "epoch": 0.010148753255092213, "grad_norm": 1.097383975982666, "learning_rate": 5.074919728861934e-06, "loss": 0.3926, "step": 569 }, { "epoch": 0.010166589376805908, "grad_norm": 1.653509497642517, "learning_rate": 5.08383874420264e-06, "loss": 0.5525, "step": 570 }, { "epoch": 0.010184425498519603, "grad_norm": 1.1412622928619385, "learning_rate": 5.092757759543347e-06, "loss": 0.467, "step": 571 }, { "epoch": 0.010202261620233296, "grad_norm": 1.1681066751480103, "learning_rate": 5.101676774884053e-06, "loss": 0.419, "step": 572 }, { "epoch": 0.01022009774194699, "grad_norm": 1.49830162525177, "learning_rate": 5.110595790224759e-06, "loss": 0.503, "step": 573 }, { "epoch": 0.010237933863660686, "grad_norm": 1.6811169385910034, "learning_rate": 5.119514805565466e-06, "loss": 0.4756, "step": 574 }, { "epoch": 0.01025576998537438, "grad_norm": 1.2853678464889526, "learning_rate": 5.128433820906172e-06, "loss": 0.3656, "step": 575 }, { "epoch": 0.010273606107088076, "grad_norm": 1.0954298973083496, "learning_rate": 5.1373528362468785e-06, "loss": 0.4478, "step": 576 }, { "epoch": 0.010291442228801769, "grad_norm": 0.8055521249771118, "learning_rate": 5.146271851587585e-06, "loss": 0.3895, "step": 577 }, { "epoch": 0.010309278350515464, "grad_norm": 1.025450587272644, "learning_rate": 5.155190866928291e-06, "loss": 0.3921, "step": 578 }, { "epoch": 0.010327114472229159, "grad_norm": 1.1558235883712769, "learning_rate": 5.164109882268998e-06, "loss": 0.4946, "step": 579 }, { "epoch": 0.010344950593942854, "grad_norm": 1.2848806381225586, "learning_rate": 5.173028897609704e-06, "loss": 0.4041, "step": 580 }, { "epoch": 0.010362786715656548, "grad_norm": 1.412864327430725, "learning_rate": 5.18194791295041e-06, "loss": 0.4781, "step": 581 }, { "epoch": 0.010380622837370242, "grad_norm": 1.0885170698165894, "learning_rate": 5.190866928291117e-06, "loss": 0.4267, "step": 582 }, { "epoch": 0.010398458959083937, "grad_norm": 1.2009660005569458, "learning_rate": 5.199785943631823e-06, "loss": 0.4196, "step": 583 }, { "epoch": 0.010416295080797632, "grad_norm": 1.0177676677703857, "learning_rate": 5.2087049589725295e-06, "loss": 0.4083, "step": 584 }, { "epoch": 0.010434131202511326, "grad_norm": 1.601419448852539, "learning_rate": 5.217623974313236e-06, "loss": 0.3974, "step": 585 }, { "epoch": 0.010451967324225021, "grad_norm": 2.39469838142395, "learning_rate": 5.226542989653942e-06, "loss": 0.4309, "step": 586 }, { "epoch": 0.010469803445938715, "grad_norm": 0.9701213240623474, "learning_rate": 5.2354620049946495e-06, "loss": 0.4188, "step": 587 }, { "epoch": 0.01048763956765241, "grad_norm": 1.7994223833084106, "learning_rate": 5.244381020335355e-06, "loss": 0.497, "step": 588 }, { "epoch": 0.010505475689366104, "grad_norm": 0.9525409936904907, "learning_rate": 5.2533000356760614e-06, "loss": 0.4708, "step": 589 }, { "epoch": 0.0105233118110798, "grad_norm": 1.0439306497573853, "learning_rate": 5.262219051016768e-06, "loss": 0.3759, "step": 590 }, { "epoch": 0.010541147932793494, "grad_norm": 1.2639439105987549, "learning_rate": 5.271138066357474e-06, "loss": 0.4916, "step": 591 }, { "epoch": 0.010558984054507187, "grad_norm": 1.2964301109313965, "learning_rate": 5.280057081698181e-06, "loss": 0.5315, "step": 592 }, { "epoch": 0.010576820176220882, "grad_norm": 1.2846462726593018, "learning_rate": 5.288976097038887e-06, "loss": 0.4064, "step": 593 }, { "epoch": 0.010594656297934577, "grad_norm": 1.2763394117355347, "learning_rate": 5.297895112379593e-06, "loss": 0.3243, "step": 594 }, { "epoch": 0.010612492419648272, "grad_norm": 1.6506128311157227, "learning_rate": 5.3068141277203e-06, "loss": 0.4227, "step": 595 }, { "epoch": 0.010630328541361967, "grad_norm": 1.3190261125564575, "learning_rate": 5.315733143061007e-06, "loss": 0.4495, "step": 596 }, { "epoch": 0.01064816466307566, "grad_norm": 1.0525825023651123, "learning_rate": 5.3246521584017125e-06, "loss": 0.44, "step": 597 }, { "epoch": 0.010666000784789355, "grad_norm": 1.310211420059204, "learning_rate": 5.333571173742419e-06, "loss": 0.4271, "step": 598 }, { "epoch": 0.01068383690650305, "grad_norm": 1.5680582523345947, "learning_rate": 5.342490189083125e-06, "loss": 0.5139, "step": 599 }, { "epoch": 0.010701673028216745, "grad_norm": 1.169052243232727, "learning_rate": 5.351409204423832e-06, "loss": 0.3914, "step": 600 }, { "epoch": 0.01071950914993044, "grad_norm": 1.0858758687973022, "learning_rate": 5.360328219764538e-06, "loss": 0.5053, "step": 601 }, { "epoch": 0.010737345271644133, "grad_norm": 1.5651134252548218, "learning_rate": 5.369247235105244e-06, "loss": 0.387, "step": 602 }, { "epoch": 0.010755181393357828, "grad_norm": 1.2751938104629517, "learning_rate": 5.378166250445952e-06, "loss": 0.4827, "step": 603 }, { "epoch": 0.010773017515071523, "grad_norm": 1.2434167861938477, "learning_rate": 5.387085265786657e-06, "loss": 0.4936, "step": 604 }, { "epoch": 0.010790853636785218, "grad_norm": 1.2758976221084595, "learning_rate": 5.396004281127364e-06, "loss": 0.3476, "step": 605 }, { "epoch": 0.010808689758498913, "grad_norm": 1.2970452308654785, "learning_rate": 5.40492329646807e-06, "loss": 0.4917, "step": 606 }, { "epoch": 0.010826525880212606, "grad_norm": 0.9976676106452942, "learning_rate": 5.413842311808776e-06, "loss": 0.3342, "step": 607 }, { "epoch": 0.010844362001926301, "grad_norm": 1.245278239250183, "learning_rate": 5.422761327149483e-06, "loss": 0.4906, "step": 608 }, { "epoch": 0.010862198123639996, "grad_norm": 0.8573669791221619, "learning_rate": 5.431680342490189e-06, "loss": 0.3658, "step": 609 }, { "epoch": 0.01088003424535369, "grad_norm": 1.2254329919815063, "learning_rate": 5.440599357830896e-06, "loss": 0.4932, "step": 610 }, { "epoch": 0.010897870367067386, "grad_norm": 1.2445495128631592, "learning_rate": 5.449518373171602e-06, "loss": 0.4537, "step": 611 }, { "epoch": 0.010915706488781079, "grad_norm": 1.8366972208023071, "learning_rate": 5.458437388512309e-06, "loss": 0.471, "step": 612 }, { "epoch": 0.010933542610494774, "grad_norm": 1.1743152141571045, "learning_rate": 5.4673564038530146e-06, "loss": 0.4653, "step": 613 }, { "epoch": 0.010951378732208469, "grad_norm": 0.6553313136100769, "learning_rate": 5.476275419193722e-06, "loss": 0.3757, "step": 614 }, { "epoch": 0.010969214853922164, "grad_norm": 1.457043170928955, "learning_rate": 5.485194434534427e-06, "loss": 0.4795, "step": 615 }, { "epoch": 0.010987050975635859, "grad_norm": 1.235823631286621, "learning_rate": 5.494113449875134e-06, "loss": 0.3929, "step": 616 }, { "epoch": 0.011004887097349552, "grad_norm": 1.167606234550476, "learning_rate": 5.50303246521584e-06, "loss": 0.4161, "step": 617 }, { "epoch": 0.011022723219063247, "grad_norm": 1.3439496755599976, "learning_rate": 5.5119514805565465e-06, "loss": 0.4563, "step": 618 }, { "epoch": 0.011040559340776942, "grad_norm": 0.9970110058784485, "learning_rate": 5.520870495897254e-06, "loss": 0.3599, "step": 619 }, { "epoch": 0.011058395462490636, "grad_norm": 1.0019282102584839, "learning_rate": 5.529789511237959e-06, "loss": 0.3712, "step": 620 }, { "epoch": 0.011076231584204331, "grad_norm": 1.249360203742981, "learning_rate": 5.5387085265786665e-06, "loss": 0.4243, "step": 621 }, { "epoch": 0.011094067705918025, "grad_norm": 1.39357590675354, "learning_rate": 5.547627541919372e-06, "loss": 0.4015, "step": 622 }, { "epoch": 0.01111190382763172, "grad_norm": 1.085498332977295, "learning_rate": 5.556546557260079e-06, "loss": 0.4042, "step": 623 }, { "epoch": 0.011129739949345414, "grad_norm": 1.1499019861221313, "learning_rate": 5.565465572600785e-06, "loss": 0.4452, "step": 624 }, { "epoch": 0.01114757607105911, "grad_norm": 1.0987162590026855, "learning_rate": 5.574384587941491e-06, "loss": 0.4568, "step": 625 }, { "epoch": 0.011165412192772804, "grad_norm": 1.8718235492706299, "learning_rate": 5.583303603282198e-06, "loss": 0.3886, "step": 626 }, { "epoch": 0.011183248314486497, "grad_norm": 0.8406670689582825, "learning_rate": 5.592222618622904e-06, "loss": 0.4412, "step": 627 }, { "epoch": 0.011201084436200192, "grad_norm": 1.0919603109359741, "learning_rate": 5.601141633963611e-06, "loss": 0.4251, "step": 628 }, { "epoch": 0.011218920557913887, "grad_norm": 1.5491275787353516, "learning_rate": 5.610060649304317e-06, "loss": 0.4637, "step": 629 }, { "epoch": 0.011236756679627582, "grad_norm": 1.3336869478225708, "learning_rate": 5.618979664645024e-06, "loss": 0.433, "step": 630 }, { "epoch": 0.011254592801341277, "grad_norm": 1.1420201063156128, "learning_rate": 5.627898679985729e-06, "loss": 0.4171, "step": 631 }, { "epoch": 0.01127242892305497, "grad_norm": 1.058229923248291, "learning_rate": 5.636817695326437e-06, "loss": 0.4333, "step": 632 }, { "epoch": 0.011290265044768665, "grad_norm": 1.1878844499588013, "learning_rate": 5.645736710667142e-06, "loss": 0.3893, "step": 633 }, { "epoch": 0.01130810116648236, "grad_norm": 0.9159463047981262, "learning_rate": 5.654655726007849e-06, "loss": 0.3675, "step": 634 }, { "epoch": 0.011325937288196055, "grad_norm": 1.9920088052749634, "learning_rate": 5.663574741348556e-06, "loss": 0.4744, "step": 635 }, { "epoch": 0.01134377340990975, "grad_norm": 1.0459561347961426, "learning_rate": 5.672493756689261e-06, "loss": 0.4143, "step": 636 }, { "epoch": 0.011361609531623443, "grad_norm": 0.9108535647392273, "learning_rate": 5.6814127720299685e-06, "loss": 0.3295, "step": 637 }, { "epoch": 0.011379445653337138, "grad_norm": 1.207665205001831, "learning_rate": 5.690331787370674e-06, "loss": 0.3778, "step": 638 }, { "epoch": 0.011397281775050833, "grad_norm": 1.1077829599380493, "learning_rate": 5.699250802711381e-06, "loss": 0.4113, "step": 639 }, { "epoch": 0.011415117896764528, "grad_norm": 1.5911637544631958, "learning_rate": 5.708169818052087e-06, "loss": 0.426, "step": 640 }, { "epoch": 0.011432954018478223, "grad_norm": 1.0696449279785156, "learning_rate": 5.717088833392794e-06, "loss": 0.4117, "step": 641 }, { "epoch": 0.011450790140191916, "grad_norm": 0.9678241014480591, "learning_rate": 5.7260078487335005e-06, "loss": 0.4008, "step": 642 }, { "epoch": 0.011468626261905611, "grad_norm": 1.048011064529419, "learning_rate": 5.734926864074207e-06, "loss": 0.3812, "step": 643 }, { "epoch": 0.011486462383619306, "grad_norm": 1.1569571495056152, "learning_rate": 5.743845879414913e-06, "loss": 0.3869, "step": 644 }, { "epoch": 0.011504298505333, "grad_norm": 1.0135393142700195, "learning_rate": 5.752764894755619e-06, "loss": 0.3841, "step": 645 }, { "epoch": 0.011522134627046696, "grad_norm": 0.8639402389526367, "learning_rate": 5.761683910096326e-06, "loss": 0.3285, "step": 646 }, { "epoch": 0.011539970748760389, "grad_norm": 1.4575084447860718, "learning_rate": 5.7706029254370315e-06, "loss": 0.4309, "step": 647 }, { "epoch": 0.011557806870474084, "grad_norm": 1.0409088134765625, "learning_rate": 5.779521940777739e-06, "loss": 0.4194, "step": 648 }, { "epoch": 0.011575642992187779, "grad_norm": 0.8197188377380371, "learning_rate": 5.788440956118444e-06, "loss": 0.4557, "step": 649 }, { "epoch": 0.011593479113901474, "grad_norm": 0.9931203126907349, "learning_rate": 5.7973599714591515e-06, "loss": 0.3672, "step": 650 }, { "epoch": 0.011611315235615169, "grad_norm": 2.18489933013916, "learning_rate": 5.806278986799858e-06, "loss": 0.4526, "step": 651 }, { "epoch": 0.011629151357328862, "grad_norm": 0.8428823351860046, "learning_rate": 5.815198002140564e-06, "loss": 0.4116, "step": 652 }, { "epoch": 0.011646987479042557, "grad_norm": 0.8353315591812134, "learning_rate": 5.824117017481271e-06, "loss": 0.421, "step": 653 }, { "epoch": 0.011664823600756252, "grad_norm": 1.0589710474014282, "learning_rate": 5.833036032821976e-06, "loss": 0.4442, "step": 654 }, { "epoch": 0.011682659722469946, "grad_norm": 1.1102938652038574, "learning_rate": 5.841955048162683e-06, "loss": 0.3566, "step": 655 }, { "epoch": 0.011700495844183641, "grad_norm": 1.614175796508789, "learning_rate": 5.850874063503389e-06, "loss": 0.4367, "step": 656 }, { "epoch": 0.011718331965897335, "grad_norm": 1.0842281579971313, "learning_rate": 5.859793078844096e-06, "loss": 0.4961, "step": 657 }, { "epoch": 0.01173616808761103, "grad_norm": 0.9871965050697327, "learning_rate": 5.8687120941848025e-06, "loss": 0.4513, "step": 658 }, { "epoch": 0.011754004209324724, "grad_norm": 0.9325709342956543, "learning_rate": 5.877631109525509e-06, "loss": 0.4133, "step": 659 }, { "epoch": 0.01177184033103842, "grad_norm": 0.9642876386642456, "learning_rate": 5.886550124866215e-06, "loss": 0.4521, "step": 660 }, { "epoch": 0.011789676452752114, "grad_norm": 1.7349592447280884, "learning_rate": 5.895469140206922e-06, "loss": 0.4424, "step": 661 }, { "epoch": 0.011807512574465807, "grad_norm": 1.0229498147964478, "learning_rate": 5.904388155547628e-06, "loss": 0.5209, "step": 662 }, { "epoch": 0.011825348696179502, "grad_norm": 0.950793981552124, "learning_rate": 5.913307170888334e-06, "loss": 0.4087, "step": 663 }, { "epoch": 0.011843184817893197, "grad_norm": 1.3746693134307861, "learning_rate": 5.922226186229041e-06, "loss": 0.4052, "step": 664 }, { "epoch": 0.011861020939606892, "grad_norm": 1.0733963251113892, "learning_rate": 5.931145201569746e-06, "loss": 0.4143, "step": 665 }, { "epoch": 0.011878857061320587, "grad_norm": 1.106656551361084, "learning_rate": 5.940064216910454e-06, "loss": 0.4357, "step": 666 }, { "epoch": 0.01189669318303428, "grad_norm": 1.124107003211975, "learning_rate": 5.94898323225116e-06, "loss": 0.4674, "step": 667 }, { "epoch": 0.011914529304747975, "grad_norm": 1.2719773054122925, "learning_rate": 5.957902247591866e-06, "loss": 0.4667, "step": 668 }, { "epoch": 0.01193236542646167, "grad_norm": 1.3913277387619019, "learning_rate": 5.966821262932573e-06, "loss": 0.4545, "step": 669 }, { "epoch": 0.011950201548175365, "grad_norm": 1.3351109027862549, "learning_rate": 5.975740278273279e-06, "loss": 0.4962, "step": 670 }, { "epoch": 0.01196803766988906, "grad_norm": 1.03583824634552, "learning_rate": 5.9846592936139855e-06, "loss": 0.3828, "step": 671 }, { "epoch": 0.011985873791602753, "grad_norm": 1.5020333528518677, "learning_rate": 5.993578308954691e-06, "loss": 0.4187, "step": 672 }, { "epoch": 0.012003709913316448, "grad_norm": 1.5509532690048218, "learning_rate": 6.002497324295398e-06, "loss": 0.3522, "step": 673 }, { "epoch": 0.012021546035030143, "grad_norm": 1.3156205415725708, "learning_rate": 6.011416339636105e-06, "loss": 0.4496, "step": 674 }, { "epoch": 0.012039382156743838, "grad_norm": 1.3814486265182495, "learning_rate": 6.020335354976811e-06, "loss": 0.4037, "step": 675 }, { "epoch": 0.012057218278457533, "grad_norm": 1.0259685516357422, "learning_rate": 6.029254370317517e-06, "loss": 0.3465, "step": 676 }, { "epoch": 0.012075054400171226, "grad_norm": 1.0664241313934326, "learning_rate": 6.038173385658224e-06, "loss": 0.364, "step": 677 }, { "epoch": 0.012092890521884921, "grad_norm": 1.151801347732544, "learning_rate": 6.04709240099893e-06, "loss": 0.3997, "step": 678 }, { "epoch": 0.012110726643598616, "grad_norm": 1.0412648916244507, "learning_rate": 6.0560114163396365e-06, "loss": 0.4186, "step": 679 }, { "epoch": 0.01212856276531231, "grad_norm": 1.2787470817565918, "learning_rate": 6.064930431680343e-06, "loss": 0.4059, "step": 680 }, { "epoch": 0.012146398887026006, "grad_norm": 1.2930010557174683, "learning_rate": 6.0738494470210484e-06, "loss": 0.4146, "step": 681 }, { "epoch": 0.012164235008739699, "grad_norm": 1.3312135934829712, "learning_rate": 6.082768462361756e-06, "loss": 0.4424, "step": 682 }, { "epoch": 0.012182071130453394, "grad_norm": 1.0913116931915283, "learning_rate": 6.091687477702462e-06, "loss": 0.4532, "step": 683 }, { "epoch": 0.012199907252167089, "grad_norm": 2.2016854286193848, "learning_rate": 6.1006064930431684e-06, "loss": 0.419, "step": 684 }, { "epoch": 0.012217743373880784, "grad_norm": 1.046238660812378, "learning_rate": 6.109525508383875e-06, "loss": 0.4202, "step": 685 }, { "epoch": 0.012235579495594479, "grad_norm": 1.246010184288025, "learning_rate": 6.118444523724581e-06, "loss": 0.425, "step": 686 }, { "epoch": 0.012253415617308172, "grad_norm": 1.2753421068191528, "learning_rate": 6.1273635390652876e-06, "loss": 0.3787, "step": 687 }, { "epoch": 0.012271251739021867, "grad_norm": 0.980410099029541, "learning_rate": 6.136282554405994e-06, "loss": 0.418, "step": 688 }, { "epoch": 0.012289087860735562, "grad_norm": 1.0079503059387207, "learning_rate": 6.1452015697467e-06, "loss": 0.4031, "step": 689 }, { "epoch": 0.012306923982449256, "grad_norm": 1.3984042406082153, "learning_rate": 6.154120585087407e-06, "loss": 0.3477, "step": 690 }, { "epoch": 0.012324760104162951, "grad_norm": 1.1816221475601196, "learning_rate": 6.163039600428113e-06, "loss": 0.3699, "step": 691 }, { "epoch": 0.012342596225876645, "grad_norm": 0.9247754812240601, "learning_rate": 6.1719586157688195e-06, "loss": 0.3727, "step": 692 }, { "epoch": 0.01236043234759034, "grad_norm": 0.8835532665252686, "learning_rate": 6.180877631109526e-06, "loss": 0.4442, "step": 693 }, { "epoch": 0.012378268469304034, "grad_norm": 0.9995298981666565, "learning_rate": 6.189796646450232e-06, "loss": 0.4309, "step": 694 }, { "epoch": 0.01239610459101773, "grad_norm": 1.4439276456832886, "learning_rate": 6.198715661790939e-06, "loss": 0.377, "step": 695 }, { "epoch": 0.012413940712731424, "grad_norm": 1.3238965272903442, "learning_rate": 6.207634677131645e-06, "loss": 0.4259, "step": 696 }, { "epoch": 0.012431776834445117, "grad_norm": 1.1488457918167114, "learning_rate": 6.216553692472351e-06, "loss": 0.4529, "step": 697 }, { "epoch": 0.012449612956158812, "grad_norm": 1.1248880624771118, "learning_rate": 6.225472707813058e-06, "loss": 0.3931, "step": 698 }, { "epoch": 0.012467449077872507, "grad_norm": 2.2086665630340576, "learning_rate": 6.234391723153764e-06, "loss": 0.4266, "step": 699 }, { "epoch": 0.012485285199586202, "grad_norm": 1.4757678508758545, "learning_rate": 6.2433107384944705e-06, "loss": 0.422, "step": 700 }, { "epoch": 0.012503121321299897, "grad_norm": 1.495514988899231, "learning_rate": 6.252229753835178e-06, "loss": 0.4223, "step": 701 }, { "epoch": 0.01252095744301359, "grad_norm": 1.1658685207366943, "learning_rate": 6.2611487691758824e-06, "loss": 0.364, "step": 702 }, { "epoch": 0.012538793564727285, "grad_norm": 1.3786637783050537, "learning_rate": 6.27006778451659e-06, "loss": 0.3867, "step": 703 }, { "epoch": 0.01255662968644098, "grad_norm": 1.184095025062561, "learning_rate": 6.278986799857296e-06, "loss": 0.4947, "step": 704 }, { "epoch": 0.012574465808154675, "grad_norm": 1.1303995847702026, "learning_rate": 6.287905815198003e-06, "loss": 0.3957, "step": 705 }, { "epoch": 0.01259230192986837, "grad_norm": 1.4637501239776611, "learning_rate": 6.296824830538709e-06, "loss": 0.4926, "step": 706 }, { "epoch": 0.012610138051582063, "grad_norm": 1.5414247512817383, "learning_rate": 6.305743845879415e-06, "loss": 0.4001, "step": 707 }, { "epoch": 0.012627974173295758, "grad_norm": 1.0155268907546997, "learning_rate": 6.3146628612201216e-06, "loss": 0.4371, "step": 708 }, { "epoch": 0.012645810295009453, "grad_norm": 1.166196584701538, "learning_rate": 6.323581876560827e-06, "loss": 0.4294, "step": 709 }, { "epoch": 0.012663646416723148, "grad_norm": 1.2898826599121094, "learning_rate": 6.332500891901534e-06, "loss": 0.438, "step": 710 }, { "epoch": 0.012681482538436843, "grad_norm": 1.0751768350601196, "learning_rate": 6.341419907242241e-06, "loss": 0.4241, "step": 711 }, { "epoch": 0.012699318660150536, "grad_norm": 0.8998647928237915, "learning_rate": 6.350338922582948e-06, "loss": 0.3414, "step": 712 }, { "epoch": 0.012717154781864231, "grad_norm": 1.0985339879989624, "learning_rate": 6.3592579379236535e-06, "loss": 0.4041, "step": 713 }, { "epoch": 0.012734990903577926, "grad_norm": 1.154080867767334, "learning_rate": 6.36817695326436e-06, "loss": 0.3809, "step": 714 }, { "epoch": 0.01275282702529162, "grad_norm": 1.9730243682861328, "learning_rate": 6.377095968605066e-06, "loss": 0.3941, "step": 715 }, { "epoch": 0.012770663147005316, "grad_norm": 1.3270517587661743, "learning_rate": 6.3860149839457734e-06, "loss": 0.4365, "step": 716 }, { "epoch": 0.012788499268719009, "grad_norm": 1.0822250843048096, "learning_rate": 6.394933999286479e-06, "loss": 0.4683, "step": 717 }, { "epoch": 0.012806335390432704, "grad_norm": 1.1357783079147339, "learning_rate": 6.403853014627185e-06, "loss": 0.4114, "step": 718 }, { "epoch": 0.012824171512146399, "grad_norm": 0.8394474387168884, "learning_rate": 6.412772029967893e-06, "loss": 0.3545, "step": 719 }, { "epoch": 0.012842007633860094, "grad_norm": 1.2799073457717896, "learning_rate": 6.421691045308597e-06, "loss": 0.4275, "step": 720 }, { "epoch": 0.012859843755573789, "grad_norm": 1.0163384675979614, "learning_rate": 6.4306100606493045e-06, "loss": 0.3397, "step": 721 }, { "epoch": 0.012877679877287482, "grad_norm": 1.1710898876190186, "learning_rate": 6.439529075990011e-06, "loss": 0.4035, "step": 722 }, { "epoch": 0.012895515999001177, "grad_norm": 0.9285147786140442, "learning_rate": 6.448448091330718e-06, "loss": 0.4014, "step": 723 }, { "epoch": 0.012913352120714872, "grad_norm": 1.1520582437515259, "learning_rate": 6.457367106671424e-06, "loss": 0.4091, "step": 724 }, { "epoch": 0.012931188242428567, "grad_norm": 2.1584787368774414, "learning_rate": 6.46628612201213e-06, "loss": 0.5232, "step": 725 }, { "epoch": 0.012949024364142261, "grad_norm": 1.363916277885437, "learning_rate": 6.475205137352837e-06, "loss": 0.3715, "step": 726 }, { "epoch": 0.012966860485855955, "grad_norm": 0.9038228988647461, "learning_rate": 6.484124152693542e-06, "loss": 0.4156, "step": 727 }, { "epoch": 0.01298469660756965, "grad_norm": 0.9826972484588623, "learning_rate": 6.493043168034249e-06, "loss": 0.4079, "step": 728 }, { "epoch": 0.013002532729283344, "grad_norm": 0.8310179114341736, "learning_rate": 6.5019621833749555e-06, "loss": 0.3577, "step": 729 }, { "epoch": 0.01302036885099704, "grad_norm": 1.237696647644043, "learning_rate": 6.510881198715663e-06, "loss": 0.4967, "step": 730 }, { "epoch": 0.013038204972710734, "grad_norm": 1.3650190830230713, "learning_rate": 6.519800214056368e-06, "loss": 0.4011, "step": 731 }, { "epoch": 0.01305604109442443, "grad_norm": 1.4350529909133911, "learning_rate": 6.528719229397075e-06, "loss": 0.3749, "step": 732 }, { "epoch": 0.013073877216138122, "grad_norm": 0.9284170269966125, "learning_rate": 6.537638244737782e-06, "loss": 0.416, "step": 733 }, { "epoch": 0.013091713337851817, "grad_norm": 1.1350346803665161, "learning_rate": 6.546557260078488e-06, "loss": 0.4542, "step": 734 }, { "epoch": 0.013109549459565512, "grad_norm": 2.328464984893799, "learning_rate": 6.555476275419194e-06, "loss": 0.3681, "step": 735 }, { "epoch": 0.013127385581279207, "grad_norm": 0.8249056339263916, "learning_rate": 6.5643952907599e-06, "loss": 0.397, "step": 736 }, { "epoch": 0.013145221702992902, "grad_norm": 1.0406147241592407, "learning_rate": 6.5733143061006074e-06, "loss": 0.4341, "step": 737 }, { "epoch": 0.013163057824706595, "grad_norm": 0.7927587628364563, "learning_rate": 6.582233321441313e-06, "loss": 0.3395, "step": 738 }, { "epoch": 0.01318089394642029, "grad_norm": 0.9958735704421997, "learning_rate": 6.591152336782019e-06, "loss": 0.3833, "step": 739 }, { "epoch": 0.013198730068133985, "grad_norm": 0.8341773748397827, "learning_rate": 6.600071352122726e-06, "loss": 0.344, "step": 740 }, { "epoch": 0.01321656618984768, "grad_norm": 1.1658989191055298, "learning_rate": 6.608990367463433e-06, "loss": 0.4142, "step": 741 }, { "epoch": 0.013234402311561375, "grad_norm": 1.5032583475112915, "learning_rate": 6.6179093828041385e-06, "loss": 0.4346, "step": 742 }, { "epoch": 0.013252238433275068, "grad_norm": 1.3449758291244507, "learning_rate": 6.626828398144845e-06, "loss": 0.4578, "step": 743 }, { "epoch": 0.013270074554988763, "grad_norm": 0.9802781343460083, "learning_rate": 6.635747413485552e-06, "loss": 0.3982, "step": 744 }, { "epoch": 0.013287910676702458, "grad_norm": 1.0957345962524414, "learning_rate": 6.644666428826258e-06, "loss": 0.3745, "step": 745 }, { "epoch": 0.013305746798416153, "grad_norm": 1.1497217416763306, "learning_rate": 6.653585444166964e-06, "loss": 0.4154, "step": 746 }, { "epoch": 0.013323582920129848, "grad_norm": 1.134560465812683, "learning_rate": 6.66250445950767e-06, "loss": 0.4289, "step": 747 }, { "epoch": 0.013341419041843541, "grad_norm": 1.1097346544265747, "learning_rate": 6.671423474848378e-06, "loss": 0.3922, "step": 748 }, { "epoch": 0.013359255163557236, "grad_norm": 1.1429896354675293, "learning_rate": 6.680342490189083e-06, "loss": 0.3488, "step": 749 }, { "epoch": 0.01337709128527093, "grad_norm": 3.192944288253784, "learning_rate": 6.6892615055297895e-06, "loss": 0.4736, "step": 750 }, { "epoch": 0.013394927406984626, "grad_norm": 0.9527766108512878, "learning_rate": 6.698180520870497e-06, "loss": 0.4484, "step": 751 }, { "epoch": 0.01341276352869832, "grad_norm": 0.9491976499557495, "learning_rate": 6.707099536211203e-06, "loss": 0.4781, "step": 752 }, { "epoch": 0.013430599650412014, "grad_norm": 1.2711595296859741, "learning_rate": 6.716018551551909e-06, "loss": 0.4414, "step": 753 }, { "epoch": 0.013448435772125709, "grad_norm": 0.9956739544868469, "learning_rate": 6.724937566892615e-06, "loss": 0.3846, "step": 754 }, { "epoch": 0.013466271893839404, "grad_norm": 1.4637160301208496, "learning_rate": 6.733856582233322e-06, "loss": 0.4572, "step": 755 }, { "epoch": 0.013484108015553099, "grad_norm": 1.0149110555648804, "learning_rate": 6.742775597574028e-06, "loss": 0.365, "step": 756 }, { "epoch": 0.013501944137266794, "grad_norm": 0.9533802270889282, "learning_rate": 6.751694612914734e-06, "loss": 0.4999, "step": 757 }, { "epoch": 0.013519780258980487, "grad_norm": 0.9060002565383911, "learning_rate": 6.7606136282554414e-06, "loss": 0.4258, "step": 758 }, { "epoch": 0.013537616380694182, "grad_norm": 1.1750162839889526, "learning_rate": 6.769532643596148e-06, "loss": 0.3791, "step": 759 }, { "epoch": 0.013555452502407877, "grad_norm": 0.9837682247161865, "learning_rate": 6.778451658936853e-06, "loss": 0.392, "step": 760 }, { "epoch": 0.013573288624121571, "grad_norm": 1.0854389667510986, "learning_rate": 6.78737067427756e-06, "loss": 0.3655, "step": 761 }, { "epoch": 0.013591124745835266, "grad_norm": 1.6050208806991577, "learning_rate": 6.796289689618267e-06, "loss": 0.4137, "step": 762 }, { "epoch": 0.01360896086754896, "grad_norm": 0.9369100332260132, "learning_rate": 6.8052087049589725e-06, "loss": 0.4084, "step": 763 }, { "epoch": 0.013626796989262654, "grad_norm": 0.9988794326782227, "learning_rate": 6.814127720299679e-06, "loss": 0.3253, "step": 764 }, { "epoch": 0.01364463311097635, "grad_norm": 1.476580262184143, "learning_rate": 6.823046735640386e-06, "loss": 0.5194, "step": 765 }, { "epoch": 0.013662469232690044, "grad_norm": 1.4281305074691772, "learning_rate": 6.8319657509810925e-06, "loss": 0.5293, "step": 766 }, { "epoch": 0.01368030535440374, "grad_norm": 1.12155282497406, "learning_rate": 6.840884766321798e-06, "loss": 0.4609, "step": 767 }, { "epoch": 0.013698141476117432, "grad_norm": 1.1472570896148682, "learning_rate": 6.849803781662504e-06, "loss": 0.4228, "step": 768 }, { "epoch": 0.013715977597831127, "grad_norm": 1.2671374082565308, "learning_rate": 6.858722797003212e-06, "loss": 0.4665, "step": 769 }, { "epoch": 0.013733813719544822, "grad_norm": 0.9118152856826782, "learning_rate": 6.867641812343918e-06, "loss": 0.4147, "step": 770 }, { "epoch": 0.013751649841258517, "grad_norm": 1.4787702560424805, "learning_rate": 6.8765608276846235e-06, "loss": 0.4077, "step": 771 }, { "epoch": 0.013769485962972212, "grad_norm": 0.890640914440155, "learning_rate": 6.885479843025331e-06, "loss": 0.3717, "step": 772 }, { "epoch": 0.013787322084685905, "grad_norm": 1.2971957921981812, "learning_rate": 6.894398858366037e-06, "loss": 0.4762, "step": 773 }, { "epoch": 0.0138051582063996, "grad_norm": 1.8639717102050781, "learning_rate": 6.903317873706743e-06, "loss": 0.4415, "step": 774 }, { "epoch": 0.013822994328113295, "grad_norm": 0.9721428751945496, "learning_rate": 6.912236889047449e-06, "loss": 0.3898, "step": 775 }, { "epoch": 0.01384083044982699, "grad_norm": 1.3769142627716064, "learning_rate": 6.921155904388156e-06, "loss": 0.4044, "step": 776 }, { "epoch": 0.013858666571540685, "grad_norm": 4.9716973304748535, "learning_rate": 6.930074919728863e-06, "loss": 0.4096, "step": 777 }, { "epoch": 0.013876502693254378, "grad_norm": 0.7622309923171997, "learning_rate": 6.938993935069568e-06, "loss": 0.3708, "step": 778 }, { "epoch": 0.013894338814968073, "grad_norm": 1.4392539262771606, "learning_rate": 6.9479129504102746e-06, "loss": 0.39, "step": 779 }, { "epoch": 0.013912174936681768, "grad_norm": 0.7795321941375732, "learning_rate": 6.956831965750982e-06, "loss": 0.3472, "step": 780 }, { "epoch": 0.013930011058395463, "grad_norm": 1.8850880861282349, "learning_rate": 6.965750981091688e-06, "loss": 0.4258, "step": 781 }, { "epoch": 0.013947847180109158, "grad_norm": 1.548249363899231, "learning_rate": 6.974669996432394e-06, "loss": 0.5222, "step": 782 }, { "epoch": 0.013965683301822851, "grad_norm": 1.331453800201416, "learning_rate": 6.983589011773101e-06, "loss": 0.3634, "step": 783 }, { "epoch": 0.013983519423536546, "grad_norm": 1.0235885381698608, "learning_rate": 6.992508027113807e-06, "loss": 0.4164, "step": 784 }, { "epoch": 0.01400135554525024, "grad_norm": 1.1273366212844849, "learning_rate": 7.001427042454513e-06, "loss": 0.3802, "step": 785 }, { "epoch": 0.014019191666963936, "grad_norm": 1.1602329015731812, "learning_rate": 7.010346057795219e-06, "loss": 0.4367, "step": 786 }, { "epoch": 0.01403702778867763, "grad_norm": 0.9095342755317688, "learning_rate": 7.0192650731359265e-06, "loss": 0.4024, "step": 787 }, { "epoch": 0.014054863910391324, "grad_norm": 1.5537116527557373, "learning_rate": 7.028184088476633e-06, "loss": 0.3809, "step": 788 }, { "epoch": 0.014072700032105019, "grad_norm": 1.4078360795974731, "learning_rate": 7.037103103817338e-06, "loss": 0.4279, "step": 789 }, { "epoch": 0.014090536153818714, "grad_norm": 1.0890816450119019, "learning_rate": 7.046022119158046e-06, "loss": 0.3316, "step": 790 }, { "epoch": 0.014108372275532409, "grad_norm": 1.2133907079696655, "learning_rate": 7.054941134498752e-06, "loss": 0.3913, "step": 791 }, { "epoch": 0.014126208397246104, "grad_norm": 0.9176425337791443, "learning_rate": 7.0638601498394575e-06, "loss": 0.3924, "step": 792 }, { "epoch": 0.014144044518959797, "grad_norm": 0.8917650580406189, "learning_rate": 7.072779165180164e-06, "loss": 0.3654, "step": 793 }, { "epoch": 0.014161880640673492, "grad_norm": 1.9989323616027832, "learning_rate": 7.081698180520871e-06, "loss": 0.5011, "step": 794 }, { "epoch": 0.014179716762387187, "grad_norm": 2.3614730834960938, "learning_rate": 7.0906171958615775e-06, "loss": 0.4123, "step": 795 }, { "epoch": 0.014197552884100881, "grad_norm": 1.1702730655670166, "learning_rate": 7.099536211202283e-06, "loss": 0.3909, "step": 796 }, { "epoch": 0.014215389005814576, "grad_norm": 0.99686199426651, "learning_rate": 7.10845522654299e-06, "loss": 0.3776, "step": 797 }, { "epoch": 0.01423322512752827, "grad_norm": 1.4835487604141235, "learning_rate": 7.117374241883697e-06, "loss": 0.3679, "step": 798 }, { "epoch": 0.014251061249241965, "grad_norm": 0.9821210503578186, "learning_rate": 7.126293257224403e-06, "loss": 0.3408, "step": 799 }, { "epoch": 0.01426889737095566, "grad_norm": 1.773571252822876, "learning_rate": 7.1352122725651086e-06, "loss": 0.3431, "step": 800 }, { "epoch": 0.014286733492669354, "grad_norm": 1.0360254049301147, "learning_rate": 7.144131287905816e-06, "loss": 0.3545, "step": 801 }, { "epoch": 0.01430456961438305, "grad_norm": 2.319758892059326, "learning_rate": 7.153050303246522e-06, "loss": 0.3851, "step": 802 }, { "epoch": 0.014322405736096742, "grad_norm": 0.7496076226234436, "learning_rate": 7.161969318587228e-06, "loss": 0.344, "step": 803 }, { "epoch": 0.014340241857810437, "grad_norm": 0.831169068813324, "learning_rate": 7.170888333927935e-06, "loss": 0.3482, "step": 804 }, { "epoch": 0.014358077979524132, "grad_norm": 0.9808482527732849, "learning_rate": 7.179807349268641e-06, "loss": 0.4026, "step": 805 }, { "epoch": 0.014375914101237827, "grad_norm": 0.9589486122131348, "learning_rate": 7.188726364609348e-06, "loss": 0.4512, "step": 806 }, { "epoch": 0.014393750222951522, "grad_norm": 1.0854859352111816, "learning_rate": 7.197645379950053e-06, "loss": 0.3563, "step": 807 }, { "epoch": 0.014411586344665215, "grad_norm": 0.8584607839584351, "learning_rate": 7.2065643952907604e-06, "loss": 0.3587, "step": 808 }, { "epoch": 0.01442942246637891, "grad_norm": 1.3820878267288208, "learning_rate": 7.215483410631467e-06, "loss": 0.3984, "step": 809 }, { "epoch": 0.014447258588092605, "grad_norm": 0.9056740403175354, "learning_rate": 7.224402425972172e-06, "loss": 0.3696, "step": 810 }, { "epoch": 0.0144650947098063, "grad_norm": 0.830923855304718, "learning_rate": 7.233321441312879e-06, "loss": 0.4189, "step": 811 }, { "epoch": 0.014482930831519995, "grad_norm": 0.7032369375228882, "learning_rate": 7.242240456653586e-06, "loss": 0.358, "step": 812 }, { "epoch": 0.014500766953233688, "grad_norm": 1.0487927198410034, "learning_rate": 7.251159471994292e-06, "loss": 0.3414, "step": 813 }, { "epoch": 0.014518603074947383, "grad_norm": 1.0935640335083008, "learning_rate": 7.260078487334998e-06, "loss": 0.3727, "step": 814 }, { "epoch": 0.014536439196661078, "grad_norm": 1.4685693979263306, "learning_rate": 7.268997502675705e-06, "loss": 0.4056, "step": 815 }, { "epoch": 0.014554275318374773, "grad_norm": 1.1004257202148438, "learning_rate": 7.2779165180164115e-06, "loss": 0.4911, "step": 816 }, { "epoch": 0.014572111440088468, "grad_norm": 0.8749274015426636, "learning_rate": 7.286835533357119e-06, "loss": 0.3947, "step": 817 }, { "epoch": 0.014589947561802161, "grad_norm": 1.633952021598816, "learning_rate": 7.295754548697823e-06, "loss": 0.4308, "step": 818 }, { "epoch": 0.014607783683515856, "grad_norm": 0.7927972674369812, "learning_rate": 7.304673564038531e-06, "loss": 0.3455, "step": 819 }, { "epoch": 0.01462561980522955, "grad_norm": 2.7083334922790527, "learning_rate": 7.313592579379237e-06, "loss": 0.372, "step": 820 }, { "epoch": 0.014643455926943246, "grad_norm": 1.5031018257141113, "learning_rate": 7.3225115947199425e-06, "loss": 0.4165, "step": 821 }, { "epoch": 0.01466129204865694, "grad_norm": 1.1417412757873535, "learning_rate": 7.33143061006065e-06, "loss": 0.3365, "step": 822 }, { "epoch": 0.014679128170370634, "grad_norm": 1.0912084579467773, "learning_rate": 7.340349625401356e-06, "loss": 0.4463, "step": 823 }, { "epoch": 0.014696964292084329, "grad_norm": 1.1397372484207153, "learning_rate": 7.349268640742063e-06, "loss": 0.3502, "step": 824 }, { "epoch": 0.014714800413798024, "grad_norm": 1.1142958402633667, "learning_rate": 7.358187656082768e-06, "loss": 0.4658, "step": 825 }, { "epoch": 0.014732636535511719, "grad_norm": 1.18959379196167, "learning_rate": 7.367106671423475e-06, "loss": 0.3681, "step": 826 }, { "epoch": 0.014750472657225414, "grad_norm": 1.7012782096862793, "learning_rate": 7.376025686764182e-06, "loss": 0.4448, "step": 827 }, { "epoch": 0.014768308778939107, "grad_norm": 0.7677625417709351, "learning_rate": 7.384944702104887e-06, "loss": 0.3694, "step": 828 }, { "epoch": 0.014786144900652802, "grad_norm": 0.9499193429946899, "learning_rate": 7.3938637174455944e-06, "loss": 0.3595, "step": 829 }, { "epoch": 0.014803981022366497, "grad_norm": 1.0089343786239624, "learning_rate": 7.402782732786301e-06, "loss": 0.5221, "step": 830 }, { "epoch": 0.014821817144080192, "grad_norm": 1.5784305334091187, "learning_rate": 7.411701748127008e-06, "loss": 0.3789, "step": 831 }, { "epoch": 0.014839653265793886, "grad_norm": 1.0147290229797363, "learning_rate": 7.420620763467713e-06, "loss": 0.4003, "step": 832 }, { "epoch": 0.01485748938750758, "grad_norm": 0.8705704212188721, "learning_rate": 7.42953977880842e-06, "loss": 0.3909, "step": 833 }, { "epoch": 0.014875325509221275, "grad_norm": 1.50275719165802, "learning_rate": 7.438458794149126e-06, "loss": 0.4745, "step": 834 }, { "epoch": 0.01489316163093497, "grad_norm": 1.1221299171447754, "learning_rate": 7.4473778094898336e-06, "loss": 0.419, "step": 835 }, { "epoch": 0.014910997752648664, "grad_norm": 1.7666137218475342, "learning_rate": 7.456296824830539e-06, "loss": 0.4896, "step": 836 }, { "epoch": 0.01492883387436236, "grad_norm": 1.1369482278823853, "learning_rate": 7.4652158401712455e-06, "loss": 0.4144, "step": 837 }, { "epoch": 0.014946669996076052, "grad_norm": 0.9525813460350037, "learning_rate": 7.474134855511952e-06, "loss": 0.4688, "step": 838 }, { "epoch": 0.014964506117789747, "grad_norm": 0.9223721623420715, "learning_rate": 7.483053870852657e-06, "loss": 0.4354, "step": 839 }, { "epoch": 0.014982342239503442, "grad_norm": 0.9912000298500061, "learning_rate": 7.491972886193365e-06, "loss": 0.3508, "step": 840 }, { "epoch": 0.015000178361217137, "grad_norm": 1.470048427581787, "learning_rate": 7.500891901534071e-06, "loss": 0.3996, "step": 841 }, { "epoch": 0.015018014482930832, "grad_norm": 0.8923424482345581, "learning_rate": 7.509810916874778e-06, "loss": 0.3814, "step": 842 }, { "epoch": 0.015035850604644525, "grad_norm": 1.0893371105194092, "learning_rate": 7.518729932215483e-06, "loss": 0.3633, "step": 843 }, { "epoch": 0.01505368672635822, "grad_norm": 1.2783355712890625, "learning_rate": 7.52764894755619e-06, "loss": 0.3868, "step": 844 }, { "epoch": 0.015071522848071915, "grad_norm": 1.3496581315994263, "learning_rate": 7.5365679628968965e-06, "loss": 0.4137, "step": 845 }, { "epoch": 0.01508935896978561, "grad_norm": 0.757161557674408, "learning_rate": 7.545486978237602e-06, "loss": 0.3529, "step": 846 }, { "epoch": 0.015107195091499305, "grad_norm": 1.3636268377304077, "learning_rate": 7.554405993578309e-06, "loss": 0.3748, "step": 847 }, { "epoch": 0.015125031213212998, "grad_norm": 0.9378393888473511, "learning_rate": 7.563325008919016e-06, "loss": 0.3729, "step": 848 }, { "epoch": 0.015142867334926693, "grad_norm": 0.8216644525527954, "learning_rate": 7.572244024259723e-06, "loss": 0.3436, "step": 849 }, { "epoch": 0.015160703456640388, "grad_norm": 0.9007389545440674, "learning_rate": 7.5811630396004276e-06, "loss": 0.37, "step": 850 }, { "epoch": 0.015178539578354083, "grad_norm": 1.465600848197937, "learning_rate": 7.590082054941135e-06, "loss": 0.3756, "step": 851 }, { "epoch": 0.015196375700067778, "grad_norm": 1.0671945810317993, "learning_rate": 7.599001070281841e-06, "loss": 0.3238, "step": 852 }, { "epoch": 0.015214211821781471, "grad_norm": 0.9975453019142151, "learning_rate": 7.607920085622548e-06, "loss": 0.3849, "step": 853 }, { "epoch": 0.015232047943495166, "grad_norm": 1.298552393913269, "learning_rate": 7.616839100963254e-06, "loss": 0.4651, "step": 854 }, { "epoch": 0.015249884065208861, "grad_norm": 1.0898548364639282, "learning_rate": 7.62575811630396e-06, "loss": 0.4237, "step": 855 }, { "epoch": 0.015267720186922556, "grad_norm": 1.3157875537872314, "learning_rate": 7.634677131644667e-06, "loss": 0.3605, "step": 856 }, { "epoch": 0.01528555630863625, "grad_norm": 1.0036447048187256, "learning_rate": 7.643596146985373e-06, "loss": 0.3343, "step": 857 }, { "epoch": 0.015303392430349944, "grad_norm": 1.3900219202041626, "learning_rate": 7.65251516232608e-06, "loss": 0.3951, "step": 858 }, { "epoch": 0.015321228552063639, "grad_norm": 0.8167012333869934, "learning_rate": 7.661434177666786e-06, "loss": 0.3923, "step": 859 }, { "epoch": 0.015339064673777334, "grad_norm": 1.1097334623336792, "learning_rate": 7.670353193007492e-06, "loss": 0.3583, "step": 860 }, { "epoch": 0.015356900795491029, "grad_norm": 0.9676361083984375, "learning_rate": 7.679272208348199e-06, "loss": 0.339, "step": 861 }, { "epoch": 0.015374736917204724, "grad_norm": 0.8648974895477295, "learning_rate": 7.688191223688905e-06, "loss": 0.3853, "step": 862 }, { "epoch": 0.015392573038918417, "grad_norm": 1.5109702348709106, "learning_rate": 7.697110239029611e-06, "loss": 0.3522, "step": 863 }, { "epoch": 0.015410409160632112, "grad_norm": 1.46962308883667, "learning_rate": 7.706029254370318e-06, "loss": 0.4402, "step": 864 }, { "epoch": 0.015428245282345807, "grad_norm": 0.7502992153167725, "learning_rate": 7.714948269711024e-06, "loss": 0.3603, "step": 865 }, { "epoch": 0.015446081404059502, "grad_norm": 0.9077794551849365, "learning_rate": 7.72386728505173e-06, "loss": 0.3762, "step": 866 }, { "epoch": 0.015463917525773196, "grad_norm": 1.4740957021713257, "learning_rate": 7.732786300392437e-06, "loss": 0.3627, "step": 867 }, { "epoch": 0.01548175364748689, "grad_norm": 0.938251256942749, "learning_rate": 7.741705315733143e-06, "loss": 0.4625, "step": 868 }, { "epoch": 0.015499589769200585, "grad_norm": 0.9048126339912415, "learning_rate": 7.75062433107385e-06, "loss": 0.3593, "step": 869 }, { "epoch": 0.01551742589091428, "grad_norm": 0.9404629468917847, "learning_rate": 7.759543346414556e-06, "loss": 0.4342, "step": 870 }, { "epoch": 0.015535262012627974, "grad_norm": 1.225550651550293, "learning_rate": 7.768462361755262e-06, "loss": 0.4465, "step": 871 }, { "epoch": 0.01555309813434167, "grad_norm": 2.438214063644409, "learning_rate": 7.777381377095969e-06, "loss": 0.3859, "step": 872 }, { "epoch": 0.015570934256055362, "grad_norm": 0.8626007437705994, "learning_rate": 7.786300392436675e-06, "loss": 0.5049, "step": 873 }, { "epoch": 0.015588770377769057, "grad_norm": 0.9151098728179932, "learning_rate": 7.795219407777382e-06, "loss": 0.4445, "step": 874 }, { "epoch": 0.015606606499482752, "grad_norm": 1.3421666622161865, "learning_rate": 7.804138423118088e-06, "loss": 0.4097, "step": 875 }, { "epoch": 0.015624442621196447, "grad_norm": 1.99418044090271, "learning_rate": 7.813057438458794e-06, "loss": 0.4548, "step": 876 }, { "epoch": 0.015642278742910142, "grad_norm": 1.5321028232574463, "learning_rate": 7.8219764537995e-06, "loss": 0.3853, "step": 877 }, { "epoch": 0.015660114864623837, "grad_norm": 0.7856518030166626, "learning_rate": 7.830895469140207e-06, "loss": 0.3634, "step": 878 }, { "epoch": 0.015677950986337532, "grad_norm": 1.006925106048584, "learning_rate": 7.839814484480913e-06, "loss": 0.4125, "step": 879 }, { "epoch": 0.015695787108051227, "grad_norm": 2.1221554279327393, "learning_rate": 7.84873349982162e-06, "loss": 0.415, "step": 880 }, { "epoch": 0.01571362322976492, "grad_norm": 0.8153918981552124, "learning_rate": 7.857652515162326e-06, "loss": 0.4215, "step": 881 }, { "epoch": 0.015731459351478613, "grad_norm": 1.1103639602661133, "learning_rate": 7.866571530503034e-06, "loss": 0.4071, "step": 882 }, { "epoch": 0.015749295473192308, "grad_norm": 1.2810118198394775, "learning_rate": 7.875490545843739e-06, "loss": 0.3632, "step": 883 }, { "epoch": 0.015767131594906003, "grad_norm": 0.8250142931938171, "learning_rate": 7.884409561184445e-06, "loss": 0.3739, "step": 884 }, { "epoch": 0.015784967716619698, "grad_norm": 1.2615044116973877, "learning_rate": 7.893328576525152e-06, "loss": 0.4946, "step": 885 }, { "epoch": 0.015802803838333393, "grad_norm": 0.9218889474868774, "learning_rate": 7.902247591865858e-06, "loss": 0.3791, "step": 886 }, { "epoch": 0.015820639960047088, "grad_norm": 1.1563516855239868, "learning_rate": 7.911166607206564e-06, "loss": 0.3406, "step": 887 }, { "epoch": 0.015838476081760783, "grad_norm": 1.1569664478302002, "learning_rate": 7.920085622547271e-06, "loss": 0.3709, "step": 888 }, { "epoch": 0.015856312203474478, "grad_norm": 0.928895115852356, "learning_rate": 7.929004637887979e-06, "loss": 0.4082, "step": 889 }, { "epoch": 0.015874148325188173, "grad_norm": 1.0746430158615112, "learning_rate": 7.937923653228684e-06, "loss": 0.3343, "step": 890 }, { "epoch": 0.015891984446901864, "grad_norm": 0.9965145587921143, "learning_rate": 7.94684266856939e-06, "loss": 0.3528, "step": 891 }, { "epoch": 0.01590982056861556, "grad_norm": 1.1421681642532349, "learning_rate": 7.955761683910096e-06, "loss": 0.4042, "step": 892 }, { "epoch": 0.015927656690329254, "grad_norm": 1.7538610696792603, "learning_rate": 7.964680699250803e-06, "loss": 0.4018, "step": 893 }, { "epoch": 0.01594549281204295, "grad_norm": 1.7577452659606934, "learning_rate": 7.97359971459151e-06, "loss": 0.3575, "step": 894 }, { "epoch": 0.015963328933756644, "grad_norm": 0.785850465297699, "learning_rate": 7.982518729932216e-06, "loss": 0.4608, "step": 895 }, { "epoch": 0.01598116505547034, "grad_norm": 0.99677973985672, "learning_rate": 7.991437745272924e-06, "loss": 0.357, "step": 896 }, { "epoch": 0.015999001177184034, "grad_norm": 1.9706685543060303, "learning_rate": 8.000356760613628e-06, "loss": 0.3966, "step": 897 }, { "epoch": 0.01601683729889773, "grad_norm": 0.6862720251083374, "learning_rate": 8.009275775954335e-06, "loss": 0.3992, "step": 898 }, { "epoch": 0.016034673420611423, "grad_norm": 1.010366678237915, "learning_rate": 8.018194791295041e-06, "loss": 0.3913, "step": 899 }, { "epoch": 0.01605250954232512, "grad_norm": 1.3852301836013794, "learning_rate": 8.027113806635749e-06, "loss": 0.4702, "step": 900 }, { "epoch": 0.01607034566403881, "grad_norm": 1.0316989421844482, "learning_rate": 8.036032821976454e-06, "loss": 0.4703, "step": 901 }, { "epoch": 0.016088181785752505, "grad_norm": 0.9395390748977661, "learning_rate": 8.04495183731716e-06, "loss": 0.3565, "step": 902 }, { "epoch": 0.0161060179074662, "grad_norm": 0.8489232659339905, "learning_rate": 8.053870852657867e-06, "loss": 0.4159, "step": 903 }, { "epoch": 0.016123854029179895, "grad_norm": 0.9959050416946411, "learning_rate": 8.062789867998573e-06, "loss": 0.4334, "step": 904 }, { "epoch": 0.01614169015089359, "grad_norm": 1.297946810722351, "learning_rate": 8.07170888333928e-06, "loss": 0.4919, "step": 905 }, { "epoch": 0.016159526272607284, "grad_norm": 1.3732280731201172, "learning_rate": 8.080627898679986e-06, "loss": 0.3933, "step": 906 }, { "epoch": 0.01617736239432098, "grad_norm": 0.8733242750167847, "learning_rate": 8.089546914020694e-06, "loss": 0.3343, "step": 907 }, { "epoch": 0.016195198516034674, "grad_norm": 1.3466911315917969, "learning_rate": 8.098465929361398e-06, "loss": 0.3641, "step": 908 }, { "epoch": 0.01621303463774837, "grad_norm": 1.0537189245224, "learning_rate": 8.107384944702105e-06, "loss": 0.3967, "step": 909 }, { "epoch": 0.016230870759462064, "grad_norm": 1.1082985401153564, "learning_rate": 8.116303960042811e-06, "loss": 0.4435, "step": 910 }, { "epoch": 0.016248706881175756, "grad_norm": 0.7933439612388611, "learning_rate": 8.125222975383518e-06, "loss": 0.3795, "step": 911 }, { "epoch": 0.01626654300288945, "grad_norm": 1.3988990783691406, "learning_rate": 8.134141990724224e-06, "loss": 0.4147, "step": 912 }, { "epoch": 0.016284379124603145, "grad_norm": 0.6847274303436279, "learning_rate": 8.14306100606493e-06, "loss": 0.3548, "step": 913 }, { "epoch": 0.01630221524631684, "grad_norm": 1.28653883934021, "learning_rate": 8.151980021405638e-06, "loss": 0.4874, "step": 914 }, { "epoch": 0.016320051368030535, "grad_norm": 0.8804856538772583, "learning_rate": 8.160899036746343e-06, "loss": 0.4444, "step": 915 }, { "epoch": 0.01633788748974423, "grad_norm": 0.8684842586517334, "learning_rate": 8.16981805208705e-06, "loss": 0.3481, "step": 916 }, { "epoch": 0.016355723611457925, "grad_norm": 0.8577004671096802, "learning_rate": 8.178737067427756e-06, "loss": 0.3945, "step": 917 }, { "epoch": 0.01637355973317162, "grad_norm": 1.2012475728988647, "learning_rate": 8.187656082768464e-06, "loss": 0.4704, "step": 918 }, { "epoch": 0.016391395854885315, "grad_norm": 0.8696227669715881, "learning_rate": 8.196575098109169e-06, "loss": 0.343, "step": 919 }, { "epoch": 0.01640923197659901, "grad_norm": 1.07411527633667, "learning_rate": 8.205494113449875e-06, "loss": 0.411, "step": 920 }, { "epoch": 0.0164270680983127, "grad_norm": 0.7849026322364807, "learning_rate": 8.214413128790583e-06, "loss": 0.3985, "step": 921 }, { "epoch": 0.016444904220026396, "grad_norm": 1.356942057609558, "learning_rate": 8.223332144131288e-06, "loss": 0.3756, "step": 922 }, { "epoch": 0.01646274034174009, "grad_norm": 0.95868319272995, "learning_rate": 8.232251159471994e-06, "loss": 0.4238, "step": 923 }, { "epoch": 0.016480576463453786, "grad_norm": 0.9421398043632507, "learning_rate": 8.2411701748127e-06, "loss": 0.3874, "step": 924 }, { "epoch": 0.01649841258516748, "grad_norm": 1.05433189868927, "learning_rate": 8.250089190153409e-06, "loss": 0.4118, "step": 925 }, { "epoch": 0.016516248706881176, "grad_norm": 2.117459774017334, "learning_rate": 8.259008205494113e-06, "loss": 0.3496, "step": 926 }, { "epoch": 0.01653408482859487, "grad_norm": 1.3546380996704102, "learning_rate": 8.26792722083482e-06, "loss": 0.4632, "step": 927 }, { "epoch": 0.016551920950308566, "grad_norm": 0.9243427515029907, "learning_rate": 8.276846236175528e-06, "loss": 0.3983, "step": 928 }, { "epoch": 0.01656975707202226, "grad_norm": 0.9842472076416016, "learning_rate": 8.285765251516232e-06, "loss": 0.4043, "step": 929 }, { "epoch": 0.016587593193735956, "grad_norm": 0.8520750999450684, "learning_rate": 8.294684266856939e-06, "loss": 0.386, "step": 930 }, { "epoch": 0.016605429315449647, "grad_norm": 1.1591039896011353, "learning_rate": 8.303603282197645e-06, "loss": 0.4513, "step": 931 }, { "epoch": 0.016623265437163342, "grad_norm": 1.1286969184875488, "learning_rate": 8.312522297538353e-06, "loss": 0.358, "step": 932 }, { "epoch": 0.016641101558877037, "grad_norm": 0.8448970913887024, "learning_rate": 8.321441312879058e-06, "loss": 0.3736, "step": 933 }, { "epoch": 0.016658937680590732, "grad_norm": 1.1036772727966309, "learning_rate": 8.330360328219764e-06, "loss": 0.378, "step": 934 }, { "epoch": 0.016676773802304427, "grad_norm": 1.2469936609268188, "learning_rate": 8.33927934356047e-06, "loss": 0.3758, "step": 935 }, { "epoch": 0.01669460992401812, "grad_norm": 1.0058586597442627, "learning_rate": 8.348198358901179e-06, "loss": 0.3772, "step": 936 }, { "epoch": 0.016712446045731816, "grad_norm": 1.0551007986068726, "learning_rate": 8.357117374241884e-06, "loss": 0.3603, "step": 937 }, { "epoch": 0.01673028216744551, "grad_norm": 1.0873944759368896, "learning_rate": 8.36603638958259e-06, "loss": 0.3745, "step": 938 }, { "epoch": 0.016748118289159206, "grad_norm": 1.276474952697754, "learning_rate": 8.374955404923298e-06, "loss": 0.3629, "step": 939 }, { "epoch": 0.0167659544108729, "grad_norm": 0.892585277557373, "learning_rate": 8.383874420264003e-06, "loss": 0.409, "step": 940 }, { "epoch": 0.016783790532586593, "grad_norm": 1.0321881771087646, "learning_rate": 8.392793435604709e-06, "loss": 0.332, "step": 941 }, { "epoch": 0.016801626654300288, "grad_norm": 1.2248462438583374, "learning_rate": 8.401712450945415e-06, "loss": 0.3936, "step": 942 }, { "epoch": 0.016819462776013983, "grad_norm": 1.0450223684310913, "learning_rate": 8.410631466286123e-06, "loss": 0.3439, "step": 943 }, { "epoch": 0.016837298897727677, "grad_norm": 2.632742166519165, "learning_rate": 8.419550481626828e-06, "loss": 0.3911, "step": 944 }, { "epoch": 0.016855135019441372, "grad_norm": 0.907788872718811, "learning_rate": 8.428469496967535e-06, "loss": 0.3474, "step": 945 }, { "epoch": 0.016872971141155067, "grad_norm": 0.9532281756401062, "learning_rate": 8.437388512308243e-06, "loss": 0.4082, "step": 946 }, { "epoch": 0.016890807262868762, "grad_norm": 0.7200486063957214, "learning_rate": 8.446307527648947e-06, "loss": 0.3763, "step": 947 }, { "epoch": 0.016908643384582457, "grad_norm": 0.8066374659538269, "learning_rate": 8.455226542989654e-06, "loss": 0.4267, "step": 948 }, { "epoch": 0.016926479506296152, "grad_norm": 0.9796708822250366, "learning_rate": 8.46414555833036e-06, "loss": 0.4264, "step": 949 }, { "epoch": 0.016944315628009847, "grad_norm": 1.1514612436294556, "learning_rate": 8.473064573671068e-06, "loss": 0.3832, "step": 950 }, { "epoch": 0.01696215174972354, "grad_norm": 1.1260454654693604, "learning_rate": 8.481983589011773e-06, "loss": 0.3337, "step": 951 }, { "epoch": 0.016979987871437233, "grad_norm": 1.0035359859466553, "learning_rate": 8.49090260435248e-06, "loss": 0.4341, "step": 952 }, { "epoch": 0.016997823993150928, "grad_norm": 0.8457959890365601, "learning_rate": 8.499821619693187e-06, "loss": 0.4132, "step": 953 }, { "epoch": 0.017015660114864623, "grad_norm": 1.1532012224197388, "learning_rate": 8.508740635033894e-06, "loss": 0.3535, "step": 954 }, { "epoch": 0.017033496236578318, "grad_norm": 0.841249942779541, "learning_rate": 8.517659650374598e-06, "loss": 0.3795, "step": 955 }, { "epoch": 0.017051332358292013, "grad_norm": 0.74698805809021, "learning_rate": 8.526578665715305e-06, "loss": 0.4153, "step": 956 }, { "epoch": 0.017069168480005708, "grad_norm": 1.704779028892517, "learning_rate": 8.535497681056013e-06, "loss": 0.4107, "step": 957 }, { "epoch": 0.017087004601719403, "grad_norm": 1.5854723453521729, "learning_rate": 8.544416696396718e-06, "loss": 0.3867, "step": 958 }, { "epoch": 0.017104840723433098, "grad_norm": 2.241612672805786, "learning_rate": 8.553335711737424e-06, "loss": 0.3918, "step": 959 }, { "epoch": 0.017122676845146793, "grad_norm": 0.8506503701210022, "learning_rate": 8.562254727078132e-06, "loss": 0.3158, "step": 960 }, { "epoch": 0.017140512966860484, "grad_norm": 0.7391118407249451, "learning_rate": 8.571173742418838e-06, "loss": 0.352, "step": 961 }, { "epoch": 0.01715834908857418, "grad_norm": 0.9272975325584412, "learning_rate": 8.580092757759543e-06, "loss": 0.4031, "step": 962 }, { "epoch": 0.017176185210287874, "grad_norm": 2.544142007827759, "learning_rate": 8.58901177310025e-06, "loss": 0.4406, "step": 963 }, { "epoch": 0.01719402133200157, "grad_norm": 1.7289552688598633, "learning_rate": 8.597930788440957e-06, "loss": 0.3602, "step": 964 }, { "epoch": 0.017211857453715264, "grad_norm": 1.2334113121032715, "learning_rate": 8.606849803781662e-06, "loss": 0.3795, "step": 965 }, { "epoch": 0.01722969357542896, "grad_norm": 0.8848539590835571, "learning_rate": 8.615768819122369e-06, "loss": 0.2925, "step": 966 }, { "epoch": 0.017247529697142654, "grad_norm": 0.8424369096755981, "learning_rate": 8.624687834463075e-06, "loss": 0.3758, "step": 967 }, { "epoch": 0.01726536581885635, "grad_norm": 0.876915693283081, "learning_rate": 8.633606849803783e-06, "loss": 0.4027, "step": 968 }, { "epoch": 0.017283201940570043, "grad_norm": 1.4270676374435425, "learning_rate": 8.642525865144488e-06, "loss": 0.3421, "step": 969 }, { "epoch": 0.01730103806228374, "grad_norm": 0.8328204154968262, "learning_rate": 8.651444880485194e-06, "loss": 0.3745, "step": 970 }, { "epoch": 0.01731887418399743, "grad_norm": 1.0704902410507202, "learning_rate": 8.660363895825902e-06, "loss": 0.3493, "step": 971 }, { "epoch": 0.017336710305711125, "grad_norm": 0.818196177482605, "learning_rate": 8.669282911166609e-06, "loss": 0.3426, "step": 972 }, { "epoch": 0.01735454642742482, "grad_norm": 0.8499037027359009, "learning_rate": 8.678201926507313e-06, "loss": 0.3264, "step": 973 }, { "epoch": 0.017372382549138515, "grad_norm": 1.086616039276123, "learning_rate": 8.68712094184802e-06, "loss": 0.3822, "step": 974 }, { "epoch": 0.01739021867085221, "grad_norm": 0.8740732073783875, "learning_rate": 8.696039957188728e-06, "loss": 0.3772, "step": 975 }, { "epoch": 0.017408054792565904, "grad_norm": 1.0218117237091064, "learning_rate": 8.704958972529432e-06, "loss": 0.3795, "step": 976 }, { "epoch": 0.0174258909142796, "grad_norm": 1.5836724042892456, "learning_rate": 8.713877987870139e-06, "loss": 0.4078, "step": 977 }, { "epoch": 0.017443727035993294, "grad_norm": 0.7581110000610352, "learning_rate": 8.722797003210847e-06, "loss": 0.3992, "step": 978 }, { "epoch": 0.01746156315770699, "grad_norm": 1.0162327289581299, "learning_rate": 8.731716018551553e-06, "loss": 0.3754, "step": 979 }, { "epoch": 0.017479399279420684, "grad_norm": 1.0005271434783936, "learning_rate": 8.740635033892258e-06, "loss": 0.3391, "step": 980 }, { "epoch": 0.01749723540113438, "grad_norm": 0.902283251285553, "learning_rate": 8.749554049232964e-06, "loss": 0.3565, "step": 981 }, { "epoch": 0.01751507152284807, "grad_norm": 0.7696515321731567, "learning_rate": 8.758473064573672e-06, "loss": 0.4062, "step": 982 }, { "epoch": 0.017532907644561765, "grad_norm": 0.7952608466148376, "learning_rate": 8.767392079914379e-06, "loss": 0.3125, "step": 983 }, { "epoch": 0.01755074376627546, "grad_norm": 0.9154542684555054, "learning_rate": 8.776311095255083e-06, "loss": 0.3663, "step": 984 }, { "epoch": 0.017568579887989155, "grad_norm": 0.9744145274162292, "learning_rate": 8.785230110595791e-06, "loss": 0.4255, "step": 985 }, { "epoch": 0.01758641600970285, "grad_norm": 1.046738862991333, "learning_rate": 8.794149125936498e-06, "loss": 0.3386, "step": 986 }, { "epoch": 0.017604252131416545, "grad_norm": 1.0191091299057007, "learning_rate": 8.803068141277203e-06, "loss": 0.398, "step": 987 }, { "epoch": 0.01762208825313024, "grad_norm": 1.1875132322311401, "learning_rate": 8.811987156617909e-06, "loss": 0.4394, "step": 988 }, { "epoch": 0.017639924374843935, "grad_norm": 0.9273723363876343, "learning_rate": 8.820906171958617e-06, "loss": 0.3593, "step": 989 }, { "epoch": 0.01765776049655763, "grad_norm": 2.2347023487091064, "learning_rate": 8.829825187299323e-06, "loss": 0.3727, "step": 990 }, { "epoch": 0.017675596618271325, "grad_norm": 0.8893629908561707, "learning_rate": 8.838744202640028e-06, "loss": 0.4047, "step": 991 }, { "epoch": 0.017693432739985016, "grad_norm": 1.4326751232147217, "learning_rate": 8.847663217980736e-06, "loss": 0.4774, "step": 992 }, { "epoch": 0.01771126886169871, "grad_norm": 1.0148738622665405, "learning_rate": 8.856582233321443e-06, "loss": 0.4132, "step": 993 }, { "epoch": 0.017729104983412406, "grad_norm": 1.1211464405059814, "learning_rate": 8.865501248662147e-06, "loss": 0.3738, "step": 994 }, { "epoch": 0.0177469411051261, "grad_norm": 0.6987782120704651, "learning_rate": 8.874420264002854e-06, "loss": 0.3284, "step": 995 }, { "epoch": 0.017764777226839796, "grad_norm": 0.861219048500061, "learning_rate": 8.883339279343562e-06, "loss": 0.3263, "step": 996 }, { "epoch": 0.01778261334855349, "grad_norm": 1.0110158920288086, "learning_rate": 8.892258294684268e-06, "loss": 0.4485, "step": 997 }, { "epoch": 0.017800449470267186, "grad_norm": 1.0399854183197021, "learning_rate": 8.901177310024973e-06, "loss": 0.4472, "step": 998 }, { "epoch": 0.01781828559198088, "grad_norm": 0.9171674251556396, "learning_rate": 8.91009632536568e-06, "loss": 0.3886, "step": 999 }, { "epoch": 0.017836121713694576, "grad_norm": 1.298419713973999, "learning_rate": 8.919015340706387e-06, "loss": 0.3966, "step": 1000 }, { "epoch": 0.017836121713694576, "eval_loss": 0.33756929636001587, "eval_runtime": 275.5663, "eval_samples_per_second": 3.716, "eval_steps_per_second": 0.621, "step": 1000 }, { "epoch": 0.01785395783540827, "grad_norm": 0.8770831227302551, "learning_rate": 8.927934356047094e-06, "loss": 0.3391, "step": 1001 }, { "epoch": 0.017871793957121962, "grad_norm": 3.754714250564575, "learning_rate": 8.936853371387798e-06, "loss": 0.3939, "step": 1002 }, { "epoch": 0.017889630078835657, "grad_norm": 0.8046690821647644, "learning_rate": 8.945772386728506e-06, "loss": 0.2857, "step": 1003 }, { "epoch": 0.017907466200549352, "grad_norm": 1.0197075605392456, "learning_rate": 8.954691402069213e-06, "loss": 0.3775, "step": 1004 }, { "epoch": 0.017925302322263047, "grad_norm": 1.0234031677246094, "learning_rate": 8.963610417409917e-06, "loss": 0.3761, "step": 1005 }, { "epoch": 0.01794313844397674, "grad_norm": 2.2465391159057617, "learning_rate": 8.972529432750624e-06, "loss": 0.3777, "step": 1006 }, { "epoch": 0.017960974565690437, "grad_norm": 0.9311102628707886, "learning_rate": 8.981448448091332e-06, "loss": 0.3959, "step": 1007 }, { "epoch": 0.01797881068740413, "grad_norm": 0.8143541216850281, "learning_rate": 8.990367463432038e-06, "loss": 0.4143, "step": 1008 }, { "epoch": 0.017996646809117826, "grad_norm": 0.8146178722381592, "learning_rate": 8.999286478772743e-06, "loss": 0.4048, "step": 1009 }, { "epoch": 0.01801448293083152, "grad_norm": 0.7817711234092712, "learning_rate": 9.008205494113451e-06, "loss": 0.2997, "step": 1010 }, { "epoch": 0.018032319052545216, "grad_norm": 1.109965205192566, "learning_rate": 9.017124509454157e-06, "loss": 0.3481, "step": 1011 }, { "epoch": 0.018050155174258908, "grad_norm": 0.6897335648536682, "learning_rate": 9.026043524794862e-06, "loss": 0.332, "step": 1012 }, { "epoch": 0.018067991295972603, "grad_norm": 1.485617756843567, "learning_rate": 9.034962540135568e-06, "loss": 0.3758, "step": 1013 }, { "epoch": 0.018085827417686298, "grad_norm": 0.9818662405014038, "learning_rate": 9.043881555476277e-06, "loss": 0.3444, "step": 1014 }, { "epoch": 0.018103663539399992, "grad_norm": 0.9142575263977051, "learning_rate": 9.052800570816983e-06, "loss": 0.4152, "step": 1015 }, { "epoch": 0.018121499661113687, "grad_norm": 1.0542312860488892, "learning_rate": 9.061719586157688e-06, "loss": 0.4386, "step": 1016 }, { "epoch": 0.018139335782827382, "grad_norm": 1.3937829732894897, "learning_rate": 9.070638601498396e-06, "loss": 0.4342, "step": 1017 }, { "epoch": 0.018157171904541077, "grad_norm": 0.78803950548172, "learning_rate": 9.079557616839102e-06, "loss": 0.402, "step": 1018 }, { "epoch": 0.018175008026254772, "grad_norm": 1.7907694578170776, "learning_rate": 9.088476632179808e-06, "loss": 0.4358, "step": 1019 }, { "epoch": 0.018192844147968467, "grad_norm": 0.5500625371932983, "learning_rate": 9.097395647520513e-06, "loss": 0.3381, "step": 1020 }, { "epoch": 0.018210680269682162, "grad_norm": 0.7231621146202087, "learning_rate": 9.106314662861221e-06, "loss": 0.3358, "step": 1021 }, { "epoch": 0.018228516391395853, "grad_norm": 0.8248460292816162, "learning_rate": 9.115233678201928e-06, "loss": 0.4092, "step": 1022 }, { "epoch": 0.01824635251310955, "grad_norm": 1.1761342287063599, "learning_rate": 9.124152693542632e-06, "loss": 0.3016, "step": 1023 }, { "epoch": 0.018264188634823243, "grad_norm": 0.8809008598327637, "learning_rate": 9.13307170888334e-06, "loss": 0.3401, "step": 1024 }, { "epoch": 0.018282024756536938, "grad_norm": 0.9658706784248352, "learning_rate": 9.141990724224047e-06, "loss": 0.3635, "step": 1025 }, { "epoch": 0.018299860878250633, "grad_norm": 1.1166636943817139, "learning_rate": 9.150909739564753e-06, "loss": 0.4423, "step": 1026 }, { "epoch": 0.018317696999964328, "grad_norm": 0.8470121026039124, "learning_rate": 9.159828754905458e-06, "loss": 0.4163, "step": 1027 }, { "epoch": 0.018335533121678023, "grad_norm": 0.7772458791732788, "learning_rate": 9.168747770246166e-06, "loss": 0.4036, "step": 1028 }, { "epoch": 0.018353369243391718, "grad_norm": 1.0990639925003052, "learning_rate": 9.177666785586872e-06, "loss": 0.3164, "step": 1029 }, { "epoch": 0.018371205365105413, "grad_norm": 0.9506021738052368, "learning_rate": 9.186585800927577e-06, "loss": 0.3709, "step": 1030 }, { "epoch": 0.018389041486819108, "grad_norm": 0.909835934638977, "learning_rate": 9.195504816268285e-06, "loss": 0.3815, "step": 1031 }, { "epoch": 0.0184068776085328, "grad_norm": 0.7399672865867615, "learning_rate": 9.204423831608991e-06, "loss": 0.3234, "step": 1032 }, { "epoch": 0.018424713730246494, "grad_norm": 1.0614478588104248, "learning_rate": 9.213342846949698e-06, "loss": 0.3931, "step": 1033 }, { "epoch": 0.01844254985196019, "grad_norm": 0.6906810402870178, "learning_rate": 9.222261862290402e-06, "loss": 0.3074, "step": 1034 }, { "epoch": 0.018460385973673884, "grad_norm": 0.7197563648223877, "learning_rate": 9.23118087763111e-06, "loss": 0.3373, "step": 1035 }, { "epoch": 0.01847822209538758, "grad_norm": 0.9273034334182739, "learning_rate": 9.240099892971817e-06, "loss": 0.4292, "step": 1036 }, { "epoch": 0.018496058217101274, "grad_norm": 1.1073459386825562, "learning_rate": 9.249018908312523e-06, "loss": 0.2683, "step": 1037 }, { "epoch": 0.01851389433881497, "grad_norm": 1.104052186012268, "learning_rate": 9.257937923653228e-06, "loss": 0.4062, "step": 1038 }, { "epoch": 0.018531730460528664, "grad_norm": 2.026197910308838, "learning_rate": 9.266856938993936e-06, "loss": 0.3907, "step": 1039 }, { "epoch": 0.01854956658224236, "grad_norm": 1.305974006652832, "learning_rate": 9.275775954334642e-06, "loss": 0.3665, "step": 1040 }, { "epoch": 0.018567402703956053, "grad_norm": 0.6929371356964111, "learning_rate": 9.284694969675347e-06, "loss": 0.3842, "step": 1041 }, { "epoch": 0.018585238825669745, "grad_norm": 0.6076000332832336, "learning_rate": 9.293613985016055e-06, "loss": 0.3296, "step": 1042 }, { "epoch": 0.01860307494738344, "grad_norm": 1.3677473068237305, "learning_rate": 9.302533000356762e-06, "loss": 0.3563, "step": 1043 }, { "epoch": 0.018620911069097135, "grad_norm": 1.170035719871521, "learning_rate": 9.311452015697468e-06, "loss": 0.401, "step": 1044 }, { "epoch": 0.01863874719081083, "grad_norm": 1.1717208623886108, "learning_rate": 9.320371031038173e-06, "loss": 0.4063, "step": 1045 }, { "epoch": 0.018656583312524525, "grad_norm": 0.9528186917304993, "learning_rate": 9.32929004637888e-06, "loss": 0.3779, "step": 1046 }, { "epoch": 0.01867441943423822, "grad_norm": 0.8370277285575867, "learning_rate": 9.338209061719587e-06, "loss": 0.4015, "step": 1047 }, { "epoch": 0.018692255555951914, "grad_norm": 0.793533980846405, "learning_rate": 9.347128077060292e-06, "loss": 0.3512, "step": 1048 }, { "epoch": 0.01871009167766561, "grad_norm": 0.9082489609718323, "learning_rate": 9.356047092401e-06, "loss": 0.3966, "step": 1049 }, { "epoch": 0.018727927799379304, "grad_norm": 0.9899812340736389, "learning_rate": 9.364966107741706e-06, "loss": 0.3731, "step": 1050 }, { "epoch": 0.018745763921093, "grad_norm": 4.113532066345215, "learning_rate": 9.373885123082413e-06, "loss": 0.4431, "step": 1051 }, { "epoch": 0.01876360004280669, "grad_norm": 0.5420061945915222, "learning_rate": 9.382804138423117e-06, "loss": 0.303, "step": 1052 }, { "epoch": 0.018781436164520385, "grad_norm": 0.7249712347984314, "learning_rate": 9.391723153763825e-06, "loss": 0.3765, "step": 1053 }, { "epoch": 0.01879927228623408, "grad_norm": 0.8847532272338867, "learning_rate": 9.400642169104532e-06, "loss": 0.3782, "step": 1054 }, { "epoch": 0.018817108407947775, "grad_norm": 0.8722662925720215, "learning_rate": 9.409561184445238e-06, "loss": 0.334, "step": 1055 }, { "epoch": 0.01883494452966147, "grad_norm": 0.6470535397529602, "learning_rate": 9.418480199785944e-06, "loss": 0.4192, "step": 1056 }, { "epoch": 0.018852780651375165, "grad_norm": 0.7459009289741516, "learning_rate": 9.427399215126651e-06, "loss": 0.4015, "step": 1057 }, { "epoch": 0.01887061677308886, "grad_norm": 1.0923402309417725, "learning_rate": 9.436318230467357e-06, "loss": 0.3711, "step": 1058 }, { "epoch": 0.018888452894802555, "grad_norm": 0.802423357963562, "learning_rate": 9.445237245808062e-06, "loss": 0.3794, "step": 1059 }, { "epoch": 0.01890628901651625, "grad_norm": 0.9659205079078674, "learning_rate": 9.45415626114877e-06, "loss": 0.4019, "step": 1060 }, { "epoch": 0.018924125138229945, "grad_norm": 0.9830519556999207, "learning_rate": 9.463075276489476e-06, "loss": 0.3762, "step": 1061 }, { "epoch": 0.018941961259943636, "grad_norm": 0.6956446766853333, "learning_rate": 9.471994291830183e-06, "loss": 0.3534, "step": 1062 }, { "epoch": 0.01895979738165733, "grad_norm": 1.0788321495056152, "learning_rate": 9.480913307170889e-06, "loss": 0.3809, "step": 1063 }, { "epoch": 0.018977633503371026, "grad_norm": 0.707240641117096, "learning_rate": 9.489832322511596e-06, "loss": 0.3505, "step": 1064 }, { "epoch": 0.01899546962508472, "grad_norm": 0.8199694156646729, "learning_rate": 9.498751337852302e-06, "loss": 0.4184, "step": 1065 }, { "epoch": 0.019013305746798416, "grad_norm": 0.7357836961746216, "learning_rate": 9.507670353193008e-06, "loss": 0.3826, "step": 1066 }, { "epoch": 0.01903114186851211, "grad_norm": 0.7746564149856567, "learning_rate": 9.516589368533715e-06, "loss": 0.3603, "step": 1067 }, { "epoch": 0.019048977990225806, "grad_norm": 0.7702838778495789, "learning_rate": 9.525508383874421e-06, "loss": 0.4102, "step": 1068 }, { "epoch": 0.0190668141119395, "grad_norm": 0.7079132795333862, "learning_rate": 9.534427399215127e-06, "loss": 0.305, "step": 1069 }, { "epoch": 0.019084650233653196, "grad_norm": 0.9278112649917603, "learning_rate": 9.543346414555832e-06, "loss": 0.449, "step": 1070 }, { "epoch": 0.01910248635536689, "grad_norm": 0.8345721364021301, "learning_rate": 9.55226542989654e-06, "loss": 0.3828, "step": 1071 }, { "epoch": 0.019120322477080582, "grad_norm": 1.123867154121399, "learning_rate": 9.561184445237247e-06, "loss": 0.4336, "step": 1072 }, { "epoch": 0.019138158598794277, "grad_norm": 1.0548595190048218, "learning_rate": 9.570103460577953e-06, "loss": 0.3897, "step": 1073 }, { "epoch": 0.019155994720507972, "grad_norm": 0.8144445419311523, "learning_rate": 9.57902247591866e-06, "loss": 0.4039, "step": 1074 }, { "epoch": 0.019173830842221667, "grad_norm": 0.8764538168907166, "learning_rate": 9.587941491259366e-06, "loss": 0.4016, "step": 1075 }, { "epoch": 0.01919166696393536, "grad_norm": 0.8950740694999695, "learning_rate": 9.596860506600072e-06, "loss": 0.3898, "step": 1076 }, { "epoch": 0.019209503085649057, "grad_norm": 0.746254026889801, "learning_rate": 9.605779521940777e-06, "loss": 0.3414, "step": 1077 }, { "epoch": 0.01922733920736275, "grad_norm": 0.614499032497406, "learning_rate": 9.614698537281485e-06, "loss": 0.3527, "step": 1078 }, { "epoch": 0.019245175329076446, "grad_norm": 0.626251220703125, "learning_rate": 9.623617552622191e-06, "loss": 0.3094, "step": 1079 }, { "epoch": 0.01926301145079014, "grad_norm": 0.8994700312614441, "learning_rate": 9.632536567962898e-06, "loss": 0.3647, "step": 1080 }, { "epoch": 0.019280847572503836, "grad_norm": 0.8249965906143188, "learning_rate": 9.641455583303604e-06, "loss": 0.3451, "step": 1081 }, { "epoch": 0.019298683694217528, "grad_norm": 0.6884198784828186, "learning_rate": 9.65037459864431e-06, "loss": 0.3033, "step": 1082 }, { "epoch": 0.019316519815931223, "grad_norm": 0.80061936378479, "learning_rate": 9.659293613985017e-06, "loss": 0.3434, "step": 1083 }, { "epoch": 0.019334355937644918, "grad_norm": 0.8543218970298767, "learning_rate": 9.668212629325723e-06, "loss": 0.3605, "step": 1084 }, { "epoch": 0.019352192059358612, "grad_norm": 0.8076745867729187, "learning_rate": 9.67713164466643e-06, "loss": 0.3879, "step": 1085 }, { "epoch": 0.019370028181072307, "grad_norm": 1.2448673248291016, "learning_rate": 9.686050660007136e-06, "loss": 0.4107, "step": 1086 }, { "epoch": 0.019387864302786002, "grad_norm": 0.6801688075065613, "learning_rate": 9.694969675347842e-06, "loss": 0.3263, "step": 1087 }, { "epoch": 0.019405700424499697, "grad_norm": 0.7092694044113159, "learning_rate": 9.703888690688549e-06, "loss": 0.3444, "step": 1088 }, { "epoch": 0.019423536546213392, "grad_norm": 0.6756287813186646, "learning_rate": 9.712807706029255e-06, "loss": 0.3616, "step": 1089 }, { "epoch": 0.019441372667927087, "grad_norm": 0.589930534362793, "learning_rate": 9.721726721369961e-06, "loss": 0.3845, "step": 1090 }, { "epoch": 0.019459208789640782, "grad_norm": 0.7394228577613831, "learning_rate": 9.730645736710668e-06, "loss": 0.3178, "step": 1091 }, { "epoch": 0.019477044911354473, "grad_norm": 0.8474282622337341, "learning_rate": 9.739564752051374e-06, "loss": 0.4054, "step": 1092 }, { "epoch": 0.01949488103306817, "grad_norm": 0.8308938145637512, "learning_rate": 9.74848376739208e-06, "loss": 0.4189, "step": 1093 }, { "epoch": 0.019512717154781863, "grad_norm": 0.7115844488143921, "learning_rate": 9.757402782732787e-06, "loss": 0.3978, "step": 1094 }, { "epoch": 0.019530553276495558, "grad_norm": 0.948319673538208, "learning_rate": 9.766321798073493e-06, "loss": 0.3886, "step": 1095 }, { "epoch": 0.019548389398209253, "grad_norm": 1.2271658182144165, "learning_rate": 9.7752408134142e-06, "loss": 0.4676, "step": 1096 }, { "epoch": 0.019566225519922948, "grad_norm": 1.0044245719909668, "learning_rate": 9.784159828754906e-06, "loss": 0.3595, "step": 1097 }, { "epoch": 0.019584061641636643, "grad_norm": 0.5866712927818298, "learning_rate": 9.793078844095612e-06, "loss": 0.3656, "step": 1098 }, { "epoch": 0.019601897763350338, "grad_norm": 0.7712075710296631, "learning_rate": 9.801997859436319e-06, "loss": 0.3324, "step": 1099 }, { "epoch": 0.019619733885064033, "grad_norm": 0.8975673913955688, "learning_rate": 9.810916874777025e-06, "loss": 0.4464, "step": 1100 }, { "epoch": 0.019637570006777728, "grad_norm": 0.6946120262145996, "learning_rate": 9.819835890117732e-06, "loss": 0.3799, "step": 1101 }, { "epoch": 0.01965540612849142, "grad_norm": 1.1008415222167969, "learning_rate": 9.828754905458438e-06, "loss": 0.3513, "step": 1102 }, { "epoch": 0.019673242250205114, "grad_norm": 0.7117588520050049, "learning_rate": 9.837673920799144e-06, "loss": 0.3698, "step": 1103 }, { "epoch": 0.01969107837191881, "grad_norm": 0.8079916834831238, "learning_rate": 9.84659293613985e-06, "loss": 0.3122, "step": 1104 }, { "epoch": 0.019708914493632504, "grad_norm": 0.7888576984405518, "learning_rate": 9.855511951480557e-06, "loss": 0.3777, "step": 1105 }, { "epoch": 0.0197267506153462, "grad_norm": 0.5892645120620728, "learning_rate": 9.864430966821263e-06, "loss": 0.2792, "step": 1106 }, { "epoch": 0.019744586737059894, "grad_norm": 0.869814932346344, "learning_rate": 9.87334998216197e-06, "loss": 0.3784, "step": 1107 }, { "epoch": 0.01976242285877359, "grad_norm": 0.7885927557945251, "learning_rate": 9.882268997502676e-06, "loss": 0.388, "step": 1108 }, { "epoch": 0.019780258980487284, "grad_norm": 0.6155068278312683, "learning_rate": 9.891188012843383e-06, "loss": 0.3696, "step": 1109 }, { "epoch": 0.01979809510220098, "grad_norm": 0.8185555934906006, "learning_rate": 9.900107028184089e-06, "loss": 0.3531, "step": 1110 }, { "epoch": 0.019815931223914673, "grad_norm": 0.7030999660491943, "learning_rate": 9.909026043524795e-06, "loss": 0.4374, "step": 1111 }, { "epoch": 0.019833767345628365, "grad_norm": 0.6789141893386841, "learning_rate": 9.917945058865502e-06, "loss": 0.3667, "step": 1112 }, { "epoch": 0.01985160346734206, "grad_norm": 1.3756961822509766, "learning_rate": 9.926864074206208e-06, "loss": 0.4171, "step": 1113 }, { "epoch": 0.019869439589055755, "grad_norm": 0.7162296772003174, "learning_rate": 9.935783089546915e-06, "loss": 0.3242, "step": 1114 }, { "epoch": 0.01988727571076945, "grad_norm": 0.8953533172607422, "learning_rate": 9.944702104887621e-06, "loss": 0.4475, "step": 1115 }, { "epoch": 0.019905111832483145, "grad_norm": 0.8703616261482239, "learning_rate": 9.953621120228327e-06, "loss": 0.3667, "step": 1116 }, { "epoch": 0.01992294795419684, "grad_norm": 1.4214484691619873, "learning_rate": 9.962540135569034e-06, "loss": 0.3954, "step": 1117 }, { "epoch": 0.019940784075910534, "grad_norm": 0.8139647841453552, "learning_rate": 9.97145915090974e-06, "loss": 0.416, "step": 1118 }, { "epoch": 0.01995862019762423, "grad_norm": 1.535498023033142, "learning_rate": 9.980378166250446e-06, "loss": 0.4378, "step": 1119 }, { "epoch": 0.019976456319337924, "grad_norm": 0.7993893623352051, "learning_rate": 9.989297181591153e-06, "loss": 0.3323, "step": 1120 }, { "epoch": 0.01999429244105162, "grad_norm": 0.7614473700523376, "learning_rate": 9.99821619693186e-06, "loss": 0.3553, "step": 1121 }, { "epoch": 0.02001212856276531, "grad_norm": 1.301061987876892, "learning_rate": 1.0007135212272566e-05, "loss": 0.3721, "step": 1122 }, { "epoch": 0.020029964684479006, "grad_norm": 0.9857878088951111, "learning_rate": 1.0016054227613272e-05, "loss": 0.3418, "step": 1123 }, { "epoch": 0.0200478008061927, "grad_norm": 0.6484463810920715, "learning_rate": 1.0024973242953978e-05, "loss": 0.3097, "step": 1124 }, { "epoch": 0.020065636927906395, "grad_norm": 0.82742840051651, "learning_rate": 1.0033892258294685e-05, "loss": 0.3952, "step": 1125 }, { "epoch": 0.02008347304962009, "grad_norm": 0.798607587814331, "learning_rate": 1.0042811273635391e-05, "loss": 0.3306, "step": 1126 }, { "epoch": 0.020101309171333785, "grad_norm": 1.0455095767974854, "learning_rate": 1.0051730288976097e-05, "loss": 0.4239, "step": 1127 }, { "epoch": 0.02011914529304748, "grad_norm": 1.093558669090271, "learning_rate": 1.0060649304316804e-05, "loss": 0.3184, "step": 1128 }, { "epoch": 0.020136981414761175, "grad_norm": 0.8237136602401733, "learning_rate": 1.006956831965751e-05, "loss": 0.3713, "step": 1129 }, { "epoch": 0.02015481753647487, "grad_norm": 0.7110701203346252, "learning_rate": 1.0078487334998217e-05, "loss": 0.367, "step": 1130 }, { "epoch": 0.020172653658188565, "grad_norm": 0.6957160830497742, "learning_rate": 1.0087406350338923e-05, "loss": 0.3648, "step": 1131 }, { "epoch": 0.020190489779902256, "grad_norm": 1.1493804454803467, "learning_rate": 1.009632536567963e-05, "loss": 0.4201, "step": 1132 }, { "epoch": 0.02020832590161595, "grad_norm": 0.9538556933403015, "learning_rate": 1.0105244381020336e-05, "loss": 0.3024, "step": 1133 }, { "epoch": 0.020226162023329646, "grad_norm": 0.7720417976379395, "learning_rate": 1.0114163396361042e-05, "loss": 0.3309, "step": 1134 }, { "epoch": 0.02024399814504334, "grad_norm": 0.7839877605438232, "learning_rate": 1.0123082411701749e-05, "loss": 0.3195, "step": 1135 }, { "epoch": 0.020261834266757036, "grad_norm": 0.7205426096916199, "learning_rate": 1.0132001427042455e-05, "loss": 0.3747, "step": 1136 }, { "epoch": 0.02027967038847073, "grad_norm": 0.7438391447067261, "learning_rate": 1.0140920442383161e-05, "loss": 0.3975, "step": 1137 }, { "epoch": 0.020297506510184426, "grad_norm": 0.8830706477165222, "learning_rate": 1.0149839457723868e-05, "loss": 0.3599, "step": 1138 }, { "epoch": 0.02031534263189812, "grad_norm": 0.7458361387252808, "learning_rate": 1.0158758473064574e-05, "loss": 0.3519, "step": 1139 }, { "epoch": 0.020333178753611816, "grad_norm": 0.6681880950927734, "learning_rate": 1.016767748840528e-05, "loss": 0.3044, "step": 1140 }, { "epoch": 0.02035101487532551, "grad_norm": 1.059791922569275, "learning_rate": 1.0176596503745987e-05, "loss": 0.3972, "step": 1141 }, { "epoch": 0.020368850997039206, "grad_norm": 0.7251273393630981, "learning_rate": 1.0185515519086693e-05, "loss": 0.3404, "step": 1142 }, { "epoch": 0.020386687118752897, "grad_norm": 1.1100664138793945, "learning_rate": 1.01944345344274e-05, "loss": 0.3718, "step": 1143 }, { "epoch": 0.020404523240466592, "grad_norm": 0.662216305732727, "learning_rate": 1.0203353549768106e-05, "loss": 0.3716, "step": 1144 }, { "epoch": 0.020422359362180287, "grad_norm": 0.6917687654495239, "learning_rate": 1.0212272565108812e-05, "loss": 0.4127, "step": 1145 }, { "epoch": 0.02044019548389398, "grad_norm": 0.7356336712837219, "learning_rate": 1.0221191580449519e-05, "loss": 0.3553, "step": 1146 }, { "epoch": 0.020458031605607677, "grad_norm": 0.8886292576789856, "learning_rate": 1.0230110595790225e-05, "loss": 0.401, "step": 1147 }, { "epoch": 0.02047586772732137, "grad_norm": 0.93147873878479, "learning_rate": 1.0239029611130931e-05, "loss": 0.3351, "step": 1148 }, { "epoch": 0.020493703849035066, "grad_norm": 1.390363097190857, "learning_rate": 1.0247948626471638e-05, "loss": 0.3611, "step": 1149 }, { "epoch": 0.02051153997074876, "grad_norm": 1.0718594789505005, "learning_rate": 1.0256867641812344e-05, "loss": 0.304, "step": 1150 }, { "epoch": 0.020529376092462456, "grad_norm": 0.7545117735862732, "learning_rate": 1.026578665715305e-05, "loss": 0.3446, "step": 1151 }, { "epoch": 0.02054721221417615, "grad_norm": 0.7707802057266235, "learning_rate": 1.0274705672493757e-05, "loss": 0.3562, "step": 1152 }, { "epoch": 0.020565048335889843, "grad_norm": 0.658502995967865, "learning_rate": 1.0283624687834463e-05, "loss": 0.331, "step": 1153 }, { "epoch": 0.020582884457603538, "grad_norm": 0.7341593503952026, "learning_rate": 1.029254370317517e-05, "loss": 0.2746, "step": 1154 }, { "epoch": 0.020600720579317233, "grad_norm": 0.8296139240264893, "learning_rate": 1.0301462718515876e-05, "loss": 0.3797, "step": 1155 }, { "epoch": 0.020618556701030927, "grad_norm": 1.3040196895599365, "learning_rate": 1.0310381733856583e-05, "loss": 0.3781, "step": 1156 }, { "epoch": 0.020636392822744622, "grad_norm": 0.8125171661376953, "learning_rate": 1.0319300749197289e-05, "loss": 0.3832, "step": 1157 }, { "epoch": 0.020654228944458317, "grad_norm": 0.8667579293251038, "learning_rate": 1.0328219764537995e-05, "loss": 0.4097, "step": 1158 }, { "epoch": 0.020672065066172012, "grad_norm": 0.6190834641456604, "learning_rate": 1.0337138779878702e-05, "loss": 0.358, "step": 1159 }, { "epoch": 0.020689901187885707, "grad_norm": 1.077289342880249, "learning_rate": 1.0346057795219408e-05, "loss": 0.3257, "step": 1160 }, { "epoch": 0.020707737309599402, "grad_norm": 0.8260953426361084, "learning_rate": 1.0354976810560114e-05, "loss": 0.3942, "step": 1161 }, { "epoch": 0.020725573431313097, "grad_norm": 0.8963320851325989, "learning_rate": 1.036389582590082e-05, "loss": 0.3753, "step": 1162 }, { "epoch": 0.02074340955302679, "grad_norm": 0.7348595261573792, "learning_rate": 1.0372814841241527e-05, "loss": 0.3758, "step": 1163 }, { "epoch": 0.020761245674740483, "grad_norm": 1.2362557649612427, "learning_rate": 1.0381733856582234e-05, "loss": 0.3453, "step": 1164 }, { "epoch": 0.020779081796454178, "grad_norm": 0.6578987836837769, "learning_rate": 1.039065287192294e-05, "loss": 0.3942, "step": 1165 }, { "epoch": 0.020796917918167873, "grad_norm": 0.7647325396537781, "learning_rate": 1.0399571887263646e-05, "loss": 0.3809, "step": 1166 }, { "epoch": 0.020814754039881568, "grad_norm": 0.7755059003829956, "learning_rate": 1.0408490902604354e-05, "loss": 0.3624, "step": 1167 }, { "epoch": 0.020832590161595263, "grad_norm": 0.8467023968696594, "learning_rate": 1.0417409917945059e-05, "loss": 0.3655, "step": 1168 }, { "epoch": 0.020850426283308958, "grad_norm": 1.1363590955734253, "learning_rate": 1.0426328933285765e-05, "loss": 0.3233, "step": 1169 }, { "epoch": 0.020868262405022653, "grad_norm": 0.7776278853416443, "learning_rate": 1.0435247948626472e-05, "loss": 0.3514, "step": 1170 }, { "epoch": 0.020886098526736348, "grad_norm": 0.6754946708679199, "learning_rate": 1.0444166963967178e-05, "loss": 0.3799, "step": 1171 }, { "epoch": 0.020903934648450043, "grad_norm": 0.6129249930381775, "learning_rate": 1.0453085979307885e-05, "loss": 0.3272, "step": 1172 }, { "epoch": 0.020921770770163734, "grad_norm": 0.5411374568939209, "learning_rate": 1.0462004994648591e-05, "loss": 0.3353, "step": 1173 }, { "epoch": 0.02093960689187743, "grad_norm": 1.0081175565719604, "learning_rate": 1.0470924009989299e-05, "loss": 0.312, "step": 1174 }, { "epoch": 0.020957443013591124, "grad_norm": 0.7088974118232727, "learning_rate": 1.0479843025330004e-05, "loss": 0.3653, "step": 1175 }, { "epoch": 0.02097527913530482, "grad_norm": 0.9669176936149597, "learning_rate": 1.048876204067071e-05, "loss": 0.3799, "step": 1176 }, { "epoch": 0.020993115257018514, "grad_norm": 0.717627763748169, "learning_rate": 1.0497681056011417e-05, "loss": 0.2725, "step": 1177 }, { "epoch": 0.02101095137873221, "grad_norm": 0.5851650834083557, "learning_rate": 1.0506600071352123e-05, "loss": 0.3497, "step": 1178 }, { "epoch": 0.021028787500445904, "grad_norm": 1.4409462213516235, "learning_rate": 1.051551908669283e-05, "loss": 0.3455, "step": 1179 }, { "epoch": 0.0210466236221596, "grad_norm": 0.6856871843338013, "learning_rate": 1.0524438102033536e-05, "loss": 0.355, "step": 1180 }, { "epoch": 0.021064459743873293, "grad_norm": 0.7965103983879089, "learning_rate": 1.0533357117374244e-05, "loss": 0.403, "step": 1181 }, { "epoch": 0.02108229586558699, "grad_norm": 0.845029890537262, "learning_rate": 1.0542276132714948e-05, "loss": 0.3826, "step": 1182 }, { "epoch": 0.02110013198730068, "grad_norm": 0.6509142518043518, "learning_rate": 1.0551195148055655e-05, "loss": 0.3505, "step": 1183 }, { "epoch": 0.021117968109014375, "grad_norm": 0.6117712259292603, "learning_rate": 1.0560114163396361e-05, "loss": 0.3301, "step": 1184 }, { "epoch": 0.02113580423072807, "grad_norm": 0.79875648021698, "learning_rate": 1.056903317873707e-05, "loss": 0.3352, "step": 1185 }, { "epoch": 0.021153640352441765, "grad_norm": 0.5840588808059692, "learning_rate": 1.0577952194077774e-05, "loss": 0.3113, "step": 1186 }, { "epoch": 0.02117147647415546, "grad_norm": 0.6373933553695679, "learning_rate": 1.058687120941848e-05, "loss": 0.3975, "step": 1187 }, { "epoch": 0.021189312595869154, "grad_norm": 0.6922791004180908, "learning_rate": 1.0595790224759187e-05, "loss": 0.3619, "step": 1188 }, { "epoch": 0.02120714871758285, "grad_norm": 0.9521928429603577, "learning_rate": 1.0604709240099893e-05, "loss": 0.3215, "step": 1189 }, { "epoch": 0.021224984839296544, "grad_norm": 0.5857126712799072, "learning_rate": 1.06136282554406e-05, "loss": 0.3182, "step": 1190 }, { "epoch": 0.02124282096101024, "grad_norm": 0.5465163588523865, "learning_rate": 1.0622547270781306e-05, "loss": 0.3526, "step": 1191 }, { "epoch": 0.021260657082723934, "grad_norm": 0.5520526170730591, "learning_rate": 1.0631466286122014e-05, "loss": 0.3466, "step": 1192 }, { "epoch": 0.021278493204437626, "grad_norm": 0.7705773711204529, "learning_rate": 1.0640385301462719e-05, "loss": 0.3893, "step": 1193 }, { "epoch": 0.02129632932615132, "grad_norm": 0.6714303493499756, "learning_rate": 1.0649304316803425e-05, "loss": 0.3373, "step": 1194 }, { "epoch": 0.021314165447865015, "grad_norm": 0.8076788783073425, "learning_rate": 1.0658223332144131e-05, "loss": 0.388, "step": 1195 }, { "epoch": 0.02133200156957871, "grad_norm": 0.7793228626251221, "learning_rate": 1.0667142347484838e-05, "loss": 0.4123, "step": 1196 }, { "epoch": 0.021349837691292405, "grad_norm": 0.9169655442237854, "learning_rate": 1.0676061362825544e-05, "loss": 0.3764, "step": 1197 }, { "epoch": 0.0213676738130061, "grad_norm": 0.8548396825790405, "learning_rate": 1.068498037816625e-05, "loss": 0.3923, "step": 1198 }, { "epoch": 0.021385509934719795, "grad_norm": 1.0099925994873047, "learning_rate": 1.0693899393506959e-05, "loss": 0.3566, "step": 1199 }, { "epoch": 0.02140334605643349, "grad_norm": 0.9970804452896118, "learning_rate": 1.0702818408847663e-05, "loss": 0.4287, "step": 1200 }, { "epoch": 0.021421182178147185, "grad_norm": 0.8104790449142456, "learning_rate": 1.071173742418837e-05, "loss": 0.4214, "step": 1201 }, { "epoch": 0.02143901829986088, "grad_norm": 0.6155812740325928, "learning_rate": 1.0720656439529076e-05, "loss": 0.3532, "step": 1202 }, { "epoch": 0.02145685442157457, "grad_norm": 0.6520779728889465, "learning_rate": 1.0729575454869784e-05, "loss": 0.3725, "step": 1203 }, { "epoch": 0.021474690543288266, "grad_norm": 0.9459627866744995, "learning_rate": 1.0738494470210489e-05, "loss": 0.4364, "step": 1204 }, { "epoch": 0.02149252666500196, "grad_norm": 0.9855123162269592, "learning_rate": 1.0747413485551195e-05, "loss": 0.3301, "step": 1205 }, { "epoch": 0.021510362786715656, "grad_norm": 0.6528222560882568, "learning_rate": 1.0756332500891903e-05, "loss": 0.3703, "step": 1206 }, { "epoch": 0.02152819890842935, "grad_norm": 0.9400623440742493, "learning_rate": 1.0765251516232608e-05, "loss": 0.3596, "step": 1207 }, { "epoch": 0.021546035030143046, "grad_norm": 0.5530434846878052, "learning_rate": 1.0774170531573314e-05, "loss": 0.3084, "step": 1208 }, { "epoch": 0.02156387115185674, "grad_norm": 0.6158286929130554, "learning_rate": 1.078308954691402e-05, "loss": 0.4104, "step": 1209 }, { "epoch": 0.021581707273570436, "grad_norm": 0.5364154577255249, "learning_rate": 1.0792008562254729e-05, "loss": 0.3691, "step": 1210 }, { "epoch": 0.02159954339528413, "grad_norm": 0.8397731184959412, "learning_rate": 1.0800927577595433e-05, "loss": 0.402, "step": 1211 }, { "epoch": 0.021617379516997826, "grad_norm": 1.206375241279602, "learning_rate": 1.080984659293614e-05, "loss": 0.3939, "step": 1212 }, { "epoch": 0.021635215638711517, "grad_norm": 0.8143543004989624, "learning_rate": 1.0818765608276848e-05, "loss": 0.3266, "step": 1213 }, { "epoch": 0.021653051760425212, "grad_norm": 0.5477320551872253, "learning_rate": 1.0827684623617553e-05, "loss": 0.3103, "step": 1214 }, { "epoch": 0.021670887882138907, "grad_norm": 0.7129520177841187, "learning_rate": 1.0836603638958259e-05, "loss": 0.4018, "step": 1215 }, { "epoch": 0.021688724003852602, "grad_norm": 0.6713840365409851, "learning_rate": 1.0845522654298965e-05, "loss": 0.3195, "step": 1216 }, { "epoch": 0.021706560125566297, "grad_norm": 0.5806293487548828, "learning_rate": 1.0854441669639673e-05, "loss": 0.2921, "step": 1217 }, { "epoch": 0.02172439624727999, "grad_norm": 0.6675949692726135, "learning_rate": 1.0863360684980378e-05, "loss": 0.3185, "step": 1218 }, { "epoch": 0.021742232368993687, "grad_norm": 0.6240116953849792, "learning_rate": 1.0872279700321084e-05, "loss": 0.3002, "step": 1219 }, { "epoch": 0.02176006849070738, "grad_norm": 0.5665594339370728, "learning_rate": 1.0881198715661793e-05, "loss": 0.3386, "step": 1220 }, { "epoch": 0.021777904612421076, "grad_norm": 0.6369854807853699, "learning_rate": 1.0890117731002499e-05, "loss": 0.2952, "step": 1221 }, { "epoch": 0.02179574073413477, "grad_norm": 0.9375613331794739, "learning_rate": 1.0899036746343204e-05, "loss": 0.3836, "step": 1222 }, { "epoch": 0.021813576855848463, "grad_norm": 0.7689806818962097, "learning_rate": 1.090795576168391e-05, "loss": 0.321, "step": 1223 }, { "epoch": 0.021831412977562158, "grad_norm": 0.6562024354934692, "learning_rate": 1.0916874777024618e-05, "loss": 0.3737, "step": 1224 }, { "epoch": 0.021849249099275853, "grad_norm": 0.6183013916015625, "learning_rate": 1.0925793792365323e-05, "loss": 0.4069, "step": 1225 }, { "epoch": 0.021867085220989547, "grad_norm": 0.5989112257957458, "learning_rate": 1.0934712807706029e-05, "loss": 0.3535, "step": 1226 }, { "epoch": 0.021884921342703242, "grad_norm": 0.8406489491462708, "learning_rate": 1.0943631823046736e-05, "loss": 0.506, "step": 1227 }, { "epoch": 0.021902757464416937, "grad_norm": 0.7310986518859863, "learning_rate": 1.0952550838387444e-05, "loss": 0.3709, "step": 1228 }, { "epoch": 0.021920593586130632, "grad_norm": 0.6726301312446594, "learning_rate": 1.0961469853728148e-05, "loss": 0.399, "step": 1229 }, { "epoch": 0.021938429707844327, "grad_norm": 1.2989193201065063, "learning_rate": 1.0970388869068855e-05, "loss": 0.3506, "step": 1230 }, { "epoch": 0.021956265829558022, "grad_norm": 1.3558590412139893, "learning_rate": 1.0979307884409563e-05, "loss": 0.3131, "step": 1231 }, { "epoch": 0.021974101951271717, "grad_norm": 0.4981108009815216, "learning_rate": 1.0988226899750267e-05, "loss": 0.3116, "step": 1232 }, { "epoch": 0.02199193807298541, "grad_norm": 0.7580838799476624, "learning_rate": 1.0997145915090974e-05, "loss": 0.3459, "step": 1233 }, { "epoch": 0.022009774194699103, "grad_norm": 0.8278317451477051, "learning_rate": 1.100606493043168e-05, "loss": 0.2989, "step": 1234 }, { "epoch": 0.0220276103164128, "grad_norm": 0.7717764973640442, "learning_rate": 1.1014983945772388e-05, "loss": 0.3764, "step": 1235 }, { "epoch": 0.022045446438126493, "grad_norm": 0.9159849286079407, "learning_rate": 1.1023902961113093e-05, "loss": 0.3438, "step": 1236 }, { "epoch": 0.022063282559840188, "grad_norm": 1.1090803146362305, "learning_rate": 1.10328219764538e-05, "loss": 0.3421, "step": 1237 }, { "epoch": 0.022081118681553883, "grad_norm": 0.7285106182098389, "learning_rate": 1.1041740991794507e-05, "loss": 0.3698, "step": 1238 }, { "epoch": 0.022098954803267578, "grad_norm": 0.6520172953605652, "learning_rate": 1.1050660007135214e-05, "loss": 0.3547, "step": 1239 }, { "epoch": 0.022116790924981273, "grad_norm": 0.9744372367858887, "learning_rate": 1.1059579022475918e-05, "loss": 0.3417, "step": 1240 }, { "epoch": 0.022134627046694968, "grad_norm": 0.75611811876297, "learning_rate": 1.1068498037816625e-05, "loss": 0.3388, "step": 1241 }, { "epoch": 0.022152463168408663, "grad_norm": 1.007924199104309, "learning_rate": 1.1077417053157333e-05, "loss": 0.3832, "step": 1242 }, { "epoch": 0.022170299290122354, "grad_norm": 0.6002600789070129, "learning_rate": 1.1086336068498038e-05, "loss": 0.2941, "step": 1243 }, { "epoch": 0.02218813541183605, "grad_norm": 0.7874862551689148, "learning_rate": 1.1095255083838744e-05, "loss": 0.389, "step": 1244 }, { "epoch": 0.022205971533549744, "grad_norm": 0.8228870630264282, "learning_rate": 1.1104174099179452e-05, "loss": 0.3955, "step": 1245 }, { "epoch": 0.02222380765526344, "grad_norm": 0.6489484906196594, "learning_rate": 1.1113093114520158e-05, "loss": 0.3003, "step": 1246 }, { "epoch": 0.022241643776977134, "grad_norm": 0.778408944606781, "learning_rate": 1.1122012129860863e-05, "loss": 0.3468, "step": 1247 }, { "epoch": 0.02225947989869083, "grad_norm": 0.8412859439849854, "learning_rate": 1.113093114520157e-05, "loss": 0.4323, "step": 1248 }, { "epoch": 0.022277316020404524, "grad_norm": 0.8153448104858398, "learning_rate": 1.1139850160542278e-05, "loss": 0.3486, "step": 1249 }, { "epoch": 0.02229515214211822, "grad_norm": 0.7701181769371033, "learning_rate": 1.1148769175882982e-05, "loss": 0.3746, "step": 1250 }, { "epoch": 0.022312988263831914, "grad_norm": 0.9697129726409912, "learning_rate": 1.1157688191223689e-05, "loss": 0.4327, "step": 1251 }, { "epoch": 0.02233082438554561, "grad_norm": 0.7470831871032715, "learning_rate": 1.1166607206564397e-05, "loss": 0.4245, "step": 1252 }, { "epoch": 0.0223486605072593, "grad_norm": 0.9449819326400757, "learning_rate": 1.1175526221905103e-05, "loss": 0.3416, "step": 1253 }, { "epoch": 0.022366496628972995, "grad_norm": 0.5881229639053345, "learning_rate": 1.1184445237245808e-05, "loss": 0.3463, "step": 1254 }, { "epoch": 0.02238433275068669, "grad_norm": 0.6636262536048889, "learning_rate": 1.1193364252586514e-05, "loss": 0.3682, "step": 1255 }, { "epoch": 0.022402168872400385, "grad_norm": 0.7649720311164856, "learning_rate": 1.1202283267927222e-05, "loss": 0.3458, "step": 1256 }, { "epoch": 0.02242000499411408, "grad_norm": 0.7601820230484009, "learning_rate": 1.1211202283267929e-05, "loss": 0.3582, "step": 1257 }, { "epoch": 0.022437841115827774, "grad_norm": 0.5499415993690491, "learning_rate": 1.1220121298608633e-05, "loss": 0.3632, "step": 1258 }, { "epoch": 0.02245567723754147, "grad_norm": 0.6030718088150024, "learning_rate": 1.122904031394934e-05, "loss": 0.3435, "step": 1259 }, { "epoch": 0.022473513359255164, "grad_norm": 1.0028022527694702, "learning_rate": 1.1237959329290048e-05, "loss": 0.362, "step": 1260 }, { "epoch": 0.02249134948096886, "grad_norm": 0.9477298259735107, "learning_rate": 1.1246878344630752e-05, "loss": 0.3707, "step": 1261 }, { "epoch": 0.022509185602682554, "grad_norm": 0.6312440633773804, "learning_rate": 1.1255797359971459e-05, "loss": 0.3758, "step": 1262 }, { "epoch": 0.022527021724396246, "grad_norm": 0.7580999135971069, "learning_rate": 1.1264716375312167e-05, "loss": 0.3863, "step": 1263 }, { "epoch": 0.02254485784610994, "grad_norm": 0.6269981265068054, "learning_rate": 1.1273635390652873e-05, "loss": 0.3736, "step": 1264 }, { "epoch": 0.022562693967823635, "grad_norm": 0.6420585513114929, "learning_rate": 1.1282554405993578e-05, "loss": 0.3995, "step": 1265 }, { "epoch": 0.02258053008953733, "grad_norm": 0.6503485441207886, "learning_rate": 1.1291473421334284e-05, "loss": 0.4484, "step": 1266 }, { "epoch": 0.022598366211251025, "grad_norm": 0.9527643918991089, "learning_rate": 1.1300392436674992e-05, "loss": 0.3842, "step": 1267 }, { "epoch": 0.02261620233296472, "grad_norm": 0.8242455720901489, "learning_rate": 1.1309311452015699e-05, "loss": 0.2785, "step": 1268 }, { "epoch": 0.022634038454678415, "grad_norm": 0.7459538578987122, "learning_rate": 1.1318230467356404e-05, "loss": 0.3611, "step": 1269 }, { "epoch": 0.02265187457639211, "grad_norm": 0.7388677597045898, "learning_rate": 1.1327149482697112e-05, "loss": 0.3404, "step": 1270 }, { "epoch": 0.022669710698105805, "grad_norm": 0.5609098672866821, "learning_rate": 1.1336068498037818e-05, "loss": 0.3264, "step": 1271 }, { "epoch": 0.0226875468198195, "grad_norm": 0.902957022190094, "learning_rate": 1.1344987513378523e-05, "loss": 0.3632, "step": 1272 }, { "epoch": 0.02270538294153319, "grad_norm": 0.9123650193214417, "learning_rate": 1.1353906528719229e-05, "loss": 0.4122, "step": 1273 }, { "epoch": 0.022723219063246886, "grad_norm": 0.6583011150360107, "learning_rate": 1.1362825544059937e-05, "loss": 0.3491, "step": 1274 }, { "epoch": 0.02274105518496058, "grad_norm": 0.7611053586006165, "learning_rate": 1.1371744559400643e-05, "loss": 0.362, "step": 1275 }, { "epoch": 0.022758891306674276, "grad_norm": 1.9730926752090454, "learning_rate": 1.1380663574741348e-05, "loss": 0.4072, "step": 1276 }, { "epoch": 0.02277672742838797, "grad_norm": 0.9904792904853821, "learning_rate": 1.1389582590082056e-05, "loss": 0.3232, "step": 1277 }, { "epoch": 0.022794563550101666, "grad_norm": 0.9127361178398132, "learning_rate": 1.1398501605422763e-05, "loss": 0.4263, "step": 1278 }, { "epoch": 0.02281239967181536, "grad_norm": 0.6934426426887512, "learning_rate": 1.1407420620763467e-05, "loss": 0.3319, "step": 1279 }, { "epoch": 0.022830235793529056, "grad_norm": 0.7358129620552063, "learning_rate": 1.1416339636104174e-05, "loss": 0.3492, "step": 1280 }, { "epoch": 0.02284807191524275, "grad_norm": 0.7250738739967346, "learning_rate": 1.1425258651444882e-05, "loss": 0.3692, "step": 1281 }, { "epoch": 0.022865908036956446, "grad_norm": 0.6895067691802979, "learning_rate": 1.1434177666785588e-05, "loss": 0.3441, "step": 1282 }, { "epoch": 0.022883744158670137, "grad_norm": 0.6137292385101318, "learning_rate": 1.1443096682126293e-05, "loss": 0.3553, "step": 1283 }, { "epoch": 0.022901580280383832, "grad_norm": 0.629615843296051, "learning_rate": 1.1452015697467001e-05, "loss": 0.3379, "step": 1284 }, { "epoch": 0.022919416402097527, "grad_norm": 0.8250669836997986, "learning_rate": 1.1460934712807707e-05, "loss": 0.3899, "step": 1285 }, { "epoch": 0.022937252523811222, "grad_norm": 3.8415939807891846, "learning_rate": 1.1469853728148414e-05, "loss": 0.4383, "step": 1286 }, { "epoch": 0.022955088645524917, "grad_norm": 0.5856311321258545, "learning_rate": 1.1478772743489118e-05, "loss": 0.3239, "step": 1287 }, { "epoch": 0.02297292476723861, "grad_norm": 1.1937249898910522, "learning_rate": 1.1487691758829826e-05, "loss": 0.3129, "step": 1288 }, { "epoch": 0.022990760888952307, "grad_norm": 0.7404820919036865, "learning_rate": 1.1496610774170533e-05, "loss": 0.4015, "step": 1289 }, { "epoch": 0.023008597010666, "grad_norm": 0.6083435416221619, "learning_rate": 1.1505529789511237e-05, "loss": 0.332, "step": 1290 }, { "epoch": 0.023026433132379696, "grad_norm": 0.5573935508728027, "learning_rate": 1.1514448804851944e-05, "loss": 0.3302, "step": 1291 }, { "epoch": 0.02304426925409339, "grad_norm": 0.5558791160583496, "learning_rate": 1.1523367820192652e-05, "loss": 0.3388, "step": 1292 }, { "epoch": 0.023062105375807083, "grad_norm": 0.6461998224258423, "learning_rate": 1.1532286835533358e-05, "loss": 0.3233, "step": 1293 }, { "epoch": 0.023079941497520778, "grad_norm": 0.609324038028717, "learning_rate": 1.1541205850874063e-05, "loss": 0.3886, "step": 1294 }, { "epoch": 0.023097777619234473, "grad_norm": 0.7898502945899963, "learning_rate": 1.1550124866214771e-05, "loss": 0.3979, "step": 1295 }, { "epoch": 0.023115613740948168, "grad_norm": 0.7442877292633057, "learning_rate": 1.1559043881555477e-05, "loss": 0.3901, "step": 1296 }, { "epoch": 0.023133449862661862, "grad_norm": 0.5739049315452576, "learning_rate": 1.1567962896896182e-05, "loss": 0.3253, "step": 1297 }, { "epoch": 0.023151285984375557, "grad_norm": 0.7304110527038574, "learning_rate": 1.1576881912236889e-05, "loss": 0.3857, "step": 1298 }, { "epoch": 0.023169122106089252, "grad_norm": 0.7546870708465576, "learning_rate": 1.1585800927577597e-05, "loss": 0.3216, "step": 1299 }, { "epoch": 0.023186958227802947, "grad_norm": 0.6870408654212952, "learning_rate": 1.1594719942918303e-05, "loss": 0.4039, "step": 1300 }, { "epoch": 0.023204794349516642, "grad_norm": 0.563120424747467, "learning_rate": 1.1603638958259008e-05, "loss": 0.3156, "step": 1301 }, { "epoch": 0.023222630471230337, "grad_norm": 0.6151665449142456, "learning_rate": 1.1612557973599716e-05, "loss": 0.424, "step": 1302 }, { "epoch": 0.023240466592944032, "grad_norm": 0.5403041839599609, "learning_rate": 1.1621476988940422e-05, "loss": 0.3983, "step": 1303 }, { "epoch": 0.023258302714657723, "grad_norm": 1.3480035066604614, "learning_rate": 1.1630396004281129e-05, "loss": 0.3477, "step": 1304 }, { "epoch": 0.02327613883637142, "grad_norm": 0.7916358113288879, "learning_rate": 1.1639315019621833e-05, "loss": 0.3757, "step": 1305 }, { "epoch": 0.023293974958085113, "grad_norm": 0.597547709941864, "learning_rate": 1.1648234034962541e-05, "loss": 0.3411, "step": 1306 }, { "epoch": 0.023311811079798808, "grad_norm": 0.9466458559036255, "learning_rate": 1.1657153050303248e-05, "loss": 0.4627, "step": 1307 }, { "epoch": 0.023329647201512503, "grad_norm": 0.6916009783744812, "learning_rate": 1.1666072065643952e-05, "loss": 0.3854, "step": 1308 }, { "epoch": 0.023347483323226198, "grad_norm": 0.743757963180542, "learning_rate": 1.167499108098466e-05, "loss": 0.3372, "step": 1309 }, { "epoch": 0.023365319444939893, "grad_norm": 0.6970309615135193, "learning_rate": 1.1683910096325367e-05, "loss": 0.3078, "step": 1310 }, { "epoch": 0.023383155566653588, "grad_norm": 1.208333134651184, "learning_rate": 1.1692829111666073e-05, "loss": 0.3811, "step": 1311 }, { "epoch": 0.023400991688367283, "grad_norm": 0.7694846987724304, "learning_rate": 1.1701748127006778e-05, "loss": 0.3797, "step": 1312 }, { "epoch": 0.023418827810080978, "grad_norm": 0.9177335500717163, "learning_rate": 1.1710667142347486e-05, "loss": 0.3947, "step": 1313 }, { "epoch": 0.02343666393179467, "grad_norm": 0.8459893465042114, "learning_rate": 1.1719586157688192e-05, "loss": 0.3446, "step": 1314 }, { "epoch": 0.023454500053508364, "grad_norm": 1.0277235507965088, "learning_rate": 1.1728505173028897e-05, "loss": 0.4476, "step": 1315 }, { "epoch": 0.02347233617522206, "grad_norm": 1.5987002849578857, "learning_rate": 1.1737424188369605e-05, "loss": 0.3207, "step": 1316 }, { "epoch": 0.023490172296935754, "grad_norm": 0.7976105809211731, "learning_rate": 1.1746343203710311e-05, "loss": 0.292, "step": 1317 }, { "epoch": 0.02350800841864945, "grad_norm": 0.7589207887649536, "learning_rate": 1.1755262219051018e-05, "loss": 0.4273, "step": 1318 }, { "epoch": 0.023525844540363144, "grad_norm": 0.9924066066741943, "learning_rate": 1.1764181234391723e-05, "loss": 0.3277, "step": 1319 }, { "epoch": 0.02354368066207684, "grad_norm": 0.5412936210632324, "learning_rate": 1.177310024973243e-05, "loss": 0.3073, "step": 1320 }, { "epoch": 0.023561516783790534, "grad_norm": 0.7021755576133728, "learning_rate": 1.1782019265073137e-05, "loss": 0.3623, "step": 1321 }, { "epoch": 0.02357935290550423, "grad_norm": 0.7117661833763123, "learning_rate": 1.1790938280413843e-05, "loss": 0.3996, "step": 1322 }, { "epoch": 0.023597189027217923, "grad_norm": 0.9206212162971497, "learning_rate": 1.179985729575455e-05, "loss": 0.4311, "step": 1323 }, { "epoch": 0.023615025148931615, "grad_norm": 0.5773261785507202, "learning_rate": 1.1808776311095256e-05, "loss": 0.3323, "step": 1324 }, { "epoch": 0.02363286127064531, "grad_norm": 0.5009505748748779, "learning_rate": 1.1817695326435962e-05, "loss": 0.3071, "step": 1325 }, { "epoch": 0.023650697392359005, "grad_norm": 1.3784102201461792, "learning_rate": 1.1826614341776667e-05, "loss": 0.2955, "step": 1326 }, { "epoch": 0.0236685335140727, "grad_norm": 0.7203867435455322, "learning_rate": 1.1835533357117375e-05, "loss": 0.3577, "step": 1327 }, { "epoch": 0.023686369635786395, "grad_norm": 0.6099973917007446, "learning_rate": 1.1844452372458082e-05, "loss": 0.337, "step": 1328 }, { "epoch": 0.02370420575750009, "grad_norm": 1.6613069772720337, "learning_rate": 1.1853371387798788e-05, "loss": 0.3803, "step": 1329 }, { "epoch": 0.023722041879213784, "grad_norm": 0.5442463755607605, "learning_rate": 1.1862290403139493e-05, "loss": 0.3242, "step": 1330 }, { "epoch": 0.02373987800092748, "grad_norm": 0.6935420632362366, "learning_rate": 1.18712094184802e-05, "loss": 0.3526, "step": 1331 }, { "epoch": 0.023757714122641174, "grad_norm": 0.8325861692428589, "learning_rate": 1.1880128433820907e-05, "loss": 0.2866, "step": 1332 }, { "epoch": 0.02377555024435487, "grad_norm": 0.7822994589805603, "learning_rate": 1.1889047449161612e-05, "loss": 0.3613, "step": 1333 }, { "epoch": 0.02379338636606856, "grad_norm": 0.7527873516082764, "learning_rate": 1.189796646450232e-05, "loss": 0.3506, "step": 1334 }, { "epoch": 0.023811222487782256, "grad_norm": 0.9543324708938599, "learning_rate": 1.1906885479843026e-05, "loss": 0.3824, "step": 1335 }, { "epoch": 0.02382905860949595, "grad_norm": 0.5387325286865234, "learning_rate": 1.1915804495183733e-05, "loss": 0.343, "step": 1336 }, { "epoch": 0.023846894731209645, "grad_norm": 1.0645473003387451, "learning_rate": 1.1924723510524437e-05, "loss": 0.3869, "step": 1337 }, { "epoch": 0.02386473085292334, "grad_norm": 0.7024256587028503, "learning_rate": 1.1933642525865145e-05, "loss": 0.3263, "step": 1338 }, { "epoch": 0.023882566974637035, "grad_norm": 0.7108498215675354, "learning_rate": 1.1942561541205852e-05, "loss": 0.2696, "step": 1339 }, { "epoch": 0.02390040309635073, "grad_norm": 0.8734039664268494, "learning_rate": 1.1951480556546558e-05, "loss": 0.4181, "step": 1340 }, { "epoch": 0.023918239218064425, "grad_norm": 0.723659873008728, "learning_rate": 1.1960399571887265e-05, "loss": 0.3302, "step": 1341 }, { "epoch": 0.02393607533977812, "grad_norm": 0.6870841383934021, "learning_rate": 1.1969318587227971e-05, "loss": 0.3548, "step": 1342 }, { "epoch": 0.023953911461491815, "grad_norm": 1.9990111589431763, "learning_rate": 1.1978237602568677e-05, "loss": 0.3612, "step": 1343 }, { "epoch": 0.023971747583205506, "grad_norm": 0.785835862159729, "learning_rate": 1.1987156617909382e-05, "loss": 0.3895, "step": 1344 }, { "epoch": 0.0239895837049192, "grad_norm": 0.902116596698761, "learning_rate": 1.199607563325009e-05, "loss": 0.4551, "step": 1345 }, { "epoch": 0.024007419826632896, "grad_norm": 0.6511051058769226, "learning_rate": 1.2004994648590796e-05, "loss": 0.3417, "step": 1346 }, { "epoch": 0.02402525594834659, "grad_norm": 0.6050383448600769, "learning_rate": 1.2013913663931503e-05, "loss": 0.2843, "step": 1347 }, { "epoch": 0.024043092070060286, "grad_norm": 0.745337188243866, "learning_rate": 1.202283267927221e-05, "loss": 0.3152, "step": 1348 }, { "epoch": 0.02406092819177398, "grad_norm": 0.7404078841209412, "learning_rate": 1.2031751694612916e-05, "loss": 0.3834, "step": 1349 }, { "epoch": 0.024078764313487676, "grad_norm": 0.5614080429077148, "learning_rate": 1.2040670709953622e-05, "loss": 0.3731, "step": 1350 }, { "epoch": 0.02409660043520137, "grad_norm": 0.7362163066864014, "learning_rate": 1.2049589725294327e-05, "loss": 0.3349, "step": 1351 }, { "epoch": 0.024114436556915066, "grad_norm": 0.8937839865684509, "learning_rate": 1.2058508740635035e-05, "loss": 0.3636, "step": 1352 }, { "epoch": 0.02413227267862876, "grad_norm": 1.1626262664794922, "learning_rate": 1.2067427755975741e-05, "loss": 0.4247, "step": 1353 }, { "epoch": 0.024150108800342452, "grad_norm": 0.6161826848983765, "learning_rate": 1.2076346771316448e-05, "loss": 0.3348, "step": 1354 }, { "epoch": 0.024167944922056147, "grad_norm": 0.6281194090843201, "learning_rate": 1.2085265786657154e-05, "loss": 0.2897, "step": 1355 }, { "epoch": 0.024185781043769842, "grad_norm": 0.8214020729064941, "learning_rate": 1.209418480199786e-05, "loss": 0.3801, "step": 1356 }, { "epoch": 0.024203617165483537, "grad_norm": 0.854334831237793, "learning_rate": 1.2103103817338567e-05, "loss": 0.5048, "step": 1357 }, { "epoch": 0.02422145328719723, "grad_norm": 0.675391435623169, "learning_rate": 1.2112022832679273e-05, "loss": 0.2958, "step": 1358 }, { "epoch": 0.024239289408910927, "grad_norm": 0.5455700159072876, "learning_rate": 1.212094184801998e-05, "loss": 0.299, "step": 1359 }, { "epoch": 0.02425712553062462, "grad_norm": 0.55274498462677, "learning_rate": 1.2129860863360686e-05, "loss": 0.2746, "step": 1360 }, { "epoch": 0.024274961652338316, "grad_norm": 0.6536729335784912, "learning_rate": 1.2138779878701392e-05, "loss": 0.345, "step": 1361 }, { "epoch": 0.02429279777405201, "grad_norm": 0.647176206111908, "learning_rate": 1.2147698894042097e-05, "loss": 0.3799, "step": 1362 }, { "epoch": 0.024310633895765706, "grad_norm": 0.6187819242477417, "learning_rate": 1.2156617909382805e-05, "loss": 0.3152, "step": 1363 }, { "epoch": 0.024328470017479398, "grad_norm": 0.835293710231781, "learning_rate": 1.2165536924723511e-05, "loss": 0.3179, "step": 1364 }, { "epoch": 0.024346306139193093, "grad_norm": 0.6720101833343506, "learning_rate": 1.2174455940064218e-05, "loss": 0.3186, "step": 1365 }, { "epoch": 0.024364142260906788, "grad_norm": 0.5965019464492798, "learning_rate": 1.2183374955404924e-05, "loss": 0.3023, "step": 1366 }, { "epoch": 0.024381978382620483, "grad_norm": 0.6113612055778503, "learning_rate": 1.219229397074563e-05, "loss": 0.2907, "step": 1367 }, { "epoch": 0.024399814504334177, "grad_norm": 0.5294867753982544, "learning_rate": 1.2201212986086337e-05, "loss": 0.2751, "step": 1368 }, { "epoch": 0.024417650626047872, "grad_norm": 0.9566969275474548, "learning_rate": 1.2210132001427043e-05, "loss": 0.3855, "step": 1369 }, { "epoch": 0.024435486747761567, "grad_norm": 0.9819380044937134, "learning_rate": 1.221905101676775e-05, "loss": 0.3529, "step": 1370 }, { "epoch": 0.024453322869475262, "grad_norm": 0.591349720954895, "learning_rate": 1.2227970032108456e-05, "loss": 0.3364, "step": 1371 }, { "epoch": 0.024471158991188957, "grad_norm": 1.2572211027145386, "learning_rate": 1.2236889047449162e-05, "loss": 0.4125, "step": 1372 }, { "epoch": 0.024488995112902652, "grad_norm": 1.1575344800949097, "learning_rate": 1.2245808062789869e-05, "loss": 0.4375, "step": 1373 }, { "epoch": 0.024506831234616343, "grad_norm": 0.6487801671028137, "learning_rate": 1.2254727078130575e-05, "loss": 0.3314, "step": 1374 }, { "epoch": 0.02452466735633004, "grad_norm": 0.6622575521469116, "learning_rate": 1.2263646093471282e-05, "loss": 0.2818, "step": 1375 }, { "epoch": 0.024542503478043733, "grad_norm": 0.6123865842819214, "learning_rate": 1.2272565108811988e-05, "loss": 0.3141, "step": 1376 }, { "epoch": 0.024560339599757428, "grad_norm": 0.641710102558136, "learning_rate": 1.2281484124152694e-05, "loss": 0.3636, "step": 1377 }, { "epoch": 0.024578175721471123, "grad_norm": 0.8112438917160034, "learning_rate": 1.22904031394934e-05, "loss": 0.3388, "step": 1378 }, { "epoch": 0.024596011843184818, "grad_norm": 0.7893108129501343, "learning_rate": 1.2299322154834107e-05, "loss": 0.4094, "step": 1379 }, { "epoch": 0.024613847964898513, "grad_norm": 1.0157471895217896, "learning_rate": 1.2308241170174813e-05, "loss": 0.328, "step": 1380 }, { "epoch": 0.024631684086612208, "grad_norm": 0.933978259563446, "learning_rate": 1.231716018551552e-05, "loss": 0.3566, "step": 1381 }, { "epoch": 0.024649520208325903, "grad_norm": 0.6377274394035339, "learning_rate": 1.2326079200856226e-05, "loss": 0.3758, "step": 1382 }, { "epoch": 0.024667356330039598, "grad_norm": 0.6422837376594543, "learning_rate": 1.2334998216196933e-05, "loss": 0.3149, "step": 1383 }, { "epoch": 0.02468519245175329, "grad_norm": 1.0213490724563599, "learning_rate": 1.2343917231537639e-05, "loss": 0.3905, "step": 1384 }, { "epoch": 0.024703028573466984, "grad_norm": 0.532490074634552, "learning_rate": 1.2352836246878345e-05, "loss": 0.2899, "step": 1385 }, { "epoch": 0.02472086469518068, "grad_norm": 0.4421103596687317, "learning_rate": 1.2361755262219052e-05, "loss": 0.2889, "step": 1386 }, { "epoch": 0.024738700816894374, "grad_norm": 0.5894536375999451, "learning_rate": 1.2370674277559758e-05, "loss": 0.2592, "step": 1387 }, { "epoch": 0.02475653693860807, "grad_norm": 1.573372721672058, "learning_rate": 1.2379593292900464e-05, "loss": 0.3207, "step": 1388 }, { "epoch": 0.024774373060321764, "grad_norm": 0.7059410214424133, "learning_rate": 1.238851230824117e-05, "loss": 0.3469, "step": 1389 }, { "epoch": 0.02479220918203546, "grad_norm": 0.6540749669075012, "learning_rate": 1.2397431323581877e-05, "loss": 0.3465, "step": 1390 }, { "epoch": 0.024810045303749154, "grad_norm": 0.8303396105766296, "learning_rate": 1.2406350338922584e-05, "loss": 0.4224, "step": 1391 }, { "epoch": 0.02482788142546285, "grad_norm": 0.4805128276348114, "learning_rate": 1.241526935426329e-05, "loss": 0.2973, "step": 1392 }, { "epoch": 0.024845717547176543, "grad_norm": 0.583146870136261, "learning_rate": 1.2424188369603996e-05, "loss": 0.3722, "step": 1393 }, { "epoch": 0.024863553668890235, "grad_norm": 0.7658040523529053, "learning_rate": 1.2433107384944703e-05, "loss": 0.3301, "step": 1394 }, { "epoch": 0.02488138979060393, "grad_norm": 0.8137679696083069, "learning_rate": 1.2442026400285409e-05, "loss": 0.3548, "step": 1395 }, { "epoch": 0.024899225912317625, "grad_norm": 0.6280669569969177, "learning_rate": 1.2450945415626116e-05, "loss": 0.3361, "step": 1396 }, { "epoch": 0.02491706203403132, "grad_norm": 0.7460063695907593, "learning_rate": 1.2459864430966822e-05, "loss": 0.344, "step": 1397 }, { "epoch": 0.024934898155745015, "grad_norm": 0.5598887801170349, "learning_rate": 1.2468783446307528e-05, "loss": 0.2878, "step": 1398 }, { "epoch": 0.02495273427745871, "grad_norm": 0.6991429328918457, "learning_rate": 1.2477702461648235e-05, "loss": 0.3496, "step": 1399 }, { "epoch": 0.024970570399172404, "grad_norm": 0.7219420075416565, "learning_rate": 1.2486621476988941e-05, "loss": 0.3826, "step": 1400 }, { "epoch": 0.0249884065208861, "grad_norm": 0.698403537273407, "learning_rate": 1.2495540492329647e-05, "loss": 0.349, "step": 1401 }, { "epoch": 0.025006242642599794, "grad_norm": 0.622410774230957, "learning_rate": 1.2504459507670355e-05, "loss": 0.3996, "step": 1402 }, { "epoch": 0.02502407876431349, "grad_norm": 0.5343538522720337, "learning_rate": 1.251337852301106e-05, "loss": 0.3327, "step": 1403 }, { "epoch": 0.02504191488602718, "grad_norm": 0.6778825521469116, "learning_rate": 1.2522297538351765e-05, "loss": 0.3212, "step": 1404 }, { "epoch": 0.025059751007740876, "grad_norm": 0.8760435581207275, "learning_rate": 1.2531216553692473e-05, "loss": 0.4228, "step": 1405 }, { "epoch": 0.02507758712945457, "grad_norm": 0.5947842001914978, "learning_rate": 1.254013556903318e-05, "loss": 0.3459, "step": 1406 }, { "epoch": 0.025095423251168265, "grad_norm": 0.7000359892845154, "learning_rate": 1.2549054584373884e-05, "loss": 0.3194, "step": 1407 }, { "epoch": 0.02511325937288196, "grad_norm": 1.1343384981155396, "learning_rate": 1.2557973599714592e-05, "loss": 0.3618, "step": 1408 }, { "epoch": 0.025131095494595655, "grad_norm": 0.6769985556602478, "learning_rate": 1.2566892615055298e-05, "loss": 0.3123, "step": 1409 }, { "epoch": 0.02514893161630935, "grad_norm": 0.614509105682373, "learning_rate": 1.2575811630396007e-05, "loss": 0.3353, "step": 1410 }, { "epoch": 0.025166767738023045, "grad_norm": 0.468760222196579, "learning_rate": 1.2584730645736711e-05, "loss": 0.3096, "step": 1411 }, { "epoch": 0.02518460385973674, "grad_norm": 0.7535741329193115, "learning_rate": 1.2593649661077418e-05, "loss": 0.3808, "step": 1412 }, { "epoch": 0.025202439981450435, "grad_norm": 0.9200375080108643, "learning_rate": 1.2602568676418126e-05, "loss": 0.3471, "step": 1413 }, { "epoch": 0.025220276103164126, "grad_norm": 0.714401125907898, "learning_rate": 1.261148769175883e-05, "loss": 0.3231, "step": 1414 }, { "epoch": 0.02523811222487782, "grad_norm": 0.5711655616760254, "learning_rate": 1.2620406707099535e-05, "loss": 0.3341, "step": 1415 }, { "epoch": 0.025255948346591516, "grad_norm": 0.5743415355682373, "learning_rate": 1.2629325722440243e-05, "loss": 0.2701, "step": 1416 }, { "epoch": 0.02527378446830521, "grad_norm": 0.787744402885437, "learning_rate": 1.263824473778095e-05, "loss": 0.323, "step": 1417 }, { "epoch": 0.025291620590018906, "grad_norm": 0.6809655427932739, "learning_rate": 1.2647163753121654e-05, "loss": 0.417, "step": 1418 }, { "epoch": 0.0253094567117326, "grad_norm": 0.5578548908233643, "learning_rate": 1.2656082768462362e-05, "loss": 0.3488, "step": 1419 }, { "epoch": 0.025327292833446296, "grad_norm": 0.617712676525116, "learning_rate": 1.2665001783803069e-05, "loss": 0.3481, "step": 1420 }, { "epoch": 0.02534512895515999, "grad_norm": 0.6966971158981323, "learning_rate": 1.2673920799143777e-05, "loss": 0.3653, "step": 1421 }, { "epoch": 0.025362965076873686, "grad_norm": 0.5483299493789673, "learning_rate": 1.2682839814484481e-05, "loss": 0.3565, "step": 1422 }, { "epoch": 0.02538080119858738, "grad_norm": 0.5309726595878601, "learning_rate": 1.2691758829825188e-05, "loss": 0.3281, "step": 1423 }, { "epoch": 0.025398637320301072, "grad_norm": 0.727033257484436, "learning_rate": 1.2700677845165896e-05, "loss": 0.2909, "step": 1424 }, { "epoch": 0.025416473442014767, "grad_norm": 0.5525379180908203, "learning_rate": 1.27095968605066e-05, "loss": 0.3079, "step": 1425 }, { "epoch": 0.025434309563728462, "grad_norm": 0.6853298544883728, "learning_rate": 1.2718515875847307e-05, "loss": 0.4044, "step": 1426 }, { "epoch": 0.025452145685442157, "grad_norm": 0.7836421728134155, "learning_rate": 1.2727434891188015e-05, "loss": 0.3994, "step": 1427 }, { "epoch": 0.025469981807155852, "grad_norm": 0.736638069152832, "learning_rate": 1.273635390652872e-05, "loss": 0.3416, "step": 1428 }, { "epoch": 0.025487817928869547, "grad_norm": 0.4796784818172455, "learning_rate": 1.2745272921869424e-05, "loss": 0.3432, "step": 1429 }, { "epoch": 0.02550565405058324, "grad_norm": 0.4791879951953888, "learning_rate": 1.2754191937210132e-05, "loss": 0.3685, "step": 1430 }, { "epoch": 0.025523490172296937, "grad_norm": 0.5028378367424011, "learning_rate": 1.2763110952550839e-05, "loss": 0.3364, "step": 1431 }, { "epoch": 0.02554132629401063, "grad_norm": 0.6632663607597351, "learning_rate": 1.2772029967891547e-05, "loss": 0.3623, "step": 1432 }, { "epoch": 0.025559162415724326, "grad_norm": 0.6015079021453857, "learning_rate": 1.2780948983232252e-05, "loss": 0.3378, "step": 1433 }, { "epoch": 0.025576998537438018, "grad_norm": 0.7614780068397522, "learning_rate": 1.2789867998572958e-05, "loss": 0.3269, "step": 1434 }, { "epoch": 0.025594834659151713, "grad_norm": 0.5741811990737915, "learning_rate": 1.2798787013913666e-05, "loss": 0.3403, "step": 1435 }, { "epoch": 0.025612670780865408, "grad_norm": 0.6418488621711731, "learning_rate": 1.280770602925437e-05, "loss": 0.3904, "step": 1436 }, { "epoch": 0.025630506902579103, "grad_norm": 0.6873551607131958, "learning_rate": 1.2816625044595077e-05, "loss": 0.2914, "step": 1437 }, { "epoch": 0.025648343024292797, "grad_norm": 0.9841389060020447, "learning_rate": 1.2825544059935785e-05, "loss": 0.3743, "step": 1438 }, { "epoch": 0.025666179146006492, "grad_norm": 0.5809444189071655, "learning_rate": 1.283446307527649e-05, "loss": 0.3552, "step": 1439 }, { "epoch": 0.025684015267720187, "grad_norm": 1.1492770910263062, "learning_rate": 1.2843382090617195e-05, "loss": 0.3157, "step": 1440 }, { "epoch": 0.025701851389433882, "grad_norm": 0.6113795042037964, "learning_rate": 1.2852301105957904e-05, "loss": 0.3476, "step": 1441 }, { "epoch": 0.025719687511147577, "grad_norm": 0.6590428948402405, "learning_rate": 1.2861220121298609e-05, "loss": 0.4073, "step": 1442 }, { "epoch": 0.025737523632861272, "grad_norm": 0.4011369049549103, "learning_rate": 1.2870139136639314e-05, "loss": 0.3252, "step": 1443 }, { "epoch": 0.025755359754574964, "grad_norm": 0.49081823229789734, "learning_rate": 1.2879058151980022e-05, "loss": 0.3137, "step": 1444 }, { "epoch": 0.02577319587628866, "grad_norm": 0.4497537910938263, "learning_rate": 1.2887977167320728e-05, "loss": 0.3052, "step": 1445 }, { "epoch": 0.025791031998002353, "grad_norm": 0.6175629496574402, "learning_rate": 1.2896896182661436e-05, "loss": 0.343, "step": 1446 }, { "epoch": 0.02580886811971605, "grad_norm": 3.055866241455078, "learning_rate": 1.2905815198002141e-05, "loss": 0.3096, "step": 1447 }, { "epoch": 0.025826704241429743, "grad_norm": 0.5497235655784607, "learning_rate": 1.2914734213342847e-05, "loss": 0.3371, "step": 1448 }, { "epoch": 0.025844540363143438, "grad_norm": 0.469852477312088, "learning_rate": 1.2923653228683555e-05, "loss": 0.3138, "step": 1449 }, { "epoch": 0.025862376484857133, "grad_norm": 0.9721972942352295, "learning_rate": 1.293257224402426e-05, "loss": 0.3592, "step": 1450 }, { "epoch": 0.025880212606570828, "grad_norm": 0.46501076221466064, "learning_rate": 1.2941491259364966e-05, "loss": 0.2978, "step": 1451 }, { "epoch": 0.025898048728284523, "grad_norm": 0.5042346119880676, "learning_rate": 1.2950410274705675e-05, "loss": 0.3551, "step": 1452 }, { "epoch": 0.025915884849998218, "grad_norm": 0.5198994874954224, "learning_rate": 1.295932929004638e-05, "loss": 0.3364, "step": 1453 }, { "epoch": 0.02593372097171191, "grad_norm": 0.624998927116394, "learning_rate": 1.2968248305387084e-05, "loss": 0.356, "step": 1454 }, { "epoch": 0.025951557093425604, "grad_norm": 0.6525312662124634, "learning_rate": 1.2977167320727792e-05, "loss": 0.3148, "step": 1455 }, { "epoch": 0.0259693932151393, "grad_norm": 0.5993296504020691, "learning_rate": 1.2986086336068498e-05, "loss": 0.3396, "step": 1456 }, { "epoch": 0.025987229336852994, "grad_norm": 0.47724413871765137, "learning_rate": 1.2995005351409206e-05, "loss": 0.3155, "step": 1457 }, { "epoch": 0.02600506545856669, "grad_norm": 0.5700188279151917, "learning_rate": 1.3003924366749911e-05, "loss": 0.353, "step": 1458 }, { "epoch": 0.026022901580280384, "grad_norm": 1.5542047023773193, "learning_rate": 1.3012843382090617e-05, "loss": 0.3417, "step": 1459 }, { "epoch": 0.02604073770199408, "grad_norm": 0.5037944912910461, "learning_rate": 1.3021762397431326e-05, "loss": 0.321, "step": 1460 }, { "epoch": 0.026058573823707774, "grad_norm": 0.5542275309562683, "learning_rate": 1.303068141277203e-05, "loss": 0.3451, "step": 1461 }, { "epoch": 0.02607640994542147, "grad_norm": 0.5835364460945129, "learning_rate": 1.3039600428112737e-05, "loss": 0.3028, "step": 1462 }, { "epoch": 0.026094246067135164, "grad_norm": 0.6984533667564392, "learning_rate": 1.3048519443453445e-05, "loss": 0.3474, "step": 1463 }, { "epoch": 0.02611208218884886, "grad_norm": 0.6229071021080017, "learning_rate": 1.305743845879415e-05, "loss": 0.3347, "step": 1464 }, { "epoch": 0.02612991831056255, "grad_norm": 0.9868772625923157, "learning_rate": 1.3066357474134854e-05, "loss": 0.3256, "step": 1465 }, { "epoch": 0.026147754432276245, "grad_norm": 0.6247256398200989, "learning_rate": 1.3075276489475564e-05, "loss": 0.3471, "step": 1466 }, { "epoch": 0.02616559055398994, "grad_norm": 0.63973468542099, "learning_rate": 1.3084195504816269e-05, "loss": 0.3835, "step": 1467 }, { "epoch": 0.026183426675703635, "grad_norm": 0.6585602760314941, "learning_rate": 1.3093114520156977e-05, "loss": 0.3757, "step": 1468 }, { "epoch": 0.02620126279741733, "grad_norm": 0.6673021912574768, "learning_rate": 1.3102033535497681e-05, "loss": 0.3514, "step": 1469 }, { "epoch": 0.026219098919131024, "grad_norm": 0.6367130875587463, "learning_rate": 1.3110952550838388e-05, "loss": 0.3627, "step": 1470 }, { "epoch": 0.02623693504084472, "grad_norm": 7.9414753913879395, "learning_rate": 1.3119871566179096e-05, "loss": 0.3573, "step": 1471 }, { "epoch": 0.026254771162558414, "grad_norm": 0.48212894797325134, "learning_rate": 1.31287905815198e-05, "loss": 0.311, "step": 1472 }, { "epoch": 0.02627260728427211, "grad_norm": 0.7470137476921082, "learning_rate": 1.3137709596860507e-05, "loss": 0.3415, "step": 1473 }, { "epoch": 0.026290443405985804, "grad_norm": 0.6094146966934204, "learning_rate": 1.3146628612201215e-05, "loss": 0.3442, "step": 1474 }, { "epoch": 0.026308279527699496, "grad_norm": 0.5558764338493347, "learning_rate": 1.315554762754192e-05, "loss": 0.3039, "step": 1475 }, { "epoch": 0.02632611564941319, "grad_norm": 0.6077886819839478, "learning_rate": 1.3164466642882626e-05, "loss": 0.3539, "step": 1476 }, { "epoch": 0.026343951771126885, "grad_norm": 0.5881067514419556, "learning_rate": 1.3173385658223334e-05, "loss": 0.3076, "step": 1477 }, { "epoch": 0.02636178789284058, "grad_norm": 0.7823331356048584, "learning_rate": 1.3182304673564039e-05, "loss": 0.4019, "step": 1478 }, { "epoch": 0.026379624014554275, "grad_norm": 0.6178668737411499, "learning_rate": 1.3191223688904747e-05, "loss": 0.2783, "step": 1479 }, { "epoch": 0.02639746013626797, "grad_norm": 0.9432263374328613, "learning_rate": 1.3200142704245451e-05, "loss": 0.3398, "step": 1480 }, { "epoch": 0.026415296257981665, "grad_norm": 0.49985745549201965, "learning_rate": 1.3209061719586158e-05, "loss": 0.3239, "step": 1481 }, { "epoch": 0.02643313237969536, "grad_norm": 0.560070276260376, "learning_rate": 1.3217980734926866e-05, "loss": 0.3822, "step": 1482 }, { "epoch": 0.026450968501409055, "grad_norm": 0.635017454624176, "learning_rate": 1.322689975026757e-05, "loss": 0.3437, "step": 1483 }, { "epoch": 0.02646880462312275, "grad_norm": 0.5243181586265564, "learning_rate": 1.3235818765608277e-05, "loss": 0.3217, "step": 1484 }, { "epoch": 0.02648664074483644, "grad_norm": 0.5804629325866699, "learning_rate": 1.3244737780948985e-05, "loss": 0.348, "step": 1485 }, { "epoch": 0.026504476866550136, "grad_norm": 0.6111013889312744, "learning_rate": 1.325365679628969e-05, "loss": 0.359, "step": 1486 }, { "epoch": 0.02652231298826383, "grad_norm": 0.8193077445030212, "learning_rate": 1.3262575811630396e-05, "loss": 0.3612, "step": 1487 }, { "epoch": 0.026540149109977526, "grad_norm": 0.6436665654182434, "learning_rate": 1.3271494826971104e-05, "loss": 0.3515, "step": 1488 }, { "epoch": 0.02655798523169122, "grad_norm": 0.48903459310531616, "learning_rate": 1.3280413842311809e-05, "loss": 0.3013, "step": 1489 }, { "epoch": 0.026575821353404916, "grad_norm": 1.1819206476211548, "learning_rate": 1.3289332857652515e-05, "loss": 0.3706, "step": 1490 }, { "epoch": 0.02659365747511861, "grad_norm": 0.5459044575691223, "learning_rate": 1.3298251872993223e-05, "loss": 0.3337, "step": 1491 }, { "epoch": 0.026611493596832306, "grad_norm": 0.6691137552261353, "learning_rate": 1.3307170888333928e-05, "loss": 0.3001, "step": 1492 }, { "epoch": 0.026629329718546, "grad_norm": 0.7296347618103027, "learning_rate": 1.3316089903674636e-05, "loss": 0.3423, "step": 1493 }, { "epoch": 0.026647165840259696, "grad_norm": 0.6832718849182129, "learning_rate": 1.332500891901534e-05, "loss": 0.3492, "step": 1494 }, { "epoch": 0.026665001961973387, "grad_norm": 0.5545360445976257, "learning_rate": 1.3333927934356047e-05, "loss": 0.2849, "step": 1495 }, { "epoch": 0.026682838083687082, "grad_norm": 0.588440477848053, "learning_rate": 1.3342846949696755e-05, "loss": 0.3028, "step": 1496 }, { "epoch": 0.026700674205400777, "grad_norm": 0.7887428998947144, "learning_rate": 1.335176596503746e-05, "loss": 0.343, "step": 1497 }, { "epoch": 0.026718510327114472, "grad_norm": 0.49277839064598083, "learning_rate": 1.3360684980378166e-05, "loss": 0.3225, "step": 1498 }, { "epoch": 0.026736346448828167, "grad_norm": 0.6437626481056213, "learning_rate": 1.3369603995718874e-05, "loss": 0.2881, "step": 1499 }, { "epoch": 0.02675418257054186, "grad_norm": 0.5196444392204285, "learning_rate": 1.3378523011059579e-05, "loss": 0.3201, "step": 1500 }, { "epoch": 0.026772018692255557, "grad_norm": 0.5267537236213684, "learning_rate": 1.3387442026400285e-05, "loss": 0.3445, "step": 1501 }, { "epoch": 0.02678985481396925, "grad_norm": 0.621799647808075, "learning_rate": 1.3396361041740994e-05, "loss": 0.3505, "step": 1502 }, { "epoch": 0.026807690935682946, "grad_norm": 0.7187339067459106, "learning_rate": 1.3405280057081698e-05, "loss": 0.3729, "step": 1503 }, { "epoch": 0.02682552705739664, "grad_norm": 1.050341248512268, "learning_rate": 1.3414199072422406e-05, "loss": 0.3127, "step": 1504 }, { "epoch": 0.026843363179110333, "grad_norm": 1.0283368825912476, "learning_rate": 1.3423118087763113e-05, "loss": 0.2707, "step": 1505 }, { "epoch": 0.026861199300824028, "grad_norm": 0.6192741394042969, "learning_rate": 1.3432037103103817e-05, "loss": 0.3436, "step": 1506 }, { "epoch": 0.026879035422537723, "grad_norm": 0.6149123311042786, "learning_rate": 1.3440956118444525e-05, "loss": 0.3437, "step": 1507 }, { "epoch": 0.026896871544251418, "grad_norm": 0.5970794558525085, "learning_rate": 1.344987513378523e-05, "loss": 0.3129, "step": 1508 }, { "epoch": 0.026914707665965112, "grad_norm": 0.8853777050971985, "learning_rate": 1.3458794149125936e-05, "loss": 0.3137, "step": 1509 }, { "epoch": 0.026932543787678807, "grad_norm": 0.4688767194747925, "learning_rate": 1.3467713164466645e-05, "loss": 0.3101, "step": 1510 }, { "epoch": 0.026950379909392502, "grad_norm": 0.7186697125434875, "learning_rate": 1.347663217980735e-05, "loss": 0.3505, "step": 1511 }, { "epoch": 0.026968216031106197, "grad_norm": 0.6535070538520813, "learning_rate": 1.3485551195148056e-05, "loss": 0.3267, "step": 1512 }, { "epoch": 0.026986052152819892, "grad_norm": 0.5069955587387085, "learning_rate": 1.3494470210488764e-05, "loss": 0.3396, "step": 1513 }, { "epoch": 0.027003888274533587, "grad_norm": 0.5632001161575317, "learning_rate": 1.3503389225829468e-05, "loss": 0.3781, "step": 1514 }, { "epoch": 0.02702172439624728, "grad_norm": 0.5032448172569275, "learning_rate": 1.3512308241170176e-05, "loss": 0.3771, "step": 1515 }, { "epoch": 0.027039560517960973, "grad_norm": 0.622677206993103, "learning_rate": 1.3521227256510883e-05, "loss": 0.335, "step": 1516 }, { "epoch": 0.02705739663967467, "grad_norm": 0.5769949555397034, "learning_rate": 1.3530146271851588e-05, "loss": 0.2998, "step": 1517 }, { "epoch": 0.027075232761388363, "grad_norm": 0.6577006578445435, "learning_rate": 1.3539065287192296e-05, "loss": 0.3837, "step": 1518 }, { "epoch": 0.027093068883102058, "grad_norm": 0.6344306468963623, "learning_rate": 1.3547984302533e-05, "loss": 0.3558, "step": 1519 }, { "epoch": 0.027110905004815753, "grad_norm": 0.5616790056228638, "learning_rate": 1.3556903317873707e-05, "loss": 0.3214, "step": 1520 }, { "epoch": 0.027128741126529448, "grad_norm": 0.5310586094856262, "learning_rate": 1.3565822333214415e-05, "loss": 0.3476, "step": 1521 }, { "epoch": 0.027146577248243143, "grad_norm": 0.6532830595970154, "learning_rate": 1.357474134855512e-05, "loss": 0.4263, "step": 1522 }, { "epoch": 0.027164413369956838, "grad_norm": 0.51121586561203, "learning_rate": 1.3583660363895826e-05, "loss": 0.3389, "step": 1523 }, { "epoch": 0.027182249491670533, "grad_norm": 0.7261708378791809, "learning_rate": 1.3592579379236534e-05, "loss": 0.3153, "step": 1524 }, { "epoch": 0.027200085613384224, "grad_norm": 0.6957829594612122, "learning_rate": 1.3601498394577239e-05, "loss": 0.3352, "step": 1525 }, { "epoch": 0.02721792173509792, "grad_norm": 0.8378706574440002, "learning_rate": 1.3610417409917945e-05, "loss": 0.3713, "step": 1526 }, { "epoch": 0.027235757856811614, "grad_norm": 0.5874273777008057, "learning_rate": 1.3619336425258653e-05, "loss": 0.3264, "step": 1527 }, { "epoch": 0.02725359397852531, "grad_norm": 0.6011341214179993, "learning_rate": 1.3628255440599358e-05, "loss": 0.3308, "step": 1528 }, { "epoch": 0.027271430100239004, "grad_norm": 0.5864375829696655, "learning_rate": 1.3637174455940066e-05, "loss": 0.3237, "step": 1529 }, { "epoch": 0.0272892662219527, "grad_norm": 0.8733957409858704, "learning_rate": 1.3646093471280772e-05, "loss": 0.3715, "step": 1530 }, { "epoch": 0.027307102343666394, "grad_norm": 0.4712708294391632, "learning_rate": 1.3655012486621477e-05, "loss": 0.3008, "step": 1531 }, { "epoch": 0.02732493846538009, "grad_norm": 0.5903174877166748, "learning_rate": 1.3663931501962185e-05, "loss": 0.351, "step": 1532 }, { "epoch": 0.027342774587093784, "grad_norm": 0.5234763026237488, "learning_rate": 1.367285051730289e-05, "loss": 0.2853, "step": 1533 }, { "epoch": 0.02736061070880748, "grad_norm": 0.6879488229751587, "learning_rate": 1.3681769532643596e-05, "loss": 0.3956, "step": 1534 }, { "epoch": 0.02737844683052117, "grad_norm": 0.5998932123184204, "learning_rate": 1.3690688547984304e-05, "loss": 0.276, "step": 1535 }, { "epoch": 0.027396282952234865, "grad_norm": 0.6094096899032593, "learning_rate": 1.3699607563325009e-05, "loss": 0.3341, "step": 1536 }, { "epoch": 0.02741411907394856, "grad_norm": 0.6380568146705627, "learning_rate": 1.3708526578665715e-05, "loss": 0.2824, "step": 1537 }, { "epoch": 0.027431955195662255, "grad_norm": 0.6752215623855591, "learning_rate": 1.3717445594006423e-05, "loss": 0.3041, "step": 1538 }, { "epoch": 0.02744979131737595, "grad_norm": 0.7440658211708069, "learning_rate": 1.3726364609347128e-05, "loss": 0.4142, "step": 1539 }, { "epoch": 0.027467627439089645, "grad_norm": 0.6443586349487305, "learning_rate": 1.3735283624687836e-05, "loss": 0.4107, "step": 1540 }, { "epoch": 0.02748546356080334, "grad_norm": 0.648849368095398, "learning_rate": 1.3744202640028542e-05, "loss": 0.3403, "step": 1541 }, { "epoch": 0.027503299682517034, "grad_norm": 0.6645941138267517, "learning_rate": 1.3753121655369247e-05, "loss": 0.3903, "step": 1542 }, { "epoch": 0.02752113580423073, "grad_norm": 0.5554529428482056, "learning_rate": 1.3762040670709955e-05, "loss": 0.3771, "step": 1543 }, { "epoch": 0.027538971925944424, "grad_norm": 0.5533734560012817, "learning_rate": 1.3770959686050661e-05, "loss": 0.3372, "step": 1544 }, { "epoch": 0.027556808047658116, "grad_norm": 0.46411558985710144, "learning_rate": 1.3779878701391366e-05, "loss": 0.2343, "step": 1545 }, { "epoch": 0.02757464416937181, "grad_norm": 0.5473881363868713, "learning_rate": 1.3788797716732074e-05, "loss": 0.3417, "step": 1546 }, { "epoch": 0.027592480291085505, "grad_norm": 1.1106927394866943, "learning_rate": 1.3797716732072779e-05, "loss": 0.4138, "step": 1547 }, { "epoch": 0.0276103164127992, "grad_norm": 0.5700728297233582, "learning_rate": 1.3806635747413485e-05, "loss": 0.3319, "step": 1548 }, { "epoch": 0.027628152534512895, "grad_norm": 0.7012478113174438, "learning_rate": 1.3815554762754193e-05, "loss": 0.3755, "step": 1549 }, { "epoch": 0.02764598865622659, "grad_norm": 0.5423607230186462, "learning_rate": 1.3824473778094898e-05, "loss": 0.3136, "step": 1550 }, { "epoch": 0.027663824777940285, "grad_norm": 0.5336237549781799, "learning_rate": 1.3833392793435606e-05, "loss": 0.3408, "step": 1551 }, { "epoch": 0.02768166089965398, "grad_norm": 0.4705270528793335, "learning_rate": 1.3842311808776313e-05, "loss": 0.3227, "step": 1552 }, { "epoch": 0.027699497021367675, "grad_norm": 0.5503244996070862, "learning_rate": 1.3851230824117017e-05, "loss": 0.3757, "step": 1553 }, { "epoch": 0.02771733314308137, "grad_norm": 0.5783112049102783, "learning_rate": 1.3860149839457725e-05, "loss": 0.286, "step": 1554 }, { "epoch": 0.02773516926479506, "grad_norm": 0.6264418363571167, "learning_rate": 1.3869068854798432e-05, "loss": 0.3062, "step": 1555 }, { "epoch": 0.027753005386508756, "grad_norm": 0.7298933863639832, "learning_rate": 1.3877987870139136e-05, "loss": 0.3553, "step": 1556 }, { "epoch": 0.02777084150822245, "grad_norm": 0.4315170645713806, "learning_rate": 1.3886906885479844e-05, "loss": 0.2857, "step": 1557 }, { "epoch": 0.027788677629936146, "grad_norm": 0.43308988213539124, "learning_rate": 1.3895825900820549e-05, "loss": 0.3102, "step": 1558 }, { "epoch": 0.02780651375164984, "grad_norm": 0.5154407024383545, "learning_rate": 1.3904744916161256e-05, "loss": 0.2972, "step": 1559 }, { "epoch": 0.027824349873363536, "grad_norm": 0.6666773557662964, "learning_rate": 1.3913663931501964e-05, "loss": 0.4002, "step": 1560 }, { "epoch": 0.02784218599507723, "grad_norm": 0.4917806386947632, "learning_rate": 1.3922582946842668e-05, "loss": 0.3263, "step": 1561 }, { "epoch": 0.027860022116790926, "grad_norm": 0.6318265199661255, "learning_rate": 1.3931501962183376e-05, "loss": 0.4311, "step": 1562 }, { "epoch": 0.02787785823850462, "grad_norm": 0.5178397297859192, "learning_rate": 1.3940420977524083e-05, "loss": 0.3612, "step": 1563 }, { "epoch": 0.027895694360218316, "grad_norm": 1.172103762626648, "learning_rate": 1.3949339992864787e-05, "loss": 0.3614, "step": 1564 }, { "epoch": 0.027913530481932007, "grad_norm": 0.5760762691497803, "learning_rate": 1.3958259008205495e-05, "loss": 0.3181, "step": 1565 }, { "epoch": 0.027931366603645702, "grad_norm": 0.5701383352279663, "learning_rate": 1.3967178023546202e-05, "loss": 0.3881, "step": 1566 }, { "epoch": 0.027949202725359397, "grad_norm": 0.4625813066959381, "learning_rate": 1.3976097038886907e-05, "loss": 0.2817, "step": 1567 }, { "epoch": 0.027967038847073092, "grad_norm": 0.5565279126167297, "learning_rate": 1.3985016054227615e-05, "loss": 0.4024, "step": 1568 }, { "epoch": 0.027984874968786787, "grad_norm": 0.5355582237243652, "learning_rate": 1.3993935069568321e-05, "loss": 0.2787, "step": 1569 }, { "epoch": 0.02800271109050048, "grad_norm": 0.5604333281517029, "learning_rate": 1.4002854084909026e-05, "loss": 0.2843, "step": 1570 }, { "epoch": 0.028020547212214177, "grad_norm": 0.6077826023101807, "learning_rate": 1.4011773100249734e-05, "loss": 0.3973, "step": 1571 }, { "epoch": 0.02803838333392787, "grad_norm": 0.6005956530570984, "learning_rate": 1.4020692115590438e-05, "loss": 0.3106, "step": 1572 }, { "epoch": 0.028056219455641566, "grad_norm": 0.6257055997848511, "learning_rate": 1.4029611130931145e-05, "loss": 0.3178, "step": 1573 }, { "epoch": 0.02807405557735526, "grad_norm": 0.7041438817977905, "learning_rate": 1.4038530146271853e-05, "loss": 0.3679, "step": 1574 }, { "epoch": 0.028091891699068953, "grad_norm": 0.6367039084434509, "learning_rate": 1.4047449161612558e-05, "loss": 0.3408, "step": 1575 }, { "epoch": 0.028109727820782648, "grad_norm": 0.6223627924919128, "learning_rate": 1.4056368176953266e-05, "loss": 0.339, "step": 1576 }, { "epoch": 0.028127563942496343, "grad_norm": 0.7636797428131104, "learning_rate": 1.4065287192293972e-05, "loss": 0.3413, "step": 1577 }, { "epoch": 0.028145400064210038, "grad_norm": 0.5388514995574951, "learning_rate": 1.4074206207634677e-05, "loss": 0.2687, "step": 1578 }, { "epoch": 0.028163236185923732, "grad_norm": 0.4802737534046173, "learning_rate": 1.4083125222975385e-05, "loss": 0.3133, "step": 1579 }, { "epoch": 0.028181072307637427, "grad_norm": 0.5418912768363953, "learning_rate": 1.4092044238316091e-05, "loss": 0.345, "step": 1580 }, { "epoch": 0.028198908429351122, "grad_norm": 0.9693113565444946, "learning_rate": 1.4100963253656796e-05, "loss": 0.4423, "step": 1581 }, { "epoch": 0.028216744551064817, "grad_norm": 0.7385473847389221, "learning_rate": 1.4109882268997504e-05, "loss": 0.3985, "step": 1582 }, { "epoch": 0.028234580672778512, "grad_norm": 1.2881739139556885, "learning_rate": 1.4118801284338209e-05, "loss": 0.3586, "step": 1583 }, { "epoch": 0.028252416794492207, "grad_norm": 0.6537035703659058, "learning_rate": 1.4127720299678915e-05, "loss": 0.3643, "step": 1584 }, { "epoch": 0.0282702529162059, "grad_norm": 0.4728448688983917, "learning_rate": 1.4136639315019623e-05, "loss": 0.3705, "step": 1585 }, { "epoch": 0.028288089037919593, "grad_norm": 0.6739163398742676, "learning_rate": 1.4145558330360328e-05, "loss": 0.415, "step": 1586 }, { "epoch": 0.02830592515963329, "grad_norm": 0.6766735911369324, "learning_rate": 1.4154477345701036e-05, "loss": 0.3254, "step": 1587 }, { "epoch": 0.028323761281346983, "grad_norm": 0.527630090713501, "learning_rate": 1.4163396361041742e-05, "loss": 0.3266, "step": 1588 }, { "epoch": 0.028341597403060678, "grad_norm": 0.5813126564025879, "learning_rate": 1.4172315376382447e-05, "loss": 0.335, "step": 1589 }, { "epoch": 0.028359433524774373, "grad_norm": 0.5767165422439575, "learning_rate": 1.4181234391723155e-05, "loss": 0.3075, "step": 1590 }, { "epoch": 0.028377269646488068, "grad_norm": 0.5451858639717102, "learning_rate": 1.4190153407063861e-05, "loss": 0.3574, "step": 1591 }, { "epoch": 0.028395105768201763, "grad_norm": 0.5933049917221069, "learning_rate": 1.4199072422404566e-05, "loss": 0.35, "step": 1592 }, { "epoch": 0.028412941889915458, "grad_norm": 0.47686028480529785, "learning_rate": 1.4207991437745274e-05, "loss": 0.3517, "step": 1593 }, { "epoch": 0.028430778011629153, "grad_norm": 1.1062206029891968, "learning_rate": 1.421691045308598e-05, "loss": 0.372, "step": 1594 }, { "epoch": 0.028448614133342844, "grad_norm": 0.5275440812110901, "learning_rate": 1.4225829468426685e-05, "loss": 0.3307, "step": 1595 }, { "epoch": 0.02846645025505654, "grad_norm": 0.5416312217712402, "learning_rate": 1.4234748483767393e-05, "loss": 0.3859, "step": 1596 }, { "epoch": 0.028484286376770234, "grad_norm": 0.5466942191123962, "learning_rate": 1.4243667499108098e-05, "loss": 0.3436, "step": 1597 }, { "epoch": 0.02850212249848393, "grad_norm": 0.6469510793685913, "learning_rate": 1.4252586514448806e-05, "loss": 0.3629, "step": 1598 }, { "epoch": 0.028519958620197624, "grad_norm": 0.683988630771637, "learning_rate": 1.4261505529789512e-05, "loss": 0.372, "step": 1599 }, { "epoch": 0.02853779474191132, "grad_norm": 0.533053457736969, "learning_rate": 1.4270424545130217e-05, "loss": 0.2931, "step": 1600 }, { "epoch": 0.028555630863625014, "grad_norm": 0.5505555868148804, "learning_rate": 1.4279343560470925e-05, "loss": 0.357, "step": 1601 }, { "epoch": 0.02857346698533871, "grad_norm": 0.5483716726303101, "learning_rate": 1.4288262575811632e-05, "loss": 0.364, "step": 1602 }, { "epoch": 0.028591303107052404, "grad_norm": 0.6058002710342407, "learning_rate": 1.4297181591152336e-05, "loss": 0.3096, "step": 1603 }, { "epoch": 0.0286091392287661, "grad_norm": 0.9553171992301941, "learning_rate": 1.4306100606493044e-05, "loss": 0.3439, "step": 1604 }, { "epoch": 0.02862697535047979, "grad_norm": 0.4612777829170227, "learning_rate": 1.431501962183375e-05, "loss": 0.3047, "step": 1605 }, { "epoch": 0.028644811472193485, "grad_norm": 0.7179811596870422, "learning_rate": 1.4323938637174455e-05, "loss": 0.3473, "step": 1606 }, { "epoch": 0.02866264759390718, "grad_norm": 0.7585439682006836, "learning_rate": 1.4332857652515163e-05, "loss": 0.3683, "step": 1607 }, { "epoch": 0.028680483715620875, "grad_norm": 0.5608382821083069, "learning_rate": 1.434177666785587e-05, "loss": 0.2939, "step": 1608 }, { "epoch": 0.02869831983733457, "grad_norm": 0.8679600358009338, "learning_rate": 1.4350695683196575e-05, "loss": 0.3439, "step": 1609 }, { "epoch": 0.028716155959048265, "grad_norm": 0.582264244556427, "learning_rate": 1.4359614698537283e-05, "loss": 0.3402, "step": 1610 }, { "epoch": 0.02873399208076196, "grad_norm": 0.5689700245857239, "learning_rate": 1.4368533713877987e-05, "loss": 0.3582, "step": 1611 }, { "epoch": 0.028751828202475654, "grad_norm": 0.5647953152656555, "learning_rate": 1.4377452729218695e-05, "loss": 0.3006, "step": 1612 }, { "epoch": 0.02876966432418935, "grad_norm": 0.6635328531265259, "learning_rate": 1.4386371744559402e-05, "loss": 0.3907, "step": 1613 }, { "epoch": 0.028787500445903044, "grad_norm": 0.45135289430618286, "learning_rate": 1.4395290759900106e-05, "loss": 0.3128, "step": 1614 }, { "epoch": 0.028805336567616736, "grad_norm": 0.5480947494506836, "learning_rate": 1.4404209775240815e-05, "loss": 0.3452, "step": 1615 }, { "epoch": 0.02882317268933043, "grad_norm": 0.5812448859214783, "learning_rate": 1.4413128790581521e-05, "loss": 0.3827, "step": 1616 }, { "epoch": 0.028841008811044126, "grad_norm": 0.4756130874156952, "learning_rate": 1.4422047805922226e-05, "loss": 0.2957, "step": 1617 }, { "epoch": 0.02885884493275782, "grad_norm": 0.614660382270813, "learning_rate": 1.4430966821262934e-05, "loss": 0.3268, "step": 1618 }, { "epoch": 0.028876681054471515, "grad_norm": 0.5634617805480957, "learning_rate": 1.443988583660364e-05, "loss": 0.3221, "step": 1619 }, { "epoch": 0.02889451717618521, "grad_norm": 0.5246202349662781, "learning_rate": 1.4448804851944345e-05, "loss": 0.3003, "step": 1620 }, { "epoch": 0.028912353297898905, "grad_norm": 0.5220772624015808, "learning_rate": 1.4457723867285053e-05, "loss": 0.3124, "step": 1621 }, { "epoch": 0.0289301894196126, "grad_norm": 0.5555007457733154, "learning_rate": 1.4466642882625757e-05, "loss": 0.321, "step": 1622 }, { "epoch": 0.028948025541326295, "grad_norm": 0.45659974217414856, "learning_rate": 1.4475561897966467e-05, "loss": 0.3601, "step": 1623 }, { "epoch": 0.02896586166303999, "grad_norm": 0.658943772315979, "learning_rate": 1.4484480913307172e-05, "loss": 0.3118, "step": 1624 }, { "epoch": 0.028983697784753685, "grad_norm": 0.59748375415802, "learning_rate": 1.4493399928647877e-05, "loss": 0.2728, "step": 1625 }, { "epoch": 0.029001533906467376, "grad_norm": 0.7809659242630005, "learning_rate": 1.4502318943988585e-05, "loss": 0.3662, "step": 1626 }, { "epoch": 0.02901937002818107, "grad_norm": 0.8069256544113159, "learning_rate": 1.4511237959329291e-05, "loss": 0.2995, "step": 1627 }, { "epoch": 0.029037206149894766, "grad_norm": 0.4573381543159485, "learning_rate": 1.4520156974669996e-05, "loss": 0.3255, "step": 1628 }, { "epoch": 0.02905504227160846, "grad_norm": 0.7915101647377014, "learning_rate": 1.4529075990010704e-05, "loss": 0.3378, "step": 1629 }, { "epoch": 0.029072878393322156, "grad_norm": 0.7040252685546875, "learning_rate": 1.453799500535141e-05, "loss": 0.3905, "step": 1630 }, { "epoch": 0.02909071451503585, "grad_norm": 0.5259699821472168, "learning_rate": 1.4546914020692115e-05, "loss": 0.3348, "step": 1631 }, { "epoch": 0.029108550636749546, "grad_norm": 0.5336809158325195, "learning_rate": 1.4555833036032823e-05, "loss": 0.2882, "step": 1632 }, { "epoch": 0.02912638675846324, "grad_norm": 0.6732202172279358, "learning_rate": 1.456475205137353e-05, "loss": 0.3696, "step": 1633 }, { "epoch": 0.029144222880176936, "grad_norm": 0.6707174777984619, "learning_rate": 1.4573671066714237e-05, "loss": 0.3388, "step": 1634 }, { "epoch": 0.02916205900189063, "grad_norm": 0.6094953417778015, "learning_rate": 1.4582590082054942e-05, "loss": 0.3194, "step": 1635 }, { "epoch": 0.029179895123604322, "grad_norm": 0.7700886726379395, "learning_rate": 1.4591509097395647e-05, "loss": 0.4018, "step": 1636 }, { "epoch": 0.029197731245318017, "grad_norm": 0.5840463042259216, "learning_rate": 1.4600428112736355e-05, "loss": 0.3459, "step": 1637 }, { "epoch": 0.029215567367031712, "grad_norm": 0.7443630695343018, "learning_rate": 1.4609347128077061e-05, "loss": 0.3808, "step": 1638 }, { "epoch": 0.029233403488745407, "grad_norm": 0.5667780637741089, "learning_rate": 1.4618266143417766e-05, "loss": 0.2996, "step": 1639 }, { "epoch": 0.0292512396104591, "grad_norm": 0.48666754364967346, "learning_rate": 1.4627185158758474e-05, "loss": 0.3225, "step": 1640 }, { "epoch": 0.029269075732172797, "grad_norm": 0.5524598360061646, "learning_rate": 1.463610417409918e-05, "loss": 0.3021, "step": 1641 }, { "epoch": 0.02928691185388649, "grad_norm": 0.6522731781005859, "learning_rate": 1.4645023189439885e-05, "loss": 0.4239, "step": 1642 }, { "epoch": 0.029304747975600186, "grad_norm": 0.5739700198173523, "learning_rate": 1.4653942204780593e-05, "loss": 0.3228, "step": 1643 }, { "epoch": 0.02932258409731388, "grad_norm": 0.47902143001556396, "learning_rate": 1.46628612201213e-05, "loss": 0.3283, "step": 1644 }, { "epoch": 0.029340420219027576, "grad_norm": 0.6247063279151917, "learning_rate": 1.4671780235462008e-05, "loss": 0.3888, "step": 1645 }, { "epoch": 0.029358256340741268, "grad_norm": 0.6175928115844727, "learning_rate": 1.4680699250802712e-05, "loss": 0.4071, "step": 1646 }, { "epoch": 0.029376092462454963, "grad_norm": 0.6941092610359192, "learning_rate": 1.4689618266143419e-05, "loss": 0.317, "step": 1647 }, { "epoch": 0.029393928584168658, "grad_norm": 0.6310111880302429, "learning_rate": 1.4698537281484127e-05, "loss": 0.375, "step": 1648 }, { "epoch": 0.029411764705882353, "grad_norm": 0.7415862083435059, "learning_rate": 1.4707456296824831e-05, "loss": 0.3676, "step": 1649 }, { "epoch": 0.029429600827596047, "grad_norm": 0.7599250078201294, "learning_rate": 1.4716375312165536e-05, "loss": 0.3719, "step": 1650 }, { "epoch": 0.029447436949309742, "grad_norm": 0.5519864559173584, "learning_rate": 1.4725294327506244e-05, "loss": 0.3253, "step": 1651 }, { "epoch": 0.029465273071023437, "grad_norm": 0.6969557404518127, "learning_rate": 1.473421334284695e-05, "loss": 0.3283, "step": 1652 }, { "epoch": 0.029483109192737132, "grad_norm": 0.7012293934822083, "learning_rate": 1.4743132358187655e-05, "loss": 0.3987, "step": 1653 }, { "epoch": 0.029500945314450827, "grad_norm": 0.5322942137718201, "learning_rate": 1.4752051373528363e-05, "loss": 0.3612, "step": 1654 }, { "epoch": 0.029518781436164522, "grad_norm": 0.5493043065071106, "learning_rate": 1.476097038886907e-05, "loss": 0.3537, "step": 1655 }, { "epoch": 0.029536617557878213, "grad_norm": 0.5930426120758057, "learning_rate": 1.4769889404209774e-05, "loss": 0.3685, "step": 1656 }, { "epoch": 0.02955445367959191, "grad_norm": 0.5160814523696899, "learning_rate": 1.4778808419550482e-05, "loss": 0.353, "step": 1657 }, { "epoch": 0.029572289801305603, "grad_norm": 0.9225820899009705, "learning_rate": 1.4787727434891189e-05, "loss": 0.3089, "step": 1658 }, { "epoch": 0.029590125923019298, "grad_norm": 0.6422647833824158, "learning_rate": 1.4796646450231897e-05, "loss": 0.3474, "step": 1659 }, { "epoch": 0.029607962044732993, "grad_norm": 0.5172878503799438, "learning_rate": 1.4805565465572602e-05, "loss": 0.33, "step": 1660 }, { "epoch": 0.029625798166446688, "grad_norm": 0.7302867770195007, "learning_rate": 1.4814484480913306e-05, "loss": 0.3845, "step": 1661 }, { "epoch": 0.029643634288160383, "grad_norm": 0.5681297183036804, "learning_rate": 1.4823403496254016e-05, "loss": 0.3181, "step": 1662 }, { "epoch": 0.029661470409874078, "grad_norm": 0.5470948219299316, "learning_rate": 1.483232251159472e-05, "loss": 0.2965, "step": 1663 }, { "epoch": 0.029679306531587773, "grad_norm": 0.9585771560668945, "learning_rate": 1.4841241526935425e-05, "loss": 0.3601, "step": 1664 }, { "epoch": 0.029697142653301468, "grad_norm": 0.46626949310302734, "learning_rate": 1.4850160542276134e-05, "loss": 0.2903, "step": 1665 }, { "epoch": 0.02971497877501516, "grad_norm": 0.5196533203125, "learning_rate": 1.485907955761684e-05, "loss": 0.3026, "step": 1666 }, { "epoch": 0.029732814896728854, "grad_norm": 0.6445388793945312, "learning_rate": 1.4867998572957545e-05, "loss": 0.3207, "step": 1667 }, { "epoch": 0.02975065101844255, "grad_norm": 0.49068230390548706, "learning_rate": 1.4876917588298253e-05, "loss": 0.3813, "step": 1668 }, { "epoch": 0.029768487140156244, "grad_norm": 0.5983552932739258, "learning_rate": 1.4885836603638959e-05, "loss": 0.3986, "step": 1669 }, { "epoch": 0.02978632326186994, "grad_norm": 0.6433863043785095, "learning_rate": 1.4894755618979667e-05, "loss": 0.3469, "step": 1670 }, { "epoch": 0.029804159383583634, "grad_norm": 0.7386375069618225, "learning_rate": 1.4903674634320372e-05, "loss": 0.3405, "step": 1671 }, { "epoch": 0.02982199550529733, "grad_norm": 0.7433804869651794, "learning_rate": 1.4912593649661078e-05, "loss": 0.3014, "step": 1672 }, { "epoch": 0.029839831627011024, "grad_norm": 0.6054247617721558, "learning_rate": 1.4921512665001786e-05, "loss": 0.3161, "step": 1673 }, { "epoch": 0.02985766774872472, "grad_norm": 0.7582512497901917, "learning_rate": 1.4930431680342491e-05, "loss": 0.3116, "step": 1674 }, { "epoch": 0.029875503870438413, "grad_norm": 0.6766425967216492, "learning_rate": 1.4939350695683196e-05, "loss": 0.3331, "step": 1675 }, { "epoch": 0.029893339992152105, "grad_norm": 0.5102553963661194, "learning_rate": 1.4948269711023904e-05, "loss": 0.2678, "step": 1676 }, { "epoch": 0.0299111761138658, "grad_norm": 0.4020669758319855, "learning_rate": 1.495718872636461e-05, "loss": 0.2832, "step": 1677 }, { "epoch": 0.029929012235579495, "grad_norm": 0.566681444644928, "learning_rate": 1.4966107741705315e-05, "loss": 0.3955, "step": 1678 }, { "epoch": 0.02994684835729319, "grad_norm": 0.6386412382125854, "learning_rate": 1.4975026757046023e-05, "loss": 0.3676, "step": 1679 }, { "epoch": 0.029964684479006885, "grad_norm": 0.6427171230316162, "learning_rate": 1.498394577238673e-05, "loss": 0.3432, "step": 1680 }, { "epoch": 0.02998252060072058, "grad_norm": 0.6253827810287476, "learning_rate": 1.4992864787727437e-05, "loss": 0.3553, "step": 1681 }, { "epoch": 0.030000356722434274, "grad_norm": 0.44797033071517944, "learning_rate": 1.5001783803068142e-05, "loss": 0.3329, "step": 1682 }, { "epoch": 0.03001819284414797, "grad_norm": 0.5939450263977051, "learning_rate": 1.5010702818408848e-05, "loss": 0.3701, "step": 1683 }, { "epoch": 0.030036028965861664, "grad_norm": 0.6008427739143372, "learning_rate": 1.5019621833749556e-05, "loss": 0.3999, "step": 1684 }, { "epoch": 0.03005386508757536, "grad_norm": 0.5785388350486755, "learning_rate": 1.5028540849090261e-05, "loss": 0.344, "step": 1685 }, { "epoch": 0.03007170120928905, "grad_norm": 0.4829639196395874, "learning_rate": 1.5037459864430966e-05, "loss": 0.3043, "step": 1686 }, { "epoch": 0.030089537331002746, "grad_norm": 0.46917492151260376, "learning_rate": 1.5046378879771676e-05, "loss": 0.28, "step": 1687 }, { "epoch": 0.03010737345271644, "grad_norm": 0.6790163516998291, "learning_rate": 1.505529789511238e-05, "loss": 0.3928, "step": 1688 }, { "epoch": 0.030125209574430135, "grad_norm": 1.2584736347198486, "learning_rate": 1.5064216910453085e-05, "loss": 0.3235, "step": 1689 }, { "epoch": 0.03014304569614383, "grad_norm": 0.6248006820678711, "learning_rate": 1.5073135925793793e-05, "loss": 0.4477, "step": 1690 }, { "epoch": 0.030160881817857525, "grad_norm": 0.9073899388313293, "learning_rate": 1.50820549411345e-05, "loss": 0.3809, "step": 1691 }, { "epoch": 0.03017871793957122, "grad_norm": 0.4276426136493683, "learning_rate": 1.5090973956475204e-05, "loss": 0.268, "step": 1692 }, { "epoch": 0.030196554061284915, "grad_norm": 0.5526872873306274, "learning_rate": 1.5099892971815912e-05, "loss": 0.347, "step": 1693 }, { "epoch": 0.03021439018299861, "grad_norm": 0.6889521479606628, "learning_rate": 1.5108811987156619e-05, "loss": 0.3236, "step": 1694 }, { "epoch": 0.030232226304712305, "grad_norm": 0.5612066984176636, "learning_rate": 1.5117731002497327e-05, "loss": 0.3442, "step": 1695 }, { "epoch": 0.030250062426425996, "grad_norm": 0.6143417358398438, "learning_rate": 1.5126650017838031e-05, "loss": 0.3385, "step": 1696 }, { "epoch": 0.03026789854813969, "grad_norm": 0.5099233984947205, "learning_rate": 1.5135569033178738e-05, "loss": 0.2929, "step": 1697 }, { "epoch": 0.030285734669853386, "grad_norm": 0.5673273801803589, "learning_rate": 1.5144488048519446e-05, "loss": 0.3271, "step": 1698 }, { "epoch": 0.03030357079156708, "grad_norm": 0.6243776679039001, "learning_rate": 1.515340706386015e-05, "loss": 0.3393, "step": 1699 }, { "epoch": 0.030321406913280776, "grad_norm": 0.44045859575271606, "learning_rate": 1.5162326079200855e-05, "loss": 0.3336, "step": 1700 }, { "epoch": 0.03033924303499447, "grad_norm": 0.7044060230255127, "learning_rate": 1.5171245094541563e-05, "loss": 0.311, "step": 1701 }, { "epoch": 0.030357079156708166, "grad_norm": 0.7421000003814697, "learning_rate": 1.518016410988227e-05, "loss": 0.3022, "step": 1702 }, { "epoch": 0.03037491527842186, "grad_norm": 0.7464106678962708, "learning_rate": 1.5189083125222974e-05, "loss": 0.3607, "step": 1703 }, { "epoch": 0.030392751400135556, "grad_norm": 0.547999918460846, "learning_rate": 1.5198002140563682e-05, "loss": 0.331, "step": 1704 }, { "epoch": 0.03041058752184925, "grad_norm": 0.747925341129303, "learning_rate": 1.5206921155904389e-05, "loss": 0.3451, "step": 1705 }, { "epoch": 0.030428423643562942, "grad_norm": 0.5418623089790344, "learning_rate": 1.5215840171245097e-05, "loss": 0.3059, "step": 1706 }, { "epoch": 0.030446259765276637, "grad_norm": 0.7048314213752747, "learning_rate": 1.5224759186585802e-05, "loss": 0.4262, "step": 1707 }, { "epoch": 0.030464095886990332, "grad_norm": 0.7050665616989136, "learning_rate": 1.5233678201926508e-05, "loss": 0.3639, "step": 1708 }, { "epoch": 0.030481932008704027, "grad_norm": 0.6242355108261108, "learning_rate": 1.5242597217267216e-05, "loss": 0.3864, "step": 1709 }, { "epoch": 0.030499768130417722, "grad_norm": 0.5299994945526123, "learning_rate": 1.525151623260792e-05, "loss": 0.3267, "step": 1710 }, { "epoch": 0.030517604252131417, "grad_norm": 0.4984886646270752, "learning_rate": 1.5260435247948627e-05, "loss": 0.3338, "step": 1711 }, { "epoch": 0.03053544037384511, "grad_norm": 0.6587681174278259, "learning_rate": 1.5269354263289333e-05, "loss": 0.3578, "step": 1712 }, { "epoch": 0.030553276495558807, "grad_norm": 0.5379131436347961, "learning_rate": 1.527827327863004e-05, "loss": 0.3552, "step": 1713 }, { "epoch": 0.0305711126172725, "grad_norm": 0.5867990851402283, "learning_rate": 1.5287192293970746e-05, "loss": 0.3984, "step": 1714 }, { "epoch": 0.030588948738986196, "grad_norm": 0.5907850861549377, "learning_rate": 1.5296111309311453e-05, "loss": 0.3518, "step": 1715 }, { "epoch": 0.030606784860699888, "grad_norm": 0.467940092086792, "learning_rate": 1.530503032465216e-05, "loss": 0.2849, "step": 1716 }, { "epoch": 0.030624620982413583, "grad_norm": 0.4978936016559601, "learning_rate": 1.5313949339992865e-05, "loss": 0.3223, "step": 1717 }, { "epoch": 0.030642457104127278, "grad_norm": 0.40754082798957825, "learning_rate": 1.532286835533357e-05, "loss": 0.293, "step": 1718 }, { "epoch": 0.030660293225840973, "grad_norm": 0.41278594732284546, "learning_rate": 1.5331787370674278e-05, "loss": 0.3247, "step": 1719 }, { "epoch": 0.030678129347554667, "grad_norm": 0.5860134363174438, "learning_rate": 1.5340706386014984e-05, "loss": 0.3044, "step": 1720 }, { "epoch": 0.030695965469268362, "grad_norm": 0.4603513479232788, "learning_rate": 1.534962540135569e-05, "loss": 0.2784, "step": 1721 }, { "epoch": 0.030713801590982057, "grad_norm": 0.5335546135902405, "learning_rate": 1.5358544416696397e-05, "loss": 0.2867, "step": 1722 }, { "epoch": 0.030731637712695752, "grad_norm": 0.8522798418998718, "learning_rate": 1.5367463432037104e-05, "loss": 0.353, "step": 1723 }, { "epoch": 0.030749473834409447, "grad_norm": 0.6604307889938354, "learning_rate": 1.537638244737781e-05, "loss": 0.3571, "step": 1724 }, { "epoch": 0.030767309956123142, "grad_norm": 0.5790998935699463, "learning_rate": 1.5385301462718516e-05, "loss": 0.3584, "step": 1725 }, { "epoch": 0.030785146077836834, "grad_norm": 0.728276252746582, "learning_rate": 1.5394220478059223e-05, "loss": 0.371, "step": 1726 }, { "epoch": 0.03080298219955053, "grad_norm": 0.4201717674732208, "learning_rate": 1.540313949339993e-05, "loss": 0.2946, "step": 1727 }, { "epoch": 0.030820818321264223, "grad_norm": 0.5037418603897095, "learning_rate": 1.5412058508740635e-05, "loss": 0.3255, "step": 1728 }, { "epoch": 0.03083865444297792, "grad_norm": 0.5051530599594116, "learning_rate": 1.5420977524081342e-05, "loss": 0.3207, "step": 1729 }, { "epoch": 0.030856490564691613, "grad_norm": 0.5607863664627075, "learning_rate": 1.5429896539422048e-05, "loss": 0.3102, "step": 1730 }, { "epoch": 0.030874326686405308, "grad_norm": 0.5118757486343384, "learning_rate": 1.5438815554762755e-05, "loss": 0.3617, "step": 1731 }, { "epoch": 0.030892162808119003, "grad_norm": 0.8359280228614807, "learning_rate": 1.544773457010346e-05, "loss": 0.31, "step": 1732 }, { "epoch": 0.030909998929832698, "grad_norm": 0.7327261567115784, "learning_rate": 1.5456653585444167e-05, "loss": 0.3672, "step": 1733 }, { "epoch": 0.030927835051546393, "grad_norm": 1.0370457172393799, "learning_rate": 1.5465572600784874e-05, "loss": 0.3358, "step": 1734 }, { "epoch": 0.030945671173260088, "grad_norm": 0.5791711211204529, "learning_rate": 1.547449161612558e-05, "loss": 0.3458, "step": 1735 }, { "epoch": 0.03096350729497378, "grad_norm": 1.1872961521148682, "learning_rate": 1.5483410631466287e-05, "loss": 0.3646, "step": 1736 }, { "epoch": 0.030981343416687474, "grad_norm": 0.9485112428665161, "learning_rate": 1.5492329646806993e-05, "loss": 0.3421, "step": 1737 }, { "epoch": 0.03099917953840117, "grad_norm": 0.8620749711990356, "learning_rate": 1.55012486621477e-05, "loss": 0.3419, "step": 1738 }, { "epoch": 0.031017015660114864, "grad_norm": 0.7116780281066895, "learning_rate": 1.5510167677488406e-05, "loss": 0.4405, "step": 1739 }, { "epoch": 0.03103485178182856, "grad_norm": 0.720676600933075, "learning_rate": 1.5519086692829112e-05, "loss": 0.3119, "step": 1740 }, { "epoch": 0.031052687903542254, "grad_norm": 0.4957166612148285, "learning_rate": 1.552800570816982e-05, "loss": 0.3314, "step": 1741 }, { "epoch": 0.03107052402525595, "grad_norm": 0.5247991681098938, "learning_rate": 1.5536924723510525e-05, "loss": 0.324, "step": 1742 }, { "epoch": 0.031088360146969644, "grad_norm": 0.6245825886726379, "learning_rate": 1.554584373885123e-05, "loss": 0.3569, "step": 1743 }, { "epoch": 0.03110619626868334, "grad_norm": 0.47443920373916626, "learning_rate": 1.5554762754191938e-05, "loss": 0.3254, "step": 1744 }, { "epoch": 0.031124032390397034, "grad_norm": 0.532443642616272, "learning_rate": 1.5563681769532644e-05, "loss": 0.2956, "step": 1745 }, { "epoch": 0.031141868512110725, "grad_norm": 0.6366216540336609, "learning_rate": 1.557260078487335e-05, "loss": 0.3432, "step": 1746 }, { "epoch": 0.03115970463382442, "grad_norm": 0.4841454029083252, "learning_rate": 1.5581519800214057e-05, "loss": 0.3041, "step": 1747 }, { "epoch": 0.031177540755538115, "grad_norm": 0.5549114942550659, "learning_rate": 1.5590438815554763e-05, "loss": 0.3335, "step": 1748 }, { "epoch": 0.03119537687725181, "grad_norm": 0.5786772966384888, "learning_rate": 1.559935783089547e-05, "loss": 0.2767, "step": 1749 }, { "epoch": 0.031213212998965505, "grad_norm": 0.4859549403190613, "learning_rate": 1.5608276846236176e-05, "loss": 0.3186, "step": 1750 }, { "epoch": 0.0312310491206792, "grad_norm": 0.6097210645675659, "learning_rate": 1.5617195861576882e-05, "loss": 0.3325, "step": 1751 }, { "epoch": 0.031248885242392895, "grad_norm": 0.5827273726463318, "learning_rate": 1.562611487691759e-05, "loss": 0.2936, "step": 1752 }, { "epoch": 0.03126672136410659, "grad_norm": 0.9926132559776306, "learning_rate": 1.56350338922583e-05, "loss": 0.2715, "step": 1753 }, { "epoch": 0.031284557485820284, "grad_norm": 0.6341984868049622, "learning_rate": 1.5643952907599e-05, "loss": 0.3361, "step": 1754 }, { "epoch": 0.03130239360753398, "grad_norm": 0.5048485994338989, "learning_rate": 1.5652871922939708e-05, "loss": 0.2997, "step": 1755 }, { "epoch": 0.031320229729247674, "grad_norm": 0.9564552307128906, "learning_rate": 1.5661790938280414e-05, "loss": 0.3162, "step": 1756 }, { "epoch": 0.03133806585096137, "grad_norm": 0.5259284973144531, "learning_rate": 1.567070995362112e-05, "loss": 0.3426, "step": 1757 }, { "epoch": 0.031355901972675064, "grad_norm": 0.4777657389640808, "learning_rate": 1.5679628968961827e-05, "loss": 0.3213, "step": 1758 }, { "epoch": 0.03137373809438876, "grad_norm": 0.4671401381492615, "learning_rate": 1.5688547984302533e-05, "loss": 0.3262, "step": 1759 }, { "epoch": 0.031391574216102454, "grad_norm": 0.6501715183258057, "learning_rate": 1.569746699964324e-05, "loss": 0.3214, "step": 1760 }, { "epoch": 0.03140941033781614, "grad_norm": 0.6746953129768372, "learning_rate": 1.5706386014983946e-05, "loss": 0.3335, "step": 1761 }, { "epoch": 0.03142724645952984, "grad_norm": 0.47077369689941406, "learning_rate": 1.5715305030324652e-05, "loss": 0.3455, "step": 1762 }, { "epoch": 0.03144508258124353, "grad_norm": 0.5709850192070007, "learning_rate": 1.572422404566536e-05, "loss": 0.2756, "step": 1763 }, { "epoch": 0.03146291870295723, "grad_norm": 0.6588783264160156, "learning_rate": 1.573314306100607e-05, "loss": 0.3302, "step": 1764 }, { "epoch": 0.03148075482467092, "grad_norm": 0.519670307636261, "learning_rate": 1.574206207634677e-05, "loss": 0.3091, "step": 1765 }, { "epoch": 0.031498590946384616, "grad_norm": 0.7215790748596191, "learning_rate": 1.5750981091687478e-05, "loss": 0.3603, "step": 1766 }, { "epoch": 0.03151642706809831, "grad_norm": 0.565071702003479, "learning_rate": 1.5759900107028184e-05, "loss": 0.3369, "step": 1767 }, { "epoch": 0.031534263189812006, "grad_norm": 0.5075246095657349, "learning_rate": 1.576881912236889e-05, "loss": 0.3287, "step": 1768 }, { "epoch": 0.0315520993115257, "grad_norm": 0.6114002466201782, "learning_rate": 1.5777738137709597e-05, "loss": 0.3298, "step": 1769 }, { "epoch": 0.031569935433239396, "grad_norm": 0.6642633676528931, "learning_rate": 1.5786657153050303e-05, "loss": 0.3377, "step": 1770 }, { "epoch": 0.03158777155495309, "grad_norm": 0.5811492800712585, "learning_rate": 1.579557616839101e-05, "loss": 0.3749, "step": 1771 }, { "epoch": 0.031605607676666786, "grad_norm": 0.708658754825592, "learning_rate": 1.5804495183731716e-05, "loss": 0.324, "step": 1772 }, { "epoch": 0.03162344379838048, "grad_norm": 0.4351314306259155, "learning_rate": 1.5813414199072423e-05, "loss": 0.302, "step": 1773 }, { "epoch": 0.031641279920094176, "grad_norm": 0.6305572986602783, "learning_rate": 1.582233321441313e-05, "loss": 0.3818, "step": 1774 }, { "epoch": 0.03165911604180787, "grad_norm": 0.5669882297515869, "learning_rate": 1.5831252229753835e-05, "loss": 0.263, "step": 1775 }, { "epoch": 0.031676952163521566, "grad_norm": 0.5446153283119202, "learning_rate": 1.5840171245094542e-05, "loss": 0.3097, "step": 1776 }, { "epoch": 0.03169478828523526, "grad_norm": 0.7341017723083496, "learning_rate": 1.5849090260435248e-05, "loss": 0.4043, "step": 1777 }, { "epoch": 0.031712624406948955, "grad_norm": 0.7255344986915588, "learning_rate": 1.5858009275775958e-05, "loss": 0.3509, "step": 1778 }, { "epoch": 0.03173046052866265, "grad_norm": 0.6021090745925903, "learning_rate": 1.586692829111666e-05, "loss": 0.3776, "step": 1779 }, { "epoch": 0.031748296650376345, "grad_norm": 0.5457401871681213, "learning_rate": 1.5875847306457367e-05, "loss": 0.3262, "step": 1780 }, { "epoch": 0.03176613277209003, "grad_norm": 0.6532450914382935, "learning_rate": 1.5884766321798074e-05, "loss": 0.3474, "step": 1781 }, { "epoch": 0.03178396889380373, "grad_norm": 0.5987659692764282, "learning_rate": 1.589368533713878e-05, "loss": 0.3386, "step": 1782 }, { "epoch": 0.03180180501551742, "grad_norm": 0.5551632642745972, "learning_rate": 1.5902604352479486e-05, "loss": 0.3638, "step": 1783 }, { "epoch": 0.03181964113723112, "grad_norm": 0.42713162302970886, "learning_rate": 1.5911523367820193e-05, "loss": 0.2944, "step": 1784 }, { "epoch": 0.03183747725894481, "grad_norm": 0.6613355875015259, "learning_rate": 1.59204423831609e-05, "loss": 0.3616, "step": 1785 }, { "epoch": 0.03185531338065851, "grad_norm": 0.6508033275604248, "learning_rate": 1.5929361398501606e-05, "loss": 0.351, "step": 1786 }, { "epoch": 0.0318731495023722, "grad_norm": 0.4862409830093384, "learning_rate": 1.5938280413842312e-05, "loss": 0.2744, "step": 1787 }, { "epoch": 0.0318909856240859, "grad_norm": 0.5413964986801147, "learning_rate": 1.594719942918302e-05, "loss": 0.3702, "step": 1788 }, { "epoch": 0.03190882174579959, "grad_norm": 0.5709779858589172, "learning_rate": 1.5956118444523728e-05, "loss": 0.3765, "step": 1789 }, { "epoch": 0.03192665786751329, "grad_norm": 0.5577303171157837, "learning_rate": 1.596503745986443e-05, "loss": 0.3755, "step": 1790 }, { "epoch": 0.03194449398922698, "grad_norm": 0.653723955154419, "learning_rate": 1.5973956475205137e-05, "loss": 0.3781, "step": 1791 }, { "epoch": 0.03196233011094068, "grad_norm": 0.6088376045227051, "learning_rate": 1.5982875490545847e-05, "loss": 0.3763, "step": 1792 }, { "epoch": 0.03198016623265437, "grad_norm": 0.4915998578071594, "learning_rate": 1.599179450588655e-05, "loss": 0.306, "step": 1793 }, { "epoch": 0.03199800235436807, "grad_norm": 0.6609599590301514, "learning_rate": 1.6000713521227257e-05, "loss": 0.2767, "step": 1794 }, { "epoch": 0.03201583847608176, "grad_norm": 0.7062175273895264, "learning_rate": 1.6009632536567963e-05, "loss": 0.3303, "step": 1795 }, { "epoch": 0.03203367459779546, "grad_norm": 0.6269365549087524, "learning_rate": 1.601855155190867e-05, "loss": 0.4083, "step": 1796 }, { "epoch": 0.03205151071950915, "grad_norm": 0.4888173043727875, "learning_rate": 1.6027470567249376e-05, "loss": 0.3019, "step": 1797 }, { "epoch": 0.03206934684122285, "grad_norm": 0.5467481017112732, "learning_rate": 1.6036389582590082e-05, "loss": 0.2895, "step": 1798 }, { "epoch": 0.03208718296293654, "grad_norm": 0.6655282378196716, "learning_rate": 1.604530859793079e-05, "loss": 0.3434, "step": 1799 }, { "epoch": 0.03210501908465024, "grad_norm": 0.5917126536369324, "learning_rate": 1.6054227613271498e-05, "loss": 0.3853, "step": 1800 }, { "epoch": 0.03212285520636393, "grad_norm": 0.524863600730896, "learning_rate": 1.60631466286122e-05, "loss": 0.3772, "step": 1801 }, { "epoch": 0.03214069132807762, "grad_norm": 0.8061359524726868, "learning_rate": 1.6072065643952908e-05, "loss": 0.4004, "step": 1802 }, { "epoch": 0.032158527449791315, "grad_norm": 0.4886523187160492, "learning_rate": 1.6080984659293617e-05, "loss": 0.3083, "step": 1803 }, { "epoch": 0.03217636357150501, "grad_norm": 0.6840428113937378, "learning_rate": 1.608990367463432e-05, "loss": 0.3674, "step": 1804 }, { "epoch": 0.032194199693218704, "grad_norm": 0.6019099950790405, "learning_rate": 1.6098822689975027e-05, "loss": 0.342, "step": 1805 }, { "epoch": 0.0322120358149324, "grad_norm": 0.5354987382888794, "learning_rate": 1.6107741705315733e-05, "loss": 0.3151, "step": 1806 }, { "epoch": 0.032229871936646094, "grad_norm": 0.4807489812374115, "learning_rate": 1.611666072065644e-05, "loss": 0.3347, "step": 1807 }, { "epoch": 0.03224770805835979, "grad_norm": 0.7448999881744385, "learning_rate": 1.6125579735997146e-05, "loss": 0.3125, "step": 1808 }, { "epoch": 0.032265544180073484, "grad_norm": 0.6970858573913574, "learning_rate": 1.6134498751337852e-05, "loss": 0.3795, "step": 1809 }, { "epoch": 0.03228338030178718, "grad_norm": 0.6009823679924011, "learning_rate": 1.614341776667856e-05, "loss": 0.3251, "step": 1810 }, { "epoch": 0.032301216423500874, "grad_norm": 0.7352908849716187, "learning_rate": 1.6152336782019265e-05, "loss": 0.3759, "step": 1811 }, { "epoch": 0.03231905254521457, "grad_norm": 0.5852219462394714, "learning_rate": 1.616125579735997e-05, "loss": 0.3477, "step": 1812 }, { "epoch": 0.032336888666928264, "grad_norm": 0.6261390447616577, "learning_rate": 1.6170174812700678e-05, "loss": 0.2943, "step": 1813 }, { "epoch": 0.03235472478864196, "grad_norm": 0.6401010155677795, "learning_rate": 1.6179093828041388e-05, "loss": 0.3198, "step": 1814 }, { "epoch": 0.032372560910355654, "grad_norm": 0.6377858519554138, "learning_rate": 1.618801284338209e-05, "loss": 0.3611, "step": 1815 }, { "epoch": 0.03239039703206935, "grad_norm": 0.507521390914917, "learning_rate": 1.6196931858722797e-05, "loss": 0.305, "step": 1816 }, { "epoch": 0.03240823315378304, "grad_norm": 0.675403892993927, "learning_rate": 1.6205850874063507e-05, "loss": 0.3398, "step": 1817 }, { "epoch": 0.03242606927549674, "grad_norm": 0.591806173324585, "learning_rate": 1.621476988940421e-05, "loss": 0.3112, "step": 1818 }, { "epoch": 0.03244390539721043, "grad_norm": 0.5430218577384949, "learning_rate": 1.6223688904744916e-05, "loss": 0.3599, "step": 1819 }, { "epoch": 0.03246174151892413, "grad_norm": 0.5644108653068542, "learning_rate": 1.6232607920085622e-05, "loss": 0.38, "step": 1820 }, { "epoch": 0.03247957764063782, "grad_norm": 0.6039701104164124, "learning_rate": 1.624152693542633e-05, "loss": 0.3463, "step": 1821 }, { "epoch": 0.03249741376235151, "grad_norm": 0.5967687368392944, "learning_rate": 1.6250445950767035e-05, "loss": 0.3288, "step": 1822 }, { "epoch": 0.032515249884065206, "grad_norm": 0.6029810309410095, "learning_rate": 1.625936496610774e-05, "loss": 0.2951, "step": 1823 }, { "epoch": 0.0325330860057789, "grad_norm": 0.9297475218772888, "learning_rate": 1.6268283981448448e-05, "loss": 0.3111, "step": 1824 }, { "epoch": 0.032550922127492596, "grad_norm": 0.5336518883705139, "learning_rate": 1.6277202996789158e-05, "loss": 0.2874, "step": 1825 }, { "epoch": 0.03256875824920629, "grad_norm": 0.6874714493751526, "learning_rate": 1.628612201212986e-05, "loss": 0.3765, "step": 1826 }, { "epoch": 0.032586594370919986, "grad_norm": 0.9035260677337646, "learning_rate": 1.6295041027470567e-05, "loss": 0.2961, "step": 1827 }, { "epoch": 0.03260443049263368, "grad_norm": 1.1584899425506592, "learning_rate": 1.6303960042811277e-05, "loss": 0.2838, "step": 1828 }, { "epoch": 0.032622266614347376, "grad_norm": 0.6553873419761658, "learning_rate": 1.631287905815198e-05, "loss": 0.2886, "step": 1829 }, { "epoch": 0.03264010273606107, "grad_norm": 0.8739922046661377, "learning_rate": 1.6321798073492686e-05, "loss": 0.3112, "step": 1830 }, { "epoch": 0.032657938857774765, "grad_norm": 0.7506694793701172, "learning_rate": 1.6330717088833393e-05, "loss": 0.3454, "step": 1831 }, { "epoch": 0.03267577497948846, "grad_norm": 0.5153954029083252, "learning_rate": 1.63396361041741e-05, "loss": 0.2918, "step": 1832 }, { "epoch": 0.032693611101202155, "grad_norm": 0.7300745844841003, "learning_rate": 1.6348555119514805e-05, "loss": 0.2963, "step": 1833 }, { "epoch": 0.03271144722291585, "grad_norm": 0.4347021281719208, "learning_rate": 1.6357474134855512e-05, "loss": 0.2743, "step": 1834 }, { "epoch": 0.032729283344629545, "grad_norm": 0.4705832898616791, "learning_rate": 1.6366393150196218e-05, "loss": 0.3328, "step": 1835 }, { "epoch": 0.03274711946634324, "grad_norm": 0.5846000909805298, "learning_rate": 1.6375312165536928e-05, "loss": 0.3243, "step": 1836 }, { "epoch": 0.032764955588056935, "grad_norm": 0.4604929983615875, "learning_rate": 1.638423118087763e-05, "loss": 0.3167, "step": 1837 }, { "epoch": 0.03278279170977063, "grad_norm": 0.5665956735610962, "learning_rate": 1.6393150196218337e-05, "loss": 0.3499, "step": 1838 }, { "epoch": 0.032800627831484325, "grad_norm": 0.410715252161026, "learning_rate": 1.6402069211559047e-05, "loss": 0.287, "step": 1839 }, { "epoch": 0.03281846395319802, "grad_norm": 0.6050565242767334, "learning_rate": 1.641098822689975e-05, "loss": 0.3052, "step": 1840 }, { "epoch": 0.032836300074911715, "grad_norm": 0.6299166679382324, "learning_rate": 1.6419907242240456e-05, "loss": 0.3288, "step": 1841 }, { "epoch": 0.0328541361966254, "grad_norm": 0.5011922121047974, "learning_rate": 1.6428826257581166e-05, "loss": 0.3266, "step": 1842 }, { "epoch": 0.0328719723183391, "grad_norm": 0.5295569896697998, "learning_rate": 1.643774527292187e-05, "loss": 0.3224, "step": 1843 }, { "epoch": 0.03288980844005279, "grad_norm": 0.5636371374130249, "learning_rate": 1.6446664288262576e-05, "loss": 0.337, "step": 1844 }, { "epoch": 0.03290764456176649, "grad_norm": 0.8197712302207947, "learning_rate": 1.6455583303603282e-05, "loss": 0.3129, "step": 1845 }, { "epoch": 0.03292548068348018, "grad_norm": 0.588846743106842, "learning_rate": 1.646450231894399e-05, "loss": 0.3359, "step": 1846 }, { "epoch": 0.03294331680519388, "grad_norm": 0.6380104422569275, "learning_rate": 1.6473421334284698e-05, "loss": 0.3615, "step": 1847 }, { "epoch": 0.03296115292690757, "grad_norm": 0.4611605107784271, "learning_rate": 1.64823403496254e-05, "loss": 0.3519, "step": 1848 }, { "epoch": 0.03297898904862127, "grad_norm": 0.42634543776512146, "learning_rate": 1.6491259364966108e-05, "loss": 0.2981, "step": 1849 }, { "epoch": 0.03299682517033496, "grad_norm": 0.5107312202453613, "learning_rate": 1.6500178380306817e-05, "loss": 0.3887, "step": 1850 }, { "epoch": 0.03301466129204866, "grad_norm": 0.629116952419281, "learning_rate": 1.650909739564752e-05, "loss": 0.3734, "step": 1851 }, { "epoch": 0.03303249741376235, "grad_norm": 0.5779350996017456, "learning_rate": 1.6518016410988227e-05, "loss": 0.2981, "step": 1852 }, { "epoch": 0.03305033353547605, "grad_norm": 0.45092159509658813, "learning_rate": 1.6526935426328936e-05, "loss": 0.301, "step": 1853 }, { "epoch": 0.03306816965718974, "grad_norm": 0.6753671169281006, "learning_rate": 1.653585444166964e-05, "loss": 0.3351, "step": 1854 }, { "epoch": 0.033086005778903436, "grad_norm": 0.49014580249786377, "learning_rate": 1.6544773457010346e-05, "loss": 0.307, "step": 1855 }, { "epoch": 0.03310384190061713, "grad_norm": 0.7182915806770325, "learning_rate": 1.6553692472351056e-05, "loss": 0.3411, "step": 1856 }, { "epoch": 0.033121678022330826, "grad_norm": 0.5920625329017639, "learning_rate": 1.656261148769176e-05, "loss": 0.3439, "step": 1857 }, { "epoch": 0.03313951414404452, "grad_norm": 0.7158372402191162, "learning_rate": 1.6571530503032465e-05, "loss": 0.3642, "step": 1858 }, { "epoch": 0.033157350265758216, "grad_norm": 0.6029052734375, "learning_rate": 1.658044951837317e-05, "loss": 0.2697, "step": 1859 }, { "epoch": 0.03317518638747191, "grad_norm": 0.43378040194511414, "learning_rate": 1.6589368533713878e-05, "loss": 0.2955, "step": 1860 }, { "epoch": 0.033193022509185606, "grad_norm": 0.47693780064582825, "learning_rate": 1.6598287549054587e-05, "loss": 0.2576, "step": 1861 }, { "epoch": 0.033210858630899294, "grad_norm": 0.4743706285953522, "learning_rate": 1.660720656439529e-05, "loss": 0.3305, "step": 1862 }, { "epoch": 0.03322869475261299, "grad_norm": 0.5159932374954224, "learning_rate": 1.6616125579735997e-05, "loss": 0.356, "step": 1863 }, { "epoch": 0.033246530874326684, "grad_norm": 0.5189605355262756, "learning_rate": 1.6625044595076707e-05, "loss": 0.341, "step": 1864 }, { "epoch": 0.03326436699604038, "grad_norm": 0.8925591707229614, "learning_rate": 1.663396361041741e-05, "loss": 0.2823, "step": 1865 }, { "epoch": 0.033282203117754074, "grad_norm": 0.648122251033783, "learning_rate": 1.6642882625758116e-05, "loss": 0.3705, "step": 1866 }, { "epoch": 0.03330003923946777, "grad_norm": 0.5023366212844849, "learning_rate": 1.6651801641098826e-05, "loss": 0.2878, "step": 1867 }, { "epoch": 0.033317875361181463, "grad_norm": 0.5993767380714417, "learning_rate": 1.666072065643953e-05, "loss": 0.3251, "step": 1868 }, { "epoch": 0.03333571148289516, "grad_norm": 0.6962936520576477, "learning_rate": 1.6669639671780235e-05, "loss": 0.3821, "step": 1869 }, { "epoch": 0.03335354760460885, "grad_norm": 0.5646497011184692, "learning_rate": 1.667855868712094e-05, "loss": 0.3277, "step": 1870 }, { "epoch": 0.03337138372632255, "grad_norm": 0.47788524627685547, "learning_rate": 1.6687477702461648e-05, "loss": 0.299, "step": 1871 }, { "epoch": 0.03338921984803624, "grad_norm": 0.4944382309913635, "learning_rate": 1.6696396717802358e-05, "loss": 0.3252, "step": 1872 }, { "epoch": 0.03340705596974994, "grad_norm": 0.5403092503547668, "learning_rate": 1.670531573314306e-05, "loss": 0.2805, "step": 1873 }, { "epoch": 0.03342489209146363, "grad_norm": 0.5597968697547913, "learning_rate": 1.6714234748483767e-05, "loss": 0.2987, "step": 1874 }, { "epoch": 0.03344272821317733, "grad_norm": 0.7297112345695496, "learning_rate": 1.6723153763824477e-05, "loss": 0.3411, "step": 1875 }, { "epoch": 0.03346056433489102, "grad_norm": 0.6010320782661438, "learning_rate": 1.673207277916518e-05, "loss": 0.3082, "step": 1876 }, { "epoch": 0.03347840045660472, "grad_norm": 0.5059168338775635, "learning_rate": 1.6740991794505886e-05, "loss": 0.3048, "step": 1877 }, { "epoch": 0.03349623657831841, "grad_norm": 0.6566490530967712, "learning_rate": 1.6749910809846596e-05, "loss": 0.3637, "step": 1878 }, { "epoch": 0.03351407270003211, "grad_norm": 0.6993565559387207, "learning_rate": 1.67588298251873e-05, "loss": 0.3342, "step": 1879 }, { "epoch": 0.0335319088217458, "grad_norm": 0.6532073616981506, "learning_rate": 1.6767748840528005e-05, "loss": 0.3718, "step": 1880 }, { "epoch": 0.0335497449434595, "grad_norm": 0.5813199877738953, "learning_rate": 1.6776667855868715e-05, "loss": 0.3486, "step": 1881 }, { "epoch": 0.033567581065173185, "grad_norm": 0.7437688708305359, "learning_rate": 1.6785586871209418e-05, "loss": 0.3141, "step": 1882 }, { "epoch": 0.03358541718688688, "grad_norm": 0.6422274112701416, "learning_rate": 1.6794505886550128e-05, "loss": 0.304, "step": 1883 }, { "epoch": 0.033603253308600575, "grad_norm": 0.7330365777015686, "learning_rate": 1.680342490189083e-05, "loss": 0.3759, "step": 1884 }, { "epoch": 0.03362108943031427, "grad_norm": 0.6227088570594788, "learning_rate": 1.6812343917231537e-05, "loss": 0.295, "step": 1885 }, { "epoch": 0.033638925552027965, "grad_norm": 1.264343500137329, "learning_rate": 1.6821262932572247e-05, "loss": 0.2965, "step": 1886 }, { "epoch": 0.03365676167374166, "grad_norm": 0.7335077524185181, "learning_rate": 1.683018194791295e-05, "loss": 0.3577, "step": 1887 }, { "epoch": 0.033674597795455355, "grad_norm": 0.49058955907821655, "learning_rate": 1.6839100963253656e-05, "loss": 0.2781, "step": 1888 }, { "epoch": 0.03369243391716905, "grad_norm": 0.4763009548187256, "learning_rate": 1.6848019978594366e-05, "loss": 0.3053, "step": 1889 }, { "epoch": 0.033710270038882745, "grad_norm": 0.9236243367195129, "learning_rate": 1.685693899393507e-05, "loss": 0.3252, "step": 1890 }, { "epoch": 0.03372810616059644, "grad_norm": 0.4823125898838043, "learning_rate": 1.6865858009275775e-05, "loss": 0.3179, "step": 1891 }, { "epoch": 0.033745942282310135, "grad_norm": 0.6278830170631409, "learning_rate": 1.6874777024616485e-05, "loss": 0.3573, "step": 1892 }, { "epoch": 0.03376377840402383, "grad_norm": 0.7841582298278809, "learning_rate": 1.6883696039957188e-05, "loss": 0.3618, "step": 1893 }, { "epoch": 0.033781614525737524, "grad_norm": 0.559029221534729, "learning_rate": 1.6892615055297895e-05, "loss": 0.3047, "step": 1894 }, { "epoch": 0.03379945064745122, "grad_norm": 0.5621006488800049, "learning_rate": 1.6901534070638604e-05, "loss": 0.2932, "step": 1895 }, { "epoch": 0.033817286769164914, "grad_norm": 0.6017270088195801, "learning_rate": 1.6910453085979307e-05, "loss": 0.3518, "step": 1896 }, { "epoch": 0.03383512289087861, "grad_norm": 0.47827112674713135, "learning_rate": 1.6919372101320017e-05, "loss": 0.2866, "step": 1897 }, { "epoch": 0.033852959012592304, "grad_norm": 0.572151780128479, "learning_rate": 1.692829111666072e-05, "loss": 0.3321, "step": 1898 }, { "epoch": 0.033870795134306, "grad_norm": 0.47333481907844543, "learning_rate": 1.6937210132001427e-05, "loss": 0.313, "step": 1899 }, { "epoch": 0.033888631256019694, "grad_norm": 0.5933525562286377, "learning_rate": 1.6946129147342136e-05, "loss": 0.2863, "step": 1900 }, { "epoch": 0.03390646737773339, "grad_norm": 0.5429176688194275, "learning_rate": 1.695504816268284e-05, "loss": 0.3129, "step": 1901 }, { "epoch": 0.03392430349944708, "grad_norm": 0.7529359459877014, "learning_rate": 1.6963967178023546e-05, "loss": 0.3121, "step": 1902 }, { "epoch": 0.03394213962116077, "grad_norm": 0.5944604873657227, "learning_rate": 1.6972886193364255e-05, "loss": 0.3949, "step": 1903 }, { "epoch": 0.03395997574287447, "grad_norm": 0.6306872367858887, "learning_rate": 1.698180520870496e-05, "loss": 0.4058, "step": 1904 }, { "epoch": 0.03397781186458816, "grad_norm": 0.6834776401519775, "learning_rate": 1.6990724224045665e-05, "loss": 0.3669, "step": 1905 }, { "epoch": 0.033995647986301857, "grad_norm": 0.5179616212844849, "learning_rate": 1.6999643239386375e-05, "loss": 0.371, "step": 1906 }, { "epoch": 0.03401348410801555, "grad_norm": 0.7298871874809265, "learning_rate": 1.7008562254727078e-05, "loss": 0.3361, "step": 1907 }, { "epoch": 0.034031320229729246, "grad_norm": 0.5836283564567566, "learning_rate": 1.7017481270067787e-05, "loss": 0.3226, "step": 1908 }, { "epoch": 0.03404915635144294, "grad_norm": 0.5526504516601562, "learning_rate": 1.702640028540849e-05, "loss": 0.3, "step": 1909 }, { "epoch": 0.034066992473156636, "grad_norm": 0.6150591969490051, "learning_rate": 1.7035319300749197e-05, "loss": 0.3208, "step": 1910 }, { "epoch": 0.03408482859487033, "grad_norm": 0.4714590609073639, "learning_rate": 1.7044238316089906e-05, "loss": 0.2804, "step": 1911 }, { "epoch": 0.034102664716584026, "grad_norm": 0.5982564091682434, "learning_rate": 1.705315733143061e-05, "loss": 0.2963, "step": 1912 }, { "epoch": 0.03412050083829772, "grad_norm": 0.6177613735198975, "learning_rate": 1.7062076346771316e-05, "loss": 0.3265, "step": 1913 }, { "epoch": 0.034138336960011416, "grad_norm": 0.9625240564346313, "learning_rate": 1.7070995362112026e-05, "loss": 0.4255, "step": 1914 }, { "epoch": 0.03415617308172511, "grad_norm": 0.5063337683677673, "learning_rate": 1.707991437745273e-05, "loss": 0.3545, "step": 1915 }, { "epoch": 0.034174009203438806, "grad_norm": 0.7352203726768494, "learning_rate": 1.7088833392793435e-05, "loss": 0.3495, "step": 1916 }, { "epoch": 0.0341918453251525, "grad_norm": 0.5571827292442322, "learning_rate": 1.7097752408134145e-05, "loss": 0.314, "step": 1917 }, { "epoch": 0.034209681446866196, "grad_norm": 0.5359615683555603, "learning_rate": 1.7106671423474848e-05, "loss": 0.3052, "step": 1918 }, { "epoch": 0.03422751756857989, "grad_norm": 0.9982262849807739, "learning_rate": 1.7115590438815558e-05, "loss": 0.3557, "step": 1919 }, { "epoch": 0.034245353690293585, "grad_norm": 0.5018155574798584, "learning_rate": 1.7124509454156264e-05, "loss": 0.3421, "step": 1920 }, { "epoch": 0.03426318981200728, "grad_norm": 0.5375298261642456, "learning_rate": 1.7133428469496967e-05, "loss": 0.3424, "step": 1921 }, { "epoch": 0.03428102593372097, "grad_norm": 0.6136654615402222, "learning_rate": 1.7142347484837677e-05, "loss": 0.3418, "step": 1922 }, { "epoch": 0.03429886205543466, "grad_norm": 0.5145890116691589, "learning_rate": 1.715126650017838e-05, "loss": 0.3421, "step": 1923 }, { "epoch": 0.03431669817714836, "grad_norm": 0.7545877695083618, "learning_rate": 1.7160185515519086e-05, "loss": 0.3328, "step": 1924 }, { "epoch": 0.03433453429886205, "grad_norm": 0.5231462121009827, "learning_rate": 1.7169104530859796e-05, "loss": 0.3492, "step": 1925 }, { "epoch": 0.03435237042057575, "grad_norm": 0.6672552227973938, "learning_rate": 1.71780235462005e-05, "loss": 0.2874, "step": 1926 }, { "epoch": 0.03437020654228944, "grad_norm": 0.5202288627624512, "learning_rate": 1.7186942561541205e-05, "loss": 0.3062, "step": 1927 }, { "epoch": 0.03438804266400314, "grad_norm": 0.4937322437763214, "learning_rate": 1.7195861576881915e-05, "loss": 0.377, "step": 1928 }, { "epoch": 0.03440587878571683, "grad_norm": 0.5765814185142517, "learning_rate": 1.7204780592222618e-05, "loss": 0.3257, "step": 1929 }, { "epoch": 0.03442371490743053, "grad_norm": 0.479180246591568, "learning_rate": 1.7213699607563324e-05, "loss": 0.2886, "step": 1930 }, { "epoch": 0.03444155102914422, "grad_norm": 0.7214354872703552, "learning_rate": 1.7222618622904034e-05, "loss": 0.3852, "step": 1931 }, { "epoch": 0.03445938715085792, "grad_norm": 0.4235411584377289, "learning_rate": 1.7231537638244737e-05, "loss": 0.2914, "step": 1932 }, { "epoch": 0.03447722327257161, "grad_norm": 0.5858898162841797, "learning_rate": 1.7240456653585447e-05, "loss": 0.3587, "step": 1933 }, { "epoch": 0.03449505939428531, "grad_norm": 0.517701268196106, "learning_rate": 1.724937566892615e-05, "loss": 0.3122, "step": 1934 }, { "epoch": 0.034512895515999, "grad_norm": 0.521261990070343, "learning_rate": 1.7258294684266856e-05, "loss": 0.3977, "step": 1935 }, { "epoch": 0.0345307316377127, "grad_norm": 0.4932325482368469, "learning_rate": 1.7267213699607566e-05, "loss": 0.3296, "step": 1936 }, { "epoch": 0.03454856775942639, "grad_norm": 0.6196635365486145, "learning_rate": 1.727613271494827e-05, "loss": 0.3566, "step": 1937 }, { "epoch": 0.03456640388114009, "grad_norm": 0.8973121643066406, "learning_rate": 1.7285051730288975e-05, "loss": 0.3155, "step": 1938 }, { "epoch": 0.03458424000285378, "grad_norm": 0.6645421385765076, "learning_rate": 1.7293970745629685e-05, "loss": 0.3316, "step": 1939 }, { "epoch": 0.03460207612456748, "grad_norm": 0.6201606392860413, "learning_rate": 1.7302889760970388e-05, "loss": 0.3755, "step": 1940 }, { "epoch": 0.03461991224628117, "grad_norm": 0.4170590043067932, "learning_rate": 1.7311808776311095e-05, "loss": 0.2868, "step": 1941 }, { "epoch": 0.03463774836799486, "grad_norm": 0.42760705947875977, "learning_rate": 1.7320727791651804e-05, "loss": 0.2734, "step": 1942 }, { "epoch": 0.034655584489708555, "grad_norm": 0.6392554044723511, "learning_rate": 1.7329646806992507e-05, "loss": 0.306, "step": 1943 }, { "epoch": 0.03467342061142225, "grad_norm": 0.37376856803894043, "learning_rate": 1.7338565822333217e-05, "loss": 0.2673, "step": 1944 }, { "epoch": 0.034691256733135944, "grad_norm": 0.534887969493866, "learning_rate": 1.7347484837673923e-05, "loss": 0.3133, "step": 1945 }, { "epoch": 0.03470909285484964, "grad_norm": 0.7607902884483337, "learning_rate": 1.7356403853014626e-05, "loss": 0.3811, "step": 1946 }, { "epoch": 0.034726928976563334, "grad_norm": 0.5955637097358704, "learning_rate": 1.7365322868355336e-05, "loss": 0.413, "step": 1947 }, { "epoch": 0.03474476509827703, "grad_norm": 0.5730603337287903, "learning_rate": 1.737424188369604e-05, "loss": 0.3591, "step": 1948 }, { "epoch": 0.034762601219990724, "grad_norm": 0.7538769841194153, "learning_rate": 1.7383160899036746e-05, "loss": 0.3859, "step": 1949 }, { "epoch": 0.03478043734170442, "grad_norm": 0.4605296552181244, "learning_rate": 1.7392079914377455e-05, "loss": 0.3387, "step": 1950 }, { "epoch": 0.034798273463418114, "grad_norm": 0.5367212891578674, "learning_rate": 1.740099892971816e-05, "loss": 0.3332, "step": 1951 }, { "epoch": 0.03481610958513181, "grad_norm": 0.5772411823272705, "learning_rate": 1.7409917945058865e-05, "loss": 0.3773, "step": 1952 }, { "epoch": 0.034833945706845504, "grad_norm": 0.5616193413734436, "learning_rate": 1.7418836960399574e-05, "loss": 0.3162, "step": 1953 }, { "epoch": 0.0348517818285592, "grad_norm": 0.5653926134109497, "learning_rate": 1.7427755975740277e-05, "loss": 0.3691, "step": 1954 }, { "epoch": 0.034869617950272894, "grad_norm": 0.4547867774963379, "learning_rate": 1.7436674991080987e-05, "loss": 0.2656, "step": 1955 }, { "epoch": 0.03488745407198659, "grad_norm": 0.6967268586158752, "learning_rate": 1.7445594006421694e-05, "loss": 0.4115, "step": 1956 }, { "epoch": 0.034905290193700284, "grad_norm": 0.6118252873420715, "learning_rate": 1.7454513021762397e-05, "loss": 0.3779, "step": 1957 }, { "epoch": 0.03492312631541398, "grad_norm": 0.618803083896637, "learning_rate": 1.7463432037103106e-05, "loss": 0.3375, "step": 1958 }, { "epoch": 0.03494096243712767, "grad_norm": 0.5251666307449341, "learning_rate": 1.7472351052443813e-05, "loss": 0.28, "step": 1959 }, { "epoch": 0.03495879855884137, "grad_norm": 0.5035682916641235, "learning_rate": 1.7481270067784516e-05, "loss": 0.3915, "step": 1960 }, { "epoch": 0.03497663468055506, "grad_norm": 0.5120066404342651, "learning_rate": 1.7490189083125226e-05, "loss": 0.2507, "step": 1961 }, { "epoch": 0.03499447080226876, "grad_norm": 0.9391610622406006, "learning_rate": 1.749910809846593e-05, "loss": 0.3243, "step": 1962 }, { "epoch": 0.035012306923982446, "grad_norm": 0.6144405007362366, "learning_rate": 1.7508027113806635e-05, "loss": 0.3327, "step": 1963 }, { "epoch": 0.03503014304569614, "grad_norm": 0.46523281931877136, "learning_rate": 1.7516946129147345e-05, "loss": 0.3191, "step": 1964 }, { "epoch": 0.035047979167409836, "grad_norm": 0.5145363211631775, "learning_rate": 1.7525865144488048e-05, "loss": 0.2986, "step": 1965 }, { "epoch": 0.03506581528912353, "grad_norm": 0.6042940616607666, "learning_rate": 1.7534784159828757e-05, "loss": 0.3554, "step": 1966 }, { "epoch": 0.035083651410837226, "grad_norm": 0.6096752285957336, "learning_rate": 1.7543703175169464e-05, "loss": 0.3637, "step": 1967 }, { "epoch": 0.03510148753255092, "grad_norm": 0.7093321681022644, "learning_rate": 1.7552622190510167e-05, "loss": 0.3532, "step": 1968 }, { "epoch": 0.035119323654264616, "grad_norm": 0.7536479234695435, "learning_rate": 1.7561541205850877e-05, "loss": 0.427, "step": 1969 }, { "epoch": 0.03513715977597831, "grad_norm": 0.5811682343482971, "learning_rate": 1.7570460221191583e-05, "loss": 0.3018, "step": 1970 }, { "epoch": 0.035154995897692005, "grad_norm": 0.5609343647956848, "learning_rate": 1.7579379236532286e-05, "loss": 0.3644, "step": 1971 }, { "epoch": 0.0351728320194057, "grad_norm": 0.6832839250564575, "learning_rate": 1.7588298251872996e-05, "loss": 0.3655, "step": 1972 }, { "epoch": 0.035190668141119395, "grad_norm": 0.6124131679534912, "learning_rate": 1.75972172672137e-05, "loss": 0.3418, "step": 1973 }, { "epoch": 0.03520850426283309, "grad_norm": 0.49659475684165955, "learning_rate": 1.7606136282554405e-05, "loss": 0.3601, "step": 1974 }, { "epoch": 0.035226340384546785, "grad_norm": 0.5575053691864014, "learning_rate": 1.7615055297895115e-05, "loss": 0.357, "step": 1975 }, { "epoch": 0.03524417650626048, "grad_norm": 0.7826827168464661, "learning_rate": 1.7623974313235818e-05, "loss": 0.347, "step": 1976 }, { "epoch": 0.035262012627974175, "grad_norm": 0.49193763732910156, "learning_rate": 1.7632893328576524e-05, "loss": 0.3214, "step": 1977 }, { "epoch": 0.03527984874968787, "grad_norm": 0.4988035261631012, "learning_rate": 1.7641812343917234e-05, "loss": 0.2832, "step": 1978 }, { "epoch": 0.035297684871401565, "grad_norm": 0.6013734936714172, "learning_rate": 1.7650731359257937e-05, "loss": 0.4104, "step": 1979 }, { "epoch": 0.03531552099311526, "grad_norm": 0.4478839337825775, "learning_rate": 1.7659650374598647e-05, "loss": 0.2856, "step": 1980 }, { "epoch": 0.035333357114828955, "grad_norm": 0.48602673411369324, "learning_rate": 1.7668569389939353e-05, "loss": 0.3413, "step": 1981 }, { "epoch": 0.03535119323654265, "grad_norm": 0.5484108328819275, "learning_rate": 1.7677488405280056e-05, "loss": 0.2965, "step": 1982 }, { "epoch": 0.03536902935825634, "grad_norm": 0.7806851267814636, "learning_rate": 1.7686407420620766e-05, "loss": 0.3772, "step": 1983 }, { "epoch": 0.03538686547997003, "grad_norm": 0.6586794853210449, "learning_rate": 1.7695326435961472e-05, "loss": 0.2941, "step": 1984 }, { "epoch": 0.03540470160168373, "grad_norm": 0.5919625759124756, "learning_rate": 1.7704245451302175e-05, "loss": 0.3908, "step": 1985 }, { "epoch": 0.03542253772339742, "grad_norm": 0.6526595950126648, "learning_rate": 1.7713164466642885e-05, "loss": 0.3964, "step": 1986 }, { "epoch": 0.03544037384511112, "grad_norm": 0.6588309407234192, "learning_rate": 1.7722083481983588e-05, "loss": 0.3099, "step": 1987 }, { "epoch": 0.03545820996682481, "grad_norm": 0.6340320706367493, "learning_rate": 1.7731002497324294e-05, "loss": 0.3396, "step": 1988 }, { "epoch": 0.03547604608853851, "grad_norm": 0.5385860204696655, "learning_rate": 1.7739921512665004e-05, "loss": 0.3035, "step": 1989 }, { "epoch": 0.0354938822102522, "grad_norm": 0.5645444393157959, "learning_rate": 1.7748840528005707e-05, "loss": 0.2993, "step": 1990 }, { "epoch": 0.0355117183319659, "grad_norm": 0.5452271699905396, "learning_rate": 1.7757759543346417e-05, "loss": 0.3267, "step": 1991 }, { "epoch": 0.03552955445367959, "grad_norm": 0.6098746061325073, "learning_rate": 1.7766678558687123e-05, "loss": 0.3503, "step": 1992 }, { "epoch": 0.03554739057539329, "grad_norm": 0.5513267517089844, "learning_rate": 1.7775597574027826e-05, "loss": 0.3347, "step": 1993 }, { "epoch": 0.03556522669710698, "grad_norm": 0.5899681448936462, "learning_rate": 1.7784516589368536e-05, "loss": 0.3196, "step": 1994 }, { "epoch": 0.03558306281882068, "grad_norm": 0.4549456238746643, "learning_rate": 1.7793435604709242e-05, "loss": 0.2949, "step": 1995 }, { "epoch": 0.03560089894053437, "grad_norm": 0.40827202796936035, "learning_rate": 1.7802354620049945e-05, "loss": 0.3124, "step": 1996 }, { "epoch": 0.035618735062248066, "grad_norm": 0.6398820877075195, "learning_rate": 1.7811273635390655e-05, "loss": 0.4124, "step": 1997 }, { "epoch": 0.03563657118396176, "grad_norm": 0.5127965807914734, "learning_rate": 1.782019265073136e-05, "loss": 0.3529, "step": 1998 }, { "epoch": 0.035654407305675456, "grad_norm": 0.5904882550239563, "learning_rate": 1.7829111666072065e-05, "loss": 0.3783, "step": 1999 }, { "epoch": 0.03567224342738915, "grad_norm": 0.4309324026107788, "learning_rate": 1.7838030681412774e-05, "loss": 0.3074, "step": 2000 }, { "epoch": 0.03567224342738915, "eval_loss": 0.2980582118034363, "eval_runtime": 1601.0462, "eval_samples_per_second": 0.64, "eval_steps_per_second": 0.107, "step": 2000 }, { "epoch": 0.035690079549102846, "grad_norm": 0.4615575671195984, "learning_rate": 1.7846949696753477e-05, "loss": 0.2872, "step": 2001 }, { "epoch": 0.03570791567081654, "grad_norm": 0.631496250629425, "learning_rate": 1.7855868712094187e-05, "loss": 0.3246, "step": 2002 }, { "epoch": 0.03572575179253023, "grad_norm": 0.6639876365661621, "learning_rate": 1.7864787727434893e-05, "loss": 0.3376, "step": 2003 }, { "epoch": 0.035743587914243924, "grad_norm": 0.7973620891571045, "learning_rate": 1.7873706742775596e-05, "loss": 0.348, "step": 2004 }, { "epoch": 0.03576142403595762, "grad_norm": 0.5009528398513794, "learning_rate": 1.7882625758116306e-05, "loss": 0.2803, "step": 2005 }, { "epoch": 0.035779260157671314, "grad_norm": 0.47912833094596863, "learning_rate": 1.7891544773457013e-05, "loss": 0.293, "step": 2006 }, { "epoch": 0.03579709627938501, "grad_norm": 0.45988672971725464, "learning_rate": 1.7900463788797716e-05, "loss": 0.3069, "step": 2007 }, { "epoch": 0.035814932401098704, "grad_norm": 0.5593786239624023, "learning_rate": 1.7909382804138425e-05, "loss": 0.3296, "step": 2008 }, { "epoch": 0.0358327685228124, "grad_norm": 0.4574684500694275, "learning_rate": 1.7918301819479132e-05, "loss": 0.2787, "step": 2009 }, { "epoch": 0.03585060464452609, "grad_norm": 0.6230701208114624, "learning_rate": 1.7927220834819835e-05, "loss": 0.3394, "step": 2010 }, { "epoch": 0.03586844076623979, "grad_norm": 0.580540657043457, "learning_rate": 1.7936139850160545e-05, "loss": 0.275, "step": 2011 }, { "epoch": 0.03588627688795348, "grad_norm": 0.8210400342941284, "learning_rate": 1.7945058865501248e-05, "loss": 0.3043, "step": 2012 }, { "epoch": 0.03590411300966718, "grad_norm": 0.5110849142074585, "learning_rate": 1.7953977880841954e-05, "loss": 0.2394, "step": 2013 }, { "epoch": 0.03592194913138087, "grad_norm": 0.5304075479507446, "learning_rate": 1.7962896896182664e-05, "loss": 0.3163, "step": 2014 }, { "epoch": 0.03593978525309457, "grad_norm": 0.5845052599906921, "learning_rate": 1.7971815911523367e-05, "loss": 0.3757, "step": 2015 }, { "epoch": 0.03595762137480826, "grad_norm": 0.4767301380634308, "learning_rate": 1.7980734926864076e-05, "loss": 0.3626, "step": 2016 }, { "epoch": 0.03597545749652196, "grad_norm": 0.5138675570487976, "learning_rate": 1.7989653942204783e-05, "loss": 0.28, "step": 2017 }, { "epoch": 0.03599329361823565, "grad_norm": 0.5862236618995667, "learning_rate": 1.7998572957545486e-05, "loss": 0.3175, "step": 2018 }, { "epoch": 0.03601112973994935, "grad_norm": 0.5228481292724609, "learning_rate": 1.8007491972886196e-05, "loss": 0.3104, "step": 2019 }, { "epoch": 0.03602896586166304, "grad_norm": 0.5921441912651062, "learning_rate": 1.8016410988226902e-05, "loss": 0.3723, "step": 2020 }, { "epoch": 0.03604680198337674, "grad_norm": 0.38433346152305603, "learning_rate": 1.8025330003567605e-05, "loss": 0.2831, "step": 2021 }, { "epoch": 0.03606463810509043, "grad_norm": 0.6258277893066406, "learning_rate": 1.8034249018908315e-05, "loss": 0.4046, "step": 2022 }, { "epoch": 0.03608247422680412, "grad_norm": 0.4964272379875183, "learning_rate": 1.804316803424902e-05, "loss": 0.2942, "step": 2023 }, { "epoch": 0.036100310348517815, "grad_norm": 0.5187031030654907, "learning_rate": 1.8052087049589724e-05, "loss": 0.3077, "step": 2024 }, { "epoch": 0.03611814647023151, "grad_norm": 0.6166718602180481, "learning_rate": 1.8061006064930434e-05, "loss": 0.3355, "step": 2025 }, { "epoch": 0.036135982591945205, "grad_norm": 0.44471755623817444, "learning_rate": 1.8069925080271137e-05, "loss": 0.2677, "step": 2026 }, { "epoch": 0.0361538187136589, "grad_norm": 0.8543984293937683, "learning_rate": 1.8078844095611847e-05, "loss": 0.3373, "step": 2027 }, { "epoch": 0.036171654835372595, "grad_norm": 0.5724572539329529, "learning_rate": 1.8087763110952553e-05, "loss": 0.3119, "step": 2028 }, { "epoch": 0.03618949095708629, "grad_norm": 0.5950539708137512, "learning_rate": 1.8096682126293256e-05, "loss": 0.3323, "step": 2029 }, { "epoch": 0.036207327078799985, "grad_norm": 0.4790019094944, "learning_rate": 1.8105601141633966e-05, "loss": 0.2828, "step": 2030 }, { "epoch": 0.03622516320051368, "grad_norm": 0.9051364660263062, "learning_rate": 1.8114520156974672e-05, "loss": 0.3469, "step": 2031 }, { "epoch": 0.036242999322227375, "grad_norm": 0.4470467269420624, "learning_rate": 1.8123439172315375e-05, "loss": 0.3137, "step": 2032 }, { "epoch": 0.03626083544394107, "grad_norm": 0.39944523572921753, "learning_rate": 1.8132358187656085e-05, "loss": 0.3224, "step": 2033 }, { "epoch": 0.036278671565654765, "grad_norm": 0.558316171169281, "learning_rate": 1.814127720299679e-05, "loss": 0.282, "step": 2034 }, { "epoch": 0.03629650768736846, "grad_norm": 0.45616936683654785, "learning_rate": 1.8150196218337494e-05, "loss": 0.2741, "step": 2035 }, { "epoch": 0.036314343809082154, "grad_norm": 0.5811489820480347, "learning_rate": 1.8159115233678204e-05, "loss": 0.3417, "step": 2036 }, { "epoch": 0.03633217993079585, "grad_norm": 0.5237880349159241, "learning_rate": 1.8168034249018907e-05, "loss": 0.3467, "step": 2037 }, { "epoch": 0.036350016052509544, "grad_norm": 0.535847008228302, "learning_rate": 1.8176953264359617e-05, "loss": 0.3689, "step": 2038 }, { "epoch": 0.03636785217422324, "grad_norm": 0.6724188327789307, "learning_rate": 1.8185872279700323e-05, "loss": 0.3404, "step": 2039 }, { "epoch": 0.036385688295936934, "grad_norm": 0.5290500521659851, "learning_rate": 1.8194791295041026e-05, "loss": 0.3456, "step": 2040 }, { "epoch": 0.03640352441765063, "grad_norm": 0.5779337286949158, "learning_rate": 1.8203710310381736e-05, "loss": 0.2705, "step": 2041 }, { "epoch": 0.036421360539364324, "grad_norm": 0.5553800463676453, "learning_rate": 1.8212629325722442e-05, "loss": 0.3096, "step": 2042 }, { "epoch": 0.03643919666107801, "grad_norm": 0.43628913164138794, "learning_rate": 1.8221548341063145e-05, "loss": 0.3255, "step": 2043 }, { "epoch": 0.03645703278279171, "grad_norm": 0.4505508542060852, "learning_rate": 1.8230467356403855e-05, "loss": 0.3219, "step": 2044 }, { "epoch": 0.0364748689045054, "grad_norm": 0.5080452561378479, "learning_rate": 1.823938637174456e-05, "loss": 0.3251, "step": 2045 }, { "epoch": 0.0364927050262191, "grad_norm": 0.6649768352508545, "learning_rate": 1.8248305387085264e-05, "loss": 0.4309, "step": 2046 }, { "epoch": 0.03651054114793279, "grad_norm": 0.4447711408138275, "learning_rate": 1.8257224402425974e-05, "loss": 0.3008, "step": 2047 }, { "epoch": 0.036528377269646486, "grad_norm": 0.44222530722618103, "learning_rate": 1.826614341776668e-05, "loss": 0.2426, "step": 2048 }, { "epoch": 0.03654621339136018, "grad_norm": 0.47739681601524353, "learning_rate": 1.8275062433107387e-05, "loss": 0.3007, "step": 2049 }, { "epoch": 0.036564049513073876, "grad_norm": 0.9375199675559998, "learning_rate": 1.8283981448448093e-05, "loss": 0.3428, "step": 2050 }, { "epoch": 0.03658188563478757, "grad_norm": 0.48347771167755127, "learning_rate": 1.8292900463788796e-05, "loss": 0.3713, "step": 2051 }, { "epoch": 0.036599721756501266, "grad_norm": 0.5927991271018982, "learning_rate": 1.8301819479129506e-05, "loss": 0.2797, "step": 2052 }, { "epoch": 0.03661755787821496, "grad_norm": 0.6231663227081299, "learning_rate": 1.8310738494470213e-05, "loss": 0.2601, "step": 2053 }, { "epoch": 0.036635393999928656, "grad_norm": 0.5645677447319031, "learning_rate": 1.8319657509810916e-05, "loss": 0.3493, "step": 2054 }, { "epoch": 0.03665323012164235, "grad_norm": 0.5088529586791992, "learning_rate": 1.8328576525151625e-05, "loss": 0.2935, "step": 2055 }, { "epoch": 0.036671066243356046, "grad_norm": 0.4417657256126404, "learning_rate": 1.833749554049233e-05, "loss": 0.2936, "step": 2056 }, { "epoch": 0.03668890236506974, "grad_norm": 0.47601479291915894, "learning_rate": 1.8346414555833035e-05, "loss": 0.3226, "step": 2057 }, { "epoch": 0.036706738486783436, "grad_norm": 0.49755915999412537, "learning_rate": 1.8355333571173744e-05, "loss": 0.255, "step": 2058 }, { "epoch": 0.03672457460849713, "grad_norm": 0.6121598482131958, "learning_rate": 1.836425258651445e-05, "loss": 0.291, "step": 2059 }, { "epoch": 0.036742410730210825, "grad_norm": 0.5487620830535889, "learning_rate": 1.8373171601855154e-05, "loss": 0.3913, "step": 2060 }, { "epoch": 0.03676024685192452, "grad_norm": 0.422702431678772, "learning_rate": 1.8382090617195864e-05, "loss": 0.2662, "step": 2061 }, { "epoch": 0.036778082973638215, "grad_norm": 0.5154445171356201, "learning_rate": 1.839100963253657e-05, "loss": 0.318, "step": 2062 }, { "epoch": 0.0367959190953519, "grad_norm": 0.8006268739700317, "learning_rate": 1.8399928647877276e-05, "loss": 0.3853, "step": 2063 }, { "epoch": 0.0368137552170656, "grad_norm": 0.45608943700790405, "learning_rate": 1.8408847663217983e-05, "loss": 0.3074, "step": 2064 }, { "epoch": 0.03683159133877929, "grad_norm": 0.7384217977523804, "learning_rate": 1.8417766678558686e-05, "loss": 0.3378, "step": 2065 }, { "epoch": 0.03684942746049299, "grad_norm": 0.5264842510223389, "learning_rate": 1.8426685693899395e-05, "loss": 0.3162, "step": 2066 }, { "epoch": 0.03686726358220668, "grad_norm": 0.5197806358337402, "learning_rate": 1.8435604709240102e-05, "loss": 0.3134, "step": 2067 }, { "epoch": 0.03688509970392038, "grad_norm": 0.5372104048728943, "learning_rate": 1.8444523724580805e-05, "loss": 0.2726, "step": 2068 }, { "epoch": 0.03690293582563407, "grad_norm": 0.5655398368835449, "learning_rate": 1.8453442739921515e-05, "loss": 0.2893, "step": 2069 }, { "epoch": 0.03692077194734777, "grad_norm": 1.0206730365753174, "learning_rate": 1.846236175526222e-05, "loss": 0.3049, "step": 2070 }, { "epoch": 0.03693860806906146, "grad_norm": 0.7678471207618713, "learning_rate": 1.8471280770602924e-05, "loss": 0.3114, "step": 2071 }, { "epoch": 0.03695644419077516, "grad_norm": 0.5387485027313232, "learning_rate": 1.8480199785943634e-05, "loss": 0.3264, "step": 2072 }, { "epoch": 0.03697428031248885, "grad_norm": 0.5938040018081665, "learning_rate": 1.848911880128434e-05, "loss": 0.3455, "step": 2073 }, { "epoch": 0.03699211643420255, "grad_norm": 0.5923523306846619, "learning_rate": 1.8498037816625046e-05, "loss": 0.3772, "step": 2074 }, { "epoch": 0.03700995255591624, "grad_norm": 0.8520667552947998, "learning_rate": 1.8506956831965753e-05, "loss": 0.313, "step": 2075 }, { "epoch": 0.03702778867762994, "grad_norm": 0.897028923034668, "learning_rate": 1.8515875847306456e-05, "loss": 0.2922, "step": 2076 }, { "epoch": 0.03704562479934363, "grad_norm": 0.6438193321228027, "learning_rate": 1.8524794862647166e-05, "loss": 0.3407, "step": 2077 }, { "epoch": 0.03706346092105733, "grad_norm": 0.4676547050476074, "learning_rate": 1.8533713877987872e-05, "loss": 0.3665, "step": 2078 }, { "epoch": 0.03708129704277102, "grad_norm": 0.6205225586891174, "learning_rate": 1.8542632893328575e-05, "loss": 0.3666, "step": 2079 }, { "epoch": 0.03709913316448472, "grad_norm": 0.4931584298610687, "learning_rate": 1.8551551908669285e-05, "loss": 0.2992, "step": 2080 }, { "epoch": 0.03711696928619841, "grad_norm": 1.0366450548171997, "learning_rate": 1.856047092400999e-05, "loss": 0.3023, "step": 2081 }, { "epoch": 0.03713480540791211, "grad_norm": 0.530633807182312, "learning_rate": 1.8569389939350694e-05, "loss": 0.2977, "step": 2082 }, { "epoch": 0.037152641529625795, "grad_norm": 0.44546690583229065, "learning_rate": 1.8578308954691404e-05, "loss": 0.3121, "step": 2083 }, { "epoch": 0.03717047765133949, "grad_norm": 0.535554051399231, "learning_rate": 1.858722797003211e-05, "loss": 0.3141, "step": 2084 }, { "epoch": 0.037188313773053185, "grad_norm": 0.5873515009880066, "learning_rate": 1.8596146985372817e-05, "loss": 0.3546, "step": 2085 }, { "epoch": 0.03720614989476688, "grad_norm": 0.5182445049285889, "learning_rate": 1.8605066000713523e-05, "loss": 0.3037, "step": 2086 }, { "epoch": 0.037223986016480574, "grad_norm": 0.38863617181777954, "learning_rate": 1.861398501605423e-05, "loss": 0.2312, "step": 2087 }, { "epoch": 0.03724182213819427, "grad_norm": 0.4798535406589508, "learning_rate": 1.8622904031394936e-05, "loss": 0.305, "step": 2088 }, { "epoch": 0.037259658259907964, "grad_norm": 0.5328872799873352, "learning_rate": 1.8631823046735642e-05, "loss": 0.3493, "step": 2089 }, { "epoch": 0.03727749438162166, "grad_norm": 0.6318294405937195, "learning_rate": 1.8640742062076345e-05, "loss": 0.3118, "step": 2090 }, { "epoch": 0.037295330503335354, "grad_norm": 0.37853115797042847, "learning_rate": 1.8649661077417055e-05, "loss": 0.2864, "step": 2091 }, { "epoch": 0.03731316662504905, "grad_norm": 0.41678526997566223, "learning_rate": 1.865858009275776e-05, "loss": 0.2949, "step": 2092 }, { "epoch": 0.037331002746762744, "grad_norm": 0.4275762140750885, "learning_rate": 1.8667499108098464e-05, "loss": 0.2948, "step": 2093 }, { "epoch": 0.03734883886847644, "grad_norm": 0.4504309892654419, "learning_rate": 1.8676418123439174e-05, "loss": 0.284, "step": 2094 }, { "epoch": 0.037366674990190134, "grad_norm": 0.445270299911499, "learning_rate": 1.868533713877988e-05, "loss": 0.2869, "step": 2095 }, { "epoch": 0.03738451111190383, "grad_norm": 0.9869561791419983, "learning_rate": 1.8694256154120583e-05, "loss": 0.351, "step": 2096 }, { "epoch": 0.037402347233617524, "grad_norm": 0.616989254951477, "learning_rate": 1.8703175169461293e-05, "loss": 0.3719, "step": 2097 }, { "epoch": 0.03742018335533122, "grad_norm": 0.9821555018424988, "learning_rate": 1.8712094184802e-05, "loss": 0.3739, "step": 2098 }, { "epoch": 0.03743801947704491, "grad_norm": 0.6926895976066589, "learning_rate": 1.8721013200142706e-05, "loss": 0.3178, "step": 2099 }, { "epoch": 0.03745585559875861, "grad_norm": 0.46907535195350647, "learning_rate": 1.8729932215483412e-05, "loss": 0.2822, "step": 2100 }, { "epoch": 0.0374736917204723, "grad_norm": 0.46654942631721497, "learning_rate": 1.873885123082412e-05, "loss": 0.3239, "step": 2101 }, { "epoch": 0.037491527842186, "grad_norm": 0.576643705368042, "learning_rate": 1.8747770246164825e-05, "loss": 0.3603, "step": 2102 }, { "epoch": 0.037509363963899686, "grad_norm": 0.5382814407348633, "learning_rate": 1.875668926150553e-05, "loss": 0.3458, "step": 2103 }, { "epoch": 0.03752720008561338, "grad_norm": 0.45998647809028625, "learning_rate": 1.8765608276846235e-05, "loss": 0.2816, "step": 2104 }, { "epoch": 0.037545036207327076, "grad_norm": 0.5114442706108093, "learning_rate": 1.8774527292186944e-05, "loss": 0.3352, "step": 2105 }, { "epoch": 0.03756287232904077, "grad_norm": 0.4888891577720642, "learning_rate": 1.878344630752765e-05, "loss": 0.3226, "step": 2106 }, { "epoch": 0.037580708450754466, "grad_norm": 0.6054527759552002, "learning_rate": 1.8792365322868354e-05, "loss": 0.3493, "step": 2107 }, { "epoch": 0.03759854457246816, "grad_norm": 0.5286481380462646, "learning_rate": 1.8801284338209063e-05, "loss": 0.3151, "step": 2108 }, { "epoch": 0.037616380694181856, "grad_norm": 0.551657497882843, "learning_rate": 1.881020335354977e-05, "loss": 0.2909, "step": 2109 }, { "epoch": 0.03763421681589555, "grad_norm": 0.524308443069458, "learning_rate": 1.8819122368890476e-05, "loss": 0.314, "step": 2110 }, { "epoch": 0.037652052937609246, "grad_norm": 0.8535147309303284, "learning_rate": 1.8828041384231183e-05, "loss": 0.336, "step": 2111 }, { "epoch": 0.03766988905932294, "grad_norm": 0.5297061800956726, "learning_rate": 1.883696039957189e-05, "loss": 0.425, "step": 2112 }, { "epoch": 0.037687725181036635, "grad_norm": 0.46865183115005493, "learning_rate": 1.8845879414912595e-05, "loss": 0.252, "step": 2113 }, { "epoch": 0.03770556130275033, "grad_norm": 0.5673788785934448, "learning_rate": 1.8854798430253302e-05, "loss": 0.366, "step": 2114 }, { "epoch": 0.037723397424464025, "grad_norm": 0.4931465685367584, "learning_rate": 1.8863717445594005e-05, "loss": 0.3285, "step": 2115 }, { "epoch": 0.03774123354617772, "grad_norm": 0.4411163926124573, "learning_rate": 1.8872636460934714e-05, "loss": 0.3042, "step": 2116 }, { "epoch": 0.037759069667891415, "grad_norm": 0.7002178430557251, "learning_rate": 1.888155547627542e-05, "loss": 0.3235, "step": 2117 }, { "epoch": 0.03777690578960511, "grad_norm": 0.5767307281494141, "learning_rate": 1.8890474491616124e-05, "loss": 0.3519, "step": 2118 }, { "epoch": 0.037794741911318805, "grad_norm": 0.6850040555000305, "learning_rate": 1.8899393506956834e-05, "loss": 0.3319, "step": 2119 }, { "epoch": 0.0378125780330325, "grad_norm": 0.4674622714519501, "learning_rate": 1.890831252229754e-05, "loss": 0.2911, "step": 2120 }, { "epoch": 0.037830414154746195, "grad_norm": 0.4767864942550659, "learning_rate": 1.8917231537638246e-05, "loss": 0.345, "step": 2121 }, { "epoch": 0.03784825027645989, "grad_norm": 0.3986469507217407, "learning_rate": 1.8926150552978953e-05, "loss": 0.3092, "step": 2122 }, { "epoch": 0.037866086398173585, "grad_norm": 0.4365829527378082, "learning_rate": 1.893506956831966e-05, "loss": 0.2793, "step": 2123 }, { "epoch": 0.03788392251988727, "grad_norm": 0.6047873497009277, "learning_rate": 1.8943988583660366e-05, "loss": 0.3328, "step": 2124 }, { "epoch": 0.03790175864160097, "grad_norm": 0.42645469307899475, "learning_rate": 1.8952907599001072e-05, "loss": 0.3107, "step": 2125 }, { "epoch": 0.03791959476331466, "grad_norm": 0.5384413003921509, "learning_rate": 1.8961826614341778e-05, "loss": 0.374, "step": 2126 }, { "epoch": 0.03793743088502836, "grad_norm": 0.4081381559371948, "learning_rate": 1.8970745629682485e-05, "loss": 0.2542, "step": 2127 }, { "epoch": 0.03795526700674205, "grad_norm": 0.4883652925491333, "learning_rate": 1.897966464502319e-05, "loss": 0.3227, "step": 2128 }, { "epoch": 0.03797310312845575, "grad_norm": 0.45527878403663635, "learning_rate": 1.8988583660363894e-05, "loss": 0.2766, "step": 2129 }, { "epoch": 0.03799093925016944, "grad_norm": 0.560852587223053, "learning_rate": 1.8997502675704604e-05, "loss": 0.3393, "step": 2130 }, { "epoch": 0.03800877537188314, "grad_norm": 0.5652244687080383, "learning_rate": 1.900642169104531e-05, "loss": 0.3451, "step": 2131 }, { "epoch": 0.03802661149359683, "grad_norm": 1.061940312385559, "learning_rate": 1.9015340706386017e-05, "loss": 0.2981, "step": 2132 }, { "epoch": 0.03804444761531053, "grad_norm": 0.5608810186386108, "learning_rate": 1.9024259721726723e-05, "loss": 0.2944, "step": 2133 }, { "epoch": 0.03806228373702422, "grad_norm": 0.6922007203102112, "learning_rate": 1.903317873706743e-05, "loss": 0.3341, "step": 2134 }, { "epoch": 0.03808011985873792, "grad_norm": 0.5143476724624634, "learning_rate": 1.9042097752408136e-05, "loss": 0.2751, "step": 2135 }, { "epoch": 0.03809795598045161, "grad_norm": 0.4784122109413147, "learning_rate": 1.9051016767748842e-05, "loss": 0.3042, "step": 2136 }, { "epoch": 0.038115792102165306, "grad_norm": 0.41936033964157104, "learning_rate": 1.905993578308955e-05, "loss": 0.3194, "step": 2137 }, { "epoch": 0.038133628223879, "grad_norm": 0.6206886172294617, "learning_rate": 1.9068854798430255e-05, "loss": 0.3369, "step": 2138 }, { "epoch": 0.038151464345592696, "grad_norm": 0.4272455871105194, "learning_rate": 1.907777381377096e-05, "loss": 0.2785, "step": 2139 }, { "epoch": 0.03816930046730639, "grad_norm": 0.39360836148262024, "learning_rate": 1.9086692829111664e-05, "loss": 0.322, "step": 2140 }, { "epoch": 0.038187136589020086, "grad_norm": 0.4603310823440552, "learning_rate": 1.9095611844452374e-05, "loss": 0.3164, "step": 2141 }, { "epoch": 0.03820497271073378, "grad_norm": 0.6076071858406067, "learning_rate": 1.910453085979308e-05, "loss": 0.3911, "step": 2142 }, { "epoch": 0.038222808832447476, "grad_norm": 0.658783495426178, "learning_rate": 1.9113449875133783e-05, "loss": 0.2904, "step": 2143 }, { "epoch": 0.038240644954161164, "grad_norm": 0.5335296988487244, "learning_rate": 1.9122368890474493e-05, "loss": 0.3337, "step": 2144 }, { "epoch": 0.03825848107587486, "grad_norm": 0.5562210083007812, "learning_rate": 1.91312879058152e-05, "loss": 0.2884, "step": 2145 }, { "epoch": 0.038276317197588554, "grad_norm": 0.5467308759689331, "learning_rate": 1.9140206921155906e-05, "loss": 0.2867, "step": 2146 }, { "epoch": 0.03829415331930225, "grad_norm": 0.8593414425849915, "learning_rate": 1.9149125936496612e-05, "loss": 0.3441, "step": 2147 }, { "epoch": 0.038311989441015944, "grad_norm": 0.5408951640129089, "learning_rate": 1.915804495183732e-05, "loss": 0.3497, "step": 2148 }, { "epoch": 0.03832982556272964, "grad_norm": 0.5210184454917908, "learning_rate": 1.9166963967178025e-05, "loss": 0.3445, "step": 2149 }, { "epoch": 0.038347661684443334, "grad_norm": 0.5488189458847046, "learning_rate": 1.917588298251873e-05, "loss": 0.3829, "step": 2150 }, { "epoch": 0.03836549780615703, "grad_norm": 0.597741425037384, "learning_rate": 1.9184801997859438e-05, "loss": 0.3547, "step": 2151 }, { "epoch": 0.03838333392787072, "grad_norm": 0.48983892798423767, "learning_rate": 1.9193721013200144e-05, "loss": 0.3371, "step": 2152 }, { "epoch": 0.03840117004958442, "grad_norm": 0.4413568675518036, "learning_rate": 1.920264002854085e-05, "loss": 0.3583, "step": 2153 }, { "epoch": 0.03841900617129811, "grad_norm": 0.9008107781410217, "learning_rate": 1.9211559043881554e-05, "loss": 0.3591, "step": 2154 }, { "epoch": 0.03843684229301181, "grad_norm": 0.7133767604827881, "learning_rate": 1.9220478059222263e-05, "loss": 0.3278, "step": 2155 }, { "epoch": 0.0384546784147255, "grad_norm": 0.5932398438453674, "learning_rate": 1.922939707456297e-05, "loss": 0.3929, "step": 2156 }, { "epoch": 0.0384725145364392, "grad_norm": 0.5169602036476135, "learning_rate": 1.9238316089903676e-05, "loss": 0.3668, "step": 2157 }, { "epoch": 0.03849035065815289, "grad_norm": 0.4599943161010742, "learning_rate": 1.9247235105244382e-05, "loss": 0.3313, "step": 2158 }, { "epoch": 0.03850818677986659, "grad_norm": 0.5276328921318054, "learning_rate": 1.925615412058509e-05, "loss": 0.3127, "step": 2159 }, { "epoch": 0.03852602290158028, "grad_norm": 0.4524444341659546, "learning_rate": 1.9265073135925795e-05, "loss": 0.2773, "step": 2160 }, { "epoch": 0.03854385902329398, "grad_norm": 0.5512828826904297, "learning_rate": 1.92739921512665e-05, "loss": 0.3223, "step": 2161 }, { "epoch": 0.03856169514500767, "grad_norm": 0.4261593222618103, "learning_rate": 1.9282911166607208e-05, "loss": 0.2805, "step": 2162 }, { "epoch": 0.03857953126672137, "grad_norm": 1.0806833505630493, "learning_rate": 1.9291830181947914e-05, "loss": 0.3005, "step": 2163 }, { "epoch": 0.038597367388435055, "grad_norm": 0.5254951119422913, "learning_rate": 1.930074919728862e-05, "loss": 0.3243, "step": 2164 }, { "epoch": 0.03861520351014875, "grad_norm": 0.9907423257827759, "learning_rate": 1.9309668212629327e-05, "loss": 0.2794, "step": 2165 }, { "epoch": 0.038633039631862445, "grad_norm": 0.5167833566665649, "learning_rate": 1.9318587227970033e-05, "loss": 0.297, "step": 2166 }, { "epoch": 0.03865087575357614, "grad_norm": 0.7189813852310181, "learning_rate": 1.932750624331074e-05, "loss": 0.3538, "step": 2167 }, { "epoch": 0.038668711875289835, "grad_norm": 0.4462776482105255, "learning_rate": 1.9336425258651446e-05, "loss": 0.3379, "step": 2168 }, { "epoch": 0.03868654799700353, "grad_norm": 0.5882665514945984, "learning_rate": 1.9345344273992153e-05, "loss": 0.2956, "step": 2169 }, { "epoch": 0.038704384118717225, "grad_norm": 0.41937246918678284, "learning_rate": 1.935426328933286e-05, "loss": 0.3274, "step": 2170 }, { "epoch": 0.03872222024043092, "grad_norm": 0.418430358171463, "learning_rate": 1.9363182304673565e-05, "loss": 0.2888, "step": 2171 }, { "epoch": 0.038740056362144615, "grad_norm": 0.4083843231201172, "learning_rate": 1.9372101320014272e-05, "loss": 0.3005, "step": 2172 }, { "epoch": 0.03875789248385831, "grad_norm": 0.4693141579627991, "learning_rate": 1.9381020335354978e-05, "loss": 0.3078, "step": 2173 }, { "epoch": 0.038775728605572005, "grad_norm": 0.43590137362480164, "learning_rate": 1.9389939350695685e-05, "loss": 0.302, "step": 2174 }, { "epoch": 0.0387935647272857, "grad_norm": 0.5631271004676819, "learning_rate": 1.939885836603639e-05, "loss": 0.2835, "step": 2175 }, { "epoch": 0.038811400848999394, "grad_norm": 0.6374658346176147, "learning_rate": 1.9407777381377097e-05, "loss": 0.2953, "step": 2176 }, { "epoch": 0.03882923697071309, "grad_norm": 0.5642484426498413, "learning_rate": 1.9416696396717804e-05, "loss": 0.2956, "step": 2177 }, { "epoch": 0.038847073092426784, "grad_norm": 0.42883986234664917, "learning_rate": 1.942561541205851e-05, "loss": 0.2713, "step": 2178 }, { "epoch": 0.03886490921414048, "grad_norm": 0.6185327768325806, "learning_rate": 1.9434534427399213e-05, "loss": 0.3097, "step": 2179 }, { "epoch": 0.038882745335854174, "grad_norm": 0.47632378339767456, "learning_rate": 1.9443453442739923e-05, "loss": 0.3023, "step": 2180 }, { "epoch": 0.03890058145756787, "grad_norm": 0.6266023516654968, "learning_rate": 1.945237245808063e-05, "loss": 0.3306, "step": 2181 }, { "epoch": 0.038918417579281564, "grad_norm": 0.7256380915641785, "learning_rate": 1.9461291473421336e-05, "loss": 0.2863, "step": 2182 }, { "epoch": 0.03893625370099526, "grad_norm": 0.5386956334114075, "learning_rate": 1.9470210488762042e-05, "loss": 0.331, "step": 2183 }, { "epoch": 0.03895408982270895, "grad_norm": 0.5050217509269714, "learning_rate": 1.947912950410275e-05, "loss": 0.2826, "step": 2184 }, { "epoch": 0.03897192594442264, "grad_norm": 0.7591911554336548, "learning_rate": 1.9488048519443455e-05, "loss": 0.3181, "step": 2185 }, { "epoch": 0.03898976206613634, "grad_norm": 0.5025246739387512, "learning_rate": 1.949696753478416e-05, "loss": 0.2636, "step": 2186 }, { "epoch": 0.03900759818785003, "grad_norm": 0.6930098533630371, "learning_rate": 1.9505886550124867e-05, "loss": 0.3118, "step": 2187 }, { "epoch": 0.03902543430956373, "grad_norm": 0.6897455453872681, "learning_rate": 1.9514805565465574e-05, "loss": 0.3103, "step": 2188 }, { "epoch": 0.03904327043127742, "grad_norm": 0.4128413200378418, "learning_rate": 1.952372458080628e-05, "loss": 0.3076, "step": 2189 }, { "epoch": 0.039061106552991116, "grad_norm": 0.5418302416801453, "learning_rate": 1.9532643596146987e-05, "loss": 0.2823, "step": 2190 }, { "epoch": 0.03907894267470481, "grad_norm": 0.8713166117668152, "learning_rate": 1.9541562611487693e-05, "loss": 0.2933, "step": 2191 }, { "epoch": 0.039096778796418506, "grad_norm": 0.4038330018520355, "learning_rate": 1.95504816268284e-05, "loss": 0.3019, "step": 2192 }, { "epoch": 0.0391146149181322, "grad_norm": 0.4462745189666748, "learning_rate": 1.9559400642169106e-05, "loss": 0.3249, "step": 2193 }, { "epoch": 0.039132451039845896, "grad_norm": 0.5353034138679504, "learning_rate": 1.9568319657509812e-05, "loss": 0.2453, "step": 2194 }, { "epoch": 0.03915028716155959, "grad_norm": 0.45148083567619324, "learning_rate": 1.957723867285052e-05, "loss": 0.3134, "step": 2195 }, { "epoch": 0.039168123283273286, "grad_norm": 0.48606717586517334, "learning_rate": 1.9586157688191225e-05, "loss": 0.3017, "step": 2196 }, { "epoch": 0.03918595940498698, "grad_norm": 0.6264825463294983, "learning_rate": 1.959507670353193e-05, "loss": 0.2939, "step": 2197 }, { "epoch": 0.039203795526700676, "grad_norm": 0.481516569852829, "learning_rate": 1.9603995718872638e-05, "loss": 0.2816, "step": 2198 }, { "epoch": 0.03922163164841437, "grad_norm": 0.5619494318962097, "learning_rate": 1.9612914734213344e-05, "loss": 0.3279, "step": 2199 }, { "epoch": 0.039239467770128066, "grad_norm": 0.4352967441082001, "learning_rate": 1.962183374955405e-05, "loss": 0.2899, "step": 2200 }, { "epoch": 0.03925730389184176, "grad_norm": 0.4811791479587555, "learning_rate": 1.9630752764894757e-05, "loss": 0.3475, "step": 2201 }, { "epoch": 0.039275140013555455, "grad_norm": 0.5548359751701355, "learning_rate": 1.9639671780235463e-05, "loss": 0.2496, "step": 2202 }, { "epoch": 0.03929297613526915, "grad_norm": 0.4365730285644531, "learning_rate": 1.964859079557617e-05, "loss": 0.2935, "step": 2203 }, { "epoch": 0.03931081225698284, "grad_norm": 0.3903549313545227, "learning_rate": 1.9657509810916876e-05, "loss": 0.3007, "step": 2204 }, { "epoch": 0.03932864837869653, "grad_norm": 0.5752881765365601, "learning_rate": 1.9666428826257582e-05, "loss": 0.3403, "step": 2205 }, { "epoch": 0.03934648450041023, "grad_norm": 0.4807453155517578, "learning_rate": 1.967534784159829e-05, "loss": 0.2952, "step": 2206 }, { "epoch": 0.03936432062212392, "grad_norm": 0.5764487981796265, "learning_rate": 1.9684266856938995e-05, "loss": 0.363, "step": 2207 }, { "epoch": 0.03938215674383762, "grad_norm": 0.5190560221672058, "learning_rate": 1.96931858722797e-05, "loss": 0.2629, "step": 2208 }, { "epoch": 0.03939999286555131, "grad_norm": 0.7670960426330566, "learning_rate": 1.9702104887620408e-05, "loss": 0.3365, "step": 2209 }, { "epoch": 0.03941782898726501, "grad_norm": 0.45047062635421753, "learning_rate": 1.9711023902961114e-05, "loss": 0.2565, "step": 2210 }, { "epoch": 0.0394356651089787, "grad_norm": 0.4333064556121826, "learning_rate": 1.971994291830182e-05, "loss": 0.2913, "step": 2211 }, { "epoch": 0.0394535012306924, "grad_norm": 0.4812524914741516, "learning_rate": 1.9728861933642527e-05, "loss": 0.2618, "step": 2212 }, { "epoch": 0.03947133735240609, "grad_norm": 0.7026241421699524, "learning_rate": 1.9737780948983233e-05, "loss": 0.315, "step": 2213 }, { "epoch": 0.03948917347411979, "grad_norm": 0.6039960384368896, "learning_rate": 1.974669996432394e-05, "loss": 0.2947, "step": 2214 }, { "epoch": 0.03950700959583348, "grad_norm": 0.5692183971405029, "learning_rate": 1.9755618979664646e-05, "loss": 0.3261, "step": 2215 }, { "epoch": 0.03952484571754718, "grad_norm": 0.9089668393135071, "learning_rate": 1.9764537995005353e-05, "loss": 0.3524, "step": 2216 }, { "epoch": 0.03954268183926087, "grad_norm": 0.6296941041946411, "learning_rate": 1.977345701034606e-05, "loss": 0.3383, "step": 2217 }, { "epoch": 0.03956051796097457, "grad_norm": 0.4746566414833069, "learning_rate": 1.9782376025686765e-05, "loss": 0.2781, "step": 2218 }, { "epoch": 0.03957835408268826, "grad_norm": 0.43100976943969727, "learning_rate": 1.979129504102747e-05, "loss": 0.2636, "step": 2219 }, { "epoch": 0.03959619020440196, "grad_norm": 0.44783324003219604, "learning_rate": 1.9800214056368178e-05, "loss": 0.2593, "step": 2220 }, { "epoch": 0.03961402632611565, "grad_norm": 0.5706475973129272, "learning_rate": 1.9809133071708884e-05, "loss": 0.3122, "step": 2221 }, { "epoch": 0.03963186244782935, "grad_norm": 0.4727674722671509, "learning_rate": 1.981805208704959e-05, "loss": 0.264, "step": 2222 }, { "epoch": 0.03964969856954304, "grad_norm": 0.41251450777053833, "learning_rate": 1.9826971102390297e-05, "loss": 0.3104, "step": 2223 }, { "epoch": 0.03966753469125673, "grad_norm": 0.533075749874115, "learning_rate": 1.9835890117731004e-05, "loss": 0.3244, "step": 2224 }, { "epoch": 0.039685370812970425, "grad_norm": 0.4140982925891876, "learning_rate": 1.984480913307171e-05, "loss": 0.3028, "step": 2225 }, { "epoch": 0.03970320693468412, "grad_norm": 0.6157547831535339, "learning_rate": 1.9853728148412416e-05, "loss": 0.3311, "step": 2226 }, { "epoch": 0.039721043056397815, "grad_norm": 0.5803517699241638, "learning_rate": 1.9862647163753123e-05, "loss": 0.3808, "step": 2227 }, { "epoch": 0.03973887917811151, "grad_norm": 0.5734399557113647, "learning_rate": 1.987156617909383e-05, "loss": 0.3543, "step": 2228 }, { "epoch": 0.039756715299825204, "grad_norm": 0.618794858455658, "learning_rate": 1.9880485194434535e-05, "loss": 0.3758, "step": 2229 }, { "epoch": 0.0397745514215389, "grad_norm": 0.5295289158821106, "learning_rate": 1.9889404209775242e-05, "loss": 0.3212, "step": 2230 }, { "epoch": 0.039792387543252594, "grad_norm": 0.6505454182624817, "learning_rate": 1.9898323225115948e-05, "loss": 0.3918, "step": 2231 }, { "epoch": 0.03981022366496629, "grad_norm": 0.4140424132347107, "learning_rate": 1.9907242240456655e-05, "loss": 0.2818, "step": 2232 }, { "epoch": 0.039828059786679984, "grad_norm": 0.7063069939613342, "learning_rate": 1.991616125579736e-05, "loss": 0.3091, "step": 2233 }, { "epoch": 0.03984589590839368, "grad_norm": 0.6158521175384521, "learning_rate": 1.9925080271138067e-05, "loss": 0.4188, "step": 2234 }, { "epoch": 0.039863732030107374, "grad_norm": 0.44827064871788025, "learning_rate": 1.9933999286478774e-05, "loss": 0.2364, "step": 2235 }, { "epoch": 0.03988156815182107, "grad_norm": 0.44625604152679443, "learning_rate": 1.994291830181948e-05, "loss": 0.3028, "step": 2236 }, { "epoch": 0.039899404273534764, "grad_norm": 0.5541078448295593, "learning_rate": 1.9951837317160187e-05, "loss": 0.3322, "step": 2237 }, { "epoch": 0.03991724039524846, "grad_norm": 0.4406847655773163, "learning_rate": 1.9960756332500893e-05, "loss": 0.314, "step": 2238 }, { "epoch": 0.039935076516962154, "grad_norm": 0.47166356444358826, "learning_rate": 1.99696753478416e-05, "loss": 0.3667, "step": 2239 }, { "epoch": 0.03995291263867585, "grad_norm": 0.36501219868659973, "learning_rate": 1.9978594363182306e-05, "loss": 0.2824, "step": 2240 }, { "epoch": 0.03997074876038954, "grad_norm": 0.6005622148513794, "learning_rate": 1.9987513378523012e-05, "loss": 0.3024, "step": 2241 }, { "epoch": 0.03998858488210324, "grad_norm": 0.43297284841537476, "learning_rate": 1.999643239386372e-05, "loss": 0.2632, "step": 2242 }, { "epoch": 0.04000642100381693, "grad_norm": 0.7355133891105652, "learning_rate": 2.0005351409204425e-05, "loss": 0.3686, "step": 2243 }, { "epoch": 0.04002425712553062, "grad_norm": 0.48259422183036804, "learning_rate": 2.001427042454513e-05, "loss": 0.3532, "step": 2244 }, { "epoch": 0.040042093247244316, "grad_norm": 0.39164793491363525, "learning_rate": 2.0023189439885838e-05, "loss": 0.3117, "step": 2245 }, { "epoch": 0.04005992936895801, "grad_norm": 0.5173423290252686, "learning_rate": 2.0032108455226544e-05, "loss": 0.3257, "step": 2246 }, { "epoch": 0.040077765490671706, "grad_norm": 0.6469289660453796, "learning_rate": 2.004102747056725e-05, "loss": 0.355, "step": 2247 }, { "epoch": 0.0400956016123854, "grad_norm": 0.40094152092933655, "learning_rate": 2.0049946485907957e-05, "loss": 0.2898, "step": 2248 }, { "epoch": 0.040113437734099096, "grad_norm": 0.6378834247589111, "learning_rate": 2.0058865501248663e-05, "loss": 0.3511, "step": 2249 }, { "epoch": 0.04013127385581279, "grad_norm": 0.5820125937461853, "learning_rate": 2.006778451658937e-05, "loss": 0.3662, "step": 2250 }, { "epoch": 0.040149109977526486, "grad_norm": 0.5466359853744507, "learning_rate": 2.0076703531930076e-05, "loss": 0.2816, "step": 2251 }, { "epoch": 0.04016694609924018, "grad_norm": 0.4169778525829315, "learning_rate": 2.0085622547270782e-05, "loss": 0.2557, "step": 2252 }, { "epoch": 0.040184782220953875, "grad_norm": 0.44945377111434937, "learning_rate": 2.009454156261149e-05, "loss": 0.3279, "step": 2253 }, { "epoch": 0.04020261834266757, "grad_norm": 0.38507401943206787, "learning_rate": 2.0103460577952195e-05, "loss": 0.2579, "step": 2254 }, { "epoch": 0.040220454464381265, "grad_norm": 0.6948372721672058, "learning_rate": 2.01123795932929e-05, "loss": 0.4155, "step": 2255 }, { "epoch": 0.04023829058609496, "grad_norm": 0.4727243185043335, "learning_rate": 2.0121298608633608e-05, "loss": 0.4056, "step": 2256 }, { "epoch": 0.040256126707808655, "grad_norm": 0.5559164881706238, "learning_rate": 2.0130217623974314e-05, "loss": 0.3192, "step": 2257 }, { "epoch": 0.04027396282952235, "grad_norm": 0.4496062695980072, "learning_rate": 2.013913663931502e-05, "loss": 0.346, "step": 2258 }, { "epoch": 0.040291798951236045, "grad_norm": 0.5188528895378113, "learning_rate": 2.0148055654655727e-05, "loss": 0.3029, "step": 2259 }, { "epoch": 0.04030963507294974, "grad_norm": 0.3797195553779602, "learning_rate": 2.0156974669996433e-05, "loss": 0.2717, "step": 2260 }, { "epoch": 0.040327471194663435, "grad_norm": 0.5428307056427002, "learning_rate": 2.016589368533714e-05, "loss": 0.3135, "step": 2261 }, { "epoch": 0.04034530731637713, "grad_norm": 0.8119169473648071, "learning_rate": 2.0174812700677846e-05, "loss": 0.339, "step": 2262 }, { "epoch": 0.040363143438090825, "grad_norm": 0.6079069375991821, "learning_rate": 2.0183731716018552e-05, "loss": 0.3559, "step": 2263 }, { "epoch": 0.04038097955980451, "grad_norm": 0.603508710861206, "learning_rate": 2.019265073135926e-05, "loss": 0.396, "step": 2264 }, { "epoch": 0.04039881568151821, "grad_norm": 0.5170609951019287, "learning_rate": 2.0201569746699965e-05, "loss": 0.3224, "step": 2265 }, { "epoch": 0.0404166518032319, "grad_norm": 0.4569767117500305, "learning_rate": 2.021048876204067e-05, "loss": 0.2836, "step": 2266 }, { "epoch": 0.0404344879249456, "grad_norm": 0.5537528395652771, "learning_rate": 2.0219407777381378e-05, "loss": 0.367, "step": 2267 }, { "epoch": 0.04045232404665929, "grad_norm": 0.4843725264072418, "learning_rate": 2.0228326792722084e-05, "loss": 0.3144, "step": 2268 }, { "epoch": 0.04047016016837299, "grad_norm": 0.5590280294418335, "learning_rate": 2.023724580806279e-05, "loss": 0.3184, "step": 2269 }, { "epoch": 0.04048799629008668, "grad_norm": 0.44634491205215454, "learning_rate": 2.0246164823403497e-05, "loss": 0.301, "step": 2270 }, { "epoch": 0.04050583241180038, "grad_norm": 0.6276856660842896, "learning_rate": 2.0255083838744203e-05, "loss": 0.3689, "step": 2271 }, { "epoch": 0.04052366853351407, "grad_norm": 0.38367608189582825, "learning_rate": 2.026400285408491e-05, "loss": 0.3083, "step": 2272 }, { "epoch": 0.04054150465522777, "grad_norm": 0.518040657043457, "learning_rate": 2.0272921869425616e-05, "loss": 0.2921, "step": 2273 }, { "epoch": 0.04055934077694146, "grad_norm": 0.5452049970626831, "learning_rate": 2.0281840884766323e-05, "loss": 0.314, "step": 2274 }, { "epoch": 0.04057717689865516, "grad_norm": 0.5913676619529724, "learning_rate": 2.029075990010703e-05, "loss": 0.4119, "step": 2275 }, { "epoch": 0.04059501302036885, "grad_norm": 0.47705668210983276, "learning_rate": 2.0299678915447735e-05, "loss": 0.2675, "step": 2276 }, { "epoch": 0.04061284914208255, "grad_norm": 0.5474919080734253, "learning_rate": 2.0308597930788442e-05, "loss": 0.3613, "step": 2277 }, { "epoch": 0.04063068526379624, "grad_norm": 0.43692636489868164, "learning_rate": 2.0317516946129148e-05, "loss": 0.2709, "step": 2278 }, { "epoch": 0.040648521385509936, "grad_norm": 0.6070582270622253, "learning_rate": 2.0326435961469854e-05, "loss": 0.2958, "step": 2279 }, { "epoch": 0.04066635750722363, "grad_norm": 0.7254700064659119, "learning_rate": 2.033535497681056e-05, "loss": 0.4509, "step": 2280 }, { "epoch": 0.040684193628937326, "grad_norm": 1.0255929231643677, "learning_rate": 2.0344273992151267e-05, "loss": 0.3368, "step": 2281 }, { "epoch": 0.04070202975065102, "grad_norm": 0.5275586247444153, "learning_rate": 2.0353193007491974e-05, "loss": 0.3552, "step": 2282 }, { "epoch": 0.040719865872364716, "grad_norm": 0.361937940120697, "learning_rate": 2.036211202283268e-05, "loss": 0.2558, "step": 2283 }, { "epoch": 0.04073770199407841, "grad_norm": 0.4156278371810913, "learning_rate": 2.0371031038173386e-05, "loss": 0.264, "step": 2284 }, { "epoch": 0.0407555381157921, "grad_norm": 0.48568591475486755, "learning_rate": 2.0379950053514093e-05, "loss": 0.296, "step": 2285 }, { "epoch": 0.040773374237505794, "grad_norm": 0.5209950804710388, "learning_rate": 2.03888690688548e-05, "loss": 0.2349, "step": 2286 }, { "epoch": 0.04079121035921949, "grad_norm": 0.5858215689659119, "learning_rate": 2.0397788084195506e-05, "loss": 0.3768, "step": 2287 }, { "epoch": 0.040809046480933184, "grad_norm": 0.4604300856590271, "learning_rate": 2.0406707099536212e-05, "loss": 0.2949, "step": 2288 }, { "epoch": 0.04082688260264688, "grad_norm": 0.7059128880500793, "learning_rate": 2.0415626114876918e-05, "loss": 0.2974, "step": 2289 }, { "epoch": 0.040844718724360574, "grad_norm": 0.47811001539230347, "learning_rate": 2.0424545130217625e-05, "loss": 0.3108, "step": 2290 }, { "epoch": 0.04086255484607427, "grad_norm": 0.4712660610675812, "learning_rate": 2.043346414555833e-05, "loss": 0.3033, "step": 2291 }, { "epoch": 0.04088039096778796, "grad_norm": 0.586381196975708, "learning_rate": 2.0442383160899037e-05, "loss": 0.3293, "step": 2292 }, { "epoch": 0.04089822708950166, "grad_norm": 0.6724193096160889, "learning_rate": 2.0451302176239744e-05, "loss": 0.3365, "step": 2293 }, { "epoch": 0.04091606321121535, "grad_norm": 0.5653788447380066, "learning_rate": 2.046022119158045e-05, "loss": 0.3819, "step": 2294 }, { "epoch": 0.04093389933292905, "grad_norm": 0.4913775622844696, "learning_rate": 2.0469140206921157e-05, "loss": 0.2767, "step": 2295 }, { "epoch": 0.04095173545464274, "grad_norm": 0.6029754877090454, "learning_rate": 2.0478059222261863e-05, "loss": 0.3332, "step": 2296 }, { "epoch": 0.04096957157635644, "grad_norm": 0.5877436399459839, "learning_rate": 2.048697823760257e-05, "loss": 0.3302, "step": 2297 }, { "epoch": 0.04098740769807013, "grad_norm": 0.3311327397823334, "learning_rate": 2.0495897252943276e-05, "loss": 0.2755, "step": 2298 }, { "epoch": 0.04100524381978383, "grad_norm": 0.5030799508094788, "learning_rate": 2.0504816268283982e-05, "loss": 0.3081, "step": 2299 }, { "epoch": 0.04102307994149752, "grad_norm": 0.4549279808998108, "learning_rate": 2.051373528362469e-05, "loss": 0.3393, "step": 2300 }, { "epoch": 0.04104091606321122, "grad_norm": 0.42377546429634094, "learning_rate": 2.0522654298965395e-05, "loss": 0.2816, "step": 2301 }, { "epoch": 0.04105875218492491, "grad_norm": 0.48868995904922485, "learning_rate": 2.05315733143061e-05, "loss": 0.3191, "step": 2302 }, { "epoch": 0.04107658830663861, "grad_norm": 0.5731281638145447, "learning_rate": 2.0540492329646808e-05, "loss": 0.326, "step": 2303 }, { "epoch": 0.0410944244283523, "grad_norm": 0.48770371079444885, "learning_rate": 2.0549411344987514e-05, "loss": 0.3263, "step": 2304 }, { "epoch": 0.04111226055006599, "grad_norm": 0.5813767910003662, "learning_rate": 2.055833036032822e-05, "loss": 0.2899, "step": 2305 }, { "epoch": 0.041130096671779685, "grad_norm": 0.5096173882484436, "learning_rate": 2.0567249375668927e-05, "loss": 0.3702, "step": 2306 }, { "epoch": 0.04114793279349338, "grad_norm": 0.470236212015152, "learning_rate": 2.0576168391009633e-05, "loss": 0.2751, "step": 2307 }, { "epoch": 0.041165768915207075, "grad_norm": 0.7232435345649719, "learning_rate": 2.058508740635034e-05, "loss": 0.3457, "step": 2308 }, { "epoch": 0.04118360503692077, "grad_norm": 0.6438577771186829, "learning_rate": 2.0594006421691046e-05, "loss": 0.3594, "step": 2309 }, { "epoch": 0.041201441158634465, "grad_norm": 0.5336143970489502, "learning_rate": 2.0602925437031752e-05, "loss": 0.3039, "step": 2310 }, { "epoch": 0.04121927728034816, "grad_norm": 0.3439522087574005, "learning_rate": 2.061184445237246e-05, "loss": 0.2676, "step": 2311 }, { "epoch": 0.041237113402061855, "grad_norm": 0.6166762113571167, "learning_rate": 2.0620763467713165e-05, "loss": 0.4102, "step": 2312 }, { "epoch": 0.04125494952377555, "grad_norm": 0.4449693560600281, "learning_rate": 2.062968248305387e-05, "loss": 0.3031, "step": 2313 }, { "epoch": 0.041272785645489245, "grad_norm": 0.6253373622894287, "learning_rate": 2.0638601498394578e-05, "loss": 0.3601, "step": 2314 }, { "epoch": 0.04129062176720294, "grad_norm": 0.38799795508384705, "learning_rate": 2.0647520513735284e-05, "loss": 0.2674, "step": 2315 }, { "epoch": 0.041308457888916635, "grad_norm": 0.4372713565826416, "learning_rate": 2.065643952907599e-05, "loss": 0.288, "step": 2316 }, { "epoch": 0.04132629401063033, "grad_norm": 0.42214682698249817, "learning_rate": 2.0665358544416697e-05, "loss": 0.3154, "step": 2317 }, { "epoch": 0.041344130132344024, "grad_norm": 0.5073306560516357, "learning_rate": 2.0674277559757403e-05, "loss": 0.3148, "step": 2318 }, { "epoch": 0.04136196625405772, "grad_norm": 0.41248980164527893, "learning_rate": 2.068319657509811e-05, "loss": 0.2518, "step": 2319 }, { "epoch": 0.041379802375771414, "grad_norm": 0.4477764964103699, "learning_rate": 2.0692115590438816e-05, "loss": 0.3297, "step": 2320 }, { "epoch": 0.04139763849748511, "grad_norm": 0.5122391581535339, "learning_rate": 2.0701034605779522e-05, "loss": 0.3157, "step": 2321 }, { "epoch": 0.041415474619198804, "grad_norm": 0.6319571137428284, "learning_rate": 2.070995362112023e-05, "loss": 0.2594, "step": 2322 }, { "epoch": 0.0414333107409125, "grad_norm": 0.5080597400665283, "learning_rate": 2.071887263646094e-05, "loss": 0.2905, "step": 2323 }, { "epoch": 0.041451146862626194, "grad_norm": 0.5869331359863281, "learning_rate": 2.072779165180164e-05, "loss": 0.3042, "step": 2324 }, { "epoch": 0.04146898298433988, "grad_norm": 0.535035252571106, "learning_rate": 2.0736710667142348e-05, "loss": 0.246, "step": 2325 }, { "epoch": 0.04148681910605358, "grad_norm": 0.52500981092453, "learning_rate": 2.0745629682483054e-05, "loss": 0.2982, "step": 2326 }, { "epoch": 0.04150465522776727, "grad_norm": 0.5187718868255615, "learning_rate": 2.075454869782376e-05, "loss": 0.3345, "step": 2327 }, { "epoch": 0.04152249134948097, "grad_norm": 0.5416833758354187, "learning_rate": 2.0763467713164467e-05, "loss": 0.3724, "step": 2328 }, { "epoch": 0.04154032747119466, "grad_norm": 0.5092459917068481, "learning_rate": 2.0772386728505174e-05, "loss": 0.2979, "step": 2329 }, { "epoch": 0.041558163592908356, "grad_norm": 0.6160767078399658, "learning_rate": 2.078130574384588e-05, "loss": 0.396, "step": 2330 }, { "epoch": 0.04157599971462205, "grad_norm": 0.9167537093162537, "learning_rate": 2.0790224759186586e-05, "loss": 0.3068, "step": 2331 }, { "epoch": 0.041593835836335746, "grad_norm": 0.6085203289985657, "learning_rate": 2.0799143774527293e-05, "loss": 0.3381, "step": 2332 }, { "epoch": 0.04161167195804944, "grad_norm": 0.42790383100509644, "learning_rate": 2.0808062789868e-05, "loss": 0.2761, "step": 2333 }, { "epoch": 0.041629508079763136, "grad_norm": 0.40416112542152405, "learning_rate": 2.081698180520871e-05, "loss": 0.2872, "step": 2334 }, { "epoch": 0.04164734420147683, "grad_norm": 0.5766844749450684, "learning_rate": 2.0825900820549412e-05, "loss": 0.2891, "step": 2335 }, { "epoch": 0.041665180323190526, "grad_norm": 0.535537600517273, "learning_rate": 2.0834819835890118e-05, "loss": 0.3242, "step": 2336 }, { "epoch": 0.04168301644490422, "grad_norm": 0.49317222833633423, "learning_rate": 2.0843738851230828e-05, "loss": 0.2905, "step": 2337 }, { "epoch": 0.041700852566617916, "grad_norm": 0.6061891317367554, "learning_rate": 2.085265786657153e-05, "loss": 0.3397, "step": 2338 }, { "epoch": 0.04171868868833161, "grad_norm": 0.47608044743537903, "learning_rate": 2.0861576881912237e-05, "loss": 0.3388, "step": 2339 }, { "epoch": 0.041736524810045306, "grad_norm": 0.6357356905937195, "learning_rate": 2.0870495897252944e-05, "loss": 0.3539, "step": 2340 }, { "epoch": 0.041754360931759, "grad_norm": 0.5893821120262146, "learning_rate": 2.087941491259365e-05, "loss": 0.358, "step": 2341 }, { "epoch": 0.041772197053472696, "grad_norm": 0.5480794310569763, "learning_rate": 2.0888333927934356e-05, "loss": 0.3279, "step": 2342 }, { "epoch": 0.04179003317518639, "grad_norm": 0.6927926540374756, "learning_rate": 2.0897252943275063e-05, "loss": 0.3196, "step": 2343 }, { "epoch": 0.041807869296900085, "grad_norm": 0.5544004440307617, "learning_rate": 2.090617195861577e-05, "loss": 0.3307, "step": 2344 }, { "epoch": 0.04182570541861377, "grad_norm": 0.5229715704917908, "learning_rate": 2.0915090973956476e-05, "loss": 0.2475, "step": 2345 }, { "epoch": 0.04184354154032747, "grad_norm": 0.5360671281814575, "learning_rate": 2.0924009989297182e-05, "loss": 0.2907, "step": 2346 }, { "epoch": 0.04186137766204116, "grad_norm": 0.469929575920105, "learning_rate": 2.093292900463789e-05, "loss": 0.307, "step": 2347 }, { "epoch": 0.04187921378375486, "grad_norm": 0.5420454740524292, "learning_rate": 2.0941848019978598e-05, "loss": 0.3359, "step": 2348 }, { "epoch": 0.04189704990546855, "grad_norm": 0.5146106481552124, "learning_rate": 2.09507670353193e-05, "loss": 0.3435, "step": 2349 }, { "epoch": 0.04191488602718225, "grad_norm": 0.7414354085922241, "learning_rate": 2.0959686050660007e-05, "loss": 0.3271, "step": 2350 }, { "epoch": 0.04193272214889594, "grad_norm": 0.5352063775062561, "learning_rate": 2.0968605066000714e-05, "loss": 0.2821, "step": 2351 }, { "epoch": 0.04195055827060964, "grad_norm": 0.4640241265296936, "learning_rate": 2.097752408134142e-05, "loss": 0.232, "step": 2352 }, { "epoch": 0.04196839439232333, "grad_norm": 0.5043139457702637, "learning_rate": 2.0986443096682127e-05, "loss": 0.3616, "step": 2353 }, { "epoch": 0.04198623051403703, "grad_norm": 0.6212011575698853, "learning_rate": 2.0995362112022833e-05, "loss": 0.3228, "step": 2354 }, { "epoch": 0.04200406663575072, "grad_norm": 0.5618504881858826, "learning_rate": 2.100428112736354e-05, "loss": 0.3604, "step": 2355 }, { "epoch": 0.04202190275746442, "grad_norm": 0.42103826999664307, "learning_rate": 2.1013200142704246e-05, "loss": 0.3094, "step": 2356 }, { "epoch": 0.04203973887917811, "grad_norm": 0.5442635416984558, "learning_rate": 2.1022119158044952e-05, "loss": 0.3092, "step": 2357 }, { "epoch": 0.04205757500089181, "grad_norm": 0.5144985318183899, "learning_rate": 2.103103817338566e-05, "loss": 0.3384, "step": 2358 }, { "epoch": 0.0420754111226055, "grad_norm": 0.8168731331825256, "learning_rate": 2.1039957188726368e-05, "loss": 0.3549, "step": 2359 }, { "epoch": 0.0420932472443192, "grad_norm": 0.8109040856361389, "learning_rate": 2.104887620406707e-05, "loss": 0.3192, "step": 2360 }, { "epoch": 0.04211108336603289, "grad_norm": 0.5028811693191528, "learning_rate": 2.1057795219407778e-05, "loss": 0.3227, "step": 2361 }, { "epoch": 0.04212891948774659, "grad_norm": 0.5158123970031738, "learning_rate": 2.1066714234748487e-05, "loss": 0.3088, "step": 2362 }, { "epoch": 0.04214675560946028, "grad_norm": 0.4773781895637512, "learning_rate": 2.107563325008919e-05, "loss": 0.3319, "step": 2363 }, { "epoch": 0.04216459173117398, "grad_norm": 0.45322614908218384, "learning_rate": 2.1084552265429897e-05, "loss": 0.323, "step": 2364 }, { "epoch": 0.042182427852887665, "grad_norm": 0.4446718990802765, "learning_rate": 2.1093471280770603e-05, "loss": 0.2892, "step": 2365 }, { "epoch": 0.04220026397460136, "grad_norm": 0.8757718801498413, "learning_rate": 2.110239029611131e-05, "loss": 0.3844, "step": 2366 }, { "epoch": 0.042218100096315055, "grad_norm": 0.5051206946372986, "learning_rate": 2.1111309311452016e-05, "loss": 0.2808, "step": 2367 }, { "epoch": 0.04223593621802875, "grad_norm": 0.6871507167816162, "learning_rate": 2.1120228326792722e-05, "loss": 0.3357, "step": 2368 }, { "epoch": 0.042253772339742444, "grad_norm": 0.5976685285568237, "learning_rate": 2.112914734213343e-05, "loss": 0.3442, "step": 2369 }, { "epoch": 0.04227160846145614, "grad_norm": 0.4438973367214203, "learning_rate": 2.113806635747414e-05, "loss": 0.3416, "step": 2370 }, { "epoch": 0.042289444583169834, "grad_norm": 0.40539249777793884, "learning_rate": 2.114698537281484e-05, "loss": 0.3113, "step": 2371 }, { "epoch": 0.04230728070488353, "grad_norm": 0.5118851661682129, "learning_rate": 2.1155904388155548e-05, "loss": 0.2747, "step": 2372 }, { "epoch": 0.042325116826597224, "grad_norm": 0.4912601411342621, "learning_rate": 2.1164823403496258e-05, "loss": 0.3323, "step": 2373 }, { "epoch": 0.04234295294831092, "grad_norm": 0.5088908672332764, "learning_rate": 2.117374241883696e-05, "loss": 0.3285, "step": 2374 }, { "epoch": 0.042360789070024614, "grad_norm": 0.49872055649757385, "learning_rate": 2.1182661434177667e-05, "loss": 0.2725, "step": 2375 }, { "epoch": 0.04237862519173831, "grad_norm": 0.5044299364089966, "learning_rate": 2.1191580449518373e-05, "loss": 0.3251, "step": 2376 }, { "epoch": 0.042396461313452004, "grad_norm": 0.3838597238063812, "learning_rate": 2.120049946485908e-05, "loss": 0.2625, "step": 2377 }, { "epoch": 0.0424142974351657, "grad_norm": 0.522831380367279, "learning_rate": 2.1209418480199786e-05, "loss": 0.2986, "step": 2378 }, { "epoch": 0.042432133556879394, "grad_norm": 0.6911067366600037, "learning_rate": 2.1218337495540493e-05, "loss": 0.3627, "step": 2379 }, { "epoch": 0.04244996967859309, "grad_norm": 0.37572261691093445, "learning_rate": 2.12272565108812e-05, "loss": 0.2998, "step": 2380 }, { "epoch": 0.042467805800306783, "grad_norm": 0.4918407201766968, "learning_rate": 2.1236175526221905e-05, "loss": 0.3174, "step": 2381 }, { "epoch": 0.04248564192202048, "grad_norm": 0.43629008531570435, "learning_rate": 2.124509454156261e-05, "loss": 0.3344, "step": 2382 }, { "epoch": 0.04250347804373417, "grad_norm": 0.4399808645248413, "learning_rate": 2.1254013556903318e-05, "loss": 0.339, "step": 2383 }, { "epoch": 0.04252131416544787, "grad_norm": 0.5683116912841797, "learning_rate": 2.1262932572244028e-05, "loss": 0.3148, "step": 2384 }, { "epoch": 0.042539150287161556, "grad_norm": 0.4692027270793915, "learning_rate": 2.127185158758473e-05, "loss": 0.3272, "step": 2385 }, { "epoch": 0.04255698640887525, "grad_norm": 0.5380603671073914, "learning_rate": 2.1280770602925437e-05, "loss": 0.3644, "step": 2386 }, { "epoch": 0.042574822530588946, "grad_norm": 0.420159250497818, "learning_rate": 2.1289689618266147e-05, "loss": 0.3036, "step": 2387 }, { "epoch": 0.04259265865230264, "grad_norm": 0.5040472745895386, "learning_rate": 2.129860863360685e-05, "loss": 0.3368, "step": 2388 }, { "epoch": 0.042610494774016336, "grad_norm": 0.5153719782829285, "learning_rate": 2.1307527648947556e-05, "loss": 0.3385, "step": 2389 }, { "epoch": 0.04262833089573003, "grad_norm": 0.5071914196014404, "learning_rate": 2.1316446664288263e-05, "loss": 0.3093, "step": 2390 }, { "epoch": 0.042646167017443726, "grad_norm": 0.4509255588054657, "learning_rate": 2.132536567962897e-05, "loss": 0.3208, "step": 2391 }, { "epoch": 0.04266400313915742, "grad_norm": 0.48281392455101013, "learning_rate": 2.1334284694969675e-05, "loss": 0.329, "step": 2392 }, { "epoch": 0.042681839260871116, "grad_norm": 0.5003436207771301, "learning_rate": 2.1343203710310382e-05, "loss": 0.3021, "step": 2393 }, { "epoch": 0.04269967538258481, "grad_norm": 0.4456157982349396, "learning_rate": 2.1352122725651088e-05, "loss": 0.3087, "step": 2394 }, { "epoch": 0.042717511504298505, "grad_norm": 0.6597629189491272, "learning_rate": 2.1361041740991798e-05, "loss": 0.3174, "step": 2395 }, { "epoch": 0.0427353476260122, "grad_norm": 0.4982938766479492, "learning_rate": 2.13699607563325e-05, "loss": 0.3214, "step": 2396 }, { "epoch": 0.042753183747725895, "grad_norm": 0.31174424290657043, "learning_rate": 2.1378879771673207e-05, "loss": 0.3062, "step": 2397 }, { "epoch": 0.04277101986943959, "grad_norm": 0.3858984112739563, "learning_rate": 2.1387798787013917e-05, "loss": 0.2666, "step": 2398 }, { "epoch": 0.042788855991153285, "grad_norm": 0.446459025144577, "learning_rate": 2.139671780235462e-05, "loss": 0.2738, "step": 2399 }, { "epoch": 0.04280669211286698, "grad_norm": 0.5048308968544006, "learning_rate": 2.1405636817695327e-05, "loss": 0.3442, "step": 2400 }, { "epoch": 0.042824528234580675, "grad_norm": 0.4895900785923004, "learning_rate": 2.1414555833036036e-05, "loss": 0.2963, "step": 2401 }, { "epoch": 0.04284236435629437, "grad_norm": 0.7040649652481079, "learning_rate": 2.142347484837674e-05, "loss": 0.3823, "step": 2402 }, { "epoch": 0.042860200478008065, "grad_norm": 0.4924567937850952, "learning_rate": 2.1432393863717446e-05, "loss": 0.2977, "step": 2403 }, { "epoch": 0.04287803659972176, "grad_norm": 0.46823903918266296, "learning_rate": 2.1441312879058152e-05, "loss": 0.321, "step": 2404 }, { "epoch": 0.04289587272143545, "grad_norm": 0.4611259698867798, "learning_rate": 2.145023189439886e-05, "loss": 0.2637, "step": 2405 }, { "epoch": 0.04291370884314914, "grad_norm": 0.6974970102310181, "learning_rate": 2.1459150909739568e-05, "loss": 0.2996, "step": 2406 }, { "epoch": 0.04293154496486284, "grad_norm": 0.5615206360816956, "learning_rate": 2.146806992508027e-05, "loss": 0.318, "step": 2407 }, { "epoch": 0.04294938108657653, "grad_norm": 0.41417187452316284, "learning_rate": 2.1476988940420978e-05, "loss": 0.3238, "step": 2408 }, { "epoch": 0.04296721720829023, "grad_norm": 0.5840863585472107, "learning_rate": 2.1485907955761687e-05, "loss": 0.3218, "step": 2409 }, { "epoch": 0.04298505333000392, "grad_norm": 0.48224252462387085, "learning_rate": 2.149482697110239e-05, "loss": 0.2747, "step": 2410 }, { "epoch": 0.04300288945171762, "grad_norm": 0.4228859841823578, "learning_rate": 2.1503745986443097e-05, "loss": 0.3057, "step": 2411 }, { "epoch": 0.04302072557343131, "grad_norm": 0.47475579380989075, "learning_rate": 2.1512665001783806e-05, "loss": 0.3379, "step": 2412 }, { "epoch": 0.04303856169514501, "grad_norm": 0.8998664617538452, "learning_rate": 2.152158401712451e-05, "loss": 0.3426, "step": 2413 }, { "epoch": 0.0430563978168587, "grad_norm": 0.44181686639785767, "learning_rate": 2.1530503032465216e-05, "loss": 0.2845, "step": 2414 }, { "epoch": 0.0430742339385724, "grad_norm": 0.6558424234390259, "learning_rate": 2.1539422047805922e-05, "loss": 0.3703, "step": 2415 }, { "epoch": 0.04309207006028609, "grad_norm": 0.4764523208141327, "learning_rate": 2.154834106314663e-05, "loss": 0.2719, "step": 2416 }, { "epoch": 0.04310990618199979, "grad_norm": 0.5102486610412598, "learning_rate": 2.155726007848734e-05, "loss": 0.3327, "step": 2417 }, { "epoch": 0.04312774230371348, "grad_norm": 0.45058155059814453, "learning_rate": 2.156617909382804e-05, "loss": 0.3298, "step": 2418 }, { "epoch": 0.043145578425427177, "grad_norm": 0.43510904908180237, "learning_rate": 2.1575098109168748e-05, "loss": 0.3043, "step": 2419 }, { "epoch": 0.04316341454714087, "grad_norm": 0.5134738087654114, "learning_rate": 2.1584017124509458e-05, "loss": 0.2935, "step": 2420 }, { "epoch": 0.043181250668854566, "grad_norm": 0.46419647336006165, "learning_rate": 2.159293613985016e-05, "loss": 0.2851, "step": 2421 }, { "epoch": 0.04319908679056826, "grad_norm": 0.3891203999519348, "learning_rate": 2.1601855155190867e-05, "loss": 0.3058, "step": 2422 }, { "epoch": 0.043216922912281956, "grad_norm": 0.48797038197517395, "learning_rate": 2.1610774170531577e-05, "loss": 0.3117, "step": 2423 }, { "epoch": 0.04323475903399565, "grad_norm": 0.5592419505119324, "learning_rate": 2.161969318587228e-05, "loss": 0.3143, "step": 2424 }, { "epoch": 0.04325259515570934, "grad_norm": 0.38748639822006226, "learning_rate": 2.1628612201212986e-05, "loss": 0.2404, "step": 2425 }, { "epoch": 0.043270431277423034, "grad_norm": 0.4412252604961395, "learning_rate": 2.1637531216553696e-05, "loss": 0.267, "step": 2426 }, { "epoch": 0.04328826739913673, "grad_norm": 0.5482329726219177, "learning_rate": 2.16464502318944e-05, "loss": 0.3145, "step": 2427 }, { "epoch": 0.043306103520850424, "grad_norm": 0.6546977758407593, "learning_rate": 2.1655369247235105e-05, "loss": 0.3593, "step": 2428 }, { "epoch": 0.04332393964256412, "grad_norm": 0.31310614943504333, "learning_rate": 2.166428826257581e-05, "loss": 0.2483, "step": 2429 }, { "epoch": 0.043341775764277814, "grad_norm": 0.39456674456596375, "learning_rate": 2.1673207277916518e-05, "loss": 0.2918, "step": 2430 }, { "epoch": 0.04335961188599151, "grad_norm": 0.6820403337478638, "learning_rate": 2.1682126293257228e-05, "loss": 0.2927, "step": 2431 }, { "epoch": 0.043377448007705204, "grad_norm": 0.5346879363059998, "learning_rate": 2.169104530859793e-05, "loss": 0.3189, "step": 2432 }, { "epoch": 0.0433952841294189, "grad_norm": 0.4923313558101654, "learning_rate": 2.1699964323938637e-05, "loss": 0.2876, "step": 2433 }, { "epoch": 0.04341312025113259, "grad_norm": 0.7940296530723572, "learning_rate": 2.1708883339279347e-05, "loss": 0.3012, "step": 2434 }, { "epoch": 0.04343095637284629, "grad_norm": 0.4262382686138153, "learning_rate": 2.171780235462005e-05, "loss": 0.3329, "step": 2435 }, { "epoch": 0.04344879249455998, "grad_norm": 0.48211798071861267, "learning_rate": 2.1726721369960756e-05, "loss": 0.3255, "step": 2436 }, { "epoch": 0.04346662861627368, "grad_norm": 0.42853403091430664, "learning_rate": 2.1735640385301466e-05, "loss": 0.2451, "step": 2437 }, { "epoch": 0.04348446473798737, "grad_norm": 0.47822248935699463, "learning_rate": 2.174455940064217e-05, "loss": 0.3623, "step": 2438 }, { "epoch": 0.04350230085970107, "grad_norm": 0.5484446883201599, "learning_rate": 2.1753478415982875e-05, "loss": 0.3191, "step": 2439 }, { "epoch": 0.04352013698141476, "grad_norm": 0.5001198649406433, "learning_rate": 2.1762397431323585e-05, "loss": 0.327, "step": 2440 }, { "epoch": 0.04353797310312846, "grad_norm": 0.4653749465942383, "learning_rate": 2.1771316446664288e-05, "loss": 0.3595, "step": 2441 }, { "epoch": 0.04355580922484215, "grad_norm": 0.4383077621459961, "learning_rate": 2.1780235462004998e-05, "loss": 0.3661, "step": 2442 }, { "epoch": 0.04357364534655585, "grad_norm": 0.4560093879699707, "learning_rate": 2.17891544773457e-05, "loss": 0.2966, "step": 2443 }, { "epoch": 0.04359148146826954, "grad_norm": 0.570239245891571, "learning_rate": 2.1798073492686407e-05, "loss": 0.3332, "step": 2444 }, { "epoch": 0.04360931758998324, "grad_norm": 0.47931545972824097, "learning_rate": 2.1806992508027117e-05, "loss": 0.308, "step": 2445 }, { "epoch": 0.043627153711696925, "grad_norm": 0.5334298014640808, "learning_rate": 2.181591152336782e-05, "loss": 0.3508, "step": 2446 }, { "epoch": 0.04364498983341062, "grad_norm": 0.4662988483905792, "learning_rate": 2.1824830538708526e-05, "loss": 0.2944, "step": 2447 }, { "epoch": 0.043662825955124315, "grad_norm": 0.5688419342041016, "learning_rate": 2.1833749554049236e-05, "loss": 0.3501, "step": 2448 }, { "epoch": 0.04368066207683801, "grad_norm": 0.44827187061309814, "learning_rate": 2.184266856938994e-05, "loss": 0.2859, "step": 2449 }, { "epoch": 0.043698498198551705, "grad_norm": 0.5130137801170349, "learning_rate": 2.1851587584730646e-05, "loss": 0.3706, "step": 2450 }, { "epoch": 0.0437163343202654, "grad_norm": 0.4982846975326538, "learning_rate": 2.1860506600071355e-05, "loss": 0.2911, "step": 2451 }, { "epoch": 0.043734170441979095, "grad_norm": 0.5739523768424988, "learning_rate": 2.1869425615412058e-05, "loss": 0.2886, "step": 2452 }, { "epoch": 0.04375200656369279, "grad_norm": 0.5005897283554077, "learning_rate": 2.1878344630752768e-05, "loss": 0.3455, "step": 2453 }, { "epoch": 0.043769842685406485, "grad_norm": 0.5920192003250122, "learning_rate": 2.188726364609347e-05, "loss": 0.3557, "step": 2454 }, { "epoch": 0.04378767880712018, "grad_norm": 0.34021905064582825, "learning_rate": 2.1896182661434177e-05, "loss": 0.2805, "step": 2455 }, { "epoch": 0.043805514928833875, "grad_norm": 0.4086892604827881, "learning_rate": 2.1905101676774887e-05, "loss": 0.2633, "step": 2456 }, { "epoch": 0.04382335105054757, "grad_norm": 0.5249380469322205, "learning_rate": 2.191402069211559e-05, "loss": 0.2992, "step": 2457 }, { "epoch": 0.043841187172261264, "grad_norm": 0.3988586664199829, "learning_rate": 2.1922939707456297e-05, "loss": 0.2593, "step": 2458 }, { "epoch": 0.04385902329397496, "grad_norm": 0.6643022298812866, "learning_rate": 2.1931858722797006e-05, "loss": 0.3897, "step": 2459 }, { "epoch": 0.043876859415688654, "grad_norm": 0.4478844702243805, "learning_rate": 2.194077773813771e-05, "loss": 0.3488, "step": 2460 }, { "epoch": 0.04389469553740235, "grad_norm": 0.42367005348205566, "learning_rate": 2.1949696753478416e-05, "loss": 0.33, "step": 2461 }, { "epoch": 0.043912531659116044, "grad_norm": 0.48345765471458435, "learning_rate": 2.1958615768819125e-05, "loss": 0.3366, "step": 2462 }, { "epoch": 0.04393036778082974, "grad_norm": 0.4464993178844452, "learning_rate": 2.196753478415983e-05, "loss": 0.3225, "step": 2463 }, { "epoch": 0.043948203902543434, "grad_norm": 0.43755653500556946, "learning_rate": 2.1976453799500535e-05, "loss": 0.2638, "step": 2464 }, { "epoch": 0.04396604002425713, "grad_norm": 0.5456868410110474, "learning_rate": 2.1985372814841245e-05, "loss": 0.3164, "step": 2465 }, { "epoch": 0.04398387614597082, "grad_norm": 0.5072988271713257, "learning_rate": 2.1994291830181948e-05, "loss": 0.3305, "step": 2466 }, { "epoch": 0.04400171226768451, "grad_norm": 0.6825007796287537, "learning_rate": 2.2003210845522657e-05, "loss": 0.3381, "step": 2467 }, { "epoch": 0.04401954838939821, "grad_norm": 0.38148221373558044, "learning_rate": 2.201212986086336e-05, "loss": 0.2493, "step": 2468 }, { "epoch": 0.0440373845111119, "grad_norm": 0.48421597480773926, "learning_rate": 2.2021048876204067e-05, "loss": 0.288, "step": 2469 }, { "epoch": 0.0440552206328256, "grad_norm": 0.3696889579296112, "learning_rate": 2.2029967891544777e-05, "loss": 0.2892, "step": 2470 }, { "epoch": 0.04407305675453929, "grad_norm": 0.45962435007095337, "learning_rate": 2.203888690688548e-05, "loss": 0.3065, "step": 2471 }, { "epoch": 0.044090892876252986, "grad_norm": 0.4431440234184265, "learning_rate": 2.2047805922226186e-05, "loss": 0.262, "step": 2472 }, { "epoch": 0.04410872899796668, "grad_norm": 0.5578441023826599, "learning_rate": 2.2056724937566896e-05, "loss": 0.327, "step": 2473 }, { "epoch": 0.044126565119680376, "grad_norm": 0.7166373133659363, "learning_rate": 2.20656439529076e-05, "loss": 0.3849, "step": 2474 }, { "epoch": 0.04414440124139407, "grad_norm": 0.3914194107055664, "learning_rate": 2.2074562968248305e-05, "loss": 0.2384, "step": 2475 }, { "epoch": 0.044162237363107766, "grad_norm": 0.696711540222168, "learning_rate": 2.2083481983589015e-05, "loss": 0.3363, "step": 2476 }, { "epoch": 0.04418007348482146, "grad_norm": 0.5842909216880798, "learning_rate": 2.2092400998929718e-05, "loss": 0.3365, "step": 2477 }, { "epoch": 0.044197909606535156, "grad_norm": 0.41654083132743835, "learning_rate": 2.2101320014270428e-05, "loss": 0.2739, "step": 2478 }, { "epoch": 0.04421574572824885, "grad_norm": 0.5053631067276001, "learning_rate": 2.211023902961113e-05, "loss": 0.2666, "step": 2479 }, { "epoch": 0.044233581849962546, "grad_norm": 0.4576139748096466, "learning_rate": 2.2119158044951837e-05, "loss": 0.266, "step": 2480 }, { "epoch": 0.04425141797167624, "grad_norm": 0.5171185731887817, "learning_rate": 2.2128077060292547e-05, "loss": 0.3709, "step": 2481 }, { "epoch": 0.044269254093389936, "grad_norm": 0.5942704677581787, "learning_rate": 2.213699607563325e-05, "loss": 0.2764, "step": 2482 }, { "epoch": 0.04428709021510363, "grad_norm": 0.5231592655181885, "learning_rate": 2.2145915090973956e-05, "loss": 0.3039, "step": 2483 }, { "epoch": 0.044304926336817325, "grad_norm": 0.7226346731185913, "learning_rate": 2.2154834106314666e-05, "loss": 0.2838, "step": 2484 }, { "epoch": 0.04432276245853102, "grad_norm": 0.44686105847358704, "learning_rate": 2.216375312165537e-05, "loss": 0.3098, "step": 2485 }, { "epoch": 0.04434059858024471, "grad_norm": 0.4780190587043762, "learning_rate": 2.2172672136996075e-05, "loss": 0.285, "step": 2486 }, { "epoch": 0.0443584347019584, "grad_norm": 0.5844359397888184, "learning_rate": 2.2181591152336785e-05, "loss": 0.2865, "step": 2487 }, { "epoch": 0.0443762708236721, "grad_norm": 0.36143380403518677, "learning_rate": 2.2190510167677488e-05, "loss": 0.2614, "step": 2488 }, { "epoch": 0.04439410694538579, "grad_norm": 0.5340003967285156, "learning_rate": 2.2199429183018198e-05, "loss": 0.3734, "step": 2489 }, { "epoch": 0.04441194306709949, "grad_norm": 0.3827823996543884, "learning_rate": 2.2208348198358904e-05, "loss": 0.2946, "step": 2490 }, { "epoch": 0.04442977918881318, "grad_norm": 0.5796611905097961, "learning_rate": 2.2217267213699607e-05, "loss": 0.3181, "step": 2491 }, { "epoch": 0.04444761531052688, "grad_norm": 0.7845086455345154, "learning_rate": 2.2226186229040317e-05, "loss": 0.4209, "step": 2492 }, { "epoch": 0.04446545143224057, "grad_norm": 0.8272339701652527, "learning_rate": 2.223510524438102e-05, "loss": 0.2958, "step": 2493 }, { "epoch": 0.04448328755395427, "grad_norm": 0.5014579892158508, "learning_rate": 2.2244024259721726e-05, "loss": 0.2514, "step": 2494 }, { "epoch": 0.04450112367566796, "grad_norm": 0.5427517294883728, "learning_rate": 2.2252943275062436e-05, "loss": 0.3478, "step": 2495 }, { "epoch": 0.04451895979738166, "grad_norm": 0.592760443687439, "learning_rate": 2.226186229040314e-05, "loss": 0.3795, "step": 2496 }, { "epoch": 0.04453679591909535, "grad_norm": 0.40188559889793396, "learning_rate": 2.2270781305743845e-05, "loss": 0.3014, "step": 2497 }, { "epoch": 0.04455463204080905, "grad_norm": 0.5569307804107666, "learning_rate": 2.2279700321084555e-05, "loss": 0.3045, "step": 2498 }, { "epoch": 0.04457246816252274, "grad_norm": 0.43672263622283936, "learning_rate": 2.2288619336425258e-05, "loss": 0.2574, "step": 2499 }, { "epoch": 0.04459030428423644, "grad_norm": 0.42467162013053894, "learning_rate": 2.2297538351765965e-05, "loss": 0.3068, "step": 2500 }, { "epoch": 0.04460814040595013, "grad_norm": 0.4768904745578766, "learning_rate": 2.2306457367106674e-05, "loss": 0.2945, "step": 2501 }, { "epoch": 0.04462597652766383, "grad_norm": 0.44962868094444275, "learning_rate": 2.2315376382447377e-05, "loss": 0.3175, "step": 2502 }, { "epoch": 0.04464381264937752, "grad_norm": 0.4747069478034973, "learning_rate": 2.2324295397788087e-05, "loss": 0.3069, "step": 2503 }, { "epoch": 0.04466164877109122, "grad_norm": 0.5932212471961975, "learning_rate": 2.2333214413128793e-05, "loss": 0.3127, "step": 2504 }, { "epoch": 0.04467948489280491, "grad_norm": 0.5715842247009277, "learning_rate": 2.2342133428469496e-05, "loss": 0.2807, "step": 2505 }, { "epoch": 0.0446973210145186, "grad_norm": 0.45579037070274353, "learning_rate": 2.2351052443810206e-05, "loss": 0.3088, "step": 2506 }, { "epoch": 0.044715157136232295, "grad_norm": 0.5731293559074402, "learning_rate": 2.235997145915091e-05, "loss": 0.3475, "step": 2507 }, { "epoch": 0.04473299325794599, "grad_norm": 0.9777519702911377, "learning_rate": 2.2368890474491616e-05, "loss": 0.2962, "step": 2508 }, { "epoch": 0.044750829379659685, "grad_norm": 0.5079028010368347, "learning_rate": 2.2377809489832325e-05, "loss": 0.3421, "step": 2509 }, { "epoch": 0.04476866550137338, "grad_norm": 0.40570247173309326, "learning_rate": 2.238672850517303e-05, "loss": 0.2726, "step": 2510 }, { "epoch": 0.044786501623087074, "grad_norm": 0.39341166615486145, "learning_rate": 2.2395647520513735e-05, "loss": 0.2931, "step": 2511 }, { "epoch": 0.04480433774480077, "grad_norm": 0.34767574071884155, "learning_rate": 2.2404566535854445e-05, "loss": 0.2642, "step": 2512 }, { "epoch": 0.044822173866514464, "grad_norm": 0.4499184191226959, "learning_rate": 2.2413485551195147e-05, "loss": 0.3253, "step": 2513 }, { "epoch": 0.04484000998822816, "grad_norm": 0.6738024950027466, "learning_rate": 2.2422404566535857e-05, "loss": 0.3187, "step": 2514 }, { "epoch": 0.044857846109941854, "grad_norm": 0.4801851212978363, "learning_rate": 2.2431323581876564e-05, "loss": 0.312, "step": 2515 }, { "epoch": 0.04487568223165555, "grad_norm": 0.3929755985736847, "learning_rate": 2.2440242597217267e-05, "loss": 0.2512, "step": 2516 }, { "epoch": 0.044893518353369244, "grad_norm": 0.5497623085975647, "learning_rate": 2.2449161612557976e-05, "loss": 0.2886, "step": 2517 }, { "epoch": 0.04491135447508294, "grad_norm": 0.49939242005348206, "learning_rate": 2.245808062789868e-05, "loss": 0.3265, "step": 2518 }, { "epoch": 0.044929190596796634, "grad_norm": 0.45610734820365906, "learning_rate": 2.2466999643239386e-05, "loss": 0.2786, "step": 2519 }, { "epoch": 0.04494702671851033, "grad_norm": 0.542926549911499, "learning_rate": 2.2475918658580096e-05, "loss": 0.3651, "step": 2520 }, { "epoch": 0.044964862840224024, "grad_norm": 0.3948073089122772, "learning_rate": 2.24848376739208e-05, "loss": 0.269, "step": 2521 }, { "epoch": 0.04498269896193772, "grad_norm": 0.5280768275260925, "learning_rate": 2.2493756689261505e-05, "loss": 0.3078, "step": 2522 }, { "epoch": 0.04500053508365141, "grad_norm": 0.6438367366790771, "learning_rate": 2.2502675704602215e-05, "loss": 0.3172, "step": 2523 }, { "epoch": 0.04501837120536511, "grad_norm": 0.4665434956550598, "learning_rate": 2.2511594719942918e-05, "loss": 0.2809, "step": 2524 }, { "epoch": 0.0450362073270788, "grad_norm": 0.35218581557273865, "learning_rate": 2.2520513735283627e-05, "loss": 0.2688, "step": 2525 }, { "epoch": 0.04505404344879249, "grad_norm": 0.5222311019897461, "learning_rate": 2.2529432750624334e-05, "loss": 0.3775, "step": 2526 }, { "epoch": 0.045071879570506186, "grad_norm": 0.479929655790329, "learning_rate": 2.2538351765965037e-05, "loss": 0.3371, "step": 2527 }, { "epoch": 0.04508971569221988, "grad_norm": 0.5023528337478638, "learning_rate": 2.2547270781305747e-05, "loss": 0.2818, "step": 2528 }, { "epoch": 0.045107551813933576, "grad_norm": 1.1559685468673706, "learning_rate": 2.2556189796646453e-05, "loss": 0.3098, "step": 2529 }, { "epoch": 0.04512538793564727, "grad_norm": 0.7924062013626099, "learning_rate": 2.2565108811987156e-05, "loss": 0.2848, "step": 2530 }, { "epoch": 0.045143224057360966, "grad_norm": 0.4267258942127228, "learning_rate": 2.2574027827327866e-05, "loss": 0.31, "step": 2531 }, { "epoch": 0.04516106017907466, "grad_norm": 0.3967335820198059, "learning_rate": 2.258294684266857e-05, "loss": 0.2621, "step": 2532 }, { "epoch": 0.045178896300788356, "grad_norm": 0.46475982666015625, "learning_rate": 2.2591865858009275e-05, "loss": 0.2736, "step": 2533 }, { "epoch": 0.04519673242250205, "grad_norm": 0.43687716126441956, "learning_rate": 2.2600784873349985e-05, "loss": 0.2698, "step": 2534 }, { "epoch": 0.045214568544215746, "grad_norm": 0.486655056476593, "learning_rate": 2.2609703888690688e-05, "loss": 0.3086, "step": 2535 }, { "epoch": 0.04523240466592944, "grad_norm": 0.4176377058029175, "learning_rate": 2.2618622904031398e-05, "loss": 0.3316, "step": 2536 }, { "epoch": 0.045250240787643135, "grad_norm": 0.6278610825538635, "learning_rate": 2.2627541919372104e-05, "loss": 0.3458, "step": 2537 }, { "epoch": 0.04526807690935683, "grad_norm": 0.5996041893959045, "learning_rate": 2.2636460934712807e-05, "loss": 0.3162, "step": 2538 }, { "epoch": 0.045285913031070525, "grad_norm": 0.5668736100196838, "learning_rate": 2.2645379950053517e-05, "loss": 0.3195, "step": 2539 }, { "epoch": 0.04530374915278422, "grad_norm": 0.5813754796981812, "learning_rate": 2.2654298965394223e-05, "loss": 0.3059, "step": 2540 }, { "epoch": 0.045321585274497915, "grad_norm": 0.672214150428772, "learning_rate": 2.2663217980734926e-05, "loss": 0.2986, "step": 2541 }, { "epoch": 0.04533942139621161, "grad_norm": 0.8300511837005615, "learning_rate": 2.2672136996075636e-05, "loss": 0.2726, "step": 2542 }, { "epoch": 0.045357257517925305, "grad_norm": 0.3515639305114746, "learning_rate": 2.2681056011416342e-05, "loss": 0.2656, "step": 2543 }, { "epoch": 0.045375093639639, "grad_norm": 0.38729098439216614, "learning_rate": 2.2689975026757045e-05, "loss": 0.3299, "step": 2544 }, { "epoch": 0.045392929761352695, "grad_norm": 0.49949008226394653, "learning_rate": 2.2698894042097755e-05, "loss": 0.3128, "step": 2545 }, { "epoch": 0.04541076588306638, "grad_norm": 0.6647465229034424, "learning_rate": 2.2707813057438458e-05, "loss": 0.3633, "step": 2546 }, { "epoch": 0.04542860200478008, "grad_norm": 0.40272918343544006, "learning_rate": 2.2716732072779164e-05, "loss": 0.3302, "step": 2547 }, { "epoch": 0.04544643812649377, "grad_norm": 0.4645010232925415, "learning_rate": 2.2725651088119874e-05, "loss": 0.297, "step": 2548 }, { "epoch": 0.04546427424820747, "grad_norm": 0.8034027218818665, "learning_rate": 2.2734570103460577e-05, "loss": 0.265, "step": 2549 }, { "epoch": 0.04548211036992116, "grad_norm": 0.5579841136932373, "learning_rate": 2.2743489118801287e-05, "loss": 0.2942, "step": 2550 }, { "epoch": 0.04549994649163486, "grad_norm": 0.7305700778961182, "learning_rate": 2.2752408134141993e-05, "loss": 0.3412, "step": 2551 }, { "epoch": 0.04551778261334855, "grad_norm": 0.4577408730983734, "learning_rate": 2.2761327149482696e-05, "loss": 0.339, "step": 2552 }, { "epoch": 0.04553561873506225, "grad_norm": 0.6619099974632263, "learning_rate": 2.2770246164823406e-05, "loss": 0.2905, "step": 2553 }, { "epoch": 0.04555345485677594, "grad_norm": 0.3685494065284729, "learning_rate": 2.2779165180164112e-05, "loss": 0.3248, "step": 2554 }, { "epoch": 0.04557129097848964, "grad_norm": 0.5546826124191284, "learning_rate": 2.2788084195504815e-05, "loss": 0.2518, "step": 2555 }, { "epoch": 0.04558912710020333, "grad_norm": 0.4956215023994446, "learning_rate": 2.2797003210845525e-05, "loss": 0.3182, "step": 2556 }, { "epoch": 0.04560696322191703, "grad_norm": 0.5660912394523621, "learning_rate": 2.2805922226186228e-05, "loss": 0.31, "step": 2557 }, { "epoch": 0.04562479934363072, "grad_norm": 0.6326999664306641, "learning_rate": 2.2814841241526935e-05, "loss": 0.2687, "step": 2558 }, { "epoch": 0.04564263546534442, "grad_norm": 0.5014093518257141, "learning_rate": 2.2823760256867644e-05, "loss": 0.2829, "step": 2559 }, { "epoch": 0.04566047158705811, "grad_norm": 0.3998367488384247, "learning_rate": 2.2832679272208347e-05, "loss": 0.2754, "step": 2560 }, { "epoch": 0.045678307708771806, "grad_norm": 0.5275185704231262, "learning_rate": 2.2841598287549057e-05, "loss": 0.2952, "step": 2561 }, { "epoch": 0.0456961438304855, "grad_norm": 0.8106608986854553, "learning_rate": 2.2850517302889764e-05, "loss": 0.3418, "step": 2562 }, { "epoch": 0.045713979952199196, "grad_norm": 0.5508556962013245, "learning_rate": 2.2859436318230467e-05, "loss": 0.3197, "step": 2563 }, { "epoch": 0.04573181607391289, "grad_norm": 0.7296937108039856, "learning_rate": 2.2868355333571176e-05, "loss": 0.2893, "step": 2564 }, { "epoch": 0.045749652195626586, "grad_norm": 0.40568533539772034, "learning_rate": 2.2877274348911883e-05, "loss": 0.3217, "step": 2565 }, { "epoch": 0.045767488317340274, "grad_norm": 0.5739626884460449, "learning_rate": 2.2886193364252586e-05, "loss": 0.271, "step": 2566 }, { "epoch": 0.04578532443905397, "grad_norm": 0.6474645137786865, "learning_rate": 2.2895112379593295e-05, "loss": 0.4017, "step": 2567 }, { "epoch": 0.045803160560767664, "grad_norm": 0.3779413104057312, "learning_rate": 2.2904031394934002e-05, "loss": 0.235, "step": 2568 }, { "epoch": 0.04582099668248136, "grad_norm": 0.4976176619529724, "learning_rate": 2.2912950410274705e-05, "loss": 0.3104, "step": 2569 }, { "epoch": 0.045838832804195054, "grad_norm": 0.5540872812271118, "learning_rate": 2.2921869425615415e-05, "loss": 0.3095, "step": 2570 }, { "epoch": 0.04585666892590875, "grad_norm": 0.6369584202766418, "learning_rate": 2.2930788440956118e-05, "loss": 0.2425, "step": 2571 }, { "epoch": 0.045874505047622444, "grad_norm": 0.4630275368690491, "learning_rate": 2.2939707456296827e-05, "loss": 0.2756, "step": 2572 }, { "epoch": 0.04589234116933614, "grad_norm": 0.6564838886260986, "learning_rate": 2.2948626471637534e-05, "loss": 0.335, "step": 2573 }, { "epoch": 0.045910177291049833, "grad_norm": 0.4954453110694885, "learning_rate": 2.2957545486978237e-05, "loss": 0.2609, "step": 2574 }, { "epoch": 0.04592801341276353, "grad_norm": 0.6018038988113403, "learning_rate": 2.2966464502318946e-05, "loss": 0.3276, "step": 2575 }, { "epoch": 0.04594584953447722, "grad_norm": 0.6929775476455688, "learning_rate": 2.2975383517659653e-05, "loss": 0.327, "step": 2576 }, { "epoch": 0.04596368565619092, "grad_norm": 0.7452569007873535, "learning_rate": 2.2984302533000356e-05, "loss": 0.2921, "step": 2577 }, { "epoch": 0.04598152177790461, "grad_norm": 0.7317106127738953, "learning_rate": 2.2993221548341066e-05, "loss": 0.2935, "step": 2578 }, { "epoch": 0.04599935789961831, "grad_norm": 0.47860607504844666, "learning_rate": 2.3002140563681772e-05, "loss": 0.3166, "step": 2579 }, { "epoch": 0.046017194021332, "grad_norm": 1.1246650218963623, "learning_rate": 2.3011059579022475e-05, "loss": 0.3304, "step": 2580 }, { "epoch": 0.0460350301430457, "grad_norm": 0.49457529187202454, "learning_rate": 2.3019978594363185e-05, "loss": 0.3005, "step": 2581 }, { "epoch": 0.04605286626475939, "grad_norm": 0.49166783690452576, "learning_rate": 2.3028897609703888e-05, "loss": 0.3016, "step": 2582 }, { "epoch": 0.04607070238647309, "grad_norm": 0.7067668437957764, "learning_rate": 2.3037816625044594e-05, "loss": 0.3757, "step": 2583 }, { "epoch": 0.04608853850818678, "grad_norm": 0.6487246155738831, "learning_rate": 2.3046735640385304e-05, "loss": 0.2881, "step": 2584 }, { "epoch": 0.04610637462990048, "grad_norm": 0.873086154460907, "learning_rate": 2.3055654655726007e-05, "loss": 0.2515, "step": 2585 }, { "epoch": 0.046124210751614166, "grad_norm": 0.4572746157646179, "learning_rate": 2.3064573671066717e-05, "loss": 0.2373, "step": 2586 }, { "epoch": 0.04614204687332786, "grad_norm": 0.5847325325012207, "learning_rate": 2.3073492686407423e-05, "loss": 0.282, "step": 2587 }, { "epoch": 0.046159882995041555, "grad_norm": 0.35473737120628357, "learning_rate": 2.3082411701748126e-05, "loss": 0.278, "step": 2588 }, { "epoch": 0.04617771911675525, "grad_norm": 0.5584276914596558, "learning_rate": 2.3091330717088836e-05, "loss": 0.2903, "step": 2589 }, { "epoch": 0.046195555238468945, "grad_norm": 0.458571195602417, "learning_rate": 2.3100249732429542e-05, "loss": 0.3213, "step": 2590 }, { "epoch": 0.04621339136018264, "grad_norm": 0.44304999709129333, "learning_rate": 2.3109168747770245e-05, "loss": 0.2991, "step": 2591 }, { "epoch": 0.046231227481896335, "grad_norm": 0.5245705246925354, "learning_rate": 2.3118087763110955e-05, "loss": 0.381, "step": 2592 }, { "epoch": 0.04624906360361003, "grad_norm": 0.48145824670791626, "learning_rate": 2.312700677845166e-05, "loss": 0.3074, "step": 2593 }, { "epoch": 0.046266899725323725, "grad_norm": 0.6381738781929016, "learning_rate": 2.3135925793792364e-05, "loss": 0.2799, "step": 2594 }, { "epoch": 0.04628473584703742, "grad_norm": 0.5631144642829895, "learning_rate": 2.3144844809133074e-05, "loss": 0.3401, "step": 2595 }, { "epoch": 0.046302571968751115, "grad_norm": 0.3579598367214203, "learning_rate": 2.3153763824473777e-05, "loss": 0.2796, "step": 2596 }, { "epoch": 0.04632040809046481, "grad_norm": 0.8319517970085144, "learning_rate": 2.3162682839814487e-05, "loss": 0.2859, "step": 2597 }, { "epoch": 0.046338244212178505, "grad_norm": 0.6031146049499512, "learning_rate": 2.3171601855155193e-05, "loss": 0.2885, "step": 2598 }, { "epoch": 0.0463560803338922, "grad_norm": 0.44405749440193176, "learning_rate": 2.3180520870495896e-05, "loss": 0.3487, "step": 2599 }, { "epoch": 0.046373916455605894, "grad_norm": 0.5020195245742798, "learning_rate": 2.3189439885836606e-05, "loss": 0.3071, "step": 2600 }, { "epoch": 0.04639175257731959, "grad_norm": 0.39273878931999207, "learning_rate": 2.3198358901177312e-05, "loss": 0.2778, "step": 2601 }, { "epoch": 0.046409588699033284, "grad_norm": 0.38813886046409607, "learning_rate": 2.3207277916518015e-05, "loss": 0.2887, "step": 2602 }, { "epoch": 0.04642742482074698, "grad_norm": 0.4507793188095093, "learning_rate": 2.3216196931858725e-05, "loss": 0.2909, "step": 2603 }, { "epoch": 0.046445260942460674, "grad_norm": 0.32840240001678467, "learning_rate": 2.322511594719943e-05, "loss": 0.2849, "step": 2604 }, { "epoch": 0.04646309706417437, "grad_norm": 0.5821751356124878, "learning_rate": 2.3234034962540134e-05, "loss": 0.3296, "step": 2605 }, { "epoch": 0.046480933185888064, "grad_norm": 0.5966184735298157, "learning_rate": 2.3242953977880844e-05, "loss": 0.3656, "step": 2606 }, { "epoch": 0.04649876930760175, "grad_norm": 0.42971763014793396, "learning_rate": 2.325187299322155e-05, "loss": 0.3098, "step": 2607 }, { "epoch": 0.04651660542931545, "grad_norm": 0.43057823181152344, "learning_rate": 2.3260792008562257e-05, "loss": 0.3478, "step": 2608 }, { "epoch": 0.04653444155102914, "grad_norm": 0.38382527232170105, "learning_rate": 2.3269711023902963e-05, "loss": 0.2529, "step": 2609 }, { "epoch": 0.04655227767274284, "grad_norm": 0.3996851146221161, "learning_rate": 2.3278630039243666e-05, "loss": 0.3395, "step": 2610 }, { "epoch": 0.04657011379445653, "grad_norm": 0.5662938952445984, "learning_rate": 2.3287549054584376e-05, "loss": 0.3764, "step": 2611 }, { "epoch": 0.046587949916170227, "grad_norm": 0.4093371331691742, "learning_rate": 2.3296468069925083e-05, "loss": 0.2487, "step": 2612 }, { "epoch": 0.04660578603788392, "grad_norm": 0.5064240097999573, "learning_rate": 2.3305387085265786e-05, "loss": 0.3307, "step": 2613 }, { "epoch": 0.046623622159597616, "grad_norm": 0.4669743776321411, "learning_rate": 2.3314306100606495e-05, "loss": 0.284, "step": 2614 }, { "epoch": 0.04664145828131131, "grad_norm": 0.30909350514411926, "learning_rate": 2.33232251159472e-05, "loss": 0.2529, "step": 2615 }, { "epoch": 0.046659294403025006, "grad_norm": 0.43209806084632874, "learning_rate": 2.3332144131287905e-05, "loss": 0.3041, "step": 2616 }, { "epoch": 0.0466771305247387, "grad_norm": 0.35900524258613586, "learning_rate": 2.3341063146628614e-05, "loss": 0.3106, "step": 2617 }, { "epoch": 0.046694966646452396, "grad_norm": 0.423440158367157, "learning_rate": 2.334998216196932e-05, "loss": 0.3344, "step": 2618 }, { "epoch": 0.04671280276816609, "grad_norm": 0.3105817139148712, "learning_rate": 2.3358901177310027e-05, "loss": 0.2517, "step": 2619 }, { "epoch": 0.046730638889879786, "grad_norm": 0.43081656098365784, "learning_rate": 2.3367820192650734e-05, "loss": 0.3203, "step": 2620 }, { "epoch": 0.04674847501159348, "grad_norm": 0.35156628489494324, "learning_rate": 2.3376739207991437e-05, "loss": 0.2499, "step": 2621 }, { "epoch": 0.046766311133307176, "grad_norm": 0.4836219847202301, "learning_rate": 2.3385658223332146e-05, "loss": 0.3276, "step": 2622 }, { "epoch": 0.04678414725502087, "grad_norm": 0.700020432472229, "learning_rate": 2.3394577238672853e-05, "loss": 0.2757, "step": 2623 }, { "epoch": 0.046801983376734566, "grad_norm": 0.45737606287002563, "learning_rate": 2.3403496254013556e-05, "loss": 0.277, "step": 2624 }, { "epoch": 0.04681981949844826, "grad_norm": 0.47991931438446045, "learning_rate": 2.3412415269354265e-05, "loss": 0.3075, "step": 2625 }, { "epoch": 0.046837655620161955, "grad_norm": 0.7000904083251953, "learning_rate": 2.3421334284694972e-05, "loss": 0.3013, "step": 2626 }, { "epoch": 0.04685549174187564, "grad_norm": 0.44958168268203735, "learning_rate": 2.3430253300035675e-05, "loss": 0.3125, "step": 2627 }, { "epoch": 0.04687332786358934, "grad_norm": 0.5032778382301331, "learning_rate": 2.3439172315376385e-05, "loss": 0.3368, "step": 2628 }, { "epoch": 0.04689116398530303, "grad_norm": 0.5663533806800842, "learning_rate": 2.344809133071709e-05, "loss": 0.362, "step": 2629 }, { "epoch": 0.04690900010701673, "grad_norm": 0.4374352693557739, "learning_rate": 2.3457010346057794e-05, "loss": 0.2393, "step": 2630 }, { "epoch": 0.04692683622873042, "grad_norm": 0.4122285842895508, "learning_rate": 2.3465929361398504e-05, "loss": 0.2952, "step": 2631 }, { "epoch": 0.04694467235044412, "grad_norm": 0.500024139881134, "learning_rate": 2.347484837673921e-05, "loss": 0.3238, "step": 2632 }, { "epoch": 0.04696250847215781, "grad_norm": 0.31356459856033325, "learning_rate": 2.3483767392079917e-05, "loss": 0.2604, "step": 2633 }, { "epoch": 0.04698034459387151, "grad_norm": 0.4864186644554138, "learning_rate": 2.3492686407420623e-05, "loss": 0.2548, "step": 2634 }, { "epoch": 0.0469981807155852, "grad_norm": 0.531172513961792, "learning_rate": 2.3501605422761326e-05, "loss": 0.3647, "step": 2635 }, { "epoch": 0.0470160168372989, "grad_norm": 0.7010324597358704, "learning_rate": 2.3510524438102036e-05, "loss": 0.3049, "step": 2636 }, { "epoch": 0.04703385295901259, "grad_norm": 0.5139377117156982, "learning_rate": 2.3519443453442742e-05, "loss": 0.3048, "step": 2637 }, { "epoch": 0.04705168908072629, "grad_norm": 0.4551222324371338, "learning_rate": 2.3528362468783445e-05, "loss": 0.3419, "step": 2638 }, { "epoch": 0.04706952520243998, "grad_norm": 0.4624652564525604, "learning_rate": 2.3537281484124155e-05, "loss": 0.2818, "step": 2639 }, { "epoch": 0.04708736132415368, "grad_norm": 0.3740825951099396, "learning_rate": 2.354620049946486e-05, "loss": 0.2796, "step": 2640 }, { "epoch": 0.04710519744586737, "grad_norm": 0.4301629066467285, "learning_rate": 2.3555119514805564e-05, "loss": 0.2806, "step": 2641 }, { "epoch": 0.04712303356758107, "grad_norm": 0.3898015320301056, "learning_rate": 2.3564038530146274e-05, "loss": 0.2972, "step": 2642 }, { "epoch": 0.04714086968929476, "grad_norm": 0.3900796175003052, "learning_rate": 2.357295754548698e-05, "loss": 0.3241, "step": 2643 }, { "epoch": 0.04715870581100846, "grad_norm": 0.50234055519104, "learning_rate": 2.3581876560827687e-05, "loss": 0.3363, "step": 2644 }, { "epoch": 0.04717654193272215, "grad_norm": 0.4925941228866577, "learning_rate": 2.3590795576168393e-05, "loss": 0.3285, "step": 2645 }, { "epoch": 0.04719437805443585, "grad_norm": 0.41006824374198914, "learning_rate": 2.35997145915091e-05, "loss": 0.2546, "step": 2646 }, { "epoch": 0.047212214176149535, "grad_norm": 0.7196215391159058, "learning_rate": 2.3608633606849806e-05, "loss": 0.2943, "step": 2647 }, { "epoch": 0.04723005029786323, "grad_norm": 0.3472844362258911, "learning_rate": 2.3617552622190512e-05, "loss": 0.2876, "step": 2648 }, { "epoch": 0.047247886419576925, "grad_norm": 0.5129361748695374, "learning_rate": 2.3626471637531215e-05, "loss": 0.3762, "step": 2649 }, { "epoch": 0.04726572254129062, "grad_norm": 0.452271968126297, "learning_rate": 2.3635390652871925e-05, "loss": 0.2862, "step": 2650 }, { "epoch": 0.047283558663004314, "grad_norm": 0.4501533508300781, "learning_rate": 2.364430966821263e-05, "loss": 0.3249, "step": 2651 }, { "epoch": 0.04730139478471801, "grad_norm": 0.49350038170814514, "learning_rate": 2.3653228683553334e-05, "loss": 0.2701, "step": 2652 }, { "epoch": 0.047319230906431704, "grad_norm": 0.5184900760650635, "learning_rate": 2.3662147698894044e-05, "loss": 0.2975, "step": 2653 }, { "epoch": 0.0473370670281454, "grad_norm": 0.5035743713378906, "learning_rate": 2.367106671423475e-05, "loss": 0.335, "step": 2654 }, { "epoch": 0.047354903149859094, "grad_norm": 0.4158850312232971, "learning_rate": 2.3679985729575457e-05, "loss": 0.2374, "step": 2655 }, { "epoch": 0.04737273927157279, "grad_norm": 0.35874706506729126, "learning_rate": 2.3688904744916163e-05, "loss": 0.289, "step": 2656 }, { "epoch": 0.047390575393286484, "grad_norm": 0.370504766702652, "learning_rate": 2.369782376025687e-05, "loss": 0.3031, "step": 2657 }, { "epoch": 0.04740841151500018, "grad_norm": 0.39797043800354004, "learning_rate": 2.3706742775597576e-05, "loss": 0.3068, "step": 2658 }, { "epoch": 0.047426247636713874, "grad_norm": 0.4915393590927124, "learning_rate": 2.3715661790938282e-05, "loss": 0.3244, "step": 2659 }, { "epoch": 0.04744408375842757, "grad_norm": 0.515765368938446, "learning_rate": 2.3724580806278985e-05, "loss": 0.3086, "step": 2660 }, { "epoch": 0.047461919880141264, "grad_norm": 0.4049784243106842, "learning_rate": 2.3733499821619695e-05, "loss": 0.3482, "step": 2661 }, { "epoch": 0.04747975600185496, "grad_norm": 0.475574254989624, "learning_rate": 2.37424188369604e-05, "loss": 0.3228, "step": 2662 }, { "epoch": 0.047497592123568654, "grad_norm": 0.6775217056274414, "learning_rate": 2.3751337852301105e-05, "loss": 0.2793, "step": 2663 }, { "epoch": 0.04751542824528235, "grad_norm": 0.4063296318054199, "learning_rate": 2.3760256867641814e-05, "loss": 0.3473, "step": 2664 }, { "epoch": 0.04753326436699604, "grad_norm": 0.4551413059234619, "learning_rate": 2.376917588298252e-05, "loss": 0.2967, "step": 2665 }, { "epoch": 0.04755110048870974, "grad_norm": 0.4418201744556427, "learning_rate": 2.3778094898323224e-05, "loss": 0.3215, "step": 2666 }, { "epoch": 0.047568936610423426, "grad_norm": 0.4525734782218933, "learning_rate": 2.3787013913663933e-05, "loss": 0.3215, "step": 2667 }, { "epoch": 0.04758677273213712, "grad_norm": 0.36298611760139465, "learning_rate": 2.379593292900464e-05, "loss": 0.2704, "step": 2668 }, { "epoch": 0.047604608853850816, "grad_norm": 0.42858415842056274, "learning_rate": 2.3804851944345346e-05, "loss": 0.3266, "step": 2669 }, { "epoch": 0.04762244497556451, "grad_norm": 0.4325609505176544, "learning_rate": 2.3813770959686053e-05, "loss": 0.3107, "step": 2670 }, { "epoch": 0.047640281097278206, "grad_norm": 0.5073983669281006, "learning_rate": 2.382268997502676e-05, "loss": 0.3308, "step": 2671 }, { "epoch": 0.0476581172189919, "grad_norm": 0.3791866600513458, "learning_rate": 2.3831608990367465e-05, "loss": 0.3009, "step": 2672 }, { "epoch": 0.047675953340705596, "grad_norm": 0.37248167395591736, "learning_rate": 2.3840528005708172e-05, "loss": 0.2915, "step": 2673 }, { "epoch": 0.04769378946241929, "grad_norm": 0.49261602759361267, "learning_rate": 2.3849447021048875e-05, "loss": 0.2993, "step": 2674 }, { "epoch": 0.047711625584132986, "grad_norm": 0.36422669887542725, "learning_rate": 2.3858366036389585e-05, "loss": 0.2618, "step": 2675 }, { "epoch": 0.04772946170584668, "grad_norm": 0.31536272168159485, "learning_rate": 2.386728505173029e-05, "loss": 0.2807, "step": 2676 }, { "epoch": 0.047747297827560375, "grad_norm": 0.3920343518257141, "learning_rate": 2.3876204067070994e-05, "loss": 0.2772, "step": 2677 }, { "epoch": 0.04776513394927407, "grad_norm": 0.3932455778121948, "learning_rate": 2.3885123082411704e-05, "loss": 0.3162, "step": 2678 }, { "epoch": 0.047782970070987765, "grad_norm": 0.455054372549057, "learning_rate": 2.389404209775241e-05, "loss": 0.314, "step": 2679 }, { "epoch": 0.04780080619270146, "grad_norm": 0.4854518175125122, "learning_rate": 2.3902961113093116e-05, "loss": 0.2581, "step": 2680 }, { "epoch": 0.047818642314415155, "grad_norm": 0.4947347640991211, "learning_rate": 2.3911880128433823e-05, "loss": 0.3596, "step": 2681 }, { "epoch": 0.04783647843612885, "grad_norm": 0.48636892437934875, "learning_rate": 2.392079914377453e-05, "loss": 0.3124, "step": 2682 }, { "epoch": 0.047854314557842545, "grad_norm": 0.4568527042865753, "learning_rate": 2.3929718159115236e-05, "loss": 0.3023, "step": 2683 }, { "epoch": 0.04787215067955624, "grad_norm": 0.4741967022418976, "learning_rate": 2.3938637174455942e-05, "loss": 0.3257, "step": 2684 }, { "epoch": 0.047889986801269935, "grad_norm": 0.44635698199272156, "learning_rate": 2.3947556189796645e-05, "loss": 0.294, "step": 2685 }, { "epoch": 0.04790782292298363, "grad_norm": 0.4073943793773651, "learning_rate": 2.3956475205137355e-05, "loss": 0.2582, "step": 2686 }, { "epoch": 0.04792565904469732, "grad_norm": 0.5220855474472046, "learning_rate": 2.396539422047806e-05, "loss": 0.3106, "step": 2687 }, { "epoch": 0.04794349516641101, "grad_norm": 0.46042484045028687, "learning_rate": 2.3974313235818764e-05, "loss": 0.2694, "step": 2688 }, { "epoch": 0.04796133128812471, "grad_norm": 0.4090389013290405, "learning_rate": 2.3983232251159474e-05, "loss": 0.3377, "step": 2689 }, { "epoch": 0.0479791674098384, "grad_norm": 0.6514447331428528, "learning_rate": 2.399215126650018e-05, "loss": 0.3257, "step": 2690 }, { "epoch": 0.0479970035315521, "grad_norm": 0.5830377340316772, "learning_rate": 2.4001070281840887e-05, "loss": 0.3815, "step": 2691 }, { "epoch": 0.04801483965326579, "grad_norm": 0.5281463861465454, "learning_rate": 2.4009989297181593e-05, "loss": 0.3444, "step": 2692 }, { "epoch": 0.04803267577497949, "grad_norm": 0.4410761594772339, "learning_rate": 2.40189083125223e-05, "loss": 0.3353, "step": 2693 }, { "epoch": 0.04805051189669318, "grad_norm": 0.5555058717727661, "learning_rate": 2.4027827327863006e-05, "loss": 0.3452, "step": 2694 }, { "epoch": 0.04806834801840688, "grad_norm": 0.5104464888572693, "learning_rate": 2.4036746343203712e-05, "loss": 0.3165, "step": 2695 }, { "epoch": 0.04808618414012057, "grad_norm": 0.5032020211219788, "learning_rate": 2.404566535854442e-05, "loss": 0.3224, "step": 2696 }, { "epoch": 0.04810402026183427, "grad_norm": 0.41885411739349365, "learning_rate": 2.4054584373885125e-05, "loss": 0.2923, "step": 2697 }, { "epoch": 0.04812185638354796, "grad_norm": 0.5228544473648071, "learning_rate": 2.406350338922583e-05, "loss": 0.4019, "step": 2698 }, { "epoch": 0.04813969250526166, "grad_norm": 0.40701359510421753, "learning_rate": 2.4072422404566534e-05, "loss": 0.2907, "step": 2699 }, { "epoch": 0.04815752862697535, "grad_norm": 0.3381403982639313, "learning_rate": 2.4081341419907244e-05, "loss": 0.2482, "step": 2700 }, { "epoch": 0.04817536474868905, "grad_norm": 0.9533022046089172, "learning_rate": 2.409026043524795e-05, "loss": 0.291, "step": 2701 }, { "epoch": 0.04819320087040274, "grad_norm": 0.7095092535018921, "learning_rate": 2.4099179450588653e-05, "loss": 0.2945, "step": 2702 }, { "epoch": 0.048211036992116436, "grad_norm": 0.580734372138977, "learning_rate": 2.4108098465929363e-05, "loss": 0.3661, "step": 2703 }, { "epoch": 0.04822887311383013, "grad_norm": 0.6417325139045715, "learning_rate": 2.411701748127007e-05, "loss": 0.337, "step": 2704 }, { "epoch": 0.048246709235543826, "grad_norm": 0.5337891578674316, "learning_rate": 2.4125936496610776e-05, "loss": 0.3528, "step": 2705 }, { "epoch": 0.04826454535725752, "grad_norm": 0.5764961838722229, "learning_rate": 2.4134855511951482e-05, "loss": 0.341, "step": 2706 }, { "epoch": 0.04828238147897121, "grad_norm": 0.37059906125068665, "learning_rate": 2.414377452729219e-05, "loss": 0.2727, "step": 2707 }, { "epoch": 0.048300217600684904, "grad_norm": 0.5342280268669128, "learning_rate": 2.4152693542632895e-05, "loss": 0.3702, "step": 2708 }, { "epoch": 0.0483180537223986, "grad_norm": 0.4810682237148285, "learning_rate": 2.41616125579736e-05, "loss": 0.2814, "step": 2709 }, { "epoch": 0.048335889844112294, "grad_norm": 0.5229276418685913, "learning_rate": 2.4170531573314308e-05, "loss": 0.3175, "step": 2710 }, { "epoch": 0.04835372596582599, "grad_norm": 0.4699035882949829, "learning_rate": 2.4179450588655014e-05, "loss": 0.3248, "step": 2711 }, { "epoch": 0.048371562087539684, "grad_norm": 0.48911675810813904, "learning_rate": 2.418836960399572e-05, "loss": 0.3354, "step": 2712 }, { "epoch": 0.04838939820925338, "grad_norm": 0.7131666541099548, "learning_rate": 2.4197288619336424e-05, "loss": 0.3201, "step": 2713 }, { "epoch": 0.048407234330967074, "grad_norm": 0.45483145117759705, "learning_rate": 2.4206207634677133e-05, "loss": 0.3282, "step": 2714 }, { "epoch": 0.04842507045268077, "grad_norm": 0.36584872007369995, "learning_rate": 2.421512665001784e-05, "loss": 0.2612, "step": 2715 }, { "epoch": 0.04844290657439446, "grad_norm": 0.47576916217803955, "learning_rate": 2.4224045665358546e-05, "loss": 0.2965, "step": 2716 }, { "epoch": 0.04846074269610816, "grad_norm": 0.7287359237670898, "learning_rate": 2.4232964680699252e-05, "loss": 0.275, "step": 2717 }, { "epoch": 0.04847857881782185, "grad_norm": 0.5523488521575928, "learning_rate": 2.424188369603996e-05, "loss": 0.322, "step": 2718 }, { "epoch": 0.04849641493953555, "grad_norm": 0.5767621397972107, "learning_rate": 2.4250802711380665e-05, "loss": 0.317, "step": 2719 }, { "epoch": 0.04851425106124924, "grad_norm": 1.306100606918335, "learning_rate": 2.425972172672137e-05, "loss": 0.2773, "step": 2720 }, { "epoch": 0.04853208718296294, "grad_norm": 0.38748273253440857, "learning_rate": 2.4268640742062078e-05, "loss": 0.2858, "step": 2721 }, { "epoch": 0.04854992330467663, "grad_norm": 0.49541452527046204, "learning_rate": 2.4277559757402784e-05, "loss": 0.2528, "step": 2722 }, { "epoch": 0.04856775942639033, "grad_norm": 0.3869876265525818, "learning_rate": 2.428647877274349e-05, "loss": 0.3253, "step": 2723 }, { "epoch": 0.04858559554810402, "grad_norm": 0.6745806932449341, "learning_rate": 2.4295397788084194e-05, "loss": 0.3011, "step": 2724 }, { "epoch": 0.04860343166981772, "grad_norm": 0.4834120273590088, "learning_rate": 2.4304316803424904e-05, "loss": 0.3062, "step": 2725 }, { "epoch": 0.04862126779153141, "grad_norm": 0.3577546179294586, "learning_rate": 2.431323581876561e-05, "loss": 0.2635, "step": 2726 }, { "epoch": 0.0486391039132451, "grad_norm": 0.6025672554969788, "learning_rate": 2.4322154834106316e-05, "loss": 0.3077, "step": 2727 }, { "epoch": 0.048656940034958795, "grad_norm": 0.5578452944755554, "learning_rate": 2.4331073849447023e-05, "loss": 0.2975, "step": 2728 }, { "epoch": 0.04867477615667249, "grad_norm": 0.40623703598976135, "learning_rate": 2.433999286478773e-05, "loss": 0.2308, "step": 2729 }, { "epoch": 0.048692612278386185, "grad_norm": 0.40554261207580566, "learning_rate": 2.4348911880128435e-05, "loss": 0.267, "step": 2730 }, { "epoch": 0.04871044840009988, "grad_norm": 0.5625680685043335, "learning_rate": 2.4357830895469142e-05, "loss": 0.3159, "step": 2731 }, { "epoch": 0.048728284521813575, "grad_norm": 0.5170318484306335, "learning_rate": 2.4366749910809848e-05, "loss": 0.3053, "step": 2732 }, { "epoch": 0.04874612064352727, "grad_norm": 0.41871926188468933, "learning_rate": 2.4375668926150555e-05, "loss": 0.2673, "step": 2733 }, { "epoch": 0.048763956765240965, "grad_norm": 0.4423244595527649, "learning_rate": 2.438458794149126e-05, "loss": 0.2791, "step": 2734 }, { "epoch": 0.04878179288695466, "grad_norm": 0.47761958837509155, "learning_rate": 2.4393506956831967e-05, "loss": 0.2933, "step": 2735 }, { "epoch": 0.048799629008668355, "grad_norm": 0.5461574792861938, "learning_rate": 2.4402425972172674e-05, "loss": 0.3336, "step": 2736 }, { "epoch": 0.04881746513038205, "grad_norm": 0.5050264000892639, "learning_rate": 2.441134498751338e-05, "loss": 0.276, "step": 2737 }, { "epoch": 0.048835301252095745, "grad_norm": 1.313736915588379, "learning_rate": 2.4420264002854086e-05, "loss": 0.3089, "step": 2738 }, { "epoch": 0.04885313737380944, "grad_norm": 0.5269498229026794, "learning_rate": 2.4429183018194793e-05, "loss": 0.3282, "step": 2739 }, { "epoch": 0.048870973495523135, "grad_norm": 0.5273535847663879, "learning_rate": 2.44381020335355e-05, "loss": 0.3163, "step": 2740 }, { "epoch": 0.04888880961723683, "grad_norm": 0.8047959804534912, "learning_rate": 2.4447021048876206e-05, "loss": 0.2968, "step": 2741 }, { "epoch": 0.048906645738950524, "grad_norm": 0.9569438099861145, "learning_rate": 2.4455940064216912e-05, "loss": 0.3439, "step": 2742 }, { "epoch": 0.04892448186066422, "grad_norm": 0.828893780708313, "learning_rate": 2.446485907955762e-05, "loss": 0.2937, "step": 2743 }, { "epoch": 0.048942317982377914, "grad_norm": 0.3880298435688019, "learning_rate": 2.4473778094898325e-05, "loss": 0.248, "step": 2744 }, { "epoch": 0.04896015410409161, "grad_norm": 0.5503308773040771, "learning_rate": 2.448269711023903e-05, "loss": 0.2936, "step": 2745 }, { "epoch": 0.048977990225805304, "grad_norm": 0.4243747293949127, "learning_rate": 2.4491616125579738e-05, "loss": 0.2479, "step": 2746 }, { "epoch": 0.04899582634751899, "grad_norm": 0.37995001673698425, "learning_rate": 2.4500535140920444e-05, "loss": 0.2733, "step": 2747 }, { "epoch": 0.04901366246923269, "grad_norm": 0.4783131778240204, "learning_rate": 2.450945415626115e-05, "loss": 0.3016, "step": 2748 }, { "epoch": 0.04903149859094638, "grad_norm": 0.41291674971580505, "learning_rate": 2.4518373171601857e-05, "loss": 0.2507, "step": 2749 }, { "epoch": 0.04904933471266008, "grad_norm": 0.470490962266922, "learning_rate": 2.4527292186942563e-05, "loss": 0.3242, "step": 2750 }, { "epoch": 0.04906717083437377, "grad_norm": 0.4082280099391937, "learning_rate": 2.453621120228327e-05, "loss": 0.2922, "step": 2751 }, { "epoch": 0.04908500695608747, "grad_norm": 0.4830440580844879, "learning_rate": 2.4545130217623976e-05, "loss": 0.3209, "step": 2752 }, { "epoch": 0.04910284307780116, "grad_norm": 0.4787217676639557, "learning_rate": 2.4554049232964682e-05, "loss": 0.2934, "step": 2753 }, { "epoch": 0.049120679199514856, "grad_norm": 0.5571637749671936, "learning_rate": 2.456296824830539e-05, "loss": 0.3278, "step": 2754 }, { "epoch": 0.04913851532122855, "grad_norm": 0.48631027340888977, "learning_rate": 2.4571887263646095e-05, "loss": 0.2961, "step": 2755 }, { "epoch": 0.049156351442942246, "grad_norm": 0.3554821312427521, "learning_rate": 2.45808062789868e-05, "loss": 0.2561, "step": 2756 }, { "epoch": 0.04917418756465594, "grad_norm": 0.365263432264328, "learning_rate": 2.4589725294327508e-05, "loss": 0.2289, "step": 2757 }, { "epoch": 0.049192023686369636, "grad_norm": 0.5041653513908386, "learning_rate": 2.4598644309668214e-05, "loss": 0.3571, "step": 2758 }, { "epoch": 0.04920985980808333, "grad_norm": 0.4902588427066803, "learning_rate": 2.460756332500892e-05, "loss": 0.3084, "step": 2759 }, { "epoch": 0.049227695929797026, "grad_norm": 0.5168266296386719, "learning_rate": 2.4616482340349627e-05, "loss": 0.3666, "step": 2760 }, { "epoch": 0.04924553205151072, "grad_norm": 0.44216209650039673, "learning_rate": 2.4625401355690333e-05, "loss": 0.2371, "step": 2761 }, { "epoch": 0.049263368173224416, "grad_norm": 0.35730811953544617, "learning_rate": 2.463432037103104e-05, "loss": 0.2762, "step": 2762 }, { "epoch": 0.04928120429493811, "grad_norm": 0.5990766882896423, "learning_rate": 2.4643239386371746e-05, "loss": 0.2745, "step": 2763 }, { "epoch": 0.049299040416651806, "grad_norm": 0.43171870708465576, "learning_rate": 2.4652158401712452e-05, "loss": 0.2774, "step": 2764 }, { "epoch": 0.0493168765383655, "grad_norm": 0.384461373090744, "learning_rate": 2.466107741705316e-05, "loss": 0.2606, "step": 2765 }, { "epoch": 0.049334712660079195, "grad_norm": 0.6836086511611938, "learning_rate": 2.4669996432393865e-05, "loss": 0.2675, "step": 2766 }, { "epoch": 0.04935254878179289, "grad_norm": 0.4748278558254242, "learning_rate": 2.467891544773457e-05, "loss": 0.3026, "step": 2767 }, { "epoch": 0.04937038490350658, "grad_norm": 0.4196484386920929, "learning_rate": 2.4687834463075278e-05, "loss": 0.252, "step": 2768 }, { "epoch": 0.04938822102522027, "grad_norm": 0.4558366537094116, "learning_rate": 2.4696753478415984e-05, "loss": 0.3075, "step": 2769 }, { "epoch": 0.04940605714693397, "grad_norm": 0.7233859896659851, "learning_rate": 2.470567249375669e-05, "loss": 0.3477, "step": 2770 }, { "epoch": 0.04942389326864766, "grad_norm": 0.6275060772895813, "learning_rate": 2.4714591509097397e-05, "loss": 0.3877, "step": 2771 }, { "epoch": 0.04944172939036136, "grad_norm": 0.462270051240921, "learning_rate": 2.4723510524438103e-05, "loss": 0.2872, "step": 2772 }, { "epoch": 0.04945956551207505, "grad_norm": 0.38063880801200867, "learning_rate": 2.473242953977881e-05, "loss": 0.286, "step": 2773 }, { "epoch": 0.04947740163378875, "grad_norm": 0.6094436645507812, "learning_rate": 2.4741348555119516e-05, "loss": 0.282, "step": 2774 }, { "epoch": 0.04949523775550244, "grad_norm": 0.43387001752853394, "learning_rate": 2.4750267570460223e-05, "loss": 0.2644, "step": 2775 }, { "epoch": 0.04951307387721614, "grad_norm": 0.47578999400138855, "learning_rate": 2.475918658580093e-05, "loss": 0.2786, "step": 2776 }, { "epoch": 0.04953090999892983, "grad_norm": 0.6152646541595459, "learning_rate": 2.4768105601141635e-05, "loss": 0.3084, "step": 2777 }, { "epoch": 0.04954874612064353, "grad_norm": 0.7436725497245789, "learning_rate": 2.477702461648234e-05, "loss": 0.3556, "step": 2778 }, { "epoch": 0.04956658224235722, "grad_norm": 0.376797616481781, "learning_rate": 2.4785943631823048e-05, "loss": 0.2824, "step": 2779 }, { "epoch": 0.04958441836407092, "grad_norm": 0.5116544365882874, "learning_rate": 2.4794862647163754e-05, "loss": 0.3458, "step": 2780 }, { "epoch": 0.04960225448578461, "grad_norm": 0.4267617464065552, "learning_rate": 2.480378166250446e-05, "loss": 0.2801, "step": 2781 }, { "epoch": 0.04962009060749831, "grad_norm": 0.7026563286781311, "learning_rate": 2.4812700677845167e-05, "loss": 0.2947, "step": 2782 }, { "epoch": 0.049637926729212, "grad_norm": 0.6572867035865784, "learning_rate": 2.4821619693185874e-05, "loss": 0.2629, "step": 2783 }, { "epoch": 0.0496557628509257, "grad_norm": 0.4855692982673645, "learning_rate": 2.483053870852658e-05, "loss": 0.2915, "step": 2784 }, { "epoch": 0.04967359897263939, "grad_norm": 0.34191015362739563, "learning_rate": 2.4839457723867286e-05, "loss": 0.2672, "step": 2785 }, { "epoch": 0.04969143509435309, "grad_norm": 0.5398693680763245, "learning_rate": 2.4848376739207993e-05, "loss": 0.285, "step": 2786 }, { "epoch": 0.04970927121606678, "grad_norm": 0.5253065824508667, "learning_rate": 2.48572957545487e-05, "loss": 0.3257, "step": 2787 }, { "epoch": 0.04972710733778047, "grad_norm": 0.4885236322879791, "learning_rate": 2.4866214769889405e-05, "loss": 0.3412, "step": 2788 }, { "epoch": 0.049744943459494165, "grad_norm": 0.5017727017402649, "learning_rate": 2.4875133785230112e-05, "loss": 0.3061, "step": 2789 }, { "epoch": 0.04976277958120786, "grad_norm": 0.5432559251785278, "learning_rate": 2.4884052800570818e-05, "loss": 0.3005, "step": 2790 }, { "epoch": 0.049780615702921555, "grad_norm": 0.39586201310157776, "learning_rate": 2.4892971815911525e-05, "loss": 0.3135, "step": 2791 }, { "epoch": 0.04979845182463525, "grad_norm": 0.4748157262802124, "learning_rate": 2.490189083125223e-05, "loss": 0.2895, "step": 2792 }, { "epoch": 0.049816287946348944, "grad_norm": 0.4283110201358795, "learning_rate": 2.4910809846592937e-05, "loss": 0.3083, "step": 2793 }, { "epoch": 0.04983412406806264, "grad_norm": 0.427421897649765, "learning_rate": 2.4919728861933644e-05, "loss": 0.291, "step": 2794 }, { "epoch": 0.049851960189776334, "grad_norm": 0.5595164895057678, "learning_rate": 2.492864787727435e-05, "loss": 0.3381, "step": 2795 }, { "epoch": 0.04986979631149003, "grad_norm": 0.36938780546188354, "learning_rate": 2.4937566892615057e-05, "loss": 0.2386, "step": 2796 }, { "epoch": 0.049887632433203724, "grad_norm": 0.495216965675354, "learning_rate": 2.4946485907955763e-05, "loss": 0.2872, "step": 2797 }, { "epoch": 0.04990546855491742, "grad_norm": 0.5744511485099792, "learning_rate": 2.495540492329647e-05, "loss": 0.2605, "step": 2798 }, { "epoch": 0.049923304676631114, "grad_norm": 0.4692898392677307, "learning_rate": 2.4964323938637176e-05, "loss": 0.2574, "step": 2799 }, { "epoch": 0.04994114079834481, "grad_norm": 0.6649041771888733, "learning_rate": 2.4973242953977882e-05, "loss": 0.3636, "step": 2800 }, { "epoch": 0.049958976920058504, "grad_norm": 0.4850791394710541, "learning_rate": 2.498216196931859e-05, "loss": 0.3021, "step": 2801 }, { "epoch": 0.0499768130417722, "grad_norm": 0.3736015260219574, "learning_rate": 2.4991080984659295e-05, "loss": 0.2793, "step": 2802 }, { "epoch": 0.049994649163485894, "grad_norm": 0.45169389247894287, "learning_rate": 2.5e-05, "loss": 0.2555, "step": 2803 }, { "epoch": 0.05001248528519959, "grad_norm": 0.40781697630882263, "learning_rate": 2.500891901534071e-05, "loss": 0.3162, "step": 2804 }, { "epoch": 0.05003032140691328, "grad_norm": 0.48869258165359497, "learning_rate": 2.501783803068141e-05, "loss": 0.341, "step": 2805 }, { "epoch": 0.05004815752862698, "grad_norm": 0.4770379960536957, "learning_rate": 2.502675704602212e-05, "loss": 0.2912, "step": 2806 }, { "epoch": 0.05006599365034067, "grad_norm": 0.5151196122169495, "learning_rate": 2.5035676061362827e-05, "loss": 0.2994, "step": 2807 }, { "epoch": 0.05008382977205436, "grad_norm": 0.9012157320976257, "learning_rate": 2.504459507670353e-05, "loss": 0.2494, "step": 2808 }, { "epoch": 0.050101665893768056, "grad_norm": 0.48816928267478943, "learning_rate": 2.505351409204424e-05, "loss": 0.335, "step": 2809 }, { "epoch": 0.05011950201548175, "grad_norm": 0.478715181350708, "learning_rate": 2.5062433107384946e-05, "loss": 0.2973, "step": 2810 }, { "epoch": 0.050137338137195446, "grad_norm": 0.5877270102500916, "learning_rate": 2.507135212272565e-05, "loss": 0.4271, "step": 2811 }, { "epoch": 0.05015517425890914, "grad_norm": 0.39555367827415466, "learning_rate": 2.508027113806636e-05, "loss": 0.2391, "step": 2812 }, { "epoch": 0.050173010380622836, "grad_norm": 0.3225173354148865, "learning_rate": 2.5089190153407065e-05, "loss": 0.2875, "step": 2813 }, { "epoch": 0.05019084650233653, "grad_norm": 0.5344067811965942, "learning_rate": 2.5098109168747768e-05, "loss": 0.3146, "step": 2814 }, { "epoch": 0.050208682624050226, "grad_norm": 0.4604073464870453, "learning_rate": 2.5107028184088478e-05, "loss": 0.2374, "step": 2815 }, { "epoch": 0.05022651874576392, "grad_norm": 0.5769338011741638, "learning_rate": 2.5115947199429184e-05, "loss": 0.3054, "step": 2816 }, { "epoch": 0.050244354867477616, "grad_norm": 0.519643247127533, "learning_rate": 2.5124866214769894e-05, "loss": 0.3037, "step": 2817 }, { "epoch": 0.05026219098919131, "grad_norm": 0.3918305039405823, "learning_rate": 2.5133785230110597e-05, "loss": 0.287, "step": 2818 }, { "epoch": 0.050280027110905005, "grad_norm": 0.8546401262283325, "learning_rate": 2.5142704245451303e-05, "loss": 0.3325, "step": 2819 }, { "epoch": 0.0502978632326187, "grad_norm": 0.47385314106941223, "learning_rate": 2.5151623260792013e-05, "loss": 0.2745, "step": 2820 }, { "epoch": 0.050315699354332395, "grad_norm": 0.426802396774292, "learning_rate": 2.5160542276132716e-05, "loss": 0.3601, "step": 2821 }, { "epoch": 0.05033353547604609, "grad_norm": 0.5062058568000793, "learning_rate": 2.5169461291473422e-05, "loss": 0.2857, "step": 2822 }, { "epoch": 0.050351371597759785, "grad_norm": 0.48094964027404785, "learning_rate": 2.5178380306814132e-05, "loss": 0.2974, "step": 2823 }, { "epoch": 0.05036920771947348, "grad_norm": 0.4357699751853943, "learning_rate": 2.5187299322154835e-05, "loss": 0.298, "step": 2824 }, { "epoch": 0.050387043841187175, "grad_norm": 0.5114694833755493, "learning_rate": 2.519621833749554e-05, "loss": 0.3536, "step": 2825 }, { "epoch": 0.05040487996290087, "grad_norm": 0.4184374213218689, "learning_rate": 2.520513735283625e-05, "loss": 0.2515, "step": 2826 }, { "epoch": 0.050422716084614565, "grad_norm": 0.492558091878891, "learning_rate": 2.521405636817695e-05, "loss": 0.3362, "step": 2827 }, { "epoch": 0.05044055220632825, "grad_norm": 0.5285553932189941, "learning_rate": 2.522297538351766e-05, "loss": 0.2891, "step": 2828 }, { "epoch": 0.05045838832804195, "grad_norm": 0.42458224296569824, "learning_rate": 2.523189439885837e-05, "loss": 0.2751, "step": 2829 }, { "epoch": 0.05047622444975564, "grad_norm": 0.4313815236091614, "learning_rate": 2.524081341419907e-05, "loss": 0.3602, "step": 2830 }, { "epoch": 0.05049406057146934, "grad_norm": 0.5407450199127197, "learning_rate": 2.524973242953978e-05, "loss": 0.3251, "step": 2831 }, { "epoch": 0.05051189669318303, "grad_norm": 0.412727028131485, "learning_rate": 2.5258651444880486e-05, "loss": 0.2634, "step": 2832 }, { "epoch": 0.05052973281489673, "grad_norm": 0.2968381345272064, "learning_rate": 2.526757046022119e-05, "loss": 0.2663, "step": 2833 }, { "epoch": 0.05054756893661042, "grad_norm": 0.6257922053337097, "learning_rate": 2.52764894755619e-05, "loss": 0.3231, "step": 2834 }, { "epoch": 0.05056540505832412, "grad_norm": 0.5430404543876648, "learning_rate": 2.5285408490902605e-05, "loss": 0.3239, "step": 2835 }, { "epoch": 0.05058324118003781, "grad_norm": 0.4025420546531677, "learning_rate": 2.529432750624331e-05, "loss": 0.2496, "step": 2836 }, { "epoch": 0.05060107730175151, "grad_norm": 0.49791595339775085, "learning_rate": 2.5303246521584018e-05, "loss": 0.2949, "step": 2837 }, { "epoch": 0.0506189134234652, "grad_norm": 0.5615082383155823, "learning_rate": 2.5312165536924725e-05, "loss": 0.3253, "step": 2838 }, { "epoch": 0.0506367495451789, "grad_norm": 0.4231736958026886, "learning_rate": 2.5321084552265434e-05, "loss": 0.2426, "step": 2839 }, { "epoch": 0.05065458566689259, "grad_norm": 0.3824504017829895, "learning_rate": 2.5330003567606137e-05, "loss": 0.2586, "step": 2840 }, { "epoch": 0.05067242178860629, "grad_norm": 0.4817415475845337, "learning_rate": 2.5338922582946844e-05, "loss": 0.2663, "step": 2841 }, { "epoch": 0.05069025791031998, "grad_norm": 0.45310771465301514, "learning_rate": 2.5347841598287553e-05, "loss": 0.2725, "step": 2842 }, { "epoch": 0.050708094032033676, "grad_norm": 0.6365208625793457, "learning_rate": 2.5356760613628256e-05, "loss": 0.297, "step": 2843 }, { "epoch": 0.05072593015374737, "grad_norm": 0.4242004454135895, "learning_rate": 2.5365679628968963e-05, "loss": 0.2659, "step": 2844 }, { "epoch": 0.050743766275461066, "grad_norm": 0.7390111684799194, "learning_rate": 2.5374598644309673e-05, "loss": 0.3009, "step": 2845 }, { "epoch": 0.05076160239717476, "grad_norm": 0.38651764392852783, "learning_rate": 2.5383517659650376e-05, "loss": 0.2754, "step": 2846 }, { "epoch": 0.050779438518888456, "grad_norm": 0.4260665774345398, "learning_rate": 2.5392436674991082e-05, "loss": 0.3592, "step": 2847 }, { "epoch": 0.050797274640602144, "grad_norm": 0.45752131938934326, "learning_rate": 2.5401355690331792e-05, "loss": 0.2646, "step": 2848 }, { "epoch": 0.05081511076231584, "grad_norm": 0.4391932189464569, "learning_rate": 2.5410274705672495e-05, "loss": 0.3309, "step": 2849 }, { "epoch": 0.050832946884029534, "grad_norm": 0.4140661060810089, "learning_rate": 2.54191937210132e-05, "loss": 0.2656, "step": 2850 }, { "epoch": 0.05085078300574323, "grad_norm": 0.4588901400566101, "learning_rate": 2.542811273635391e-05, "loss": 0.3797, "step": 2851 }, { "epoch": 0.050868619127456924, "grad_norm": 0.5754001140594482, "learning_rate": 2.5437031751694614e-05, "loss": 0.2538, "step": 2852 }, { "epoch": 0.05088645524917062, "grad_norm": 0.37646663188934326, "learning_rate": 2.544595076703532e-05, "loss": 0.2321, "step": 2853 }, { "epoch": 0.050904291370884314, "grad_norm": 0.5341233015060425, "learning_rate": 2.545486978237603e-05, "loss": 0.2833, "step": 2854 }, { "epoch": 0.05092212749259801, "grad_norm": 0.44105154275894165, "learning_rate": 2.546378879771673e-05, "loss": 0.2872, "step": 2855 }, { "epoch": 0.050939963614311703, "grad_norm": 0.4411798119544983, "learning_rate": 2.547270781305744e-05, "loss": 0.3343, "step": 2856 }, { "epoch": 0.0509577997360254, "grad_norm": 0.4009215533733368, "learning_rate": 2.5481626828398146e-05, "loss": 0.2879, "step": 2857 }, { "epoch": 0.05097563585773909, "grad_norm": 0.43954575061798096, "learning_rate": 2.549054584373885e-05, "loss": 0.3138, "step": 2858 }, { "epoch": 0.05099347197945279, "grad_norm": 0.425375759601593, "learning_rate": 2.549946485907956e-05, "loss": 0.31, "step": 2859 }, { "epoch": 0.05101130810116648, "grad_norm": 0.3734534680843353, "learning_rate": 2.5508383874420265e-05, "loss": 0.2811, "step": 2860 }, { "epoch": 0.05102914422288018, "grad_norm": 0.449495792388916, "learning_rate": 2.5517302889760968e-05, "loss": 0.2842, "step": 2861 }, { "epoch": 0.05104698034459387, "grad_norm": 0.415831595659256, "learning_rate": 2.5526221905101678e-05, "loss": 0.3001, "step": 2862 }, { "epoch": 0.05106481646630757, "grad_norm": 0.522121787071228, "learning_rate": 2.5535140920442384e-05, "loss": 0.2817, "step": 2863 }, { "epoch": 0.05108265258802126, "grad_norm": 2.2089128494262695, "learning_rate": 2.5544059935783094e-05, "loss": 0.2395, "step": 2864 }, { "epoch": 0.05110048870973496, "grad_norm": 0.41909059882164, "learning_rate": 2.5552978951123797e-05, "loss": 0.3211, "step": 2865 }, { "epoch": 0.05111832483144865, "grad_norm": 0.377627968788147, "learning_rate": 2.5561897966464503e-05, "loss": 0.2508, "step": 2866 }, { "epoch": 0.05113616095316235, "grad_norm": 0.4478866755962372, "learning_rate": 2.5570816981805213e-05, "loss": 0.3244, "step": 2867 }, { "epoch": 0.051153997074876036, "grad_norm": 0.47881847620010376, "learning_rate": 2.5579735997145916e-05, "loss": 0.2786, "step": 2868 }, { "epoch": 0.05117183319658973, "grad_norm": 0.5202912092208862, "learning_rate": 2.5588655012486622e-05, "loss": 0.3574, "step": 2869 }, { "epoch": 0.051189669318303425, "grad_norm": 0.7261667847633362, "learning_rate": 2.5597574027827332e-05, "loss": 0.3479, "step": 2870 }, { "epoch": 0.05120750544001712, "grad_norm": 0.5155419111251831, "learning_rate": 2.5606493043168035e-05, "loss": 0.2844, "step": 2871 }, { "epoch": 0.051225341561730815, "grad_norm": 0.5000239610671997, "learning_rate": 2.561541205850874e-05, "loss": 0.2889, "step": 2872 }, { "epoch": 0.05124317768344451, "grad_norm": 0.5503202080726624, "learning_rate": 2.562433107384945e-05, "loss": 0.2718, "step": 2873 }, { "epoch": 0.051261013805158205, "grad_norm": 0.4166640043258667, "learning_rate": 2.5633250089190154e-05, "loss": 0.2782, "step": 2874 }, { "epoch": 0.0512788499268719, "grad_norm": 0.4286460280418396, "learning_rate": 2.564216910453086e-05, "loss": 0.333, "step": 2875 }, { "epoch": 0.051296686048585595, "grad_norm": 0.3176979124546051, "learning_rate": 2.565108811987157e-05, "loss": 0.2494, "step": 2876 }, { "epoch": 0.05131452217029929, "grad_norm": 0.3552425503730774, "learning_rate": 2.5660007135212273e-05, "loss": 0.2382, "step": 2877 }, { "epoch": 0.051332358292012985, "grad_norm": 0.5758682489395142, "learning_rate": 2.566892615055298e-05, "loss": 0.3464, "step": 2878 }, { "epoch": 0.05135019441372668, "grad_norm": 0.3782997131347656, "learning_rate": 2.567784516589369e-05, "loss": 0.2879, "step": 2879 }, { "epoch": 0.051368030535440375, "grad_norm": 0.5711967349052429, "learning_rate": 2.568676418123439e-05, "loss": 0.2249, "step": 2880 }, { "epoch": 0.05138586665715407, "grad_norm": 0.48385098576545715, "learning_rate": 2.56956831965751e-05, "loss": 0.3048, "step": 2881 }, { "epoch": 0.051403702778867764, "grad_norm": 0.4495086371898651, "learning_rate": 2.570460221191581e-05, "loss": 0.2982, "step": 2882 }, { "epoch": 0.05142153890058146, "grad_norm": 0.49659374356269836, "learning_rate": 2.5713521227256508e-05, "loss": 0.3092, "step": 2883 }, { "epoch": 0.051439375022295154, "grad_norm": 0.3788304626941681, "learning_rate": 2.5722440242597218e-05, "loss": 0.2807, "step": 2884 }, { "epoch": 0.05145721114400885, "grad_norm": 0.4766710102558136, "learning_rate": 2.5731359257937924e-05, "loss": 0.3716, "step": 2885 }, { "epoch": 0.051475047265722544, "grad_norm": 0.4333093464374542, "learning_rate": 2.5740278273278627e-05, "loss": 0.2942, "step": 2886 }, { "epoch": 0.05149288338743624, "grad_norm": 0.5158129930496216, "learning_rate": 2.5749197288619337e-05, "loss": 0.286, "step": 2887 }, { "epoch": 0.05151071950914993, "grad_norm": 0.9444575309753418, "learning_rate": 2.5758116303960044e-05, "loss": 0.3562, "step": 2888 }, { "epoch": 0.05152855563086362, "grad_norm": 0.592723548412323, "learning_rate": 2.5767035319300753e-05, "loss": 0.3498, "step": 2889 }, { "epoch": 0.05154639175257732, "grad_norm": 0.4227851629257202, "learning_rate": 2.5775954334641456e-05, "loss": 0.3013, "step": 2890 }, { "epoch": 0.05156422787429101, "grad_norm": 0.4196017384529114, "learning_rate": 2.5784873349982163e-05, "loss": 0.2958, "step": 2891 }, { "epoch": 0.05158206399600471, "grad_norm": 0.496559739112854, "learning_rate": 2.5793792365322872e-05, "loss": 0.3345, "step": 2892 }, { "epoch": 0.0515999001177184, "grad_norm": 0.5769611597061157, "learning_rate": 2.5802711380663575e-05, "loss": 0.3352, "step": 2893 }, { "epoch": 0.0516177362394321, "grad_norm": 0.49038267135620117, "learning_rate": 2.5811630396004282e-05, "loss": 0.2857, "step": 2894 }, { "epoch": 0.05163557236114579, "grad_norm": 0.3836861252784729, "learning_rate": 2.582054941134499e-05, "loss": 0.2732, "step": 2895 }, { "epoch": 0.051653408482859486, "grad_norm": 0.531437337398529, "learning_rate": 2.5829468426685695e-05, "loss": 0.3088, "step": 2896 }, { "epoch": 0.05167124460457318, "grad_norm": 0.927949845790863, "learning_rate": 2.58383874420264e-05, "loss": 0.367, "step": 2897 }, { "epoch": 0.051689080726286876, "grad_norm": 0.44664403796195984, "learning_rate": 2.584730645736711e-05, "loss": 0.3421, "step": 2898 }, { "epoch": 0.05170691684800057, "grad_norm": 0.5556526780128479, "learning_rate": 2.5856225472707814e-05, "loss": 0.2948, "step": 2899 }, { "epoch": 0.051724752969714266, "grad_norm": 0.49114635586738586, "learning_rate": 2.586514448804852e-05, "loss": 0.2775, "step": 2900 }, { "epoch": 0.05174258909142796, "grad_norm": 0.4353705048561096, "learning_rate": 2.587406350338923e-05, "loss": 0.2956, "step": 2901 }, { "epoch": 0.051760425213141656, "grad_norm": 0.6163909435272217, "learning_rate": 2.5882982518729933e-05, "loss": 0.4008, "step": 2902 }, { "epoch": 0.05177826133485535, "grad_norm": 0.483247846364975, "learning_rate": 2.589190153407064e-05, "loss": 0.2734, "step": 2903 }, { "epoch": 0.051796097456569046, "grad_norm": 0.49507302045822144, "learning_rate": 2.590082054941135e-05, "loss": 0.3181, "step": 2904 }, { "epoch": 0.05181393357828274, "grad_norm": 0.4445972144603729, "learning_rate": 2.590973956475205e-05, "loss": 0.3219, "step": 2905 }, { "epoch": 0.051831769699996436, "grad_norm": 0.4449050724506378, "learning_rate": 2.591865858009276e-05, "loss": 0.283, "step": 2906 }, { "epoch": 0.05184960582171013, "grad_norm": 0.390539288520813, "learning_rate": 2.5927577595433468e-05, "loss": 0.2845, "step": 2907 }, { "epoch": 0.05186744194342382, "grad_norm": 0.43584051728248596, "learning_rate": 2.5936496610774168e-05, "loss": 0.3225, "step": 2908 }, { "epoch": 0.05188527806513751, "grad_norm": 0.35692664980888367, "learning_rate": 2.5945415626114878e-05, "loss": 0.3023, "step": 2909 }, { "epoch": 0.05190311418685121, "grad_norm": 0.3913831114768982, "learning_rate": 2.5954334641455584e-05, "loss": 0.21, "step": 2910 }, { "epoch": 0.0519209503085649, "grad_norm": 0.5844758152961731, "learning_rate": 2.5963253656796294e-05, "loss": 0.2907, "step": 2911 }, { "epoch": 0.0519387864302786, "grad_norm": 0.46606016159057617, "learning_rate": 2.5972172672136997e-05, "loss": 0.2727, "step": 2912 }, { "epoch": 0.05195662255199229, "grad_norm": 0.4373425543308258, "learning_rate": 2.5981091687477703e-05, "loss": 0.3108, "step": 2913 }, { "epoch": 0.05197445867370599, "grad_norm": 0.3102944493293762, "learning_rate": 2.5990010702818413e-05, "loss": 0.2366, "step": 2914 }, { "epoch": 0.05199229479541968, "grad_norm": 0.5374158024787903, "learning_rate": 2.5998929718159116e-05, "loss": 0.3073, "step": 2915 }, { "epoch": 0.05201013091713338, "grad_norm": 0.4365256428718567, "learning_rate": 2.6007848733499822e-05, "loss": 0.2712, "step": 2916 }, { "epoch": 0.05202796703884707, "grad_norm": 0.5548803806304932, "learning_rate": 2.6016767748840532e-05, "loss": 0.375, "step": 2917 }, { "epoch": 0.05204580316056077, "grad_norm": 0.5051923990249634, "learning_rate": 2.6025686764181235e-05, "loss": 0.3929, "step": 2918 }, { "epoch": 0.05206363928227446, "grad_norm": 0.45372942090034485, "learning_rate": 2.603460577952194e-05, "loss": 0.28, "step": 2919 }, { "epoch": 0.05208147540398816, "grad_norm": 0.5156953930854797, "learning_rate": 2.604352479486265e-05, "loss": 0.2965, "step": 2920 }, { "epoch": 0.05209931152570185, "grad_norm": 1.1953109502792358, "learning_rate": 2.6052443810203354e-05, "loss": 0.3607, "step": 2921 }, { "epoch": 0.05211714764741555, "grad_norm": 0.3817157745361328, "learning_rate": 2.606136282554406e-05, "loss": 0.288, "step": 2922 }, { "epoch": 0.05213498376912924, "grad_norm": 0.46914711594581604, "learning_rate": 2.607028184088477e-05, "loss": 0.2943, "step": 2923 }, { "epoch": 0.05215281989084294, "grad_norm": 0.485501766204834, "learning_rate": 2.6079200856225473e-05, "loss": 0.3004, "step": 2924 }, { "epoch": 0.05217065601255663, "grad_norm": 0.44770991802215576, "learning_rate": 2.608811987156618e-05, "loss": 0.2327, "step": 2925 }, { "epoch": 0.05218849213427033, "grad_norm": 0.3211217522621155, "learning_rate": 2.609703888690689e-05, "loss": 0.2568, "step": 2926 }, { "epoch": 0.05220632825598402, "grad_norm": 0.39864063262939453, "learning_rate": 2.6105957902247592e-05, "loss": 0.2804, "step": 2927 }, { "epoch": 0.05222416437769772, "grad_norm": 0.49669742584228516, "learning_rate": 2.61148769175883e-05, "loss": 0.3116, "step": 2928 }, { "epoch": 0.052242000499411405, "grad_norm": 0.5273682475090027, "learning_rate": 2.612379593292901e-05, "loss": 0.291, "step": 2929 }, { "epoch": 0.0522598366211251, "grad_norm": 0.39144521951675415, "learning_rate": 2.6132714948269708e-05, "loss": 0.301, "step": 2930 }, { "epoch": 0.052277672742838795, "grad_norm": 0.402148962020874, "learning_rate": 2.6141633963610418e-05, "loss": 0.2788, "step": 2931 }, { "epoch": 0.05229550886455249, "grad_norm": 0.38230326771736145, "learning_rate": 2.6150552978951128e-05, "loss": 0.3084, "step": 2932 }, { "epoch": 0.052313344986266185, "grad_norm": 0.4424063563346863, "learning_rate": 2.6159471994291827e-05, "loss": 0.2951, "step": 2933 }, { "epoch": 0.05233118110797988, "grad_norm": 0.4585544168949127, "learning_rate": 2.6168391009632537e-05, "loss": 0.3357, "step": 2934 }, { "epoch": 0.052349017229693574, "grad_norm": 0.4543989598751068, "learning_rate": 2.6177310024973243e-05, "loss": 0.2956, "step": 2935 }, { "epoch": 0.05236685335140727, "grad_norm": 0.39039894938468933, "learning_rate": 2.6186229040313953e-05, "loss": 0.2764, "step": 2936 }, { "epoch": 0.052384689473120964, "grad_norm": 0.3535137474536896, "learning_rate": 2.6195148055654656e-05, "loss": 0.2503, "step": 2937 }, { "epoch": 0.05240252559483466, "grad_norm": 0.5466296076774597, "learning_rate": 2.6204067070995363e-05, "loss": 0.3026, "step": 2938 }, { "epoch": 0.052420361716548354, "grad_norm": 0.47230756282806396, "learning_rate": 2.6212986086336072e-05, "loss": 0.3632, "step": 2939 }, { "epoch": 0.05243819783826205, "grad_norm": 0.3603994846343994, "learning_rate": 2.6221905101676775e-05, "loss": 0.289, "step": 2940 }, { "epoch": 0.052456033959975744, "grad_norm": 0.5505605340003967, "learning_rate": 2.623082411701748e-05, "loss": 0.3249, "step": 2941 }, { "epoch": 0.05247387008168944, "grad_norm": 0.4415774345397949, "learning_rate": 2.623974313235819e-05, "loss": 0.3526, "step": 2942 }, { "epoch": 0.052491706203403134, "grad_norm": 0.4135698676109314, "learning_rate": 2.6248662147698894e-05, "loss": 0.253, "step": 2943 }, { "epoch": 0.05250954232511683, "grad_norm": 0.5751461982727051, "learning_rate": 2.62575811630396e-05, "loss": 0.3269, "step": 2944 }, { "epoch": 0.052527378446830524, "grad_norm": 0.4263665974140167, "learning_rate": 2.626650017838031e-05, "loss": 0.3046, "step": 2945 }, { "epoch": 0.05254521456854422, "grad_norm": 0.5448289513587952, "learning_rate": 2.6275419193721014e-05, "loss": 0.3038, "step": 2946 }, { "epoch": 0.05256305069025791, "grad_norm": 0.40797874331474304, "learning_rate": 2.628433820906172e-05, "loss": 0.2939, "step": 2947 }, { "epoch": 0.05258088681197161, "grad_norm": 0.5384146571159363, "learning_rate": 2.629325722440243e-05, "loss": 0.2963, "step": 2948 }, { "epoch": 0.052598722933685296, "grad_norm": 0.37856200337409973, "learning_rate": 2.6302176239743133e-05, "loss": 0.254, "step": 2949 }, { "epoch": 0.05261655905539899, "grad_norm": 0.43756726384162903, "learning_rate": 2.631109525508384e-05, "loss": 0.3324, "step": 2950 }, { "epoch": 0.052634395177112686, "grad_norm": 1.7020657062530518, "learning_rate": 2.632001427042455e-05, "loss": 0.387, "step": 2951 }, { "epoch": 0.05265223129882638, "grad_norm": 0.4374551475048065, "learning_rate": 2.6328933285765252e-05, "loss": 0.3146, "step": 2952 }, { "epoch": 0.052670067420540076, "grad_norm": 0.3677287995815277, "learning_rate": 2.6337852301105958e-05, "loss": 0.2629, "step": 2953 }, { "epoch": 0.05268790354225377, "grad_norm": 0.4279806315898895, "learning_rate": 2.6346771316446668e-05, "loss": 0.2785, "step": 2954 }, { "epoch": 0.052705739663967466, "grad_norm": 0.4172787368297577, "learning_rate": 2.635569033178737e-05, "loss": 0.3172, "step": 2955 }, { "epoch": 0.05272357578568116, "grad_norm": 0.7966938018798828, "learning_rate": 2.6364609347128077e-05, "loss": 0.2457, "step": 2956 }, { "epoch": 0.052741411907394856, "grad_norm": 0.5118768215179443, "learning_rate": 2.6373528362468787e-05, "loss": 0.2471, "step": 2957 }, { "epoch": 0.05275924802910855, "grad_norm": 0.41275715827941895, "learning_rate": 2.6382447377809494e-05, "loss": 0.2778, "step": 2958 }, { "epoch": 0.052777084150822245, "grad_norm": 0.5184250473976135, "learning_rate": 2.6391366393150197e-05, "loss": 0.361, "step": 2959 }, { "epoch": 0.05279492027253594, "grad_norm": 0.601280689239502, "learning_rate": 2.6400285408490903e-05, "loss": 0.2862, "step": 2960 }, { "epoch": 0.052812756394249635, "grad_norm": 0.4165460467338562, "learning_rate": 2.6409204423831613e-05, "loss": 0.2429, "step": 2961 }, { "epoch": 0.05283059251596333, "grad_norm": 0.48465052247047424, "learning_rate": 2.6418123439172316e-05, "loss": 0.2706, "step": 2962 }, { "epoch": 0.052848428637677025, "grad_norm": 0.33272939920425415, "learning_rate": 2.6427042454513022e-05, "loss": 0.2427, "step": 2963 }, { "epoch": 0.05286626475939072, "grad_norm": 0.4371368885040283, "learning_rate": 2.6435961469853732e-05, "loss": 0.2818, "step": 2964 }, { "epoch": 0.052884100881104415, "grad_norm": 0.3477995693683624, "learning_rate": 2.6444880485194435e-05, "loss": 0.273, "step": 2965 }, { "epoch": 0.05290193700281811, "grad_norm": 0.41649848222732544, "learning_rate": 2.645379950053514e-05, "loss": 0.2682, "step": 2966 }, { "epoch": 0.052919773124531805, "grad_norm": 0.45629921555519104, "learning_rate": 2.646271851587585e-05, "loss": 0.2497, "step": 2967 }, { "epoch": 0.0529376092462455, "grad_norm": 0.3903038501739502, "learning_rate": 2.6471637531216554e-05, "loss": 0.3093, "step": 2968 }, { "epoch": 0.05295544536795919, "grad_norm": 0.3767849802970886, "learning_rate": 2.648055654655726e-05, "loss": 0.2813, "step": 2969 }, { "epoch": 0.05297328148967288, "grad_norm": 0.6601611971855164, "learning_rate": 2.648947556189797e-05, "loss": 0.2368, "step": 2970 }, { "epoch": 0.05299111761138658, "grad_norm": 0.399946391582489, "learning_rate": 2.6498394577238673e-05, "loss": 0.2905, "step": 2971 }, { "epoch": 0.05300895373310027, "grad_norm": 0.37308692932128906, "learning_rate": 2.650731359257938e-05, "loss": 0.2735, "step": 2972 }, { "epoch": 0.05302678985481397, "grad_norm": 0.4638592302799225, "learning_rate": 2.651623260792009e-05, "loss": 0.3048, "step": 2973 }, { "epoch": 0.05304462597652766, "grad_norm": 0.4922824203968048, "learning_rate": 2.6525151623260792e-05, "loss": 0.3118, "step": 2974 }, { "epoch": 0.05306246209824136, "grad_norm": 0.46285897493362427, "learning_rate": 2.65340706386015e-05, "loss": 0.3461, "step": 2975 }, { "epoch": 0.05308029821995505, "grad_norm": 0.6495586037635803, "learning_rate": 2.654298965394221e-05, "loss": 0.2968, "step": 2976 }, { "epoch": 0.05309813434166875, "grad_norm": 0.335318922996521, "learning_rate": 2.655190866928291e-05, "loss": 0.2448, "step": 2977 }, { "epoch": 0.05311597046338244, "grad_norm": 0.45703160762786865, "learning_rate": 2.6560827684623618e-05, "loss": 0.2687, "step": 2978 }, { "epoch": 0.05313380658509614, "grad_norm": 0.411228746175766, "learning_rate": 2.6569746699964328e-05, "loss": 0.2964, "step": 2979 }, { "epoch": 0.05315164270680983, "grad_norm": 0.32256802916526794, "learning_rate": 2.657866571530503e-05, "loss": 0.2477, "step": 2980 }, { "epoch": 0.05316947882852353, "grad_norm": 0.3313572108745575, "learning_rate": 2.6587584730645737e-05, "loss": 0.2502, "step": 2981 }, { "epoch": 0.05318731495023722, "grad_norm": 0.3550911843776703, "learning_rate": 2.6596503745986447e-05, "loss": 0.2223, "step": 2982 }, { "epoch": 0.05320515107195092, "grad_norm": 0.41639426350593567, "learning_rate": 2.6605422761327153e-05, "loss": 0.2759, "step": 2983 }, { "epoch": 0.05322298719366461, "grad_norm": 0.4413129687309265, "learning_rate": 2.6614341776667856e-05, "loss": 0.3204, "step": 2984 }, { "epoch": 0.053240823315378306, "grad_norm": 0.3897430896759033, "learning_rate": 2.6623260792008566e-05, "loss": 0.2577, "step": 2985 }, { "epoch": 0.053258659437092, "grad_norm": 0.4310801923274994, "learning_rate": 2.6632179807349272e-05, "loss": 0.2786, "step": 2986 }, { "epoch": 0.053276495558805696, "grad_norm": 0.41899827122688293, "learning_rate": 2.6641098822689975e-05, "loss": 0.2959, "step": 2987 }, { "epoch": 0.05329433168051939, "grad_norm": 0.42233455181121826, "learning_rate": 2.665001783803068e-05, "loss": 0.2553, "step": 2988 }, { "epoch": 0.05331216780223308, "grad_norm": 0.4711173474788666, "learning_rate": 2.665893685337139e-05, "loss": 0.2947, "step": 2989 }, { "epoch": 0.053330003923946774, "grad_norm": 0.48387402296066284, "learning_rate": 2.6667855868712094e-05, "loss": 0.3444, "step": 2990 }, { "epoch": 0.05334784004566047, "grad_norm": 0.35652148723602295, "learning_rate": 2.66767748840528e-05, "loss": 0.2864, "step": 2991 }, { "epoch": 0.053365676167374164, "grad_norm": 0.3819200396537781, "learning_rate": 2.668569389939351e-05, "loss": 0.2841, "step": 2992 }, { "epoch": 0.05338351228908786, "grad_norm": 0.32867348194122314, "learning_rate": 2.6694612914734213e-05, "loss": 0.2999, "step": 2993 }, { "epoch": 0.053401348410801554, "grad_norm": 0.3875643014907837, "learning_rate": 2.670353193007492e-05, "loss": 0.2543, "step": 2994 }, { "epoch": 0.05341918453251525, "grad_norm": 0.5814125537872314, "learning_rate": 2.671245094541563e-05, "loss": 0.2709, "step": 2995 }, { "epoch": 0.053437020654228944, "grad_norm": 0.3573005199432373, "learning_rate": 2.6721369960756333e-05, "loss": 0.2316, "step": 2996 }, { "epoch": 0.05345485677594264, "grad_norm": 0.5520204901695251, "learning_rate": 2.673028897609704e-05, "loss": 0.3047, "step": 2997 }, { "epoch": 0.05347269289765633, "grad_norm": 0.41722971200942993, "learning_rate": 2.673920799143775e-05, "loss": 0.2709, "step": 2998 }, { "epoch": 0.05349052901937003, "grad_norm": 0.5371643304824829, "learning_rate": 2.6748127006778452e-05, "loss": 0.255, "step": 2999 }, { "epoch": 0.05350836514108372, "grad_norm": 0.4850537180900574, "learning_rate": 2.6757046022119158e-05, "loss": 0.3051, "step": 3000 }, { "epoch": 0.05350836514108372, "eval_loss": 0.28895989060401917, "eval_runtime": 1569.3044, "eval_samples_per_second": 0.653, "eval_steps_per_second": 0.109, "step": 3000 }, { "epoch": 0.05352620126279742, "grad_norm": 0.5266303420066833, "learning_rate": 2.6765965037459868e-05, "loss": 0.2834, "step": 3001 }, { "epoch": 0.05354403738451111, "grad_norm": 0.43296658992767334, "learning_rate": 2.677488405280057e-05, "loss": 0.2907, "step": 3002 }, { "epoch": 0.05356187350622481, "grad_norm": 0.4872777760028839, "learning_rate": 2.6783803068141277e-05, "loss": 0.3463, "step": 3003 }, { "epoch": 0.0535797096279385, "grad_norm": 0.6589179039001465, "learning_rate": 2.6792722083481987e-05, "loss": 0.2644, "step": 3004 }, { "epoch": 0.0535975457496522, "grad_norm": 0.4832260012626648, "learning_rate": 2.6801641098822693e-05, "loss": 0.2683, "step": 3005 }, { "epoch": 0.05361538187136589, "grad_norm": 0.33053654432296753, "learning_rate": 2.6810560114163396e-05, "loss": 0.2564, "step": 3006 }, { "epoch": 0.05363321799307959, "grad_norm": 0.38049063086509705, "learning_rate": 2.6819479129504106e-05, "loss": 0.3414, "step": 3007 }, { "epoch": 0.05365105411479328, "grad_norm": 0.41180679202079773, "learning_rate": 2.6828398144844813e-05, "loss": 0.2937, "step": 3008 }, { "epoch": 0.05366889023650697, "grad_norm": 0.3803144693374634, "learning_rate": 2.6837317160185516e-05, "loss": 0.3214, "step": 3009 }, { "epoch": 0.053686726358220666, "grad_norm": 0.36073756217956543, "learning_rate": 2.6846236175526225e-05, "loss": 0.2575, "step": 3010 }, { "epoch": 0.05370456247993436, "grad_norm": 0.3315037190914154, "learning_rate": 2.6855155190866932e-05, "loss": 0.2746, "step": 3011 }, { "epoch": 0.053722398601648055, "grad_norm": 0.39566829800605774, "learning_rate": 2.6864074206207635e-05, "loss": 0.2887, "step": 3012 }, { "epoch": 0.05374023472336175, "grad_norm": 0.3574841320514679, "learning_rate": 2.687299322154834e-05, "loss": 0.301, "step": 3013 }, { "epoch": 0.053758070845075445, "grad_norm": 0.3679479956626892, "learning_rate": 2.688191223688905e-05, "loss": 0.2782, "step": 3014 }, { "epoch": 0.05377590696678914, "grad_norm": 0.516028106212616, "learning_rate": 2.6890831252229754e-05, "loss": 0.3331, "step": 3015 }, { "epoch": 0.053793743088502835, "grad_norm": 0.4886375963687897, "learning_rate": 2.689975026757046e-05, "loss": 0.3141, "step": 3016 }, { "epoch": 0.05381157921021653, "grad_norm": 0.4060991406440735, "learning_rate": 2.690866928291117e-05, "loss": 0.303, "step": 3017 }, { "epoch": 0.053829415331930225, "grad_norm": 0.3348287343978882, "learning_rate": 2.6917588298251873e-05, "loss": 0.2586, "step": 3018 }, { "epoch": 0.05384725145364392, "grad_norm": 0.44578617811203003, "learning_rate": 2.692650731359258e-05, "loss": 0.2988, "step": 3019 }, { "epoch": 0.053865087575357615, "grad_norm": 0.33656173944473267, "learning_rate": 2.693542632893329e-05, "loss": 0.261, "step": 3020 }, { "epoch": 0.05388292369707131, "grad_norm": 0.39461520314216614, "learning_rate": 2.6944345344273992e-05, "loss": 0.275, "step": 3021 }, { "epoch": 0.053900759818785005, "grad_norm": 0.557515561580658, "learning_rate": 2.69532643596147e-05, "loss": 0.3078, "step": 3022 }, { "epoch": 0.0539185959404987, "grad_norm": 0.407611608505249, "learning_rate": 2.6962183374955408e-05, "loss": 0.2137, "step": 3023 }, { "epoch": 0.053936432062212394, "grad_norm": 0.4608760178089142, "learning_rate": 2.697110239029611e-05, "loss": 0.3493, "step": 3024 }, { "epoch": 0.05395426818392609, "grad_norm": 0.3761187493801117, "learning_rate": 2.6980021405636818e-05, "loss": 0.24, "step": 3025 }, { "epoch": 0.053972104305639784, "grad_norm": 0.6394914984703064, "learning_rate": 2.6988940420977527e-05, "loss": 0.3024, "step": 3026 }, { "epoch": 0.05398994042735348, "grad_norm": 0.7831883430480957, "learning_rate": 2.699785943631823e-05, "loss": 0.3252, "step": 3027 }, { "epoch": 0.054007776549067174, "grad_norm": 0.43982136249542236, "learning_rate": 2.7006778451658937e-05, "loss": 0.3415, "step": 3028 }, { "epoch": 0.05402561267078086, "grad_norm": 0.5146108865737915, "learning_rate": 2.7015697466999647e-05, "loss": 0.3062, "step": 3029 }, { "epoch": 0.05404344879249456, "grad_norm": 0.3952732980251312, "learning_rate": 2.7024616482340353e-05, "loss": 0.2898, "step": 3030 }, { "epoch": 0.05406128491420825, "grad_norm": 0.3943594992160797, "learning_rate": 2.7033535497681056e-05, "loss": 0.2686, "step": 3031 }, { "epoch": 0.05407912103592195, "grad_norm": 0.5055153965950012, "learning_rate": 2.7042454513021766e-05, "loss": 0.3217, "step": 3032 }, { "epoch": 0.05409695715763564, "grad_norm": 0.48227664828300476, "learning_rate": 2.7051373528362472e-05, "loss": 0.2487, "step": 3033 }, { "epoch": 0.05411479327934934, "grad_norm": 0.3622559607028961, "learning_rate": 2.7060292543703175e-05, "loss": 0.2929, "step": 3034 }, { "epoch": 0.05413262940106303, "grad_norm": 0.41722092032432556, "learning_rate": 2.7069211559043885e-05, "loss": 0.2558, "step": 3035 }, { "epoch": 0.054150465522776726, "grad_norm": 0.4224678874015808, "learning_rate": 2.707813057438459e-05, "loss": 0.3332, "step": 3036 }, { "epoch": 0.05416830164449042, "grad_norm": 0.536918580532074, "learning_rate": 2.7087049589725294e-05, "loss": 0.2774, "step": 3037 }, { "epoch": 0.054186137766204116, "grad_norm": 0.45742127299308777, "learning_rate": 2.7095968605066e-05, "loss": 0.286, "step": 3038 }, { "epoch": 0.05420397388791781, "grad_norm": 0.43477529287338257, "learning_rate": 2.710488762040671e-05, "loss": 0.3558, "step": 3039 }, { "epoch": 0.054221810009631506, "grad_norm": 0.4702891707420349, "learning_rate": 2.7113806635747413e-05, "loss": 0.2599, "step": 3040 }, { "epoch": 0.0542396461313452, "grad_norm": 0.6312264800071716, "learning_rate": 2.712272565108812e-05, "loss": 0.3242, "step": 3041 }, { "epoch": 0.054257482253058896, "grad_norm": 0.42315179109573364, "learning_rate": 2.713164466642883e-05, "loss": 0.3345, "step": 3042 }, { "epoch": 0.05427531837477259, "grad_norm": 0.3658837676048279, "learning_rate": 2.7140563681769532e-05, "loss": 0.2793, "step": 3043 }, { "epoch": 0.054293154496486286, "grad_norm": 0.4221414625644684, "learning_rate": 2.714948269711024e-05, "loss": 0.3228, "step": 3044 }, { "epoch": 0.05431099061819998, "grad_norm": 0.44399699568748474, "learning_rate": 2.715840171245095e-05, "loss": 0.2625, "step": 3045 }, { "epoch": 0.054328826739913676, "grad_norm": 0.6177511215209961, "learning_rate": 2.716732072779165e-05, "loss": 0.2173, "step": 3046 }, { "epoch": 0.05434666286162737, "grad_norm": 0.5023239254951477, "learning_rate": 2.7176239743132358e-05, "loss": 0.2987, "step": 3047 }, { "epoch": 0.054364498983341066, "grad_norm": 0.3383076786994934, "learning_rate": 2.7185158758473068e-05, "loss": 0.231, "step": 3048 }, { "epoch": 0.054382335105054753, "grad_norm": 0.5170038938522339, "learning_rate": 2.719407777381377e-05, "loss": 0.2843, "step": 3049 }, { "epoch": 0.05440017122676845, "grad_norm": 0.5562129020690918, "learning_rate": 2.7202996789154477e-05, "loss": 0.3023, "step": 3050 }, { "epoch": 0.05441800734848214, "grad_norm": 0.38035914301872253, "learning_rate": 2.7211915804495187e-05, "loss": 0.2858, "step": 3051 }, { "epoch": 0.05443584347019584, "grad_norm": 0.4255860149860382, "learning_rate": 2.722083481983589e-05, "loss": 0.2881, "step": 3052 }, { "epoch": 0.05445367959190953, "grad_norm": 0.2930384576320648, "learning_rate": 2.7229753835176596e-05, "loss": 0.2275, "step": 3053 }, { "epoch": 0.05447151571362323, "grad_norm": 0.42791998386383057, "learning_rate": 2.7238672850517306e-05, "loss": 0.3013, "step": 3054 }, { "epoch": 0.05448935183533692, "grad_norm": 0.4384118616580963, "learning_rate": 2.7247591865858012e-05, "loss": 0.3061, "step": 3055 }, { "epoch": 0.05450718795705062, "grad_norm": 0.584568202495575, "learning_rate": 2.7256510881198715e-05, "loss": 0.335, "step": 3056 }, { "epoch": 0.05452502407876431, "grad_norm": 0.4241713881492615, "learning_rate": 2.7265429896539425e-05, "loss": 0.2707, "step": 3057 }, { "epoch": 0.05454286020047801, "grad_norm": 0.5592104196548462, "learning_rate": 2.727434891188013e-05, "loss": 0.3268, "step": 3058 }, { "epoch": 0.0545606963221917, "grad_norm": 0.38657495379447937, "learning_rate": 2.7283267927220835e-05, "loss": 0.2936, "step": 3059 }, { "epoch": 0.0545785324439054, "grad_norm": 0.5117170214653015, "learning_rate": 2.7292186942561544e-05, "loss": 0.3072, "step": 3060 }, { "epoch": 0.05459636856561909, "grad_norm": 0.616719126701355, "learning_rate": 2.730110595790225e-05, "loss": 0.3407, "step": 3061 }, { "epoch": 0.05461420468733279, "grad_norm": 0.5758919715881348, "learning_rate": 2.7310024973242954e-05, "loss": 0.2504, "step": 3062 }, { "epoch": 0.05463204080904648, "grad_norm": 0.45872604846954346, "learning_rate": 2.731894398858366e-05, "loss": 0.2728, "step": 3063 }, { "epoch": 0.05464987693076018, "grad_norm": 0.5148894190788269, "learning_rate": 2.732786300392437e-05, "loss": 0.276, "step": 3064 }, { "epoch": 0.05466771305247387, "grad_norm": 0.3636108934879303, "learning_rate": 2.7336782019265073e-05, "loss": 0.2539, "step": 3065 }, { "epoch": 0.05468554917418757, "grad_norm": 0.4732643961906433, "learning_rate": 2.734570103460578e-05, "loss": 0.2315, "step": 3066 }, { "epoch": 0.05470338529590126, "grad_norm": 0.4992886781692505, "learning_rate": 2.735462004994649e-05, "loss": 0.3466, "step": 3067 }, { "epoch": 0.05472122141761496, "grad_norm": 0.5815374255180359, "learning_rate": 2.7363539065287192e-05, "loss": 0.2843, "step": 3068 }, { "epoch": 0.054739057539328645, "grad_norm": 0.3680073320865631, "learning_rate": 2.73724580806279e-05, "loss": 0.2919, "step": 3069 }, { "epoch": 0.05475689366104234, "grad_norm": 0.537388026714325, "learning_rate": 2.7381377095968608e-05, "loss": 0.3381, "step": 3070 }, { "epoch": 0.054774729782756035, "grad_norm": 0.5731326341629028, "learning_rate": 2.739029611130931e-05, "loss": 0.3667, "step": 3071 }, { "epoch": 0.05479256590446973, "grad_norm": 0.4754807949066162, "learning_rate": 2.7399215126650018e-05, "loss": 0.2875, "step": 3072 }, { "epoch": 0.054810402026183425, "grad_norm": 0.4064819812774658, "learning_rate": 2.7408134141990727e-05, "loss": 0.3255, "step": 3073 }, { "epoch": 0.05482823814789712, "grad_norm": 0.43807828426361084, "learning_rate": 2.741705315733143e-05, "loss": 0.2859, "step": 3074 }, { "epoch": 0.054846074269610814, "grad_norm": 0.3859362006187439, "learning_rate": 2.7425972172672137e-05, "loss": 0.301, "step": 3075 }, { "epoch": 0.05486391039132451, "grad_norm": 0.4559905529022217, "learning_rate": 2.7434891188012846e-05, "loss": 0.2836, "step": 3076 }, { "epoch": 0.054881746513038204, "grad_norm": 0.36179181933403015, "learning_rate": 2.7443810203353553e-05, "loss": 0.2946, "step": 3077 }, { "epoch": 0.0548995826347519, "grad_norm": 0.4795432686805725, "learning_rate": 2.7452729218694256e-05, "loss": 0.3012, "step": 3078 }, { "epoch": 0.054917418756465594, "grad_norm": 0.36690446734428406, "learning_rate": 2.7461648234034966e-05, "loss": 0.2312, "step": 3079 }, { "epoch": 0.05493525487817929, "grad_norm": 0.4657461941242218, "learning_rate": 2.7470567249375672e-05, "loss": 0.2878, "step": 3080 }, { "epoch": 0.054953090999892984, "grad_norm": 0.3776685893535614, "learning_rate": 2.7479486264716375e-05, "loss": 0.317, "step": 3081 }, { "epoch": 0.05497092712160668, "grad_norm": 0.5808919072151184, "learning_rate": 2.7488405280057085e-05, "loss": 0.3307, "step": 3082 }, { "epoch": 0.054988763243320374, "grad_norm": 0.35996294021606445, "learning_rate": 2.749732429539779e-05, "loss": 0.2642, "step": 3083 }, { "epoch": 0.05500659936503407, "grad_norm": 0.45299363136291504, "learning_rate": 2.7506243310738494e-05, "loss": 0.2558, "step": 3084 }, { "epoch": 0.055024435486747764, "grad_norm": 0.35902100801467896, "learning_rate": 2.7515162326079204e-05, "loss": 0.2665, "step": 3085 }, { "epoch": 0.05504227160846146, "grad_norm": 0.42359569668769836, "learning_rate": 2.752408134141991e-05, "loss": 0.2782, "step": 3086 }, { "epoch": 0.055060107730175153, "grad_norm": 0.5339044332504272, "learning_rate": 2.7533000356760613e-05, "loss": 0.3598, "step": 3087 }, { "epoch": 0.05507794385188885, "grad_norm": 0.3382940888404846, "learning_rate": 2.7541919372101323e-05, "loss": 0.2719, "step": 3088 }, { "epoch": 0.05509577997360254, "grad_norm": 0.5310173630714417, "learning_rate": 2.755083838744203e-05, "loss": 0.3674, "step": 3089 }, { "epoch": 0.05511361609531623, "grad_norm": 0.6789987683296204, "learning_rate": 2.7559757402782732e-05, "loss": 0.3267, "step": 3090 }, { "epoch": 0.055131452217029926, "grad_norm": 0.42640647292137146, "learning_rate": 2.756867641812344e-05, "loss": 0.2734, "step": 3091 }, { "epoch": 0.05514928833874362, "grad_norm": 0.47140589356422424, "learning_rate": 2.757759543346415e-05, "loss": 0.3008, "step": 3092 }, { "epoch": 0.055167124460457316, "grad_norm": 0.42404425144195557, "learning_rate": 2.758651444880485e-05, "loss": 0.3032, "step": 3093 }, { "epoch": 0.05518496058217101, "grad_norm": 0.7277231812477112, "learning_rate": 2.7595433464145558e-05, "loss": 0.3785, "step": 3094 }, { "epoch": 0.055202796703884706, "grad_norm": 0.4522213339805603, "learning_rate": 2.7604352479486268e-05, "loss": 0.2837, "step": 3095 }, { "epoch": 0.0552206328255984, "grad_norm": 0.367829829454422, "learning_rate": 2.761327149482697e-05, "loss": 0.2646, "step": 3096 }, { "epoch": 0.055238468947312096, "grad_norm": 0.384769469499588, "learning_rate": 2.7622190510167677e-05, "loss": 0.268, "step": 3097 }, { "epoch": 0.05525630506902579, "grad_norm": 0.4247475564479828, "learning_rate": 2.7631109525508387e-05, "loss": 0.3188, "step": 3098 }, { "epoch": 0.055274141190739486, "grad_norm": 0.5494495630264282, "learning_rate": 2.764002854084909e-05, "loss": 0.3724, "step": 3099 }, { "epoch": 0.05529197731245318, "grad_norm": 0.5495931506156921, "learning_rate": 2.7648947556189796e-05, "loss": 0.3231, "step": 3100 }, { "epoch": 0.055309813434166875, "grad_norm": 0.5376266241073608, "learning_rate": 2.7657866571530506e-05, "loss": 0.2964, "step": 3101 }, { "epoch": 0.05532764955588057, "grad_norm": 0.49344953894615173, "learning_rate": 2.7666785586871212e-05, "loss": 0.3465, "step": 3102 }, { "epoch": 0.055345485677594265, "grad_norm": 0.3952866196632385, "learning_rate": 2.7675704602211915e-05, "loss": 0.3028, "step": 3103 }, { "epoch": 0.05536332179930796, "grad_norm": 0.39372286200523376, "learning_rate": 2.7684623617552625e-05, "loss": 0.2968, "step": 3104 }, { "epoch": 0.055381157921021655, "grad_norm": 0.53909832239151, "learning_rate": 2.769354263289333e-05, "loss": 0.288, "step": 3105 }, { "epoch": 0.05539899404273535, "grad_norm": 0.6521942615509033, "learning_rate": 2.7702461648234034e-05, "loss": 0.3753, "step": 3106 }, { "epoch": 0.055416830164449045, "grad_norm": 0.3129529654979706, "learning_rate": 2.7711380663574744e-05, "loss": 0.2636, "step": 3107 }, { "epoch": 0.05543466628616274, "grad_norm": 0.3928622007369995, "learning_rate": 2.772029967891545e-05, "loss": 0.2633, "step": 3108 }, { "epoch": 0.055452502407876435, "grad_norm": 0.4225257635116577, "learning_rate": 2.7729218694256154e-05, "loss": 0.2967, "step": 3109 }, { "epoch": 0.05547033852959012, "grad_norm": 0.43595537543296814, "learning_rate": 2.7738137709596863e-05, "loss": 0.2984, "step": 3110 }, { "epoch": 0.05548817465130382, "grad_norm": 0.43720415234565735, "learning_rate": 2.774705672493757e-05, "loss": 0.3059, "step": 3111 }, { "epoch": 0.05550601077301751, "grad_norm": 0.6234900951385498, "learning_rate": 2.7755975740278273e-05, "loss": 0.2663, "step": 3112 }, { "epoch": 0.05552384689473121, "grad_norm": 0.4558577239513397, "learning_rate": 2.7764894755618983e-05, "loss": 0.3029, "step": 3113 }, { "epoch": 0.0555416830164449, "grad_norm": 0.4778018593788147, "learning_rate": 2.777381377095969e-05, "loss": 0.3317, "step": 3114 }, { "epoch": 0.0555595191381586, "grad_norm": 0.43754979968070984, "learning_rate": 2.7782732786300392e-05, "loss": 0.2861, "step": 3115 }, { "epoch": 0.05557735525987229, "grad_norm": 0.441175639629364, "learning_rate": 2.7791651801641098e-05, "loss": 0.2817, "step": 3116 }, { "epoch": 0.05559519138158599, "grad_norm": 0.40181174874305725, "learning_rate": 2.7800570816981808e-05, "loss": 0.2885, "step": 3117 }, { "epoch": 0.05561302750329968, "grad_norm": 0.38011544942855835, "learning_rate": 2.780948983232251e-05, "loss": 0.2688, "step": 3118 }, { "epoch": 0.05563086362501338, "grad_norm": 0.42211681604385376, "learning_rate": 2.7818408847663217e-05, "loss": 0.258, "step": 3119 }, { "epoch": 0.05564869974672707, "grad_norm": 0.35453712940216064, "learning_rate": 2.7827327863003927e-05, "loss": 0.252, "step": 3120 }, { "epoch": 0.05566653586844077, "grad_norm": 0.4386732578277588, "learning_rate": 2.783624687834463e-05, "loss": 0.2356, "step": 3121 }, { "epoch": 0.05568437199015446, "grad_norm": 0.38466301560401917, "learning_rate": 2.7845165893685337e-05, "loss": 0.3157, "step": 3122 }, { "epoch": 0.05570220811186816, "grad_norm": 0.5103141069412231, "learning_rate": 2.7854084909026046e-05, "loss": 0.3021, "step": 3123 }, { "epoch": 0.05572004423358185, "grad_norm": 0.5577016472816467, "learning_rate": 2.7863003924366753e-05, "loss": 0.3025, "step": 3124 }, { "epoch": 0.055737880355295547, "grad_norm": 0.34246450662612915, "learning_rate": 2.7871922939707456e-05, "loss": 0.2536, "step": 3125 }, { "epoch": 0.05575571647700924, "grad_norm": 0.4228273630142212, "learning_rate": 2.7880841955048165e-05, "loss": 0.2965, "step": 3126 }, { "epoch": 0.055773552598722936, "grad_norm": 0.3759939670562744, "learning_rate": 2.7889760970388872e-05, "loss": 0.2798, "step": 3127 }, { "epoch": 0.05579138872043663, "grad_norm": 0.49205154180526733, "learning_rate": 2.7898679985729575e-05, "loss": 0.3592, "step": 3128 }, { "epoch": 0.055809224842150326, "grad_norm": 0.46467289328575134, "learning_rate": 2.7907599001070285e-05, "loss": 0.2554, "step": 3129 }, { "epoch": 0.055827060963864014, "grad_norm": 0.3272590637207031, "learning_rate": 2.791651801641099e-05, "loss": 0.2732, "step": 3130 }, { "epoch": 0.05584489708557771, "grad_norm": 0.45574623346328735, "learning_rate": 2.7925437031751694e-05, "loss": 0.2875, "step": 3131 }, { "epoch": 0.055862733207291404, "grad_norm": 0.41233009099960327, "learning_rate": 2.7934356047092404e-05, "loss": 0.3017, "step": 3132 }, { "epoch": 0.0558805693290051, "grad_norm": 0.5841701030731201, "learning_rate": 2.794327506243311e-05, "loss": 0.2297, "step": 3133 }, { "epoch": 0.055898405450718794, "grad_norm": 0.41142526268959045, "learning_rate": 2.7952194077773813e-05, "loss": 0.2537, "step": 3134 }, { "epoch": 0.05591624157243249, "grad_norm": 0.3923339247703552, "learning_rate": 2.7961113093114523e-05, "loss": 0.2709, "step": 3135 }, { "epoch": 0.055934077694146184, "grad_norm": 0.34697508811950684, "learning_rate": 2.797003210845523e-05, "loss": 0.3215, "step": 3136 }, { "epoch": 0.05595191381585988, "grad_norm": 0.631820797920227, "learning_rate": 2.7978951123795932e-05, "loss": 0.3416, "step": 3137 }, { "epoch": 0.055969749937573574, "grad_norm": 0.4826674461364746, "learning_rate": 2.7987870139136642e-05, "loss": 0.311, "step": 3138 }, { "epoch": 0.05598758605928727, "grad_norm": 0.4957122504711151, "learning_rate": 2.799678915447735e-05, "loss": 0.2977, "step": 3139 }, { "epoch": 0.05600542218100096, "grad_norm": 0.495969295501709, "learning_rate": 2.800570816981805e-05, "loss": 0.2867, "step": 3140 }, { "epoch": 0.05602325830271466, "grad_norm": 0.5296710729598999, "learning_rate": 2.8014627185158758e-05, "loss": 0.3355, "step": 3141 }, { "epoch": 0.05604109442442835, "grad_norm": 0.42540377378463745, "learning_rate": 2.8023546200499468e-05, "loss": 0.2963, "step": 3142 }, { "epoch": 0.05605893054614205, "grad_norm": 0.38158974051475525, "learning_rate": 2.803246521584017e-05, "loss": 0.2822, "step": 3143 }, { "epoch": 0.05607676666785574, "grad_norm": 0.5512579083442688, "learning_rate": 2.8041384231180877e-05, "loss": 0.3114, "step": 3144 }, { "epoch": 0.05609460278956944, "grad_norm": 0.5109096765518188, "learning_rate": 2.8050303246521587e-05, "loss": 0.32, "step": 3145 }, { "epoch": 0.05611243891128313, "grad_norm": 0.3573777377605438, "learning_rate": 2.805922226186229e-05, "loss": 0.312, "step": 3146 }, { "epoch": 0.05613027503299683, "grad_norm": 0.525793194770813, "learning_rate": 2.8068141277202996e-05, "loss": 0.3378, "step": 3147 }, { "epoch": 0.05614811115471052, "grad_norm": 1.7347043752670288, "learning_rate": 2.8077060292543706e-05, "loss": 0.292, "step": 3148 }, { "epoch": 0.05616594727642422, "grad_norm": 0.477344274520874, "learning_rate": 2.8085979307884412e-05, "loss": 0.3393, "step": 3149 }, { "epoch": 0.056183783398137906, "grad_norm": 0.36261841654777527, "learning_rate": 2.8094898323225115e-05, "loss": 0.2598, "step": 3150 }, { "epoch": 0.0562016195198516, "grad_norm": 0.3269127905368805, "learning_rate": 2.8103817338565825e-05, "loss": 0.2411, "step": 3151 }, { "epoch": 0.056219455641565295, "grad_norm": 0.43891724944114685, "learning_rate": 2.811273635390653e-05, "loss": 0.3135, "step": 3152 }, { "epoch": 0.05623729176327899, "grad_norm": 0.6503772735595703, "learning_rate": 2.8121655369247234e-05, "loss": 0.326, "step": 3153 }, { "epoch": 0.056255127884992685, "grad_norm": 0.43998587131500244, "learning_rate": 2.8130574384587944e-05, "loss": 0.2725, "step": 3154 }, { "epoch": 0.05627296400670638, "grad_norm": 0.4373207688331604, "learning_rate": 2.813949339992865e-05, "loss": 0.2543, "step": 3155 }, { "epoch": 0.056290800128420075, "grad_norm": 0.5187900066375732, "learning_rate": 2.8148412415269353e-05, "loss": 0.3075, "step": 3156 }, { "epoch": 0.05630863625013377, "grad_norm": 0.6164479851722717, "learning_rate": 2.8157331430610063e-05, "loss": 0.3565, "step": 3157 }, { "epoch": 0.056326472371847465, "grad_norm": 0.4550468623638153, "learning_rate": 2.816625044595077e-05, "loss": 0.276, "step": 3158 }, { "epoch": 0.05634430849356116, "grad_norm": 0.3523094654083252, "learning_rate": 2.8175169461291473e-05, "loss": 0.2965, "step": 3159 }, { "epoch": 0.056362144615274855, "grad_norm": 0.3450637757778168, "learning_rate": 2.8184088476632182e-05, "loss": 0.2419, "step": 3160 }, { "epoch": 0.05637998073698855, "grad_norm": 0.4436189532279968, "learning_rate": 2.819300749197289e-05, "loss": 0.2392, "step": 3161 }, { "epoch": 0.056397816858702245, "grad_norm": 0.5303342342376709, "learning_rate": 2.8201926507313592e-05, "loss": 0.3043, "step": 3162 }, { "epoch": 0.05641565298041594, "grad_norm": 0.46184423565864563, "learning_rate": 2.82108455226543e-05, "loss": 0.2825, "step": 3163 }, { "epoch": 0.056433489102129634, "grad_norm": 0.5427693724632263, "learning_rate": 2.8219764537995008e-05, "loss": 0.2885, "step": 3164 }, { "epoch": 0.05645132522384333, "grad_norm": 0.46107038855552673, "learning_rate": 2.822868355333571e-05, "loss": 0.3059, "step": 3165 }, { "epoch": 0.056469161345557024, "grad_norm": 0.3580935001373291, "learning_rate": 2.8237602568676417e-05, "loss": 0.2672, "step": 3166 }, { "epoch": 0.05648699746727072, "grad_norm": 0.3495483696460724, "learning_rate": 2.8246521584017127e-05, "loss": 0.2541, "step": 3167 }, { "epoch": 0.056504833588984414, "grad_norm": 0.44915691018104553, "learning_rate": 2.825544059935783e-05, "loss": 0.2697, "step": 3168 }, { "epoch": 0.05652266971069811, "grad_norm": 0.8128893971443176, "learning_rate": 2.8264359614698536e-05, "loss": 0.2975, "step": 3169 }, { "epoch": 0.0565405058324118, "grad_norm": 0.7385756969451904, "learning_rate": 2.8273278630039246e-05, "loss": 0.3223, "step": 3170 }, { "epoch": 0.05655834195412549, "grad_norm": 0.43033161759376526, "learning_rate": 2.828219764537995e-05, "loss": 0.2983, "step": 3171 }, { "epoch": 0.05657617807583919, "grad_norm": 0.5504375696182251, "learning_rate": 2.8291116660720656e-05, "loss": 0.2947, "step": 3172 }, { "epoch": 0.05659401419755288, "grad_norm": 0.4506465792655945, "learning_rate": 2.8300035676061365e-05, "loss": 0.2637, "step": 3173 }, { "epoch": 0.05661185031926658, "grad_norm": 0.39166396856307983, "learning_rate": 2.8308954691402072e-05, "loss": 0.2906, "step": 3174 }, { "epoch": 0.05662968644098027, "grad_norm": 0.45482513308525085, "learning_rate": 2.8317873706742775e-05, "loss": 0.3005, "step": 3175 }, { "epoch": 0.05664752256269397, "grad_norm": 0.4915219843387604, "learning_rate": 2.8326792722083484e-05, "loss": 0.2792, "step": 3176 }, { "epoch": 0.05666535868440766, "grad_norm": 0.3989510238170624, "learning_rate": 2.833571173742419e-05, "loss": 0.3002, "step": 3177 }, { "epoch": 0.056683194806121356, "grad_norm": 0.351775199174881, "learning_rate": 2.8344630752764894e-05, "loss": 0.2325, "step": 3178 }, { "epoch": 0.05670103092783505, "grad_norm": 0.47354358434677124, "learning_rate": 2.8353549768105604e-05, "loss": 0.3084, "step": 3179 }, { "epoch": 0.056718867049548746, "grad_norm": 0.44085946679115295, "learning_rate": 2.836246878344631e-05, "loss": 0.3073, "step": 3180 }, { "epoch": 0.05673670317126244, "grad_norm": 0.49750787019729614, "learning_rate": 2.8371387798787013e-05, "loss": 0.2789, "step": 3181 }, { "epoch": 0.056754539292976136, "grad_norm": 0.5748381614685059, "learning_rate": 2.8380306814127723e-05, "loss": 0.31, "step": 3182 }, { "epoch": 0.05677237541468983, "grad_norm": 0.45841026306152344, "learning_rate": 2.838922582946843e-05, "loss": 0.3374, "step": 3183 }, { "epoch": 0.056790211536403526, "grad_norm": 0.5971012711524963, "learning_rate": 2.8398144844809132e-05, "loss": 0.3508, "step": 3184 }, { "epoch": 0.05680804765811722, "grad_norm": 0.507256269454956, "learning_rate": 2.8407063860149842e-05, "loss": 0.2557, "step": 3185 }, { "epoch": 0.056825883779830916, "grad_norm": 0.7194445133209229, "learning_rate": 2.8415982875490548e-05, "loss": 0.3054, "step": 3186 }, { "epoch": 0.05684371990154461, "grad_norm": 0.4229884445667267, "learning_rate": 2.842490189083125e-05, "loss": 0.319, "step": 3187 }, { "epoch": 0.056861556023258306, "grad_norm": 0.4635477364063263, "learning_rate": 2.843382090617196e-05, "loss": 0.316, "step": 3188 }, { "epoch": 0.056879392144972, "grad_norm": 0.39817649126052856, "learning_rate": 2.8442739921512667e-05, "loss": 0.2788, "step": 3189 }, { "epoch": 0.05689722826668569, "grad_norm": 0.42672228813171387, "learning_rate": 2.845165893685337e-05, "loss": 0.2903, "step": 3190 }, { "epoch": 0.05691506438839938, "grad_norm": 0.5355145931243896, "learning_rate": 2.846057795219408e-05, "loss": 0.3236, "step": 3191 }, { "epoch": 0.05693290051011308, "grad_norm": 0.4850665330886841, "learning_rate": 2.8469496967534787e-05, "loss": 0.2521, "step": 3192 }, { "epoch": 0.05695073663182677, "grad_norm": 0.3593953549861908, "learning_rate": 2.847841598287549e-05, "loss": 0.2494, "step": 3193 }, { "epoch": 0.05696857275354047, "grad_norm": 0.48539307713508606, "learning_rate": 2.8487334998216196e-05, "loss": 0.2909, "step": 3194 }, { "epoch": 0.05698640887525416, "grad_norm": 0.4970082938671112, "learning_rate": 2.8496254013556906e-05, "loss": 0.2917, "step": 3195 }, { "epoch": 0.05700424499696786, "grad_norm": 0.7430002093315125, "learning_rate": 2.8505173028897612e-05, "loss": 0.2189, "step": 3196 }, { "epoch": 0.05702208111868155, "grad_norm": 0.30442625284194946, "learning_rate": 2.8514092044238315e-05, "loss": 0.217, "step": 3197 }, { "epoch": 0.05703991724039525, "grad_norm": 0.3949531316757202, "learning_rate": 2.8523011059579025e-05, "loss": 0.306, "step": 3198 }, { "epoch": 0.05705775336210894, "grad_norm": 0.42441514134407043, "learning_rate": 2.853193007491973e-05, "loss": 0.3281, "step": 3199 }, { "epoch": 0.05707558948382264, "grad_norm": 0.3786030411720276, "learning_rate": 2.8540849090260434e-05, "loss": 0.3032, "step": 3200 }, { "epoch": 0.05709342560553633, "grad_norm": 0.46103936433792114, "learning_rate": 2.8549768105601144e-05, "loss": 0.341, "step": 3201 }, { "epoch": 0.05711126172725003, "grad_norm": 0.4143133759498596, "learning_rate": 2.855868712094185e-05, "loss": 0.2032, "step": 3202 }, { "epoch": 0.05712909784896372, "grad_norm": 0.6137906908988953, "learning_rate": 2.8567606136282553e-05, "loss": 0.3155, "step": 3203 }, { "epoch": 0.05714693397067742, "grad_norm": 0.607319712638855, "learning_rate": 2.8576525151623263e-05, "loss": 0.2459, "step": 3204 }, { "epoch": 0.05716477009239111, "grad_norm": 0.503551721572876, "learning_rate": 2.858544416696397e-05, "loss": 0.3582, "step": 3205 }, { "epoch": 0.05718260621410481, "grad_norm": 0.37274956703186035, "learning_rate": 2.8594363182304672e-05, "loss": 0.2643, "step": 3206 }, { "epoch": 0.0572004423358185, "grad_norm": 0.4710714817047119, "learning_rate": 2.8603282197645382e-05, "loss": 0.2802, "step": 3207 }, { "epoch": 0.0572182784575322, "grad_norm": 0.5470686554908752, "learning_rate": 2.861220121298609e-05, "loss": 0.3143, "step": 3208 }, { "epoch": 0.05723611457924589, "grad_norm": 0.4025944471359253, "learning_rate": 2.862112022832679e-05, "loss": 0.2134, "step": 3209 }, { "epoch": 0.05725395070095958, "grad_norm": 0.4664728045463562, "learning_rate": 2.86300392436675e-05, "loss": 0.3075, "step": 3210 }, { "epoch": 0.057271786822673275, "grad_norm": 0.4440355896949768, "learning_rate": 2.8638958259008208e-05, "loss": 0.3069, "step": 3211 }, { "epoch": 0.05728962294438697, "grad_norm": 0.36694321036338806, "learning_rate": 2.864787727434891e-05, "loss": 0.2778, "step": 3212 }, { "epoch": 0.057307459066100665, "grad_norm": 0.3843688368797302, "learning_rate": 2.865679628968962e-05, "loss": 0.2474, "step": 3213 }, { "epoch": 0.05732529518781436, "grad_norm": 0.41927361488342285, "learning_rate": 2.8665715305030327e-05, "loss": 0.3291, "step": 3214 }, { "epoch": 0.057343131309528055, "grad_norm": 0.4616337716579437, "learning_rate": 2.867463432037103e-05, "loss": 0.3504, "step": 3215 }, { "epoch": 0.05736096743124175, "grad_norm": 0.6417986154556274, "learning_rate": 2.868355333571174e-05, "loss": 0.3368, "step": 3216 }, { "epoch": 0.057378803552955444, "grad_norm": 0.42413899302482605, "learning_rate": 2.8692472351052446e-05, "loss": 0.3405, "step": 3217 }, { "epoch": 0.05739663967466914, "grad_norm": 0.44954735040664673, "learning_rate": 2.870139136639315e-05, "loss": 0.2605, "step": 3218 }, { "epoch": 0.057414475796382834, "grad_norm": 0.3479384779930115, "learning_rate": 2.8710310381733855e-05, "loss": 0.2449, "step": 3219 }, { "epoch": 0.05743231191809653, "grad_norm": 0.5870296955108643, "learning_rate": 2.8719229397074565e-05, "loss": 0.3004, "step": 3220 }, { "epoch": 0.057450148039810224, "grad_norm": 0.34973809123039246, "learning_rate": 2.8728148412415275e-05, "loss": 0.2774, "step": 3221 }, { "epoch": 0.05746798416152392, "grad_norm": 0.45131930708885193, "learning_rate": 2.8737067427755975e-05, "loss": 0.2686, "step": 3222 }, { "epoch": 0.057485820283237614, "grad_norm": 0.3726236820220947, "learning_rate": 2.8745986443096684e-05, "loss": 0.2909, "step": 3223 }, { "epoch": 0.05750365640495131, "grad_norm": 0.39420604705810547, "learning_rate": 2.875490545843739e-05, "loss": 0.2844, "step": 3224 }, { "epoch": 0.057521492526665004, "grad_norm": 0.6217480301856995, "learning_rate": 2.8763824473778094e-05, "loss": 0.2524, "step": 3225 }, { "epoch": 0.0575393286483787, "grad_norm": 0.5144774913787842, "learning_rate": 2.8772743489118803e-05, "loss": 0.2974, "step": 3226 }, { "epoch": 0.057557164770092394, "grad_norm": 0.45274215936660767, "learning_rate": 2.878166250445951e-05, "loss": 0.2856, "step": 3227 }, { "epoch": 0.05757500089180609, "grad_norm": 0.4292668402194977, "learning_rate": 2.8790581519800213e-05, "loss": 0.2682, "step": 3228 }, { "epoch": 0.05759283701351978, "grad_norm": 0.4211280047893524, "learning_rate": 2.8799500535140923e-05, "loss": 0.2684, "step": 3229 }, { "epoch": 0.05761067313523347, "grad_norm": 0.39303717017173767, "learning_rate": 2.880841955048163e-05, "loss": 0.2816, "step": 3230 }, { "epoch": 0.057628509256947166, "grad_norm": 0.3582131266593933, "learning_rate": 2.8817338565822332e-05, "loss": 0.2378, "step": 3231 }, { "epoch": 0.05764634537866086, "grad_norm": 0.5083575248718262, "learning_rate": 2.8826257581163042e-05, "loss": 0.3035, "step": 3232 }, { "epoch": 0.057664181500374556, "grad_norm": 0.39351096749305725, "learning_rate": 2.8835176596503748e-05, "loss": 0.2771, "step": 3233 }, { "epoch": 0.05768201762208825, "grad_norm": 0.5952514410018921, "learning_rate": 2.884409561184445e-05, "loss": 0.3101, "step": 3234 }, { "epoch": 0.057699853743801946, "grad_norm": 0.41662126779556274, "learning_rate": 2.885301462718516e-05, "loss": 0.2836, "step": 3235 }, { "epoch": 0.05771768986551564, "grad_norm": 0.6105010509490967, "learning_rate": 2.8861933642525867e-05, "loss": 0.2997, "step": 3236 }, { "epoch": 0.057735525987229336, "grad_norm": 0.4033588767051697, "learning_rate": 2.887085265786657e-05, "loss": 0.2772, "step": 3237 }, { "epoch": 0.05775336210894303, "grad_norm": 0.44319161772727966, "learning_rate": 2.887977167320728e-05, "loss": 0.2574, "step": 3238 }, { "epoch": 0.057771198230656726, "grad_norm": 0.31570732593536377, "learning_rate": 2.8888690688547986e-05, "loss": 0.2735, "step": 3239 }, { "epoch": 0.05778903435237042, "grad_norm": 0.48176056146621704, "learning_rate": 2.889760970388869e-05, "loss": 0.3235, "step": 3240 }, { "epoch": 0.057806870474084115, "grad_norm": 0.554405927658081, "learning_rate": 2.89065287192294e-05, "loss": 0.2098, "step": 3241 }, { "epoch": 0.05782470659579781, "grad_norm": 0.31652870774269104, "learning_rate": 2.8915447734570106e-05, "loss": 0.2697, "step": 3242 }, { "epoch": 0.057842542717511505, "grad_norm": 0.434579074382782, "learning_rate": 2.8924366749910815e-05, "loss": 0.3108, "step": 3243 }, { "epoch": 0.0578603788392252, "grad_norm": 0.4084996283054352, "learning_rate": 2.8933285765251515e-05, "loss": 0.2703, "step": 3244 }, { "epoch": 0.057878214960938895, "grad_norm": 0.41963431239128113, "learning_rate": 2.8942204780592225e-05, "loss": 0.2826, "step": 3245 }, { "epoch": 0.05789605108265259, "grad_norm": 0.36174237728118896, "learning_rate": 2.8951123795932934e-05, "loss": 0.3111, "step": 3246 }, { "epoch": 0.057913887204366285, "grad_norm": 0.4229978919029236, "learning_rate": 2.8960042811273634e-05, "loss": 0.2628, "step": 3247 }, { "epoch": 0.05793172332607998, "grad_norm": 0.4644685387611389, "learning_rate": 2.8968961826614344e-05, "loss": 0.2847, "step": 3248 }, { "epoch": 0.057949559447793675, "grad_norm": 0.43232446908950806, "learning_rate": 2.897788084195505e-05, "loss": 0.2995, "step": 3249 }, { "epoch": 0.05796739556950737, "grad_norm": 0.4653993844985962, "learning_rate": 2.8986799857295753e-05, "loss": 0.2814, "step": 3250 }, { "epoch": 0.05798523169122106, "grad_norm": 0.314706414937973, "learning_rate": 2.8995718872636463e-05, "loss": 0.2123, "step": 3251 }, { "epoch": 0.05800306781293475, "grad_norm": 0.5298987030982971, "learning_rate": 2.900463788797717e-05, "loss": 0.2624, "step": 3252 }, { "epoch": 0.05802090393464845, "grad_norm": 0.40126827359199524, "learning_rate": 2.9013556903317872e-05, "loss": 0.3133, "step": 3253 }, { "epoch": 0.05803874005636214, "grad_norm": 0.371821790933609, "learning_rate": 2.9022475918658582e-05, "loss": 0.2738, "step": 3254 }, { "epoch": 0.05805657617807584, "grad_norm": 0.43823331594467163, "learning_rate": 2.903139493399929e-05, "loss": 0.2927, "step": 3255 }, { "epoch": 0.05807441229978953, "grad_norm": 0.4936355650424957, "learning_rate": 2.904031394933999e-05, "loss": 0.3306, "step": 3256 }, { "epoch": 0.05809224842150323, "grad_norm": 0.4949367344379425, "learning_rate": 2.90492329646807e-05, "loss": 0.31, "step": 3257 }, { "epoch": 0.05811008454321692, "grad_norm": 0.5128595232963562, "learning_rate": 2.9058151980021408e-05, "loss": 0.2954, "step": 3258 }, { "epoch": 0.05812792066493062, "grad_norm": 0.46815380454063416, "learning_rate": 2.906707099536211e-05, "loss": 0.332, "step": 3259 }, { "epoch": 0.05814575678664431, "grad_norm": 0.4047621190547943, "learning_rate": 2.907599001070282e-05, "loss": 0.3294, "step": 3260 }, { "epoch": 0.05816359290835801, "grad_norm": 0.5759481191635132, "learning_rate": 2.9084909026043527e-05, "loss": 0.3203, "step": 3261 }, { "epoch": 0.0581814290300717, "grad_norm": 0.4076763093471527, "learning_rate": 2.909382804138423e-05, "loss": 0.3038, "step": 3262 }, { "epoch": 0.0581992651517854, "grad_norm": 0.3462975323200226, "learning_rate": 2.910274705672494e-05, "loss": 0.2987, "step": 3263 }, { "epoch": 0.05821710127349909, "grad_norm": 0.3002260625362396, "learning_rate": 2.9111666072065646e-05, "loss": 0.2713, "step": 3264 }, { "epoch": 0.05823493739521279, "grad_norm": 0.3066962659358978, "learning_rate": 2.912058508740635e-05, "loss": 0.2486, "step": 3265 }, { "epoch": 0.05825277351692648, "grad_norm": 0.42274385690689087, "learning_rate": 2.912950410274706e-05, "loss": 0.2916, "step": 3266 }, { "epoch": 0.058270609638640176, "grad_norm": 0.39436569809913635, "learning_rate": 2.9138423118087765e-05, "loss": 0.2891, "step": 3267 }, { "epoch": 0.05828844576035387, "grad_norm": 0.39736512303352356, "learning_rate": 2.9147342133428475e-05, "loss": 0.2744, "step": 3268 }, { "epoch": 0.058306281882067566, "grad_norm": 0.44282907247543335, "learning_rate": 2.9156261148769174e-05, "loss": 0.2887, "step": 3269 }, { "epoch": 0.05832411800378126, "grad_norm": 0.5134050846099854, "learning_rate": 2.9165180164109884e-05, "loss": 0.2974, "step": 3270 }, { "epoch": 0.05834195412549495, "grad_norm": 0.5337728261947632, "learning_rate": 2.9174099179450594e-05, "loss": 0.2578, "step": 3271 }, { "epoch": 0.058359790247208644, "grad_norm": 0.42524704337120056, "learning_rate": 2.9183018194791294e-05, "loss": 0.2468, "step": 3272 }, { "epoch": 0.05837762636892234, "grad_norm": 0.4540986716747284, "learning_rate": 2.9191937210132003e-05, "loss": 0.2605, "step": 3273 }, { "epoch": 0.058395462490636034, "grad_norm": 0.49019667506217957, "learning_rate": 2.920085622547271e-05, "loss": 0.2807, "step": 3274 }, { "epoch": 0.05841329861234973, "grad_norm": 0.6835748553276062, "learning_rate": 2.9209775240813413e-05, "loss": 0.2746, "step": 3275 }, { "epoch": 0.058431134734063424, "grad_norm": 0.5628316402435303, "learning_rate": 2.9218694256154123e-05, "loss": 0.3014, "step": 3276 }, { "epoch": 0.05844897085577712, "grad_norm": 0.6078162789344788, "learning_rate": 2.922761327149483e-05, "loss": 0.3247, "step": 3277 }, { "epoch": 0.058466806977490814, "grad_norm": 0.5041225552558899, "learning_rate": 2.9236532286835532e-05, "loss": 0.2905, "step": 3278 }, { "epoch": 0.05848464309920451, "grad_norm": 0.4063977599143982, "learning_rate": 2.924545130217624e-05, "loss": 0.3328, "step": 3279 }, { "epoch": 0.0585024792209182, "grad_norm": 0.34691980481147766, "learning_rate": 2.9254370317516948e-05, "loss": 0.2946, "step": 3280 }, { "epoch": 0.0585203153426319, "grad_norm": 0.5628437399864197, "learning_rate": 2.926328933285765e-05, "loss": 0.2631, "step": 3281 }, { "epoch": 0.05853815146434559, "grad_norm": 0.4472805857658386, "learning_rate": 2.927220834819836e-05, "loss": 0.2897, "step": 3282 }, { "epoch": 0.05855598758605929, "grad_norm": 0.4052281379699707, "learning_rate": 2.9281127363539067e-05, "loss": 0.2916, "step": 3283 }, { "epoch": 0.05857382370777298, "grad_norm": 0.36523932218551636, "learning_rate": 2.929004637887977e-05, "loss": 0.2733, "step": 3284 }, { "epoch": 0.05859165982948668, "grad_norm": 0.359784334897995, "learning_rate": 2.929896539422048e-05, "loss": 0.2527, "step": 3285 }, { "epoch": 0.05860949595120037, "grad_norm": 0.47422704100608826, "learning_rate": 2.9307884409561186e-05, "loss": 0.2655, "step": 3286 }, { "epoch": 0.05862733207291407, "grad_norm": 0.3405548334121704, "learning_rate": 2.931680342490189e-05, "loss": 0.2814, "step": 3287 }, { "epoch": 0.05864516819462776, "grad_norm": 0.820716381072998, "learning_rate": 2.93257224402426e-05, "loss": 0.3035, "step": 3288 }, { "epoch": 0.05866300431634146, "grad_norm": 0.37102848291397095, "learning_rate": 2.9334641455583305e-05, "loss": 0.2539, "step": 3289 }, { "epoch": 0.05868084043805515, "grad_norm": 0.393527090549469, "learning_rate": 2.9343560470924015e-05, "loss": 0.2849, "step": 3290 }, { "epoch": 0.05869867655976884, "grad_norm": 0.363849937915802, "learning_rate": 2.9352479486264718e-05, "loss": 0.2399, "step": 3291 }, { "epoch": 0.058716512681482536, "grad_norm": 0.3990871012210846, "learning_rate": 2.9361398501605425e-05, "loss": 0.2837, "step": 3292 }, { "epoch": 0.05873434880319623, "grad_norm": 0.48869359493255615, "learning_rate": 2.9370317516946134e-05, "loss": 0.2937, "step": 3293 }, { "epoch": 0.058752184924909925, "grad_norm": 0.53365558385849, "learning_rate": 2.9379236532286837e-05, "loss": 0.2619, "step": 3294 }, { "epoch": 0.05877002104662362, "grad_norm": 0.3252737522125244, "learning_rate": 2.9388155547627544e-05, "loss": 0.2834, "step": 3295 }, { "epoch": 0.058787857168337315, "grad_norm": 0.38567742705345154, "learning_rate": 2.9397074562968254e-05, "loss": 0.2501, "step": 3296 }, { "epoch": 0.05880569329005101, "grad_norm": 0.5182756781578064, "learning_rate": 2.9405993578308953e-05, "loss": 0.2895, "step": 3297 }, { "epoch": 0.058823529411764705, "grad_norm": 0.4358648359775543, "learning_rate": 2.9414912593649663e-05, "loss": 0.2808, "step": 3298 }, { "epoch": 0.0588413655334784, "grad_norm": 0.4711143672466278, "learning_rate": 2.942383160899037e-05, "loss": 0.2937, "step": 3299 }, { "epoch": 0.058859201655192095, "grad_norm": 0.34037455916404724, "learning_rate": 2.9432750624331072e-05, "loss": 0.2976, "step": 3300 }, { "epoch": 0.05887703777690579, "grad_norm": 0.35021427273750305, "learning_rate": 2.9441669639671782e-05, "loss": 0.3063, "step": 3301 }, { "epoch": 0.058894873898619485, "grad_norm": 0.2765119671821594, "learning_rate": 2.945058865501249e-05, "loss": 0.2473, "step": 3302 }, { "epoch": 0.05891271002033318, "grad_norm": 0.35572266578674316, "learning_rate": 2.945950767035319e-05, "loss": 0.3039, "step": 3303 }, { "epoch": 0.058930546142046875, "grad_norm": 0.5072250962257385, "learning_rate": 2.94684266856939e-05, "loss": 0.2853, "step": 3304 }, { "epoch": 0.05894838226376057, "grad_norm": 0.45801055431365967, "learning_rate": 2.9477345701034608e-05, "loss": 0.299, "step": 3305 }, { "epoch": 0.058966218385474264, "grad_norm": 0.44712984561920166, "learning_rate": 2.948626471637531e-05, "loss": 0.2835, "step": 3306 }, { "epoch": 0.05898405450718796, "grad_norm": 0.5450993180274963, "learning_rate": 2.949518373171602e-05, "loss": 0.2719, "step": 3307 }, { "epoch": 0.059001890628901654, "grad_norm": 0.48879894614219666, "learning_rate": 2.9504102747056727e-05, "loss": 0.2843, "step": 3308 }, { "epoch": 0.05901972675061535, "grad_norm": 0.6736392974853516, "learning_rate": 2.951302176239743e-05, "loss": 0.3514, "step": 3309 }, { "epoch": 0.059037562872329044, "grad_norm": 0.422929584980011, "learning_rate": 2.952194077773814e-05, "loss": 0.3429, "step": 3310 }, { "epoch": 0.05905539899404273, "grad_norm": 0.3613201975822449, "learning_rate": 2.9530859793078846e-05, "loss": 0.2876, "step": 3311 }, { "epoch": 0.05907323511575643, "grad_norm": 0.4560771584510803, "learning_rate": 2.953977880841955e-05, "loss": 0.3221, "step": 3312 }, { "epoch": 0.05909107123747012, "grad_norm": 0.5454785227775574, "learning_rate": 2.954869782376026e-05, "loss": 0.3115, "step": 3313 }, { "epoch": 0.05910890735918382, "grad_norm": 0.3850862681865692, "learning_rate": 2.9557616839100965e-05, "loss": 0.298, "step": 3314 }, { "epoch": 0.05912674348089751, "grad_norm": 0.3268927335739136, "learning_rate": 2.9566535854441675e-05, "loss": 0.2836, "step": 3315 }, { "epoch": 0.05914457960261121, "grad_norm": 0.4352155923843384, "learning_rate": 2.9575454869782378e-05, "loss": 0.2801, "step": 3316 }, { "epoch": 0.0591624157243249, "grad_norm": 0.4850205183029175, "learning_rate": 2.9584373885123084e-05, "loss": 0.2127, "step": 3317 }, { "epoch": 0.059180251846038597, "grad_norm": 0.3261580467224121, "learning_rate": 2.9593292900463794e-05, "loss": 0.276, "step": 3318 }, { "epoch": 0.05919808796775229, "grad_norm": 0.285980761051178, "learning_rate": 2.9602211915804497e-05, "loss": 0.2693, "step": 3319 }, { "epoch": 0.059215924089465986, "grad_norm": 0.3186729848384857, "learning_rate": 2.9611130931145203e-05, "loss": 0.2809, "step": 3320 }, { "epoch": 0.05923376021117968, "grad_norm": 0.4164327383041382, "learning_rate": 2.9620049946485913e-05, "loss": 0.2969, "step": 3321 }, { "epoch": 0.059251596332893376, "grad_norm": 0.32970285415649414, "learning_rate": 2.9628968961826613e-05, "loss": 0.2078, "step": 3322 }, { "epoch": 0.05926943245460707, "grad_norm": 0.4292951822280884, "learning_rate": 2.9637887977167322e-05, "loss": 0.3259, "step": 3323 }, { "epoch": 0.059287268576320766, "grad_norm": 0.4053258001804352, "learning_rate": 2.9646806992508032e-05, "loss": 0.2574, "step": 3324 }, { "epoch": 0.05930510469803446, "grad_norm": 0.37580397725105286, "learning_rate": 2.9655726007848732e-05, "loss": 0.2447, "step": 3325 }, { "epoch": 0.059322940819748156, "grad_norm": 0.35199934244155884, "learning_rate": 2.966464502318944e-05, "loss": 0.2659, "step": 3326 }, { "epoch": 0.05934077694146185, "grad_norm": 0.2869519889354706, "learning_rate": 2.9673564038530148e-05, "loss": 0.2857, "step": 3327 }, { "epoch": 0.059358613063175546, "grad_norm": 0.621752917766571, "learning_rate": 2.968248305387085e-05, "loss": 0.3008, "step": 3328 }, { "epoch": 0.05937644918488924, "grad_norm": 0.3756980299949646, "learning_rate": 2.969140206921156e-05, "loss": 0.2663, "step": 3329 }, { "epoch": 0.059394285306602936, "grad_norm": 0.3002112805843353, "learning_rate": 2.9700321084552267e-05, "loss": 0.2411, "step": 3330 }, { "epoch": 0.059412121428316624, "grad_norm": 0.40645831823349, "learning_rate": 2.970924009989297e-05, "loss": 0.2975, "step": 3331 }, { "epoch": 0.05942995755003032, "grad_norm": 0.42232415080070496, "learning_rate": 2.971815911523368e-05, "loss": 0.2972, "step": 3332 }, { "epoch": 0.05944779367174401, "grad_norm": 0.3313126564025879, "learning_rate": 2.9727078130574386e-05, "loss": 0.2475, "step": 3333 }, { "epoch": 0.05946562979345771, "grad_norm": 0.43232089281082153, "learning_rate": 2.973599714591509e-05, "loss": 0.2695, "step": 3334 }, { "epoch": 0.0594834659151714, "grad_norm": 0.5244986414909363, "learning_rate": 2.97449161612558e-05, "loss": 0.3476, "step": 3335 }, { "epoch": 0.0595013020368851, "grad_norm": 0.5530703067779541, "learning_rate": 2.9753835176596505e-05, "loss": 0.4008, "step": 3336 }, { "epoch": 0.05951913815859879, "grad_norm": 0.5310500860214233, "learning_rate": 2.976275419193721e-05, "loss": 0.3504, "step": 3337 }, { "epoch": 0.05953697428031249, "grad_norm": 0.49514028429985046, "learning_rate": 2.9771673207277918e-05, "loss": 0.2656, "step": 3338 }, { "epoch": 0.05955481040202618, "grad_norm": 0.49703454971313477, "learning_rate": 2.9780592222618624e-05, "loss": 0.2958, "step": 3339 }, { "epoch": 0.05957264652373988, "grad_norm": 0.4270615577697754, "learning_rate": 2.9789511237959334e-05, "loss": 0.2962, "step": 3340 }, { "epoch": 0.05959048264545357, "grad_norm": 0.36220037937164307, "learning_rate": 2.9798430253300037e-05, "loss": 0.2802, "step": 3341 }, { "epoch": 0.05960831876716727, "grad_norm": 0.39774414896965027, "learning_rate": 2.9807349268640744e-05, "loss": 0.2688, "step": 3342 }, { "epoch": 0.05962615488888096, "grad_norm": 0.41961362957954407, "learning_rate": 2.9816268283981453e-05, "loss": 0.3242, "step": 3343 }, { "epoch": 0.05964399101059466, "grad_norm": 0.3730867803096771, "learning_rate": 2.9825187299322156e-05, "loss": 0.3207, "step": 3344 }, { "epoch": 0.05966182713230835, "grad_norm": 0.3605899512767792, "learning_rate": 2.9834106314662863e-05, "loss": 0.2675, "step": 3345 }, { "epoch": 0.05967966325402205, "grad_norm": 0.44611263275146484, "learning_rate": 2.9843025330003573e-05, "loss": 0.28, "step": 3346 }, { "epoch": 0.05969749937573574, "grad_norm": 0.3818523585796356, "learning_rate": 2.9851944345344272e-05, "loss": 0.303, "step": 3347 }, { "epoch": 0.05971533549744944, "grad_norm": 0.46706482768058777, "learning_rate": 2.9860863360684982e-05, "loss": 0.3455, "step": 3348 }, { "epoch": 0.05973317161916313, "grad_norm": 0.3815406858921051, "learning_rate": 2.986978237602569e-05, "loss": 0.2768, "step": 3349 }, { "epoch": 0.05975100774087683, "grad_norm": 0.5149982571601868, "learning_rate": 2.987870139136639e-05, "loss": 0.3037, "step": 3350 }, { "epoch": 0.059768843862590515, "grad_norm": 0.5229239463806152, "learning_rate": 2.98876204067071e-05, "loss": 0.2862, "step": 3351 }, { "epoch": 0.05978667998430421, "grad_norm": 0.4386540651321411, "learning_rate": 2.9896539422047807e-05, "loss": 0.2427, "step": 3352 }, { "epoch": 0.059804516106017905, "grad_norm": 0.7024639248847961, "learning_rate": 2.990545843738851e-05, "loss": 0.2877, "step": 3353 }, { "epoch": 0.0598223522277316, "grad_norm": 0.47102904319763184, "learning_rate": 2.991437745272922e-05, "loss": 0.2814, "step": 3354 }, { "epoch": 0.059840188349445295, "grad_norm": 0.39220118522644043, "learning_rate": 2.9923296468069927e-05, "loss": 0.3083, "step": 3355 }, { "epoch": 0.05985802447115899, "grad_norm": 0.3915637135505676, "learning_rate": 2.993221548341063e-05, "loss": 0.2863, "step": 3356 }, { "epoch": 0.059875860592872684, "grad_norm": 0.35335150361061096, "learning_rate": 2.994113449875134e-05, "loss": 0.2521, "step": 3357 }, { "epoch": 0.05989369671458638, "grad_norm": 0.44048142433166504, "learning_rate": 2.9950053514092046e-05, "loss": 0.2733, "step": 3358 }, { "epoch": 0.059911532836300074, "grad_norm": 0.3673648238182068, "learning_rate": 2.995897252943275e-05, "loss": 0.212, "step": 3359 }, { "epoch": 0.05992936895801377, "grad_norm": 0.46625861525535583, "learning_rate": 2.996789154477346e-05, "loss": 0.2779, "step": 3360 }, { "epoch": 0.059947205079727464, "grad_norm": 0.39080703258514404, "learning_rate": 2.9976810560114165e-05, "loss": 0.3021, "step": 3361 }, { "epoch": 0.05996504120144116, "grad_norm": 0.5033069849014282, "learning_rate": 2.9985729575454875e-05, "loss": 0.2822, "step": 3362 }, { "epoch": 0.059982877323154854, "grad_norm": 0.455512672662735, "learning_rate": 2.9994648590795578e-05, "loss": 0.3281, "step": 3363 }, { "epoch": 0.06000071344486855, "grad_norm": 0.3449738323688507, "learning_rate": 3.0003567606136284e-05, "loss": 0.2781, "step": 3364 }, { "epoch": 0.060018549566582244, "grad_norm": 0.37134668231010437, "learning_rate": 3.0012486621476994e-05, "loss": 0.2746, "step": 3365 }, { "epoch": 0.06003638568829594, "grad_norm": 0.5062718391418457, "learning_rate": 3.0021405636817697e-05, "loss": 0.2992, "step": 3366 }, { "epoch": 0.060054221810009634, "grad_norm": 0.4795890748500824, "learning_rate": 3.0030324652158403e-05, "loss": 0.2962, "step": 3367 }, { "epoch": 0.06007205793172333, "grad_norm": 1.082323670387268, "learning_rate": 3.0039243667499113e-05, "loss": 0.3115, "step": 3368 }, { "epoch": 0.060089894053437024, "grad_norm": 0.4421440362930298, "learning_rate": 3.0048162682839816e-05, "loss": 0.2604, "step": 3369 }, { "epoch": 0.06010773017515072, "grad_norm": 0.3249231278896332, "learning_rate": 3.0057081698180522e-05, "loss": 0.2524, "step": 3370 }, { "epoch": 0.060125566296864406, "grad_norm": 0.4153946340084076, "learning_rate": 3.0066000713521232e-05, "loss": 0.3268, "step": 3371 }, { "epoch": 0.0601434024185781, "grad_norm": 0.35149988532066345, "learning_rate": 3.007491972886193e-05, "loss": 0.2984, "step": 3372 }, { "epoch": 0.060161238540291796, "grad_norm": 0.4687574803829193, "learning_rate": 3.008383874420264e-05, "loss": 0.3355, "step": 3373 }, { "epoch": 0.06017907466200549, "grad_norm": 0.4659954309463501, "learning_rate": 3.009275775954335e-05, "loss": 0.2917, "step": 3374 }, { "epoch": 0.060196910783719186, "grad_norm": 0.461733341217041, "learning_rate": 3.010167677488405e-05, "loss": 0.2683, "step": 3375 }, { "epoch": 0.06021474690543288, "grad_norm": 0.44231653213500977, "learning_rate": 3.011059579022476e-05, "loss": 0.2721, "step": 3376 }, { "epoch": 0.060232583027146576, "grad_norm": 0.38076111674308777, "learning_rate": 3.0119514805565467e-05, "loss": 0.26, "step": 3377 }, { "epoch": 0.06025041914886027, "grad_norm": 0.32276830077171326, "learning_rate": 3.012843382090617e-05, "loss": 0.2661, "step": 3378 }, { "epoch": 0.060268255270573966, "grad_norm": 0.40255284309387207, "learning_rate": 3.013735283624688e-05, "loss": 0.3408, "step": 3379 }, { "epoch": 0.06028609139228766, "grad_norm": 0.34506767988204956, "learning_rate": 3.0146271851587586e-05, "loss": 0.2859, "step": 3380 }, { "epoch": 0.060303927514001356, "grad_norm": 0.4300769865512848, "learning_rate": 3.015519086692829e-05, "loss": 0.2825, "step": 3381 }, { "epoch": 0.06032176363571505, "grad_norm": 0.41303813457489014, "learning_rate": 3.0164109882269e-05, "loss": 0.325, "step": 3382 }, { "epoch": 0.060339599757428745, "grad_norm": 0.33736518025398254, "learning_rate": 3.0173028897609705e-05, "loss": 0.2657, "step": 3383 }, { "epoch": 0.06035743587914244, "grad_norm": 0.46587398648262024, "learning_rate": 3.0181947912950408e-05, "loss": 0.3808, "step": 3384 }, { "epoch": 0.060375272000856135, "grad_norm": 0.3951577842235565, "learning_rate": 3.0190866928291118e-05, "loss": 0.258, "step": 3385 }, { "epoch": 0.06039310812256983, "grad_norm": 0.4404039680957794, "learning_rate": 3.0199785943631824e-05, "loss": 0.2642, "step": 3386 }, { "epoch": 0.060410944244283525, "grad_norm": 0.42050695419311523, "learning_rate": 3.0208704958972534e-05, "loss": 0.2364, "step": 3387 }, { "epoch": 0.06042878036599722, "grad_norm": 0.5026699304580688, "learning_rate": 3.0217623974313237e-05, "loss": 0.2618, "step": 3388 }, { "epoch": 0.060446616487710915, "grad_norm": 0.3759438097476959, "learning_rate": 3.0226542989653943e-05, "loss": 0.3204, "step": 3389 }, { "epoch": 0.06046445260942461, "grad_norm": 0.34291404485702515, "learning_rate": 3.0235462004994653e-05, "loss": 0.2917, "step": 3390 }, { "epoch": 0.0604822887311383, "grad_norm": 0.4279941916465759, "learning_rate": 3.0244381020335356e-05, "loss": 0.3, "step": 3391 }, { "epoch": 0.06050012485285199, "grad_norm": 0.5196157693862915, "learning_rate": 3.0253300035676063e-05, "loss": 0.3284, "step": 3392 }, { "epoch": 0.06051796097456569, "grad_norm": 0.4175237715244293, "learning_rate": 3.0262219051016772e-05, "loss": 0.3099, "step": 3393 }, { "epoch": 0.06053579709627938, "grad_norm": 0.5252898931503296, "learning_rate": 3.0271138066357475e-05, "loss": 0.3187, "step": 3394 }, { "epoch": 0.06055363321799308, "grad_norm": 0.3560069799423218, "learning_rate": 3.0280057081698182e-05, "loss": 0.2334, "step": 3395 }, { "epoch": 0.06057146933970677, "grad_norm": 0.3487046957015991, "learning_rate": 3.028897609703889e-05, "loss": 0.2925, "step": 3396 }, { "epoch": 0.06058930546142047, "grad_norm": 0.35152021050453186, "learning_rate": 3.0297895112379595e-05, "loss": 0.3076, "step": 3397 }, { "epoch": 0.06060714158313416, "grad_norm": 0.3704475462436676, "learning_rate": 3.03068141277203e-05, "loss": 0.281, "step": 3398 }, { "epoch": 0.06062497770484786, "grad_norm": 0.3680011034011841, "learning_rate": 3.031573314306101e-05, "loss": 0.3198, "step": 3399 }, { "epoch": 0.06064281382656155, "grad_norm": 0.30677318572998047, "learning_rate": 3.032465215840171e-05, "loss": 0.2769, "step": 3400 }, { "epoch": 0.06066064994827525, "grad_norm": 0.3701297342777252, "learning_rate": 3.033357117374242e-05, "loss": 0.3025, "step": 3401 }, { "epoch": 0.06067848606998894, "grad_norm": 0.44856762886047363, "learning_rate": 3.0342490189083126e-05, "loss": 0.2882, "step": 3402 }, { "epoch": 0.06069632219170264, "grad_norm": 0.4001961350440979, "learning_rate": 3.035140920442383e-05, "loss": 0.3045, "step": 3403 }, { "epoch": 0.06071415831341633, "grad_norm": 0.35330164432525635, "learning_rate": 3.036032821976454e-05, "loss": 0.278, "step": 3404 }, { "epoch": 0.06073199443513003, "grad_norm": 0.47330203652381897, "learning_rate": 3.0369247235105246e-05, "loss": 0.2694, "step": 3405 }, { "epoch": 0.06074983055684372, "grad_norm": 0.5003327131271362, "learning_rate": 3.037816625044595e-05, "loss": 0.3379, "step": 3406 }, { "epoch": 0.06076766667855742, "grad_norm": 0.3417317867279053, "learning_rate": 3.038708526578666e-05, "loss": 0.274, "step": 3407 }, { "epoch": 0.06078550280027111, "grad_norm": 0.42910662293434143, "learning_rate": 3.0396004281127365e-05, "loss": 0.2872, "step": 3408 }, { "epoch": 0.060803338921984806, "grad_norm": 0.5343186855316162, "learning_rate": 3.0404923296468074e-05, "loss": 0.2583, "step": 3409 }, { "epoch": 0.0608211750436985, "grad_norm": 0.30897626280784607, "learning_rate": 3.0413842311808777e-05, "loss": 0.2667, "step": 3410 }, { "epoch": 0.060839011165412196, "grad_norm": 0.43187278509140015, "learning_rate": 3.0422761327149484e-05, "loss": 0.3362, "step": 3411 }, { "epoch": 0.060856847287125884, "grad_norm": 0.48747536540031433, "learning_rate": 3.0431680342490194e-05, "loss": 0.3141, "step": 3412 }, { "epoch": 0.06087468340883958, "grad_norm": 0.4179791808128357, "learning_rate": 3.0440599357830897e-05, "loss": 0.2906, "step": 3413 }, { "epoch": 0.060892519530553274, "grad_norm": 0.4218780994415283, "learning_rate": 3.0449518373171603e-05, "loss": 0.285, "step": 3414 }, { "epoch": 0.06091035565226697, "grad_norm": 0.4083127975463867, "learning_rate": 3.0458437388512313e-05, "loss": 0.2415, "step": 3415 }, { "epoch": 0.060928191773980664, "grad_norm": 0.42035984992980957, "learning_rate": 3.0467356403853016e-05, "loss": 0.3514, "step": 3416 }, { "epoch": 0.06094602789569436, "grad_norm": 0.520683765411377, "learning_rate": 3.0476275419193722e-05, "loss": 0.3256, "step": 3417 }, { "epoch": 0.060963864017408054, "grad_norm": 0.310117244720459, "learning_rate": 3.0485194434534432e-05, "loss": 0.256, "step": 3418 }, { "epoch": 0.06098170013912175, "grad_norm": 0.4333760738372803, "learning_rate": 3.0494113449875135e-05, "loss": 0.2229, "step": 3419 }, { "epoch": 0.060999536260835444, "grad_norm": 0.32307708263397217, "learning_rate": 3.050303246521584e-05, "loss": 0.2846, "step": 3420 }, { "epoch": 0.06101737238254914, "grad_norm": 0.7281751036643982, "learning_rate": 3.051195148055655e-05, "loss": 0.2683, "step": 3421 }, { "epoch": 0.06103520850426283, "grad_norm": 0.2901424765586853, "learning_rate": 3.0520870495897254e-05, "loss": 0.2593, "step": 3422 }, { "epoch": 0.06105304462597653, "grad_norm": 0.6676464080810547, "learning_rate": 3.052978951123796e-05, "loss": 0.2919, "step": 3423 }, { "epoch": 0.06107088074769022, "grad_norm": 0.5041801333427429, "learning_rate": 3.053870852657867e-05, "loss": 0.2544, "step": 3424 }, { "epoch": 0.06108871686940392, "grad_norm": 0.5568588376045227, "learning_rate": 3.054762754191937e-05, "loss": 0.2998, "step": 3425 }, { "epoch": 0.06110655299111761, "grad_norm": 0.4442998170852661, "learning_rate": 3.055654655726008e-05, "loss": 0.2591, "step": 3426 }, { "epoch": 0.06112438911283131, "grad_norm": 0.5679812431335449, "learning_rate": 3.0565465572600786e-05, "loss": 0.3145, "step": 3427 }, { "epoch": 0.061142225234545, "grad_norm": 0.4436073303222656, "learning_rate": 3.057438458794149e-05, "loss": 0.2811, "step": 3428 }, { "epoch": 0.0611600613562587, "grad_norm": 0.2981335520744324, "learning_rate": 3.05833036032822e-05, "loss": 0.2326, "step": 3429 }, { "epoch": 0.06117789747797239, "grad_norm": 0.3030025064945221, "learning_rate": 3.0592222618622905e-05, "loss": 0.2513, "step": 3430 }, { "epoch": 0.06119573359968609, "grad_norm": 0.36721524596214294, "learning_rate": 3.060114163396361e-05, "loss": 0.2662, "step": 3431 }, { "epoch": 0.061213569721399776, "grad_norm": 0.43514400720596313, "learning_rate": 3.061006064930432e-05, "loss": 0.2615, "step": 3432 }, { "epoch": 0.06123140584311347, "grad_norm": 0.4660835266113281, "learning_rate": 3.0618979664645024e-05, "loss": 0.2659, "step": 3433 }, { "epoch": 0.061249241964827165, "grad_norm": 0.3673686683177948, "learning_rate": 3.062789867998573e-05, "loss": 0.259, "step": 3434 }, { "epoch": 0.06126707808654086, "grad_norm": 0.37457528710365295, "learning_rate": 3.063681769532644e-05, "loss": 0.2998, "step": 3435 }, { "epoch": 0.061284914208254555, "grad_norm": 0.37120434641838074, "learning_rate": 3.064573671066714e-05, "loss": 0.3192, "step": 3436 }, { "epoch": 0.06130275032996825, "grad_norm": 0.3894599378108978, "learning_rate": 3.065465572600785e-05, "loss": 0.2815, "step": 3437 }, { "epoch": 0.061320586451681945, "grad_norm": 0.4034176766872406, "learning_rate": 3.0663574741348556e-05, "loss": 0.3205, "step": 3438 }, { "epoch": 0.06133842257339564, "grad_norm": 0.39361411333084106, "learning_rate": 3.067249375668926e-05, "loss": 0.2544, "step": 3439 }, { "epoch": 0.061356258695109335, "grad_norm": 0.37965863943099976, "learning_rate": 3.068141277202997e-05, "loss": 0.2889, "step": 3440 }, { "epoch": 0.06137409481682303, "grad_norm": 0.5079521536827087, "learning_rate": 3.0690331787370675e-05, "loss": 0.2821, "step": 3441 }, { "epoch": 0.061391930938536725, "grad_norm": 0.3491654396057129, "learning_rate": 3.069925080271138e-05, "loss": 0.2874, "step": 3442 }, { "epoch": 0.06140976706025042, "grad_norm": 0.5786492824554443, "learning_rate": 3.070816981805209e-05, "loss": 0.3637, "step": 3443 }, { "epoch": 0.061427603181964115, "grad_norm": 0.3809356689453125, "learning_rate": 3.0717088833392794e-05, "loss": 0.2616, "step": 3444 }, { "epoch": 0.06144543930367781, "grad_norm": 0.2865469753742218, "learning_rate": 3.07260078487335e-05, "loss": 0.2462, "step": 3445 }, { "epoch": 0.061463275425391505, "grad_norm": 0.2671143710613251, "learning_rate": 3.073492686407421e-05, "loss": 0.2725, "step": 3446 }, { "epoch": 0.0614811115471052, "grad_norm": 0.38948559761047363, "learning_rate": 3.0743845879414914e-05, "loss": 0.3297, "step": 3447 }, { "epoch": 0.061498947668818894, "grad_norm": 0.4456574618816376, "learning_rate": 3.075276489475562e-05, "loss": 0.3657, "step": 3448 }, { "epoch": 0.06151678379053259, "grad_norm": 0.29803067445755005, "learning_rate": 3.0761683910096326e-05, "loss": 0.2696, "step": 3449 }, { "epoch": 0.061534619912246284, "grad_norm": 0.33508118987083435, "learning_rate": 3.077060292543703e-05, "loss": 0.291, "step": 3450 }, { "epoch": 0.06155245603395998, "grad_norm": 0.5954883098602295, "learning_rate": 3.077952194077774e-05, "loss": 0.3106, "step": 3451 }, { "epoch": 0.06157029215567367, "grad_norm": 0.4862188994884491, "learning_rate": 3.0788440956118445e-05, "loss": 0.2664, "step": 3452 }, { "epoch": 0.06158812827738736, "grad_norm": 0.347736656665802, "learning_rate": 3.079735997145915e-05, "loss": 0.3012, "step": 3453 }, { "epoch": 0.06160596439910106, "grad_norm": 0.4035290777683258, "learning_rate": 3.080627898679986e-05, "loss": 0.2769, "step": 3454 }, { "epoch": 0.06162380052081475, "grad_norm": 0.4547941982746124, "learning_rate": 3.0815198002140565e-05, "loss": 0.327, "step": 3455 }, { "epoch": 0.06164163664252845, "grad_norm": 0.34405431151390076, "learning_rate": 3.082411701748127e-05, "loss": 0.3032, "step": 3456 }, { "epoch": 0.06165947276424214, "grad_norm": 0.36265790462493896, "learning_rate": 3.083303603282198e-05, "loss": 0.277, "step": 3457 }, { "epoch": 0.06167730888595584, "grad_norm": 0.41423872113227844, "learning_rate": 3.0841955048162684e-05, "loss": 0.3509, "step": 3458 }, { "epoch": 0.06169514500766953, "grad_norm": 0.3635760247707367, "learning_rate": 3.085087406350339e-05, "loss": 0.1995, "step": 3459 }, { "epoch": 0.061712981129383226, "grad_norm": 0.5014716982841492, "learning_rate": 3.0859793078844097e-05, "loss": 0.2987, "step": 3460 }, { "epoch": 0.06173081725109692, "grad_norm": 0.3897286653518677, "learning_rate": 3.08687120941848e-05, "loss": 0.307, "step": 3461 }, { "epoch": 0.061748653372810616, "grad_norm": 0.3641478717327118, "learning_rate": 3.087763110952551e-05, "loss": 0.3255, "step": 3462 }, { "epoch": 0.06176648949452431, "grad_norm": 0.32254454493522644, "learning_rate": 3.0886550124866216e-05, "loss": 0.2529, "step": 3463 }, { "epoch": 0.061784325616238006, "grad_norm": 0.44392848014831543, "learning_rate": 3.089546914020692e-05, "loss": 0.3061, "step": 3464 }, { "epoch": 0.0618021617379517, "grad_norm": 0.39981019496917725, "learning_rate": 3.090438815554763e-05, "loss": 0.2907, "step": 3465 }, { "epoch": 0.061819997859665396, "grad_norm": 0.43697279691696167, "learning_rate": 3.0913307170888335e-05, "loss": 0.2752, "step": 3466 }, { "epoch": 0.06183783398137909, "grad_norm": 0.33411675691604614, "learning_rate": 3.092222618622904e-05, "loss": 0.2644, "step": 3467 }, { "epoch": 0.061855670103092786, "grad_norm": 0.3456276059150696, "learning_rate": 3.093114520156975e-05, "loss": 0.2696, "step": 3468 }, { "epoch": 0.06187350622480648, "grad_norm": 0.38292551040649414, "learning_rate": 3.0940064216910454e-05, "loss": 0.2888, "step": 3469 }, { "epoch": 0.061891342346520176, "grad_norm": 0.5598361492156982, "learning_rate": 3.094898323225116e-05, "loss": 0.33, "step": 3470 }, { "epoch": 0.06190917846823387, "grad_norm": 0.3957209885120392, "learning_rate": 3.095790224759187e-05, "loss": 0.2901, "step": 3471 }, { "epoch": 0.06192701458994756, "grad_norm": 0.2978615164756775, "learning_rate": 3.096682126293257e-05, "loss": 0.2637, "step": 3472 }, { "epoch": 0.06194485071166125, "grad_norm": 0.29691120982170105, "learning_rate": 3.097574027827328e-05, "loss": 0.2315, "step": 3473 }, { "epoch": 0.06196268683337495, "grad_norm": 0.32011178135871887, "learning_rate": 3.0984659293613986e-05, "loss": 0.2801, "step": 3474 }, { "epoch": 0.06198052295508864, "grad_norm": 0.3960849344730377, "learning_rate": 3.099357830895469e-05, "loss": 0.2955, "step": 3475 }, { "epoch": 0.06199835907680234, "grad_norm": 0.42267149686813354, "learning_rate": 3.10024973242954e-05, "loss": 0.2892, "step": 3476 }, { "epoch": 0.06201619519851603, "grad_norm": 0.4643610715866089, "learning_rate": 3.1011416339636105e-05, "loss": 0.2749, "step": 3477 }, { "epoch": 0.06203403132022973, "grad_norm": 0.3296380043029785, "learning_rate": 3.102033535497681e-05, "loss": 0.283, "step": 3478 }, { "epoch": 0.06205186744194342, "grad_norm": 0.43161651492118835, "learning_rate": 3.102925437031752e-05, "loss": 0.3162, "step": 3479 }, { "epoch": 0.06206970356365712, "grad_norm": 0.31484830379486084, "learning_rate": 3.1038173385658224e-05, "loss": 0.289, "step": 3480 }, { "epoch": 0.06208753968537081, "grad_norm": 0.29794466495513916, "learning_rate": 3.104709240099894e-05, "loss": 0.2864, "step": 3481 }, { "epoch": 0.06210537580708451, "grad_norm": 0.31414487957954407, "learning_rate": 3.105601141633964e-05, "loss": 0.2573, "step": 3482 }, { "epoch": 0.0621232119287982, "grad_norm": 0.40964171290397644, "learning_rate": 3.106493043168034e-05, "loss": 0.3083, "step": 3483 }, { "epoch": 0.0621410480505119, "grad_norm": 0.5551015138626099, "learning_rate": 3.107384944702105e-05, "loss": 0.3108, "step": 3484 }, { "epoch": 0.06215888417222559, "grad_norm": 0.43493130803108215, "learning_rate": 3.1082768462361756e-05, "loss": 0.2971, "step": 3485 }, { "epoch": 0.06217672029393929, "grad_norm": 0.5982611179351807, "learning_rate": 3.109168747770246e-05, "loss": 0.3517, "step": 3486 }, { "epoch": 0.06219455641565298, "grad_norm": 0.45484572649002075, "learning_rate": 3.110060649304317e-05, "loss": 0.3272, "step": 3487 }, { "epoch": 0.06221239253736668, "grad_norm": 0.3246980607509613, "learning_rate": 3.1109525508383875e-05, "loss": 0.2528, "step": 3488 }, { "epoch": 0.06223022865908037, "grad_norm": 0.2784026563167572, "learning_rate": 3.111844452372458e-05, "loss": 0.2535, "step": 3489 }, { "epoch": 0.06224806478079407, "grad_norm": 0.3554953336715698, "learning_rate": 3.112736353906529e-05, "loss": 0.2815, "step": 3490 }, { "epoch": 0.06226590090250776, "grad_norm": 0.3764471113681793, "learning_rate": 3.1136282554405994e-05, "loss": 0.2925, "step": 3491 }, { "epoch": 0.06228373702422145, "grad_norm": 0.3088894188404083, "learning_rate": 3.11452015697467e-05, "loss": 0.2624, "step": 3492 }, { "epoch": 0.062301573145935145, "grad_norm": 0.347292959690094, "learning_rate": 3.115412058508741e-05, "loss": 0.251, "step": 3493 }, { "epoch": 0.06231940926764884, "grad_norm": 0.26496273279190063, "learning_rate": 3.1163039600428113e-05, "loss": 0.2185, "step": 3494 }, { "epoch": 0.062337245389362535, "grad_norm": 0.3232201933860779, "learning_rate": 3.117195861576882e-05, "loss": 0.2905, "step": 3495 }, { "epoch": 0.06235508151107623, "grad_norm": 0.3585107922554016, "learning_rate": 3.1180877631109526e-05, "loss": 0.2946, "step": 3496 }, { "epoch": 0.062372917632789925, "grad_norm": 0.31758782267570496, "learning_rate": 3.118979664645023e-05, "loss": 0.2713, "step": 3497 }, { "epoch": 0.06239075375450362, "grad_norm": 0.33788156509399414, "learning_rate": 3.119871566179094e-05, "loss": 0.2883, "step": 3498 }, { "epoch": 0.062408589876217314, "grad_norm": 0.699823260307312, "learning_rate": 3.1207634677131645e-05, "loss": 0.2454, "step": 3499 }, { "epoch": 0.06242642599793101, "grad_norm": 0.37101513147354126, "learning_rate": 3.121655369247235e-05, "loss": 0.3186, "step": 3500 }, { "epoch": 0.062444262119644704, "grad_norm": 0.3172360360622406, "learning_rate": 3.122547270781306e-05, "loss": 0.3276, "step": 3501 }, { "epoch": 0.0624620982413584, "grad_norm": 0.3600565493106842, "learning_rate": 3.1234391723153764e-05, "loss": 0.2516, "step": 3502 }, { "epoch": 0.062479934363072094, "grad_norm": 0.41846194863319397, "learning_rate": 3.124331073849447e-05, "loss": 0.3684, "step": 3503 }, { "epoch": 0.06249777048478579, "grad_norm": 0.4761829376220703, "learning_rate": 3.125222975383518e-05, "loss": 0.3737, "step": 3504 }, { "epoch": 0.06251560660649948, "grad_norm": 0.36271214485168457, "learning_rate": 3.1261148769175884e-05, "loss": 0.2952, "step": 3505 }, { "epoch": 0.06253344272821318, "grad_norm": 0.37938109040260315, "learning_rate": 3.12700677845166e-05, "loss": 0.196, "step": 3506 }, { "epoch": 0.06255127884992687, "grad_norm": 0.29861417412757874, "learning_rate": 3.1278986799857296e-05, "loss": 0.2606, "step": 3507 }, { "epoch": 0.06256911497164057, "grad_norm": 0.27266961336135864, "learning_rate": 3.1287905815198e-05, "loss": 0.2467, "step": 3508 }, { "epoch": 0.06258695109335426, "grad_norm": 0.450681209564209, "learning_rate": 3.129682483053871e-05, "loss": 0.3139, "step": 3509 }, { "epoch": 0.06260478721506796, "grad_norm": 0.3376198709011078, "learning_rate": 3.1305743845879416e-05, "loss": 0.2381, "step": 3510 }, { "epoch": 0.06262262333678165, "grad_norm": 0.37846675515174866, "learning_rate": 3.131466286122012e-05, "loss": 0.3134, "step": 3511 }, { "epoch": 0.06264045945849535, "grad_norm": 0.4619830250740051, "learning_rate": 3.132358187656083e-05, "loss": 0.2902, "step": 3512 }, { "epoch": 0.06265829558020904, "grad_norm": 0.42697346210479736, "learning_rate": 3.1332500891901535e-05, "loss": 0.3192, "step": 3513 }, { "epoch": 0.06267613170192274, "grad_norm": 0.3949091136455536, "learning_rate": 3.134141990724224e-05, "loss": 0.304, "step": 3514 }, { "epoch": 0.06269396782363643, "grad_norm": 0.37694236636161804, "learning_rate": 3.135033892258295e-05, "loss": 0.3137, "step": 3515 }, { "epoch": 0.06271180394535013, "grad_norm": 0.2839398682117462, "learning_rate": 3.1359257937923654e-05, "loss": 0.2351, "step": 3516 }, { "epoch": 0.06272964006706382, "grad_norm": 0.392132431268692, "learning_rate": 3.136817695326436e-05, "loss": 0.2615, "step": 3517 }, { "epoch": 0.06274747618877752, "grad_norm": 0.38654953241348267, "learning_rate": 3.1377095968605067e-05, "loss": 0.2745, "step": 3518 }, { "epoch": 0.0627653123104912, "grad_norm": 0.44307559728622437, "learning_rate": 3.138601498394577e-05, "loss": 0.3217, "step": 3519 }, { "epoch": 0.06278314843220491, "grad_norm": 0.41050493717193604, "learning_rate": 3.139493399928648e-05, "loss": 0.2646, "step": 3520 }, { "epoch": 0.0628009845539186, "grad_norm": 0.49982941150665283, "learning_rate": 3.1403853014627186e-05, "loss": 0.2802, "step": 3521 }, { "epoch": 0.06281882067563228, "grad_norm": 0.3722202479839325, "learning_rate": 3.141277202996789e-05, "loss": 0.2991, "step": 3522 }, { "epoch": 0.06283665679734599, "grad_norm": 0.31066420674324036, "learning_rate": 3.14216910453086e-05, "loss": 0.2825, "step": 3523 }, { "epoch": 0.06285449291905967, "grad_norm": 0.5403444170951843, "learning_rate": 3.1430610060649305e-05, "loss": 0.3364, "step": 3524 }, { "epoch": 0.06287232904077338, "grad_norm": 0.3716685473918915, "learning_rate": 3.143952907599001e-05, "loss": 0.3082, "step": 3525 }, { "epoch": 0.06289016516248706, "grad_norm": 0.36720171570777893, "learning_rate": 3.144844809133072e-05, "loss": 0.2294, "step": 3526 }, { "epoch": 0.06290800128420077, "grad_norm": 0.4109004735946655, "learning_rate": 3.1457367106671424e-05, "loss": 0.325, "step": 3527 }, { "epoch": 0.06292583740591445, "grad_norm": 0.40544942021369934, "learning_rate": 3.146628612201214e-05, "loss": 0.2929, "step": 3528 }, { "epoch": 0.06294367352762816, "grad_norm": 0.43174639344215393, "learning_rate": 3.147520513735284e-05, "loss": 0.2968, "step": 3529 }, { "epoch": 0.06296150964934184, "grad_norm": 0.42321160435676575, "learning_rate": 3.148412415269354e-05, "loss": 0.3088, "step": 3530 }, { "epoch": 0.06297934577105554, "grad_norm": 0.406230092048645, "learning_rate": 3.1493043168034256e-05, "loss": 0.2873, "step": 3531 }, { "epoch": 0.06299718189276923, "grad_norm": 0.457004189491272, "learning_rate": 3.1501962183374956e-05, "loss": 0.2867, "step": 3532 }, { "epoch": 0.06301501801448293, "grad_norm": 0.34689781069755554, "learning_rate": 3.151088119871566e-05, "loss": 0.3056, "step": 3533 }, { "epoch": 0.06303285413619662, "grad_norm": 0.34096047282218933, "learning_rate": 3.151980021405637e-05, "loss": 0.2842, "step": 3534 }, { "epoch": 0.06305069025791032, "grad_norm": 0.2742103636264801, "learning_rate": 3.1528719229397075e-05, "loss": 0.2458, "step": 3535 }, { "epoch": 0.06306852637962401, "grad_norm": 0.5038346648216248, "learning_rate": 3.153763824473778e-05, "loss": 0.3018, "step": 3536 }, { "epoch": 0.06308636250133771, "grad_norm": 0.5167878270149231, "learning_rate": 3.154655726007849e-05, "loss": 0.296, "step": 3537 }, { "epoch": 0.0631041986230514, "grad_norm": 0.37561002373695374, "learning_rate": 3.1555476275419194e-05, "loss": 0.2719, "step": 3538 }, { "epoch": 0.0631220347447651, "grad_norm": 0.390768438577652, "learning_rate": 3.15643952907599e-05, "loss": 0.2553, "step": 3539 }, { "epoch": 0.06313987086647879, "grad_norm": 0.4308592975139618, "learning_rate": 3.157331430610061e-05, "loss": 0.2845, "step": 3540 }, { "epoch": 0.0631577069881925, "grad_norm": 0.36119189858436584, "learning_rate": 3.158223332144131e-05, "loss": 0.29, "step": 3541 }, { "epoch": 0.06317554310990618, "grad_norm": 0.46786993741989136, "learning_rate": 3.159115233678202e-05, "loss": 0.306, "step": 3542 }, { "epoch": 0.06319337923161987, "grad_norm": 0.3725983202457428, "learning_rate": 3.1600071352122726e-05, "loss": 0.2224, "step": 3543 }, { "epoch": 0.06321121535333357, "grad_norm": 0.30451688170433044, "learning_rate": 3.160899036746343e-05, "loss": 0.2545, "step": 3544 }, { "epoch": 0.06322905147504726, "grad_norm": 0.3579949140548706, "learning_rate": 3.161790938280414e-05, "loss": 0.2581, "step": 3545 }, { "epoch": 0.06324688759676096, "grad_norm": 0.4460763931274414, "learning_rate": 3.1626828398144845e-05, "loss": 0.3024, "step": 3546 }, { "epoch": 0.06326472371847465, "grad_norm": 0.3446510136127472, "learning_rate": 3.163574741348555e-05, "loss": 0.2252, "step": 3547 }, { "epoch": 0.06328255984018835, "grad_norm": 0.43941184878349304, "learning_rate": 3.164466642882626e-05, "loss": 0.2902, "step": 3548 }, { "epoch": 0.06330039596190204, "grad_norm": 0.4447469413280487, "learning_rate": 3.1653585444166964e-05, "loss": 0.312, "step": 3549 }, { "epoch": 0.06331823208361574, "grad_norm": 0.4542385935783386, "learning_rate": 3.166250445950767e-05, "loss": 0.3153, "step": 3550 }, { "epoch": 0.06333606820532943, "grad_norm": 0.4392056167125702, "learning_rate": 3.167142347484838e-05, "loss": 0.3273, "step": 3551 }, { "epoch": 0.06335390432704313, "grad_norm": 0.5697815418243408, "learning_rate": 3.1680342490189084e-05, "loss": 0.3172, "step": 3552 }, { "epoch": 0.06337174044875682, "grad_norm": 0.4335101842880249, "learning_rate": 3.16892615055298e-05, "loss": 0.3071, "step": 3553 }, { "epoch": 0.06338957657047052, "grad_norm": 0.36603108048439026, "learning_rate": 3.1698180520870496e-05, "loss": 0.2853, "step": 3554 }, { "epoch": 0.06340741269218421, "grad_norm": 0.34847649931907654, "learning_rate": 3.17070995362112e-05, "loss": 0.3197, "step": 3555 }, { "epoch": 0.06342524881389791, "grad_norm": 0.3712030351161957, "learning_rate": 3.1716018551551916e-05, "loss": 0.3124, "step": 3556 }, { "epoch": 0.0634430849356116, "grad_norm": 0.3167654573917389, "learning_rate": 3.1724937566892615e-05, "loss": 0.2548, "step": 3557 }, { "epoch": 0.0634609210573253, "grad_norm": 0.27881067991256714, "learning_rate": 3.173385658223332e-05, "loss": 0.2637, "step": 3558 }, { "epoch": 0.06347875717903899, "grad_norm": 0.34619900584220886, "learning_rate": 3.174277559757403e-05, "loss": 0.2414, "step": 3559 }, { "epoch": 0.06349659330075269, "grad_norm": 0.4399753510951996, "learning_rate": 3.1751694612914735e-05, "loss": 0.2797, "step": 3560 }, { "epoch": 0.06351442942246638, "grad_norm": 0.44486740231513977, "learning_rate": 3.176061362825544e-05, "loss": 0.3748, "step": 3561 }, { "epoch": 0.06353226554418007, "grad_norm": 0.364371657371521, "learning_rate": 3.176953264359615e-05, "loss": 0.3002, "step": 3562 }, { "epoch": 0.06355010166589377, "grad_norm": 0.43306100368499756, "learning_rate": 3.1778451658936854e-05, "loss": 0.3043, "step": 3563 }, { "epoch": 0.06356793778760746, "grad_norm": 0.38034042716026306, "learning_rate": 3.178737067427756e-05, "loss": 0.2758, "step": 3564 }, { "epoch": 0.06358577390932116, "grad_norm": 0.37408167123794556, "learning_rate": 3.1796289689618266e-05, "loss": 0.2848, "step": 3565 }, { "epoch": 0.06360361003103485, "grad_norm": 0.41384997963905334, "learning_rate": 3.180520870495897e-05, "loss": 0.3306, "step": 3566 }, { "epoch": 0.06362144615274855, "grad_norm": 0.2774743437767029, "learning_rate": 3.181412772029968e-05, "loss": 0.2325, "step": 3567 }, { "epoch": 0.06363928227446224, "grad_norm": 0.428325891494751, "learning_rate": 3.1823046735640386e-05, "loss": 0.3005, "step": 3568 }, { "epoch": 0.06365711839617594, "grad_norm": 0.36428940296173096, "learning_rate": 3.183196575098109e-05, "loss": 0.2596, "step": 3569 }, { "epoch": 0.06367495451788963, "grad_norm": 0.4848487079143524, "learning_rate": 3.18408847663218e-05, "loss": 0.2533, "step": 3570 }, { "epoch": 0.06369279063960333, "grad_norm": 0.3982924818992615, "learning_rate": 3.1849803781662505e-05, "loss": 0.2648, "step": 3571 }, { "epoch": 0.06371062676131702, "grad_norm": 0.46774011850357056, "learning_rate": 3.185872279700321e-05, "loss": 0.257, "step": 3572 }, { "epoch": 0.06372846288303072, "grad_norm": 0.6224254369735718, "learning_rate": 3.186764181234392e-05, "loss": 0.3214, "step": 3573 }, { "epoch": 0.0637462990047444, "grad_norm": 0.31088629364967346, "learning_rate": 3.1876560827684624e-05, "loss": 0.2476, "step": 3574 }, { "epoch": 0.06376413512645811, "grad_norm": 0.543942391872406, "learning_rate": 3.188547984302533e-05, "loss": 0.2444, "step": 3575 }, { "epoch": 0.0637819712481718, "grad_norm": 0.5251997709274292, "learning_rate": 3.189439885836604e-05, "loss": 0.274, "step": 3576 }, { "epoch": 0.0637998073698855, "grad_norm": 0.32639220356941223, "learning_rate": 3.190331787370674e-05, "loss": 0.2272, "step": 3577 }, { "epoch": 0.06381764349159919, "grad_norm": 0.41618624329566956, "learning_rate": 3.1912236889047456e-05, "loss": 0.2965, "step": 3578 }, { "epoch": 0.06383547961331289, "grad_norm": 0.5315070748329163, "learning_rate": 3.1921155904388156e-05, "loss": 0.3144, "step": 3579 }, { "epoch": 0.06385331573502658, "grad_norm": 0.463633269071579, "learning_rate": 3.193007491972886e-05, "loss": 0.265, "step": 3580 }, { "epoch": 0.06387115185674028, "grad_norm": 0.4081169068813324, "learning_rate": 3.1938993935069575e-05, "loss": 0.2818, "step": 3581 }, { "epoch": 0.06388898797845396, "grad_norm": 0.3589589297771454, "learning_rate": 3.1947912950410275e-05, "loss": 0.2707, "step": 3582 }, { "epoch": 0.06390682410016765, "grad_norm": 0.3879191279411316, "learning_rate": 3.195683196575098e-05, "loss": 0.3095, "step": 3583 }, { "epoch": 0.06392466022188135, "grad_norm": 0.38584789633750916, "learning_rate": 3.1965750981091694e-05, "loss": 0.2721, "step": 3584 }, { "epoch": 0.06394249634359504, "grad_norm": 0.3974877893924713, "learning_rate": 3.1974669996432394e-05, "loss": 0.3207, "step": 3585 }, { "epoch": 0.06396033246530874, "grad_norm": 0.4395434856414795, "learning_rate": 3.19835890117731e-05, "loss": 0.287, "step": 3586 }, { "epoch": 0.06397816858702243, "grad_norm": 0.37194618582725525, "learning_rate": 3.199250802711381e-05, "loss": 0.3245, "step": 3587 }, { "epoch": 0.06399600470873613, "grad_norm": 0.47053906321525574, "learning_rate": 3.200142704245451e-05, "loss": 0.287, "step": 3588 }, { "epoch": 0.06401384083044982, "grad_norm": 0.4476097524166107, "learning_rate": 3.201034605779522e-05, "loss": 0.2847, "step": 3589 }, { "epoch": 0.06403167695216352, "grad_norm": 0.35727089643478394, "learning_rate": 3.2019265073135926e-05, "loss": 0.2516, "step": 3590 }, { "epoch": 0.06404951307387721, "grad_norm": 0.43988677859306335, "learning_rate": 3.202818408847663e-05, "loss": 0.277, "step": 3591 }, { "epoch": 0.06406734919559091, "grad_norm": 0.341366171836853, "learning_rate": 3.203710310381734e-05, "loss": 0.2247, "step": 3592 }, { "epoch": 0.0640851853173046, "grad_norm": 0.32592204213142395, "learning_rate": 3.2046022119158045e-05, "loss": 0.2191, "step": 3593 }, { "epoch": 0.0641030214390183, "grad_norm": 0.41548824310302734, "learning_rate": 3.205494113449875e-05, "loss": 0.2722, "step": 3594 }, { "epoch": 0.06412085756073199, "grad_norm": 0.7157866954803467, "learning_rate": 3.206386014983946e-05, "loss": 0.3582, "step": 3595 }, { "epoch": 0.0641386936824457, "grad_norm": 0.39347320795059204, "learning_rate": 3.2072779165180164e-05, "loss": 0.308, "step": 3596 }, { "epoch": 0.06415652980415938, "grad_norm": 0.4340120553970337, "learning_rate": 3.208169818052087e-05, "loss": 0.2817, "step": 3597 }, { "epoch": 0.06417436592587308, "grad_norm": 0.36449071764945984, "learning_rate": 3.209061719586158e-05, "loss": 0.3306, "step": 3598 }, { "epoch": 0.06419220204758677, "grad_norm": 0.3659517765045166, "learning_rate": 3.209953621120228e-05, "loss": 0.2781, "step": 3599 }, { "epoch": 0.06421003816930047, "grad_norm": 0.3556494116783142, "learning_rate": 3.2108455226542997e-05, "loss": 0.2497, "step": 3600 }, { "epoch": 0.06422787429101416, "grad_norm": 0.3153342008590698, "learning_rate": 3.2117374241883696e-05, "loss": 0.2479, "step": 3601 }, { "epoch": 0.06424571041272786, "grad_norm": 0.3864276707172394, "learning_rate": 3.21262932572244e-05, "loss": 0.2888, "step": 3602 }, { "epoch": 0.06426354653444155, "grad_norm": 0.34588074684143066, "learning_rate": 3.2135212272565116e-05, "loss": 0.2665, "step": 3603 }, { "epoch": 0.06428138265615524, "grad_norm": 0.5801905393600464, "learning_rate": 3.2144131287905815e-05, "loss": 0.355, "step": 3604 }, { "epoch": 0.06429921877786894, "grad_norm": 0.40911412239074707, "learning_rate": 3.215305030324652e-05, "loss": 0.3098, "step": 3605 }, { "epoch": 0.06431705489958263, "grad_norm": 0.37570998072624207, "learning_rate": 3.2161969318587235e-05, "loss": 0.2766, "step": 3606 }, { "epoch": 0.06433489102129633, "grad_norm": 0.30709129571914673, "learning_rate": 3.2170888333927934e-05, "loss": 0.295, "step": 3607 }, { "epoch": 0.06435272714301002, "grad_norm": 0.39083558320999146, "learning_rate": 3.217980734926864e-05, "loss": 0.2984, "step": 3608 }, { "epoch": 0.06437056326472372, "grad_norm": 0.30531859397888184, "learning_rate": 3.2188726364609354e-05, "loss": 0.2722, "step": 3609 }, { "epoch": 0.06438839938643741, "grad_norm": 0.2474583238363266, "learning_rate": 3.2197645379950054e-05, "loss": 0.2155, "step": 3610 }, { "epoch": 0.06440623550815111, "grad_norm": 0.46579623222351074, "learning_rate": 3.220656439529076e-05, "loss": 0.2127, "step": 3611 }, { "epoch": 0.0644240716298648, "grad_norm": 0.4663061201572418, "learning_rate": 3.2215483410631466e-05, "loss": 0.3162, "step": 3612 }, { "epoch": 0.0644419077515785, "grad_norm": 0.4508155584335327, "learning_rate": 3.222440242597217e-05, "loss": 0.3349, "step": 3613 }, { "epoch": 0.06445974387329219, "grad_norm": 0.4759390354156494, "learning_rate": 3.223332144131288e-05, "loss": 0.3283, "step": 3614 }, { "epoch": 0.06447757999500589, "grad_norm": 0.35923439264297485, "learning_rate": 3.2242240456653585e-05, "loss": 0.3003, "step": 3615 }, { "epoch": 0.06449541611671958, "grad_norm": 0.37564563751220703, "learning_rate": 3.225115947199429e-05, "loss": 0.2498, "step": 3616 }, { "epoch": 0.06451325223843328, "grad_norm": 0.5484115481376648, "learning_rate": 3.2260078487335e-05, "loss": 0.245, "step": 3617 }, { "epoch": 0.06453108836014697, "grad_norm": 0.26685571670532227, "learning_rate": 3.2268997502675705e-05, "loss": 0.2475, "step": 3618 }, { "epoch": 0.06454892448186067, "grad_norm": 0.288831502199173, "learning_rate": 3.227791651801641e-05, "loss": 0.2275, "step": 3619 }, { "epoch": 0.06456676060357436, "grad_norm": 0.3965565860271454, "learning_rate": 3.228683553335712e-05, "loss": 0.2997, "step": 3620 }, { "epoch": 0.06458459672528806, "grad_norm": 0.2891729176044464, "learning_rate": 3.2295754548697824e-05, "loss": 0.2471, "step": 3621 }, { "epoch": 0.06460243284700175, "grad_norm": 0.295961856842041, "learning_rate": 3.230467356403853e-05, "loss": 0.2369, "step": 3622 }, { "epoch": 0.06462026896871544, "grad_norm": 0.463190495967865, "learning_rate": 3.2313592579379237e-05, "loss": 0.2722, "step": 3623 }, { "epoch": 0.06463810509042914, "grad_norm": 0.46243876218795776, "learning_rate": 3.232251159471994e-05, "loss": 0.2688, "step": 3624 }, { "epoch": 0.06465594121214283, "grad_norm": 0.3638942837715149, "learning_rate": 3.2331430610060656e-05, "loss": 0.2565, "step": 3625 }, { "epoch": 0.06467377733385653, "grad_norm": 0.3956579267978668, "learning_rate": 3.2340349625401356e-05, "loss": 0.2673, "step": 3626 }, { "epoch": 0.06469161345557022, "grad_norm": 0.33142149448394775, "learning_rate": 3.234926864074206e-05, "loss": 0.227, "step": 3627 }, { "epoch": 0.06470944957728392, "grad_norm": 0.36795392632484436, "learning_rate": 3.2358187656082775e-05, "loss": 0.2773, "step": 3628 }, { "epoch": 0.0647272856989976, "grad_norm": 0.4311741590499878, "learning_rate": 3.2367106671423475e-05, "loss": 0.2779, "step": 3629 }, { "epoch": 0.06474512182071131, "grad_norm": 0.3273148238658905, "learning_rate": 3.237602568676418e-05, "loss": 0.2647, "step": 3630 }, { "epoch": 0.064762957942425, "grad_norm": 0.3899909555912018, "learning_rate": 3.2384944702104894e-05, "loss": 0.2757, "step": 3631 }, { "epoch": 0.0647807940641387, "grad_norm": 0.5008993148803711, "learning_rate": 3.2393863717445594e-05, "loss": 0.2592, "step": 3632 }, { "epoch": 0.06479863018585238, "grad_norm": 0.41284021735191345, "learning_rate": 3.24027827327863e-05, "loss": 0.3079, "step": 3633 }, { "epoch": 0.06481646630756609, "grad_norm": 0.3901015520095825, "learning_rate": 3.2411701748127013e-05, "loss": 0.2754, "step": 3634 }, { "epoch": 0.06483430242927977, "grad_norm": 0.39610543847084045, "learning_rate": 3.242062076346771e-05, "loss": 0.3192, "step": 3635 }, { "epoch": 0.06485213855099348, "grad_norm": 0.42231959104537964, "learning_rate": 3.242953977880842e-05, "loss": 0.3102, "step": 3636 }, { "epoch": 0.06486997467270716, "grad_norm": 0.39817750453948975, "learning_rate": 3.2438458794149126e-05, "loss": 0.2594, "step": 3637 }, { "epoch": 0.06488781079442087, "grad_norm": 0.39597004652023315, "learning_rate": 3.244737780948983e-05, "loss": 0.2865, "step": 3638 }, { "epoch": 0.06490564691613455, "grad_norm": 0.6299710273742676, "learning_rate": 3.245629682483054e-05, "loss": 0.3464, "step": 3639 }, { "epoch": 0.06492348303784826, "grad_norm": 0.2853766977787018, "learning_rate": 3.2465215840171245e-05, "loss": 0.2727, "step": 3640 }, { "epoch": 0.06494131915956194, "grad_norm": 0.33514028787612915, "learning_rate": 3.247413485551195e-05, "loss": 0.3077, "step": 3641 }, { "epoch": 0.06495915528127565, "grad_norm": 0.4610214829444885, "learning_rate": 3.248305387085266e-05, "loss": 0.2716, "step": 3642 }, { "epoch": 0.06497699140298933, "grad_norm": 0.38317734003067017, "learning_rate": 3.2491972886193364e-05, "loss": 0.2327, "step": 3643 }, { "epoch": 0.06499482752470302, "grad_norm": 0.48809385299682617, "learning_rate": 3.250089190153407e-05, "loss": 0.29, "step": 3644 }, { "epoch": 0.06501266364641672, "grad_norm": 0.335397332906723, "learning_rate": 3.250981091687478e-05, "loss": 0.2377, "step": 3645 }, { "epoch": 0.06503049976813041, "grad_norm": 0.39842742681503296, "learning_rate": 3.251872993221548e-05, "loss": 0.2766, "step": 3646 }, { "epoch": 0.06504833588984411, "grad_norm": 0.38238203525543213, "learning_rate": 3.2527648947556196e-05, "loss": 0.2747, "step": 3647 }, { "epoch": 0.0650661720115578, "grad_norm": 0.5524346232414246, "learning_rate": 3.2536567962896896e-05, "loss": 0.3329, "step": 3648 }, { "epoch": 0.0650840081332715, "grad_norm": 0.6394782662391663, "learning_rate": 3.25454869782376e-05, "loss": 0.3151, "step": 3649 }, { "epoch": 0.06510184425498519, "grad_norm": 0.4849854111671448, "learning_rate": 3.2554405993578316e-05, "loss": 0.2973, "step": 3650 }, { "epoch": 0.0651196803766989, "grad_norm": 0.39272361993789673, "learning_rate": 3.2563325008919015e-05, "loss": 0.3105, "step": 3651 }, { "epoch": 0.06513751649841258, "grad_norm": 0.45926278829574585, "learning_rate": 3.257224402425972e-05, "loss": 0.2627, "step": 3652 }, { "epoch": 0.06515535262012628, "grad_norm": 0.3858993351459503, "learning_rate": 3.2581163039600435e-05, "loss": 0.2869, "step": 3653 }, { "epoch": 0.06517318874183997, "grad_norm": 0.5117769241333008, "learning_rate": 3.2590082054941134e-05, "loss": 0.2853, "step": 3654 }, { "epoch": 0.06519102486355367, "grad_norm": 0.3571889400482178, "learning_rate": 3.259900107028184e-05, "loss": 0.2114, "step": 3655 }, { "epoch": 0.06520886098526736, "grad_norm": 0.4695039391517639, "learning_rate": 3.2607920085622554e-05, "loss": 0.2397, "step": 3656 }, { "epoch": 0.06522669710698106, "grad_norm": 0.35199299454689026, "learning_rate": 3.2616839100963253e-05, "loss": 0.2937, "step": 3657 }, { "epoch": 0.06524453322869475, "grad_norm": 0.31346431374549866, "learning_rate": 3.262575811630396e-05, "loss": 0.2429, "step": 3658 }, { "epoch": 0.06526236935040845, "grad_norm": 0.3331112265586853, "learning_rate": 3.263467713164467e-05, "loss": 0.2519, "step": 3659 }, { "epoch": 0.06528020547212214, "grad_norm": 0.4858771562576294, "learning_rate": 3.264359614698537e-05, "loss": 0.2245, "step": 3660 }, { "epoch": 0.06529804159383584, "grad_norm": 0.572431743144989, "learning_rate": 3.265251516232608e-05, "loss": 0.3461, "step": 3661 }, { "epoch": 0.06531587771554953, "grad_norm": 0.39851173758506775, "learning_rate": 3.2661434177666785e-05, "loss": 0.2891, "step": 3662 }, { "epoch": 0.06533371383726322, "grad_norm": 0.48051783442497253, "learning_rate": 3.267035319300749e-05, "loss": 0.3196, "step": 3663 }, { "epoch": 0.06535154995897692, "grad_norm": 0.3494710922241211, "learning_rate": 3.26792722083482e-05, "loss": 0.2234, "step": 3664 }, { "epoch": 0.06536938608069061, "grad_norm": 0.4011964499950409, "learning_rate": 3.2688191223688904e-05, "loss": 0.2579, "step": 3665 }, { "epoch": 0.06538722220240431, "grad_norm": 1.0890759229660034, "learning_rate": 3.269711023902961e-05, "loss": 0.2564, "step": 3666 }, { "epoch": 0.065405058324118, "grad_norm": 0.4458734691143036, "learning_rate": 3.270602925437032e-05, "loss": 0.2897, "step": 3667 }, { "epoch": 0.0654228944458317, "grad_norm": 0.36088064312934875, "learning_rate": 3.2714948269711024e-05, "loss": 0.2671, "step": 3668 }, { "epoch": 0.06544073056754539, "grad_norm": 0.33298173546791077, "learning_rate": 3.272386728505173e-05, "loss": 0.2884, "step": 3669 }, { "epoch": 0.06545856668925909, "grad_norm": 0.3666765093803406, "learning_rate": 3.2732786300392436e-05, "loss": 0.234, "step": 3670 }, { "epoch": 0.06547640281097278, "grad_norm": 0.41905704140663147, "learning_rate": 3.274170531573314e-05, "loss": 0.2734, "step": 3671 }, { "epoch": 0.06549423893268648, "grad_norm": 0.3498532772064209, "learning_rate": 3.2750624331073856e-05, "loss": 0.2335, "step": 3672 }, { "epoch": 0.06551207505440017, "grad_norm": 0.2984253764152527, "learning_rate": 3.2759543346414556e-05, "loss": 0.2417, "step": 3673 }, { "epoch": 0.06552991117611387, "grad_norm": 0.3009212911128998, "learning_rate": 3.276846236175526e-05, "loss": 0.2879, "step": 3674 }, { "epoch": 0.06554774729782756, "grad_norm": 0.4682937562465668, "learning_rate": 3.2777381377095975e-05, "loss": 0.3186, "step": 3675 }, { "epoch": 0.06556558341954126, "grad_norm": 0.3837187886238098, "learning_rate": 3.2786300392436675e-05, "loss": 0.288, "step": 3676 }, { "epoch": 0.06558341954125495, "grad_norm": 0.4935886561870575, "learning_rate": 3.279521940777738e-05, "loss": 0.271, "step": 3677 }, { "epoch": 0.06560125566296865, "grad_norm": 0.5601122975349426, "learning_rate": 3.2804138423118094e-05, "loss": 0.3151, "step": 3678 }, { "epoch": 0.06561909178468234, "grad_norm": 0.40890321135520935, "learning_rate": 3.2813057438458794e-05, "loss": 0.2328, "step": 3679 }, { "epoch": 0.06563692790639604, "grad_norm": 0.35992440581321716, "learning_rate": 3.28219764537995e-05, "loss": 0.2751, "step": 3680 }, { "epoch": 0.06565476402810973, "grad_norm": 0.4070022702217102, "learning_rate": 3.283089546914021e-05, "loss": 0.2957, "step": 3681 }, { "epoch": 0.06567260014982343, "grad_norm": 0.3672012686729431, "learning_rate": 3.283981448448091e-05, "loss": 0.2714, "step": 3682 }, { "epoch": 0.06569043627153712, "grad_norm": 0.37558531761169434, "learning_rate": 3.284873349982162e-05, "loss": 0.2856, "step": 3683 }, { "epoch": 0.0657082723932508, "grad_norm": 0.3072632849216461, "learning_rate": 3.285765251516233e-05, "loss": 0.2335, "step": 3684 }, { "epoch": 0.0657261085149645, "grad_norm": 0.37256982922554016, "learning_rate": 3.286657153050303e-05, "loss": 0.289, "step": 3685 }, { "epoch": 0.0657439446366782, "grad_norm": 0.48035314679145813, "learning_rate": 3.287549054584374e-05, "loss": 0.2732, "step": 3686 }, { "epoch": 0.0657617807583919, "grad_norm": 0.4267140328884125, "learning_rate": 3.288440956118445e-05, "loss": 0.2876, "step": 3687 }, { "epoch": 0.06577961688010558, "grad_norm": 0.4718540906906128, "learning_rate": 3.289332857652515e-05, "loss": 0.2523, "step": 3688 }, { "epoch": 0.06579745300181929, "grad_norm": 0.457587867975235, "learning_rate": 3.290224759186586e-05, "loss": 0.2334, "step": 3689 }, { "epoch": 0.06581528912353297, "grad_norm": 0.4299467206001282, "learning_rate": 3.2911166607206564e-05, "loss": 0.3089, "step": 3690 }, { "epoch": 0.06583312524524668, "grad_norm": 0.5274016857147217, "learning_rate": 3.292008562254727e-05, "loss": 0.3176, "step": 3691 }, { "epoch": 0.06585096136696036, "grad_norm": 0.4209674298763275, "learning_rate": 3.292900463788798e-05, "loss": 0.2692, "step": 3692 }, { "epoch": 0.06586879748867407, "grad_norm": 0.47414302825927734, "learning_rate": 3.293792365322868e-05, "loss": 0.2704, "step": 3693 }, { "epoch": 0.06588663361038775, "grad_norm": 0.46226176619529724, "learning_rate": 3.2946842668569396e-05, "loss": 0.3259, "step": 3694 }, { "epoch": 0.06590446973210146, "grad_norm": 0.30368199944496155, "learning_rate": 3.2955761683910096e-05, "loss": 0.2585, "step": 3695 }, { "epoch": 0.06592230585381514, "grad_norm": 0.44019100069999695, "learning_rate": 3.29646806992508e-05, "loss": 0.306, "step": 3696 }, { "epoch": 0.06594014197552885, "grad_norm": 0.2963986098766327, "learning_rate": 3.2973599714591515e-05, "loss": 0.2265, "step": 3697 }, { "epoch": 0.06595797809724253, "grad_norm": 0.3986966609954834, "learning_rate": 3.2982518729932215e-05, "loss": 0.2693, "step": 3698 }, { "epoch": 0.06597581421895624, "grad_norm": 0.4319208562374115, "learning_rate": 3.299143774527292e-05, "loss": 0.2329, "step": 3699 }, { "epoch": 0.06599365034066992, "grad_norm": 0.395369291305542, "learning_rate": 3.3000356760613635e-05, "loss": 0.3071, "step": 3700 }, { "epoch": 0.06601148646238363, "grad_norm": 0.333501398563385, "learning_rate": 3.3009275775954334e-05, "loss": 0.198, "step": 3701 }, { "epoch": 0.06602932258409731, "grad_norm": 0.36454272270202637, "learning_rate": 3.301819479129504e-05, "loss": 0.2753, "step": 3702 }, { "epoch": 0.066047158705811, "grad_norm": 0.3821306526660919, "learning_rate": 3.3027113806635754e-05, "loss": 0.3269, "step": 3703 }, { "epoch": 0.0660649948275247, "grad_norm": 0.45392683148384094, "learning_rate": 3.303603282197645e-05, "loss": 0.2712, "step": 3704 }, { "epoch": 0.06608283094923839, "grad_norm": 0.34545281529426575, "learning_rate": 3.304495183731716e-05, "loss": 0.2623, "step": 3705 }, { "epoch": 0.0661006670709521, "grad_norm": 0.45389291644096375, "learning_rate": 3.305387085265787e-05, "loss": 0.3121, "step": 3706 }, { "epoch": 0.06611850319266578, "grad_norm": 0.4296933710575104, "learning_rate": 3.306278986799857e-05, "loss": 0.2881, "step": 3707 }, { "epoch": 0.06613633931437948, "grad_norm": 0.3538849651813507, "learning_rate": 3.307170888333928e-05, "loss": 0.2602, "step": 3708 }, { "epoch": 0.06615417543609317, "grad_norm": 0.364667147397995, "learning_rate": 3.308062789867999e-05, "loss": 0.2652, "step": 3709 }, { "epoch": 0.06617201155780687, "grad_norm": 0.37340229749679565, "learning_rate": 3.308954691402069e-05, "loss": 0.2335, "step": 3710 }, { "epoch": 0.06618984767952056, "grad_norm": 0.4735252857208252, "learning_rate": 3.30984659293614e-05, "loss": 0.2283, "step": 3711 }, { "epoch": 0.06620768380123426, "grad_norm": 0.47776302695274353, "learning_rate": 3.310738494470211e-05, "loss": 0.2659, "step": 3712 }, { "epoch": 0.06622551992294795, "grad_norm": 0.32557588815689087, "learning_rate": 3.311630396004281e-05, "loss": 0.2596, "step": 3713 }, { "epoch": 0.06624335604466165, "grad_norm": 0.3454476296901703, "learning_rate": 3.312522297538352e-05, "loss": 0.3095, "step": 3714 }, { "epoch": 0.06626119216637534, "grad_norm": 0.30635809898376465, "learning_rate": 3.3134141990724224e-05, "loss": 0.2544, "step": 3715 }, { "epoch": 0.06627902828808904, "grad_norm": 0.2915476858615875, "learning_rate": 3.314306100606493e-05, "loss": 0.2495, "step": 3716 }, { "epoch": 0.06629686440980273, "grad_norm": 0.3952147662639618, "learning_rate": 3.3151980021405636e-05, "loss": 0.2576, "step": 3717 }, { "epoch": 0.06631470053151643, "grad_norm": 0.3233875632286072, "learning_rate": 3.316089903674634e-05, "loss": 0.293, "step": 3718 }, { "epoch": 0.06633253665323012, "grad_norm": 0.4111897945404053, "learning_rate": 3.3169818052087056e-05, "loss": 0.289, "step": 3719 }, { "epoch": 0.06635037277494382, "grad_norm": 0.4717160761356354, "learning_rate": 3.3178737067427755e-05, "loss": 0.3012, "step": 3720 }, { "epoch": 0.06636820889665751, "grad_norm": 0.4196506142616272, "learning_rate": 3.318765608276846e-05, "loss": 0.3043, "step": 3721 }, { "epoch": 0.06638604501837121, "grad_norm": 0.3230936825275421, "learning_rate": 3.3196575098109175e-05, "loss": 0.2729, "step": 3722 }, { "epoch": 0.0664038811400849, "grad_norm": 0.3927689790725708, "learning_rate": 3.3205494113449875e-05, "loss": 0.313, "step": 3723 }, { "epoch": 0.06642171726179859, "grad_norm": 0.34997114539146423, "learning_rate": 3.321441312879058e-05, "loss": 0.2494, "step": 3724 }, { "epoch": 0.06643955338351229, "grad_norm": 0.5233971476554871, "learning_rate": 3.3223332144131294e-05, "loss": 0.3109, "step": 3725 }, { "epoch": 0.06645738950522598, "grad_norm": 0.40648800134658813, "learning_rate": 3.3232251159471994e-05, "loss": 0.261, "step": 3726 }, { "epoch": 0.06647522562693968, "grad_norm": 0.3055565357208252, "learning_rate": 3.32411701748127e-05, "loss": 0.2286, "step": 3727 }, { "epoch": 0.06649306174865337, "grad_norm": 0.3339119255542755, "learning_rate": 3.325008919015341e-05, "loss": 0.2776, "step": 3728 }, { "epoch": 0.06651089787036707, "grad_norm": 0.3152942955493927, "learning_rate": 3.325900820549411e-05, "loss": 0.2692, "step": 3729 }, { "epoch": 0.06652873399208076, "grad_norm": 0.3575889766216278, "learning_rate": 3.326792722083482e-05, "loss": 0.2838, "step": 3730 }, { "epoch": 0.06654657011379446, "grad_norm": 0.3464291989803314, "learning_rate": 3.327684623617553e-05, "loss": 0.259, "step": 3731 }, { "epoch": 0.06656440623550815, "grad_norm": 0.41277116537094116, "learning_rate": 3.328576525151623e-05, "loss": 0.3467, "step": 3732 }, { "epoch": 0.06658224235722185, "grad_norm": 0.39770737290382385, "learning_rate": 3.329468426685694e-05, "loss": 0.2349, "step": 3733 }, { "epoch": 0.06660007847893554, "grad_norm": 0.488250195980072, "learning_rate": 3.330360328219765e-05, "loss": 0.2937, "step": 3734 }, { "epoch": 0.06661791460064924, "grad_norm": 0.3958609402179718, "learning_rate": 3.331252229753835e-05, "loss": 0.264, "step": 3735 }, { "epoch": 0.06663575072236293, "grad_norm": 0.3692286014556885, "learning_rate": 3.332144131287906e-05, "loss": 0.2635, "step": 3736 }, { "epoch": 0.06665358684407663, "grad_norm": 0.35026273131370544, "learning_rate": 3.333036032821977e-05, "loss": 0.2521, "step": 3737 }, { "epoch": 0.06667142296579032, "grad_norm": 0.3904498517513275, "learning_rate": 3.333927934356047e-05, "loss": 0.2839, "step": 3738 }, { "epoch": 0.06668925908750402, "grad_norm": 0.5343726873397827, "learning_rate": 3.334819835890118e-05, "loss": 0.3157, "step": 3739 }, { "epoch": 0.0667070952092177, "grad_norm": 0.33324769139289856, "learning_rate": 3.335711737424188e-05, "loss": 0.312, "step": 3740 }, { "epoch": 0.06672493133093141, "grad_norm": 0.28762882947921753, "learning_rate": 3.336603638958259e-05, "loss": 0.2498, "step": 3741 }, { "epoch": 0.0667427674526451, "grad_norm": 0.3631058633327484, "learning_rate": 3.3374955404923296e-05, "loss": 0.2942, "step": 3742 }, { "epoch": 0.0667606035743588, "grad_norm": 0.36567845940589905, "learning_rate": 3.3383874420264e-05, "loss": 0.2842, "step": 3743 }, { "epoch": 0.06677843969607249, "grad_norm": 0.43224385380744934, "learning_rate": 3.3392793435604715e-05, "loss": 0.2397, "step": 3744 }, { "epoch": 0.06679627581778617, "grad_norm": 0.551596999168396, "learning_rate": 3.3401712450945415e-05, "loss": 0.2957, "step": 3745 }, { "epoch": 0.06681411193949988, "grad_norm": 0.40593668818473816, "learning_rate": 3.341063146628612e-05, "loss": 0.3221, "step": 3746 }, { "epoch": 0.06683194806121356, "grad_norm": 0.3912992477416992, "learning_rate": 3.3419550481626834e-05, "loss": 0.2849, "step": 3747 }, { "epoch": 0.06684978418292727, "grad_norm": 0.45108121633529663, "learning_rate": 3.3428469496967534e-05, "loss": 0.3243, "step": 3748 }, { "epoch": 0.06686762030464095, "grad_norm": 0.355159729719162, "learning_rate": 3.343738851230824e-05, "loss": 0.319, "step": 3749 }, { "epoch": 0.06688545642635466, "grad_norm": 0.3352654278278351, "learning_rate": 3.3446307527648954e-05, "loss": 0.2296, "step": 3750 }, { "epoch": 0.06690329254806834, "grad_norm": 0.38211750984191895, "learning_rate": 3.345522654298965e-05, "loss": 0.275, "step": 3751 }, { "epoch": 0.06692112866978205, "grad_norm": 0.35876527428627014, "learning_rate": 3.346414555833036e-05, "loss": 0.32, "step": 3752 }, { "epoch": 0.06693896479149573, "grad_norm": 0.3367149829864502, "learning_rate": 3.347306457367107e-05, "loss": 0.2959, "step": 3753 }, { "epoch": 0.06695680091320944, "grad_norm": 0.41035372018814087, "learning_rate": 3.348198358901177e-05, "loss": 0.3198, "step": 3754 }, { "epoch": 0.06697463703492312, "grad_norm": 0.45888209342956543, "learning_rate": 3.349090260435248e-05, "loss": 0.366, "step": 3755 }, { "epoch": 0.06699247315663683, "grad_norm": 0.32987546920776367, "learning_rate": 3.349982161969319e-05, "loss": 0.2587, "step": 3756 }, { "epoch": 0.06701030927835051, "grad_norm": 0.4455758333206177, "learning_rate": 3.350874063503389e-05, "loss": 0.2873, "step": 3757 }, { "epoch": 0.06702814540006422, "grad_norm": 0.3964990973472595, "learning_rate": 3.35176596503746e-05, "loss": 0.3251, "step": 3758 }, { "epoch": 0.0670459815217779, "grad_norm": 0.3634512722492218, "learning_rate": 3.352657866571531e-05, "loss": 0.3136, "step": 3759 }, { "epoch": 0.0670638176434916, "grad_norm": 0.469315767288208, "learning_rate": 3.353549768105601e-05, "loss": 0.2852, "step": 3760 }, { "epoch": 0.06708165376520529, "grad_norm": 0.3811940550804138, "learning_rate": 3.354441669639672e-05, "loss": 0.2642, "step": 3761 }, { "epoch": 0.067099489886919, "grad_norm": 0.38483431935310364, "learning_rate": 3.355333571173743e-05, "loss": 0.2834, "step": 3762 }, { "epoch": 0.06711732600863268, "grad_norm": 0.46264129877090454, "learning_rate": 3.356225472707813e-05, "loss": 0.2831, "step": 3763 }, { "epoch": 0.06713516213034637, "grad_norm": 0.6043981909751892, "learning_rate": 3.3571173742418836e-05, "loss": 0.2669, "step": 3764 }, { "epoch": 0.06715299825206007, "grad_norm": 0.4582202732563019, "learning_rate": 3.358009275775954e-05, "loss": 0.3498, "step": 3765 }, { "epoch": 0.06717083437377376, "grad_norm": 0.41268259286880493, "learning_rate": 3.3589011773100256e-05, "loss": 0.3132, "step": 3766 }, { "epoch": 0.06718867049548746, "grad_norm": 0.43114957213401794, "learning_rate": 3.3597930788440955e-05, "loss": 0.2888, "step": 3767 }, { "epoch": 0.06720650661720115, "grad_norm": 0.45636048913002014, "learning_rate": 3.360684980378166e-05, "loss": 0.3206, "step": 3768 }, { "epoch": 0.06722434273891485, "grad_norm": 0.349748820066452, "learning_rate": 3.3615768819122375e-05, "loss": 0.3148, "step": 3769 }, { "epoch": 0.06724217886062854, "grad_norm": 0.39590850472450256, "learning_rate": 3.3624687834463074e-05, "loss": 0.2405, "step": 3770 }, { "epoch": 0.06726001498234224, "grad_norm": 0.36425086855888367, "learning_rate": 3.363360684980378e-05, "loss": 0.3189, "step": 3771 }, { "epoch": 0.06727785110405593, "grad_norm": 0.39826929569244385, "learning_rate": 3.3642525865144494e-05, "loss": 0.2915, "step": 3772 }, { "epoch": 0.06729568722576963, "grad_norm": 0.4167521297931671, "learning_rate": 3.3651444880485194e-05, "loss": 0.2234, "step": 3773 }, { "epoch": 0.06731352334748332, "grad_norm": 0.3726811408996582, "learning_rate": 3.36603638958259e-05, "loss": 0.2925, "step": 3774 }, { "epoch": 0.06733135946919702, "grad_norm": 0.40640175342559814, "learning_rate": 3.366928291116661e-05, "loss": 0.3425, "step": 3775 }, { "epoch": 0.06734919559091071, "grad_norm": 0.39370375871658325, "learning_rate": 3.367820192650731e-05, "loss": 0.2884, "step": 3776 }, { "epoch": 0.06736703171262441, "grad_norm": 0.4084145724773407, "learning_rate": 3.368712094184802e-05, "loss": 0.2457, "step": 3777 }, { "epoch": 0.0673848678343381, "grad_norm": 0.3812226355075836, "learning_rate": 3.369603995718873e-05, "loss": 0.2311, "step": 3778 }, { "epoch": 0.0674027039560518, "grad_norm": 0.3929128646850586, "learning_rate": 3.370495897252943e-05, "loss": 0.3168, "step": 3779 }, { "epoch": 0.06742054007776549, "grad_norm": 0.47142481803894043, "learning_rate": 3.371387798787014e-05, "loss": 0.3074, "step": 3780 }, { "epoch": 0.06743837619947919, "grad_norm": 0.45046961307525635, "learning_rate": 3.372279700321085e-05, "loss": 0.2902, "step": 3781 }, { "epoch": 0.06745621232119288, "grad_norm": 0.2943269908428192, "learning_rate": 3.373171601855155e-05, "loss": 0.3122, "step": 3782 }, { "epoch": 0.06747404844290658, "grad_norm": 0.33599725365638733, "learning_rate": 3.374063503389226e-05, "loss": 0.2657, "step": 3783 }, { "epoch": 0.06749188456462027, "grad_norm": 0.3057636022567749, "learning_rate": 3.374955404923297e-05, "loss": 0.2462, "step": 3784 }, { "epoch": 0.06750972068633396, "grad_norm": 0.46105214953422546, "learning_rate": 3.375847306457367e-05, "loss": 0.2781, "step": 3785 }, { "epoch": 0.06752755680804766, "grad_norm": 0.31299689412117004, "learning_rate": 3.3767392079914377e-05, "loss": 0.2442, "step": 3786 }, { "epoch": 0.06754539292976135, "grad_norm": 0.2745397090911865, "learning_rate": 3.377631109525509e-05, "loss": 0.2368, "step": 3787 }, { "epoch": 0.06756322905147505, "grad_norm": 0.3316921889781952, "learning_rate": 3.378523011059579e-05, "loss": 0.2968, "step": 3788 }, { "epoch": 0.06758106517318874, "grad_norm": 0.4789838492870331, "learning_rate": 3.3794149125936496e-05, "loss": 0.298, "step": 3789 }, { "epoch": 0.06759890129490244, "grad_norm": 0.42464661598205566, "learning_rate": 3.380306814127721e-05, "loss": 0.2966, "step": 3790 }, { "epoch": 0.06761673741661613, "grad_norm": 0.487000972032547, "learning_rate": 3.3811987156617915e-05, "loss": 0.3135, "step": 3791 }, { "epoch": 0.06763457353832983, "grad_norm": 0.5048577785491943, "learning_rate": 3.3820906171958615e-05, "loss": 0.2712, "step": 3792 }, { "epoch": 0.06765240966004352, "grad_norm": 0.4884682893753052, "learning_rate": 3.382982518729932e-05, "loss": 0.338, "step": 3793 }, { "epoch": 0.06767024578175722, "grad_norm": 0.496457576751709, "learning_rate": 3.3838744202640034e-05, "loss": 0.3332, "step": 3794 }, { "epoch": 0.0676880819034709, "grad_norm": 0.41949984431266785, "learning_rate": 3.3847663217980734e-05, "loss": 0.3072, "step": 3795 }, { "epoch": 0.06770591802518461, "grad_norm": 0.36391133069992065, "learning_rate": 3.385658223332144e-05, "loss": 0.3448, "step": 3796 }, { "epoch": 0.0677237541468983, "grad_norm": 0.39628341794013977, "learning_rate": 3.3865501248662153e-05, "loss": 0.2987, "step": 3797 }, { "epoch": 0.067741590268612, "grad_norm": 0.38303142786026, "learning_rate": 3.387442026400285e-05, "loss": 0.3096, "step": 3798 }, { "epoch": 0.06775942639032569, "grad_norm": 0.34878623485565186, "learning_rate": 3.388333927934356e-05, "loss": 0.2538, "step": 3799 }, { "epoch": 0.06777726251203939, "grad_norm": 0.5400440096855164, "learning_rate": 3.389225829468427e-05, "loss": 0.279, "step": 3800 }, { "epoch": 0.06779509863375308, "grad_norm": 0.356579452753067, "learning_rate": 3.390117731002497e-05, "loss": 0.2814, "step": 3801 }, { "epoch": 0.06781293475546678, "grad_norm": 0.4017574191093445, "learning_rate": 3.391009632536568e-05, "loss": 0.2788, "step": 3802 }, { "epoch": 0.06783077087718047, "grad_norm": 0.44138064980506897, "learning_rate": 3.391901534070639e-05, "loss": 0.2787, "step": 3803 }, { "epoch": 0.06784860699889415, "grad_norm": 0.31653985381126404, "learning_rate": 3.392793435604709e-05, "loss": 0.2499, "step": 3804 }, { "epoch": 0.06786644312060786, "grad_norm": 0.33393359184265137, "learning_rate": 3.39368533713878e-05, "loss": 0.2892, "step": 3805 }, { "epoch": 0.06788427924232154, "grad_norm": 0.3687049150466919, "learning_rate": 3.394577238672851e-05, "loss": 0.3218, "step": 3806 }, { "epoch": 0.06790211536403525, "grad_norm": 0.32453712821006775, "learning_rate": 3.395469140206921e-05, "loss": 0.3042, "step": 3807 }, { "epoch": 0.06791995148574893, "grad_norm": 0.3493601083755493, "learning_rate": 3.396361041740992e-05, "loss": 0.3096, "step": 3808 }, { "epoch": 0.06793778760746264, "grad_norm": 0.42554107308387756, "learning_rate": 3.397252943275063e-05, "loss": 0.344, "step": 3809 }, { "epoch": 0.06795562372917632, "grad_norm": 0.4338083565235138, "learning_rate": 3.398144844809133e-05, "loss": 0.2956, "step": 3810 }, { "epoch": 0.06797345985089003, "grad_norm": 0.3571920096874237, "learning_rate": 3.3990367463432036e-05, "loss": 0.3157, "step": 3811 }, { "epoch": 0.06799129597260371, "grad_norm": 0.3709001839160919, "learning_rate": 3.399928647877275e-05, "loss": 0.2741, "step": 3812 }, { "epoch": 0.06800913209431741, "grad_norm": 0.5087075233459473, "learning_rate": 3.4008205494113456e-05, "loss": 0.2707, "step": 3813 }, { "epoch": 0.0680269682160311, "grad_norm": 0.5026617050170898, "learning_rate": 3.4017124509454155e-05, "loss": 0.3218, "step": 3814 }, { "epoch": 0.0680448043377448, "grad_norm": 0.6777828931808472, "learning_rate": 3.402604352479487e-05, "loss": 0.2865, "step": 3815 }, { "epoch": 0.06806264045945849, "grad_norm": 0.3735826313495636, "learning_rate": 3.4034962540135575e-05, "loss": 0.2753, "step": 3816 }, { "epoch": 0.0680804765811722, "grad_norm": 0.4043390154838562, "learning_rate": 3.4043881555476274e-05, "loss": 0.2149, "step": 3817 }, { "epoch": 0.06809831270288588, "grad_norm": 0.3440175950527191, "learning_rate": 3.405280057081698e-05, "loss": 0.2406, "step": 3818 }, { "epoch": 0.06811614882459958, "grad_norm": 0.3285858631134033, "learning_rate": 3.4061719586157694e-05, "loss": 0.2485, "step": 3819 }, { "epoch": 0.06813398494631327, "grad_norm": 1.160111427307129, "learning_rate": 3.4070638601498393e-05, "loss": 0.3213, "step": 3820 }, { "epoch": 0.06815182106802697, "grad_norm": 0.3132086396217346, "learning_rate": 3.40795576168391e-05, "loss": 0.2925, "step": 3821 }, { "epoch": 0.06816965718974066, "grad_norm": 0.28393521904945374, "learning_rate": 3.408847663217981e-05, "loss": 0.227, "step": 3822 }, { "epoch": 0.06818749331145436, "grad_norm": 0.32588687539100647, "learning_rate": 3.409739564752051e-05, "loss": 0.2418, "step": 3823 }, { "epoch": 0.06820532943316805, "grad_norm": 0.7148998379707336, "learning_rate": 3.410631466286122e-05, "loss": 0.2946, "step": 3824 }, { "epoch": 0.06822316555488174, "grad_norm": 0.5567273497581482, "learning_rate": 3.411523367820193e-05, "loss": 0.294, "step": 3825 }, { "epoch": 0.06824100167659544, "grad_norm": 0.6443267464637756, "learning_rate": 3.412415269354263e-05, "loss": 0.3973, "step": 3826 }, { "epoch": 0.06825883779830913, "grad_norm": 0.3435632586479187, "learning_rate": 3.413307170888334e-05, "loss": 0.2265, "step": 3827 }, { "epoch": 0.06827667392002283, "grad_norm": 0.5158803462982178, "learning_rate": 3.414199072422405e-05, "loss": 0.2797, "step": 3828 }, { "epoch": 0.06829451004173652, "grad_norm": 0.3423105478286743, "learning_rate": 3.415090973956475e-05, "loss": 0.2347, "step": 3829 }, { "epoch": 0.06831234616345022, "grad_norm": 0.3610781729221344, "learning_rate": 3.415982875490546e-05, "loss": 0.2695, "step": 3830 }, { "epoch": 0.06833018228516391, "grad_norm": 0.44379502534866333, "learning_rate": 3.416874777024617e-05, "loss": 0.3214, "step": 3831 }, { "epoch": 0.06834801840687761, "grad_norm": 0.4114704132080078, "learning_rate": 3.417766678558687e-05, "loss": 0.3092, "step": 3832 }, { "epoch": 0.0683658545285913, "grad_norm": 0.4542233943939209, "learning_rate": 3.4186585800927576e-05, "loss": 0.3203, "step": 3833 }, { "epoch": 0.068383690650305, "grad_norm": 0.36704519391059875, "learning_rate": 3.419550481626829e-05, "loss": 0.3023, "step": 3834 }, { "epoch": 0.06840152677201869, "grad_norm": 0.46070805191993713, "learning_rate": 3.420442383160899e-05, "loss": 0.3288, "step": 3835 }, { "epoch": 0.06841936289373239, "grad_norm": 0.41485700011253357, "learning_rate": 3.4213342846949696e-05, "loss": 0.2391, "step": 3836 }, { "epoch": 0.06843719901544608, "grad_norm": 0.38974466919898987, "learning_rate": 3.422226186229041e-05, "loss": 0.2638, "step": 3837 }, { "epoch": 0.06845503513715978, "grad_norm": 0.39768311381340027, "learning_rate": 3.4231180877631115e-05, "loss": 0.294, "step": 3838 }, { "epoch": 0.06847287125887347, "grad_norm": 0.47503039240837097, "learning_rate": 3.4240099892971815e-05, "loss": 0.2459, "step": 3839 }, { "epoch": 0.06849070738058717, "grad_norm": 0.3560754358768463, "learning_rate": 3.424901890831253e-05, "loss": 0.2705, "step": 3840 }, { "epoch": 0.06850854350230086, "grad_norm": 0.47175276279449463, "learning_rate": 3.4257937923653234e-05, "loss": 0.3157, "step": 3841 }, { "epoch": 0.06852637962401456, "grad_norm": 0.3012832999229431, "learning_rate": 3.4266856938993934e-05, "loss": 0.2555, "step": 3842 }, { "epoch": 0.06854421574572825, "grad_norm": 0.33884280920028687, "learning_rate": 3.427577595433464e-05, "loss": 0.3307, "step": 3843 }, { "epoch": 0.06856205186744194, "grad_norm": 0.3281342387199402, "learning_rate": 3.428469496967535e-05, "loss": 0.2783, "step": 3844 }, { "epoch": 0.06857988798915564, "grad_norm": 0.3344869315624237, "learning_rate": 3.429361398501605e-05, "loss": 0.2581, "step": 3845 }, { "epoch": 0.06859772411086933, "grad_norm": 0.44027483463287354, "learning_rate": 3.430253300035676e-05, "loss": 0.2441, "step": 3846 }, { "epoch": 0.06861556023258303, "grad_norm": 0.450890451669693, "learning_rate": 3.431145201569747e-05, "loss": 0.2752, "step": 3847 }, { "epoch": 0.06863339635429672, "grad_norm": 0.36177682876586914, "learning_rate": 3.432037103103817e-05, "loss": 0.3086, "step": 3848 }, { "epoch": 0.06865123247601042, "grad_norm": 0.32734236121177673, "learning_rate": 3.432929004637888e-05, "loss": 0.2784, "step": 3849 }, { "epoch": 0.0686690685977241, "grad_norm": 0.39323222637176514, "learning_rate": 3.433820906171959e-05, "loss": 0.2962, "step": 3850 }, { "epoch": 0.06868690471943781, "grad_norm": 0.3322768807411194, "learning_rate": 3.434712807706029e-05, "loss": 0.2418, "step": 3851 }, { "epoch": 0.0687047408411515, "grad_norm": 0.4296260178089142, "learning_rate": 3.4356047092401e-05, "loss": 0.3382, "step": 3852 }, { "epoch": 0.0687225769628652, "grad_norm": 0.3480355739593506, "learning_rate": 3.436496610774171e-05, "loss": 0.3022, "step": 3853 }, { "epoch": 0.06874041308457889, "grad_norm": 0.30536550283432007, "learning_rate": 3.437388512308241e-05, "loss": 0.2667, "step": 3854 }, { "epoch": 0.06875824920629259, "grad_norm": 0.3209473788738251, "learning_rate": 3.438280413842312e-05, "loss": 0.2236, "step": 3855 }, { "epoch": 0.06877608532800628, "grad_norm": 0.4037458002567291, "learning_rate": 3.439172315376383e-05, "loss": 0.2622, "step": 3856 }, { "epoch": 0.06879392144971998, "grad_norm": 0.41891300678253174, "learning_rate": 3.440064216910453e-05, "loss": 0.3025, "step": 3857 }, { "epoch": 0.06881175757143367, "grad_norm": 0.5626822113990784, "learning_rate": 3.4409561184445236e-05, "loss": 0.347, "step": 3858 }, { "epoch": 0.06882959369314737, "grad_norm": 0.4131893813610077, "learning_rate": 3.441848019978595e-05, "loss": 0.3589, "step": 3859 }, { "epoch": 0.06884742981486106, "grad_norm": 0.362048864364624, "learning_rate": 3.442739921512665e-05, "loss": 0.2738, "step": 3860 }, { "epoch": 0.06886526593657476, "grad_norm": 0.47151756286621094, "learning_rate": 3.4436318230467355e-05, "loss": 0.2536, "step": 3861 }, { "epoch": 0.06888310205828845, "grad_norm": 0.36796677112579346, "learning_rate": 3.444523724580807e-05, "loss": 0.265, "step": 3862 }, { "epoch": 0.06890093818000215, "grad_norm": 0.4151969254016876, "learning_rate": 3.4454156261148775e-05, "loss": 0.3413, "step": 3863 }, { "epoch": 0.06891877430171583, "grad_norm": 0.3809218406677246, "learning_rate": 3.4463075276489474e-05, "loss": 0.2893, "step": 3864 }, { "epoch": 0.06893661042342952, "grad_norm": 0.32801833748817444, "learning_rate": 3.447199429183019e-05, "loss": 0.2596, "step": 3865 }, { "epoch": 0.06895444654514322, "grad_norm": 0.4232890009880066, "learning_rate": 3.4480913307170894e-05, "loss": 0.3158, "step": 3866 }, { "epoch": 0.06897228266685691, "grad_norm": 0.4460662603378296, "learning_rate": 3.448983232251159e-05, "loss": 0.3222, "step": 3867 }, { "epoch": 0.06899011878857061, "grad_norm": 0.48599353432655334, "learning_rate": 3.44987513378523e-05, "loss": 0.3031, "step": 3868 }, { "epoch": 0.0690079549102843, "grad_norm": 0.3497062623500824, "learning_rate": 3.450767035319301e-05, "loss": 0.321, "step": 3869 }, { "epoch": 0.069025791031998, "grad_norm": 0.359834223985672, "learning_rate": 3.451658936853371e-05, "loss": 0.3348, "step": 3870 }, { "epoch": 0.06904362715371169, "grad_norm": 0.3681846559047699, "learning_rate": 3.452550838387442e-05, "loss": 0.2541, "step": 3871 }, { "epoch": 0.0690614632754254, "grad_norm": 0.49886760115623474, "learning_rate": 3.453442739921513e-05, "loss": 0.3258, "step": 3872 }, { "epoch": 0.06907929939713908, "grad_norm": 0.4724777042865753, "learning_rate": 3.454334641455583e-05, "loss": 0.3097, "step": 3873 }, { "epoch": 0.06909713551885278, "grad_norm": 0.31848880648612976, "learning_rate": 3.455226542989654e-05, "loss": 0.3064, "step": 3874 }, { "epoch": 0.06911497164056647, "grad_norm": 0.3555191159248352, "learning_rate": 3.456118444523725e-05, "loss": 0.2794, "step": 3875 }, { "epoch": 0.06913280776228017, "grad_norm": 0.31329837441444397, "learning_rate": 3.457010346057795e-05, "loss": 0.2147, "step": 3876 }, { "epoch": 0.06915064388399386, "grad_norm": 0.44533130526542664, "learning_rate": 3.457902247591866e-05, "loss": 0.2606, "step": 3877 }, { "epoch": 0.06916848000570756, "grad_norm": 0.48295438289642334, "learning_rate": 3.458794149125937e-05, "loss": 0.2537, "step": 3878 }, { "epoch": 0.06918631612742125, "grad_norm": 0.27674633264541626, "learning_rate": 3.459686050660007e-05, "loss": 0.2392, "step": 3879 }, { "epoch": 0.06920415224913495, "grad_norm": 0.5630616545677185, "learning_rate": 3.4605779521940776e-05, "loss": 0.2683, "step": 3880 }, { "epoch": 0.06922198837084864, "grad_norm": 0.42769375443458557, "learning_rate": 3.461469853728149e-05, "loss": 0.2992, "step": 3881 }, { "epoch": 0.06923982449256234, "grad_norm": 0.32193562388420105, "learning_rate": 3.462361755262219e-05, "loss": 0.2421, "step": 3882 }, { "epoch": 0.06925766061427603, "grad_norm": 0.32061731815338135, "learning_rate": 3.4632536567962895e-05, "loss": 0.2413, "step": 3883 }, { "epoch": 0.06927549673598972, "grad_norm": 0.5281691551208496, "learning_rate": 3.464145558330361e-05, "loss": 0.3424, "step": 3884 }, { "epoch": 0.06929333285770342, "grad_norm": 0.39620906114578247, "learning_rate": 3.4650374598644315e-05, "loss": 0.2695, "step": 3885 }, { "epoch": 0.06931116897941711, "grad_norm": 0.298551082611084, "learning_rate": 3.4659293613985015e-05, "loss": 0.2885, "step": 3886 }, { "epoch": 0.06932900510113081, "grad_norm": 0.3618530035018921, "learning_rate": 3.466821262932573e-05, "loss": 0.2825, "step": 3887 }, { "epoch": 0.0693468412228445, "grad_norm": 0.36034056544303894, "learning_rate": 3.4677131644666434e-05, "loss": 0.272, "step": 3888 }, { "epoch": 0.0693646773445582, "grad_norm": 0.4203408658504486, "learning_rate": 3.4686050660007134e-05, "loss": 0.3086, "step": 3889 }, { "epoch": 0.06938251346627189, "grad_norm": 0.5754683017730713, "learning_rate": 3.469496967534785e-05, "loss": 0.2561, "step": 3890 }, { "epoch": 0.06940034958798559, "grad_norm": 0.2742818593978882, "learning_rate": 3.470388869068855e-05, "loss": 0.2605, "step": 3891 }, { "epoch": 0.06941818570969928, "grad_norm": 0.39720579981803894, "learning_rate": 3.471280770602925e-05, "loss": 0.3189, "step": 3892 }, { "epoch": 0.06943602183141298, "grad_norm": 0.4149906039237976, "learning_rate": 3.4721726721369966e-05, "loss": 0.2566, "step": 3893 }, { "epoch": 0.06945385795312667, "grad_norm": 0.4420456290245056, "learning_rate": 3.473064573671067e-05, "loss": 0.2696, "step": 3894 }, { "epoch": 0.06947169407484037, "grad_norm": 0.4822978377342224, "learning_rate": 3.473956475205137e-05, "loss": 0.3003, "step": 3895 }, { "epoch": 0.06948953019655406, "grad_norm": 0.3878283202648163, "learning_rate": 3.474848376739208e-05, "loss": 0.3146, "step": 3896 }, { "epoch": 0.06950736631826776, "grad_norm": 0.45220157504081726, "learning_rate": 3.475740278273279e-05, "loss": 0.2782, "step": 3897 }, { "epoch": 0.06952520243998145, "grad_norm": 0.6714602708816528, "learning_rate": 3.476632179807349e-05, "loss": 0.2221, "step": 3898 }, { "epoch": 0.06954303856169515, "grad_norm": 0.4458344280719757, "learning_rate": 3.47752408134142e-05, "loss": 0.2841, "step": 3899 }, { "epoch": 0.06956087468340884, "grad_norm": 0.3231016993522644, "learning_rate": 3.478415982875491e-05, "loss": 0.2742, "step": 3900 }, { "epoch": 0.06957871080512254, "grad_norm": 0.35883525013923645, "learning_rate": 3.479307884409561e-05, "loss": 0.2197, "step": 3901 }, { "epoch": 0.06959654692683623, "grad_norm": 0.3651270866394043, "learning_rate": 3.480199785943632e-05, "loss": 0.2386, "step": 3902 }, { "epoch": 0.06961438304854993, "grad_norm": 0.5279820561408997, "learning_rate": 3.481091687477703e-05, "loss": 0.3437, "step": 3903 }, { "epoch": 0.06963221917026362, "grad_norm": 0.3838633596897125, "learning_rate": 3.481983589011773e-05, "loss": 0.2449, "step": 3904 }, { "epoch": 0.0696500552919773, "grad_norm": 0.457460880279541, "learning_rate": 3.4828754905458436e-05, "loss": 0.2595, "step": 3905 }, { "epoch": 0.06966789141369101, "grad_norm": 0.32760775089263916, "learning_rate": 3.483767392079915e-05, "loss": 0.2645, "step": 3906 }, { "epoch": 0.0696857275354047, "grad_norm": 0.3443721532821655, "learning_rate": 3.484659293613985e-05, "loss": 0.2681, "step": 3907 }, { "epoch": 0.0697035636571184, "grad_norm": 0.44602057337760925, "learning_rate": 3.4855511951480555e-05, "loss": 0.2649, "step": 3908 }, { "epoch": 0.06972139977883209, "grad_norm": 0.3007403016090393, "learning_rate": 3.486443096682127e-05, "loss": 0.238, "step": 3909 }, { "epoch": 0.06973923590054579, "grad_norm": 0.4458249807357788, "learning_rate": 3.4873349982161974e-05, "loss": 0.3543, "step": 3910 }, { "epoch": 0.06975707202225948, "grad_norm": 0.3666561543941498, "learning_rate": 3.4882268997502674e-05, "loss": 0.2512, "step": 3911 }, { "epoch": 0.06977490814397318, "grad_norm": 0.42663225531578064, "learning_rate": 3.489118801284339e-05, "loss": 0.2493, "step": 3912 }, { "epoch": 0.06979274426568687, "grad_norm": 0.3961727023124695, "learning_rate": 3.4900107028184094e-05, "loss": 0.2509, "step": 3913 }, { "epoch": 0.06981058038740057, "grad_norm": 0.33371153473854065, "learning_rate": 3.490902604352479e-05, "loss": 0.263, "step": 3914 }, { "epoch": 0.06982841650911426, "grad_norm": 0.26920273900032043, "learning_rate": 3.4917945058865506e-05, "loss": 0.236, "step": 3915 }, { "epoch": 0.06984625263082796, "grad_norm": 0.40448737144470215, "learning_rate": 3.492686407420621e-05, "loss": 0.2938, "step": 3916 }, { "epoch": 0.06986408875254164, "grad_norm": 0.30076855421066284, "learning_rate": 3.493578308954691e-05, "loss": 0.235, "step": 3917 }, { "epoch": 0.06988192487425535, "grad_norm": 0.359047532081604, "learning_rate": 3.4944702104887626e-05, "loss": 0.243, "step": 3918 }, { "epoch": 0.06989976099596903, "grad_norm": 0.4224449396133423, "learning_rate": 3.495362112022833e-05, "loss": 0.3106, "step": 3919 }, { "epoch": 0.06991759711768274, "grad_norm": 0.33418577909469604, "learning_rate": 3.496254013556903e-05, "loss": 0.2603, "step": 3920 }, { "epoch": 0.06993543323939642, "grad_norm": 0.3688289225101471, "learning_rate": 3.497145915090974e-05, "loss": 0.2496, "step": 3921 }, { "epoch": 0.06995326936111013, "grad_norm": 0.36470526456832886, "learning_rate": 3.498037816625045e-05, "loss": 0.2371, "step": 3922 }, { "epoch": 0.06997110548282381, "grad_norm": 0.4117518663406372, "learning_rate": 3.498929718159115e-05, "loss": 0.2466, "step": 3923 }, { "epoch": 0.06998894160453752, "grad_norm": 0.2733934223651886, "learning_rate": 3.499821619693186e-05, "loss": 0.2382, "step": 3924 }, { "epoch": 0.0700067777262512, "grad_norm": 0.2900852859020233, "learning_rate": 3.500713521227257e-05, "loss": 0.2283, "step": 3925 }, { "epoch": 0.07002461384796489, "grad_norm": 0.4061088562011719, "learning_rate": 3.501605422761327e-05, "loss": 0.2618, "step": 3926 }, { "epoch": 0.0700424499696786, "grad_norm": 0.41803494095802307, "learning_rate": 3.5024973242953976e-05, "loss": 0.2771, "step": 3927 }, { "epoch": 0.07006028609139228, "grad_norm": 0.40184682607650757, "learning_rate": 3.503389225829469e-05, "loss": 0.2863, "step": 3928 }, { "epoch": 0.07007812221310598, "grad_norm": 0.3220658302307129, "learning_rate": 3.504281127363539e-05, "loss": 0.2603, "step": 3929 }, { "epoch": 0.07009595833481967, "grad_norm": 0.32238003611564636, "learning_rate": 3.5051730288976095e-05, "loss": 0.2585, "step": 3930 }, { "epoch": 0.07011379445653337, "grad_norm": 0.3569925129413605, "learning_rate": 3.506064930431681e-05, "loss": 0.311, "step": 3931 }, { "epoch": 0.07013163057824706, "grad_norm": 0.34401437640190125, "learning_rate": 3.5069568319657515e-05, "loss": 0.2875, "step": 3932 }, { "epoch": 0.07014946669996076, "grad_norm": 0.4462296962738037, "learning_rate": 3.5078487334998214e-05, "loss": 0.295, "step": 3933 }, { "epoch": 0.07016730282167445, "grad_norm": 0.3038732409477234, "learning_rate": 3.508740635033893e-05, "loss": 0.2505, "step": 3934 }, { "epoch": 0.07018513894338815, "grad_norm": 0.3701113164424896, "learning_rate": 3.5096325365679634e-05, "loss": 0.328, "step": 3935 }, { "epoch": 0.07020297506510184, "grad_norm": 0.37391605973243713, "learning_rate": 3.5105244381020334e-05, "loss": 0.3039, "step": 3936 }, { "epoch": 0.07022081118681554, "grad_norm": 0.3115938603878021, "learning_rate": 3.511416339636105e-05, "loss": 0.2341, "step": 3937 }, { "epoch": 0.07023864730852923, "grad_norm": 0.39460307359695435, "learning_rate": 3.512308241170175e-05, "loss": 0.3388, "step": 3938 }, { "epoch": 0.07025648343024293, "grad_norm": 0.40903112292289734, "learning_rate": 3.513200142704245e-05, "loss": 0.2475, "step": 3939 }, { "epoch": 0.07027431955195662, "grad_norm": 0.3764410614967346, "learning_rate": 3.5140920442383166e-05, "loss": 0.2743, "step": 3940 }, { "epoch": 0.07029215567367032, "grad_norm": 0.4501705467700958, "learning_rate": 3.514983945772387e-05, "loss": 0.3301, "step": 3941 }, { "epoch": 0.07030999179538401, "grad_norm": 0.39972686767578125, "learning_rate": 3.515875847306457e-05, "loss": 0.3006, "step": 3942 }, { "epoch": 0.07032782791709771, "grad_norm": 0.4252086579799652, "learning_rate": 3.5167677488405285e-05, "loss": 0.2854, "step": 3943 }, { "epoch": 0.0703456640388114, "grad_norm": 0.23194964230060577, "learning_rate": 3.517659650374599e-05, "loss": 0.2212, "step": 3944 }, { "epoch": 0.07036350016052509, "grad_norm": 0.3495014011859894, "learning_rate": 3.518551551908669e-05, "loss": 0.2693, "step": 3945 }, { "epoch": 0.07038133628223879, "grad_norm": 0.409226655960083, "learning_rate": 3.51944345344274e-05, "loss": 0.2739, "step": 3946 }, { "epoch": 0.07039917240395248, "grad_norm": 0.3513595759868622, "learning_rate": 3.520335354976811e-05, "loss": 0.2998, "step": 3947 }, { "epoch": 0.07041700852566618, "grad_norm": 0.48407235741615295, "learning_rate": 3.521227256510881e-05, "loss": 0.2471, "step": 3948 }, { "epoch": 0.07043484464737987, "grad_norm": 0.29064351320266724, "learning_rate": 3.5221191580449517e-05, "loss": 0.2737, "step": 3949 }, { "epoch": 0.07045268076909357, "grad_norm": 0.3176039457321167, "learning_rate": 3.523011059579023e-05, "loss": 0.2277, "step": 3950 }, { "epoch": 0.07047051689080726, "grad_norm": 0.40843313932418823, "learning_rate": 3.523902961113093e-05, "loss": 0.2813, "step": 3951 }, { "epoch": 0.07048835301252096, "grad_norm": 0.4520396590232849, "learning_rate": 3.5247948626471636e-05, "loss": 0.2917, "step": 3952 }, { "epoch": 0.07050618913423465, "grad_norm": 0.3149385452270508, "learning_rate": 3.525686764181235e-05, "loss": 0.2436, "step": 3953 }, { "epoch": 0.07052402525594835, "grad_norm": 0.29427260160446167, "learning_rate": 3.526578665715305e-05, "loss": 0.2319, "step": 3954 }, { "epoch": 0.07054186137766204, "grad_norm": 0.3799297511577606, "learning_rate": 3.5274705672493755e-05, "loss": 0.2549, "step": 3955 }, { "epoch": 0.07055969749937574, "grad_norm": 0.38813266158103943, "learning_rate": 3.528362468783447e-05, "loss": 0.3213, "step": 3956 }, { "epoch": 0.07057753362108943, "grad_norm": 0.3146475553512573, "learning_rate": 3.5292543703175174e-05, "loss": 0.2468, "step": 3957 }, { "epoch": 0.07059536974280313, "grad_norm": 0.3026726543903351, "learning_rate": 3.5301462718515874e-05, "loss": 0.2763, "step": 3958 }, { "epoch": 0.07061320586451682, "grad_norm": 0.46944424510002136, "learning_rate": 3.531038173385659e-05, "loss": 0.2786, "step": 3959 }, { "epoch": 0.07063104198623052, "grad_norm": 0.4122660458087921, "learning_rate": 3.5319300749197293e-05, "loss": 0.2496, "step": 3960 }, { "epoch": 0.07064887810794421, "grad_norm": 0.39696258306503296, "learning_rate": 3.532821976453799e-05, "loss": 0.3159, "step": 3961 }, { "epoch": 0.07066671422965791, "grad_norm": 0.4710720181465149, "learning_rate": 3.5337138779878706e-05, "loss": 0.3014, "step": 3962 }, { "epoch": 0.0706845503513716, "grad_norm": 0.43712565302848816, "learning_rate": 3.534605779521941e-05, "loss": 0.2285, "step": 3963 }, { "epoch": 0.0707023864730853, "grad_norm": 0.4206271171569824, "learning_rate": 3.535497681056011e-05, "loss": 0.2532, "step": 3964 }, { "epoch": 0.07072022259479899, "grad_norm": 0.38638371229171753, "learning_rate": 3.5363895825900825e-05, "loss": 0.2827, "step": 3965 }, { "epoch": 0.07073805871651268, "grad_norm": 0.397139310836792, "learning_rate": 3.537281484124153e-05, "loss": 0.3291, "step": 3966 }, { "epoch": 0.07075589483822638, "grad_norm": 0.3956685960292816, "learning_rate": 3.538173385658223e-05, "loss": 0.2965, "step": 3967 }, { "epoch": 0.07077373095994006, "grad_norm": 0.45953574776649475, "learning_rate": 3.5390652871922945e-05, "loss": 0.3032, "step": 3968 }, { "epoch": 0.07079156708165377, "grad_norm": 0.39530250430107117, "learning_rate": 3.539957188726365e-05, "loss": 0.2633, "step": 3969 }, { "epoch": 0.07080940320336745, "grad_norm": 0.3210938572883606, "learning_rate": 3.540849090260435e-05, "loss": 0.2402, "step": 3970 }, { "epoch": 0.07082723932508116, "grad_norm": 0.4133577346801758, "learning_rate": 3.541740991794506e-05, "loss": 0.2767, "step": 3971 }, { "epoch": 0.07084507544679484, "grad_norm": 0.3882431983947754, "learning_rate": 3.542632893328577e-05, "loss": 0.2685, "step": 3972 }, { "epoch": 0.07086291156850855, "grad_norm": 0.3944385051727295, "learning_rate": 3.543524794862647e-05, "loss": 0.2436, "step": 3973 }, { "epoch": 0.07088074769022223, "grad_norm": 0.3825366795063019, "learning_rate": 3.5444166963967176e-05, "loss": 0.2329, "step": 3974 }, { "epoch": 0.07089858381193594, "grad_norm": 0.39589712023735046, "learning_rate": 3.545308597930789e-05, "loss": 0.2512, "step": 3975 }, { "epoch": 0.07091641993364962, "grad_norm": 0.6119086742401123, "learning_rate": 3.546200499464859e-05, "loss": 0.2515, "step": 3976 }, { "epoch": 0.07093425605536333, "grad_norm": 0.3706018328666687, "learning_rate": 3.5470924009989295e-05, "loss": 0.2414, "step": 3977 }, { "epoch": 0.07095209217707701, "grad_norm": 0.47150352597236633, "learning_rate": 3.547984302533001e-05, "loss": 0.3101, "step": 3978 }, { "epoch": 0.07096992829879072, "grad_norm": 0.36461305618286133, "learning_rate": 3.5488762040670715e-05, "loss": 0.3009, "step": 3979 }, { "epoch": 0.0709877644205044, "grad_norm": 0.28881222009658813, "learning_rate": 3.5497681056011414e-05, "loss": 0.2497, "step": 3980 }, { "epoch": 0.0710056005422181, "grad_norm": 0.29770681262016296, "learning_rate": 3.550660007135213e-05, "loss": 0.297, "step": 3981 }, { "epoch": 0.0710234366639318, "grad_norm": 0.44741079211235046, "learning_rate": 3.5515519086692834e-05, "loss": 0.2879, "step": 3982 }, { "epoch": 0.0710412727856455, "grad_norm": 0.4213384687900543, "learning_rate": 3.5524438102033533e-05, "loss": 0.2741, "step": 3983 }, { "epoch": 0.07105910890735918, "grad_norm": 0.4397996962070465, "learning_rate": 3.553335711737425e-05, "loss": 0.2937, "step": 3984 }, { "epoch": 0.07107694502907287, "grad_norm": 0.3693235218524933, "learning_rate": 3.554227613271495e-05, "loss": 0.239, "step": 3985 }, { "epoch": 0.07109478115078657, "grad_norm": 0.34017184376716614, "learning_rate": 3.555119514805565e-05, "loss": 0.2538, "step": 3986 }, { "epoch": 0.07111261727250026, "grad_norm": 0.5534206628799438, "learning_rate": 3.5560114163396366e-05, "loss": 0.2594, "step": 3987 }, { "epoch": 0.07113045339421396, "grad_norm": 0.3913263976573944, "learning_rate": 3.556903317873707e-05, "loss": 0.2784, "step": 3988 }, { "epoch": 0.07114828951592765, "grad_norm": 0.5018420219421387, "learning_rate": 3.557795219407777e-05, "loss": 0.2451, "step": 3989 }, { "epoch": 0.07116612563764135, "grad_norm": 0.3958134055137634, "learning_rate": 3.5586871209418485e-05, "loss": 0.2874, "step": 3990 }, { "epoch": 0.07118396175935504, "grad_norm": 0.33712950348854065, "learning_rate": 3.559579022475919e-05, "loss": 0.2508, "step": 3991 }, { "epoch": 0.07120179788106874, "grad_norm": 0.429338663816452, "learning_rate": 3.560470924009989e-05, "loss": 0.2761, "step": 3992 }, { "epoch": 0.07121963400278243, "grad_norm": 0.4817236065864563, "learning_rate": 3.5613628255440604e-05, "loss": 0.3382, "step": 3993 }, { "epoch": 0.07123747012449613, "grad_norm": 0.38059717416763306, "learning_rate": 3.562254727078131e-05, "loss": 0.2443, "step": 3994 }, { "epoch": 0.07125530624620982, "grad_norm": 0.38838183879852295, "learning_rate": 3.563146628612201e-05, "loss": 0.2502, "step": 3995 }, { "epoch": 0.07127314236792352, "grad_norm": 0.45766136050224304, "learning_rate": 3.564038530146272e-05, "loss": 0.295, "step": 3996 }, { "epoch": 0.07129097848963721, "grad_norm": 0.406605064868927, "learning_rate": 3.564930431680343e-05, "loss": 0.2862, "step": 3997 }, { "epoch": 0.07130881461135091, "grad_norm": 0.32387053966522217, "learning_rate": 3.565822333214413e-05, "loss": 0.2469, "step": 3998 }, { "epoch": 0.0713266507330646, "grad_norm": 0.4547450542449951, "learning_rate": 3.5667142347484836e-05, "loss": 0.3002, "step": 3999 }, { "epoch": 0.0713444868547783, "grad_norm": 0.33579307794570923, "learning_rate": 3.567606136282555e-05, "loss": 0.2714, "step": 4000 }, { "epoch": 0.0713444868547783, "eval_loss": 0.2601618766784668, "eval_runtime": 1615.4968, "eval_samples_per_second": 0.634, "eval_steps_per_second": 0.106, "step": 4000 }, { "epoch": 0.07136232297649199, "grad_norm": 0.37511202692985535, "learning_rate": 3.568498037816625e-05, "loss": 0.2985, "step": 4001 }, { "epoch": 0.07138015909820569, "grad_norm": 0.4718485176563263, "learning_rate": 3.5693899393506955e-05, "loss": 0.2835, "step": 4002 }, { "epoch": 0.07139799521991938, "grad_norm": 0.3611677885055542, "learning_rate": 3.570281840884767e-05, "loss": 0.2438, "step": 4003 }, { "epoch": 0.07141583134163308, "grad_norm": 0.47874966263771057, "learning_rate": 3.5711737424188374e-05, "loss": 0.2678, "step": 4004 }, { "epoch": 0.07143366746334677, "grad_norm": 0.4065699875354767, "learning_rate": 3.5720656439529074e-05, "loss": 0.2874, "step": 4005 }, { "epoch": 0.07145150358506046, "grad_norm": 0.35797980427742004, "learning_rate": 3.572957545486979e-05, "loss": 0.2596, "step": 4006 }, { "epoch": 0.07146933970677416, "grad_norm": 0.3942461311817169, "learning_rate": 3.573849447021049e-05, "loss": 0.2218, "step": 4007 }, { "epoch": 0.07148717582848785, "grad_norm": 0.48561549186706543, "learning_rate": 3.574741348555119e-05, "loss": 0.4059, "step": 4008 }, { "epoch": 0.07150501195020155, "grad_norm": 0.3484053611755371, "learning_rate": 3.5756332500891906e-05, "loss": 0.2734, "step": 4009 }, { "epoch": 0.07152284807191524, "grad_norm": 0.46138960123062134, "learning_rate": 3.576525151623261e-05, "loss": 0.2802, "step": 4010 }, { "epoch": 0.07154068419362894, "grad_norm": 0.3479671776294708, "learning_rate": 3.577417053157331e-05, "loss": 0.2435, "step": 4011 }, { "epoch": 0.07155852031534263, "grad_norm": 0.35693997144699097, "learning_rate": 3.5783089546914025e-05, "loss": 0.1869, "step": 4012 }, { "epoch": 0.07157635643705633, "grad_norm": 0.39361152052879333, "learning_rate": 3.579200856225473e-05, "loss": 0.3071, "step": 4013 }, { "epoch": 0.07159419255877002, "grad_norm": 0.4611194133758545, "learning_rate": 3.580092757759543e-05, "loss": 0.3552, "step": 4014 }, { "epoch": 0.07161202868048372, "grad_norm": 0.3547293543815613, "learning_rate": 3.5809846592936144e-05, "loss": 0.2846, "step": 4015 }, { "epoch": 0.07162986480219741, "grad_norm": 0.4279080331325531, "learning_rate": 3.581876560827685e-05, "loss": 0.3042, "step": 4016 }, { "epoch": 0.07164770092391111, "grad_norm": 0.3108879327774048, "learning_rate": 3.582768462361755e-05, "loss": 0.2675, "step": 4017 }, { "epoch": 0.0716655370456248, "grad_norm": 0.39170220494270325, "learning_rate": 3.5836603638958264e-05, "loss": 0.3341, "step": 4018 }, { "epoch": 0.0716833731673385, "grad_norm": 0.343044638633728, "learning_rate": 3.584552265429897e-05, "loss": 0.2676, "step": 4019 }, { "epoch": 0.07170120928905219, "grad_norm": 0.40363213419914246, "learning_rate": 3.585444166963967e-05, "loss": 0.3478, "step": 4020 }, { "epoch": 0.07171904541076589, "grad_norm": 0.33018651604652405, "learning_rate": 3.586336068498038e-05, "loss": 0.2652, "step": 4021 }, { "epoch": 0.07173688153247958, "grad_norm": 0.26141229271888733, "learning_rate": 3.587227970032109e-05, "loss": 0.2274, "step": 4022 }, { "epoch": 0.07175471765419328, "grad_norm": 0.37298089265823364, "learning_rate": 3.588119871566179e-05, "loss": 0.2807, "step": 4023 }, { "epoch": 0.07177255377590697, "grad_norm": 0.34697332978248596, "learning_rate": 3.5890117731002495e-05, "loss": 0.2605, "step": 4024 }, { "epoch": 0.07179038989762065, "grad_norm": 0.4437396824359894, "learning_rate": 3.589903674634321e-05, "loss": 0.3161, "step": 4025 }, { "epoch": 0.07180822601933436, "grad_norm": 0.36411213874816895, "learning_rate": 3.590795576168391e-05, "loss": 0.2671, "step": 4026 }, { "epoch": 0.07182606214104804, "grad_norm": 0.3403491973876953, "learning_rate": 3.5916874777024614e-05, "loss": 0.2379, "step": 4027 }, { "epoch": 0.07184389826276175, "grad_norm": 0.34452518820762634, "learning_rate": 3.592579379236533e-05, "loss": 0.2731, "step": 4028 }, { "epoch": 0.07186173438447543, "grad_norm": 0.3484019935131073, "learning_rate": 3.5934712807706034e-05, "loss": 0.3297, "step": 4029 }, { "epoch": 0.07187957050618914, "grad_norm": 0.28667059540748596, "learning_rate": 3.594363182304673e-05, "loss": 0.2342, "step": 4030 }, { "epoch": 0.07189740662790282, "grad_norm": 0.46884414553642273, "learning_rate": 3.5952550838387446e-05, "loss": 0.2701, "step": 4031 }, { "epoch": 0.07191524274961653, "grad_norm": 0.4335917532444, "learning_rate": 3.596146985372815e-05, "loss": 0.3208, "step": 4032 }, { "epoch": 0.07193307887133021, "grad_norm": 0.3155769109725952, "learning_rate": 3.597038886906885e-05, "loss": 0.2903, "step": 4033 }, { "epoch": 0.07195091499304392, "grad_norm": 0.34757399559020996, "learning_rate": 3.5979307884409566e-05, "loss": 0.2854, "step": 4034 }, { "epoch": 0.0719687511147576, "grad_norm": 0.42476123571395874, "learning_rate": 3.598822689975027e-05, "loss": 0.2812, "step": 4035 }, { "epoch": 0.0719865872364713, "grad_norm": 0.3788479268550873, "learning_rate": 3.599714591509097e-05, "loss": 0.2432, "step": 4036 }, { "epoch": 0.072004423358185, "grad_norm": 0.3588060736656189, "learning_rate": 3.6006064930431685e-05, "loss": 0.2805, "step": 4037 }, { "epoch": 0.0720222594798987, "grad_norm": 0.35240060091018677, "learning_rate": 3.601498394577239e-05, "loss": 0.2647, "step": 4038 }, { "epoch": 0.07204009560161238, "grad_norm": 0.3642655313014984, "learning_rate": 3.602390296111309e-05, "loss": 0.2678, "step": 4039 }, { "epoch": 0.07205793172332609, "grad_norm": 0.41785523295402527, "learning_rate": 3.6032821976453804e-05, "loss": 0.269, "step": 4040 }, { "epoch": 0.07207576784503977, "grad_norm": 0.48622530698776245, "learning_rate": 3.604174099179451e-05, "loss": 0.2998, "step": 4041 }, { "epoch": 0.07209360396675348, "grad_norm": 0.5432617664337158, "learning_rate": 3.605066000713521e-05, "loss": 0.2878, "step": 4042 }, { "epoch": 0.07211144008846716, "grad_norm": 0.40627986192703247, "learning_rate": 3.605957902247592e-05, "loss": 0.3137, "step": 4043 }, { "epoch": 0.07212927621018086, "grad_norm": 0.3522103428840637, "learning_rate": 3.606849803781663e-05, "loss": 0.3098, "step": 4044 }, { "epoch": 0.07214711233189455, "grad_norm": 0.2785491347312927, "learning_rate": 3.607741705315733e-05, "loss": 0.2428, "step": 4045 }, { "epoch": 0.07216494845360824, "grad_norm": 0.3922576606273651, "learning_rate": 3.608633606849804e-05, "loss": 0.2949, "step": 4046 }, { "epoch": 0.07218278457532194, "grad_norm": 0.41649967432022095, "learning_rate": 3.609525508383875e-05, "loss": 0.2817, "step": 4047 }, { "epoch": 0.07220062069703563, "grad_norm": 0.284446656703949, "learning_rate": 3.610417409917945e-05, "loss": 0.2594, "step": 4048 }, { "epoch": 0.07221845681874933, "grad_norm": 0.37044814229011536, "learning_rate": 3.6113093114520155e-05, "loss": 0.2537, "step": 4049 }, { "epoch": 0.07223629294046302, "grad_norm": 0.3564845621585846, "learning_rate": 3.612201212986087e-05, "loss": 0.2759, "step": 4050 }, { "epoch": 0.07225412906217672, "grad_norm": 0.44440025091171265, "learning_rate": 3.6130931145201574e-05, "loss": 0.3013, "step": 4051 }, { "epoch": 0.07227196518389041, "grad_norm": 0.39229390025138855, "learning_rate": 3.6139850160542274e-05, "loss": 0.3123, "step": 4052 }, { "epoch": 0.07228980130560411, "grad_norm": 0.3334742486476898, "learning_rate": 3.614876917588299e-05, "loss": 0.2487, "step": 4053 }, { "epoch": 0.0723076374273178, "grad_norm": 0.41078320145606995, "learning_rate": 3.615768819122369e-05, "loss": 0.2915, "step": 4054 }, { "epoch": 0.0723254735490315, "grad_norm": 0.402371883392334, "learning_rate": 3.616660720656439e-05, "loss": 0.2813, "step": 4055 }, { "epoch": 0.07234330967074519, "grad_norm": 0.3465537428855896, "learning_rate": 3.6175526221905106e-05, "loss": 0.2211, "step": 4056 }, { "epoch": 0.07236114579245889, "grad_norm": 0.4069521129131317, "learning_rate": 3.618444523724581e-05, "loss": 0.308, "step": 4057 }, { "epoch": 0.07237898191417258, "grad_norm": 0.3115183115005493, "learning_rate": 3.619336425258651e-05, "loss": 0.2539, "step": 4058 }, { "epoch": 0.07239681803588628, "grad_norm": 0.3484099805355072, "learning_rate": 3.6202283267927225e-05, "loss": 0.2711, "step": 4059 }, { "epoch": 0.07241465415759997, "grad_norm": 0.3692646026611328, "learning_rate": 3.621120228326793e-05, "loss": 0.2349, "step": 4060 }, { "epoch": 0.07243249027931367, "grad_norm": 0.29867902398109436, "learning_rate": 3.622012129860863e-05, "loss": 0.2434, "step": 4061 }, { "epoch": 0.07245032640102736, "grad_norm": 0.3988645672798157, "learning_rate": 3.6229040313949344e-05, "loss": 0.3059, "step": 4062 }, { "epoch": 0.07246816252274106, "grad_norm": 0.3653406500816345, "learning_rate": 3.623795932929005e-05, "loss": 0.2944, "step": 4063 }, { "epoch": 0.07248599864445475, "grad_norm": 0.4839371144771576, "learning_rate": 3.624687834463075e-05, "loss": 0.2584, "step": 4064 }, { "epoch": 0.07250383476616845, "grad_norm": 0.30030038952827454, "learning_rate": 3.6255797359971463e-05, "loss": 0.2203, "step": 4065 }, { "epoch": 0.07252167088788214, "grad_norm": 0.34645602107048035, "learning_rate": 3.626471637531217e-05, "loss": 0.2825, "step": 4066 }, { "epoch": 0.07253950700959583, "grad_norm": 0.37782979011535645, "learning_rate": 3.627363539065287e-05, "loss": 0.2746, "step": 4067 }, { "epoch": 0.07255734313130953, "grad_norm": 0.34229543805122375, "learning_rate": 3.628255440599358e-05, "loss": 0.2778, "step": 4068 }, { "epoch": 0.07257517925302322, "grad_norm": 0.3464083969593048, "learning_rate": 3.629147342133429e-05, "loss": 0.2561, "step": 4069 }, { "epoch": 0.07259301537473692, "grad_norm": 0.31461194157600403, "learning_rate": 3.630039243667499e-05, "loss": 0.2399, "step": 4070 }, { "epoch": 0.0726108514964506, "grad_norm": 0.25036537647247314, "learning_rate": 3.63093114520157e-05, "loss": 0.207, "step": 4071 }, { "epoch": 0.07262868761816431, "grad_norm": 0.314247727394104, "learning_rate": 3.631823046735641e-05, "loss": 0.2621, "step": 4072 }, { "epoch": 0.072646523739878, "grad_norm": 0.3913421332836151, "learning_rate": 3.632714948269711e-05, "loss": 0.2461, "step": 4073 }, { "epoch": 0.0726643598615917, "grad_norm": 0.3956703841686249, "learning_rate": 3.6336068498037814e-05, "loss": 0.3336, "step": 4074 }, { "epoch": 0.07268219598330539, "grad_norm": 0.3772980570793152, "learning_rate": 3.634498751337853e-05, "loss": 0.2774, "step": 4075 }, { "epoch": 0.07270003210501909, "grad_norm": 0.37001925706863403, "learning_rate": 3.6353906528719234e-05, "loss": 0.2694, "step": 4076 }, { "epoch": 0.07271786822673278, "grad_norm": 0.46089768409729004, "learning_rate": 3.636282554405993e-05, "loss": 0.3106, "step": 4077 }, { "epoch": 0.07273570434844648, "grad_norm": 0.3164888620376587, "learning_rate": 3.6371744559400646e-05, "loss": 0.2444, "step": 4078 }, { "epoch": 0.07275354047016017, "grad_norm": 0.3862965703010559, "learning_rate": 3.638066357474135e-05, "loss": 0.3327, "step": 4079 }, { "epoch": 0.07277137659187387, "grad_norm": 0.43099600076675415, "learning_rate": 3.638958259008205e-05, "loss": 0.2883, "step": 4080 }, { "epoch": 0.07278921271358756, "grad_norm": 0.3839138448238373, "learning_rate": 3.6398501605422766e-05, "loss": 0.3069, "step": 4081 }, { "epoch": 0.07280704883530126, "grad_norm": 0.4318084716796875, "learning_rate": 3.640742062076347e-05, "loss": 0.3343, "step": 4082 }, { "epoch": 0.07282488495701495, "grad_norm": 0.4460492432117462, "learning_rate": 3.641633963610417e-05, "loss": 0.2989, "step": 4083 }, { "epoch": 0.07284272107872865, "grad_norm": 0.3706546723842621, "learning_rate": 3.6425258651444885e-05, "loss": 0.28, "step": 4084 }, { "epoch": 0.07286055720044234, "grad_norm": 0.3881326913833618, "learning_rate": 3.643417766678559e-05, "loss": 0.2534, "step": 4085 }, { "epoch": 0.07287839332215602, "grad_norm": 0.34988898038864136, "learning_rate": 3.644309668212629e-05, "loss": 0.2833, "step": 4086 }, { "epoch": 0.07289622944386973, "grad_norm": 0.35669222474098206, "learning_rate": 3.6452015697467004e-05, "loss": 0.2807, "step": 4087 }, { "epoch": 0.07291406556558341, "grad_norm": 0.470527708530426, "learning_rate": 3.646093471280771e-05, "loss": 0.2369, "step": 4088 }, { "epoch": 0.07293190168729712, "grad_norm": 0.3309403359889984, "learning_rate": 3.646985372814841e-05, "loss": 0.2655, "step": 4089 }, { "epoch": 0.0729497378090108, "grad_norm": 0.29176491498947144, "learning_rate": 3.647877274348912e-05, "loss": 0.2524, "step": 4090 }, { "epoch": 0.0729675739307245, "grad_norm": 0.30771803855895996, "learning_rate": 3.648769175882983e-05, "loss": 0.2541, "step": 4091 }, { "epoch": 0.0729854100524382, "grad_norm": 0.37845999002456665, "learning_rate": 3.649661077417053e-05, "loss": 0.2424, "step": 4092 }, { "epoch": 0.0730032461741519, "grad_norm": 0.49875691533088684, "learning_rate": 3.650552978951124e-05, "loss": 0.2667, "step": 4093 }, { "epoch": 0.07302108229586558, "grad_norm": 0.6584007143974304, "learning_rate": 3.651444880485195e-05, "loss": 0.3108, "step": 4094 }, { "epoch": 0.07303891841757928, "grad_norm": 0.43037962913513184, "learning_rate": 3.652336782019265e-05, "loss": 0.2849, "step": 4095 }, { "epoch": 0.07305675453929297, "grad_norm": 0.554920494556427, "learning_rate": 3.653228683553336e-05, "loss": 0.2853, "step": 4096 }, { "epoch": 0.07307459066100667, "grad_norm": 0.3984546959400177, "learning_rate": 3.654120585087407e-05, "loss": 0.2665, "step": 4097 }, { "epoch": 0.07309242678272036, "grad_norm": 0.36884525418281555, "learning_rate": 3.6550124866214774e-05, "loss": 0.29, "step": 4098 }, { "epoch": 0.07311026290443406, "grad_norm": 0.4039285480976105, "learning_rate": 3.655904388155548e-05, "loss": 0.2768, "step": 4099 }, { "epoch": 0.07312809902614775, "grad_norm": 0.4425659775733948, "learning_rate": 3.656796289689619e-05, "loss": 0.3542, "step": 4100 }, { "epoch": 0.07314593514786145, "grad_norm": 0.2978968024253845, "learning_rate": 3.657688191223689e-05, "loss": 0.2677, "step": 4101 }, { "epoch": 0.07316377126957514, "grad_norm": 0.46927177906036377, "learning_rate": 3.658580092757759e-05, "loss": 0.3002, "step": 4102 }, { "epoch": 0.07318160739128884, "grad_norm": 0.42345908284187317, "learning_rate": 3.6594719942918306e-05, "loss": 0.3118, "step": 4103 }, { "epoch": 0.07319944351300253, "grad_norm": 0.37958866357803345, "learning_rate": 3.660363895825901e-05, "loss": 0.3022, "step": 4104 }, { "epoch": 0.07321727963471623, "grad_norm": 0.3434056043624878, "learning_rate": 3.661255797359971e-05, "loss": 0.2343, "step": 4105 }, { "epoch": 0.07323511575642992, "grad_norm": 0.3816780745983124, "learning_rate": 3.6621476988940425e-05, "loss": 0.3118, "step": 4106 }, { "epoch": 0.07325295187814361, "grad_norm": 0.37504827976226807, "learning_rate": 3.663039600428113e-05, "loss": 0.2506, "step": 4107 }, { "epoch": 0.07327078799985731, "grad_norm": 0.5456230044364929, "learning_rate": 3.663931501962183e-05, "loss": 0.3328, "step": 4108 }, { "epoch": 0.073288624121571, "grad_norm": 0.37133750319480896, "learning_rate": 3.6648234034962544e-05, "loss": 0.3241, "step": 4109 }, { "epoch": 0.0733064602432847, "grad_norm": 0.49705299735069275, "learning_rate": 3.665715305030325e-05, "loss": 0.3726, "step": 4110 }, { "epoch": 0.07332429636499839, "grad_norm": 0.32278305292129517, "learning_rate": 3.666607206564395e-05, "loss": 0.2904, "step": 4111 }, { "epoch": 0.07334213248671209, "grad_norm": 0.3209100663661957, "learning_rate": 3.667499108098466e-05, "loss": 0.2812, "step": 4112 }, { "epoch": 0.07335996860842578, "grad_norm": 0.4132683277130127, "learning_rate": 3.668391009632537e-05, "loss": 0.3324, "step": 4113 }, { "epoch": 0.07337780473013948, "grad_norm": 0.5577122569084167, "learning_rate": 3.669282911166607e-05, "loss": 0.3768, "step": 4114 }, { "epoch": 0.07339564085185317, "grad_norm": 0.403633177280426, "learning_rate": 3.670174812700678e-05, "loss": 0.2625, "step": 4115 }, { "epoch": 0.07341347697356687, "grad_norm": 0.3335971534252167, "learning_rate": 3.671066714234749e-05, "loss": 0.2696, "step": 4116 }, { "epoch": 0.07343131309528056, "grad_norm": 0.34153881669044495, "learning_rate": 3.671958615768819e-05, "loss": 0.2817, "step": 4117 }, { "epoch": 0.07344914921699426, "grad_norm": 0.49378255009651184, "learning_rate": 3.67285051730289e-05, "loss": 0.3012, "step": 4118 }, { "epoch": 0.07346698533870795, "grad_norm": 0.38548022508621216, "learning_rate": 3.673742418836961e-05, "loss": 0.2728, "step": 4119 }, { "epoch": 0.07348482146042165, "grad_norm": 0.26984280347824097, "learning_rate": 3.674634320371031e-05, "loss": 0.2286, "step": 4120 }, { "epoch": 0.07350265758213534, "grad_norm": 0.41002365946769714, "learning_rate": 3.675526221905102e-05, "loss": 0.2589, "step": 4121 }, { "epoch": 0.07352049370384904, "grad_norm": 0.45269328355789185, "learning_rate": 3.676418123439173e-05, "loss": 0.2685, "step": 4122 }, { "epoch": 0.07353832982556273, "grad_norm": 0.4685843586921692, "learning_rate": 3.6773100249732433e-05, "loss": 0.353, "step": 4123 }, { "epoch": 0.07355616594727643, "grad_norm": 0.32437804341316223, "learning_rate": 3.678201926507314e-05, "loss": 0.3025, "step": 4124 }, { "epoch": 0.07357400206899012, "grad_norm": 0.29815438389778137, "learning_rate": 3.6790938280413846e-05, "loss": 0.3155, "step": 4125 }, { "epoch": 0.0735918381907038, "grad_norm": 0.4532708525657654, "learning_rate": 3.679985729575455e-05, "loss": 0.3055, "step": 4126 }, { "epoch": 0.07360967431241751, "grad_norm": 0.3672865331172943, "learning_rate": 3.680877631109525e-05, "loss": 0.2607, "step": 4127 }, { "epoch": 0.0736275104341312, "grad_norm": 0.4694015085697174, "learning_rate": 3.6817695326435965e-05, "loss": 0.3161, "step": 4128 }, { "epoch": 0.0736453465558449, "grad_norm": 0.43574950098991394, "learning_rate": 3.682661434177667e-05, "loss": 0.3091, "step": 4129 }, { "epoch": 0.07366318267755859, "grad_norm": 0.3772227466106415, "learning_rate": 3.683553335711737e-05, "loss": 0.2929, "step": 4130 }, { "epoch": 0.07368101879927229, "grad_norm": 0.3468133509159088, "learning_rate": 3.6844452372458085e-05, "loss": 0.2648, "step": 4131 }, { "epoch": 0.07369885492098598, "grad_norm": 0.3430408835411072, "learning_rate": 3.685337138779879e-05, "loss": 0.2749, "step": 4132 }, { "epoch": 0.07371669104269968, "grad_norm": 0.3087609112262726, "learning_rate": 3.686229040313949e-05, "loss": 0.2363, "step": 4133 }, { "epoch": 0.07373452716441337, "grad_norm": 0.29770421981811523, "learning_rate": 3.6871209418480204e-05, "loss": 0.2397, "step": 4134 }, { "epoch": 0.07375236328612707, "grad_norm": 0.4533739984035492, "learning_rate": 3.688012843382091e-05, "loss": 0.3015, "step": 4135 }, { "epoch": 0.07377019940784076, "grad_norm": 0.34918293356895447, "learning_rate": 3.688904744916161e-05, "loss": 0.3049, "step": 4136 }, { "epoch": 0.07378803552955446, "grad_norm": 0.3501115143299103, "learning_rate": 3.689796646450232e-05, "loss": 0.2774, "step": 4137 }, { "epoch": 0.07380587165126815, "grad_norm": 0.31595152616500854, "learning_rate": 3.690688547984303e-05, "loss": 0.2736, "step": 4138 }, { "epoch": 0.07382370777298185, "grad_norm": 0.3417123854160309, "learning_rate": 3.691580449518373e-05, "loss": 0.2105, "step": 4139 }, { "epoch": 0.07384154389469554, "grad_norm": 0.348685622215271, "learning_rate": 3.692472351052444e-05, "loss": 0.2623, "step": 4140 }, { "epoch": 0.07385938001640924, "grad_norm": 0.3683890700340271, "learning_rate": 3.693364252586515e-05, "loss": 0.3127, "step": 4141 }, { "epoch": 0.07387721613812293, "grad_norm": 0.4189675450325012, "learning_rate": 3.694256154120585e-05, "loss": 0.3463, "step": 4142 }, { "epoch": 0.07389505225983663, "grad_norm": 0.28707680106163025, "learning_rate": 3.695148055654656e-05, "loss": 0.2451, "step": 4143 }, { "epoch": 0.07391288838155032, "grad_norm": 0.48650121688842773, "learning_rate": 3.696039957188727e-05, "loss": 0.2875, "step": 4144 }, { "epoch": 0.07393072450326402, "grad_norm": 0.3325197100639343, "learning_rate": 3.696931858722797e-05, "loss": 0.2779, "step": 4145 }, { "epoch": 0.0739485606249777, "grad_norm": 0.3326643109321594, "learning_rate": 3.697823760256868e-05, "loss": 0.2357, "step": 4146 }, { "epoch": 0.07396639674669139, "grad_norm": 0.4176287353038788, "learning_rate": 3.698715661790939e-05, "loss": 0.3024, "step": 4147 }, { "epoch": 0.0739842328684051, "grad_norm": 0.4428405165672302, "learning_rate": 3.699607563325009e-05, "loss": 0.2801, "step": 4148 }, { "epoch": 0.07400206899011878, "grad_norm": 0.33560845255851746, "learning_rate": 3.70049946485908e-05, "loss": 0.2431, "step": 4149 }, { "epoch": 0.07401990511183248, "grad_norm": 0.41161322593688965, "learning_rate": 3.7013913663931506e-05, "loss": 0.3095, "step": 4150 }, { "epoch": 0.07403774123354617, "grad_norm": 0.34065937995910645, "learning_rate": 3.702283267927221e-05, "loss": 0.3246, "step": 4151 }, { "epoch": 0.07405557735525987, "grad_norm": 0.35372689366340637, "learning_rate": 3.703175169461291e-05, "loss": 0.2366, "step": 4152 }, { "epoch": 0.07407341347697356, "grad_norm": 0.41967201232910156, "learning_rate": 3.7040670709953625e-05, "loss": 0.2655, "step": 4153 }, { "epoch": 0.07409124959868726, "grad_norm": 0.47905057668685913, "learning_rate": 3.704958972529433e-05, "loss": 0.2473, "step": 4154 }, { "epoch": 0.07410908572040095, "grad_norm": 0.49359965324401855, "learning_rate": 3.705850874063503e-05, "loss": 0.2915, "step": 4155 }, { "epoch": 0.07412692184211465, "grad_norm": 0.36214080452919006, "learning_rate": 3.7067427755975744e-05, "loss": 0.2135, "step": 4156 }, { "epoch": 0.07414475796382834, "grad_norm": 0.4641368091106415, "learning_rate": 3.707634677131645e-05, "loss": 0.3361, "step": 4157 }, { "epoch": 0.07416259408554204, "grad_norm": 0.4047207832336426, "learning_rate": 3.708526578665715e-05, "loss": 0.2564, "step": 4158 }, { "epoch": 0.07418043020725573, "grad_norm": 0.45328521728515625, "learning_rate": 3.709418480199786e-05, "loss": 0.3188, "step": 4159 }, { "epoch": 0.07419826632896943, "grad_norm": 0.43802186846733093, "learning_rate": 3.710310381733857e-05, "loss": 0.2796, "step": 4160 }, { "epoch": 0.07421610245068312, "grad_norm": 0.2616294026374817, "learning_rate": 3.711202283267927e-05, "loss": 0.2456, "step": 4161 }, { "epoch": 0.07423393857239682, "grad_norm": 0.39143669605255127, "learning_rate": 3.712094184801998e-05, "loss": 0.2534, "step": 4162 }, { "epoch": 0.07425177469411051, "grad_norm": 0.5078184604644775, "learning_rate": 3.712986086336069e-05, "loss": 0.3468, "step": 4163 }, { "epoch": 0.07426961081582421, "grad_norm": 0.5399411916732788, "learning_rate": 3.713877987870139e-05, "loss": 0.3326, "step": 4164 }, { "epoch": 0.0742874469375379, "grad_norm": 0.32032978534698486, "learning_rate": 3.71476988940421e-05, "loss": 0.2905, "step": 4165 }, { "epoch": 0.07430528305925159, "grad_norm": 0.3497765064239502, "learning_rate": 3.715661790938281e-05, "loss": 0.2818, "step": 4166 }, { "epoch": 0.07432311918096529, "grad_norm": 0.44904711842536926, "learning_rate": 3.716553692472351e-05, "loss": 0.3238, "step": 4167 }, { "epoch": 0.07434095530267898, "grad_norm": 0.36625099182128906, "learning_rate": 3.717445594006422e-05, "loss": 0.3127, "step": 4168 }, { "epoch": 0.07435879142439268, "grad_norm": 0.365116685628891, "learning_rate": 3.718337495540493e-05, "loss": 0.2337, "step": 4169 }, { "epoch": 0.07437662754610637, "grad_norm": 1.0939042568206787, "learning_rate": 3.719229397074563e-05, "loss": 0.2261, "step": 4170 }, { "epoch": 0.07439446366782007, "grad_norm": 0.36195090413093567, "learning_rate": 3.720121298608634e-05, "loss": 0.2614, "step": 4171 }, { "epoch": 0.07441229978953376, "grad_norm": 0.49965110421180725, "learning_rate": 3.7210132001427046e-05, "loss": 0.2952, "step": 4172 }, { "epoch": 0.07443013591124746, "grad_norm": 0.6151539087295532, "learning_rate": 3.721905101676775e-05, "loss": 0.2978, "step": 4173 }, { "epoch": 0.07444797203296115, "grad_norm": 0.33340463042259216, "learning_rate": 3.722797003210846e-05, "loss": 0.2262, "step": 4174 }, { "epoch": 0.07446580815467485, "grad_norm": 0.4527425765991211, "learning_rate": 3.7236889047449165e-05, "loss": 0.2469, "step": 4175 }, { "epoch": 0.07448364427638854, "grad_norm": 0.5253557562828064, "learning_rate": 3.724580806278987e-05, "loss": 0.3149, "step": 4176 }, { "epoch": 0.07450148039810224, "grad_norm": 0.41695138812065125, "learning_rate": 3.725472707813057e-05, "loss": 0.2749, "step": 4177 }, { "epoch": 0.07451931651981593, "grad_norm": 0.40068671107292175, "learning_rate": 3.7263646093471284e-05, "loss": 0.2867, "step": 4178 }, { "epoch": 0.07453715264152963, "grad_norm": 0.31247401237487793, "learning_rate": 3.727256510881199e-05, "loss": 0.2669, "step": 4179 }, { "epoch": 0.07455498876324332, "grad_norm": 0.28745976090431213, "learning_rate": 3.728148412415269e-05, "loss": 0.2492, "step": 4180 }, { "epoch": 0.07457282488495702, "grad_norm": 0.3374185264110565, "learning_rate": 3.7290403139493404e-05, "loss": 0.2182, "step": 4181 }, { "epoch": 0.07459066100667071, "grad_norm": 0.35271865129470825, "learning_rate": 3.729932215483411e-05, "loss": 0.258, "step": 4182 }, { "epoch": 0.07460849712838441, "grad_norm": 0.3584800362586975, "learning_rate": 3.730824117017481e-05, "loss": 0.2396, "step": 4183 }, { "epoch": 0.0746263332500981, "grad_norm": 0.40084826946258545, "learning_rate": 3.731716018551552e-05, "loss": 0.2503, "step": 4184 }, { "epoch": 0.0746441693718118, "grad_norm": 0.42655277252197266, "learning_rate": 3.732607920085623e-05, "loss": 0.2585, "step": 4185 }, { "epoch": 0.07466200549352549, "grad_norm": 0.5718599557876587, "learning_rate": 3.733499821619693e-05, "loss": 0.3211, "step": 4186 }, { "epoch": 0.07467984161523918, "grad_norm": 0.3136085867881775, "learning_rate": 3.734391723153764e-05, "loss": 0.2252, "step": 4187 }, { "epoch": 0.07469767773695288, "grad_norm": 0.4330437481403351, "learning_rate": 3.735283624687835e-05, "loss": 0.2141, "step": 4188 }, { "epoch": 0.07471551385866657, "grad_norm": 0.3692575991153717, "learning_rate": 3.736175526221905e-05, "loss": 0.3017, "step": 4189 }, { "epoch": 0.07473334998038027, "grad_norm": 0.39885830879211426, "learning_rate": 3.737067427755976e-05, "loss": 0.3543, "step": 4190 }, { "epoch": 0.07475118610209396, "grad_norm": 0.5391544103622437, "learning_rate": 3.737959329290047e-05, "loss": 0.2122, "step": 4191 }, { "epoch": 0.07476902222380766, "grad_norm": 0.33087414503097534, "learning_rate": 3.738851230824117e-05, "loss": 0.2777, "step": 4192 }, { "epoch": 0.07478685834552135, "grad_norm": 0.32284751534461975, "learning_rate": 3.739743132358188e-05, "loss": 0.2488, "step": 4193 }, { "epoch": 0.07480469446723505, "grad_norm": 0.38482213020324707, "learning_rate": 3.7406350338922586e-05, "loss": 0.3177, "step": 4194 }, { "epoch": 0.07482253058894874, "grad_norm": 0.3215670883655548, "learning_rate": 3.741526935426329e-05, "loss": 0.2671, "step": 4195 }, { "epoch": 0.07484036671066244, "grad_norm": 0.36106786131858826, "learning_rate": 3.7424188369604e-05, "loss": 0.2864, "step": 4196 }, { "epoch": 0.07485820283237613, "grad_norm": 0.4248841106891632, "learning_rate": 3.7433107384944706e-05, "loss": 0.2748, "step": 4197 }, { "epoch": 0.07487603895408983, "grad_norm": 0.3289128839969635, "learning_rate": 3.744202640028541e-05, "loss": 0.2389, "step": 4198 }, { "epoch": 0.07489387507580351, "grad_norm": 0.4033012092113495, "learning_rate": 3.745094541562612e-05, "loss": 0.3701, "step": 4199 }, { "epoch": 0.07491171119751722, "grad_norm": 0.3788880407810211, "learning_rate": 3.7459864430966825e-05, "loss": 0.3216, "step": 4200 }, { "epoch": 0.0749295473192309, "grad_norm": 0.35157310962677, "learning_rate": 3.746878344630753e-05, "loss": 0.3199, "step": 4201 }, { "epoch": 0.0749473834409446, "grad_norm": 0.330655574798584, "learning_rate": 3.747770246164824e-05, "loss": 0.2625, "step": 4202 }, { "epoch": 0.0749652195626583, "grad_norm": 0.3260590732097626, "learning_rate": 3.7486621476988944e-05, "loss": 0.2614, "step": 4203 }, { "epoch": 0.074983055684372, "grad_norm": 0.36597350239753723, "learning_rate": 3.749554049232965e-05, "loss": 0.2439, "step": 4204 }, { "epoch": 0.07500089180608568, "grad_norm": 0.35310667753219604, "learning_rate": 3.750445950767035e-05, "loss": 0.2776, "step": 4205 }, { "epoch": 0.07501872792779937, "grad_norm": 0.3559993803501129, "learning_rate": 3.751337852301106e-05, "loss": 0.2699, "step": 4206 }, { "epoch": 0.07503656404951307, "grad_norm": 0.32624587416648865, "learning_rate": 3.752229753835177e-05, "loss": 0.2645, "step": 4207 }, { "epoch": 0.07505440017122676, "grad_norm": 0.5166625380516052, "learning_rate": 3.753121655369247e-05, "loss": 0.3367, "step": 4208 }, { "epoch": 0.07507223629294046, "grad_norm": 0.2428450733423233, "learning_rate": 3.754013556903318e-05, "loss": 0.2431, "step": 4209 }, { "epoch": 0.07509007241465415, "grad_norm": 0.4116324484348297, "learning_rate": 3.754905458437389e-05, "loss": 0.2807, "step": 4210 }, { "epoch": 0.07510790853636785, "grad_norm": 0.27581650018692017, "learning_rate": 3.755797359971459e-05, "loss": 0.2234, "step": 4211 }, { "epoch": 0.07512574465808154, "grad_norm": 0.3597171902656555, "learning_rate": 3.75668926150553e-05, "loss": 0.2942, "step": 4212 }, { "epoch": 0.07514358077979524, "grad_norm": 0.4038372039794922, "learning_rate": 3.757581163039601e-05, "loss": 0.2864, "step": 4213 }, { "epoch": 0.07516141690150893, "grad_norm": 0.4361227750778198, "learning_rate": 3.758473064573671e-05, "loss": 0.2777, "step": 4214 }, { "epoch": 0.07517925302322263, "grad_norm": 0.5329325795173645, "learning_rate": 3.759364966107742e-05, "loss": 0.2835, "step": 4215 }, { "epoch": 0.07519708914493632, "grad_norm": 0.3828251361846924, "learning_rate": 3.760256867641813e-05, "loss": 0.2802, "step": 4216 }, { "epoch": 0.07521492526665002, "grad_norm": 0.35325562953948975, "learning_rate": 3.761148769175883e-05, "loss": 0.2741, "step": 4217 }, { "epoch": 0.07523276138836371, "grad_norm": 0.37237346172332764, "learning_rate": 3.762040670709954e-05, "loss": 0.2834, "step": 4218 }, { "epoch": 0.07525059751007741, "grad_norm": 0.44692501425743103, "learning_rate": 3.7629325722440246e-05, "loss": 0.2587, "step": 4219 }, { "epoch": 0.0752684336317911, "grad_norm": 0.3682231903076172, "learning_rate": 3.763824473778095e-05, "loss": 0.2642, "step": 4220 }, { "epoch": 0.0752862697535048, "grad_norm": 0.4995605945587158, "learning_rate": 3.764716375312166e-05, "loss": 0.2394, "step": 4221 }, { "epoch": 0.07530410587521849, "grad_norm": 0.33465439081192017, "learning_rate": 3.7656082768462365e-05, "loss": 0.2605, "step": 4222 }, { "epoch": 0.07532194199693219, "grad_norm": 0.39678364992141724, "learning_rate": 3.766500178380307e-05, "loss": 0.355, "step": 4223 }, { "epoch": 0.07533977811864588, "grad_norm": 0.3365713655948639, "learning_rate": 3.767392079914378e-05, "loss": 0.2951, "step": 4224 }, { "epoch": 0.07535761424035958, "grad_norm": 0.35178980231285095, "learning_rate": 3.7682839814484484e-05, "loss": 0.2638, "step": 4225 }, { "epoch": 0.07537545036207327, "grad_norm": 0.47750401496887207, "learning_rate": 3.769175882982519e-05, "loss": 0.3176, "step": 4226 }, { "epoch": 0.07539328648378696, "grad_norm": 0.3698870539665222, "learning_rate": 3.77006778451659e-05, "loss": 0.2659, "step": 4227 }, { "epoch": 0.07541112260550066, "grad_norm": 0.25361302495002747, "learning_rate": 3.7709596860506603e-05, "loss": 0.2323, "step": 4228 }, { "epoch": 0.07542895872721435, "grad_norm": 0.36756184697151184, "learning_rate": 3.771851587584731e-05, "loss": 0.2129, "step": 4229 }, { "epoch": 0.07544679484892805, "grad_norm": 0.516951322555542, "learning_rate": 3.772743489118801e-05, "loss": 0.2984, "step": 4230 }, { "epoch": 0.07546463097064174, "grad_norm": 0.31373366713523865, "learning_rate": 3.773635390652872e-05, "loss": 0.2698, "step": 4231 }, { "epoch": 0.07548246709235544, "grad_norm": 0.2851543724536896, "learning_rate": 3.774527292186943e-05, "loss": 0.2624, "step": 4232 }, { "epoch": 0.07550030321406913, "grad_norm": 0.33759239315986633, "learning_rate": 3.775419193721013e-05, "loss": 0.3008, "step": 4233 }, { "epoch": 0.07551813933578283, "grad_norm": 0.38497021794319153, "learning_rate": 3.776311095255084e-05, "loss": 0.2882, "step": 4234 }, { "epoch": 0.07553597545749652, "grad_norm": 0.4482494294643402, "learning_rate": 3.777202996789155e-05, "loss": 0.2576, "step": 4235 }, { "epoch": 0.07555381157921022, "grad_norm": 0.31993567943573, "learning_rate": 3.778094898323225e-05, "loss": 0.2589, "step": 4236 }, { "epoch": 0.07557164770092391, "grad_norm": 0.3316757380962372, "learning_rate": 3.778986799857296e-05, "loss": 0.3218, "step": 4237 }, { "epoch": 0.07558948382263761, "grad_norm": 0.3332213759422302, "learning_rate": 3.779878701391367e-05, "loss": 0.2829, "step": 4238 }, { "epoch": 0.0756073199443513, "grad_norm": 0.27364617586135864, "learning_rate": 3.780770602925437e-05, "loss": 0.2453, "step": 4239 }, { "epoch": 0.075625156066065, "grad_norm": 0.5406433939933777, "learning_rate": 3.781662504459508e-05, "loss": 0.2576, "step": 4240 }, { "epoch": 0.07564299218777869, "grad_norm": 0.31070637702941895, "learning_rate": 3.7825544059935786e-05, "loss": 0.2643, "step": 4241 }, { "epoch": 0.07566082830949239, "grad_norm": 0.43498286604881287, "learning_rate": 3.783446307527649e-05, "loss": 0.339, "step": 4242 }, { "epoch": 0.07567866443120608, "grad_norm": 0.43137484788894653, "learning_rate": 3.78433820906172e-05, "loss": 0.2889, "step": 4243 }, { "epoch": 0.07569650055291978, "grad_norm": 0.3390316367149353, "learning_rate": 3.7852301105957906e-05, "loss": 0.2694, "step": 4244 }, { "epoch": 0.07571433667463347, "grad_norm": 0.4314836263656616, "learning_rate": 3.786122012129861e-05, "loss": 0.3117, "step": 4245 }, { "epoch": 0.07573217279634717, "grad_norm": 0.5436944365501404, "learning_rate": 3.787013913663932e-05, "loss": 0.3048, "step": 4246 }, { "epoch": 0.07575000891806086, "grad_norm": 0.4359453022480011, "learning_rate": 3.7879058151980025e-05, "loss": 0.2824, "step": 4247 }, { "epoch": 0.07576784503977455, "grad_norm": 0.4316657781600952, "learning_rate": 3.788797716732073e-05, "loss": 0.3018, "step": 4248 }, { "epoch": 0.07578568116148825, "grad_norm": 0.593874990940094, "learning_rate": 3.789689618266144e-05, "loss": 0.2782, "step": 4249 }, { "epoch": 0.07580351728320193, "grad_norm": 0.36799126863479614, "learning_rate": 3.7905815198002144e-05, "loss": 0.2435, "step": 4250 }, { "epoch": 0.07582135340491564, "grad_norm": 0.4341272711753845, "learning_rate": 3.791473421334285e-05, "loss": 0.2978, "step": 4251 }, { "epoch": 0.07583918952662932, "grad_norm": 0.33046677708625793, "learning_rate": 3.7923653228683557e-05, "loss": 0.2872, "step": 4252 }, { "epoch": 0.07585702564834303, "grad_norm": 0.3701927661895752, "learning_rate": 3.793257224402426e-05, "loss": 0.2577, "step": 4253 }, { "epoch": 0.07587486177005671, "grad_norm": 0.38218578696250916, "learning_rate": 3.794149125936497e-05, "loss": 0.2744, "step": 4254 }, { "epoch": 0.07589269789177042, "grad_norm": 0.513279378414154, "learning_rate": 3.795041027470567e-05, "loss": 0.2901, "step": 4255 }, { "epoch": 0.0759105340134841, "grad_norm": 0.3848417103290558, "learning_rate": 3.795932929004638e-05, "loss": 0.3132, "step": 4256 }, { "epoch": 0.0759283701351978, "grad_norm": 0.39559414982795715, "learning_rate": 3.796824830538709e-05, "loss": 0.3409, "step": 4257 }, { "epoch": 0.0759462062569115, "grad_norm": 0.3061651587486267, "learning_rate": 3.797716732072779e-05, "loss": 0.2477, "step": 4258 }, { "epoch": 0.0759640423786252, "grad_norm": 0.3205283284187317, "learning_rate": 3.79860863360685e-05, "loss": 0.2137, "step": 4259 }, { "epoch": 0.07598187850033888, "grad_norm": 0.3794453740119934, "learning_rate": 3.799500535140921e-05, "loss": 0.2938, "step": 4260 }, { "epoch": 0.07599971462205259, "grad_norm": 0.3018369972705841, "learning_rate": 3.800392436674991e-05, "loss": 0.2972, "step": 4261 }, { "epoch": 0.07601755074376627, "grad_norm": 0.28759652376174927, "learning_rate": 3.801284338209062e-05, "loss": 0.2394, "step": 4262 }, { "epoch": 0.07603538686547998, "grad_norm": 0.404039204120636, "learning_rate": 3.802176239743133e-05, "loss": 0.2396, "step": 4263 }, { "epoch": 0.07605322298719366, "grad_norm": 0.3380386531352997, "learning_rate": 3.803068141277203e-05, "loss": 0.3133, "step": 4264 }, { "epoch": 0.07607105910890737, "grad_norm": 0.3418702185153961, "learning_rate": 3.803960042811274e-05, "loss": 0.2456, "step": 4265 }, { "epoch": 0.07608889523062105, "grad_norm": 0.3194468021392822, "learning_rate": 3.8048519443453446e-05, "loss": 0.245, "step": 4266 }, { "epoch": 0.07610673135233474, "grad_norm": 0.32654035091400146, "learning_rate": 3.805743845879415e-05, "loss": 0.2416, "step": 4267 }, { "epoch": 0.07612456747404844, "grad_norm": 0.3586922585964203, "learning_rate": 3.806635747413486e-05, "loss": 0.2773, "step": 4268 }, { "epoch": 0.07614240359576213, "grad_norm": 0.4663313031196594, "learning_rate": 3.8075276489475565e-05, "loss": 0.3336, "step": 4269 }, { "epoch": 0.07616023971747583, "grad_norm": 0.3342205286026001, "learning_rate": 3.808419550481627e-05, "loss": 0.2854, "step": 4270 }, { "epoch": 0.07617807583918952, "grad_norm": 0.3806861937046051, "learning_rate": 3.809311452015698e-05, "loss": 0.3085, "step": 4271 }, { "epoch": 0.07619591196090322, "grad_norm": 0.29361093044281006, "learning_rate": 3.8102033535497684e-05, "loss": 0.2345, "step": 4272 }, { "epoch": 0.07621374808261691, "grad_norm": 0.34427595138549805, "learning_rate": 3.811095255083839e-05, "loss": 0.2064, "step": 4273 }, { "epoch": 0.07623158420433061, "grad_norm": 0.3486906588077545, "learning_rate": 3.81198715661791e-05, "loss": 0.2433, "step": 4274 }, { "epoch": 0.0762494203260443, "grad_norm": 0.390524685382843, "learning_rate": 3.81287905815198e-05, "loss": 0.239, "step": 4275 }, { "epoch": 0.076267256447758, "grad_norm": 0.4219571352005005, "learning_rate": 3.813770959686051e-05, "loss": 0.2721, "step": 4276 }, { "epoch": 0.07628509256947169, "grad_norm": 0.39834532141685486, "learning_rate": 3.8146628612201216e-05, "loss": 0.2484, "step": 4277 }, { "epoch": 0.07630292869118539, "grad_norm": 0.4055795967578888, "learning_rate": 3.815554762754192e-05, "loss": 0.3014, "step": 4278 }, { "epoch": 0.07632076481289908, "grad_norm": 0.4223778247833252, "learning_rate": 3.816446664288263e-05, "loss": 0.3145, "step": 4279 }, { "epoch": 0.07633860093461278, "grad_norm": 0.3109219968318939, "learning_rate": 3.817338565822333e-05, "loss": 0.2424, "step": 4280 }, { "epoch": 0.07635643705632647, "grad_norm": 0.31068381667137146, "learning_rate": 3.818230467356404e-05, "loss": 0.2978, "step": 4281 }, { "epoch": 0.07637427317804017, "grad_norm": 0.3217722773551941, "learning_rate": 3.819122368890475e-05, "loss": 0.25, "step": 4282 }, { "epoch": 0.07639210929975386, "grad_norm": 0.304267942905426, "learning_rate": 3.820014270424545e-05, "loss": 0.2374, "step": 4283 }, { "epoch": 0.07640994542146756, "grad_norm": 0.2824154794216156, "learning_rate": 3.820906171958616e-05, "loss": 0.2216, "step": 4284 }, { "epoch": 0.07642778154318125, "grad_norm": 0.5808196067810059, "learning_rate": 3.821798073492687e-05, "loss": 0.3523, "step": 4285 }, { "epoch": 0.07644561766489495, "grad_norm": 0.46458321809768677, "learning_rate": 3.822689975026757e-05, "loss": 0.3002, "step": 4286 }, { "epoch": 0.07646345378660864, "grad_norm": 0.35721060633659363, "learning_rate": 3.823581876560828e-05, "loss": 0.2472, "step": 4287 }, { "epoch": 0.07648128990832233, "grad_norm": 0.30313968658447266, "learning_rate": 3.8244737780948986e-05, "loss": 0.2663, "step": 4288 }, { "epoch": 0.07649912603003603, "grad_norm": 0.30494025349617004, "learning_rate": 3.825365679628969e-05, "loss": 0.2412, "step": 4289 }, { "epoch": 0.07651696215174972, "grad_norm": 0.46083828806877136, "learning_rate": 3.82625758116304e-05, "loss": 0.2804, "step": 4290 }, { "epoch": 0.07653479827346342, "grad_norm": 0.4277346134185791, "learning_rate": 3.8271494826971105e-05, "loss": 0.283, "step": 4291 }, { "epoch": 0.07655263439517711, "grad_norm": 0.3467538058757782, "learning_rate": 3.828041384231181e-05, "loss": 0.3038, "step": 4292 }, { "epoch": 0.07657047051689081, "grad_norm": 0.3192565143108368, "learning_rate": 3.828933285765252e-05, "loss": 0.2661, "step": 4293 }, { "epoch": 0.0765883066386045, "grad_norm": 0.25358909368515015, "learning_rate": 3.8298251872993225e-05, "loss": 0.2309, "step": 4294 }, { "epoch": 0.0766061427603182, "grad_norm": 0.8111319541931152, "learning_rate": 3.830717088833393e-05, "loss": 0.317, "step": 4295 }, { "epoch": 0.07662397888203189, "grad_norm": 0.21290086209774017, "learning_rate": 3.831608990367464e-05, "loss": 0.2004, "step": 4296 }, { "epoch": 0.07664181500374559, "grad_norm": 0.3975752592086792, "learning_rate": 3.8325008919015344e-05, "loss": 0.2432, "step": 4297 }, { "epoch": 0.07665965112545928, "grad_norm": 0.3503962457180023, "learning_rate": 3.833392793435605e-05, "loss": 0.2263, "step": 4298 }, { "epoch": 0.07667748724717298, "grad_norm": 0.25473836064338684, "learning_rate": 3.8342846949696756e-05, "loss": 0.232, "step": 4299 }, { "epoch": 0.07669532336888667, "grad_norm": 0.423564076423645, "learning_rate": 3.835176596503746e-05, "loss": 0.2917, "step": 4300 }, { "epoch": 0.07671315949060037, "grad_norm": 0.39943602681159973, "learning_rate": 3.836068498037817e-05, "loss": 0.317, "step": 4301 }, { "epoch": 0.07673099561231406, "grad_norm": 0.3379661738872528, "learning_rate": 3.8369603995718876e-05, "loss": 0.2949, "step": 4302 }, { "epoch": 0.07674883173402776, "grad_norm": 0.36557328701019287, "learning_rate": 3.837852301105958e-05, "loss": 0.2942, "step": 4303 }, { "epoch": 0.07676666785574145, "grad_norm": 0.2946944832801819, "learning_rate": 3.838744202640029e-05, "loss": 0.2648, "step": 4304 }, { "epoch": 0.07678450397745515, "grad_norm": 0.3372962474822998, "learning_rate": 3.8396361041740995e-05, "loss": 0.2751, "step": 4305 }, { "epoch": 0.07680234009916884, "grad_norm": 0.31931331753730774, "learning_rate": 3.84052800570817e-05, "loss": 0.2634, "step": 4306 }, { "epoch": 0.07682017622088252, "grad_norm": 0.28606119751930237, "learning_rate": 3.841419907242241e-05, "loss": 0.2607, "step": 4307 }, { "epoch": 0.07683801234259623, "grad_norm": 0.2836773097515106, "learning_rate": 3.842311808776311e-05, "loss": 0.2223, "step": 4308 }, { "epoch": 0.07685584846430991, "grad_norm": 0.35678985714912415, "learning_rate": 3.843203710310382e-05, "loss": 0.3045, "step": 4309 }, { "epoch": 0.07687368458602362, "grad_norm": 0.3318808078765869, "learning_rate": 3.844095611844453e-05, "loss": 0.2658, "step": 4310 }, { "epoch": 0.0768915207077373, "grad_norm": 0.30833348631858826, "learning_rate": 3.8449875133785226e-05, "loss": 0.2436, "step": 4311 }, { "epoch": 0.076909356829451, "grad_norm": 0.21143841743469238, "learning_rate": 3.845879414912594e-05, "loss": 0.2027, "step": 4312 }, { "epoch": 0.0769271929511647, "grad_norm": 0.3417409360408783, "learning_rate": 3.8467713164466646e-05, "loss": 0.2491, "step": 4313 }, { "epoch": 0.0769450290728784, "grad_norm": 0.3178887665271759, "learning_rate": 3.847663217980735e-05, "loss": 0.2471, "step": 4314 }, { "epoch": 0.07696286519459208, "grad_norm": 0.4646736681461334, "learning_rate": 3.848555119514806e-05, "loss": 0.3514, "step": 4315 }, { "epoch": 0.07698070131630579, "grad_norm": 0.414309561252594, "learning_rate": 3.8494470210488765e-05, "loss": 0.3137, "step": 4316 }, { "epoch": 0.07699853743801947, "grad_norm": 0.41656121611595154, "learning_rate": 3.850338922582947e-05, "loss": 0.3102, "step": 4317 }, { "epoch": 0.07701637355973318, "grad_norm": 0.3836608827114105, "learning_rate": 3.851230824117018e-05, "loss": 0.2531, "step": 4318 }, { "epoch": 0.07703420968144686, "grad_norm": 0.34572160243988037, "learning_rate": 3.8521227256510884e-05, "loss": 0.2616, "step": 4319 }, { "epoch": 0.07705204580316057, "grad_norm": 0.2973364591598511, "learning_rate": 3.853014627185159e-05, "loss": 0.2206, "step": 4320 }, { "epoch": 0.07706988192487425, "grad_norm": 0.2860994338989258, "learning_rate": 3.85390652871923e-05, "loss": 0.2764, "step": 4321 }, { "epoch": 0.07708771804658796, "grad_norm": 0.3224015533924103, "learning_rate": 3.8547984302533e-05, "loss": 0.2014, "step": 4322 }, { "epoch": 0.07710555416830164, "grad_norm": 0.35749709606170654, "learning_rate": 3.855690331787371e-05, "loss": 0.2518, "step": 4323 }, { "epoch": 0.07712339029001535, "grad_norm": 0.36966726183891296, "learning_rate": 3.8565822333214416e-05, "loss": 0.25, "step": 4324 }, { "epoch": 0.07714122641172903, "grad_norm": 0.39020729064941406, "learning_rate": 3.857474134855512e-05, "loss": 0.2743, "step": 4325 }, { "epoch": 0.07715906253344273, "grad_norm": 0.41506093740463257, "learning_rate": 3.858366036389583e-05, "loss": 0.243, "step": 4326 }, { "epoch": 0.07717689865515642, "grad_norm": 0.34525230526924133, "learning_rate": 3.8592579379236535e-05, "loss": 0.2623, "step": 4327 }, { "epoch": 0.07719473477687011, "grad_norm": 0.39055317640304565, "learning_rate": 3.860149839457724e-05, "loss": 0.2801, "step": 4328 }, { "epoch": 0.07721257089858381, "grad_norm": 0.2509728968143463, "learning_rate": 3.861041740991795e-05, "loss": 0.2534, "step": 4329 }, { "epoch": 0.0772304070202975, "grad_norm": 0.337094783782959, "learning_rate": 3.8619336425258654e-05, "loss": 0.2573, "step": 4330 }, { "epoch": 0.0772482431420112, "grad_norm": 0.3051152229309082, "learning_rate": 3.862825544059936e-05, "loss": 0.2413, "step": 4331 }, { "epoch": 0.07726607926372489, "grad_norm": 0.38071292638778687, "learning_rate": 3.863717445594007e-05, "loss": 0.2962, "step": 4332 }, { "epoch": 0.07728391538543859, "grad_norm": 0.3132109045982361, "learning_rate": 3.8646093471280767e-05, "loss": 0.2417, "step": 4333 }, { "epoch": 0.07730175150715228, "grad_norm": 0.3177194595336914, "learning_rate": 3.865501248662148e-05, "loss": 0.2613, "step": 4334 }, { "epoch": 0.07731958762886598, "grad_norm": 0.3333134353160858, "learning_rate": 3.8663931501962186e-05, "loss": 0.2534, "step": 4335 }, { "epoch": 0.07733742375057967, "grad_norm": 0.4596266448497772, "learning_rate": 3.867285051730289e-05, "loss": 0.3223, "step": 4336 }, { "epoch": 0.07735525987229337, "grad_norm": 0.3712513744831085, "learning_rate": 3.86817695326436e-05, "loss": 0.2586, "step": 4337 }, { "epoch": 0.07737309599400706, "grad_norm": 0.42716965079307556, "learning_rate": 3.8690688547984305e-05, "loss": 0.2528, "step": 4338 }, { "epoch": 0.07739093211572076, "grad_norm": 0.3493940234184265, "learning_rate": 3.869960756332501e-05, "loss": 0.2737, "step": 4339 }, { "epoch": 0.07740876823743445, "grad_norm": 0.33019155263900757, "learning_rate": 3.870852657866572e-05, "loss": 0.2258, "step": 4340 }, { "epoch": 0.07742660435914815, "grad_norm": 0.37247031927108765, "learning_rate": 3.8717445594006424e-05, "loss": 0.3025, "step": 4341 }, { "epoch": 0.07744444048086184, "grad_norm": 0.4213121831417084, "learning_rate": 3.872636460934713e-05, "loss": 0.2612, "step": 4342 }, { "epoch": 0.07746227660257554, "grad_norm": 0.3395713269710541, "learning_rate": 3.873528362468784e-05, "loss": 0.2536, "step": 4343 }, { "epoch": 0.07748011272428923, "grad_norm": 0.3237455487251282, "learning_rate": 3.8744202640028544e-05, "loss": 0.2746, "step": 4344 }, { "epoch": 0.07749794884600293, "grad_norm": 0.427746057510376, "learning_rate": 3.875312165536925e-05, "loss": 0.2843, "step": 4345 }, { "epoch": 0.07751578496771662, "grad_norm": 0.2901923656463623, "learning_rate": 3.8762040670709956e-05, "loss": 0.2611, "step": 4346 }, { "epoch": 0.07753362108943031, "grad_norm": 0.2814500331878662, "learning_rate": 3.877095968605066e-05, "loss": 0.218, "step": 4347 }, { "epoch": 0.07755145721114401, "grad_norm": 0.35352224111557007, "learning_rate": 3.877987870139137e-05, "loss": 0.236, "step": 4348 }, { "epoch": 0.0775692933328577, "grad_norm": 0.4134027063846588, "learning_rate": 3.8788797716732075e-05, "loss": 0.2982, "step": 4349 }, { "epoch": 0.0775871294545714, "grad_norm": 0.21507737040519714, "learning_rate": 3.879771673207278e-05, "loss": 0.2349, "step": 4350 }, { "epoch": 0.07760496557628509, "grad_norm": 0.37958672642707825, "learning_rate": 3.880663574741349e-05, "loss": 0.2793, "step": 4351 }, { "epoch": 0.07762280169799879, "grad_norm": 0.39292657375335693, "learning_rate": 3.8815554762754195e-05, "loss": 0.3436, "step": 4352 }, { "epoch": 0.07764063781971248, "grad_norm": 0.28580406308174133, "learning_rate": 3.88244737780949e-05, "loss": 0.259, "step": 4353 }, { "epoch": 0.07765847394142618, "grad_norm": 0.3572327494621277, "learning_rate": 3.883339279343561e-05, "loss": 0.2466, "step": 4354 }, { "epoch": 0.07767631006313987, "grad_norm": 0.2864575684070587, "learning_rate": 3.8842311808776314e-05, "loss": 0.2505, "step": 4355 }, { "epoch": 0.07769414618485357, "grad_norm": 0.3204314410686493, "learning_rate": 3.885123082411702e-05, "loss": 0.2431, "step": 4356 }, { "epoch": 0.07771198230656726, "grad_norm": 0.4472067952156067, "learning_rate": 3.8860149839457727e-05, "loss": 0.3314, "step": 4357 }, { "epoch": 0.07772981842828096, "grad_norm": 0.5395408868789673, "learning_rate": 3.8869068854798426e-05, "loss": 0.3384, "step": 4358 }, { "epoch": 0.07774765454999465, "grad_norm": 0.3061109185218811, "learning_rate": 3.887798787013914e-05, "loss": 0.3083, "step": 4359 }, { "epoch": 0.07776549067170835, "grad_norm": 0.3717777132987976, "learning_rate": 3.8886906885479846e-05, "loss": 0.2306, "step": 4360 }, { "epoch": 0.07778332679342204, "grad_norm": 0.3284724950790405, "learning_rate": 3.889582590082055e-05, "loss": 0.2799, "step": 4361 }, { "epoch": 0.07780116291513574, "grad_norm": 0.299607515335083, "learning_rate": 3.890474491616126e-05, "loss": 0.2749, "step": 4362 }, { "epoch": 0.07781899903684943, "grad_norm": 0.2945071756839752, "learning_rate": 3.8913663931501965e-05, "loss": 0.2773, "step": 4363 }, { "epoch": 0.07783683515856313, "grad_norm": 0.32768872380256653, "learning_rate": 3.892258294684267e-05, "loss": 0.3034, "step": 4364 }, { "epoch": 0.07785467128027682, "grad_norm": 0.3400993347167969, "learning_rate": 3.893150196218338e-05, "loss": 0.2791, "step": 4365 }, { "epoch": 0.07787250740199052, "grad_norm": 0.302428662776947, "learning_rate": 3.8940420977524084e-05, "loss": 0.2617, "step": 4366 }, { "epoch": 0.0778903435237042, "grad_norm": 0.3427959978580475, "learning_rate": 3.894933999286479e-05, "loss": 0.2976, "step": 4367 }, { "epoch": 0.0779081796454179, "grad_norm": 0.36683788895606995, "learning_rate": 3.89582590082055e-05, "loss": 0.2897, "step": 4368 }, { "epoch": 0.0779260157671316, "grad_norm": 0.44130566716194153, "learning_rate": 3.89671780235462e-05, "loss": 0.3146, "step": 4369 }, { "epoch": 0.07794385188884528, "grad_norm": 0.3785611093044281, "learning_rate": 3.897609703888691e-05, "loss": 0.2641, "step": 4370 }, { "epoch": 0.07796168801055899, "grad_norm": 0.47966429591178894, "learning_rate": 3.8985016054227616e-05, "loss": 0.2876, "step": 4371 }, { "epoch": 0.07797952413227267, "grad_norm": 0.49851420521736145, "learning_rate": 3.899393506956832e-05, "loss": 0.3524, "step": 4372 }, { "epoch": 0.07799736025398638, "grad_norm": 0.34156444668769836, "learning_rate": 3.900285408490903e-05, "loss": 0.2443, "step": 4373 }, { "epoch": 0.07801519637570006, "grad_norm": 0.35304003953933716, "learning_rate": 3.9011773100249735e-05, "loss": 0.2609, "step": 4374 }, { "epoch": 0.07803303249741377, "grad_norm": 0.5084397196769714, "learning_rate": 3.902069211559044e-05, "loss": 0.2587, "step": 4375 }, { "epoch": 0.07805086861912745, "grad_norm": 0.4033340513706207, "learning_rate": 3.902961113093115e-05, "loss": 0.3194, "step": 4376 }, { "epoch": 0.07806870474084115, "grad_norm": 0.31719550490379333, "learning_rate": 3.9038530146271854e-05, "loss": 0.2694, "step": 4377 }, { "epoch": 0.07808654086255484, "grad_norm": 0.38203608989715576, "learning_rate": 3.904744916161256e-05, "loss": 0.2555, "step": 4378 }, { "epoch": 0.07810437698426854, "grad_norm": 0.34577763080596924, "learning_rate": 3.905636817695327e-05, "loss": 0.2822, "step": 4379 }, { "epoch": 0.07812221310598223, "grad_norm": 0.40282416343688965, "learning_rate": 3.906528719229397e-05, "loss": 0.2629, "step": 4380 }, { "epoch": 0.07814004922769593, "grad_norm": 0.33227238059043884, "learning_rate": 3.907420620763468e-05, "loss": 0.2605, "step": 4381 }, { "epoch": 0.07815788534940962, "grad_norm": 0.32662463188171387, "learning_rate": 3.9083125222975386e-05, "loss": 0.27, "step": 4382 }, { "epoch": 0.07817572147112332, "grad_norm": 0.40394240617752075, "learning_rate": 3.909204423831609e-05, "loss": 0.2442, "step": 4383 }, { "epoch": 0.07819355759283701, "grad_norm": 0.3501577377319336, "learning_rate": 3.91009632536568e-05, "loss": 0.2719, "step": 4384 }, { "epoch": 0.07821139371455071, "grad_norm": 0.3697444796562195, "learning_rate": 3.9109882268997505e-05, "loss": 0.2741, "step": 4385 }, { "epoch": 0.0782292298362644, "grad_norm": 0.47111180424690247, "learning_rate": 3.911880128433821e-05, "loss": 0.2686, "step": 4386 }, { "epoch": 0.0782470659579781, "grad_norm": 0.4611196517944336, "learning_rate": 3.912772029967892e-05, "loss": 0.2824, "step": 4387 }, { "epoch": 0.07826490207969179, "grad_norm": 0.39992693066596985, "learning_rate": 3.9136639315019624e-05, "loss": 0.233, "step": 4388 }, { "epoch": 0.07828273820140548, "grad_norm": 0.32952433824539185, "learning_rate": 3.914555833036033e-05, "loss": 0.2121, "step": 4389 }, { "epoch": 0.07830057432311918, "grad_norm": 0.4698813557624817, "learning_rate": 3.915447734570104e-05, "loss": 0.2841, "step": 4390 }, { "epoch": 0.07831841044483287, "grad_norm": 0.38414064049720764, "learning_rate": 3.9163396361041743e-05, "loss": 0.2978, "step": 4391 }, { "epoch": 0.07833624656654657, "grad_norm": 0.3118569850921631, "learning_rate": 3.917231537638245e-05, "loss": 0.2657, "step": 4392 }, { "epoch": 0.07835408268826026, "grad_norm": 0.3157293200492859, "learning_rate": 3.9181234391723156e-05, "loss": 0.257, "step": 4393 }, { "epoch": 0.07837191880997396, "grad_norm": 0.4550689160823822, "learning_rate": 3.919015340706386e-05, "loss": 0.2503, "step": 4394 }, { "epoch": 0.07838975493168765, "grad_norm": 0.4010894000530243, "learning_rate": 3.919907242240457e-05, "loss": 0.2791, "step": 4395 }, { "epoch": 0.07840759105340135, "grad_norm": 0.2876130938529968, "learning_rate": 3.9207991437745275e-05, "loss": 0.2589, "step": 4396 }, { "epoch": 0.07842542717511504, "grad_norm": 0.4091978371143341, "learning_rate": 3.921691045308598e-05, "loss": 0.2293, "step": 4397 }, { "epoch": 0.07844326329682874, "grad_norm": 0.3687492311000824, "learning_rate": 3.922582946842669e-05, "loss": 0.3011, "step": 4398 }, { "epoch": 0.07846109941854243, "grad_norm": 0.524689257144928, "learning_rate": 3.9234748483767394e-05, "loss": 0.2327, "step": 4399 }, { "epoch": 0.07847893554025613, "grad_norm": 0.3993457555770874, "learning_rate": 3.92436674991081e-05, "loss": 0.2281, "step": 4400 }, { "epoch": 0.07849677166196982, "grad_norm": 0.40008744597435, "learning_rate": 3.925258651444881e-05, "loss": 0.2462, "step": 4401 }, { "epoch": 0.07851460778368352, "grad_norm": 0.36328330636024475, "learning_rate": 3.9261505529789514e-05, "loss": 0.291, "step": 4402 }, { "epoch": 0.07853244390539721, "grad_norm": 0.9162819981575012, "learning_rate": 3.927042454513022e-05, "loss": 0.2926, "step": 4403 }, { "epoch": 0.07855028002711091, "grad_norm": 0.45906102657318115, "learning_rate": 3.9279343560470926e-05, "loss": 0.3202, "step": 4404 }, { "epoch": 0.0785681161488246, "grad_norm": 0.30613192915916443, "learning_rate": 3.928826257581163e-05, "loss": 0.2819, "step": 4405 }, { "epoch": 0.0785859522705383, "grad_norm": 0.40524908900260925, "learning_rate": 3.929718159115234e-05, "loss": 0.289, "step": 4406 }, { "epoch": 0.07860378839225199, "grad_norm": 0.2773537337779999, "learning_rate": 3.9306100606493046e-05, "loss": 0.2311, "step": 4407 }, { "epoch": 0.07862162451396568, "grad_norm": 0.3180605173110962, "learning_rate": 3.931501962183375e-05, "loss": 0.2668, "step": 4408 }, { "epoch": 0.07863946063567938, "grad_norm": 0.3289400339126587, "learning_rate": 3.932393863717446e-05, "loss": 0.3044, "step": 4409 }, { "epoch": 0.07865729675739307, "grad_norm": 0.33620643615722656, "learning_rate": 3.9332857652515165e-05, "loss": 0.2644, "step": 4410 }, { "epoch": 0.07867513287910677, "grad_norm": 0.5794734954833984, "learning_rate": 3.934177666785587e-05, "loss": 0.2851, "step": 4411 }, { "epoch": 0.07869296900082046, "grad_norm": 0.33227694034576416, "learning_rate": 3.935069568319658e-05, "loss": 0.2819, "step": 4412 }, { "epoch": 0.07871080512253416, "grad_norm": 0.33045604825019836, "learning_rate": 3.9359614698537284e-05, "loss": 0.235, "step": 4413 }, { "epoch": 0.07872864124424785, "grad_norm": 0.4035234749317169, "learning_rate": 3.936853371387799e-05, "loss": 0.2513, "step": 4414 }, { "epoch": 0.07874647736596155, "grad_norm": 0.3834822177886963, "learning_rate": 3.9377452729218697e-05, "loss": 0.2543, "step": 4415 }, { "epoch": 0.07876431348767524, "grad_norm": 0.3251235783100128, "learning_rate": 3.93863717445594e-05, "loss": 0.2536, "step": 4416 }, { "epoch": 0.07878214960938894, "grad_norm": 0.2962441146373749, "learning_rate": 3.939529075990011e-05, "loss": 0.2541, "step": 4417 }, { "epoch": 0.07879998573110263, "grad_norm": 0.29714933037757874, "learning_rate": 3.9404209775240816e-05, "loss": 0.2252, "step": 4418 }, { "epoch": 0.07881782185281633, "grad_norm": 0.28641360998153687, "learning_rate": 3.941312879058152e-05, "loss": 0.2547, "step": 4419 }, { "epoch": 0.07883565797453002, "grad_norm": 0.2969341576099396, "learning_rate": 3.942204780592223e-05, "loss": 0.2608, "step": 4420 }, { "epoch": 0.07885349409624372, "grad_norm": 0.3222426772117615, "learning_rate": 3.9430966821262935e-05, "loss": 0.2769, "step": 4421 }, { "epoch": 0.0788713302179574, "grad_norm": 0.3375169634819031, "learning_rate": 3.943988583660364e-05, "loss": 0.2995, "step": 4422 }, { "epoch": 0.07888916633967111, "grad_norm": 0.2864764630794525, "learning_rate": 3.944880485194435e-05, "loss": 0.2167, "step": 4423 }, { "epoch": 0.0789070024613848, "grad_norm": 0.3258938193321228, "learning_rate": 3.9457723867285054e-05, "loss": 0.2607, "step": 4424 }, { "epoch": 0.0789248385830985, "grad_norm": 0.3061988353729248, "learning_rate": 3.946664288262576e-05, "loss": 0.2533, "step": 4425 }, { "epoch": 0.07894267470481219, "grad_norm": 0.4185546636581421, "learning_rate": 3.947556189796647e-05, "loss": 0.262, "step": 4426 }, { "epoch": 0.07896051082652589, "grad_norm": 0.38305729627609253, "learning_rate": 3.948448091330717e-05, "loss": 0.271, "step": 4427 }, { "epoch": 0.07897834694823958, "grad_norm": 0.7182749509811401, "learning_rate": 3.949339992864788e-05, "loss": 0.3611, "step": 4428 }, { "epoch": 0.07899618306995326, "grad_norm": 0.2859683036804199, "learning_rate": 3.9502318943988586e-05, "loss": 0.2356, "step": 4429 }, { "epoch": 0.07901401919166696, "grad_norm": 0.33025452494621277, "learning_rate": 3.951123795932929e-05, "loss": 0.2419, "step": 4430 }, { "epoch": 0.07903185531338065, "grad_norm": 0.5470520853996277, "learning_rate": 3.952015697467e-05, "loss": 0.3139, "step": 4431 }, { "epoch": 0.07904969143509435, "grad_norm": 0.2969801127910614, "learning_rate": 3.9529075990010705e-05, "loss": 0.2381, "step": 4432 }, { "epoch": 0.07906752755680804, "grad_norm": 0.327876478433609, "learning_rate": 3.953799500535141e-05, "loss": 0.2929, "step": 4433 }, { "epoch": 0.07908536367852174, "grad_norm": 0.37913572788238525, "learning_rate": 3.954691402069212e-05, "loss": 0.3041, "step": 4434 }, { "epoch": 0.07910319980023543, "grad_norm": 0.43590933084487915, "learning_rate": 3.9555833036032824e-05, "loss": 0.2954, "step": 4435 }, { "epoch": 0.07912103592194913, "grad_norm": 0.3637465238571167, "learning_rate": 3.956475205137353e-05, "loss": 0.244, "step": 4436 }, { "epoch": 0.07913887204366282, "grad_norm": 0.3469159007072449, "learning_rate": 3.957367106671424e-05, "loss": 0.2725, "step": 4437 }, { "epoch": 0.07915670816537652, "grad_norm": 0.2922632694244385, "learning_rate": 3.958259008205494e-05, "loss": 0.2612, "step": 4438 }, { "epoch": 0.07917454428709021, "grad_norm": 0.3450004756450653, "learning_rate": 3.959150909739565e-05, "loss": 0.2596, "step": 4439 }, { "epoch": 0.07919238040880391, "grad_norm": 0.29621705412864685, "learning_rate": 3.9600428112736356e-05, "loss": 0.2281, "step": 4440 }, { "epoch": 0.0792102165305176, "grad_norm": 0.2661037743091583, "learning_rate": 3.960934712807706e-05, "loss": 0.2408, "step": 4441 }, { "epoch": 0.0792280526522313, "grad_norm": 0.2868179976940155, "learning_rate": 3.961826614341777e-05, "loss": 0.2661, "step": 4442 }, { "epoch": 0.07924588877394499, "grad_norm": 0.34746330976486206, "learning_rate": 3.9627185158758475e-05, "loss": 0.2566, "step": 4443 }, { "epoch": 0.0792637248956587, "grad_norm": 0.2857220470905304, "learning_rate": 3.963610417409918e-05, "loss": 0.2432, "step": 4444 }, { "epoch": 0.07928156101737238, "grad_norm": 0.35333573818206787, "learning_rate": 3.964502318943989e-05, "loss": 0.2684, "step": 4445 }, { "epoch": 0.07929939713908608, "grad_norm": 0.28018221259117126, "learning_rate": 3.9653942204780594e-05, "loss": 0.2479, "step": 4446 }, { "epoch": 0.07931723326079977, "grad_norm": 0.30884823203086853, "learning_rate": 3.96628612201213e-05, "loss": 0.2589, "step": 4447 }, { "epoch": 0.07933506938251346, "grad_norm": 0.41764792799949646, "learning_rate": 3.967178023546201e-05, "loss": 0.2632, "step": 4448 }, { "epoch": 0.07935290550422716, "grad_norm": 0.30291947722435, "learning_rate": 3.9680699250802713e-05, "loss": 0.2336, "step": 4449 }, { "epoch": 0.07937074162594085, "grad_norm": 0.45131614804267883, "learning_rate": 3.968961826614342e-05, "loss": 0.2791, "step": 4450 }, { "epoch": 0.07938857774765455, "grad_norm": 0.951149582862854, "learning_rate": 3.9698537281484126e-05, "loss": 0.2663, "step": 4451 }, { "epoch": 0.07940641386936824, "grad_norm": 0.324270099401474, "learning_rate": 3.970745629682483e-05, "loss": 0.222, "step": 4452 }, { "epoch": 0.07942424999108194, "grad_norm": 0.3507010340690613, "learning_rate": 3.971637531216554e-05, "loss": 0.2753, "step": 4453 }, { "epoch": 0.07944208611279563, "grad_norm": 0.45777058601379395, "learning_rate": 3.9725294327506245e-05, "loss": 0.3207, "step": 4454 }, { "epoch": 0.07945992223450933, "grad_norm": 0.4051269590854645, "learning_rate": 3.973421334284695e-05, "loss": 0.274, "step": 4455 }, { "epoch": 0.07947775835622302, "grad_norm": 0.28481754660606384, "learning_rate": 3.974313235818766e-05, "loss": 0.2515, "step": 4456 }, { "epoch": 0.07949559447793672, "grad_norm": 0.5084226727485657, "learning_rate": 3.9752051373528365e-05, "loss": 0.3343, "step": 4457 }, { "epoch": 0.07951343059965041, "grad_norm": 0.44690635800361633, "learning_rate": 3.976097038886907e-05, "loss": 0.3349, "step": 4458 }, { "epoch": 0.07953126672136411, "grad_norm": 0.3401438593864441, "learning_rate": 3.976988940420978e-05, "loss": 0.2988, "step": 4459 }, { "epoch": 0.0795491028430778, "grad_norm": 0.34159207344055176, "learning_rate": 3.9778808419550484e-05, "loss": 0.2574, "step": 4460 }, { "epoch": 0.0795669389647915, "grad_norm": 0.5162934064865112, "learning_rate": 3.978772743489119e-05, "loss": 0.2968, "step": 4461 }, { "epoch": 0.07958477508650519, "grad_norm": 0.3637939393520355, "learning_rate": 3.9796646450231896e-05, "loss": 0.3083, "step": 4462 }, { "epoch": 0.07960261120821889, "grad_norm": 0.516302764415741, "learning_rate": 3.98055654655726e-05, "loss": 0.3011, "step": 4463 }, { "epoch": 0.07962044732993258, "grad_norm": 0.6440324783325195, "learning_rate": 3.981448448091331e-05, "loss": 0.3123, "step": 4464 }, { "epoch": 0.07963828345164628, "grad_norm": 0.8769925236701965, "learning_rate": 3.9823403496254016e-05, "loss": 0.2623, "step": 4465 }, { "epoch": 0.07965611957335997, "grad_norm": 0.40881508588790894, "learning_rate": 3.983232251159472e-05, "loss": 0.2222, "step": 4466 }, { "epoch": 0.07967395569507367, "grad_norm": 0.36568158864974976, "learning_rate": 3.984124152693543e-05, "loss": 0.2677, "step": 4467 }, { "epoch": 0.07969179181678736, "grad_norm": 0.3032169044017792, "learning_rate": 3.9850160542276135e-05, "loss": 0.2382, "step": 4468 }, { "epoch": 0.07970962793850105, "grad_norm": 0.3037692606449127, "learning_rate": 3.985907955761684e-05, "loss": 0.2552, "step": 4469 }, { "epoch": 0.07972746406021475, "grad_norm": 0.3624633848667145, "learning_rate": 3.986799857295755e-05, "loss": 0.2787, "step": 4470 }, { "epoch": 0.07974530018192844, "grad_norm": 0.31159213185310364, "learning_rate": 3.9876917588298254e-05, "loss": 0.263, "step": 4471 }, { "epoch": 0.07976313630364214, "grad_norm": 0.28747597336769104, "learning_rate": 3.988583660363896e-05, "loss": 0.2823, "step": 4472 }, { "epoch": 0.07978097242535583, "grad_norm": 0.30559128522872925, "learning_rate": 3.989475561897967e-05, "loss": 0.2686, "step": 4473 }, { "epoch": 0.07979880854706953, "grad_norm": 0.35138267278671265, "learning_rate": 3.990367463432037e-05, "loss": 0.2751, "step": 4474 }, { "epoch": 0.07981664466878322, "grad_norm": 0.30839818716049194, "learning_rate": 3.991259364966108e-05, "loss": 0.2345, "step": 4475 }, { "epoch": 0.07983448079049692, "grad_norm": 0.3390248417854309, "learning_rate": 3.9921512665001786e-05, "loss": 0.2221, "step": 4476 }, { "epoch": 0.0798523169122106, "grad_norm": 0.42686232924461365, "learning_rate": 3.993043168034249e-05, "loss": 0.2587, "step": 4477 }, { "epoch": 0.07987015303392431, "grad_norm": 0.36679717898368835, "learning_rate": 3.99393506956832e-05, "loss": 0.2642, "step": 4478 }, { "epoch": 0.079887989155638, "grad_norm": 0.31725236773490906, "learning_rate": 3.9948269711023905e-05, "loss": 0.2474, "step": 4479 }, { "epoch": 0.0799058252773517, "grad_norm": 0.33426448702812195, "learning_rate": 3.995718872636461e-05, "loss": 0.2462, "step": 4480 }, { "epoch": 0.07992366139906538, "grad_norm": 0.3012705445289612, "learning_rate": 3.996610774170532e-05, "loss": 0.2501, "step": 4481 }, { "epoch": 0.07994149752077909, "grad_norm": 0.38509663939476013, "learning_rate": 3.9975026757046024e-05, "loss": 0.2362, "step": 4482 }, { "epoch": 0.07995933364249277, "grad_norm": 0.29118025302886963, "learning_rate": 3.998394577238673e-05, "loss": 0.224, "step": 4483 }, { "epoch": 0.07997716976420648, "grad_norm": 0.4841828942298889, "learning_rate": 3.999286478772744e-05, "loss": 0.2055, "step": 4484 }, { "epoch": 0.07999500588592016, "grad_norm": 0.33768928050994873, "learning_rate": 4.000178380306814e-05, "loss": 0.2323, "step": 4485 }, { "epoch": 0.08001284200763387, "grad_norm": 0.27681782841682434, "learning_rate": 4.001070281840885e-05, "loss": 0.2742, "step": 4486 }, { "epoch": 0.08003067812934755, "grad_norm": 0.3340684771537781, "learning_rate": 4.0019621833749556e-05, "loss": 0.2292, "step": 4487 }, { "epoch": 0.08004851425106124, "grad_norm": 0.889191210269928, "learning_rate": 4.002854084909026e-05, "loss": 0.2753, "step": 4488 }, { "epoch": 0.08006635037277494, "grad_norm": 0.36859461665153503, "learning_rate": 4.003745986443097e-05, "loss": 0.301, "step": 4489 }, { "epoch": 0.08008418649448863, "grad_norm": 0.30707311630249023, "learning_rate": 4.0046378879771675e-05, "loss": 0.2847, "step": 4490 }, { "epoch": 0.08010202261620233, "grad_norm": 0.27876847982406616, "learning_rate": 4.005529789511238e-05, "loss": 0.2281, "step": 4491 }, { "epoch": 0.08011985873791602, "grad_norm": 0.3858083188533783, "learning_rate": 4.006421691045309e-05, "loss": 0.2778, "step": 4492 }, { "epoch": 0.08013769485962972, "grad_norm": 0.3480464518070221, "learning_rate": 4.0073135925793794e-05, "loss": 0.2583, "step": 4493 }, { "epoch": 0.08015553098134341, "grad_norm": 0.4255104660987854, "learning_rate": 4.00820549411345e-05, "loss": 0.2828, "step": 4494 }, { "epoch": 0.08017336710305711, "grad_norm": 0.27423787117004395, "learning_rate": 4.009097395647521e-05, "loss": 0.2374, "step": 4495 }, { "epoch": 0.0801912032247708, "grad_norm": 0.3389829397201538, "learning_rate": 4.009989297181591e-05, "loss": 0.2782, "step": 4496 }, { "epoch": 0.0802090393464845, "grad_norm": 0.319293349981308, "learning_rate": 4.010881198715662e-05, "loss": 0.2538, "step": 4497 }, { "epoch": 0.08022687546819819, "grad_norm": 0.5192918181419373, "learning_rate": 4.0117731002497326e-05, "loss": 0.3155, "step": 4498 }, { "epoch": 0.0802447115899119, "grad_norm": 0.3251531422138214, "learning_rate": 4.012665001783803e-05, "loss": 0.2976, "step": 4499 }, { "epoch": 0.08026254771162558, "grad_norm": 0.2674753963947296, "learning_rate": 4.013556903317874e-05, "loss": 0.2079, "step": 4500 }, { "epoch": 0.08028038383333928, "grad_norm": 0.4634806513786316, "learning_rate": 4.0144488048519445e-05, "loss": 0.2518, "step": 4501 }, { "epoch": 0.08029821995505297, "grad_norm": 0.33979034423828125, "learning_rate": 4.015340706386015e-05, "loss": 0.3003, "step": 4502 }, { "epoch": 0.08031605607676667, "grad_norm": 0.424564391374588, "learning_rate": 4.016232607920086e-05, "loss": 0.3032, "step": 4503 }, { "epoch": 0.08033389219848036, "grad_norm": 0.3706408143043518, "learning_rate": 4.0171245094541564e-05, "loss": 0.2565, "step": 4504 }, { "epoch": 0.08035172832019406, "grad_norm": 0.3482458293437958, "learning_rate": 4.018016410988227e-05, "loss": 0.2641, "step": 4505 }, { "epoch": 0.08036956444190775, "grad_norm": 0.3800894618034363, "learning_rate": 4.018908312522298e-05, "loss": 0.275, "step": 4506 }, { "epoch": 0.08038740056362145, "grad_norm": 0.36897557973861694, "learning_rate": 4.0198002140563684e-05, "loss": 0.2997, "step": 4507 }, { "epoch": 0.08040523668533514, "grad_norm": 0.3793332576751709, "learning_rate": 4.020692115590439e-05, "loss": 0.2919, "step": 4508 }, { "epoch": 0.08042307280704883, "grad_norm": 0.41461849212646484, "learning_rate": 4.0215840171245096e-05, "loss": 0.2816, "step": 4509 }, { "epoch": 0.08044090892876253, "grad_norm": 0.3164636194705963, "learning_rate": 4.02247591865858e-05, "loss": 0.2255, "step": 4510 }, { "epoch": 0.08045874505047622, "grad_norm": 0.32979562878608704, "learning_rate": 4.023367820192651e-05, "loss": 0.2201, "step": 4511 }, { "epoch": 0.08047658117218992, "grad_norm": 0.3270874321460724, "learning_rate": 4.0242597217267215e-05, "loss": 0.2697, "step": 4512 }, { "epoch": 0.08049441729390361, "grad_norm": 0.61639404296875, "learning_rate": 4.025151623260792e-05, "loss": 0.3907, "step": 4513 }, { "epoch": 0.08051225341561731, "grad_norm": 0.2716813385486603, "learning_rate": 4.026043524794863e-05, "loss": 0.2619, "step": 4514 }, { "epoch": 0.080530089537331, "grad_norm": 0.2883681058883667, "learning_rate": 4.0269354263289335e-05, "loss": 0.2264, "step": 4515 }, { "epoch": 0.0805479256590447, "grad_norm": 0.2986631393432617, "learning_rate": 4.027827327863004e-05, "loss": 0.3007, "step": 4516 }, { "epoch": 0.08056576178075839, "grad_norm": 0.36737969517707825, "learning_rate": 4.028719229397075e-05, "loss": 0.3252, "step": 4517 }, { "epoch": 0.08058359790247209, "grad_norm": 0.33752569556236267, "learning_rate": 4.0296111309311454e-05, "loss": 0.2935, "step": 4518 }, { "epoch": 0.08060143402418578, "grad_norm": 0.28609350323677063, "learning_rate": 4.030503032465216e-05, "loss": 0.209, "step": 4519 }, { "epoch": 0.08061927014589948, "grad_norm": 0.3234710693359375, "learning_rate": 4.0313949339992867e-05, "loss": 0.2614, "step": 4520 }, { "epoch": 0.08063710626761317, "grad_norm": 0.47285953164100647, "learning_rate": 4.032286835533357e-05, "loss": 0.2311, "step": 4521 }, { "epoch": 0.08065494238932687, "grad_norm": 0.5395081639289856, "learning_rate": 4.033178737067428e-05, "loss": 0.304, "step": 4522 }, { "epoch": 0.08067277851104056, "grad_norm": 0.2810700833797455, "learning_rate": 4.0340706386014986e-05, "loss": 0.2268, "step": 4523 }, { "epoch": 0.08069061463275426, "grad_norm": 0.30713704228401184, "learning_rate": 4.034962540135569e-05, "loss": 0.2732, "step": 4524 }, { "epoch": 0.08070845075446795, "grad_norm": 0.3583277463912964, "learning_rate": 4.03585444166964e-05, "loss": 0.3099, "step": 4525 }, { "epoch": 0.08072628687618165, "grad_norm": 0.28816354274749756, "learning_rate": 4.0367463432037105e-05, "loss": 0.2827, "step": 4526 }, { "epoch": 0.08074412299789534, "grad_norm": 0.29862967133522034, "learning_rate": 4.037638244737781e-05, "loss": 0.2634, "step": 4527 }, { "epoch": 0.08076195911960903, "grad_norm": 0.28392142057418823, "learning_rate": 4.038530146271852e-05, "loss": 0.2388, "step": 4528 }, { "epoch": 0.08077979524132273, "grad_norm": 0.25666290521621704, "learning_rate": 4.0394220478059224e-05, "loss": 0.2536, "step": 4529 }, { "epoch": 0.08079763136303642, "grad_norm": 0.35175782442092896, "learning_rate": 4.040313949339993e-05, "loss": 0.2528, "step": 4530 }, { "epoch": 0.08081546748475012, "grad_norm": 0.334356427192688, "learning_rate": 4.041205850874064e-05, "loss": 0.2205, "step": 4531 }, { "epoch": 0.0808333036064638, "grad_norm": 0.43859541416168213, "learning_rate": 4.042097752408134e-05, "loss": 0.2763, "step": 4532 }, { "epoch": 0.0808511397281775, "grad_norm": 0.35310041904449463, "learning_rate": 4.042989653942205e-05, "loss": 0.2671, "step": 4533 }, { "epoch": 0.0808689758498912, "grad_norm": 0.3610978126525879, "learning_rate": 4.0438815554762756e-05, "loss": 0.2506, "step": 4534 }, { "epoch": 0.0808868119716049, "grad_norm": 0.3457763195037842, "learning_rate": 4.044773457010346e-05, "loss": 0.2822, "step": 4535 }, { "epoch": 0.08090464809331858, "grad_norm": 0.31126368045806885, "learning_rate": 4.045665358544417e-05, "loss": 0.2295, "step": 4536 }, { "epoch": 0.08092248421503229, "grad_norm": 0.3494953513145447, "learning_rate": 4.0465572600784875e-05, "loss": 0.3176, "step": 4537 }, { "epoch": 0.08094032033674597, "grad_norm": 0.486221045255661, "learning_rate": 4.047449161612558e-05, "loss": 0.3022, "step": 4538 }, { "epoch": 0.08095815645845968, "grad_norm": 0.4887290894985199, "learning_rate": 4.048341063146629e-05, "loss": 0.2503, "step": 4539 }, { "epoch": 0.08097599258017336, "grad_norm": 0.3604911267757416, "learning_rate": 4.0492329646806994e-05, "loss": 0.3364, "step": 4540 }, { "epoch": 0.08099382870188707, "grad_norm": 0.29284030199050903, "learning_rate": 4.05012486621477e-05, "loss": 0.2838, "step": 4541 }, { "epoch": 0.08101166482360075, "grad_norm": 0.3474516272544861, "learning_rate": 4.051016767748841e-05, "loss": 0.2505, "step": 4542 }, { "epoch": 0.08102950094531446, "grad_norm": 0.3401896059513092, "learning_rate": 4.051908669282911e-05, "loss": 0.252, "step": 4543 }, { "epoch": 0.08104733706702814, "grad_norm": 0.3116951882839203, "learning_rate": 4.052800570816982e-05, "loss": 0.2708, "step": 4544 }, { "epoch": 0.08106517318874185, "grad_norm": 0.3368425965309143, "learning_rate": 4.0536924723510526e-05, "loss": 0.2603, "step": 4545 }, { "epoch": 0.08108300931045553, "grad_norm": 0.3442945182323456, "learning_rate": 4.054584373885123e-05, "loss": 0.3056, "step": 4546 }, { "epoch": 0.08110084543216924, "grad_norm": 0.32995864748954773, "learning_rate": 4.055476275419194e-05, "loss": 0.2738, "step": 4547 }, { "epoch": 0.08111868155388292, "grad_norm": 0.3128892481327057, "learning_rate": 4.0563681769532645e-05, "loss": 0.2339, "step": 4548 }, { "epoch": 0.08113651767559661, "grad_norm": 0.30660542845726013, "learning_rate": 4.057260078487335e-05, "loss": 0.2371, "step": 4549 }, { "epoch": 0.08115435379731031, "grad_norm": 0.3472464680671692, "learning_rate": 4.058151980021406e-05, "loss": 0.2861, "step": 4550 }, { "epoch": 0.081172189919024, "grad_norm": 0.2973649203777313, "learning_rate": 4.0590438815554764e-05, "loss": 0.2997, "step": 4551 }, { "epoch": 0.0811900260407377, "grad_norm": 0.41225185990333557, "learning_rate": 4.059935783089547e-05, "loss": 0.2532, "step": 4552 }, { "epoch": 0.08120786216245139, "grad_norm": 0.33151519298553467, "learning_rate": 4.060827684623618e-05, "loss": 0.3301, "step": 4553 }, { "epoch": 0.0812256982841651, "grad_norm": 0.27119380235671997, "learning_rate": 4.0617195861576883e-05, "loss": 0.2326, "step": 4554 }, { "epoch": 0.08124353440587878, "grad_norm": 0.2899776101112366, "learning_rate": 4.062611487691759e-05, "loss": 0.2739, "step": 4555 }, { "epoch": 0.08126137052759248, "grad_norm": 0.4134208559989929, "learning_rate": 4.0635033892258296e-05, "loss": 0.2748, "step": 4556 }, { "epoch": 0.08127920664930617, "grad_norm": 0.4175553321838379, "learning_rate": 4.0643952907599e-05, "loss": 0.288, "step": 4557 }, { "epoch": 0.08129704277101987, "grad_norm": 0.3602147400379181, "learning_rate": 4.065287192293971e-05, "loss": 0.2814, "step": 4558 }, { "epoch": 0.08131487889273356, "grad_norm": 0.42320945858955383, "learning_rate": 4.0661790938280415e-05, "loss": 0.2402, "step": 4559 }, { "epoch": 0.08133271501444726, "grad_norm": 0.3441101014614105, "learning_rate": 4.067070995362112e-05, "loss": 0.2287, "step": 4560 }, { "epoch": 0.08135055113616095, "grad_norm": 0.42264416813850403, "learning_rate": 4.067962896896183e-05, "loss": 0.2616, "step": 4561 }, { "epoch": 0.08136838725787465, "grad_norm": 0.48212215304374695, "learning_rate": 4.0688547984302534e-05, "loss": 0.3084, "step": 4562 }, { "epoch": 0.08138622337958834, "grad_norm": 0.2982146143913269, "learning_rate": 4.069746699964324e-05, "loss": 0.2819, "step": 4563 }, { "epoch": 0.08140405950130204, "grad_norm": 0.28909289836883545, "learning_rate": 4.070638601498395e-05, "loss": 0.2011, "step": 4564 }, { "epoch": 0.08142189562301573, "grad_norm": 0.3757196068763733, "learning_rate": 4.0715305030324654e-05, "loss": 0.2634, "step": 4565 }, { "epoch": 0.08143973174472943, "grad_norm": 0.29326966404914856, "learning_rate": 4.072422404566536e-05, "loss": 0.2332, "step": 4566 }, { "epoch": 0.08145756786644312, "grad_norm": 0.38509637117385864, "learning_rate": 4.0733143061006066e-05, "loss": 0.3351, "step": 4567 }, { "epoch": 0.08147540398815682, "grad_norm": 0.4050680994987488, "learning_rate": 4.074206207634677e-05, "loss": 0.2804, "step": 4568 }, { "epoch": 0.08149324010987051, "grad_norm": 0.2764883041381836, "learning_rate": 4.075098109168748e-05, "loss": 0.2557, "step": 4569 }, { "epoch": 0.0815110762315842, "grad_norm": 0.35010194778442383, "learning_rate": 4.0759900107028186e-05, "loss": 0.2069, "step": 4570 }, { "epoch": 0.0815289123532979, "grad_norm": 0.4712359607219696, "learning_rate": 4.076881912236889e-05, "loss": 0.2897, "step": 4571 }, { "epoch": 0.08154674847501159, "grad_norm": 0.3436199724674225, "learning_rate": 4.07777381377096e-05, "loss": 0.2809, "step": 4572 }, { "epoch": 0.08156458459672529, "grad_norm": 0.408003032207489, "learning_rate": 4.0786657153050305e-05, "loss": 0.2496, "step": 4573 }, { "epoch": 0.08158242071843898, "grad_norm": 0.3031524121761322, "learning_rate": 4.079557616839101e-05, "loss": 0.2403, "step": 4574 }, { "epoch": 0.08160025684015268, "grad_norm": 0.36796116828918457, "learning_rate": 4.080449518373172e-05, "loss": 0.2636, "step": 4575 }, { "epoch": 0.08161809296186637, "grad_norm": 0.3667079210281372, "learning_rate": 4.0813414199072424e-05, "loss": 0.2927, "step": 4576 }, { "epoch": 0.08163592908358007, "grad_norm": 0.3545800745487213, "learning_rate": 4.082233321441313e-05, "loss": 0.2731, "step": 4577 }, { "epoch": 0.08165376520529376, "grad_norm": 0.44302111864089966, "learning_rate": 4.0831252229753837e-05, "loss": 0.3496, "step": 4578 }, { "epoch": 0.08167160132700746, "grad_norm": 0.3333475887775421, "learning_rate": 4.084017124509454e-05, "loss": 0.2595, "step": 4579 }, { "epoch": 0.08168943744872115, "grad_norm": 0.353995144367218, "learning_rate": 4.084909026043525e-05, "loss": 0.2505, "step": 4580 }, { "epoch": 0.08170727357043485, "grad_norm": 0.3846065104007721, "learning_rate": 4.0858009275775956e-05, "loss": 0.2802, "step": 4581 }, { "epoch": 0.08172510969214854, "grad_norm": 0.4532749056816101, "learning_rate": 4.086692829111666e-05, "loss": 0.2179, "step": 4582 }, { "epoch": 0.08174294581386224, "grad_norm": 0.3650430738925934, "learning_rate": 4.087584730645737e-05, "loss": 0.2687, "step": 4583 }, { "epoch": 0.08176078193557593, "grad_norm": 0.49413353204727173, "learning_rate": 4.0884766321798075e-05, "loss": 0.4242, "step": 4584 }, { "epoch": 0.08177861805728963, "grad_norm": 0.34336015582084656, "learning_rate": 4.089368533713878e-05, "loss": 0.2786, "step": 4585 }, { "epoch": 0.08179645417900332, "grad_norm": 0.355006605386734, "learning_rate": 4.090260435247949e-05, "loss": 0.3308, "step": 4586 }, { "epoch": 0.08181429030071702, "grad_norm": 0.30480387806892395, "learning_rate": 4.0911523367820194e-05, "loss": 0.2222, "step": 4587 }, { "epoch": 0.0818321264224307, "grad_norm": 0.2543972134590149, "learning_rate": 4.09204423831609e-05, "loss": 0.2506, "step": 4588 }, { "epoch": 0.0818499625441444, "grad_norm": 0.350434273481369, "learning_rate": 4.092936139850161e-05, "loss": 0.2291, "step": 4589 }, { "epoch": 0.0818677986658581, "grad_norm": 0.3976143002510071, "learning_rate": 4.093828041384231e-05, "loss": 0.2843, "step": 4590 }, { "epoch": 0.08188563478757178, "grad_norm": 0.3548804819583893, "learning_rate": 4.094719942918302e-05, "loss": 0.2537, "step": 4591 }, { "epoch": 0.08190347090928549, "grad_norm": 0.37451016902923584, "learning_rate": 4.0956118444523726e-05, "loss": 0.2519, "step": 4592 }, { "epoch": 0.08192130703099917, "grad_norm": 0.3483447730541229, "learning_rate": 4.096503745986443e-05, "loss": 0.2511, "step": 4593 }, { "epoch": 0.08193914315271288, "grad_norm": 0.33760204911231995, "learning_rate": 4.097395647520514e-05, "loss": 0.2498, "step": 4594 }, { "epoch": 0.08195697927442656, "grad_norm": 0.3115421235561371, "learning_rate": 4.0982875490545845e-05, "loss": 0.2285, "step": 4595 }, { "epoch": 0.08197481539614027, "grad_norm": 0.3708973228931427, "learning_rate": 4.099179450588655e-05, "loss": 0.2727, "step": 4596 }, { "epoch": 0.08199265151785395, "grad_norm": 0.3650602102279663, "learning_rate": 4.100071352122726e-05, "loss": 0.2245, "step": 4597 }, { "epoch": 0.08201048763956766, "grad_norm": 0.2705913782119751, "learning_rate": 4.1009632536567964e-05, "loss": 0.2337, "step": 4598 }, { "epoch": 0.08202832376128134, "grad_norm": 0.3268061876296997, "learning_rate": 4.101855155190867e-05, "loss": 0.2284, "step": 4599 }, { "epoch": 0.08204615988299505, "grad_norm": 0.4855281412601471, "learning_rate": 4.102747056724938e-05, "loss": 0.2852, "step": 4600 }, { "epoch": 0.08206399600470873, "grad_norm": 0.33552852272987366, "learning_rate": 4.103638958259008e-05, "loss": 0.2666, "step": 4601 }, { "epoch": 0.08208183212642244, "grad_norm": 0.29675179719924927, "learning_rate": 4.104530859793079e-05, "loss": 0.2499, "step": 4602 }, { "epoch": 0.08209966824813612, "grad_norm": 0.38519686460494995, "learning_rate": 4.1054227613271496e-05, "loss": 0.2671, "step": 4603 }, { "epoch": 0.08211750436984983, "grad_norm": 0.29932722449302673, "learning_rate": 4.10631466286122e-05, "loss": 0.2524, "step": 4604 }, { "epoch": 0.08213534049156351, "grad_norm": 0.3295300602912903, "learning_rate": 4.107206564395291e-05, "loss": 0.2657, "step": 4605 }, { "epoch": 0.08215317661327722, "grad_norm": 0.2959982752799988, "learning_rate": 4.1080984659293615e-05, "loss": 0.2508, "step": 4606 }, { "epoch": 0.0821710127349909, "grad_norm": 0.4345143437385559, "learning_rate": 4.108990367463432e-05, "loss": 0.2031, "step": 4607 }, { "epoch": 0.0821888488567046, "grad_norm": 0.3866029977798462, "learning_rate": 4.109882268997503e-05, "loss": 0.3025, "step": 4608 }, { "epoch": 0.08220668497841829, "grad_norm": 0.2634138762950897, "learning_rate": 4.1107741705315734e-05, "loss": 0.214, "step": 4609 }, { "epoch": 0.08222452110013198, "grad_norm": 0.3317263722419739, "learning_rate": 4.111666072065644e-05, "loss": 0.2711, "step": 4610 }, { "epoch": 0.08224235722184568, "grad_norm": 0.3610650897026062, "learning_rate": 4.112557973599715e-05, "loss": 0.2235, "step": 4611 }, { "epoch": 0.08226019334355937, "grad_norm": 0.42297473549842834, "learning_rate": 4.1134498751337854e-05, "loss": 0.2279, "step": 4612 }, { "epoch": 0.08227802946527307, "grad_norm": 0.58136385679245, "learning_rate": 4.114341776667856e-05, "loss": 0.2404, "step": 4613 }, { "epoch": 0.08229586558698676, "grad_norm": 0.3265692889690399, "learning_rate": 4.1152336782019266e-05, "loss": 0.2354, "step": 4614 }, { "epoch": 0.08231370170870046, "grad_norm": 0.34121695160865784, "learning_rate": 4.116125579735997e-05, "loss": 0.2437, "step": 4615 }, { "epoch": 0.08233153783041415, "grad_norm": 0.3945043087005615, "learning_rate": 4.117017481270068e-05, "loss": 0.2072, "step": 4616 }, { "epoch": 0.08234937395212785, "grad_norm": 0.48390698432922363, "learning_rate": 4.1179093828041385e-05, "loss": 0.3129, "step": 4617 }, { "epoch": 0.08236721007384154, "grad_norm": 0.46820446848869324, "learning_rate": 4.118801284338209e-05, "loss": 0.2492, "step": 4618 }, { "epoch": 0.08238504619555524, "grad_norm": 0.2748047709465027, "learning_rate": 4.11969318587228e-05, "loss": 0.2898, "step": 4619 }, { "epoch": 0.08240288231726893, "grad_norm": 0.28265729546546936, "learning_rate": 4.1205850874063505e-05, "loss": 0.2751, "step": 4620 }, { "epoch": 0.08242071843898263, "grad_norm": 0.37369540333747864, "learning_rate": 4.121476988940422e-05, "loss": 0.2816, "step": 4621 }, { "epoch": 0.08243855456069632, "grad_norm": 0.3699372112751007, "learning_rate": 4.122368890474492e-05, "loss": 0.2982, "step": 4622 }, { "epoch": 0.08245639068241002, "grad_norm": 0.289980947971344, "learning_rate": 4.1232607920085624e-05, "loss": 0.2412, "step": 4623 }, { "epoch": 0.08247422680412371, "grad_norm": 0.36862096190452576, "learning_rate": 4.124152693542633e-05, "loss": 0.303, "step": 4624 }, { "epoch": 0.08249206292583741, "grad_norm": 0.4404594600200653, "learning_rate": 4.1250445950767036e-05, "loss": 0.2698, "step": 4625 }, { "epoch": 0.0825098990475511, "grad_norm": 0.2618829309940338, "learning_rate": 4.125936496610774e-05, "loss": 0.2127, "step": 4626 }, { "epoch": 0.0825277351692648, "grad_norm": 0.2572970986366272, "learning_rate": 4.126828398144845e-05, "loss": 0.2156, "step": 4627 }, { "epoch": 0.08254557129097849, "grad_norm": 0.3470957279205322, "learning_rate": 4.1277202996789156e-05, "loss": 0.2556, "step": 4628 }, { "epoch": 0.08256340741269218, "grad_norm": 0.42865556478500366, "learning_rate": 4.128612201212986e-05, "loss": 0.3219, "step": 4629 }, { "epoch": 0.08258124353440588, "grad_norm": 0.43934494256973267, "learning_rate": 4.129504102747057e-05, "loss": 0.3883, "step": 4630 }, { "epoch": 0.08259907965611957, "grad_norm": 0.3967353105545044, "learning_rate": 4.1303960042811275e-05, "loss": 0.3307, "step": 4631 }, { "epoch": 0.08261691577783327, "grad_norm": 0.3168458342552185, "learning_rate": 4.131287905815198e-05, "loss": 0.2746, "step": 4632 }, { "epoch": 0.08263475189954696, "grad_norm": 0.35633203387260437, "learning_rate": 4.132179807349269e-05, "loss": 0.2502, "step": 4633 }, { "epoch": 0.08265258802126066, "grad_norm": 0.36781826615333557, "learning_rate": 4.1330717088833394e-05, "loss": 0.2349, "step": 4634 }, { "epoch": 0.08267042414297435, "grad_norm": 0.4396607279777527, "learning_rate": 4.13396361041741e-05, "loss": 0.2412, "step": 4635 }, { "epoch": 0.08268826026468805, "grad_norm": 0.35209110379219055, "learning_rate": 4.134855511951481e-05, "loss": 0.259, "step": 4636 }, { "epoch": 0.08270609638640174, "grad_norm": 0.3407614529132843, "learning_rate": 4.135747413485551e-05, "loss": 0.271, "step": 4637 }, { "epoch": 0.08272393250811544, "grad_norm": 0.3865599036216736, "learning_rate": 4.136639315019622e-05, "loss": 0.2731, "step": 4638 }, { "epoch": 0.08274176862982913, "grad_norm": 0.3795560300350189, "learning_rate": 4.1375312165536926e-05, "loss": 0.2632, "step": 4639 }, { "epoch": 0.08275960475154283, "grad_norm": 0.4535025358200073, "learning_rate": 4.138423118087763e-05, "loss": 0.3025, "step": 4640 }, { "epoch": 0.08277744087325652, "grad_norm": 0.36958104372024536, "learning_rate": 4.139315019621834e-05, "loss": 0.2574, "step": 4641 }, { "epoch": 0.08279527699497022, "grad_norm": 0.30590102076530457, "learning_rate": 4.1402069211559045e-05, "loss": 0.2429, "step": 4642 }, { "epoch": 0.0828131131166839, "grad_norm": 0.41996458172798157, "learning_rate": 4.141098822689975e-05, "loss": 0.3062, "step": 4643 }, { "epoch": 0.08283094923839761, "grad_norm": 0.28037649393081665, "learning_rate": 4.141990724224046e-05, "loss": 0.238, "step": 4644 }, { "epoch": 0.0828487853601113, "grad_norm": 0.23906131088733673, "learning_rate": 4.1428826257581164e-05, "loss": 0.2348, "step": 4645 }, { "epoch": 0.082866621481825, "grad_norm": 0.30123984813690186, "learning_rate": 4.143774527292188e-05, "loss": 0.272, "step": 4646 }, { "epoch": 0.08288445760353869, "grad_norm": 0.45062610507011414, "learning_rate": 4.144666428826258e-05, "loss": 0.2212, "step": 4647 }, { "epoch": 0.08290229372525239, "grad_norm": 0.3282548189163208, "learning_rate": 4.145558330360328e-05, "loss": 0.2685, "step": 4648 }, { "epoch": 0.08292012984696608, "grad_norm": 0.37194085121154785, "learning_rate": 4.146450231894399e-05, "loss": 0.2971, "step": 4649 }, { "epoch": 0.08293796596867976, "grad_norm": 0.35184234380722046, "learning_rate": 4.1473421334284696e-05, "loss": 0.3189, "step": 4650 }, { "epoch": 0.08295580209039347, "grad_norm": 0.3653015196323395, "learning_rate": 4.14823403496254e-05, "loss": 0.2731, "step": 4651 }, { "epoch": 0.08297363821210715, "grad_norm": 0.2842966616153717, "learning_rate": 4.149125936496611e-05, "loss": 0.225, "step": 4652 }, { "epoch": 0.08299147433382086, "grad_norm": 0.29867467284202576, "learning_rate": 4.1500178380306815e-05, "loss": 0.216, "step": 4653 }, { "epoch": 0.08300931045553454, "grad_norm": 0.49323219060897827, "learning_rate": 4.150909739564752e-05, "loss": 0.3274, "step": 4654 }, { "epoch": 0.08302714657724825, "grad_norm": 0.3544803261756897, "learning_rate": 4.151801641098823e-05, "loss": 0.2479, "step": 4655 }, { "epoch": 0.08304498269896193, "grad_norm": 0.4178151488304138, "learning_rate": 4.1526935426328934e-05, "loss": 0.2595, "step": 4656 }, { "epoch": 0.08306281882067564, "grad_norm": 0.36962756514549255, "learning_rate": 4.153585444166964e-05, "loss": 0.3009, "step": 4657 }, { "epoch": 0.08308065494238932, "grad_norm": 0.3434098958969116, "learning_rate": 4.154477345701035e-05, "loss": 0.3182, "step": 4658 }, { "epoch": 0.08309849106410302, "grad_norm": 0.36081305146217346, "learning_rate": 4.155369247235105e-05, "loss": 0.262, "step": 4659 }, { "epoch": 0.08311632718581671, "grad_norm": 0.3830731511116028, "learning_rate": 4.156261148769176e-05, "loss": 0.2614, "step": 4660 }, { "epoch": 0.08313416330753041, "grad_norm": 0.353041410446167, "learning_rate": 4.1571530503032466e-05, "loss": 0.2485, "step": 4661 }, { "epoch": 0.0831519994292441, "grad_norm": 0.34955528378486633, "learning_rate": 4.158044951837317e-05, "loss": 0.3005, "step": 4662 }, { "epoch": 0.0831698355509578, "grad_norm": 1.1263805627822876, "learning_rate": 4.158936853371388e-05, "loss": 0.2772, "step": 4663 }, { "epoch": 0.08318767167267149, "grad_norm": 0.29771795868873596, "learning_rate": 4.1598287549054585e-05, "loss": 0.2828, "step": 4664 }, { "epoch": 0.0832055077943852, "grad_norm": 0.36346837878227234, "learning_rate": 4.160720656439529e-05, "loss": 0.2666, "step": 4665 }, { "epoch": 0.08322334391609888, "grad_norm": 0.3174096345901489, "learning_rate": 4.1616125579736e-05, "loss": 0.2775, "step": 4666 }, { "epoch": 0.08324118003781258, "grad_norm": 0.316211998462677, "learning_rate": 4.1625044595076704e-05, "loss": 0.246, "step": 4667 }, { "epoch": 0.08325901615952627, "grad_norm": 0.5889096260070801, "learning_rate": 4.163396361041742e-05, "loss": 0.3302, "step": 4668 }, { "epoch": 0.08327685228123996, "grad_norm": 0.3345775008201599, "learning_rate": 4.164288262575812e-05, "loss": 0.2898, "step": 4669 }, { "epoch": 0.08329468840295366, "grad_norm": 0.3662806451320648, "learning_rate": 4.1651801641098824e-05, "loss": 0.2822, "step": 4670 }, { "epoch": 0.08331252452466735, "grad_norm": 0.36785194277763367, "learning_rate": 4.166072065643954e-05, "loss": 0.2595, "step": 4671 }, { "epoch": 0.08333036064638105, "grad_norm": 0.35829958319664, "learning_rate": 4.1669639671780236e-05, "loss": 0.2396, "step": 4672 }, { "epoch": 0.08334819676809474, "grad_norm": 0.303079754114151, "learning_rate": 4.167855868712094e-05, "loss": 0.2397, "step": 4673 }, { "epoch": 0.08336603288980844, "grad_norm": 0.2755659520626068, "learning_rate": 4.1687477702461656e-05, "loss": 0.2469, "step": 4674 }, { "epoch": 0.08338386901152213, "grad_norm": 0.26110124588012695, "learning_rate": 4.1696396717802355e-05, "loss": 0.2236, "step": 4675 }, { "epoch": 0.08340170513323583, "grad_norm": 0.34288933873176575, "learning_rate": 4.170531573314306e-05, "loss": 0.2766, "step": 4676 }, { "epoch": 0.08341954125494952, "grad_norm": 0.31066226959228516, "learning_rate": 4.171423474848377e-05, "loss": 0.241, "step": 4677 }, { "epoch": 0.08343737737666322, "grad_norm": 0.3882811367511749, "learning_rate": 4.1723153763824475e-05, "loss": 0.2887, "step": 4678 }, { "epoch": 0.08345521349837691, "grad_norm": 0.3345036208629608, "learning_rate": 4.173207277916518e-05, "loss": 0.2761, "step": 4679 }, { "epoch": 0.08347304962009061, "grad_norm": 0.37505725026130676, "learning_rate": 4.174099179450589e-05, "loss": 0.2276, "step": 4680 }, { "epoch": 0.0834908857418043, "grad_norm": 0.32556086778640747, "learning_rate": 4.1749910809846594e-05, "loss": 0.2486, "step": 4681 }, { "epoch": 0.083508721863518, "grad_norm": 0.4078800678253174, "learning_rate": 4.17588298251873e-05, "loss": 0.3024, "step": 4682 }, { "epoch": 0.08352655798523169, "grad_norm": 0.24213099479675293, "learning_rate": 4.1767748840528007e-05, "loss": 0.2071, "step": 4683 }, { "epoch": 0.08354439410694539, "grad_norm": 0.39423611760139465, "learning_rate": 4.177666785586871e-05, "loss": 0.2496, "step": 4684 }, { "epoch": 0.08356223022865908, "grad_norm": 0.3784507215023041, "learning_rate": 4.178558687120942e-05, "loss": 0.3164, "step": 4685 }, { "epoch": 0.08358006635037278, "grad_norm": 0.304168164730072, "learning_rate": 4.1794505886550126e-05, "loss": 0.2341, "step": 4686 }, { "epoch": 0.08359790247208647, "grad_norm": 0.29561179876327515, "learning_rate": 4.180342490189083e-05, "loss": 0.264, "step": 4687 }, { "epoch": 0.08361573859380017, "grad_norm": 0.2837843894958496, "learning_rate": 4.181234391723154e-05, "loss": 0.2345, "step": 4688 }, { "epoch": 0.08363357471551386, "grad_norm": 0.26228925585746765, "learning_rate": 4.1821262932572245e-05, "loss": 0.2656, "step": 4689 }, { "epoch": 0.08365141083722755, "grad_norm": 0.32619327306747437, "learning_rate": 4.183018194791295e-05, "loss": 0.2209, "step": 4690 }, { "epoch": 0.08366924695894125, "grad_norm": 0.49905359745025635, "learning_rate": 4.183910096325366e-05, "loss": 0.2872, "step": 4691 }, { "epoch": 0.08368708308065494, "grad_norm": 0.4743531346321106, "learning_rate": 4.1848019978594364e-05, "loss": 0.2501, "step": 4692 }, { "epoch": 0.08370491920236864, "grad_norm": 0.46660152077674866, "learning_rate": 4.185693899393508e-05, "loss": 0.3212, "step": 4693 }, { "epoch": 0.08372275532408233, "grad_norm": 0.5808331966400146, "learning_rate": 4.186585800927578e-05, "loss": 0.3478, "step": 4694 }, { "epoch": 0.08374059144579603, "grad_norm": 0.3189285099506378, "learning_rate": 4.187477702461648e-05, "loss": 0.3159, "step": 4695 }, { "epoch": 0.08375842756750972, "grad_norm": 0.374616414308548, "learning_rate": 4.1883696039957196e-05, "loss": 0.2653, "step": 4696 }, { "epoch": 0.08377626368922342, "grad_norm": 0.2993185520172119, "learning_rate": 4.1892615055297896e-05, "loss": 0.2669, "step": 4697 }, { "epoch": 0.0837940998109371, "grad_norm": 0.2776098847389221, "learning_rate": 4.19015340706386e-05, "loss": 0.3056, "step": 4698 }, { "epoch": 0.08381193593265081, "grad_norm": 0.2787780165672302, "learning_rate": 4.1910453085979315e-05, "loss": 0.2396, "step": 4699 }, { "epoch": 0.0838297720543645, "grad_norm": 0.36942046880722046, "learning_rate": 4.1919372101320015e-05, "loss": 0.2533, "step": 4700 }, { "epoch": 0.0838476081760782, "grad_norm": 0.3226310610771179, "learning_rate": 4.192829111666072e-05, "loss": 0.2087, "step": 4701 }, { "epoch": 0.08386544429779189, "grad_norm": 0.23606103658676147, "learning_rate": 4.193721013200143e-05, "loss": 0.229, "step": 4702 }, { "epoch": 0.08388328041950559, "grad_norm": 0.32441607117652893, "learning_rate": 4.1946129147342134e-05, "loss": 0.1895, "step": 4703 }, { "epoch": 0.08390111654121928, "grad_norm": 0.9351107478141785, "learning_rate": 4.195504816268284e-05, "loss": 0.306, "step": 4704 }, { "epoch": 0.08391895266293298, "grad_norm": 0.2972072958946228, "learning_rate": 4.196396717802355e-05, "loss": 0.2418, "step": 4705 }, { "epoch": 0.08393678878464667, "grad_norm": 0.2862665057182312, "learning_rate": 4.197288619336425e-05, "loss": 0.2529, "step": 4706 }, { "epoch": 0.08395462490636037, "grad_norm": 0.3217792510986328, "learning_rate": 4.198180520870496e-05, "loss": 0.233, "step": 4707 }, { "epoch": 0.08397246102807406, "grad_norm": 0.38995981216430664, "learning_rate": 4.1990724224045666e-05, "loss": 0.2702, "step": 4708 }, { "epoch": 0.08399029714978776, "grad_norm": 0.46338629722595215, "learning_rate": 4.199964323938637e-05, "loss": 0.3342, "step": 4709 }, { "epoch": 0.08400813327150145, "grad_norm": 0.44229286909103394, "learning_rate": 4.200856225472708e-05, "loss": 0.2368, "step": 4710 }, { "epoch": 0.08402596939321513, "grad_norm": 0.6200202107429504, "learning_rate": 4.2017481270067785e-05, "loss": 0.3033, "step": 4711 }, { "epoch": 0.08404380551492883, "grad_norm": 0.34727299213409424, "learning_rate": 4.202640028540849e-05, "loss": 0.2699, "step": 4712 }, { "epoch": 0.08406164163664252, "grad_norm": 0.31790176033973694, "learning_rate": 4.20353193007492e-05, "loss": 0.2482, "step": 4713 }, { "epoch": 0.08407947775835622, "grad_norm": 0.37162309885025024, "learning_rate": 4.2044238316089904e-05, "loss": 0.2802, "step": 4714 }, { "epoch": 0.08409731388006991, "grad_norm": 0.23286005854606628, "learning_rate": 4.205315733143061e-05, "loss": 0.2324, "step": 4715 }, { "epoch": 0.08411515000178361, "grad_norm": 0.3594549894332886, "learning_rate": 4.206207634677132e-05, "loss": 0.2363, "step": 4716 }, { "epoch": 0.0841329861234973, "grad_norm": 0.2847643196582794, "learning_rate": 4.2070995362112023e-05, "loss": 0.2127, "step": 4717 }, { "epoch": 0.084150822245211, "grad_norm": 0.3336940407752991, "learning_rate": 4.2079914377452737e-05, "loss": 0.3024, "step": 4718 }, { "epoch": 0.08416865836692469, "grad_norm": 0.4410804212093353, "learning_rate": 4.2088833392793436e-05, "loss": 0.3492, "step": 4719 }, { "epoch": 0.0841864944886384, "grad_norm": 0.2904675304889679, "learning_rate": 4.209775240813414e-05, "loss": 0.2854, "step": 4720 }, { "epoch": 0.08420433061035208, "grad_norm": 0.2894403338432312, "learning_rate": 4.2106671423474856e-05, "loss": 0.2432, "step": 4721 }, { "epoch": 0.08422216673206578, "grad_norm": 0.33036935329437256, "learning_rate": 4.2115590438815555e-05, "loss": 0.2492, "step": 4722 }, { "epoch": 0.08424000285377947, "grad_norm": 0.3528401255607605, "learning_rate": 4.212450945415626e-05, "loss": 0.2365, "step": 4723 }, { "epoch": 0.08425783897549317, "grad_norm": 0.46657130122184753, "learning_rate": 4.2133428469496975e-05, "loss": 0.2519, "step": 4724 }, { "epoch": 0.08427567509720686, "grad_norm": 0.3133507966995239, "learning_rate": 4.2142347484837674e-05, "loss": 0.2592, "step": 4725 }, { "epoch": 0.08429351121892056, "grad_norm": 0.3040401339530945, "learning_rate": 4.215126650017838e-05, "loss": 0.2392, "step": 4726 }, { "epoch": 0.08431134734063425, "grad_norm": 0.32957813143730164, "learning_rate": 4.216018551551909e-05, "loss": 0.2375, "step": 4727 }, { "epoch": 0.08432918346234795, "grad_norm": 0.3673066198825836, "learning_rate": 4.2169104530859794e-05, "loss": 0.2749, "step": 4728 }, { "epoch": 0.08434701958406164, "grad_norm": 0.366629958152771, "learning_rate": 4.21780235462005e-05, "loss": 0.2603, "step": 4729 }, { "epoch": 0.08436485570577533, "grad_norm": 0.33293500542640686, "learning_rate": 4.2186942561541206e-05, "loss": 0.2725, "step": 4730 }, { "epoch": 0.08438269182748903, "grad_norm": 0.3752448558807373, "learning_rate": 4.219586157688191e-05, "loss": 0.3308, "step": 4731 }, { "epoch": 0.08440052794920272, "grad_norm": 0.44500109553337097, "learning_rate": 4.220478059222262e-05, "loss": 0.2536, "step": 4732 }, { "epoch": 0.08441836407091642, "grad_norm": 0.48183923959732056, "learning_rate": 4.2213699607563326e-05, "loss": 0.2799, "step": 4733 }, { "epoch": 0.08443620019263011, "grad_norm": 0.3570837676525116, "learning_rate": 4.222261862290403e-05, "loss": 0.2604, "step": 4734 }, { "epoch": 0.08445403631434381, "grad_norm": 0.35328057408332825, "learning_rate": 4.223153763824474e-05, "loss": 0.2773, "step": 4735 }, { "epoch": 0.0844718724360575, "grad_norm": 0.40506699681282043, "learning_rate": 4.2240456653585445e-05, "loss": 0.3003, "step": 4736 }, { "epoch": 0.0844897085577712, "grad_norm": 0.4074116051197052, "learning_rate": 4.224937566892615e-05, "loss": 0.2588, "step": 4737 }, { "epoch": 0.08450754467948489, "grad_norm": 0.27763742208480835, "learning_rate": 4.225829468426686e-05, "loss": 0.2267, "step": 4738 }, { "epoch": 0.08452538080119859, "grad_norm": 0.4040248394012451, "learning_rate": 4.2267213699607564e-05, "loss": 0.2307, "step": 4739 }, { "epoch": 0.08454321692291228, "grad_norm": 0.24238283932209015, "learning_rate": 4.227613271494828e-05, "loss": 0.219, "step": 4740 }, { "epoch": 0.08456105304462598, "grad_norm": 0.27702492475509644, "learning_rate": 4.2285051730288977e-05, "loss": 0.2147, "step": 4741 }, { "epoch": 0.08457888916633967, "grad_norm": 0.3211560547351837, "learning_rate": 4.229397074562968e-05, "loss": 0.275, "step": 4742 }, { "epoch": 0.08459672528805337, "grad_norm": 0.3873059153556824, "learning_rate": 4.2302889760970396e-05, "loss": 0.2974, "step": 4743 }, { "epoch": 0.08461456140976706, "grad_norm": 0.3301689922809601, "learning_rate": 4.2311808776311096e-05, "loss": 0.2102, "step": 4744 }, { "epoch": 0.08463239753148076, "grad_norm": 0.3535199761390686, "learning_rate": 4.23207277916518e-05, "loss": 0.2865, "step": 4745 }, { "epoch": 0.08465023365319445, "grad_norm": 0.45673027634620667, "learning_rate": 4.2329646806992515e-05, "loss": 0.2752, "step": 4746 }, { "epoch": 0.08466806977490815, "grad_norm": 0.6006236672401428, "learning_rate": 4.2338565822333215e-05, "loss": 0.31, "step": 4747 }, { "epoch": 0.08468590589662184, "grad_norm": 1.914745569229126, "learning_rate": 4.234748483767392e-05, "loss": 0.5449, "step": 4748 }, { "epoch": 0.08470374201833554, "grad_norm": 0.38808149099349976, "learning_rate": 4.2356403853014634e-05, "loss": 0.276, "step": 4749 }, { "epoch": 0.08472157814004923, "grad_norm": 0.3978177309036255, "learning_rate": 4.2365322868355334e-05, "loss": 0.2015, "step": 4750 }, { "epoch": 0.08473941426176292, "grad_norm": 0.37178748846054077, "learning_rate": 4.237424188369604e-05, "loss": 0.2532, "step": 4751 }, { "epoch": 0.08475725038347662, "grad_norm": 0.40695932507514954, "learning_rate": 4.238316089903675e-05, "loss": 0.2796, "step": 4752 }, { "epoch": 0.0847750865051903, "grad_norm": 0.31890979409217834, "learning_rate": 4.239207991437745e-05, "loss": 0.2404, "step": 4753 }, { "epoch": 0.08479292262690401, "grad_norm": 0.30806058645248413, "learning_rate": 4.240099892971816e-05, "loss": 0.2567, "step": 4754 }, { "epoch": 0.0848107587486177, "grad_norm": 0.5412458181381226, "learning_rate": 4.2409917945058866e-05, "loss": 0.2848, "step": 4755 }, { "epoch": 0.0848285948703314, "grad_norm": 0.38702741265296936, "learning_rate": 4.241883696039957e-05, "loss": 0.2732, "step": 4756 }, { "epoch": 0.08484643099204509, "grad_norm": 0.2890656590461731, "learning_rate": 4.242775597574028e-05, "loss": 0.2578, "step": 4757 }, { "epoch": 0.08486426711375879, "grad_norm": 0.3491271436214447, "learning_rate": 4.2436674991080985e-05, "loss": 0.3044, "step": 4758 }, { "epoch": 0.08488210323547248, "grad_norm": 0.2936517596244812, "learning_rate": 4.244559400642169e-05, "loss": 0.2387, "step": 4759 }, { "epoch": 0.08489993935718618, "grad_norm": 0.3863624036312103, "learning_rate": 4.24545130217624e-05, "loss": 0.2293, "step": 4760 }, { "epoch": 0.08491777547889987, "grad_norm": 0.4840512275695801, "learning_rate": 4.2463432037103104e-05, "loss": 0.2772, "step": 4761 }, { "epoch": 0.08493561160061357, "grad_norm": 0.3251959979534149, "learning_rate": 4.247235105244381e-05, "loss": 0.259, "step": 4762 }, { "epoch": 0.08495344772232725, "grad_norm": 0.3124004602432251, "learning_rate": 4.248127006778452e-05, "loss": 0.2654, "step": 4763 }, { "epoch": 0.08497128384404096, "grad_norm": 0.35165154933929443, "learning_rate": 4.249018908312522e-05, "loss": 0.2775, "step": 4764 }, { "epoch": 0.08498911996575464, "grad_norm": 0.4545726478099823, "learning_rate": 4.2499108098465936e-05, "loss": 0.269, "step": 4765 }, { "epoch": 0.08500695608746835, "grad_norm": 0.31335318088531494, "learning_rate": 4.2508027113806636e-05, "loss": 0.2238, "step": 4766 }, { "epoch": 0.08502479220918203, "grad_norm": 0.2870141863822937, "learning_rate": 4.251694612914734e-05, "loss": 0.2346, "step": 4767 }, { "epoch": 0.08504262833089574, "grad_norm": 0.5971843004226685, "learning_rate": 4.2525865144488056e-05, "loss": 0.2718, "step": 4768 }, { "epoch": 0.08506046445260942, "grad_norm": 0.3538011610507965, "learning_rate": 4.2534784159828755e-05, "loss": 0.3217, "step": 4769 }, { "epoch": 0.08507830057432311, "grad_norm": 0.36990422010421753, "learning_rate": 4.254370317516946e-05, "loss": 0.2289, "step": 4770 }, { "epoch": 0.08509613669603681, "grad_norm": 0.2870731055736542, "learning_rate": 4.2552622190510175e-05, "loss": 0.2477, "step": 4771 }, { "epoch": 0.0851139728177505, "grad_norm": 0.3276619017124176, "learning_rate": 4.2561541205850874e-05, "loss": 0.2961, "step": 4772 }, { "epoch": 0.0851318089394642, "grad_norm": 0.7462770342826843, "learning_rate": 4.257046022119158e-05, "loss": 0.2708, "step": 4773 }, { "epoch": 0.08514964506117789, "grad_norm": 0.28238746523857117, "learning_rate": 4.2579379236532294e-05, "loss": 0.2303, "step": 4774 }, { "epoch": 0.0851674811828916, "grad_norm": 0.3139684796333313, "learning_rate": 4.2588298251872994e-05, "loss": 0.2244, "step": 4775 }, { "epoch": 0.08518531730460528, "grad_norm": 0.4672798216342926, "learning_rate": 4.25972172672137e-05, "loss": 0.3217, "step": 4776 }, { "epoch": 0.08520315342631898, "grad_norm": 0.5569276809692383, "learning_rate": 4.260613628255441e-05, "loss": 0.3116, "step": 4777 }, { "epoch": 0.08522098954803267, "grad_norm": 0.2758088707923889, "learning_rate": 4.261505529789511e-05, "loss": 0.2401, "step": 4778 }, { "epoch": 0.08523882566974637, "grad_norm": 0.4149360954761505, "learning_rate": 4.262397431323582e-05, "loss": 0.2809, "step": 4779 }, { "epoch": 0.08525666179146006, "grad_norm": 0.3154550790786743, "learning_rate": 4.2632893328576525e-05, "loss": 0.2593, "step": 4780 }, { "epoch": 0.08527449791317376, "grad_norm": 0.27809467911720276, "learning_rate": 4.264181234391723e-05, "loss": 0.2198, "step": 4781 }, { "epoch": 0.08529233403488745, "grad_norm": 0.29021480679512024, "learning_rate": 4.265073135925794e-05, "loss": 0.2276, "step": 4782 }, { "epoch": 0.08531017015660115, "grad_norm": 0.31899502873420715, "learning_rate": 4.2659650374598645e-05, "loss": 0.2811, "step": 4783 }, { "epoch": 0.08532800627831484, "grad_norm": 0.6962217092514038, "learning_rate": 4.266856938993935e-05, "loss": 0.2812, "step": 4784 }, { "epoch": 0.08534584240002854, "grad_norm": 0.46125882863998413, "learning_rate": 4.267748840528006e-05, "loss": 0.3221, "step": 4785 }, { "epoch": 0.08536367852174223, "grad_norm": 0.4934694468975067, "learning_rate": 4.2686407420620764e-05, "loss": 0.3668, "step": 4786 }, { "epoch": 0.08538151464345593, "grad_norm": 0.45117488503456116, "learning_rate": 4.269532643596148e-05, "loss": 0.3162, "step": 4787 }, { "epoch": 0.08539935076516962, "grad_norm": 0.4191190302371979, "learning_rate": 4.2704245451302176e-05, "loss": 0.2955, "step": 4788 }, { "epoch": 0.08541718688688332, "grad_norm": 0.3699866533279419, "learning_rate": 4.271316446664288e-05, "loss": 0.2278, "step": 4789 }, { "epoch": 0.08543502300859701, "grad_norm": 0.4417411983013153, "learning_rate": 4.2722083481983596e-05, "loss": 0.2011, "step": 4790 }, { "epoch": 0.0854528591303107, "grad_norm": 0.3505558669567108, "learning_rate": 4.2731002497324296e-05, "loss": 0.2514, "step": 4791 }, { "epoch": 0.0854706952520244, "grad_norm": 0.5170753002166748, "learning_rate": 4.2739921512665e-05, "loss": 0.2881, "step": 4792 }, { "epoch": 0.08548853137373809, "grad_norm": 0.29534634947776794, "learning_rate": 4.2748840528005715e-05, "loss": 0.2593, "step": 4793 }, { "epoch": 0.08550636749545179, "grad_norm": 0.523849606513977, "learning_rate": 4.2757759543346415e-05, "loss": 0.2785, "step": 4794 }, { "epoch": 0.08552420361716548, "grad_norm": 0.7056588530540466, "learning_rate": 4.276667855868712e-05, "loss": 0.2263, "step": 4795 }, { "epoch": 0.08554203973887918, "grad_norm": 0.3499701917171478, "learning_rate": 4.2775597574027834e-05, "loss": 0.2559, "step": 4796 }, { "epoch": 0.08555987586059287, "grad_norm": 0.39317333698272705, "learning_rate": 4.2784516589368534e-05, "loss": 0.2301, "step": 4797 }, { "epoch": 0.08557771198230657, "grad_norm": 0.4301225244998932, "learning_rate": 4.279343560470924e-05, "loss": 0.2462, "step": 4798 }, { "epoch": 0.08559554810402026, "grad_norm": 0.3244914710521698, "learning_rate": 4.2802354620049953e-05, "loss": 0.2666, "step": 4799 }, { "epoch": 0.08561338422573396, "grad_norm": 0.31595122814178467, "learning_rate": 4.281127363539065e-05, "loss": 0.2732, "step": 4800 }, { "epoch": 0.08563122034744765, "grad_norm": 0.4005768597126007, "learning_rate": 4.282019265073136e-05, "loss": 0.274, "step": 4801 }, { "epoch": 0.08564905646916135, "grad_norm": 0.524563729763031, "learning_rate": 4.282911166607207e-05, "loss": 0.3328, "step": 4802 }, { "epoch": 0.08566689259087504, "grad_norm": 0.3591447174549103, "learning_rate": 4.283803068141277e-05, "loss": 0.2556, "step": 4803 }, { "epoch": 0.08568472871258874, "grad_norm": 0.3772144913673401, "learning_rate": 4.284694969675348e-05, "loss": 0.2674, "step": 4804 }, { "epoch": 0.08570256483430243, "grad_norm": 0.39869189262390137, "learning_rate": 4.2855868712094185e-05, "loss": 0.2846, "step": 4805 }, { "epoch": 0.08572040095601613, "grad_norm": 0.27792346477508545, "learning_rate": 4.286478772743489e-05, "loss": 0.2132, "step": 4806 }, { "epoch": 0.08573823707772982, "grad_norm": 0.39283329248428345, "learning_rate": 4.28737067427756e-05, "loss": 0.2601, "step": 4807 }, { "epoch": 0.08575607319944352, "grad_norm": 0.3831244111061096, "learning_rate": 4.2882625758116304e-05, "loss": 0.2661, "step": 4808 }, { "epoch": 0.08577390932115721, "grad_norm": 0.298026442527771, "learning_rate": 4.289154477345701e-05, "loss": 0.2356, "step": 4809 }, { "epoch": 0.0857917454428709, "grad_norm": 0.40651407837867737, "learning_rate": 4.290046378879772e-05, "loss": 0.2839, "step": 4810 }, { "epoch": 0.0858095815645846, "grad_norm": 0.398034930229187, "learning_rate": 4.290938280413842e-05, "loss": 0.2299, "step": 4811 }, { "epoch": 0.08582741768629829, "grad_norm": 0.3721598982810974, "learning_rate": 4.2918301819479136e-05, "loss": 0.2356, "step": 4812 }, { "epoch": 0.08584525380801199, "grad_norm": 0.5915565490722656, "learning_rate": 4.2927220834819836e-05, "loss": 0.2818, "step": 4813 }, { "epoch": 0.08586308992972567, "grad_norm": 0.31399139761924744, "learning_rate": 4.293613985016054e-05, "loss": 0.2513, "step": 4814 }, { "epoch": 0.08588092605143938, "grad_norm": 0.5328007340431213, "learning_rate": 4.2945058865501256e-05, "loss": 0.2917, "step": 4815 }, { "epoch": 0.08589876217315306, "grad_norm": 0.39275050163269043, "learning_rate": 4.2953977880841955e-05, "loss": 0.2309, "step": 4816 }, { "epoch": 0.08591659829486677, "grad_norm": 0.3472362160682678, "learning_rate": 4.296289689618266e-05, "loss": 0.2641, "step": 4817 }, { "epoch": 0.08593443441658045, "grad_norm": 0.3209400177001953, "learning_rate": 4.2971815911523375e-05, "loss": 0.2538, "step": 4818 }, { "epoch": 0.08595227053829416, "grad_norm": 0.9367354512214661, "learning_rate": 4.2980734926864074e-05, "loss": 0.2625, "step": 4819 }, { "epoch": 0.08597010666000784, "grad_norm": 0.3279639780521393, "learning_rate": 4.298965394220478e-05, "loss": 0.1536, "step": 4820 }, { "epoch": 0.08598794278172155, "grad_norm": 0.39545565843582153, "learning_rate": 4.2998572957545494e-05, "loss": 0.261, "step": 4821 }, { "epoch": 0.08600577890343523, "grad_norm": 0.4068867266178131, "learning_rate": 4.300749197288619e-05, "loss": 0.2438, "step": 4822 }, { "epoch": 0.08602361502514894, "grad_norm": 0.5767574906349182, "learning_rate": 4.30164109882269e-05, "loss": 0.2691, "step": 4823 }, { "epoch": 0.08604145114686262, "grad_norm": 0.44339218735694885, "learning_rate": 4.302533000356761e-05, "loss": 0.3335, "step": 4824 }, { "epoch": 0.08605928726857633, "grad_norm": 0.4091067314147949, "learning_rate": 4.303424901890831e-05, "loss": 0.3007, "step": 4825 }, { "epoch": 0.08607712339029001, "grad_norm": 0.38071176409721375, "learning_rate": 4.304316803424902e-05, "loss": 0.313, "step": 4826 }, { "epoch": 0.08609495951200372, "grad_norm": 0.4248177707195282, "learning_rate": 4.305208704958973e-05, "loss": 0.2525, "step": 4827 }, { "epoch": 0.0861127956337174, "grad_norm": 0.41568171977996826, "learning_rate": 4.306100606493043e-05, "loss": 0.272, "step": 4828 }, { "epoch": 0.0861306317554311, "grad_norm": 0.33429378271102905, "learning_rate": 4.306992508027114e-05, "loss": 0.2147, "step": 4829 }, { "epoch": 0.0861484678771448, "grad_norm": 0.4362045228481293, "learning_rate": 4.3078844095611844e-05, "loss": 0.2641, "step": 4830 }, { "epoch": 0.08616630399885848, "grad_norm": 0.2800341248512268, "learning_rate": 4.308776311095255e-05, "loss": 0.2061, "step": 4831 }, { "epoch": 0.08618414012057218, "grad_norm": 0.49194765090942383, "learning_rate": 4.309668212629326e-05, "loss": 0.283, "step": 4832 }, { "epoch": 0.08620197624228587, "grad_norm": 0.4083649814128876, "learning_rate": 4.3105601141633964e-05, "loss": 0.29, "step": 4833 }, { "epoch": 0.08621981236399957, "grad_norm": 0.32335180044174194, "learning_rate": 4.311452015697468e-05, "loss": 0.2529, "step": 4834 }, { "epoch": 0.08623764848571326, "grad_norm": 0.3874923586845398, "learning_rate": 4.3123439172315376e-05, "loss": 0.27, "step": 4835 }, { "epoch": 0.08625548460742696, "grad_norm": 0.3521764874458313, "learning_rate": 4.313235818765608e-05, "loss": 0.22, "step": 4836 }, { "epoch": 0.08627332072914065, "grad_norm": 0.3749926686286926, "learning_rate": 4.3141277202996796e-05, "loss": 0.3044, "step": 4837 }, { "epoch": 0.08629115685085435, "grad_norm": 0.3045305609703064, "learning_rate": 4.3150196218337495e-05, "loss": 0.2834, "step": 4838 }, { "epoch": 0.08630899297256804, "grad_norm": 0.30828580260276794, "learning_rate": 4.31591152336782e-05, "loss": 0.2686, "step": 4839 }, { "epoch": 0.08632682909428174, "grad_norm": 0.30732011795043945, "learning_rate": 4.3168034249018915e-05, "loss": 0.2588, "step": 4840 }, { "epoch": 0.08634466521599543, "grad_norm": 0.3094451427459717, "learning_rate": 4.3176953264359615e-05, "loss": 0.2927, "step": 4841 }, { "epoch": 0.08636250133770913, "grad_norm": 0.32332518696784973, "learning_rate": 4.318587227970032e-05, "loss": 0.2538, "step": 4842 }, { "epoch": 0.08638033745942282, "grad_norm": 0.33366659283638, "learning_rate": 4.3194791295041034e-05, "loss": 0.2611, "step": 4843 }, { "epoch": 0.08639817358113652, "grad_norm": 0.36465194821357727, "learning_rate": 4.3203710310381734e-05, "loss": 0.2466, "step": 4844 }, { "epoch": 0.08641600970285021, "grad_norm": 0.3875217139720917, "learning_rate": 4.321262932572244e-05, "loss": 0.2456, "step": 4845 }, { "epoch": 0.08643384582456391, "grad_norm": 0.4086056649684906, "learning_rate": 4.322154834106315e-05, "loss": 0.289, "step": 4846 }, { "epoch": 0.0864516819462776, "grad_norm": 0.3160537779331207, "learning_rate": 4.323046735640385e-05, "loss": 0.2946, "step": 4847 }, { "epoch": 0.0864695180679913, "grad_norm": 0.29965537786483765, "learning_rate": 4.323938637174456e-05, "loss": 0.2784, "step": 4848 }, { "epoch": 0.08648735418970499, "grad_norm": 0.2963491976261139, "learning_rate": 4.324830538708527e-05, "loss": 0.243, "step": 4849 }, { "epoch": 0.08650519031141868, "grad_norm": 0.41574347019195557, "learning_rate": 4.325722440242597e-05, "loss": 0.2818, "step": 4850 }, { "epoch": 0.08652302643313238, "grad_norm": 0.44083961844444275, "learning_rate": 4.326614341776668e-05, "loss": 0.2307, "step": 4851 }, { "epoch": 0.08654086255484607, "grad_norm": 0.7231858372688293, "learning_rate": 4.327506243310739e-05, "loss": 0.3058, "step": 4852 }, { "epoch": 0.08655869867655977, "grad_norm": 0.28039807081222534, "learning_rate": 4.328398144844809e-05, "loss": 0.2623, "step": 4853 }, { "epoch": 0.08657653479827346, "grad_norm": 0.23842006921768188, "learning_rate": 4.32929004637888e-05, "loss": 0.2636, "step": 4854 }, { "epoch": 0.08659437091998716, "grad_norm": 0.28298041224479675, "learning_rate": 4.3301819479129504e-05, "loss": 0.245, "step": 4855 }, { "epoch": 0.08661220704170085, "grad_norm": 0.34194597601890564, "learning_rate": 4.331073849447021e-05, "loss": 0.3005, "step": 4856 }, { "epoch": 0.08663004316341455, "grad_norm": 0.3175301253795624, "learning_rate": 4.331965750981092e-05, "loss": 0.2486, "step": 4857 }, { "epoch": 0.08664787928512824, "grad_norm": 0.3859957754611969, "learning_rate": 4.332857652515162e-05, "loss": 0.2834, "step": 4858 }, { "epoch": 0.08666571540684194, "grad_norm": 0.373903751373291, "learning_rate": 4.3337495540492336e-05, "loss": 0.2815, "step": 4859 }, { "epoch": 0.08668355152855563, "grad_norm": 0.32694053649902344, "learning_rate": 4.3346414555833036e-05, "loss": 0.2797, "step": 4860 }, { "epoch": 0.08670138765026933, "grad_norm": 0.35671985149383545, "learning_rate": 4.335533357117374e-05, "loss": 0.2894, "step": 4861 }, { "epoch": 0.08671922377198302, "grad_norm": 0.35586464405059814, "learning_rate": 4.3364252586514455e-05, "loss": 0.223, "step": 4862 }, { "epoch": 0.08673705989369672, "grad_norm": 0.36284685134887695, "learning_rate": 4.3373171601855155e-05, "loss": 0.267, "step": 4863 }, { "epoch": 0.08675489601541041, "grad_norm": 0.3396737277507782, "learning_rate": 4.338209061719586e-05, "loss": 0.2622, "step": 4864 }, { "epoch": 0.08677273213712411, "grad_norm": 0.2970930337905884, "learning_rate": 4.3391009632536575e-05, "loss": 0.2633, "step": 4865 }, { "epoch": 0.0867905682588378, "grad_norm": 0.3354485332965851, "learning_rate": 4.3399928647877274e-05, "loss": 0.2849, "step": 4866 }, { "epoch": 0.0868084043805515, "grad_norm": 0.38399219512939453, "learning_rate": 4.340884766321798e-05, "loss": 0.2683, "step": 4867 }, { "epoch": 0.08682624050226519, "grad_norm": 0.2917868494987488, "learning_rate": 4.3417766678558694e-05, "loss": 0.2204, "step": 4868 }, { "epoch": 0.08684407662397889, "grad_norm": 0.28292855620384216, "learning_rate": 4.342668569389939e-05, "loss": 0.2388, "step": 4869 }, { "epoch": 0.08686191274569258, "grad_norm": 0.329440712928772, "learning_rate": 4.34356047092401e-05, "loss": 0.2281, "step": 4870 }, { "epoch": 0.08687974886740626, "grad_norm": 0.39377719163894653, "learning_rate": 4.344452372458081e-05, "loss": 0.2473, "step": 4871 }, { "epoch": 0.08689758498911997, "grad_norm": 0.3619420826435089, "learning_rate": 4.345344273992151e-05, "loss": 0.2349, "step": 4872 }, { "epoch": 0.08691542111083365, "grad_norm": 0.4105657935142517, "learning_rate": 4.346236175526222e-05, "loss": 0.2981, "step": 4873 }, { "epoch": 0.08693325723254736, "grad_norm": 0.3426814675331116, "learning_rate": 4.347128077060293e-05, "loss": 0.2835, "step": 4874 }, { "epoch": 0.08695109335426104, "grad_norm": 0.46077296137809753, "learning_rate": 4.348019978594363e-05, "loss": 0.3519, "step": 4875 }, { "epoch": 0.08696892947597475, "grad_norm": 0.3634946048259735, "learning_rate": 4.348911880128434e-05, "loss": 0.2829, "step": 4876 }, { "epoch": 0.08698676559768843, "grad_norm": 0.3040212094783783, "learning_rate": 4.349803781662505e-05, "loss": 0.2569, "step": 4877 }, { "epoch": 0.08700460171940214, "grad_norm": 0.4141254723072052, "learning_rate": 4.350695683196575e-05, "loss": 0.2879, "step": 4878 }, { "epoch": 0.08702243784111582, "grad_norm": 0.35047221183776855, "learning_rate": 4.351587584730646e-05, "loss": 0.2323, "step": 4879 }, { "epoch": 0.08704027396282953, "grad_norm": 0.3511478006839752, "learning_rate": 4.352479486264717e-05, "loss": 0.2889, "step": 4880 }, { "epoch": 0.08705811008454321, "grad_norm": 0.2961639165878296, "learning_rate": 4.353371387798787e-05, "loss": 0.2844, "step": 4881 }, { "epoch": 0.08707594620625692, "grad_norm": 0.32740113139152527, "learning_rate": 4.3542632893328576e-05, "loss": 0.2521, "step": 4882 }, { "epoch": 0.0870937823279706, "grad_norm": 0.39200204610824585, "learning_rate": 4.355155190866928e-05, "loss": 0.2783, "step": 4883 }, { "epoch": 0.0871116184496843, "grad_norm": 0.3524141311645508, "learning_rate": 4.3560470924009996e-05, "loss": 0.2351, "step": 4884 }, { "epoch": 0.087129454571398, "grad_norm": 0.5068562030792236, "learning_rate": 4.3569389939350695e-05, "loss": 0.262, "step": 4885 }, { "epoch": 0.0871472906931117, "grad_norm": 0.3330923616886139, "learning_rate": 4.35783089546914e-05, "loss": 0.2847, "step": 4886 }, { "epoch": 0.08716512681482538, "grad_norm": 0.4281753897666931, "learning_rate": 4.3587227970032115e-05, "loss": 0.2862, "step": 4887 }, { "epoch": 0.08718296293653909, "grad_norm": 0.39838263392448425, "learning_rate": 4.3596146985372814e-05, "loss": 0.2529, "step": 4888 }, { "epoch": 0.08720079905825277, "grad_norm": 0.431342214345932, "learning_rate": 4.360506600071352e-05, "loss": 0.2347, "step": 4889 }, { "epoch": 0.08721863517996647, "grad_norm": 0.34118667244911194, "learning_rate": 4.3613985016054234e-05, "loss": 0.1946, "step": 4890 }, { "epoch": 0.08723647130168016, "grad_norm": 0.6141383051872253, "learning_rate": 4.3622904031394934e-05, "loss": 0.2618, "step": 4891 }, { "epoch": 0.08725430742339385, "grad_norm": 0.3077762722969055, "learning_rate": 4.363182304673564e-05, "loss": 0.2068, "step": 4892 }, { "epoch": 0.08727214354510755, "grad_norm": 0.4342086911201477, "learning_rate": 4.364074206207635e-05, "loss": 0.2514, "step": 4893 }, { "epoch": 0.08728997966682124, "grad_norm": 0.48361364006996155, "learning_rate": 4.364966107741705e-05, "loss": 0.3261, "step": 4894 }, { "epoch": 0.08730781578853494, "grad_norm": 0.4839523732662201, "learning_rate": 4.365858009275776e-05, "loss": 0.346, "step": 4895 }, { "epoch": 0.08732565191024863, "grad_norm": 0.32546982169151306, "learning_rate": 4.366749910809847e-05, "loss": 0.2169, "step": 4896 }, { "epoch": 0.08734348803196233, "grad_norm": 0.3815949857234955, "learning_rate": 4.367641812343917e-05, "loss": 0.2752, "step": 4897 }, { "epoch": 0.08736132415367602, "grad_norm": 0.3673657476902008, "learning_rate": 4.368533713877988e-05, "loss": 0.3015, "step": 4898 }, { "epoch": 0.08737916027538972, "grad_norm": 0.42818859219551086, "learning_rate": 4.369425615412059e-05, "loss": 0.2208, "step": 4899 }, { "epoch": 0.08739699639710341, "grad_norm": 0.3679737150669098, "learning_rate": 4.370317516946129e-05, "loss": 0.2526, "step": 4900 }, { "epoch": 0.08741483251881711, "grad_norm": 0.3902592360973358, "learning_rate": 4.3712094184802e-05, "loss": 0.3113, "step": 4901 }, { "epoch": 0.0874326686405308, "grad_norm": 0.33876484632492065, "learning_rate": 4.372101320014271e-05, "loss": 0.2189, "step": 4902 }, { "epoch": 0.0874505047622445, "grad_norm": 0.4807701110839844, "learning_rate": 4.372993221548341e-05, "loss": 0.2128, "step": 4903 }, { "epoch": 0.08746834088395819, "grad_norm": 0.37112441658973694, "learning_rate": 4.3738851230824117e-05, "loss": 0.2826, "step": 4904 }, { "epoch": 0.08748617700567189, "grad_norm": 0.41438496112823486, "learning_rate": 4.374777024616483e-05, "loss": 0.3159, "step": 4905 }, { "epoch": 0.08750401312738558, "grad_norm": 0.3171751797199249, "learning_rate": 4.3756689261505536e-05, "loss": 0.2804, "step": 4906 }, { "epoch": 0.08752184924909928, "grad_norm": 0.3540882170200348, "learning_rate": 4.3765608276846236e-05, "loss": 0.282, "step": 4907 }, { "epoch": 0.08753968537081297, "grad_norm": 0.2823682725429535, "learning_rate": 4.377452729218694e-05, "loss": 0.2582, "step": 4908 }, { "epoch": 0.08755752149252667, "grad_norm": 0.3573967218399048, "learning_rate": 4.3783446307527655e-05, "loss": 0.2801, "step": 4909 }, { "epoch": 0.08757535761424036, "grad_norm": 0.3363186717033386, "learning_rate": 4.3792365322868355e-05, "loss": 0.2844, "step": 4910 }, { "epoch": 0.08759319373595405, "grad_norm": 0.32518690824508667, "learning_rate": 4.380128433820906e-05, "loss": 0.2676, "step": 4911 }, { "epoch": 0.08761102985766775, "grad_norm": 0.37290501594543457, "learning_rate": 4.3810203353549774e-05, "loss": 0.2933, "step": 4912 }, { "epoch": 0.08762886597938144, "grad_norm": 0.29589971899986267, "learning_rate": 4.3819122368890474e-05, "loss": 0.2411, "step": 4913 }, { "epoch": 0.08764670210109514, "grad_norm": 0.23060445487499237, "learning_rate": 4.382804138423118e-05, "loss": 0.2192, "step": 4914 }, { "epoch": 0.08766453822280883, "grad_norm": 0.2838451564311981, "learning_rate": 4.3836960399571894e-05, "loss": 0.2231, "step": 4915 }, { "epoch": 0.08768237434452253, "grad_norm": 0.243721142411232, "learning_rate": 4.384587941491259e-05, "loss": 0.2279, "step": 4916 }, { "epoch": 0.08770021046623622, "grad_norm": 0.30721530318260193, "learning_rate": 4.38547984302533e-05, "loss": 0.2438, "step": 4917 }, { "epoch": 0.08771804658794992, "grad_norm": 0.2916942536830902, "learning_rate": 4.386371744559401e-05, "loss": 0.2324, "step": 4918 }, { "epoch": 0.0877358827096636, "grad_norm": 0.4337362051010132, "learning_rate": 4.387263646093471e-05, "loss": 0.2572, "step": 4919 }, { "epoch": 0.08775371883137731, "grad_norm": 0.3278912901878357, "learning_rate": 4.388155547627542e-05, "loss": 0.2708, "step": 4920 }, { "epoch": 0.087771554953091, "grad_norm": 0.30770426988601685, "learning_rate": 4.389047449161613e-05, "loss": 0.2531, "step": 4921 }, { "epoch": 0.0877893910748047, "grad_norm": 0.36513054370880127, "learning_rate": 4.389939350695683e-05, "loss": 0.2824, "step": 4922 }, { "epoch": 0.08780722719651839, "grad_norm": 0.3902776837348938, "learning_rate": 4.390831252229754e-05, "loss": 0.3049, "step": 4923 }, { "epoch": 0.08782506331823209, "grad_norm": 0.46934065222740173, "learning_rate": 4.391723153763825e-05, "loss": 0.3479, "step": 4924 }, { "epoch": 0.08784289943994578, "grad_norm": 0.3181341588497162, "learning_rate": 4.392615055297895e-05, "loss": 0.243, "step": 4925 }, { "epoch": 0.08786073556165948, "grad_norm": 0.3682335317134857, "learning_rate": 4.393506956831966e-05, "loss": 0.288, "step": 4926 }, { "epoch": 0.08787857168337317, "grad_norm": 0.37030425667762756, "learning_rate": 4.394398858366037e-05, "loss": 0.1971, "step": 4927 }, { "epoch": 0.08789640780508687, "grad_norm": 0.3158690631389618, "learning_rate": 4.395290759900107e-05, "loss": 0.2084, "step": 4928 }, { "epoch": 0.08791424392680056, "grad_norm": 0.4250951111316681, "learning_rate": 4.3961826614341776e-05, "loss": 0.2729, "step": 4929 }, { "epoch": 0.08793208004851426, "grad_norm": 0.36358439922332764, "learning_rate": 4.397074562968249e-05, "loss": 0.3262, "step": 4930 }, { "epoch": 0.08794991617022795, "grad_norm": 0.3131383955478668, "learning_rate": 4.3979664645023196e-05, "loss": 0.2715, "step": 4931 }, { "epoch": 0.08796775229194163, "grad_norm": 0.33680614829063416, "learning_rate": 4.3988583660363895e-05, "loss": 0.1973, "step": 4932 }, { "epoch": 0.08798558841365534, "grad_norm": 0.25862354040145874, "learning_rate": 4.39975026757046e-05, "loss": 0.2638, "step": 4933 }, { "epoch": 0.08800342453536902, "grad_norm": 0.32706525921821594, "learning_rate": 4.4006421691045315e-05, "loss": 0.2727, "step": 4934 }, { "epoch": 0.08802126065708273, "grad_norm": 0.4544707238674164, "learning_rate": 4.4015340706386014e-05, "loss": 0.2808, "step": 4935 }, { "epoch": 0.08803909677879641, "grad_norm": 0.3016246557235718, "learning_rate": 4.402425972172672e-05, "loss": 0.2681, "step": 4936 }, { "epoch": 0.08805693290051012, "grad_norm": 0.3369017243385315, "learning_rate": 4.4033178737067434e-05, "loss": 0.2466, "step": 4937 }, { "epoch": 0.0880747690222238, "grad_norm": 0.2844545543193817, "learning_rate": 4.4042097752408134e-05, "loss": 0.2976, "step": 4938 }, { "epoch": 0.0880926051439375, "grad_norm": 0.3822677433490753, "learning_rate": 4.405101676774884e-05, "loss": 0.2206, "step": 4939 }, { "epoch": 0.0881104412656512, "grad_norm": 0.2786327600479126, "learning_rate": 4.405993578308955e-05, "loss": 0.2259, "step": 4940 }, { "epoch": 0.0881282773873649, "grad_norm": 0.3102079927921295, "learning_rate": 4.406885479843025e-05, "loss": 0.2371, "step": 4941 }, { "epoch": 0.08814611350907858, "grad_norm": 0.39839714765548706, "learning_rate": 4.407777381377096e-05, "loss": 0.2997, "step": 4942 }, { "epoch": 0.08816394963079228, "grad_norm": 0.43621885776519775, "learning_rate": 4.408669282911167e-05, "loss": 0.262, "step": 4943 }, { "epoch": 0.08818178575250597, "grad_norm": 0.2633258104324341, "learning_rate": 4.409561184445237e-05, "loss": 0.2241, "step": 4944 }, { "epoch": 0.08819962187421967, "grad_norm": 0.3812224268913269, "learning_rate": 4.410453085979308e-05, "loss": 0.308, "step": 4945 }, { "epoch": 0.08821745799593336, "grad_norm": 0.38104119896888733, "learning_rate": 4.411344987513379e-05, "loss": 0.2128, "step": 4946 }, { "epoch": 0.08823529411764706, "grad_norm": 0.46683916449546814, "learning_rate": 4.412236889047449e-05, "loss": 0.2555, "step": 4947 }, { "epoch": 0.08825313023936075, "grad_norm": 0.2919166088104248, "learning_rate": 4.41312879058152e-05, "loss": 0.2596, "step": 4948 }, { "epoch": 0.08827096636107445, "grad_norm": 0.36694854497909546, "learning_rate": 4.414020692115591e-05, "loss": 0.2762, "step": 4949 }, { "epoch": 0.08828880248278814, "grad_norm": 0.3404950797557831, "learning_rate": 4.414912593649661e-05, "loss": 0.2348, "step": 4950 }, { "epoch": 0.08830663860450183, "grad_norm": 0.3095071017742157, "learning_rate": 4.4158044951837316e-05, "loss": 0.239, "step": 4951 }, { "epoch": 0.08832447472621553, "grad_norm": 0.36194124817848206, "learning_rate": 4.416696396717803e-05, "loss": 0.2648, "step": 4952 }, { "epoch": 0.08834231084792922, "grad_norm": 0.46115776896476746, "learning_rate": 4.4175882982518736e-05, "loss": 0.2588, "step": 4953 }, { "epoch": 0.08836014696964292, "grad_norm": 0.289004921913147, "learning_rate": 4.4184801997859436e-05, "loss": 0.2245, "step": 4954 }, { "epoch": 0.08837798309135661, "grad_norm": 0.5100419521331787, "learning_rate": 4.419372101320015e-05, "loss": 0.1669, "step": 4955 }, { "epoch": 0.08839581921307031, "grad_norm": 0.35682612657546997, "learning_rate": 4.4202640028540855e-05, "loss": 0.306, "step": 4956 }, { "epoch": 0.088413655334784, "grad_norm": 0.27299579977989197, "learning_rate": 4.4211559043881555e-05, "loss": 0.2441, "step": 4957 }, { "epoch": 0.0884314914564977, "grad_norm": 0.292504221200943, "learning_rate": 4.422047805922226e-05, "loss": 0.2482, "step": 4958 }, { "epoch": 0.08844932757821139, "grad_norm": 0.2863588035106659, "learning_rate": 4.4229397074562974e-05, "loss": 0.2306, "step": 4959 }, { "epoch": 0.08846716369992509, "grad_norm": 0.3589595556259155, "learning_rate": 4.4238316089903674e-05, "loss": 0.2794, "step": 4960 }, { "epoch": 0.08848499982163878, "grad_norm": 0.2773011028766632, "learning_rate": 4.424723510524438e-05, "loss": 0.2462, "step": 4961 }, { "epoch": 0.08850283594335248, "grad_norm": 0.26829320192337036, "learning_rate": 4.4256154120585093e-05, "loss": 0.2119, "step": 4962 }, { "epoch": 0.08852067206506617, "grad_norm": 0.4274921119213104, "learning_rate": 4.426507313592579e-05, "loss": 0.2513, "step": 4963 }, { "epoch": 0.08853850818677987, "grad_norm": 1.098132848739624, "learning_rate": 4.42739921512665e-05, "loss": 0.2293, "step": 4964 }, { "epoch": 0.08855634430849356, "grad_norm": 0.2858351171016693, "learning_rate": 4.428291116660721e-05, "loss": 0.2577, "step": 4965 }, { "epoch": 0.08857418043020726, "grad_norm": 0.25789639353752136, "learning_rate": 4.429183018194791e-05, "loss": 0.23, "step": 4966 }, { "epoch": 0.08859201655192095, "grad_norm": 0.3119671940803528, "learning_rate": 4.430074919728862e-05, "loss": 0.2268, "step": 4967 }, { "epoch": 0.08860985267363465, "grad_norm": 0.3688700199127197, "learning_rate": 4.430966821262933e-05, "loss": 0.295, "step": 4968 }, { "epoch": 0.08862768879534834, "grad_norm": 0.3509555160999298, "learning_rate": 4.431858722797003e-05, "loss": 0.3027, "step": 4969 }, { "epoch": 0.08864552491706204, "grad_norm": 0.2632255256175995, "learning_rate": 4.432750624331074e-05, "loss": 0.1859, "step": 4970 }, { "epoch": 0.08866336103877573, "grad_norm": 0.28133201599121094, "learning_rate": 4.433642525865145e-05, "loss": 0.2543, "step": 4971 }, { "epoch": 0.08868119716048942, "grad_norm": 0.26210176944732666, "learning_rate": 4.434534427399215e-05, "loss": 0.2369, "step": 4972 }, { "epoch": 0.08869903328220312, "grad_norm": 0.2326745241880417, "learning_rate": 4.435426328933286e-05, "loss": 0.2463, "step": 4973 }, { "epoch": 0.0887168694039168, "grad_norm": 0.2702323794364929, "learning_rate": 4.436318230467357e-05, "loss": 0.2667, "step": 4974 }, { "epoch": 0.08873470552563051, "grad_norm": 0.2708186209201813, "learning_rate": 4.437210132001427e-05, "loss": 0.2358, "step": 4975 }, { "epoch": 0.0887525416473442, "grad_norm": 0.3113460838794708, "learning_rate": 4.4381020335354976e-05, "loss": 0.2446, "step": 4976 }, { "epoch": 0.0887703777690579, "grad_norm": 0.42462819814682007, "learning_rate": 4.438993935069569e-05, "loss": 0.277, "step": 4977 }, { "epoch": 0.08878821389077159, "grad_norm": 0.3471202254295349, "learning_rate": 4.4398858366036396e-05, "loss": 0.2758, "step": 4978 }, { "epoch": 0.08880605001248529, "grad_norm": 0.4598161578178406, "learning_rate": 4.4407777381377095e-05, "loss": 0.3003, "step": 4979 }, { "epoch": 0.08882388613419898, "grad_norm": 0.3271152079105377, "learning_rate": 4.441669639671781e-05, "loss": 0.3212, "step": 4980 }, { "epoch": 0.08884172225591268, "grad_norm": 0.35090136528015137, "learning_rate": 4.4425615412058515e-05, "loss": 0.2549, "step": 4981 }, { "epoch": 0.08885955837762637, "grad_norm": 0.3053995668888092, "learning_rate": 4.4434534427399214e-05, "loss": 0.2736, "step": 4982 }, { "epoch": 0.08887739449934007, "grad_norm": 0.3149849772453308, "learning_rate": 4.444345344273993e-05, "loss": 0.2712, "step": 4983 }, { "epoch": 0.08889523062105376, "grad_norm": 0.37885212898254395, "learning_rate": 4.4452372458080634e-05, "loss": 0.2931, "step": 4984 }, { "epoch": 0.08891306674276746, "grad_norm": 0.30231600999832153, "learning_rate": 4.4461291473421333e-05, "loss": 0.2529, "step": 4985 }, { "epoch": 0.08893090286448115, "grad_norm": 0.371078222990036, "learning_rate": 4.447021048876204e-05, "loss": 0.3152, "step": 4986 }, { "epoch": 0.08894873898619485, "grad_norm": 0.29761597514152527, "learning_rate": 4.447912950410275e-05, "loss": 0.3016, "step": 4987 }, { "epoch": 0.08896657510790854, "grad_norm": 0.32581159472465515, "learning_rate": 4.448804851944345e-05, "loss": 0.2872, "step": 4988 }, { "epoch": 0.08898441122962224, "grad_norm": 0.32894980907440186, "learning_rate": 4.449696753478416e-05, "loss": 0.2952, "step": 4989 }, { "epoch": 0.08900224735133593, "grad_norm": 0.36970022320747375, "learning_rate": 4.450588655012487e-05, "loss": 0.261, "step": 4990 }, { "epoch": 0.08902008347304961, "grad_norm": 0.38638272881507874, "learning_rate": 4.451480556546557e-05, "loss": 0.3021, "step": 4991 }, { "epoch": 0.08903791959476332, "grad_norm": 0.2911534607410431, "learning_rate": 4.452372458080628e-05, "loss": 0.2856, "step": 4992 }, { "epoch": 0.089055755716477, "grad_norm": 0.33513301610946655, "learning_rate": 4.453264359614699e-05, "loss": 0.2912, "step": 4993 }, { "epoch": 0.0890735918381907, "grad_norm": 0.2692374289035797, "learning_rate": 4.454156261148769e-05, "loss": 0.2442, "step": 4994 }, { "epoch": 0.08909142795990439, "grad_norm": 0.38486340641975403, "learning_rate": 4.45504816268284e-05, "loss": 0.2701, "step": 4995 }, { "epoch": 0.0891092640816181, "grad_norm": 0.34908363223075867, "learning_rate": 4.455940064216911e-05, "loss": 0.2173, "step": 4996 }, { "epoch": 0.08912710020333178, "grad_norm": 0.30877748131752014, "learning_rate": 4.456831965750981e-05, "loss": 0.2494, "step": 4997 }, { "epoch": 0.08914493632504548, "grad_norm": 0.3526933789253235, "learning_rate": 4.4577238672850516e-05, "loss": 0.2588, "step": 4998 }, { "epoch": 0.08916277244675917, "grad_norm": 0.36577802896499634, "learning_rate": 4.458615768819123e-05, "loss": 0.2813, "step": 4999 }, { "epoch": 0.08918060856847287, "grad_norm": 0.29689255356788635, "learning_rate": 4.459507670353193e-05, "loss": 0.2499, "step": 5000 }, { "epoch": 0.08918060856847287, "eval_loss": 0.2507364749908447, "eval_runtime": 2922.4672, "eval_samples_per_second": 0.35, "eval_steps_per_second": 0.059, "step": 5000 }, { "epoch": 0.08919844469018656, "grad_norm": 0.3900417983531952, "learning_rate": 4.4603995718872635e-05, "loss": 0.2357, "step": 5001 }, { "epoch": 0.08921628081190026, "grad_norm": 0.38965821266174316, "learning_rate": 4.461291473421335e-05, "loss": 0.2817, "step": 5002 }, { "epoch": 0.08923411693361395, "grad_norm": 0.367204874753952, "learning_rate": 4.4621833749554055e-05, "loss": 0.2932, "step": 5003 }, { "epoch": 0.08925195305532765, "grad_norm": 0.3464229702949524, "learning_rate": 4.4630752764894755e-05, "loss": 0.2551, "step": 5004 }, { "epoch": 0.08926978917704134, "grad_norm": 0.4570404589176178, "learning_rate": 4.463967178023547e-05, "loss": 0.3015, "step": 5005 }, { "epoch": 0.08928762529875504, "grad_norm": 0.35674813389778137, "learning_rate": 4.4648590795576174e-05, "loss": 0.2493, "step": 5006 }, { "epoch": 0.08930546142046873, "grad_norm": 0.3751763701438904, "learning_rate": 4.4657509810916874e-05, "loss": 0.2948, "step": 5007 }, { "epoch": 0.08932329754218243, "grad_norm": 0.3086523115634918, "learning_rate": 4.466642882625759e-05, "loss": 0.2599, "step": 5008 }, { "epoch": 0.08934113366389612, "grad_norm": 0.2897014319896698, "learning_rate": 4.467534784159829e-05, "loss": 0.2575, "step": 5009 }, { "epoch": 0.08935896978560982, "grad_norm": 0.29343584179878235, "learning_rate": 4.468426685693899e-05, "loss": 0.2027, "step": 5010 }, { "epoch": 0.08937680590732351, "grad_norm": 0.30803829431533813, "learning_rate": 4.46931858722797e-05, "loss": 0.2133, "step": 5011 }, { "epoch": 0.0893946420290372, "grad_norm": 0.3076460063457489, "learning_rate": 4.470210488762041e-05, "loss": 0.2652, "step": 5012 }, { "epoch": 0.0894124781507509, "grad_norm": 0.3930947780609131, "learning_rate": 4.471102390296111e-05, "loss": 0.3208, "step": 5013 }, { "epoch": 0.08943031427246459, "grad_norm": 0.2848929762840271, "learning_rate": 4.471994291830182e-05, "loss": 0.2346, "step": 5014 }, { "epoch": 0.08944815039417829, "grad_norm": 0.3582049608230591, "learning_rate": 4.472886193364253e-05, "loss": 0.2863, "step": 5015 }, { "epoch": 0.08946598651589198, "grad_norm": 0.28518861532211304, "learning_rate": 4.473778094898323e-05, "loss": 0.3019, "step": 5016 }, { "epoch": 0.08948382263760568, "grad_norm": 0.44029179215431213, "learning_rate": 4.474669996432394e-05, "loss": 0.2976, "step": 5017 }, { "epoch": 0.08950165875931937, "grad_norm": 0.26706603169441223, "learning_rate": 4.475561897966465e-05, "loss": 0.2644, "step": 5018 }, { "epoch": 0.08951949488103307, "grad_norm": 0.4160476326942444, "learning_rate": 4.476453799500535e-05, "loss": 0.2355, "step": 5019 }, { "epoch": 0.08953733100274676, "grad_norm": 0.35158583521842957, "learning_rate": 4.477345701034606e-05, "loss": 0.2537, "step": 5020 }, { "epoch": 0.08955516712446046, "grad_norm": 0.3176898956298828, "learning_rate": 4.478237602568677e-05, "loss": 0.2393, "step": 5021 }, { "epoch": 0.08957300324617415, "grad_norm": 0.3681906461715698, "learning_rate": 4.479129504102747e-05, "loss": 0.3118, "step": 5022 }, { "epoch": 0.08959083936788785, "grad_norm": 0.4188217520713806, "learning_rate": 4.4800214056368176e-05, "loss": 0.2476, "step": 5023 }, { "epoch": 0.08960867548960154, "grad_norm": 0.25581496953964233, "learning_rate": 4.480913307170889e-05, "loss": 0.255, "step": 5024 }, { "epoch": 0.08962651161131524, "grad_norm": 0.29971587657928467, "learning_rate": 4.4818052087049595e-05, "loss": 0.2445, "step": 5025 }, { "epoch": 0.08964434773302893, "grad_norm": 0.312463641166687, "learning_rate": 4.4826971102390295e-05, "loss": 0.2476, "step": 5026 }, { "epoch": 0.08966218385474263, "grad_norm": 0.35160860419273376, "learning_rate": 4.483589011773101e-05, "loss": 0.2737, "step": 5027 }, { "epoch": 0.08968001997645632, "grad_norm": 0.4038321375846863, "learning_rate": 4.4844809133071715e-05, "loss": 0.3297, "step": 5028 }, { "epoch": 0.08969785609817002, "grad_norm": 0.43063589930534363, "learning_rate": 4.4853728148412414e-05, "loss": 0.2686, "step": 5029 }, { "epoch": 0.08971569221988371, "grad_norm": 0.31962692737579346, "learning_rate": 4.486264716375313e-05, "loss": 0.2471, "step": 5030 }, { "epoch": 0.08973352834159741, "grad_norm": 0.40811246633529663, "learning_rate": 4.4871566179093834e-05, "loss": 0.231, "step": 5031 }, { "epoch": 0.0897513644633111, "grad_norm": 0.2768667936325073, "learning_rate": 4.488048519443453e-05, "loss": 0.2312, "step": 5032 }, { "epoch": 0.08976920058502479, "grad_norm": 0.38604292273521423, "learning_rate": 4.4889404209775246e-05, "loss": 0.1854, "step": 5033 }, { "epoch": 0.08978703670673849, "grad_norm": 0.27677640318870544, "learning_rate": 4.489832322511595e-05, "loss": 0.2339, "step": 5034 }, { "epoch": 0.08980487282845218, "grad_norm": 0.32057884335517883, "learning_rate": 4.490724224045665e-05, "loss": 0.2316, "step": 5035 }, { "epoch": 0.08982270895016588, "grad_norm": 0.3619246780872345, "learning_rate": 4.491616125579736e-05, "loss": 0.2728, "step": 5036 }, { "epoch": 0.08984054507187957, "grad_norm": 0.4090946316719055, "learning_rate": 4.492508027113807e-05, "loss": 0.2635, "step": 5037 }, { "epoch": 0.08985838119359327, "grad_norm": 0.4344578683376312, "learning_rate": 4.493399928647877e-05, "loss": 0.3319, "step": 5038 }, { "epoch": 0.08987621731530696, "grad_norm": 0.3746897578239441, "learning_rate": 4.494291830181948e-05, "loss": 0.2776, "step": 5039 }, { "epoch": 0.08989405343702066, "grad_norm": 0.45085152983665466, "learning_rate": 4.495183731716019e-05, "loss": 0.2663, "step": 5040 }, { "epoch": 0.08991188955873435, "grad_norm": 0.3534441590309143, "learning_rate": 4.496075633250089e-05, "loss": 0.2743, "step": 5041 }, { "epoch": 0.08992972568044805, "grad_norm": 0.319785475730896, "learning_rate": 4.49696753478416e-05, "loss": 0.2439, "step": 5042 }, { "epoch": 0.08994756180216174, "grad_norm": 0.37395069003105164, "learning_rate": 4.497859436318231e-05, "loss": 0.2875, "step": 5043 }, { "epoch": 0.08996539792387544, "grad_norm": 0.294408917427063, "learning_rate": 4.498751337852301e-05, "loss": 0.2552, "step": 5044 }, { "epoch": 0.08998323404558912, "grad_norm": 0.291103720664978, "learning_rate": 4.4996432393863716e-05, "loss": 0.2545, "step": 5045 }, { "epoch": 0.09000107016730283, "grad_norm": 0.24002714455127716, "learning_rate": 4.500535140920443e-05, "loss": 0.2506, "step": 5046 }, { "epoch": 0.09001890628901651, "grad_norm": 0.2830826938152313, "learning_rate": 4.501427042454513e-05, "loss": 0.2416, "step": 5047 }, { "epoch": 0.09003674241073022, "grad_norm": 0.4030207395553589, "learning_rate": 4.5023189439885835e-05, "loss": 0.3342, "step": 5048 }, { "epoch": 0.0900545785324439, "grad_norm": 0.31538429856300354, "learning_rate": 4.503210845522655e-05, "loss": 0.2747, "step": 5049 }, { "epoch": 0.0900724146541576, "grad_norm": 0.2968837320804596, "learning_rate": 4.5041027470567255e-05, "loss": 0.2613, "step": 5050 }, { "epoch": 0.0900902507758713, "grad_norm": 0.2753019630908966, "learning_rate": 4.5049946485907954e-05, "loss": 0.2555, "step": 5051 }, { "epoch": 0.09010808689758498, "grad_norm": 0.3237540125846863, "learning_rate": 4.505886550124867e-05, "loss": 0.2543, "step": 5052 }, { "epoch": 0.09012592301929868, "grad_norm": 0.26061129570007324, "learning_rate": 4.5067784516589374e-05, "loss": 0.2532, "step": 5053 }, { "epoch": 0.09014375914101237, "grad_norm": 0.23188789188861847, "learning_rate": 4.5076703531930074e-05, "loss": 0.2195, "step": 5054 }, { "epoch": 0.09016159526272607, "grad_norm": 0.3326053321361542, "learning_rate": 4.508562254727079e-05, "loss": 0.2786, "step": 5055 }, { "epoch": 0.09017943138443976, "grad_norm": 0.30801907181739807, "learning_rate": 4.509454156261149e-05, "loss": 0.2436, "step": 5056 }, { "epoch": 0.09019726750615346, "grad_norm": 0.24577617645263672, "learning_rate": 4.510346057795219e-05, "loss": 0.2679, "step": 5057 }, { "epoch": 0.09021510362786715, "grad_norm": 0.2857646048069, "learning_rate": 4.5112379593292906e-05, "loss": 0.2798, "step": 5058 }, { "epoch": 0.09023293974958085, "grad_norm": 0.3690529465675354, "learning_rate": 4.512129860863361e-05, "loss": 0.3231, "step": 5059 }, { "epoch": 0.09025077587129454, "grad_norm": 0.5927678346633911, "learning_rate": 4.513021762397431e-05, "loss": 0.3099, "step": 5060 }, { "epoch": 0.09026861199300824, "grad_norm": 0.3103553056716919, "learning_rate": 4.513913663931502e-05, "loss": 0.3029, "step": 5061 }, { "epoch": 0.09028644811472193, "grad_norm": 0.28680020570755005, "learning_rate": 4.514805565465573e-05, "loss": 0.2599, "step": 5062 }, { "epoch": 0.09030428423643563, "grad_norm": 0.3033756911754608, "learning_rate": 4.515697466999643e-05, "loss": 0.2815, "step": 5063 }, { "epoch": 0.09032212035814932, "grad_norm": 0.3972911238670349, "learning_rate": 4.516589368533714e-05, "loss": 0.2772, "step": 5064 }, { "epoch": 0.09033995647986302, "grad_norm": 0.3096371293067932, "learning_rate": 4.517481270067785e-05, "loss": 0.2618, "step": 5065 }, { "epoch": 0.09035779260157671, "grad_norm": 0.45703354477882385, "learning_rate": 4.518373171601855e-05, "loss": 0.2896, "step": 5066 }, { "epoch": 0.09037562872329041, "grad_norm": 0.3486328125, "learning_rate": 4.5192650731359257e-05, "loss": 0.3234, "step": 5067 }, { "epoch": 0.0903934648450041, "grad_norm": 0.29529869556427, "learning_rate": 4.520156974669997e-05, "loss": 0.2375, "step": 5068 }, { "epoch": 0.0904113009667178, "grad_norm": 0.28080740571022034, "learning_rate": 4.521048876204067e-05, "loss": 0.2382, "step": 5069 }, { "epoch": 0.09042913708843149, "grad_norm": 0.3457990288734436, "learning_rate": 4.5219407777381376e-05, "loss": 0.2526, "step": 5070 }, { "epoch": 0.09044697321014519, "grad_norm": 0.36086606979370117, "learning_rate": 4.522832679272209e-05, "loss": 0.2684, "step": 5071 }, { "epoch": 0.09046480933185888, "grad_norm": 0.3191145062446594, "learning_rate": 4.5237245808062795e-05, "loss": 0.2999, "step": 5072 }, { "epoch": 0.09048264545357257, "grad_norm": 0.28212714195251465, "learning_rate": 4.5246164823403495e-05, "loss": 0.2525, "step": 5073 }, { "epoch": 0.09050048157528627, "grad_norm": 0.31841787695884705, "learning_rate": 4.525508383874421e-05, "loss": 0.2332, "step": 5074 }, { "epoch": 0.09051831769699996, "grad_norm": 0.26870378851890564, "learning_rate": 4.5264002854084914e-05, "loss": 0.2565, "step": 5075 }, { "epoch": 0.09053615381871366, "grad_norm": 0.31851646304130554, "learning_rate": 4.5272921869425614e-05, "loss": 0.246, "step": 5076 }, { "epoch": 0.09055398994042735, "grad_norm": 0.3371538817882538, "learning_rate": 4.528184088476633e-05, "loss": 0.2946, "step": 5077 }, { "epoch": 0.09057182606214105, "grad_norm": 0.34399983286857605, "learning_rate": 4.5290759900107034e-05, "loss": 0.2527, "step": 5078 }, { "epoch": 0.09058966218385474, "grad_norm": 0.34212473034858704, "learning_rate": 4.529967891544773e-05, "loss": 0.2578, "step": 5079 }, { "epoch": 0.09060749830556844, "grad_norm": 0.31323686242103577, "learning_rate": 4.5308597930788446e-05, "loss": 0.265, "step": 5080 }, { "epoch": 0.09062533442728213, "grad_norm": 0.31873616576194763, "learning_rate": 4.531751694612915e-05, "loss": 0.2875, "step": 5081 }, { "epoch": 0.09064317054899583, "grad_norm": 0.28577306866645813, "learning_rate": 4.532643596146985e-05, "loss": 0.2017, "step": 5082 }, { "epoch": 0.09066100667070952, "grad_norm": 0.2733782231807709, "learning_rate": 4.5335354976810565e-05, "loss": 0.2722, "step": 5083 }, { "epoch": 0.09067884279242322, "grad_norm": 0.32702550292015076, "learning_rate": 4.534427399215127e-05, "loss": 0.3115, "step": 5084 }, { "epoch": 0.09069667891413691, "grad_norm": 0.36680689454078674, "learning_rate": 4.535319300749197e-05, "loss": 0.2593, "step": 5085 }, { "epoch": 0.09071451503585061, "grad_norm": 0.3376055955886841, "learning_rate": 4.5362112022832685e-05, "loss": 0.2365, "step": 5086 }, { "epoch": 0.0907323511575643, "grad_norm": 0.3718792200088501, "learning_rate": 4.537103103817339e-05, "loss": 0.247, "step": 5087 }, { "epoch": 0.090750187279278, "grad_norm": 0.28810974955558777, "learning_rate": 4.537995005351409e-05, "loss": 0.2292, "step": 5088 }, { "epoch": 0.09076802340099169, "grad_norm": 0.3739653527736664, "learning_rate": 4.53888690688548e-05, "loss": 0.2592, "step": 5089 }, { "epoch": 0.09078585952270539, "grad_norm": 0.6185100078582764, "learning_rate": 4.539778808419551e-05, "loss": 0.2439, "step": 5090 }, { "epoch": 0.09080369564441908, "grad_norm": 0.33142533898353577, "learning_rate": 4.540670709953621e-05, "loss": 0.2394, "step": 5091 }, { "epoch": 0.09082153176613277, "grad_norm": 0.38457319140434265, "learning_rate": 4.5415626114876916e-05, "loss": 0.3097, "step": 5092 }, { "epoch": 0.09083936788784647, "grad_norm": 0.34872955083847046, "learning_rate": 4.542454513021763e-05, "loss": 0.273, "step": 5093 }, { "epoch": 0.09085720400956016, "grad_norm": 0.3680689036846161, "learning_rate": 4.543346414555833e-05, "loss": 0.2887, "step": 5094 }, { "epoch": 0.09087504013127386, "grad_norm": 0.35620009899139404, "learning_rate": 4.5442383160899035e-05, "loss": 0.2702, "step": 5095 }, { "epoch": 0.09089287625298755, "grad_norm": 0.32059526443481445, "learning_rate": 4.545130217623975e-05, "loss": 0.2848, "step": 5096 }, { "epoch": 0.09091071237470125, "grad_norm": 0.29133662581443787, "learning_rate": 4.5460221191580455e-05, "loss": 0.2701, "step": 5097 }, { "epoch": 0.09092854849641493, "grad_norm": 0.31909459829330444, "learning_rate": 4.5469140206921154e-05, "loss": 0.2644, "step": 5098 }, { "epoch": 0.09094638461812864, "grad_norm": 0.8448095917701721, "learning_rate": 4.547805922226187e-05, "loss": 0.3252, "step": 5099 }, { "epoch": 0.09096422073984232, "grad_norm": 0.3424850404262543, "learning_rate": 4.5486978237602574e-05, "loss": 0.311, "step": 5100 }, { "epoch": 0.09098205686155603, "grad_norm": 0.2732887864112854, "learning_rate": 4.5495897252943274e-05, "loss": 0.2886, "step": 5101 }, { "epoch": 0.09099989298326971, "grad_norm": 0.265444815158844, "learning_rate": 4.550481626828399e-05, "loss": 0.2267, "step": 5102 }, { "epoch": 0.09101772910498342, "grad_norm": 0.4205586314201355, "learning_rate": 4.551373528362469e-05, "loss": 0.322, "step": 5103 }, { "epoch": 0.0910355652266971, "grad_norm": 0.3834914267063141, "learning_rate": 4.552265429896539e-05, "loss": 0.3337, "step": 5104 }, { "epoch": 0.0910534013484108, "grad_norm": 0.2619171142578125, "learning_rate": 4.5531573314306106e-05, "loss": 0.2565, "step": 5105 }, { "epoch": 0.0910712374701245, "grad_norm": 0.3529984951019287, "learning_rate": 4.554049232964681e-05, "loss": 0.2569, "step": 5106 }, { "epoch": 0.0910890735918382, "grad_norm": 0.32578882575035095, "learning_rate": 4.554941134498751e-05, "loss": 0.2809, "step": 5107 }, { "epoch": 0.09110690971355188, "grad_norm": 0.3088315427303314, "learning_rate": 4.5558330360328225e-05, "loss": 0.2674, "step": 5108 }, { "epoch": 0.09112474583526559, "grad_norm": 0.3643244504928589, "learning_rate": 4.556724937566893e-05, "loss": 0.2374, "step": 5109 }, { "epoch": 0.09114258195697927, "grad_norm": 0.3342103958129883, "learning_rate": 4.557616839100963e-05, "loss": 0.2853, "step": 5110 }, { "epoch": 0.09116041807869298, "grad_norm": 0.2729085087776184, "learning_rate": 4.5585087406350344e-05, "loss": 0.2261, "step": 5111 }, { "epoch": 0.09117825420040666, "grad_norm": 0.3470782935619354, "learning_rate": 4.559400642169105e-05, "loss": 0.2994, "step": 5112 }, { "epoch": 0.09119609032212035, "grad_norm": 0.4202685058116913, "learning_rate": 4.560292543703175e-05, "loss": 0.2927, "step": 5113 }, { "epoch": 0.09121392644383405, "grad_norm": 0.42108237743377686, "learning_rate": 4.5611844452372456e-05, "loss": 0.2534, "step": 5114 }, { "epoch": 0.09123176256554774, "grad_norm": 0.29236888885498047, "learning_rate": 4.562076346771317e-05, "loss": 0.2436, "step": 5115 }, { "epoch": 0.09124959868726144, "grad_norm": 0.40555712580680847, "learning_rate": 4.562968248305387e-05, "loss": 0.2821, "step": 5116 }, { "epoch": 0.09126743480897513, "grad_norm": 0.3334695100784302, "learning_rate": 4.5638601498394576e-05, "loss": 0.253, "step": 5117 }, { "epoch": 0.09128527093068883, "grad_norm": 0.2780584394931793, "learning_rate": 4.564752051373529e-05, "loss": 0.2183, "step": 5118 }, { "epoch": 0.09130310705240252, "grad_norm": 0.4002425968647003, "learning_rate": 4.565643952907599e-05, "loss": 0.2399, "step": 5119 }, { "epoch": 0.09132094317411622, "grad_norm": 0.3527221083641052, "learning_rate": 4.5665358544416695e-05, "loss": 0.2548, "step": 5120 }, { "epoch": 0.09133877929582991, "grad_norm": 0.5263444185256958, "learning_rate": 4.567427755975741e-05, "loss": 0.2404, "step": 5121 }, { "epoch": 0.09135661541754361, "grad_norm": 0.4406464695930481, "learning_rate": 4.5683196575098114e-05, "loss": 0.2126, "step": 5122 }, { "epoch": 0.0913744515392573, "grad_norm": 0.29340827465057373, "learning_rate": 4.5692115590438814e-05, "loss": 0.2557, "step": 5123 }, { "epoch": 0.091392287660971, "grad_norm": 0.4390156865119934, "learning_rate": 4.570103460577953e-05, "loss": 0.3126, "step": 5124 }, { "epoch": 0.09141012378268469, "grad_norm": 0.26983797550201416, "learning_rate": 4.5709953621120233e-05, "loss": 0.26, "step": 5125 }, { "epoch": 0.09142795990439839, "grad_norm": 0.34206798672676086, "learning_rate": 4.571887263646093e-05, "loss": 0.2877, "step": 5126 }, { "epoch": 0.09144579602611208, "grad_norm": 0.3147221803665161, "learning_rate": 4.5727791651801646e-05, "loss": 0.3144, "step": 5127 }, { "epoch": 0.09146363214782578, "grad_norm": 0.3480060398578644, "learning_rate": 4.573671066714235e-05, "loss": 0.2452, "step": 5128 }, { "epoch": 0.09148146826953947, "grad_norm": 0.2871723175048828, "learning_rate": 4.574562968248305e-05, "loss": 0.2648, "step": 5129 }, { "epoch": 0.09149930439125317, "grad_norm": 0.44824180006980896, "learning_rate": 4.5754548697823765e-05, "loss": 0.2682, "step": 5130 }, { "epoch": 0.09151714051296686, "grad_norm": 0.2663950026035309, "learning_rate": 4.576346771316447e-05, "loss": 0.2481, "step": 5131 }, { "epoch": 0.09153497663468055, "grad_norm": 0.33258599042892456, "learning_rate": 4.577238672850517e-05, "loss": 0.2881, "step": 5132 }, { "epoch": 0.09155281275639425, "grad_norm": 0.33275696635246277, "learning_rate": 4.5781305743845884e-05, "loss": 0.1321, "step": 5133 }, { "epoch": 0.09157064887810794, "grad_norm": 0.2972774803638458, "learning_rate": 4.579022475918659e-05, "loss": 0.2484, "step": 5134 }, { "epoch": 0.09158848499982164, "grad_norm": 0.2894943654537201, "learning_rate": 4.579914377452729e-05, "loss": 0.2503, "step": 5135 }, { "epoch": 0.09160632112153533, "grad_norm": 0.26859593391418457, "learning_rate": 4.5808062789868004e-05, "loss": 0.2456, "step": 5136 }, { "epoch": 0.09162415724324903, "grad_norm": 0.31814223527908325, "learning_rate": 4.581698180520871e-05, "loss": 0.2894, "step": 5137 }, { "epoch": 0.09164199336496272, "grad_norm": 0.31929415464401245, "learning_rate": 4.582590082054941e-05, "loss": 0.2556, "step": 5138 }, { "epoch": 0.09165982948667642, "grad_norm": 0.28758710622787476, "learning_rate": 4.5834819835890116e-05, "loss": 0.2454, "step": 5139 }, { "epoch": 0.09167766560839011, "grad_norm": 0.3237939178943634, "learning_rate": 4.584373885123083e-05, "loss": 0.2807, "step": 5140 }, { "epoch": 0.09169550173010381, "grad_norm": 0.434439480304718, "learning_rate": 4.585265786657153e-05, "loss": 0.3086, "step": 5141 }, { "epoch": 0.0917133378518175, "grad_norm": 0.3564210832118988, "learning_rate": 4.5861576881912235e-05, "loss": 0.2419, "step": 5142 }, { "epoch": 0.0917311739735312, "grad_norm": 0.3027274012565613, "learning_rate": 4.587049589725295e-05, "loss": 0.2107, "step": 5143 }, { "epoch": 0.09174901009524489, "grad_norm": 0.3049252927303314, "learning_rate": 4.5879414912593655e-05, "loss": 0.2887, "step": 5144 }, { "epoch": 0.09176684621695859, "grad_norm": 0.31892675161361694, "learning_rate": 4.5888333927934354e-05, "loss": 0.251, "step": 5145 }, { "epoch": 0.09178468233867228, "grad_norm": 0.34583544731140137, "learning_rate": 4.589725294327507e-05, "loss": 0.2338, "step": 5146 }, { "epoch": 0.09180251846038598, "grad_norm": 0.2539772391319275, "learning_rate": 4.5906171958615774e-05, "loss": 0.2323, "step": 5147 }, { "epoch": 0.09182035458209967, "grad_norm": 0.20981602370738983, "learning_rate": 4.5915090973956473e-05, "loss": 0.2041, "step": 5148 }, { "epoch": 0.09183819070381337, "grad_norm": 0.32836177945137024, "learning_rate": 4.5924009989297187e-05, "loss": 0.262, "step": 5149 }, { "epoch": 0.09185602682552706, "grad_norm": 0.3607218265533447, "learning_rate": 4.593292900463789e-05, "loss": 0.2704, "step": 5150 }, { "epoch": 0.09187386294724076, "grad_norm": 0.33284297585487366, "learning_rate": 4.594184801997859e-05, "loss": 0.254, "step": 5151 }, { "epoch": 0.09189169906895445, "grad_norm": 0.28724274039268494, "learning_rate": 4.5950767035319306e-05, "loss": 0.2936, "step": 5152 }, { "epoch": 0.09190953519066813, "grad_norm": 0.3357253968715668, "learning_rate": 4.595968605066001e-05, "loss": 0.2872, "step": 5153 }, { "epoch": 0.09192737131238184, "grad_norm": 0.28800761699676514, "learning_rate": 4.596860506600071e-05, "loss": 0.2446, "step": 5154 }, { "epoch": 0.09194520743409552, "grad_norm": 0.41326186060905457, "learning_rate": 4.5977524081341425e-05, "loss": 0.2245, "step": 5155 }, { "epoch": 0.09196304355580923, "grad_norm": 0.3237703740596771, "learning_rate": 4.598644309668213e-05, "loss": 0.292, "step": 5156 }, { "epoch": 0.09198087967752291, "grad_norm": 0.33980509638786316, "learning_rate": 4.599536211202283e-05, "loss": 0.2641, "step": 5157 }, { "epoch": 0.09199871579923662, "grad_norm": 0.4123384356498718, "learning_rate": 4.6004281127363544e-05, "loss": 0.2272, "step": 5158 }, { "epoch": 0.0920165519209503, "grad_norm": 0.30670472979545593, "learning_rate": 4.601320014270425e-05, "loss": 0.2422, "step": 5159 }, { "epoch": 0.092034388042664, "grad_norm": 0.31674468517303467, "learning_rate": 4.602211915804495e-05, "loss": 0.2867, "step": 5160 }, { "epoch": 0.0920522241643777, "grad_norm": 0.3755401074886322, "learning_rate": 4.603103817338566e-05, "loss": 0.317, "step": 5161 }, { "epoch": 0.0920700602860914, "grad_norm": 0.4044760763645172, "learning_rate": 4.603995718872637e-05, "loss": 0.3102, "step": 5162 }, { "epoch": 0.09208789640780508, "grad_norm": 0.2901434600353241, "learning_rate": 4.604887620406707e-05, "loss": 0.2637, "step": 5163 }, { "epoch": 0.09210573252951879, "grad_norm": 0.2719894349575043, "learning_rate": 4.6057795219407775e-05, "loss": 0.2421, "step": 5164 }, { "epoch": 0.09212356865123247, "grad_norm": 0.27546483278274536, "learning_rate": 4.606671423474849e-05, "loss": 0.2401, "step": 5165 }, { "epoch": 0.09214140477294618, "grad_norm": 0.3608858585357666, "learning_rate": 4.607563325008919e-05, "loss": 0.3028, "step": 5166 }, { "epoch": 0.09215924089465986, "grad_norm": 0.2674354016780853, "learning_rate": 4.6084552265429895e-05, "loss": 0.2262, "step": 5167 }, { "epoch": 0.09217707701637357, "grad_norm": 0.41327106952667236, "learning_rate": 4.609347128077061e-05, "loss": 0.2814, "step": 5168 }, { "epoch": 0.09219491313808725, "grad_norm": 0.31517377495765686, "learning_rate": 4.6102390296111314e-05, "loss": 0.2511, "step": 5169 }, { "epoch": 0.09221274925980096, "grad_norm": 0.2404153048992157, "learning_rate": 4.6111309311452014e-05, "loss": 0.2113, "step": 5170 }, { "epoch": 0.09223058538151464, "grad_norm": 0.30768147110939026, "learning_rate": 4.612022832679273e-05, "loss": 0.2614, "step": 5171 }, { "epoch": 0.09224842150322833, "grad_norm": 0.26597127318382263, "learning_rate": 4.612914734213343e-05, "loss": 0.2392, "step": 5172 }, { "epoch": 0.09226625762494203, "grad_norm": 0.43181541562080383, "learning_rate": 4.613806635747413e-05, "loss": 0.2166, "step": 5173 }, { "epoch": 0.09228409374665572, "grad_norm": 0.3380992114543915, "learning_rate": 4.6146985372814846e-05, "loss": 0.2685, "step": 5174 }, { "epoch": 0.09230192986836942, "grad_norm": 0.32007479667663574, "learning_rate": 4.615590438815555e-05, "loss": 0.319, "step": 5175 }, { "epoch": 0.09231976599008311, "grad_norm": 0.4780307710170746, "learning_rate": 4.616482340349625e-05, "loss": 0.2976, "step": 5176 }, { "epoch": 0.09233760211179681, "grad_norm": 0.37829354405403137, "learning_rate": 4.6173742418836965e-05, "loss": 0.2676, "step": 5177 }, { "epoch": 0.0923554382335105, "grad_norm": 0.34576961398124695, "learning_rate": 4.618266143417767e-05, "loss": 0.2991, "step": 5178 }, { "epoch": 0.0923732743552242, "grad_norm": 0.3248400390148163, "learning_rate": 4.619158044951837e-05, "loss": 0.2702, "step": 5179 }, { "epoch": 0.09239111047693789, "grad_norm": 0.3472265601158142, "learning_rate": 4.6200499464859084e-05, "loss": 0.2362, "step": 5180 }, { "epoch": 0.09240894659865159, "grad_norm": 0.3690730035305023, "learning_rate": 4.620941848019979e-05, "loss": 0.2594, "step": 5181 }, { "epoch": 0.09242678272036528, "grad_norm": 0.4123768210411072, "learning_rate": 4.621833749554049e-05, "loss": 0.3584, "step": 5182 }, { "epoch": 0.09244461884207898, "grad_norm": 0.34526124596595764, "learning_rate": 4.6227256510881203e-05, "loss": 0.3334, "step": 5183 }, { "epoch": 0.09246245496379267, "grad_norm": 0.4360329508781433, "learning_rate": 4.623617552622191e-05, "loss": 0.1976, "step": 5184 }, { "epoch": 0.09248029108550637, "grad_norm": 0.27596762776374817, "learning_rate": 4.624509454156261e-05, "loss": 0.2612, "step": 5185 }, { "epoch": 0.09249812720722006, "grad_norm": 0.3860202431678772, "learning_rate": 4.625401355690332e-05, "loss": 0.2488, "step": 5186 }, { "epoch": 0.09251596332893376, "grad_norm": 0.3286002576351166, "learning_rate": 4.626293257224403e-05, "loss": 0.2386, "step": 5187 }, { "epoch": 0.09253379945064745, "grad_norm": 0.6195679903030396, "learning_rate": 4.627185158758473e-05, "loss": 0.2499, "step": 5188 }, { "epoch": 0.09255163557236115, "grad_norm": 0.30288946628570557, "learning_rate": 4.628077060292544e-05, "loss": 0.2776, "step": 5189 }, { "epoch": 0.09256947169407484, "grad_norm": 0.43319621682167053, "learning_rate": 4.628968961826615e-05, "loss": 0.3176, "step": 5190 }, { "epoch": 0.09258730781578854, "grad_norm": 0.2994520664215088, "learning_rate": 4.6298608633606855e-05, "loss": 0.276, "step": 5191 }, { "epoch": 0.09260514393750223, "grad_norm": 0.3264714479446411, "learning_rate": 4.6307527648947554e-05, "loss": 0.327, "step": 5192 }, { "epoch": 0.09262298005921592, "grad_norm": 0.4793960154056549, "learning_rate": 4.631644666428827e-05, "loss": 0.2698, "step": 5193 }, { "epoch": 0.09264081618092962, "grad_norm": 0.3602599501609802, "learning_rate": 4.6325365679628974e-05, "loss": 0.2978, "step": 5194 }, { "epoch": 0.09265865230264331, "grad_norm": 0.2885452210903168, "learning_rate": 4.633428469496967e-05, "loss": 0.2504, "step": 5195 }, { "epoch": 0.09267648842435701, "grad_norm": 0.3294510245323181, "learning_rate": 4.6343203710310386e-05, "loss": 0.2831, "step": 5196 }, { "epoch": 0.0926943245460707, "grad_norm": 0.25922679901123047, "learning_rate": 4.635212272565109e-05, "loss": 0.2595, "step": 5197 }, { "epoch": 0.0927121606677844, "grad_norm": 0.25883087515830994, "learning_rate": 4.636104174099179e-05, "loss": 0.2436, "step": 5198 }, { "epoch": 0.09272999678949809, "grad_norm": 0.35774874687194824, "learning_rate": 4.6369960756332506e-05, "loss": 0.2953, "step": 5199 }, { "epoch": 0.09274783291121179, "grad_norm": 0.39790821075439453, "learning_rate": 4.637887977167321e-05, "loss": 0.288, "step": 5200 }, { "epoch": 0.09276566903292548, "grad_norm": 0.3712117373943329, "learning_rate": 4.638779878701391e-05, "loss": 0.29, "step": 5201 }, { "epoch": 0.09278350515463918, "grad_norm": 0.22073934972286224, "learning_rate": 4.6396717802354625e-05, "loss": 0.235, "step": 5202 }, { "epoch": 0.09280134127635287, "grad_norm": 0.4138656258583069, "learning_rate": 4.640563681769533e-05, "loss": 0.2835, "step": 5203 }, { "epoch": 0.09281917739806657, "grad_norm": 0.3466394543647766, "learning_rate": 4.641455583303603e-05, "loss": 0.2616, "step": 5204 }, { "epoch": 0.09283701351978026, "grad_norm": 0.27189114689826965, "learning_rate": 4.6423474848376744e-05, "loss": 0.2538, "step": 5205 }, { "epoch": 0.09285484964149396, "grad_norm": 0.2862494885921478, "learning_rate": 4.643239386371745e-05, "loss": 0.2021, "step": 5206 }, { "epoch": 0.09287268576320765, "grad_norm": 0.3478764593601227, "learning_rate": 4.644131287905815e-05, "loss": 0.2739, "step": 5207 }, { "epoch": 0.09289052188492135, "grad_norm": 0.4753137528896332, "learning_rate": 4.645023189439886e-05, "loss": 0.35, "step": 5208 }, { "epoch": 0.09290835800663504, "grad_norm": 0.263412207365036, "learning_rate": 4.645915090973957e-05, "loss": 0.2261, "step": 5209 }, { "epoch": 0.09292619412834874, "grad_norm": 0.33546873927116394, "learning_rate": 4.646806992508027e-05, "loss": 0.2356, "step": 5210 }, { "epoch": 0.09294403025006243, "grad_norm": 0.4098803997039795, "learning_rate": 4.647698894042098e-05, "loss": 0.28, "step": 5211 }, { "epoch": 0.09296186637177613, "grad_norm": 0.37068724632263184, "learning_rate": 4.648590795576169e-05, "loss": 0.312, "step": 5212 }, { "epoch": 0.09297970249348982, "grad_norm": 0.2915990948677063, "learning_rate": 4.649482697110239e-05, "loss": 0.2282, "step": 5213 }, { "epoch": 0.0929975386152035, "grad_norm": 0.425316721200943, "learning_rate": 4.65037459864431e-05, "loss": 0.2766, "step": 5214 }, { "epoch": 0.0930153747369172, "grad_norm": 0.2935897707939148, "learning_rate": 4.651266500178381e-05, "loss": 0.2302, "step": 5215 }, { "epoch": 0.0930332108586309, "grad_norm": 0.3157883584499359, "learning_rate": 4.6521584017124514e-05, "loss": 0.2216, "step": 5216 }, { "epoch": 0.0930510469803446, "grad_norm": 0.2781791090965271, "learning_rate": 4.6530503032465214e-05, "loss": 0.243, "step": 5217 }, { "epoch": 0.09306888310205828, "grad_norm": 0.25051501393318176, "learning_rate": 4.653942204780593e-05, "loss": 0.211, "step": 5218 }, { "epoch": 0.09308671922377199, "grad_norm": 0.3523801565170288, "learning_rate": 4.654834106314663e-05, "loss": 0.2804, "step": 5219 }, { "epoch": 0.09310455534548567, "grad_norm": 0.29995816946029663, "learning_rate": 4.655726007848733e-05, "loss": 0.2772, "step": 5220 }, { "epoch": 0.09312239146719938, "grad_norm": 0.35171619057655334, "learning_rate": 4.6566179093828046e-05, "loss": 0.2686, "step": 5221 }, { "epoch": 0.09314022758891306, "grad_norm": 0.37922653555870056, "learning_rate": 4.657509810916875e-05, "loss": 0.2992, "step": 5222 }, { "epoch": 0.09315806371062677, "grad_norm": 0.3115438222885132, "learning_rate": 4.658401712450945e-05, "loss": 0.2606, "step": 5223 }, { "epoch": 0.09317589983234045, "grad_norm": 0.3204931616783142, "learning_rate": 4.6592936139850165e-05, "loss": 0.2658, "step": 5224 }, { "epoch": 0.09319373595405415, "grad_norm": 0.38351380825042725, "learning_rate": 4.660185515519087e-05, "loss": 0.239, "step": 5225 }, { "epoch": 0.09321157207576784, "grad_norm": 0.3427961766719818, "learning_rate": 4.661077417053157e-05, "loss": 0.2878, "step": 5226 }, { "epoch": 0.09322940819748154, "grad_norm": 0.3301418721675873, "learning_rate": 4.6619693185872284e-05, "loss": 0.2721, "step": 5227 }, { "epoch": 0.09324724431919523, "grad_norm": 0.5384332537651062, "learning_rate": 4.662861220121299e-05, "loss": 0.3164, "step": 5228 }, { "epoch": 0.09326508044090893, "grad_norm": 0.2636808753013611, "learning_rate": 4.663753121655369e-05, "loss": 0.2435, "step": 5229 }, { "epoch": 0.09328291656262262, "grad_norm": 0.4945782423019409, "learning_rate": 4.66464502318944e-05, "loss": 0.2538, "step": 5230 }, { "epoch": 0.09330075268433632, "grad_norm": 0.25015437602996826, "learning_rate": 4.665536924723511e-05, "loss": 0.2274, "step": 5231 }, { "epoch": 0.09331858880605001, "grad_norm": 0.32471516728401184, "learning_rate": 4.666428826257581e-05, "loss": 0.2635, "step": 5232 }, { "epoch": 0.0933364249277637, "grad_norm": 0.2583746314048767, "learning_rate": 4.667320727791652e-05, "loss": 0.2349, "step": 5233 }, { "epoch": 0.0933542610494774, "grad_norm": 0.3352038264274597, "learning_rate": 4.668212629325723e-05, "loss": 0.2629, "step": 5234 }, { "epoch": 0.09337209717119109, "grad_norm": 0.3794134259223938, "learning_rate": 4.669104530859793e-05, "loss": 0.3188, "step": 5235 }, { "epoch": 0.09338993329290479, "grad_norm": 0.2985273003578186, "learning_rate": 4.669996432393864e-05, "loss": 0.2435, "step": 5236 }, { "epoch": 0.09340776941461848, "grad_norm": 0.2983872592449188, "learning_rate": 4.670888333927935e-05, "loss": 0.2601, "step": 5237 }, { "epoch": 0.09342560553633218, "grad_norm": 0.3920135498046875, "learning_rate": 4.6717802354620054e-05, "loss": 0.2733, "step": 5238 }, { "epoch": 0.09344344165804587, "grad_norm": 0.3617052137851715, "learning_rate": 4.672672136996076e-05, "loss": 0.2423, "step": 5239 }, { "epoch": 0.09346127777975957, "grad_norm": 0.3396837115287781, "learning_rate": 4.673564038530147e-05, "loss": 0.2946, "step": 5240 }, { "epoch": 0.09347911390147326, "grad_norm": 0.2999764680862427, "learning_rate": 4.6744559400642174e-05, "loss": 0.2479, "step": 5241 }, { "epoch": 0.09349695002318696, "grad_norm": 0.3175443112850189, "learning_rate": 4.675347841598287e-05, "loss": 0.2647, "step": 5242 }, { "epoch": 0.09351478614490065, "grad_norm": 0.2902558445930481, "learning_rate": 4.6762397431323586e-05, "loss": 0.2912, "step": 5243 }, { "epoch": 0.09353262226661435, "grad_norm": 0.2970888614654541, "learning_rate": 4.677131644666429e-05, "loss": 0.2887, "step": 5244 }, { "epoch": 0.09355045838832804, "grad_norm": 0.26533448696136475, "learning_rate": 4.678023546200499e-05, "loss": 0.2495, "step": 5245 }, { "epoch": 0.09356829451004174, "grad_norm": 0.42240414023399353, "learning_rate": 4.6789154477345705e-05, "loss": 0.2821, "step": 5246 }, { "epoch": 0.09358613063175543, "grad_norm": 0.34711533784866333, "learning_rate": 4.679807349268641e-05, "loss": 0.3043, "step": 5247 }, { "epoch": 0.09360396675346913, "grad_norm": 0.3298856317996979, "learning_rate": 4.680699250802711e-05, "loss": 0.3044, "step": 5248 }, { "epoch": 0.09362180287518282, "grad_norm": 0.4107668101787567, "learning_rate": 4.6815911523367825e-05, "loss": 0.2271, "step": 5249 }, { "epoch": 0.09363963899689652, "grad_norm": 0.3656250238418579, "learning_rate": 4.682483053870853e-05, "loss": 0.2262, "step": 5250 }, { "epoch": 0.09365747511861021, "grad_norm": 0.2461140900850296, "learning_rate": 4.683374955404923e-05, "loss": 0.2149, "step": 5251 }, { "epoch": 0.09367531124032391, "grad_norm": 0.3072248101234436, "learning_rate": 4.6842668569389944e-05, "loss": 0.255, "step": 5252 }, { "epoch": 0.0936931473620376, "grad_norm": 0.27899712324142456, "learning_rate": 4.685158758473065e-05, "loss": 0.2694, "step": 5253 }, { "epoch": 0.09371098348375129, "grad_norm": 0.23378194868564606, "learning_rate": 4.686050660007135e-05, "loss": 0.2397, "step": 5254 }, { "epoch": 0.09372881960546499, "grad_norm": 0.3421534597873688, "learning_rate": 4.686942561541206e-05, "loss": 0.2887, "step": 5255 }, { "epoch": 0.09374665572717868, "grad_norm": 0.2325400561094284, "learning_rate": 4.687834463075277e-05, "loss": 0.2041, "step": 5256 }, { "epoch": 0.09376449184889238, "grad_norm": 0.3640061318874359, "learning_rate": 4.688726364609347e-05, "loss": 0.2629, "step": 5257 }, { "epoch": 0.09378232797060607, "grad_norm": 0.41511374711990356, "learning_rate": 4.689618266143418e-05, "loss": 0.3519, "step": 5258 }, { "epoch": 0.09380016409231977, "grad_norm": 0.30702728033065796, "learning_rate": 4.690510167677489e-05, "loss": 0.2396, "step": 5259 }, { "epoch": 0.09381800021403346, "grad_norm": 0.3504076600074768, "learning_rate": 4.691402069211559e-05, "loss": 0.2682, "step": 5260 }, { "epoch": 0.09383583633574716, "grad_norm": 0.4777551293373108, "learning_rate": 4.69229397074563e-05, "loss": 0.314, "step": 5261 }, { "epoch": 0.09385367245746085, "grad_norm": 0.32501375675201416, "learning_rate": 4.693185872279701e-05, "loss": 0.2605, "step": 5262 }, { "epoch": 0.09387150857917455, "grad_norm": 0.3325204849243164, "learning_rate": 4.6940777738137714e-05, "loss": 0.2222, "step": 5263 }, { "epoch": 0.09388934470088824, "grad_norm": 0.2469899207353592, "learning_rate": 4.694969675347842e-05, "loss": 0.22, "step": 5264 }, { "epoch": 0.09390718082260194, "grad_norm": 0.30303481221199036, "learning_rate": 4.695861576881913e-05, "loss": 0.253, "step": 5265 }, { "epoch": 0.09392501694431563, "grad_norm": 0.25014814734458923, "learning_rate": 4.696753478415983e-05, "loss": 0.2438, "step": 5266 }, { "epoch": 0.09394285306602933, "grad_norm": 0.3906334936618805, "learning_rate": 4.697645379950053e-05, "loss": 0.3427, "step": 5267 }, { "epoch": 0.09396068918774302, "grad_norm": 0.32432055473327637, "learning_rate": 4.6985372814841246e-05, "loss": 0.2924, "step": 5268 }, { "epoch": 0.09397852530945672, "grad_norm": 0.34173494577407837, "learning_rate": 4.699429183018195e-05, "loss": 0.2475, "step": 5269 }, { "epoch": 0.0939963614311704, "grad_norm": 0.3409624397754669, "learning_rate": 4.700321084552265e-05, "loss": 0.2213, "step": 5270 }, { "epoch": 0.09401419755288411, "grad_norm": 0.29635345935821533, "learning_rate": 4.7012129860863365e-05, "loss": 0.2292, "step": 5271 }, { "epoch": 0.0940320336745978, "grad_norm": 0.2836691737174988, "learning_rate": 4.702104887620407e-05, "loss": 0.252, "step": 5272 }, { "epoch": 0.09404986979631148, "grad_norm": 0.31412312388420105, "learning_rate": 4.702996789154477e-05, "loss": 0.2582, "step": 5273 }, { "epoch": 0.09406770591802519, "grad_norm": 0.45611318945884705, "learning_rate": 4.7038886906885484e-05, "loss": 0.2486, "step": 5274 }, { "epoch": 0.09408554203973887, "grad_norm": 0.30665716528892517, "learning_rate": 4.704780592222619e-05, "loss": 0.2198, "step": 5275 }, { "epoch": 0.09410337816145257, "grad_norm": 0.3930690884590149, "learning_rate": 4.705672493756689e-05, "loss": 0.2439, "step": 5276 }, { "epoch": 0.09412121428316626, "grad_norm": 0.2558427155017853, "learning_rate": 4.70656439529076e-05, "loss": 0.2428, "step": 5277 }, { "epoch": 0.09413905040487996, "grad_norm": 0.3950040340423584, "learning_rate": 4.707456296824831e-05, "loss": 0.2951, "step": 5278 }, { "epoch": 0.09415688652659365, "grad_norm": 0.41172513365745544, "learning_rate": 4.708348198358901e-05, "loss": 0.3457, "step": 5279 }, { "epoch": 0.09417472264830735, "grad_norm": 0.31354084610939026, "learning_rate": 4.709240099892972e-05, "loss": 0.3089, "step": 5280 }, { "epoch": 0.09419255877002104, "grad_norm": 0.26249364018440247, "learning_rate": 4.710132001427043e-05, "loss": 0.2708, "step": 5281 }, { "epoch": 0.09421039489173474, "grad_norm": 0.3628444969654083, "learning_rate": 4.711023902961113e-05, "loss": 0.3027, "step": 5282 }, { "epoch": 0.09422823101344843, "grad_norm": 0.40232929587364197, "learning_rate": 4.711915804495184e-05, "loss": 0.2462, "step": 5283 }, { "epoch": 0.09424606713516213, "grad_norm": 0.4444749653339386, "learning_rate": 4.712807706029255e-05, "loss": 0.2286, "step": 5284 }, { "epoch": 0.09426390325687582, "grad_norm": 0.31657105684280396, "learning_rate": 4.713699607563325e-05, "loss": 0.2834, "step": 5285 }, { "epoch": 0.09428173937858952, "grad_norm": 0.36973780393600464, "learning_rate": 4.714591509097396e-05, "loss": 0.2961, "step": 5286 }, { "epoch": 0.09429957550030321, "grad_norm": 0.28114810585975647, "learning_rate": 4.715483410631467e-05, "loss": 0.2506, "step": 5287 }, { "epoch": 0.09431741162201691, "grad_norm": 0.3041308522224426, "learning_rate": 4.7163753121655373e-05, "loss": 0.2832, "step": 5288 }, { "epoch": 0.0943352477437306, "grad_norm": 0.3077862560749054, "learning_rate": 4.717267213699608e-05, "loss": 0.2693, "step": 5289 }, { "epoch": 0.0943530838654443, "grad_norm": 0.2772987484931946, "learning_rate": 4.7181591152336786e-05, "loss": 0.2663, "step": 5290 }, { "epoch": 0.09437091998715799, "grad_norm": 0.3235217332839966, "learning_rate": 4.719051016767749e-05, "loss": 0.2553, "step": 5291 }, { "epoch": 0.0943887561088717, "grad_norm": 0.3222964406013489, "learning_rate": 4.71994291830182e-05, "loss": 0.228, "step": 5292 }, { "epoch": 0.09440659223058538, "grad_norm": 0.3652717173099518, "learning_rate": 4.7208348198358905e-05, "loss": 0.3347, "step": 5293 }, { "epoch": 0.09442442835229907, "grad_norm": 0.3086948096752167, "learning_rate": 4.721726721369961e-05, "loss": 0.2518, "step": 5294 }, { "epoch": 0.09444226447401277, "grad_norm": 0.3053515553474426, "learning_rate": 4.722618622904031e-05, "loss": 0.2566, "step": 5295 }, { "epoch": 0.09446010059572646, "grad_norm": 0.3497569262981415, "learning_rate": 4.7235105244381024e-05, "loss": 0.2766, "step": 5296 }, { "epoch": 0.09447793671744016, "grad_norm": 0.2803959846496582, "learning_rate": 4.724402425972173e-05, "loss": 0.2321, "step": 5297 }, { "epoch": 0.09449577283915385, "grad_norm": 0.2764897346496582, "learning_rate": 4.725294327506243e-05, "loss": 0.2117, "step": 5298 }, { "epoch": 0.09451360896086755, "grad_norm": 0.4972369074821472, "learning_rate": 4.7261862290403144e-05, "loss": 0.2178, "step": 5299 }, { "epoch": 0.09453144508258124, "grad_norm": 0.31249862909317017, "learning_rate": 4.727078130574385e-05, "loss": 0.2337, "step": 5300 }, { "epoch": 0.09454928120429494, "grad_norm": 0.39970827102661133, "learning_rate": 4.727970032108455e-05, "loss": 0.2638, "step": 5301 }, { "epoch": 0.09456711732600863, "grad_norm": 0.28730496764183044, "learning_rate": 4.728861933642526e-05, "loss": 0.2407, "step": 5302 }, { "epoch": 0.09458495344772233, "grad_norm": 0.3620060384273529, "learning_rate": 4.729753835176597e-05, "loss": 0.2429, "step": 5303 }, { "epoch": 0.09460278956943602, "grad_norm": 0.281197726726532, "learning_rate": 4.730645736710667e-05, "loss": 0.1939, "step": 5304 }, { "epoch": 0.09462062569114972, "grad_norm": 0.3197873830795288, "learning_rate": 4.731537638244738e-05, "loss": 0.2763, "step": 5305 }, { "epoch": 0.09463846181286341, "grad_norm": 0.23553217947483063, "learning_rate": 4.732429539778809e-05, "loss": 0.2014, "step": 5306 }, { "epoch": 0.09465629793457711, "grad_norm": 0.355283260345459, "learning_rate": 4.733321441312879e-05, "loss": 0.2888, "step": 5307 }, { "epoch": 0.0946741340562908, "grad_norm": 0.34615418314933777, "learning_rate": 4.73421334284695e-05, "loss": 0.2915, "step": 5308 }, { "epoch": 0.0946919701780045, "grad_norm": 0.3840189278125763, "learning_rate": 4.735105244381021e-05, "loss": 0.2847, "step": 5309 }, { "epoch": 0.09470980629971819, "grad_norm": 0.37069171667099, "learning_rate": 4.7359971459150914e-05, "loss": 0.2917, "step": 5310 }, { "epoch": 0.09472764242143189, "grad_norm": 0.27350127696990967, "learning_rate": 4.736889047449162e-05, "loss": 0.2408, "step": 5311 }, { "epoch": 0.09474547854314558, "grad_norm": 0.31825244426727295, "learning_rate": 4.7377809489832327e-05, "loss": 0.2986, "step": 5312 }, { "epoch": 0.09476331466485927, "grad_norm": 0.524906575679779, "learning_rate": 4.738672850517303e-05, "loss": 0.2627, "step": 5313 }, { "epoch": 0.09478115078657297, "grad_norm": 0.3162332773208618, "learning_rate": 4.739564752051374e-05, "loss": 0.2529, "step": 5314 }, { "epoch": 0.09479898690828666, "grad_norm": 0.3042309582233429, "learning_rate": 4.7404566535854446e-05, "loss": 0.2023, "step": 5315 }, { "epoch": 0.09481682303000036, "grad_norm": 0.27491849660873413, "learning_rate": 4.741348555119515e-05, "loss": 0.2531, "step": 5316 }, { "epoch": 0.09483465915171405, "grad_norm": 0.2788602411746979, "learning_rate": 4.742240456653586e-05, "loss": 0.2505, "step": 5317 }, { "epoch": 0.09485249527342775, "grad_norm": 0.23961052298545837, "learning_rate": 4.7431323581876565e-05, "loss": 0.2002, "step": 5318 }, { "epoch": 0.09487033139514144, "grad_norm": 0.27642861008644104, "learning_rate": 4.744024259721727e-05, "loss": 0.2329, "step": 5319 }, { "epoch": 0.09488816751685514, "grad_norm": 0.21278324723243713, "learning_rate": 4.744916161255797e-05, "loss": 0.2096, "step": 5320 }, { "epoch": 0.09490600363856883, "grad_norm": 0.24676239490509033, "learning_rate": 4.7458080627898684e-05, "loss": 0.2357, "step": 5321 }, { "epoch": 0.09492383976028253, "grad_norm": 0.2675091326236725, "learning_rate": 4.746699964323939e-05, "loss": 0.2178, "step": 5322 }, { "epoch": 0.09494167588199622, "grad_norm": 0.26896363496780396, "learning_rate": 4.747591865858009e-05, "loss": 0.228, "step": 5323 }, { "epoch": 0.09495951200370992, "grad_norm": 0.3176270127296448, "learning_rate": 4.74848376739208e-05, "loss": 0.2915, "step": 5324 }, { "epoch": 0.0949773481254236, "grad_norm": 0.2676611840724945, "learning_rate": 4.749375668926151e-05, "loss": 0.2518, "step": 5325 }, { "epoch": 0.09499518424713731, "grad_norm": 0.4852003753185272, "learning_rate": 4.750267570460221e-05, "loss": 0.3007, "step": 5326 }, { "epoch": 0.095013020368851, "grad_norm": 0.32852569222450256, "learning_rate": 4.751159471994292e-05, "loss": 0.2334, "step": 5327 }, { "epoch": 0.0950308564905647, "grad_norm": 0.33095842599868774, "learning_rate": 4.752051373528363e-05, "loss": 0.2846, "step": 5328 }, { "epoch": 0.09504869261227838, "grad_norm": 0.2583049535751343, "learning_rate": 4.752943275062433e-05, "loss": 0.2385, "step": 5329 }, { "epoch": 0.09506652873399209, "grad_norm": 0.3359510004520416, "learning_rate": 4.753835176596504e-05, "loss": 0.2899, "step": 5330 }, { "epoch": 0.09508436485570577, "grad_norm": 0.3022625744342804, "learning_rate": 4.754727078130575e-05, "loss": 0.2725, "step": 5331 }, { "epoch": 0.09510220097741948, "grad_norm": 0.3416677713394165, "learning_rate": 4.755618979664645e-05, "loss": 0.2925, "step": 5332 }, { "epoch": 0.09512003709913316, "grad_norm": 0.3762374520301819, "learning_rate": 4.756510881198716e-05, "loss": 0.2084, "step": 5333 }, { "epoch": 0.09513787322084685, "grad_norm": 0.28740838170051575, "learning_rate": 4.757402782732787e-05, "loss": 0.2459, "step": 5334 }, { "epoch": 0.09515570934256055, "grad_norm": 0.30601343512535095, "learning_rate": 4.758294684266857e-05, "loss": 0.2231, "step": 5335 }, { "epoch": 0.09517354546427424, "grad_norm": 0.3672378659248352, "learning_rate": 4.759186585800928e-05, "loss": 0.2593, "step": 5336 }, { "epoch": 0.09519138158598794, "grad_norm": 0.39896172285079956, "learning_rate": 4.7600784873349986e-05, "loss": 0.3436, "step": 5337 }, { "epoch": 0.09520921770770163, "grad_norm": 0.22721467912197113, "learning_rate": 4.760970388869069e-05, "loss": 0.2066, "step": 5338 }, { "epoch": 0.09522705382941533, "grad_norm": 0.22346056997776031, "learning_rate": 4.76186229040314e-05, "loss": 0.2144, "step": 5339 }, { "epoch": 0.09524488995112902, "grad_norm": 0.29692816734313965, "learning_rate": 4.7627541919372105e-05, "loss": 0.2772, "step": 5340 }, { "epoch": 0.09526272607284272, "grad_norm": 0.2819810211658478, "learning_rate": 4.763646093471281e-05, "loss": 0.2394, "step": 5341 }, { "epoch": 0.09528056219455641, "grad_norm": 0.3281337022781372, "learning_rate": 4.764537995005352e-05, "loss": 0.2827, "step": 5342 }, { "epoch": 0.09529839831627011, "grad_norm": 0.4458293318748474, "learning_rate": 4.7654298965394224e-05, "loss": 0.3562, "step": 5343 }, { "epoch": 0.0953162344379838, "grad_norm": 0.33938318490982056, "learning_rate": 4.766321798073493e-05, "loss": 0.2642, "step": 5344 }, { "epoch": 0.0953340705596975, "grad_norm": 0.4199501872062683, "learning_rate": 4.767213699607563e-05, "loss": 0.2311, "step": 5345 }, { "epoch": 0.09535190668141119, "grad_norm": 0.25305017828941345, "learning_rate": 4.7681056011416343e-05, "loss": 0.2398, "step": 5346 }, { "epoch": 0.0953697428031249, "grad_norm": 0.2190845012664795, "learning_rate": 4.768997502675705e-05, "loss": 0.241, "step": 5347 }, { "epoch": 0.09538757892483858, "grad_norm": 0.2902757227420807, "learning_rate": 4.769889404209775e-05, "loss": 0.2832, "step": 5348 }, { "epoch": 0.09540541504655228, "grad_norm": 0.302707314491272, "learning_rate": 4.770781305743846e-05, "loss": 0.2757, "step": 5349 }, { "epoch": 0.09542325116826597, "grad_norm": 0.29162052273750305, "learning_rate": 4.771673207277917e-05, "loss": 0.2517, "step": 5350 }, { "epoch": 0.09544108728997967, "grad_norm": 0.22246886789798737, "learning_rate": 4.772565108811987e-05, "loss": 0.2389, "step": 5351 }, { "epoch": 0.09545892341169336, "grad_norm": 0.2861518859863281, "learning_rate": 4.773457010346058e-05, "loss": 0.1903, "step": 5352 }, { "epoch": 0.09547675953340706, "grad_norm": 0.28319862484931946, "learning_rate": 4.774348911880129e-05, "loss": 0.1957, "step": 5353 }, { "epoch": 0.09549459565512075, "grad_norm": 0.31579911708831787, "learning_rate": 4.775240813414199e-05, "loss": 0.2167, "step": 5354 }, { "epoch": 0.09551243177683444, "grad_norm": 0.2679722309112549, "learning_rate": 4.77613271494827e-05, "loss": 0.2543, "step": 5355 }, { "epoch": 0.09553026789854814, "grad_norm": 0.40429264307022095, "learning_rate": 4.777024616482341e-05, "loss": 0.2748, "step": 5356 }, { "epoch": 0.09554810402026183, "grad_norm": 0.2967761158943176, "learning_rate": 4.7779165180164114e-05, "loss": 0.2459, "step": 5357 }, { "epoch": 0.09556594014197553, "grad_norm": 0.35104766488075256, "learning_rate": 4.778808419550482e-05, "loss": 0.2629, "step": 5358 }, { "epoch": 0.09558377626368922, "grad_norm": 0.31640005111694336, "learning_rate": 4.7797003210845526e-05, "loss": 0.2828, "step": 5359 }, { "epoch": 0.09560161238540292, "grad_norm": 0.32061511278152466, "learning_rate": 4.780592222618623e-05, "loss": 0.3031, "step": 5360 }, { "epoch": 0.09561944850711661, "grad_norm": 0.31277865171432495, "learning_rate": 4.781484124152694e-05, "loss": 0.2654, "step": 5361 }, { "epoch": 0.09563728462883031, "grad_norm": 0.26078227162361145, "learning_rate": 4.7823760256867646e-05, "loss": 0.2486, "step": 5362 }, { "epoch": 0.095655120750544, "grad_norm": 0.3343501389026642, "learning_rate": 4.783267927220835e-05, "loss": 0.2137, "step": 5363 }, { "epoch": 0.0956729568722577, "grad_norm": 0.35817328095436096, "learning_rate": 4.784159828754906e-05, "loss": 0.2698, "step": 5364 }, { "epoch": 0.09569079299397139, "grad_norm": 0.3671835958957672, "learning_rate": 4.7850517302889765e-05, "loss": 0.3125, "step": 5365 }, { "epoch": 0.09570862911568509, "grad_norm": 0.27490052580833435, "learning_rate": 4.785943631823047e-05, "loss": 0.2397, "step": 5366 }, { "epoch": 0.09572646523739878, "grad_norm": 0.3614984154701233, "learning_rate": 4.786835533357118e-05, "loss": 0.285, "step": 5367 }, { "epoch": 0.09574430135911248, "grad_norm": 0.3216387927532196, "learning_rate": 4.7877274348911884e-05, "loss": 0.2746, "step": 5368 }, { "epoch": 0.09576213748082617, "grad_norm": 0.43186280131340027, "learning_rate": 4.788619336425259e-05, "loss": 0.3013, "step": 5369 }, { "epoch": 0.09577997360253987, "grad_norm": 0.29047054052352905, "learning_rate": 4.789511237959329e-05, "loss": 0.2795, "step": 5370 }, { "epoch": 0.09579780972425356, "grad_norm": 0.30678704380989075, "learning_rate": 4.7904031394934e-05, "loss": 0.2537, "step": 5371 }, { "epoch": 0.09581564584596726, "grad_norm": 0.2642018795013428, "learning_rate": 4.791295041027471e-05, "loss": 0.2307, "step": 5372 }, { "epoch": 0.09583348196768095, "grad_norm": 0.30768072605133057, "learning_rate": 4.792186942561541e-05, "loss": 0.3143, "step": 5373 }, { "epoch": 0.09585131808939464, "grad_norm": 0.3095901310443878, "learning_rate": 4.793078844095612e-05, "loss": 0.2578, "step": 5374 }, { "epoch": 0.09586915421110834, "grad_norm": 0.34432873129844666, "learning_rate": 4.793970745629683e-05, "loss": 0.2356, "step": 5375 }, { "epoch": 0.09588699033282203, "grad_norm": 0.24537573754787445, "learning_rate": 4.794862647163753e-05, "loss": 0.2332, "step": 5376 }, { "epoch": 0.09590482645453573, "grad_norm": 0.2724122703075409, "learning_rate": 4.795754548697824e-05, "loss": 0.2105, "step": 5377 }, { "epoch": 0.09592266257624942, "grad_norm": 0.42682817578315735, "learning_rate": 4.796646450231895e-05, "loss": 0.2668, "step": 5378 }, { "epoch": 0.09594049869796312, "grad_norm": 0.29142510890960693, "learning_rate": 4.797538351765965e-05, "loss": 0.2147, "step": 5379 }, { "epoch": 0.0959583348196768, "grad_norm": 0.357695996761322, "learning_rate": 4.798430253300036e-05, "loss": 0.2597, "step": 5380 }, { "epoch": 0.0959761709413905, "grad_norm": 0.35795679688453674, "learning_rate": 4.799322154834107e-05, "loss": 0.208, "step": 5381 }, { "epoch": 0.0959940070631042, "grad_norm": 0.299063503742218, "learning_rate": 4.800214056368177e-05, "loss": 0.2133, "step": 5382 }, { "epoch": 0.0960118431848179, "grad_norm": 0.34796178340911865, "learning_rate": 4.801105957902248e-05, "loss": 0.2668, "step": 5383 }, { "epoch": 0.09602967930653158, "grad_norm": 0.32690906524658203, "learning_rate": 4.8019978594363186e-05, "loss": 0.2751, "step": 5384 }, { "epoch": 0.09604751542824529, "grad_norm": 0.271119624376297, "learning_rate": 4.802889760970389e-05, "loss": 0.2691, "step": 5385 }, { "epoch": 0.09606535154995897, "grad_norm": 0.35632744431495667, "learning_rate": 4.80378166250446e-05, "loss": 0.3074, "step": 5386 }, { "epoch": 0.09608318767167268, "grad_norm": 0.32233545184135437, "learning_rate": 4.8046735640385305e-05, "loss": 0.3139, "step": 5387 }, { "epoch": 0.09610102379338636, "grad_norm": 0.3475266695022583, "learning_rate": 4.805565465572601e-05, "loss": 0.2816, "step": 5388 }, { "epoch": 0.09611885991510007, "grad_norm": 0.3758280575275421, "learning_rate": 4.806457367106672e-05, "loss": 0.2633, "step": 5389 }, { "epoch": 0.09613669603681375, "grad_norm": 0.2562137544155121, "learning_rate": 4.8073492686407424e-05, "loss": 0.2342, "step": 5390 }, { "epoch": 0.09615453215852746, "grad_norm": 0.25612273812294006, "learning_rate": 4.808241170174813e-05, "loss": 0.2334, "step": 5391 }, { "epoch": 0.09617236828024114, "grad_norm": 0.22181525826454163, "learning_rate": 4.809133071708884e-05, "loss": 0.2256, "step": 5392 }, { "epoch": 0.09619020440195485, "grad_norm": 0.28419962525367737, "learning_rate": 4.810024973242954e-05, "loss": 0.2522, "step": 5393 }, { "epoch": 0.09620804052366853, "grad_norm": 0.2620362639427185, "learning_rate": 4.810916874777025e-05, "loss": 0.2352, "step": 5394 }, { "epoch": 0.09622587664538222, "grad_norm": 0.48510581254959106, "learning_rate": 4.8118087763110956e-05, "loss": 0.3419, "step": 5395 }, { "epoch": 0.09624371276709592, "grad_norm": 0.20889174938201904, "learning_rate": 4.812700677845166e-05, "loss": 0.2265, "step": 5396 }, { "epoch": 0.09626154888880961, "grad_norm": 0.20810353755950928, "learning_rate": 4.813592579379237e-05, "loss": 0.2407, "step": 5397 }, { "epoch": 0.09627938501052331, "grad_norm": 0.25173771381378174, "learning_rate": 4.814484480913307e-05, "loss": 0.2459, "step": 5398 }, { "epoch": 0.096297221132237, "grad_norm": 0.25757449865341187, "learning_rate": 4.815376382447378e-05, "loss": 0.2338, "step": 5399 }, { "epoch": 0.0963150572539507, "grad_norm": 0.27500173449516296, "learning_rate": 4.816268283981449e-05, "loss": 0.2514, "step": 5400 }, { "epoch": 0.09633289337566439, "grad_norm": 0.3292427659034729, "learning_rate": 4.817160185515519e-05, "loss": 0.2288, "step": 5401 }, { "epoch": 0.0963507294973781, "grad_norm": 0.38144394755363464, "learning_rate": 4.81805208704959e-05, "loss": 0.3091, "step": 5402 }, { "epoch": 0.09636856561909178, "grad_norm": 0.2699589729309082, "learning_rate": 4.818943988583661e-05, "loss": 0.2236, "step": 5403 }, { "epoch": 0.09638640174080548, "grad_norm": 0.31738099455833435, "learning_rate": 4.819835890117731e-05, "loss": 0.2289, "step": 5404 }, { "epoch": 0.09640423786251917, "grad_norm": 0.36914145946502686, "learning_rate": 4.820727791651802e-05, "loss": 0.2181, "step": 5405 }, { "epoch": 0.09642207398423287, "grad_norm": 0.37151944637298584, "learning_rate": 4.8216196931858726e-05, "loss": 0.2308, "step": 5406 }, { "epoch": 0.09643991010594656, "grad_norm": 0.3182366192340851, "learning_rate": 4.822511594719943e-05, "loss": 0.2641, "step": 5407 }, { "epoch": 0.09645774622766026, "grad_norm": 0.3091346323490143, "learning_rate": 4.823403496254014e-05, "loss": 0.2352, "step": 5408 }, { "epoch": 0.09647558234937395, "grad_norm": 0.3038909137248993, "learning_rate": 4.8242953977880845e-05, "loss": 0.247, "step": 5409 }, { "epoch": 0.09649341847108765, "grad_norm": 0.41333627700805664, "learning_rate": 4.825187299322155e-05, "loss": 0.3271, "step": 5410 }, { "epoch": 0.09651125459280134, "grad_norm": 0.46484991908073425, "learning_rate": 4.826079200856226e-05, "loss": 0.3217, "step": 5411 }, { "epoch": 0.09652909071451504, "grad_norm": 0.3277193605899811, "learning_rate": 4.8269711023902965e-05, "loss": 0.2014, "step": 5412 }, { "epoch": 0.09654692683622873, "grad_norm": 0.3197810649871826, "learning_rate": 4.827863003924367e-05, "loss": 0.2664, "step": 5413 }, { "epoch": 0.09656476295794242, "grad_norm": 0.31339699029922485, "learning_rate": 4.828754905458438e-05, "loss": 0.2722, "step": 5414 }, { "epoch": 0.09658259907965612, "grad_norm": 0.3026168942451477, "learning_rate": 4.8296468069925084e-05, "loss": 0.2607, "step": 5415 }, { "epoch": 0.09660043520136981, "grad_norm": 0.44053876399993896, "learning_rate": 4.830538708526579e-05, "loss": 0.2941, "step": 5416 }, { "epoch": 0.09661827132308351, "grad_norm": 0.33031144738197327, "learning_rate": 4.8314306100606497e-05, "loss": 0.2289, "step": 5417 }, { "epoch": 0.0966361074447972, "grad_norm": 0.3416697680950165, "learning_rate": 4.83232251159472e-05, "loss": 0.2692, "step": 5418 }, { "epoch": 0.0966539435665109, "grad_norm": 0.2503843903541565, "learning_rate": 4.833214413128791e-05, "loss": 0.2092, "step": 5419 }, { "epoch": 0.09667177968822459, "grad_norm": 0.3404706120491028, "learning_rate": 4.8341063146628616e-05, "loss": 0.2676, "step": 5420 }, { "epoch": 0.09668961580993829, "grad_norm": 0.33275705575942993, "learning_rate": 4.834998216196932e-05, "loss": 0.2713, "step": 5421 }, { "epoch": 0.09670745193165198, "grad_norm": 0.32658156752586365, "learning_rate": 4.835890117731003e-05, "loss": 0.2691, "step": 5422 }, { "epoch": 0.09672528805336568, "grad_norm": 0.270504355430603, "learning_rate": 4.836782019265073e-05, "loss": 0.2213, "step": 5423 }, { "epoch": 0.09674312417507937, "grad_norm": 0.410617470741272, "learning_rate": 4.837673920799144e-05, "loss": 0.2993, "step": 5424 }, { "epoch": 0.09676096029679307, "grad_norm": 0.2827683091163635, "learning_rate": 4.838565822333215e-05, "loss": 0.218, "step": 5425 }, { "epoch": 0.09677879641850676, "grad_norm": 0.2774164080619812, "learning_rate": 4.839457723867285e-05, "loss": 0.2318, "step": 5426 }, { "epoch": 0.09679663254022046, "grad_norm": 0.570665717124939, "learning_rate": 4.840349625401356e-05, "loss": 0.2653, "step": 5427 }, { "epoch": 0.09681446866193415, "grad_norm": 0.3123404383659363, "learning_rate": 4.841241526935427e-05, "loss": 0.2858, "step": 5428 }, { "epoch": 0.09683230478364785, "grad_norm": 0.3542862832546234, "learning_rate": 4.842133428469497e-05, "loss": 0.2319, "step": 5429 }, { "epoch": 0.09685014090536154, "grad_norm": 0.26503410935401917, "learning_rate": 4.843025330003568e-05, "loss": 0.218, "step": 5430 }, { "epoch": 0.09686797702707524, "grad_norm": 0.26394808292388916, "learning_rate": 4.8439172315376386e-05, "loss": 0.2321, "step": 5431 }, { "epoch": 0.09688581314878893, "grad_norm": 0.37308600544929504, "learning_rate": 4.844809133071709e-05, "loss": 0.3088, "step": 5432 }, { "epoch": 0.09690364927050263, "grad_norm": 0.2688678801059723, "learning_rate": 4.84570103460578e-05, "loss": 0.2611, "step": 5433 }, { "epoch": 0.09692148539221632, "grad_norm": 0.2813870310783386, "learning_rate": 4.8465929361398505e-05, "loss": 0.2311, "step": 5434 }, { "epoch": 0.09693932151393, "grad_norm": 0.27423232793807983, "learning_rate": 4.847484837673921e-05, "loss": 0.2561, "step": 5435 }, { "epoch": 0.0969571576356437, "grad_norm": 0.2912781834602356, "learning_rate": 4.848376739207992e-05, "loss": 0.2867, "step": 5436 }, { "epoch": 0.0969749937573574, "grad_norm": 0.38686278462409973, "learning_rate": 4.8492686407420624e-05, "loss": 0.217, "step": 5437 }, { "epoch": 0.0969928298790711, "grad_norm": 0.48345714807510376, "learning_rate": 4.850160542276133e-05, "loss": 0.3164, "step": 5438 }, { "epoch": 0.09701066600078478, "grad_norm": 0.3831360638141632, "learning_rate": 4.851052443810204e-05, "loss": 0.2246, "step": 5439 }, { "epoch": 0.09702850212249849, "grad_norm": 0.32400625944137573, "learning_rate": 4.851944345344274e-05, "loss": 0.2892, "step": 5440 }, { "epoch": 0.09704633824421217, "grad_norm": 0.24737146496772766, "learning_rate": 4.852836246878345e-05, "loss": 0.2222, "step": 5441 }, { "epoch": 0.09706417436592588, "grad_norm": 0.31008994579315186, "learning_rate": 4.8537281484124156e-05, "loss": 0.2704, "step": 5442 }, { "epoch": 0.09708201048763956, "grad_norm": 0.27175506949424744, "learning_rate": 4.854620049946486e-05, "loss": 0.1977, "step": 5443 }, { "epoch": 0.09709984660935327, "grad_norm": 0.39254119992256165, "learning_rate": 4.855511951480557e-05, "loss": 0.2606, "step": 5444 }, { "epoch": 0.09711768273106695, "grad_norm": 0.33907565474510193, "learning_rate": 4.8564038530146275e-05, "loss": 0.3119, "step": 5445 }, { "epoch": 0.09713551885278066, "grad_norm": 0.3085770010948181, "learning_rate": 4.857295754548698e-05, "loss": 0.2404, "step": 5446 }, { "epoch": 0.09715335497449434, "grad_norm": 0.3873482048511505, "learning_rate": 4.858187656082769e-05, "loss": 0.2773, "step": 5447 }, { "epoch": 0.09717119109620805, "grad_norm": 0.3149682283401489, "learning_rate": 4.859079557616839e-05, "loss": 0.2989, "step": 5448 }, { "epoch": 0.09718902721792173, "grad_norm": 0.35003674030303955, "learning_rate": 4.85997145915091e-05, "loss": 0.257, "step": 5449 }, { "epoch": 0.09720686333963544, "grad_norm": 0.2753257155418396, "learning_rate": 4.860863360684981e-05, "loss": 0.2999, "step": 5450 }, { "epoch": 0.09722469946134912, "grad_norm": 0.2559419870376587, "learning_rate": 4.861755262219051e-05, "loss": 0.2464, "step": 5451 }, { "epoch": 0.09724253558306283, "grad_norm": 0.34245607256889343, "learning_rate": 4.862647163753122e-05, "loss": 0.3227, "step": 5452 }, { "epoch": 0.09726037170477651, "grad_norm": 0.28112515807151794, "learning_rate": 4.8635390652871926e-05, "loss": 0.2514, "step": 5453 }, { "epoch": 0.0972782078264902, "grad_norm": 0.32024791836738586, "learning_rate": 4.864430966821263e-05, "loss": 0.2171, "step": 5454 }, { "epoch": 0.0972960439482039, "grad_norm": 0.2943370044231415, "learning_rate": 4.865322868355334e-05, "loss": 0.263, "step": 5455 }, { "epoch": 0.09731388006991759, "grad_norm": 0.2543685734272003, "learning_rate": 4.8662147698894045e-05, "loss": 0.273, "step": 5456 }, { "epoch": 0.09733171619163129, "grad_norm": 0.4515843689441681, "learning_rate": 4.867106671423475e-05, "loss": 0.2636, "step": 5457 }, { "epoch": 0.09734955231334498, "grad_norm": 0.29468849301338196, "learning_rate": 4.867998572957546e-05, "loss": 0.2461, "step": 5458 }, { "epoch": 0.09736738843505868, "grad_norm": 0.353140652179718, "learning_rate": 4.8688904744916164e-05, "loss": 0.2307, "step": 5459 }, { "epoch": 0.09738522455677237, "grad_norm": 0.3664565086364746, "learning_rate": 4.869782376025687e-05, "loss": 0.3103, "step": 5460 }, { "epoch": 0.09740306067848607, "grad_norm": 0.3541789948940277, "learning_rate": 4.870674277559758e-05, "loss": 0.27, "step": 5461 }, { "epoch": 0.09742089680019976, "grad_norm": 0.4054414927959442, "learning_rate": 4.8715661790938284e-05, "loss": 0.1805, "step": 5462 }, { "epoch": 0.09743873292191346, "grad_norm": 0.34643250703811646, "learning_rate": 4.872458080627899e-05, "loss": 0.2653, "step": 5463 }, { "epoch": 0.09745656904362715, "grad_norm": 0.30009132623672485, "learning_rate": 4.8733499821619696e-05, "loss": 0.2255, "step": 5464 }, { "epoch": 0.09747440516534085, "grad_norm": 0.29243677854537964, "learning_rate": 4.87424188369604e-05, "loss": 0.2862, "step": 5465 }, { "epoch": 0.09749224128705454, "grad_norm": 0.3629752993583679, "learning_rate": 4.875133785230111e-05, "loss": 0.31, "step": 5466 }, { "epoch": 0.09751007740876824, "grad_norm": 0.29389265179634094, "learning_rate": 4.8760256867641816e-05, "loss": 0.2725, "step": 5467 }, { "epoch": 0.09752791353048193, "grad_norm": 0.35133081674575806, "learning_rate": 4.876917588298252e-05, "loss": 0.2451, "step": 5468 }, { "epoch": 0.09754574965219563, "grad_norm": 0.3372002840042114, "learning_rate": 4.877809489832323e-05, "loss": 0.3232, "step": 5469 }, { "epoch": 0.09756358577390932, "grad_norm": 0.2796429395675659, "learning_rate": 4.8787013913663935e-05, "loss": 0.2139, "step": 5470 }, { "epoch": 0.09758142189562302, "grad_norm": 0.3312259018421173, "learning_rate": 4.879593292900464e-05, "loss": 0.3258, "step": 5471 }, { "epoch": 0.09759925801733671, "grad_norm": 0.3399675190448761, "learning_rate": 4.880485194434535e-05, "loss": 0.2568, "step": 5472 }, { "epoch": 0.09761709413905041, "grad_norm": 0.31488168239593506, "learning_rate": 4.881377095968605e-05, "loss": 0.2757, "step": 5473 }, { "epoch": 0.0976349302607641, "grad_norm": 0.34534600377082825, "learning_rate": 4.882268997502676e-05, "loss": 0.3318, "step": 5474 }, { "epoch": 0.09765276638247779, "grad_norm": 0.32041308283805847, "learning_rate": 4.8831608990367467e-05, "loss": 0.2694, "step": 5475 }, { "epoch": 0.09767060250419149, "grad_norm": 0.23527833819389343, "learning_rate": 4.884052800570817e-05, "loss": 0.2659, "step": 5476 }, { "epoch": 0.09768843862590518, "grad_norm": 0.43653279542922974, "learning_rate": 4.884944702104888e-05, "loss": 0.2946, "step": 5477 }, { "epoch": 0.09770627474761888, "grad_norm": 0.2972584068775177, "learning_rate": 4.8858366036389586e-05, "loss": 0.2748, "step": 5478 }, { "epoch": 0.09772411086933257, "grad_norm": 0.2785152494907379, "learning_rate": 4.886728505173029e-05, "loss": 0.2956, "step": 5479 }, { "epoch": 0.09774194699104627, "grad_norm": 0.2990381121635437, "learning_rate": 4.8876204067071e-05, "loss": 0.2354, "step": 5480 }, { "epoch": 0.09775978311275996, "grad_norm": 0.29772382974624634, "learning_rate": 4.8885123082411705e-05, "loss": 0.247, "step": 5481 }, { "epoch": 0.09777761923447366, "grad_norm": 0.3232877850532532, "learning_rate": 4.889404209775241e-05, "loss": 0.2143, "step": 5482 }, { "epoch": 0.09779545535618735, "grad_norm": 0.3917143940925598, "learning_rate": 4.890296111309312e-05, "loss": 0.2619, "step": 5483 }, { "epoch": 0.09781329147790105, "grad_norm": 0.2928524911403656, "learning_rate": 4.8911880128433824e-05, "loss": 0.2634, "step": 5484 }, { "epoch": 0.09783112759961474, "grad_norm": 0.4801047444343567, "learning_rate": 4.892079914377453e-05, "loss": 0.3373, "step": 5485 }, { "epoch": 0.09784896372132844, "grad_norm": 0.2984585464000702, "learning_rate": 4.892971815911524e-05, "loss": 0.2119, "step": 5486 }, { "epoch": 0.09786679984304213, "grad_norm": 0.2977357804775238, "learning_rate": 4.893863717445594e-05, "loss": 0.272, "step": 5487 }, { "epoch": 0.09788463596475583, "grad_norm": 0.24359388649463654, "learning_rate": 4.894755618979665e-05, "loss": 0.2274, "step": 5488 }, { "epoch": 0.09790247208646952, "grad_norm": 0.3061986565589905, "learning_rate": 4.8956475205137356e-05, "loss": 0.223, "step": 5489 }, { "epoch": 0.09792030820818322, "grad_norm": 0.36622732877731323, "learning_rate": 4.896539422047806e-05, "loss": 0.2769, "step": 5490 }, { "epoch": 0.0979381443298969, "grad_norm": 0.3394840657711029, "learning_rate": 4.897431323581877e-05, "loss": 0.189, "step": 5491 }, { "epoch": 0.09795598045161061, "grad_norm": 0.3280045986175537, "learning_rate": 4.8983232251159475e-05, "loss": 0.2626, "step": 5492 }, { "epoch": 0.0979738165733243, "grad_norm": 0.2870257496833801, "learning_rate": 4.899215126650018e-05, "loss": 0.2451, "step": 5493 }, { "epoch": 0.09799165269503798, "grad_norm": 0.19842657446861267, "learning_rate": 4.900107028184089e-05, "loss": 0.2062, "step": 5494 }, { "epoch": 0.09800948881675169, "grad_norm": 0.30510666966438293, "learning_rate": 4.9009989297181594e-05, "loss": 0.2533, "step": 5495 }, { "epoch": 0.09802732493846537, "grad_norm": 0.28291577100753784, "learning_rate": 4.90189083125223e-05, "loss": 0.2441, "step": 5496 }, { "epoch": 0.09804516106017908, "grad_norm": 0.3856520652770996, "learning_rate": 4.902782732786301e-05, "loss": 0.1969, "step": 5497 }, { "epoch": 0.09806299718189276, "grad_norm": 0.2804949879646301, "learning_rate": 4.903674634320371e-05, "loss": 0.2579, "step": 5498 }, { "epoch": 0.09808083330360647, "grad_norm": 0.3010903596878052, "learning_rate": 4.904566535854442e-05, "loss": 0.2279, "step": 5499 }, { "epoch": 0.09809866942532015, "grad_norm": 0.3615068197250366, "learning_rate": 4.9054584373885126e-05, "loss": 0.2812, "step": 5500 }, { "epoch": 0.09811650554703386, "grad_norm": 0.24918560683727264, "learning_rate": 4.906350338922583e-05, "loss": 0.2402, "step": 5501 }, { "epoch": 0.09813434166874754, "grad_norm": 0.37588775157928467, "learning_rate": 4.907242240456654e-05, "loss": 0.2753, "step": 5502 }, { "epoch": 0.09815217779046125, "grad_norm": 0.35019493103027344, "learning_rate": 4.9081341419907245e-05, "loss": 0.3374, "step": 5503 }, { "epoch": 0.09817001391217493, "grad_norm": 0.28179264068603516, "learning_rate": 4.909026043524795e-05, "loss": 0.2798, "step": 5504 }, { "epoch": 0.09818785003388864, "grad_norm": 0.2809401750564575, "learning_rate": 4.909917945058866e-05, "loss": 0.2487, "step": 5505 }, { "epoch": 0.09820568615560232, "grad_norm": 0.6416525840759277, "learning_rate": 4.9108098465929364e-05, "loss": 0.2463, "step": 5506 }, { "epoch": 0.09822352227731602, "grad_norm": 0.25745102763175964, "learning_rate": 4.911701748127007e-05, "loss": 0.2536, "step": 5507 }, { "epoch": 0.09824135839902971, "grad_norm": 0.27652454376220703, "learning_rate": 4.912593649661078e-05, "loss": 0.2515, "step": 5508 }, { "epoch": 0.09825919452074341, "grad_norm": 0.3008754253387451, "learning_rate": 4.9134855511951483e-05, "loss": 0.3092, "step": 5509 }, { "epoch": 0.0982770306424571, "grad_norm": 0.320084810256958, "learning_rate": 4.914377452729219e-05, "loss": 0.264, "step": 5510 }, { "epoch": 0.0982948667641708, "grad_norm": 0.23950159549713135, "learning_rate": 4.9152693542632896e-05, "loss": 0.2431, "step": 5511 }, { "epoch": 0.09831270288588449, "grad_norm": 0.28555992245674133, "learning_rate": 4.91616125579736e-05, "loss": 0.2205, "step": 5512 }, { "epoch": 0.0983305390075982, "grad_norm": 0.29226920008659363, "learning_rate": 4.917053157331431e-05, "loss": 0.2353, "step": 5513 }, { "epoch": 0.09834837512931188, "grad_norm": 0.28247639536857605, "learning_rate": 4.9179450588655015e-05, "loss": 0.2411, "step": 5514 }, { "epoch": 0.09836621125102557, "grad_norm": 0.3029201924800873, "learning_rate": 4.918836960399572e-05, "loss": 0.2995, "step": 5515 }, { "epoch": 0.09838404737273927, "grad_norm": 0.4019959568977356, "learning_rate": 4.919728861933643e-05, "loss": 0.3222, "step": 5516 }, { "epoch": 0.09840188349445296, "grad_norm": 0.2866835296154022, "learning_rate": 4.9206207634677135e-05, "loss": 0.2493, "step": 5517 }, { "epoch": 0.09841971961616666, "grad_norm": 0.2806166708469391, "learning_rate": 4.921512665001784e-05, "loss": 0.2411, "step": 5518 }, { "epoch": 0.09843755573788035, "grad_norm": 0.26713719964027405, "learning_rate": 4.922404566535855e-05, "loss": 0.2303, "step": 5519 }, { "epoch": 0.09845539185959405, "grad_norm": 0.34005218744277954, "learning_rate": 4.9232964680699254e-05, "loss": 0.2905, "step": 5520 }, { "epoch": 0.09847322798130774, "grad_norm": 0.2824673056602478, "learning_rate": 4.924188369603996e-05, "loss": 0.2812, "step": 5521 }, { "epoch": 0.09849106410302144, "grad_norm": 0.27901577949523926, "learning_rate": 4.9250802711380666e-05, "loss": 0.2723, "step": 5522 }, { "epoch": 0.09850890022473513, "grad_norm": 0.2748558223247528, "learning_rate": 4.925972172672137e-05, "loss": 0.2371, "step": 5523 }, { "epoch": 0.09852673634644883, "grad_norm": 0.29768821597099304, "learning_rate": 4.926864074206208e-05, "loss": 0.2534, "step": 5524 }, { "epoch": 0.09854457246816252, "grad_norm": 0.2774190902709961, "learning_rate": 4.9277559757402786e-05, "loss": 0.2607, "step": 5525 }, { "epoch": 0.09856240858987622, "grad_norm": 0.3144364058971405, "learning_rate": 4.928647877274349e-05, "loss": 0.3096, "step": 5526 }, { "epoch": 0.09858024471158991, "grad_norm": 0.28779545426368713, "learning_rate": 4.92953977880842e-05, "loss": 0.2668, "step": 5527 }, { "epoch": 0.09859808083330361, "grad_norm": 0.34362363815307617, "learning_rate": 4.9304316803424905e-05, "loss": 0.2792, "step": 5528 }, { "epoch": 0.0986159169550173, "grad_norm": 0.29747605323791504, "learning_rate": 4.931323581876561e-05, "loss": 0.2639, "step": 5529 }, { "epoch": 0.098633753076731, "grad_norm": 0.4144671857357025, "learning_rate": 4.932215483410632e-05, "loss": 0.2851, "step": 5530 }, { "epoch": 0.09865158919844469, "grad_norm": 0.28177231550216675, "learning_rate": 4.9331073849447024e-05, "loss": 0.2432, "step": 5531 }, { "epoch": 0.09866942532015839, "grad_norm": 0.7640838027000427, "learning_rate": 4.933999286478773e-05, "loss": 0.3023, "step": 5532 }, { "epoch": 0.09868726144187208, "grad_norm": 0.21771247684955597, "learning_rate": 4.934891188012844e-05, "loss": 0.1982, "step": 5533 }, { "epoch": 0.09870509756358578, "grad_norm": 0.33608943223953247, "learning_rate": 4.935783089546914e-05, "loss": 0.2625, "step": 5534 }, { "epoch": 0.09872293368529947, "grad_norm": 0.46725186705589294, "learning_rate": 4.936674991080985e-05, "loss": 0.3063, "step": 5535 }, { "epoch": 0.09874076980701316, "grad_norm": 0.2878686487674713, "learning_rate": 4.9375668926150556e-05, "loss": 0.2658, "step": 5536 }, { "epoch": 0.09875860592872686, "grad_norm": 0.40644723176956177, "learning_rate": 4.938458794149126e-05, "loss": 0.2406, "step": 5537 }, { "epoch": 0.09877644205044055, "grad_norm": 0.25225239992141724, "learning_rate": 4.939350695683197e-05, "loss": 0.2202, "step": 5538 }, { "epoch": 0.09879427817215425, "grad_norm": 0.2533118724822998, "learning_rate": 4.9402425972172675e-05, "loss": 0.2248, "step": 5539 }, { "epoch": 0.09881211429386794, "grad_norm": 0.2974401116371155, "learning_rate": 4.941134498751338e-05, "loss": 0.2286, "step": 5540 }, { "epoch": 0.09882995041558164, "grad_norm": 0.4625476896762848, "learning_rate": 4.942026400285409e-05, "loss": 0.2355, "step": 5541 }, { "epoch": 0.09884778653729533, "grad_norm": 0.37035489082336426, "learning_rate": 4.9429183018194794e-05, "loss": 0.2426, "step": 5542 }, { "epoch": 0.09886562265900903, "grad_norm": 0.34101033210754395, "learning_rate": 4.94381020335355e-05, "loss": 0.2118, "step": 5543 }, { "epoch": 0.09888345878072272, "grad_norm": 0.24928370118141174, "learning_rate": 4.944702104887621e-05, "loss": 0.2442, "step": 5544 }, { "epoch": 0.09890129490243642, "grad_norm": 0.36414381861686707, "learning_rate": 4.945594006421691e-05, "loss": 0.2824, "step": 5545 }, { "epoch": 0.0989191310241501, "grad_norm": 0.3175963759422302, "learning_rate": 4.946485907955762e-05, "loss": 0.2028, "step": 5546 }, { "epoch": 0.09893696714586381, "grad_norm": 0.3281267285346985, "learning_rate": 4.9473778094898326e-05, "loss": 0.2759, "step": 5547 }, { "epoch": 0.0989548032675775, "grad_norm": 0.26249727606773376, "learning_rate": 4.948269711023903e-05, "loss": 0.2248, "step": 5548 }, { "epoch": 0.0989726393892912, "grad_norm": 0.2374362200498581, "learning_rate": 4.949161612557974e-05, "loss": 0.2033, "step": 5549 }, { "epoch": 0.09899047551100489, "grad_norm": 0.41762882471084595, "learning_rate": 4.9500535140920445e-05, "loss": 0.2507, "step": 5550 }, { "epoch": 0.09900831163271859, "grad_norm": 0.29843926429748535, "learning_rate": 4.950945415626115e-05, "loss": 0.2185, "step": 5551 }, { "epoch": 0.09902614775443228, "grad_norm": 0.3904193937778473, "learning_rate": 4.951837317160186e-05, "loss": 0.2024, "step": 5552 }, { "epoch": 0.09904398387614598, "grad_norm": 0.3317916691303253, "learning_rate": 4.9527292186942564e-05, "loss": 0.2769, "step": 5553 }, { "epoch": 0.09906181999785967, "grad_norm": 0.32630684971809387, "learning_rate": 4.953621120228327e-05, "loss": 0.2816, "step": 5554 }, { "epoch": 0.09907965611957335, "grad_norm": 0.2944846749305725, "learning_rate": 4.954513021762398e-05, "loss": 0.2315, "step": 5555 }, { "epoch": 0.09909749224128706, "grad_norm": 0.22315825521945953, "learning_rate": 4.955404923296468e-05, "loss": 0.1903, "step": 5556 }, { "epoch": 0.09911532836300074, "grad_norm": 0.3081098794937134, "learning_rate": 4.956296824830539e-05, "loss": 0.277, "step": 5557 }, { "epoch": 0.09913316448471444, "grad_norm": 0.216439887881279, "learning_rate": 4.9571887263646096e-05, "loss": 0.2403, "step": 5558 }, { "epoch": 0.09915100060642813, "grad_norm": 0.3412705659866333, "learning_rate": 4.95808062789868e-05, "loss": 0.2618, "step": 5559 }, { "epoch": 0.09916883672814183, "grad_norm": 0.24157923460006714, "learning_rate": 4.958972529432751e-05, "loss": 0.2594, "step": 5560 }, { "epoch": 0.09918667284985552, "grad_norm": 0.3635483980178833, "learning_rate": 4.9598644309668215e-05, "loss": 0.2726, "step": 5561 }, { "epoch": 0.09920450897156922, "grad_norm": 0.27654680609703064, "learning_rate": 4.960756332500892e-05, "loss": 0.2553, "step": 5562 }, { "epoch": 0.09922234509328291, "grad_norm": 0.2773616909980774, "learning_rate": 4.961648234034963e-05, "loss": 0.2837, "step": 5563 }, { "epoch": 0.09924018121499661, "grad_norm": 0.31285059452056885, "learning_rate": 4.9625401355690334e-05, "loss": 0.2946, "step": 5564 }, { "epoch": 0.0992580173367103, "grad_norm": 0.3870413303375244, "learning_rate": 4.963432037103104e-05, "loss": 0.2738, "step": 5565 }, { "epoch": 0.099275853458424, "grad_norm": 0.40917330980300903, "learning_rate": 4.964323938637175e-05, "loss": 0.2788, "step": 5566 }, { "epoch": 0.09929368958013769, "grad_norm": 0.22341284155845642, "learning_rate": 4.9652158401712454e-05, "loss": 0.2471, "step": 5567 }, { "epoch": 0.0993115257018514, "grad_norm": 0.22843582928180695, "learning_rate": 4.966107741705316e-05, "loss": 0.2513, "step": 5568 }, { "epoch": 0.09932936182356508, "grad_norm": 0.24894820153713226, "learning_rate": 4.9669996432393866e-05, "loss": 0.2078, "step": 5569 }, { "epoch": 0.09934719794527878, "grad_norm": 0.28142452239990234, "learning_rate": 4.967891544773457e-05, "loss": 0.2699, "step": 5570 }, { "epoch": 0.09936503406699247, "grad_norm": 0.33661895990371704, "learning_rate": 4.968783446307528e-05, "loss": 0.2675, "step": 5571 }, { "epoch": 0.09938287018870617, "grad_norm": 0.2572460472583771, "learning_rate": 4.9696753478415985e-05, "loss": 0.2508, "step": 5572 }, { "epoch": 0.09940070631041986, "grad_norm": 0.3926204442977905, "learning_rate": 4.970567249375669e-05, "loss": 0.352, "step": 5573 }, { "epoch": 0.09941854243213356, "grad_norm": 0.2722055912017822, "learning_rate": 4.97145915090974e-05, "loss": 0.2167, "step": 5574 }, { "epoch": 0.09943637855384725, "grad_norm": 0.42704060673713684, "learning_rate": 4.9723510524438105e-05, "loss": 0.2592, "step": 5575 }, { "epoch": 0.09945421467556094, "grad_norm": 0.25675666332244873, "learning_rate": 4.973242953977881e-05, "loss": 0.2744, "step": 5576 }, { "epoch": 0.09947205079727464, "grad_norm": 0.24697034060955048, "learning_rate": 4.974134855511952e-05, "loss": 0.2202, "step": 5577 }, { "epoch": 0.09948988691898833, "grad_norm": 0.31476113200187683, "learning_rate": 4.9750267570460224e-05, "loss": 0.2498, "step": 5578 }, { "epoch": 0.09950772304070203, "grad_norm": 0.4493424594402313, "learning_rate": 4.975918658580093e-05, "loss": 0.2208, "step": 5579 }, { "epoch": 0.09952555916241572, "grad_norm": 0.2961020767688751, "learning_rate": 4.9768105601141637e-05, "loss": 0.2459, "step": 5580 }, { "epoch": 0.09954339528412942, "grad_norm": 0.3160838186740875, "learning_rate": 4.977702461648234e-05, "loss": 0.2754, "step": 5581 }, { "epoch": 0.09956123140584311, "grad_norm": 0.364326536655426, "learning_rate": 4.978594363182305e-05, "loss": 0.2418, "step": 5582 }, { "epoch": 0.09957906752755681, "grad_norm": 0.4317879378795624, "learning_rate": 4.9794862647163756e-05, "loss": 0.2736, "step": 5583 }, { "epoch": 0.0995969036492705, "grad_norm": 0.30823612213134766, "learning_rate": 4.980378166250446e-05, "loss": 0.2642, "step": 5584 }, { "epoch": 0.0996147397709842, "grad_norm": 0.3373742997646332, "learning_rate": 4.981270067784517e-05, "loss": 0.2666, "step": 5585 }, { "epoch": 0.09963257589269789, "grad_norm": 0.34264281392097473, "learning_rate": 4.9821619693185875e-05, "loss": 0.2491, "step": 5586 }, { "epoch": 0.09965041201441159, "grad_norm": 0.35036471486091614, "learning_rate": 4.983053870852658e-05, "loss": 0.2584, "step": 5587 }, { "epoch": 0.09966824813612528, "grad_norm": 0.2319411039352417, "learning_rate": 4.983945772386729e-05, "loss": 0.2341, "step": 5588 }, { "epoch": 0.09968608425783898, "grad_norm": 0.27314355969429016, "learning_rate": 4.9848376739207994e-05, "loss": 0.1929, "step": 5589 }, { "epoch": 0.09970392037955267, "grad_norm": 0.35963067412376404, "learning_rate": 4.98572957545487e-05, "loss": 0.2817, "step": 5590 }, { "epoch": 0.09972175650126637, "grad_norm": 0.2947644591331482, "learning_rate": 4.986621476988941e-05, "loss": 0.254, "step": 5591 }, { "epoch": 0.09973959262298006, "grad_norm": 0.3208530843257904, "learning_rate": 4.987513378523011e-05, "loss": 0.2564, "step": 5592 }, { "epoch": 0.09975742874469376, "grad_norm": 0.2744399607181549, "learning_rate": 4.988405280057082e-05, "loss": 0.2137, "step": 5593 }, { "epoch": 0.09977526486640745, "grad_norm": 0.3182908892631531, "learning_rate": 4.9892971815911526e-05, "loss": 0.2708, "step": 5594 }, { "epoch": 0.09979310098812114, "grad_norm": 0.272396057844162, "learning_rate": 4.990189083125223e-05, "loss": 0.1979, "step": 5595 }, { "epoch": 0.09981093710983484, "grad_norm": 0.2817678153514862, "learning_rate": 4.991080984659294e-05, "loss": 0.229, "step": 5596 }, { "epoch": 0.09982877323154853, "grad_norm": 0.29018929600715637, "learning_rate": 4.9919728861933645e-05, "loss": 0.2305, "step": 5597 }, { "epoch": 0.09984660935326223, "grad_norm": 0.27567121386528015, "learning_rate": 4.992864787727435e-05, "loss": 0.2499, "step": 5598 }, { "epoch": 0.09986444547497592, "grad_norm": 0.3599814474582672, "learning_rate": 4.993756689261506e-05, "loss": 0.2833, "step": 5599 }, { "epoch": 0.09988228159668962, "grad_norm": 0.4604446291923523, "learning_rate": 4.9946485907955764e-05, "loss": 0.3642, "step": 5600 }, { "epoch": 0.0999001177184033, "grad_norm": 0.27221474051475525, "learning_rate": 4.995540492329647e-05, "loss": 0.2104, "step": 5601 }, { "epoch": 0.09991795384011701, "grad_norm": 0.22608445584774017, "learning_rate": 4.996432393863718e-05, "loss": 0.2142, "step": 5602 }, { "epoch": 0.0999357899618307, "grad_norm": 0.2663249373435974, "learning_rate": 4.997324295397788e-05, "loss": 0.1488, "step": 5603 }, { "epoch": 0.0999536260835444, "grad_norm": 0.33296507596969604, "learning_rate": 4.998216196931859e-05, "loss": 0.2993, "step": 5604 }, { "epoch": 0.09997146220525809, "grad_norm": 0.36306607723236084, "learning_rate": 4.9991080984659296e-05, "loss": 0.2707, "step": 5605 }, { "epoch": 0.09998929832697179, "grad_norm": 0.3702142536640167, "learning_rate": 5e-05, "loss": 0.2977, "step": 5606 }, { "epoch": 0.10000713444868548, "grad_norm": 0.3072991669178009, "learning_rate": 4.9999999951545686e-05, "loss": 0.3107, "step": 5607 }, { "epoch": 0.10002497057039918, "grad_norm": 0.2944137156009674, "learning_rate": 4.999999980618273e-05, "loss": 0.2808, "step": 5608 }, { "epoch": 0.10004280669211287, "grad_norm": 0.24709758162498474, "learning_rate": 4.999999956391115e-05, "loss": 0.2487, "step": 5609 }, { "epoch": 0.10006064281382657, "grad_norm": 0.25758326053619385, "learning_rate": 4.999999922473093e-05, "loss": 0.2091, "step": 5610 }, { "epoch": 0.10007847893554025, "grad_norm": 0.35120391845703125, "learning_rate": 4.999999878864208e-05, "loss": 0.2288, "step": 5611 }, { "epoch": 0.10009631505725396, "grad_norm": 0.2889556586742401, "learning_rate": 4.9999998255644596e-05, "loss": 0.2766, "step": 5612 }, { "epoch": 0.10011415117896764, "grad_norm": 0.2990957796573639, "learning_rate": 4.999999762573849e-05, "loss": 0.2544, "step": 5613 }, { "epoch": 0.10013198730068135, "grad_norm": 0.3255777359008789, "learning_rate": 4.999999689892375e-05, "loss": 0.2411, "step": 5614 }, { "epoch": 0.10014982342239503, "grad_norm": 0.34438732266426086, "learning_rate": 4.9999996075200396e-05, "loss": 0.3331, "step": 5615 }, { "epoch": 0.10016765954410872, "grad_norm": 0.3306005597114563, "learning_rate": 4.9999995154568424e-05, "loss": 0.2287, "step": 5616 }, { "epoch": 0.10018549566582242, "grad_norm": 0.3421599864959717, "learning_rate": 4.9999994137027826e-05, "loss": 0.3158, "step": 5617 }, { "epoch": 0.10020333178753611, "grad_norm": 0.3410884737968445, "learning_rate": 4.999999302257863e-05, "loss": 0.2612, "step": 5618 }, { "epoch": 0.10022116790924981, "grad_norm": 0.2748681306838989, "learning_rate": 4.999999181122081e-05, "loss": 0.2826, "step": 5619 }, { "epoch": 0.1002390040309635, "grad_norm": 0.24706009030342102, "learning_rate": 4.9999990502954396e-05, "loss": 0.2432, "step": 5620 }, { "epoch": 0.1002568401526772, "grad_norm": 0.30699586868286133, "learning_rate": 4.999998909777939e-05, "loss": 0.2646, "step": 5621 }, { "epoch": 0.10027467627439089, "grad_norm": 0.3587009906768799, "learning_rate": 4.999998759569578e-05, "loss": 0.235, "step": 5622 }, { "epoch": 0.1002925123961046, "grad_norm": 0.3154565393924713, "learning_rate": 4.999998599670359e-05, "loss": 0.2378, "step": 5623 }, { "epoch": 0.10031034851781828, "grad_norm": 0.35002949833869934, "learning_rate": 4.999998430080282e-05, "loss": 0.2322, "step": 5624 }, { "epoch": 0.10032818463953198, "grad_norm": 0.2938433587551117, "learning_rate": 4.999998250799347e-05, "loss": 0.2588, "step": 5625 }, { "epoch": 0.10034602076124567, "grad_norm": 0.23168541491031647, "learning_rate": 4.999998061827555e-05, "loss": 0.2228, "step": 5626 }, { "epoch": 0.10036385688295937, "grad_norm": 0.23731808364391327, "learning_rate": 4.999997863164908e-05, "loss": 0.2205, "step": 5627 }, { "epoch": 0.10038169300467306, "grad_norm": 0.2636774182319641, "learning_rate": 4.999997654811406e-05, "loss": 0.2284, "step": 5628 }, { "epoch": 0.10039952912638676, "grad_norm": 0.28431543707847595, "learning_rate": 4.9999974367670485e-05, "loss": 0.209, "step": 5629 }, { "epoch": 0.10041736524810045, "grad_norm": 0.25456735491752625, "learning_rate": 4.9999972090318384e-05, "loss": 0.2457, "step": 5630 }, { "epoch": 0.10043520136981415, "grad_norm": 0.2534412443637848, "learning_rate": 4.999996971605776e-05, "loss": 0.2239, "step": 5631 }, { "epoch": 0.10045303749152784, "grad_norm": 0.28452956676483154, "learning_rate": 4.999996724488861e-05, "loss": 0.2831, "step": 5632 }, { "epoch": 0.10047087361324154, "grad_norm": 0.4677983820438385, "learning_rate": 4.9999964676810954e-05, "loss": 0.2832, "step": 5633 }, { "epoch": 0.10048870973495523, "grad_norm": 0.23367977142333984, "learning_rate": 4.9999962011824795e-05, "loss": 0.2143, "step": 5634 }, { "epoch": 0.10050654585666892, "grad_norm": 0.39357468485832214, "learning_rate": 4.999995924993016e-05, "loss": 0.2005, "step": 5635 }, { "epoch": 0.10052438197838262, "grad_norm": 0.3034566044807434, "learning_rate": 4.999995639112705e-05, "loss": 0.2211, "step": 5636 }, { "epoch": 0.10054221810009631, "grad_norm": 0.3892858028411865, "learning_rate": 4.999995343541546e-05, "loss": 0.2833, "step": 5637 }, { "epoch": 0.10056005422181001, "grad_norm": 0.277678519487381, "learning_rate": 4.9999950382795425e-05, "loss": 0.2725, "step": 5638 }, { "epoch": 0.1005778903435237, "grad_norm": 0.2668309807777405, "learning_rate": 4.999994723326694e-05, "loss": 0.2385, "step": 5639 }, { "epoch": 0.1005957264652374, "grad_norm": 0.2892187833786011, "learning_rate": 4.9999943986830036e-05, "loss": 0.2852, "step": 5640 }, { "epoch": 0.10061356258695109, "grad_norm": 0.3583581745624542, "learning_rate": 4.999994064348471e-05, "loss": 0.2539, "step": 5641 }, { "epoch": 0.10063139870866479, "grad_norm": 0.42422887682914734, "learning_rate": 4.999993720323097e-05, "loss": 0.2262, "step": 5642 }, { "epoch": 0.10064923483037848, "grad_norm": 0.33774709701538086, "learning_rate": 4.999993366606885e-05, "loss": 0.2758, "step": 5643 }, { "epoch": 0.10066707095209218, "grad_norm": 0.3390183746814728, "learning_rate": 4.999993003199834e-05, "loss": 0.2244, "step": 5644 }, { "epoch": 0.10068490707380587, "grad_norm": 0.27029553055763245, "learning_rate": 4.9999926301019484e-05, "loss": 0.2306, "step": 5645 }, { "epoch": 0.10070274319551957, "grad_norm": 0.2525337338447571, "learning_rate": 4.9999922473132264e-05, "loss": 0.2746, "step": 5646 }, { "epoch": 0.10072057931723326, "grad_norm": 0.2487715780735016, "learning_rate": 4.9999918548336724e-05, "loss": 0.2114, "step": 5647 }, { "epoch": 0.10073841543894696, "grad_norm": 0.3058359920978546, "learning_rate": 4.999991452663285e-05, "loss": 0.2158, "step": 5648 }, { "epoch": 0.10075625156066065, "grad_norm": 0.5530399084091187, "learning_rate": 4.9999910408020686e-05, "loss": 0.2488, "step": 5649 }, { "epoch": 0.10077408768237435, "grad_norm": 0.3230868875980377, "learning_rate": 4.999990619250022e-05, "loss": 0.2466, "step": 5650 }, { "epoch": 0.10079192380408804, "grad_norm": 0.2973959445953369, "learning_rate": 4.99999018800715e-05, "loss": 0.2662, "step": 5651 }, { "epoch": 0.10080975992580174, "grad_norm": 0.3296448886394501, "learning_rate": 4.9999897470734516e-05, "loss": 0.2797, "step": 5652 }, { "epoch": 0.10082759604751543, "grad_norm": 0.307454913854599, "learning_rate": 4.999989296448929e-05, "loss": 0.2391, "step": 5653 }, { "epoch": 0.10084543216922913, "grad_norm": 0.3258278965950012, "learning_rate": 4.9999888361335855e-05, "loss": 0.2601, "step": 5654 }, { "epoch": 0.10086326829094282, "grad_norm": 0.348908007144928, "learning_rate": 4.999988366127421e-05, "loss": 0.2148, "step": 5655 }, { "epoch": 0.1008811044126565, "grad_norm": 0.29810166358947754, "learning_rate": 4.9999878864304385e-05, "loss": 0.2393, "step": 5656 }, { "epoch": 0.10089894053437021, "grad_norm": 0.3841148614883423, "learning_rate": 4.999987397042639e-05, "loss": 0.2822, "step": 5657 }, { "epoch": 0.1009167766560839, "grad_norm": 0.26545315980911255, "learning_rate": 4.999986897964026e-05, "loss": 0.2279, "step": 5658 }, { "epoch": 0.1009346127777976, "grad_norm": 0.3630913197994232, "learning_rate": 4.9999863891945996e-05, "loss": 0.2391, "step": 5659 }, { "epoch": 0.10095244889951129, "grad_norm": 0.23462393879890442, "learning_rate": 4.999985870734362e-05, "loss": 0.2047, "step": 5660 }, { "epoch": 0.10097028502122499, "grad_norm": 0.32495900988578796, "learning_rate": 4.999985342583316e-05, "loss": 0.2041, "step": 5661 }, { "epoch": 0.10098812114293867, "grad_norm": 0.27943190932273865, "learning_rate": 4.999984804741464e-05, "loss": 0.249, "step": 5662 }, { "epoch": 0.10100595726465238, "grad_norm": 0.30690786242485046, "learning_rate": 4.999984257208807e-05, "loss": 0.298, "step": 5663 }, { "epoch": 0.10102379338636606, "grad_norm": 0.21446751058101654, "learning_rate": 4.999983699985348e-05, "loss": 0.199, "step": 5664 }, { "epoch": 0.10104162950807977, "grad_norm": 0.29535484313964844, "learning_rate": 4.9999831330710875e-05, "loss": 0.256, "step": 5665 }, { "epoch": 0.10105946562979345, "grad_norm": 0.30024975538253784, "learning_rate": 4.9999825564660295e-05, "loss": 0.3043, "step": 5666 }, { "epoch": 0.10107730175150716, "grad_norm": 0.5012380480766296, "learning_rate": 4.999981970170176e-05, "loss": 0.3331, "step": 5667 }, { "epoch": 0.10109513787322084, "grad_norm": 0.3446192145347595, "learning_rate": 4.999981374183529e-05, "loss": 0.2525, "step": 5668 }, { "epoch": 0.10111297399493455, "grad_norm": 0.2471040040254593, "learning_rate": 4.99998076850609e-05, "loss": 0.2047, "step": 5669 }, { "epoch": 0.10113081011664823, "grad_norm": 0.2476903200149536, "learning_rate": 4.999980153137862e-05, "loss": 0.2291, "step": 5670 }, { "epoch": 0.10114864623836194, "grad_norm": 0.35032692551612854, "learning_rate": 4.999979528078849e-05, "loss": 0.2677, "step": 5671 }, { "epoch": 0.10116648236007562, "grad_norm": 0.3351408839225769, "learning_rate": 4.99997889332905e-05, "loss": 0.2191, "step": 5672 }, { "epoch": 0.10118431848178933, "grad_norm": 0.29639095067977905, "learning_rate": 4.99997824888847e-05, "loss": 0.2514, "step": 5673 }, { "epoch": 0.10120215460350301, "grad_norm": 0.31570735573768616, "learning_rate": 4.9999775947571117e-05, "loss": 0.2302, "step": 5674 }, { "epoch": 0.10121999072521672, "grad_norm": 0.4335422217845917, "learning_rate": 4.9999769309349765e-05, "loss": 0.2015, "step": 5675 }, { "epoch": 0.1012378268469304, "grad_norm": 0.24284081161022186, "learning_rate": 4.999976257422067e-05, "loss": 0.2256, "step": 5676 }, { "epoch": 0.10125566296864409, "grad_norm": 0.2578946053981781, "learning_rate": 4.9999755742183854e-05, "loss": 0.2317, "step": 5677 }, { "epoch": 0.1012734990903578, "grad_norm": 0.271124005317688, "learning_rate": 4.9999748813239355e-05, "loss": 0.274, "step": 5678 }, { "epoch": 0.10129133521207148, "grad_norm": 0.3532411456108093, "learning_rate": 4.999974178738719e-05, "loss": 0.3066, "step": 5679 }, { "epoch": 0.10130917133378518, "grad_norm": 0.283805251121521, "learning_rate": 4.99997346646274e-05, "loss": 0.2396, "step": 5680 }, { "epoch": 0.10132700745549887, "grad_norm": 0.4033229947090149, "learning_rate": 4.9999727444959996e-05, "loss": 0.2961, "step": 5681 }, { "epoch": 0.10134484357721257, "grad_norm": 0.3610017001628876, "learning_rate": 4.999972012838502e-05, "loss": 0.2043, "step": 5682 }, { "epoch": 0.10136267969892626, "grad_norm": 0.29770293831825256, "learning_rate": 4.999971271490249e-05, "loss": 0.2342, "step": 5683 }, { "epoch": 0.10138051582063996, "grad_norm": 0.3253975212574005, "learning_rate": 4.999970520451245e-05, "loss": 0.2812, "step": 5684 }, { "epoch": 0.10139835194235365, "grad_norm": 0.2652103006839752, "learning_rate": 4.9999697597214905e-05, "loss": 0.1806, "step": 5685 }, { "epoch": 0.10141618806406735, "grad_norm": 0.2191380113363266, "learning_rate": 4.999968989300991e-05, "loss": 0.2126, "step": 5686 }, { "epoch": 0.10143402418578104, "grad_norm": 0.33241501450538635, "learning_rate": 4.999968209189746e-05, "loss": 0.2798, "step": 5687 }, { "epoch": 0.10145186030749474, "grad_norm": 0.3966387212276459, "learning_rate": 4.9999674193877626e-05, "loss": 0.2326, "step": 5688 }, { "epoch": 0.10146969642920843, "grad_norm": 0.34030452370643616, "learning_rate": 4.9999666198950416e-05, "loss": 0.2784, "step": 5689 }, { "epoch": 0.10148753255092213, "grad_norm": 0.24570925533771515, "learning_rate": 4.999965810711587e-05, "loss": 0.259, "step": 5690 }, { "epoch": 0.10150536867263582, "grad_norm": 0.3062693178653717, "learning_rate": 4.9999649918374e-05, "loss": 0.2246, "step": 5691 }, { "epoch": 0.10152320479434952, "grad_norm": 0.3460516333580017, "learning_rate": 4.999964163272487e-05, "loss": 0.2719, "step": 5692 }, { "epoch": 0.10154104091606321, "grad_norm": 0.35425227880477905, "learning_rate": 4.999963325016849e-05, "loss": 0.2319, "step": 5693 }, { "epoch": 0.10155887703777691, "grad_norm": 0.3349691331386566, "learning_rate": 4.999962477070489e-05, "loss": 0.3157, "step": 5694 }, { "epoch": 0.1015767131594906, "grad_norm": 0.36242732405662537, "learning_rate": 4.999961619433411e-05, "loss": 0.2333, "step": 5695 }, { "epoch": 0.10159454928120429, "grad_norm": 0.2270660549402237, "learning_rate": 4.999960752105619e-05, "loss": 0.1958, "step": 5696 }, { "epoch": 0.10161238540291799, "grad_norm": 0.3322550058364868, "learning_rate": 4.999959875087115e-05, "loss": 0.2724, "step": 5697 }, { "epoch": 0.10163022152463168, "grad_norm": 0.42938780784606934, "learning_rate": 4.999958988377903e-05, "loss": 0.3261, "step": 5698 }, { "epoch": 0.10164805764634538, "grad_norm": 0.3311031758785248, "learning_rate": 4.999958091977987e-05, "loss": 0.3073, "step": 5699 }, { "epoch": 0.10166589376805907, "grad_norm": 0.38504278659820557, "learning_rate": 4.99995718588737e-05, "loss": 0.2384, "step": 5700 }, { "epoch": 0.10168372988977277, "grad_norm": 0.274027556180954, "learning_rate": 4.999956270106055e-05, "loss": 0.27, "step": 5701 }, { "epoch": 0.10170156601148646, "grad_norm": 0.3279690444469452, "learning_rate": 4.999955344634046e-05, "loss": 0.2501, "step": 5702 }, { "epoch": 0.10171940213320016, "grad_norm": 0.27175334095954895, "learning_rate": 4.999954409471347e-05, "loss": 0.2419, "step": 5703 }, { "epoch": 0.10173723825491385, "grad_norm": 0.3714657127857208, "learning_rate": 4.999953464617961e-05, "loss": 0.2913, "step": 5704 }, { "epoch": 0.10175507437662755, "grad_norm": 0.22872139513492584, "learning_rate": 4.999952510073893e-05, "loss": 0.2326, "step": 5705 }, { "epoch": 0.10177291049834124, "grad_norm": 0.2462315410375595, "learning_rate": 4.9999515458391445e-05, "loss": 0.2677, "step": 5706 }, { "epoch": 0.10179074662005494, "grad_norm": 0.3426641523838043, "learning_rate": 4.999950571913721e-05, "loss": 0.2055, "step": 5707 }, { "epoch": 0.10180858274176863, "grad_norm": 0.25017473101615906, "learning_rate": 4.999949588297625e-05, "loss": 0.2119, "step": 5708 }, { "epoch": 0.10182641886348233, "grad_norm": 0.3377556800842285, "learning_rate": 4.999948594990861e-05, "loss": 0.252, "step": 5709 }, { "epoch": 0.10184425498519602, "grad_norm": 0.2705136835575104, "learning_rate": 4.9999475919934335e-05, "loss": 0.2757, "step": 5710 }, { "epoch": 0.10186209110690972, "grad_norm": 0.2471962869167328, "learning_rate": 4.999946579305345e-05, "loss": 0.2451, "step": 5711 }, { "epoch": 0.10187992722862341, "grad_norm": 0.37865251302719116, "learning_rate": 4.9999455569266e-05, "loss": 0.2051, "step": 5712 }, { "epoch": 0.10189776335033711, "grad_norm": 0.24027401208877563, "learning_rate": 4.999944524857203e-05, "loss": 0.2222, "step": 5713 }, { "epoch": 0.1019155994720508, "grad_norm": 0.26842200756073, "learning_rate": 4.999943483097157e-05, "loss": 0.2364, "step": 5714 }, { "epoch": 0.1019334355937645, "grad_norm": 0.30966103076934814, "learning_rate": 4.999942431646467e-05, "loss": 0.2507, "step": 5715 }, { "epoch": 0.10195127171547819, "grad_norm": 0.3614092469215393, "learning_rate": 4.999941370505137e-05, "loss": 0.2796, "step": 5716 }, { "epoch": 0.10196910783719187, "grad_norm": 0.30434978008270264, "learning_rate": 4.99994029967317e-05, "loss": 0.2268, "step": 5717 }, { "epoch": 0.10198694395890558, "grad_norm": 0.35362622141838074, "learning_rate": 4.999939219150572e-05, "loss": 0.3003, "step": 5718 }, { "epoch": 0.10200478008061926, "grad_norm": 0.28988972306251526, "learning_rate": 4.9999381289373454e-05, "loss": 0.2105, "step": 5719 }, { "epoch": 0.10202261620233297, "grad_norm": 0.4682668149471283, "learning_rate": 4.9999370290334955e-05, "loss": 0.2407, "step": 5720 }, { "epoch": 0.10204045232404665, "grad_norm": 0.3408457338809967, "learning_rate": 4.999935919439026e-05, "loss": 0.247, "step": 5721 }, { "epoch": 0.10205828844576036, "grad_norm": 0.31839820742607117, "learning_rate": 4.999934800153942e-05, "loss": 0.2188, "step": 5722 }, { "epoch": 0.10207612456747404, "grad_norm": 0.25762316584587097, "learning_rate": 4.9999336711782466e-05, "loss": 0.1989, "step": 5723 }, { "epoch": 0.10209396068918775, "grad_norm": 0.39140570163726807, "learning_rate": 4.9999325325119444e-05, "loss": 0.2484, "step": 5724 }, { "epoch": 0.10211179681090143, "grad_norm": 0.26773861050605774, "learning_rate": 4.999931384155041e-05, "loss": 0.2661, "step": 5725 }, { "epoch": 0.10212963293261514, "grad_norm": 0.39659813046455383, "learning_rate": 4.9999302261075395e-05, "loss": 0.2233, "step": 5726 }, { "epoch": 0.10214746905432882, "grad_norm": 0.3014323115348816, "learning_rate": 4.9999290583694456e-05, "loss": 0.2384, "step": 5727 }, { "epoch": 0.10216530517604253, "grad_norm": 0.27936092019081116, "learning_rate": 4.9999278809407636e-05, "loss": 0.2289, "step": 5728 }, { "epoch": 0.10218314129775621, "grad_norm": 0.26172640919685364, "learning_rate": 4.999926693821497e-05, "loss": 0.2483, "step": 5729 }, { "epoch": 0.10220097741946992, "grad_norm": 0.4068194627761841, "learning_rate": 4.999925497011651e-05, "loss": 0.2644, "step": 5730 }, { "epoch": 0.1022188135411836, "grad_norm": 0.2824203073978424, "learning_rate": 4.999924290511231e-05, "loss": 0.2548, "step": 5731 }, { "epoch": 0.1022366496628973, "grad_norm": 0.28223201632499695, "learning_rate": 4.9999230743202404e-05, "loss": 0.2183, "step": 5732 }, { "epoch": 0.102254485784611, "grad_norm": 0.34141239523887634, "learning_rate": 4.9999218484386846e-05, "loss": 0.2794, "step": 5733 }, { "epoch": 0.1022723219063247, "grad_norm": 0.3454535901546478, "learning_rate": 4.9999206128665684e-05, "loss": 0.2327, "step": 5734 }, { "epoch": 0.10229015802803838, "grad_norm": 0.2345094233751297, "learning_rate": 4.999919367603896e-05, "loss": 0.2354, "step": 5735 }, { "epoch": 0.10230799414975207, "grad_norm": 0.2947339415550232, "learning_rate": 4.999918112650673e-05, "loss": 0.2253, "step": 5736 }, { "epoch": 0.10232583027146577, "grad_norm": 0.4752649664878845, "learning_rate": 4.999916848006904e-05, "loss": 0.2339, "step": 5737 }, { "epoch": 0.10234366639317946, "grad_norm": 0.331943541765213, "learning_rate": 4.9999155736725945e-05, "loss": 0.2462, "step": 5738 }, { "epoch": 0.10236150251489316, "grad_norm": 0.31686171889305115, "learning_rate": 4.999914289647748e-05, "loss": 0.2493, "step": 5739 }, { "epoch": 0.10237933863660685, "grad_norm": 0.3673936724662781, "learning_rate": 4.9999129959323705e-05, "loss": 0.2504, "step": 5740 }, { "epoch": 0.10239717475832055, "grad_norm": 0.4037603735923767, "learning_rate": 4.9999116925264664e-05, "loss": 0.2993, "step": 5741 }, { "epoch": 0.10241501088003424, "grad_norm": 0.3081677556037903, "learning_rate": 4.999910379430042e-05, "loss": 0.211, "step": 5742 }, { "epoch": 0.10243284700174794, "grad_norm": 0.3046434819698334, "learning_rate": 4.9999090566431e-05, "loss": 0.2212, "step": 5743 }, { "epoch": 0.10245068312346163, "grad_norm": 0.2917368710041046, "learning_rate": 4.999907724165649e-05, "loss": 0.2807, "step": 5744 }, { "epoch": 0.10246851924517533, "grad_norm": 0.40417367219924927, "learning_rate": 4.999906381997691e-05, "loss": 0.2992, "step": 5745 }, { "epoch": 0.10248635536688902, "grad_norm": 0.3042461574077606, "learning_rate": 4.9999050301392324e-05, "loss": 0.2463, "step": 5746 }, { "epoch": 0.10250419148860272, "grad_norm": 0.22511489689350128, "learning_rate": 4.999903668590279e-05, "loss": 0.2004, "step": 5747 }, { "epoch": 0.10252202761031641, "grad_norm": 0.241920605301857, "learning_rate": 4.9999022973508357e-05, "loss": 0.2162, "step": 5748 }, { "epoch": 0.10253986373203011, "grad_norm": 0.49574771523475647, "learning_rate": 4.999900916420907e-05, "loss": 0.3038, "step": 5749 }, { "epoch": 0.1025576998537438, "grad_norm": 0.28990188241004944, "learning_rate": 4.9998995258004996e-05, "loss": 0.2031, "step": 5750 }, { "epoch": 0.1025755359754575, "grad_norm": 0.42998576164245605, "learning_rate": 4.999898125489617e-05, "loss": 0.2422, "step": 5751 }, { "epoch": 0.10259337209717119, "grad_norm": 0.5102670788764954, "learning_rate": 4.999896715488267e-05, "loss": 0.212, "step": 5752 }, { "epoch": 0.10261120821888489, "grad_norm": 0.36709269881248474, "learning_rate": 4.999895295796453e-05, "loss": 0.2441, "step": 5753 }, { "epoch": 0.10262904434059858, "grad_norm": 0.3221031725406647, "learning_rate": 4.999893866414183e-05, "loss": 0.3232, "step": 5754 }, { "epoch": 0.10264688046231228, "grad_norm": 0.3416358530521393, "learning_rate": 4.999892427341459e-05, "loss": 0.2875, "step": 5755 }, { "epoch": 0.10266471658402597, "grad_norm": 0.3036465346813202, "learning_rate": 4.99989097857829e-05, "loss": 0.2678, "step": 5756 }, { "epoch": 0.10268255270573966, "grad_norm": 0.2555624842643738, "learning_rate": 4.9998895201246795e-05, "loss": 0.2577, "step": 5757 }, { "epoch": 0.10270038882745336, "grad_norm": 0.3073364794254303, "learning_rate": 4.999888051980634e-05, "loss": 0.2499, "step": 5758 }, { "epoch": 0.10271822494916705, "grad_norm": 0.4039995074272156, "learning_rate": 4.9998865741461584e-05, "loss": 0.287, "step": 5759 }, { "epoch": 0.10273606107088075, "grad_norm": 0.28116482496261597, "learning_rate": 4.9998850866212595e-05, "loss": 0.2452, "step": 5760 }, { "epoch": 0.10275389719259444, "grad_norm": 0.29124900698661804, "learning_rate": 4.999883589405942e-05, "loss": 0.2399, "step": 5761 }, { "epoch": 0.10277173331430814, "grad_norm": 0.34035050868988037, "learning_rate": 4.999882082500213e-05, "loss": 0.2678, "step": 5762 }, { "epoch": 0.10278956943602183, "grad_norm": 0.3873896896839142, "learning_rate": 4.999880565904077e-05, "loss": 0.3366, "step": 5763 }, { "epoch": 0.10280740555773553, "grad_norm": 0.23793981969356537, "learning_rate": 4.999879039617541e-05, "loss": 0.205, "step": 5764 }, { "epoch": 0.10282524167944922, "grad_norm": 0.26890796422958374, "learning_rate": 4.9998775036406104e-05, "loss": 0.1626, "step": 5765 }, { "epoch": 0.10284307780116292, "grad_norm": 0.2527380883693695, "learning_rate": 4.999875957973291e-05, "loss": 0.2383, "step": 5766 }, { "epoch": 0.1028609139228766, "grad_norm": 0.27098050713539124, "learning_rate": 4.999874402615588e-05, "loss": 0.228, "step": 5767 }, { "epoch": 0.10287875004459031, "grad_norm": 0.39944028854370117, "learning_rate": 4.99987283756751e-05, "loss": 0.2379, "step": 5768 }, { "epoch": 0.102896586166304, "grad_norm": 0.294586181640625, "learning_rate": 4.999871262829061e-05, "loss": 0.262, "step": 5769 }, { "epoch": 0.1029144222880177, "grad_norm": 0.3405201733112335, "learning_rate": 4.9998696784002476e-05, "loss": 0.2371, "step": 5770 }, { "epoch": 0.10293225840973139, "grad_norm": 0.2673748731613159, "learning_rate": 4.999868084281075e-05, "loss": 0.199, "step": 5771 }, { "epoch": 0.10295009453144509, "grad_norm": 0.25981229543685913, "learning_rate": 4.9998664804715514e-05, "loss": 0.2129, "step": 5772 }, { "epoch": 0.10296793065315878, "grad_norm": 0.2996998131275177, "learning_rate": 4.9998648669716816e-05, "loss": 0.2112, "step": 5773 }, { "epoch": 0.10298576677487248, "grad_norm": 0.29292044043540955, "learning_rate": 4.9998632437814715e-05, "loss": 0.2557, "step": 5774 }, { "epoch": 0.10300360289658617, "grad_norm": 0.3624270260334015, "learning_rate": 4.999861610900929e-05, "loss": 0.2401, "step": 5775 }, { "epoch": 0.10302143901829985, "grad_norm": 0.29216569662094116, "learning_rate": 4.999859968330059e-05, "loss": 0.2192, "step": 5776 }, { "epoch": 0.10303927514001356, "grad_norm": 0.3137858510017395, "learning_rate": 4.999858316068868e-05, "loss": 0.2675, "step": 5777 }, { "epoch": 0.10305711126172724, "grad_norm": 0.3349331021308899, "learning_rate": 4.999856654117363e-05, "loss": 0.259, "step": 5778 }, { "epoch": 0.10307494738344095, "grad_norm": 0.3540928363800049, "learning_rate": 4.9998549824755506e-05, "loss": 0.2862, "step": 5779 }, { "epoch": 0.10309278350515463, "grad_norm": 0.3264806568622589, "learning_rate": 4.9998533011434365e-05, "loss": 0.2485, "step": 5780 }, { "epoch": 0.10311061962686834, "grad_norm": 0.3131437301635742, "learning_rate": 4.9998516101210276e-05, "loss": 0.285, "step": 5781 }, { "epoch": 0.10312845574858202, "grad_norm": 0.3235114812850952, "learning_rate": 4.999849909408331e-05, "loss": 0.1961, "step": 5782 }, { "epoch": 0.10314629187029573, "grad_norm": 0.2606179118156433, "learning_rate": 4.999848199005351e-05, "loss": 0.2159, "step": 5783 }, { "epoch": 0.10316412799200941, "grad_norm": 0.3242022395133972, "learning_rate": 4.999846478912098e-05, "loss": 0.2388, "step": 5784 }, { "epoch": 0.10318196411372312, "grad_norm": 0.3644527792930603, "learning_rate": 4.999844749128576e-05, "loss": 0.2711, "step": 5785 }, { "epoch": 0.1031998002354368, "grad_norm": 0.2748754024505615, "learning_rate": 4.999843009654791e-05, "loss": 0.2912, "step": 5786 }, { "epoch": 0.1032176363571505, "grad_norm": 0.25084471702575684, "learning_rate": 4.999841260490753e-05, "loss": 0.246, "step": 5787 }, { "epoch": 0.1032354724788642, "grad_norm": 0.30010634660720825, "learning_rate": 4.9998395016364655e-05, "loss": 0.2393, "step": 5788 }, { "epoch": 0.1032533086005779, "grad_norm": 0.24173887073993683, "learning_rate": 4.999837733091938e-05, "loss": 0.2316, "step": 5789 }, { "epoch": 0.10327114472229158, "grad_norm": 0.3200397789478302, "learning_rate": 4.999835954857175e-05, "loss": 0.2771, "step": 5790 }, { "epoch": 0.10328898084400528, "grad_norm": 0.38963747024536133, "learning_rate": 4.999834166932185e-05, "loss": 0.2775, "step": 5791 }, { "epoch": 0.10330681696571897, "grad_norm": 0.38244643807411194, "learning_rate": 4.999832369316973e-05, "loss": 0.2183, "step": 5792 }, { "epoch": 0.10332465308743267, "grad_norm": 0.35715675354003906, "learning_rate": 4.999830562011549e-05, "loss": 0.2464, "step": 5793 }, { "epoch": 0.10334248920914636, "grad_norm": 0.3591226637363434, "learning_rate": 4.999828745015917e-05, "loss": 0.2362, "step": 5794 }, { "epoch": 0.10336032533086006, "grad_norm": 0.33126264810562134, "learning_rate": 4.999826918330086e-05, "loss": 0.253, "step": 5795 }, { "epoch": 0.10337816145257375, "grad_norm": 0.316311776638031, "learning_rate": 4.9998250819540625e-05, "loss": 0.2309, "step": 5796 }, { "epoch": 0.10339599757428744, "grad_norm": 0.3163365423679352, "learning_rate": 4.999823235887854e-05, "loss": 0.235, "step": 5797 }, { "epoch": 0.10341383369600114, "grad_norm": 0.30197620391845703, "learning_rate": 4.999821380131466e-05, "loss": 0.2085, "step": 5798 }, { "epoch": 0.10343166981771483, "grad_norm": 0.30523353815078735, "learning_rate": 4.9998195146849084e-05, "loss": 0.2587, "step": 5799 }, { "epoch": 0.10344950593942853, "grad_norm": 0.35222527384757996, "learning_rate": 4.9998176395481865e-05, "loss": 0.2618, "step": 5800 }, { "epoch": 0.10346734206114222, "grad_norm": 0.2785562574863434, "learning_rate": 4.999815754721307e-05, "loss": 0.2285, "step": 5801 }, { "epoch": 0.10348517818285592, "grad_norm": 0.2471916824579239, "learning_rate": 4.99981386020428e-05, "loss": 0.2459, "step": 5802 }, { "epoch": 0.10350301430456961, "grad_norm": 0.2487059384584427, "learning_rate": 4.999811955997109e-05, "loss": 0.2219, "step": 5803 }, { "epoch": 0.10352085042628331, "grad_norm": 0.3147665560245514, "learning_rate": 4.999810042099805e-05, "loss": 0.2411, "step": 5804 }, { "epoch": 0.103538686547997, "grad_norm": 0.292324036359787, "learning_rate": 4.999808118512373e-05, "loss": 0.2462, "step": 5805 }, { "epoch": 0.1035565226697107, "grad_norm": 0.33168265223503113, "learning_rate": 4.999806185234822e-05, "loss": 0.2359, "step": 5806 }, { "epoch": 0.10357435879142439, "grad_norm": 0.29198211431503296, "learning_rate": 4.999804242267159e-05, "loss": 0.2226, "step": 5807 }, { "epoch": 0.10359219491313809, "grad_norm": 0.2885533571243286, "learning_rate": 4.999802289609391e-05, "loss": 0.1946, "step": 5808 }, { "epoch": 0.10361003103485178, "grad_norm": 0.32933491468429565, "learning_rate": 4.9998003272615256e-05, "loss": 0.31, "step": 5809 }, { "epoch": 0.10362786715656548, "grad_norm": 0.3106703460216522, "learning_rate": 4.999798355223571e-05, "loss": 0.2462, "step": 5810 }, { "epoch": 0.10364570327827917, "grad_norm": 0.351720929145813, "learning_rate": 4.999796373495535e-05, "loss": 0.2698, "step": 5811 }, { "epoch": 0.10366353939999287, "grad_norm": 0.2700147032737732, "learning_rate": 4.999794382077424e-05, "loss": 0.2287, "step": 5812 }, { "epoch": 0.10368137552170656, "grad_norm": 0.2648630440235138, "learning_rate": 4.999792380969247e-05, "loss": 0.233, "step": 5813 }, { "epoch": 0.10369921164342026, "grad_norm": 0.39086654782295227, "learning_rate": 4.999790370171011e-05, "loss": 0.2633, "step": 5814 }, { "epoch": 0.10371704776513395, "grad_norm": 0.43334561586380005, "learning_rate": 4.999788349682725e-05, "loss": 0.3197, "step": 5815 }, { "epoch": 0.10373488388684764, "grad_norm": 0.27810490131378174, "learning_rate": 4.999786319504395e-05, "loss": 0.2838, "step": 5816 }, { "epoch": 0.10375272000856134, "grad_norm": 0.2762657403945923, "learning_rate": 4.99978427963603e-05, "loss": 0.2933, "step": 5817 }, { "epoch": 0.10377055613027503, "grad_norm": 0.26107022166252136, "learning_rate": 4.999782230077638e-05, "loss": 0.238, "step": 5818 }, { "epoch": 0.10378839225198873, "grad_norm": 0.31598934531211853, "learning_rate": 4.999780170829227e-05, "loss": 0.2712, "step": 5819 }, { "epoch": 0.10380622837370242, "grad_norm": 0.2611783444881439, "learning_rate": 4.999778101890804e-05, "loss": 0.2322, "step": 5820 }, { "epoch": 0.10382406449541612, "grad_norm": 0.3476533889770508, "learning_rate": 4.9997760232623784e-05, "loss": 0.3011, "step": 5821 }, { "epoch": 0.1038419006171298, "grad_norm": 0.3373652994632721, "learning_rate": 4.9997739349439564e-05, "loss": 0.2402, "step": 5822 }, { "epoch": 0.10385973673884351, "grad_norm": 0.2581215500831604, "learning_rate": 4.9997718369355486e-05, "loss": 0.1991, "step": 5823 }, { "epoch": 0.1038775728605572, "grad_norm": 0.2656016945838928, "learning_rate": 4.9997697292371605e-05, "loss": 0.2623, "step": 5824 }, { "epoch": 0.1038954089822709, "grad_norm": 0.3778791129589081, "learning_rate": 4.999767611848802e-05, "loss": 0.2231, "step": 5825 }, { "epoch": 0.10391324510398459, "grad_norm": 0.32868921756744385, "learning_rate": 4.999765484770481e-05, "loss": 0.217, "step": 5826 }, { "epoch": 0.10393108122569829, "grad_norm": 0.3548049032688141, "learning_rate": 4.9997633480022056e-05, "loss": 0.2353, "step": 5827 }, { "epoch": 0.10394891734741198, "grad_norm": 0.35475409030914307, "learning_rate": 4.999761201543984e-05, "loss": 0.2562, "step": 5828 }, { "epoch": 0.10396675346912568, "grad_norm": 0.33357155323028564, "learning_rate": 4.999759045395825e-05, "loss": 0.2434, "step": 5829 }, { "epoch": 0.10398458959083937, "grad_norm": 0.43696609139442444, "learning_rate": 4.999756879557736e-05, "loss": 0.3114, "step": 5830 }, { "epoch": 0.10400242571255307, "grad_norm": 0.2838720977306366, "learning_rate": 4.999754704029726e-05, "loss": 0.2984, "step": 5831 }, { "epoch": 0.10402026183426676, "grad_norm": 0.38694697618484497, "learning_rate": 4.9997525188118034e-05, "loss": 0.2429, "step": 5832 }, { "epoch": 0.10403809795598046, "grad_norm": 0.37511295080184937, "learning_rate": 4.9997503239039764e-05, "loss": 0.2851, "step": 5833 }, { "epoch": 0.10405593407769415, "grad_norm": 0.46333423256874084, "learning_rate": 4.9997481193062544e-05, "loss": 0.3164, "step": 5834 }, { "epoch": 0.10407377019940785, "grad_norm": 0.29584962129592896, "learning_rate": 4.999745905018645e-05, "loss": 0.2635, "step": 5835 }, { "epoch": 0.10409160632112154, "grad_norm": 0.35017719864845276, "learning_rate": 4.9997436810411575e-05, "loss": 0.2436, "step": 5836 }, { "epoch": 0.10410944244283522, "grad_norm": 0.3249483108520508, "learning_rate": 4.9997414473737994e-05, "loss": 0.2944, "step": 5837 }, { "epoch": 0.10412727856454893, "grad_norm": 0.3080081343650818, "learning_rate": 4.99973920401658e-05, "loss": 0.2757, "step": 5838 }, { "epoch": 0.10414511468626261, "grad_norm": 0.4317755401134491, "learning_rate": 4.999736950969509e-05, "loss": 0.351, "step": 5839 }, { "epoch": 0.10416295080797632, "grad_norm": 0.4012046456336975, "learning_rate": 4.999734688232593e-05, "loss": 0.319, "step": 5840 }, { "epoch": 0.10418078692969, "grad_norm": 0.3114517033100128, "learning_rate": 4.999732415805844e-05, "loss": 0.2468, "step": 5841 }, { "epoch": 0.1041986230514037, "grad_norm": 0.41210392117500305, "learning_rate": 4.999730133689266e-05, "loss": 0.3366, "step": 5842 }, { "epoch": 0.10421645917311739, "grad_norm": 0.38541272282600403, "learning_rate": 4.9997278418828725e-05, "loss": 0.3144, "step": 5843 }, { "epoch": 0.1042342952948311, "grad_norm": 0.34774795174598694, "learning_rate": 4.9997255403866705e-05, "loss": 0.2748, "step": 5844 }, { "epoch": 0.10425213141654478, "grad_norm": 0.4897577464580536, "learning_rate": 4.999723229200668e-05, "loss": 0.2691, "step": 5845 }, { "epoch": 0.10426996753825848, "grad_norm": 0.36025020480155945, "learning_rate": 4.999720908324875e-05, "loss": 0.2654, "step": 5846 }, { "epoch": 0.10428780365997217, "grad_norm": 0.3165666460990906, "learning_rate": 4.999718577759301e-05, "loss": 0.2814, "step": 5847 }, { "epoch": 0.10430563978168587, "grad_norm": 0.25389689207077026, "learning_rate": 4.9997162375039544e-05, "loss": 0.2511, "step": 5848 }, { "epoch": 0.10432347590339956, "grad_norm": 0.3201467990875244, "learning_rate": 4.999713887558844e-05, "loss": 0.2896, "step": 5849 }, { "epoch": 0.10434131202511326, "grad_norm": 0.34935447573661804, "learning_rate": 4.999711527923979e-05, "loss": 0.2486, "step": 5850 }, { "epoch": 0.10435914814682695, "grad_norm": 0.3226553797721863, "learning_rate": 4.9997091585993695e-05, "loss": 0.2357, "step": 5851 }, { "epoch": 0.10437698426854065, "grad_norm": 0.31256410479545593, "learning_rate": 4.999706779585023e-05, "loss": 0.2493, "step": 5852 }, { "epoch": 0.10439482039025434, "grad_norm": 0.34214910864830017, "learning_rate": 4.99970439088095e-05, "loss": 0.2646, "step": 5853 }, { "epoch": 0.10441265651196804, "grad_norm": 0.40745365619659424, "learning_rate": 4.999701992487159e-05, "loss": 0.2674, "step": 5854 }, { "epoch": 0.10443049263368173, "grad_norm": 0.2790455222129822, "learning_rate": 4.999699584403661e-05, "loss": 0.2163, "step": 5855 }, { "epoch": 0.10444832875539543, "grad_norm": 0.2634226083755493, "learning_rate": 4.999697166630463e-05, "loss": 0.2138, "step": 5856 }, { "epoch": 0.10446616487710912, "grad_norm": 0.33517295122146606, "learning_rate": 4.999694739167575e-05, "loss": 0.2415, "step": 5857 }, { "epoch": 0.10448400099882281, "grad_norm": 0.34214141964912415, "learning_rate": 4.999692302015008e-05, "loss": 0.2779, "step": 5858 }, { "epoch": 0.10450183712053651, "grad_norm": 0.3027956187725067, "learning_rate": 4.9996898551727694e-05, "loss": 0.2355, "step": 5859 }, { "epoch": 0.1045196732422502, "grad_norm": 0.2922017574310303, "learning_rate": 4.99968739864087e-05, "loss": 0.2166, "step": 5860 }, { "epoch": 0.1045375093639639, "grad_norm": 0.3346848487854004, "learning_rate": 4.999684932419318e-05, "loss": 0.26, "step": 5861 }, { "epoch": 0.10455534548567759, "grad_norm": 0.2706536650657654, "learning_rate": 4.9996824565081254e-05, "loss": 0.2521, "step": 5862 }, { "epoch": 0.10457318160739129, "grad_norm": 0.3135501742362976, "learning_rate": 4.9996799709073e-05, "loss": 0.2548, "step": 5863 }, { "epoch": 0.10459101772910498, "grad_norm": 0.32489100098609924, "learning_rate": 4.999677475616851e-05, "loss": 0.2502, "step": 5864 }, { "epoch": 0.10460885385081868, "grad_norm": 0.3062833547592163, "learning_rate": 4.999674970636788e-05, "loss": 0.2481, "step": 5865 }, { "epoch": 0.10462668997253237, "grad_norm": 0.29157453775405884, "learning_rate": 4.999672455967123e-05, "loss": 0.2616, "step": 5866 }, { "epoch": 0.10464452609424607, "grad_norm": 0.48722150921821594, "learning_rate": 4.999669931607863e-05, "loss": 0.2408, "step": 5867 }, { "epoch": 0.10466236221595976, "grad_norm": 0.3130400478839874, "learning_rate": 4.999667397559019e-05, "loss": 0.2491, "step": 5868 }, { "epoch": 0.10468019833767346, "grad_norm": 0.32957836985588074, "learning_rate": 4.9996648538206015e-05, "loss": 0.2785, "step": 5869 }, { "epoch": 0.10469803445938715, "grad_norm": 0.5479748845100403, "learning_rate": 4.99966230039262e-05, "loss": 0.3644, "step": 5870 }, { "epoch": 0.10471587058110085, "grad_norm": 0.28692811727523804, "learning_rate": 4.999659737275083e-05, "loss": 0.2437, "step": 5871 }, { "epoch": 0.10473370670281454, "grad_norm": 0.33135369420051575, "learning_rate": 4.9996571644680024e-05, "loss": 0.2929, "step": 5872 }, { "epoch": 0.10475154282452824, "grad_norm": 0.2850666046142578, "learning_rate": 4.999654581971387e-05, "loss": 0.2455, "step": 5873 }, { "epoch": 0.10476937894624193, "grad_norm": 0.3062702417373657, "learning_rate": 4.9996519897852464e-05, "loss": 0.2169, "step": 5874 }, { "epoch": 0.10478721506795563, "grad_norm": 0.3672844469547272, "learning_rate": 4.9996493879095925e-05, "loss": 0.2922, "step": 5875 }, { "epoch": 0.10480505118966932, "grad_norm": 0.33063754439353943, "learning_rate": 4.999646776344433e-05, "loss": 0.2719, "step": 5876 }, { "epoch": 0.104822887311383, "grad_norm": 0.39049437642097473, "learning_rate": 4.99964415508978e-05, "loss": 0.2576, "step": 5877 }, { "epoch": 0.10484072343309671, "grad_norm": 0.41826221346855164, "learning_rate": 4.999641524145643e-05, "loss": 0.2655, "step": 5878 }, { "epoch": 0.1048585595548104, "grad_norm": 0.3125578463077545, "learning_rate": 4.9996388835120325e-05, "loss": 0.257, "step": 5879 }, { "epoch": 0.1048763956765241, "grad_norm": 0.24703185260295868, "learning_rate": 4.9996362331889576e-05, "loss": 0.2294, "step": 5880 }, { "epoch": 0.10489423179823779, "grad_norm": 0.3470175564289093, "learning_rate": 4.9996335731764296e-05, "loss": 0.2481, "step": 5881 }, { "epoch": 0.10491206791995149, "grad_norm": 0.2972065806388855, "learning_rate": 4.999630903474458e-05, "loss": 0.2645, "step": 5882 }, { "epoch": 0.10492990404166518, "grad_norm": 0.3808644711971283, "learning_rate": 4.999628224083054e-05, "loss": 0.2175, "step": 5883 }, { "epoch": 0.10494774016337888, "grad_norm": 0.22417323291301727, "learning_rate": 4.999625535002228e-05, "loss": 0.2087, "step": 5884 }, { "epoch": 0.10496557628509257, "grad_norm": 0.3009660840034485, "learning_rate": 4.99962283623199e-05, "loss": 0.2511, "step": 5885 }, { "epoch": 0.10498341240680627, "grad_norm": 0.20104964077472687, "learning_rate": 4.99962012777235e-05, "loss": 0.2113, "step": 5886 }, { "epoch": 0.10500124852851996, "grad_norm": 0.24783194065093994, "learning_rate": 4.999617409623319e-05, "loss": 0.236, "step": 5887 }, { "epoch": 0.10501908465023366, "grad_norm": 0.3385120630264282, "learning_rate": 4.9996146817849084e-05, "loss": 0.2301, "step": 5888 }, { "epoch": 0.10503692077194735, "grad_norm": 0.312938392162323, "learning_rate": 4.999611944257128e-05, "loss": 0.2524, "step": 5889 }, { "epoch": 0.10505475689366105, "grad_norm": 0.24730493128299713, "learning_rate": 4.999609197039987e-05, "loss": 0.2224, "step": 5890 }, { "epoch": 0.10507259301537474, "grad_norm": 0.3070394694805145, "learning_rate": 4.999606440133499e-05, "loss": 0.2688, "step": 5891 }, { "epoch": 0.10509042913708844, "grad_norm": 0.3449821472167969, "learning_rate": 4.999603673537672e-05, "loss": 0.2964, "step": 5892 }, { "epoch": 0.10510826525880212, "grad_norm": 0.4380045235157013, "learning_rate": 4.9996008972525184e-05, "loss": 0.2741, "step": 5893 }, { "epoch": 0.10512610138051583, "grad_norm": 0.34592729806900024, "learning_rate": 4.999598111278048e-05, "loss": 0.2765, "step": 5894 }, { "epoch": 0.10514393750222951, "grad_norm": 0.3012355864048004, "learning_rate": 4.999595315614272e-05, "loss": 0.2323, "step": 5895 }, { "epoch": 0.10516177362394322, "grad_norm": 0.280671626329422, "learning_rate": 4.999592510261202e-05, "loss": 0.2605, "step": 5896 }, { "epoch": 0.1051796097456569, "grad_norm": 0.3939959704875946, "learning_rate": 4.999589695218847e-05, "loss": 0.3428, "step": 5897 }, { "epoch": 0.10519744586737059, "grad_norm": 0.34676697850227356, "learning_rate": 4.9995868704872195e-05, "loss": 0.1837, "step": 5898 }, { "epoch": 0.1052152819890843, "grad_norm": 0.30784639716148376, "learning_rate": 4.9995840360663305e-05, "loss": 0.2864, "step": 5899 }, { "epoch": 0.10523311811079798, "grad_norm": 0.4207679331302643, "learning_rate": 4.9995811919561895e-05, "loss": 0.3062, "step": 5900 }, { "epoch": 0.10525095423251168, "grad_norm": 0.28703030943870544, "learning_rate": 4.9995783381568095e-05, "loss": 0.2601, "step": 5901 }, { "epoch": 0.10526879035422537, "grad_norm": 0.34877583384513855, "learning_rate": 4.9995754746682e-05, "loss": 0.2362, "step": 5902 }, { "epoch": 0.10528662647593907, "grad_norm": 0.24295946955680847, "learning_rate": 4.999572601490372e-05, "loss": 0.2107, "step": 5903 }, { "epoch": 0.10530446259765276, "grad_norm": 0.31496644020080566, "learning_rate": 4.999569718623338e-05, "loss": 0.2445, "step": 5904 }, { "epoch": 0.10532229871936646, "grad_norm": 0.34387901425361633, "learning_rate": 4.999566826067108e-05, "loss": 0.2827, "step": 5905 }, { "epoch": 0.10534013484108015, "grad_norm": 0.3703104257583618, "learning_rate": 4.9995639238216944e-05, "loss": 0.2736, "step": 5906 }, { "epoch": 0.10535797096279385, "grad_norm": 0.3689406216144562, "learning_rate": 4.999561011887107e-05, "loss": 0.2828, "step": 5907 }, { "epoch": 0.10537580708450754, "grad_norm": 0.30629584193229675, "learning_rate": 4.9995580902633584e-05, "loss": 0.2434, "step": 5908 }, { "epoch": 0.10539364320622124, "grad_norm": 0.4159982204437256, "learning_rate": 4.9995551589504586e-05, "loss": 0.2585, "step": 5909 }, { "epoch": 0.10541147932793493, "grad_norm": 0.37532517313957214, "learning_rate": 4.999552217948421e-05, "loss": 0.2657, "step": 5910 }, { "epoch": 0.10542931544964863, "grad_norm": 0.302166223526001, "learning_rate": 4.999549267257254e-05, "loss": 0.2633, "step": 5911 }, { "epoch": 0.10544715157136232, "grad_norm": 0.2652914822101593, "learning_rate": 4.9995463068769715e-05, "loss": 0.2203, "step": 5912 }, { "epoch": 0.10546498769307602, "grad_norm": 0.37150490283966064, "learning_rate": 4.9995433368075846e-05, "loss": 0.2608, "step": 5913 }, { "epoch": 0.10548282381478971, "grad_norm": 0.37731674313545227, "learning_rate": 4.999540357049104e-05, "loss": 0.3231, "step": 5914 }, { "epoch": 0.10550065993650341, "grad_norm": 0.28298869729042053, "learning_rate": 4.999537367601541e-05, "loss": 0.271, "step": 5915 }, { "epoch": 0.1055184960582171, "grad_norm": 0.35270267724990845, "learning_rate": 4.9995343684649084e-05, "loss": 0.2672, "step": 5916 }, { "epoch": 0.10553633217993079, "grad_norm": 0.27312782406806946, "learning_rate": 4.999531359639218e-05, "loss": 0.2213, "step": 5917 }, { "epoch": 0.10555416830164449, "grad_norm": 0.24734452366828918, "learning_rate": 4.9995283411244795e-05, "loss": 0.1981, "step": 5918 }, { "epoch": 0.10557200442335818, "grad_norm": 0.2973858416080475, "learning_rate": 4.9995253129207074e-05, "loss": 0.2462, "step": 5919 }, { "epoch": 0.10558984054507188, "grad_norm": 0.3610922694206238, "learning_rate": 4.99952227502791e-05, "loss": 0.2494, "step": 5920 }, { "epoch": 0.10560767666678557, "grad_norm": 0.27888768911361694, "learning_rate": 4.999519227446102e-05, "loss": 0.239, "step": 5921 }, { "epoch": 0.10562551278849927, "grad_norm": 0.3393993377685547, "learning_rate": 4.9995161701752945e-05, "loss": 0.2399, "step": 5922 }, { "epoch": 0.10564334891021296, "grad_norm": 0.29176065325737, "learning_rate": 4.999513103215499e-05, "loss": 0.2528, "step": 5923 }, { "epoch": 0.10566118503192666, "grad_norm": 0.302426278591156, "learning_rate": 4.999510026566727e-05, "loss": 0.2504, "step": 5924 }, { "epoch": 0.10567902115364035, "grad_norm": 0.3877299129962921, "learning_rate": 4.999506940228991e-05, "loss": 0.275, "step": 5925 }, { "epoch": 0.10569685727535405, "grad_norm": 0.3485565185546875, "learning_rate": 4.999503844202302e-05, "loss": 0.2489, "step": 5926 }, { "epoch": 0.10571469339706774, "grad_norm": 0.24428917467594147, "learning_rate": 4.999500738486673e-05, "loss": 0.2167, "step": 5927 }, { "epoch": 0.10573252951878144, "grad_norm": 0.3241652846336365, "learning_rate": 4.9994976230821167e-05, "loss": 0.2855, "step": 5928 }, { "epoch": 0.10575036564049513, "grad_norm": 0.3077361583709717, "learning_rate": 4.999494497988644e-05, "loss": 0.2577, "step": 5929 }, { "epoch": 0.10576820176220883, "grad_norm": 0.4071911871433258, "learning_rate": 4.9994913632062674e-05, "loss": 0.2539, "step": 5930 }, { "epoch": 0.10578603788392252, "grad_norm": 0.2919321060180664, "learning_rate": 4.999488218734999e-05, "loss": 0.201, "step": 5931 }, { "epoch": 0.10580387400563622, "grad_norm": 0.4423716962337494, "learning_rate": 4.9994850645748504e-05, "loss": 0.3363, "step": 5932 }, { "epoch": 0.10582171012734991, "grad_norm": 0.21169425547122955, "learning_rate": 4.999481900725835e-05, "loss": 0.2288, "step": 5933 }, { "epoch": 0.10583954624906361, "grad_norm": 0.31259584426879883, "learning_rate": 4.999478727187964e-05, "loss": 0.2952, "step": 5934 }, { "epoch": 0.1058573823707773, "grad_norm": 0.34835946559906006, "learning_rate": 4.9994755439612507e-05, "loss": 0.237, "step": 5935 }, { "epoch": 0.105875218492491, "grad_norm": 0.23921677470207214, "learning_rate": 4.999472351045707e-05, "loss": 0.2478, "step": 5936 }, { "epoch": 0.10589305461420469, "grad_norm": 0.30249902606010437, "learning_rate": 4.999469148441344e-05, "loss": 0.2483, "step": 5937 }, { "epoch": 0.10591089073591838, "grad_norm": 0.30154699087142944, "learning_rate": 4.999465936148176e-05, "loss": 0.2412, "step": 5938 }, { "epoch": 0.10592872685763208, "grad_norm": 0.37488991022109985, "learning_rate": 4.9994627141662145e-05, "loss": 0.2899, "step": 5939 }, { "epoch": 0.10594656297934577, "grad_norm": 0.3827170729637146, "learning_rate": 4.999459482495473e-05, "loss": 0.3188, "step": 5940 }, { "epoch": 0.10596439910105947, "grad_norm": 0.3265140950679779, "learning_rate": 4.9994562411359626e-05, "loss": 0.2274, "step": 5941 }, { "epoch": 0.10598223522277316, "grad_norm": 0.23505325615406036, "learning_rate": 4.999452990087697e-05, "loss": 0.2478, "step": 5942 }, { "epoch": 0.10600007134448686, "grad_norm": 0.3313054144382477, "learning_rate": 4.999449729350688e-05, "loss": 0.2755, "step": 5943 }, { "epoch": 0.10601790746620054, "grad_norm": 0.29447782039642334, "learning_rate": 4.999446458924949e-05, "loss": 0.2366, "step": 5944 }, { "epoch": 0.10603574358791425, "grad_norm": 0.35569271445274353, "learning_rate": 4.9994431788104914e-05, "loss": 0.3261, "step": 5945 }, { "epoch": 0.10605357970962793, "grad_norm": 0.8849342465400696, "learning_rate": 4.999439889007329e-05, "loss": 0.2117, "step": 5946 }, { "epoch": 0.10607141583134164, "grad_norm": 0.21632613241672516, "learning_rate": 4.999436589515475e-05, "loss": 0.1907, "step": 5947 }, { "epoch": 0.10608925195305532, "grad_norm": 0.37242913246154785, "learning_rate": 4.999433280334941e-05, "loss": 0.3247, "step": 5948 }, { "epoch": 0.10610708807476903, "grad_norm": 0.37078890204429626, "learning_rate": 4.99942996146574e-05, "loss": 0.2243, "step": 5949 }, { "epoch": 0.10612492419648271, "grad_norm": 0.2567066550254822, "learning_rate": 4.9994266329078856e-05, "loss": 0.2658, "step": 5950 }, { "epoch": 0.10614276031819642, "grad_norm": 0.2881944477558136, "learning_rate": 4.9994232946613905e-05, "loss": 0.2323, "step": 5951 }, { "epoch": 0.1061605964399101, "grad_norm": 0.30119019746780396, "learning_rate": 4.999419946726267e-05, "loss": 0.2344, "step": 5952 }, { "epoch": 0.1061784325616238, "grad_norm": 0.3263953626155853, "learning_rate": 4.9994165891025285e-05, "loss": 0.2812, "step": 5953 }, { "epoch": 0.1061962686833375, "grad_norm": 0.47165006399154663, "learning_rate": 4.999413221790188e-05, "loss": 0.2212, "step": 5954 }, { "epoch": 0.1062141048050512, "grad_norm": 0.36640140414237976, "learning_rate": 4.999409844789259e-05, "loss": 0.2945, "step": 5955 }, { "epoch": 0.10623194092676488, "grad_norm": 0.3732265532016754, "learning_rate": 4.999406458099754e-05, "loss": 0.2663, "step": 5956 }, { "epoch": 0.10624977704847857, "grad_norm": 0.3062219023704529, "learning_rate": 4.999403061721686e-05, "loss": 0.2458, "step": 5957 }, { "epoch": 0.10626761317019227, "grad_norm": 0.28339648246765137, "learning_rate": 4.9993996556550694e-05, "loss": 0.1878, "step": 5958 }, { "epoch": 0.10628544929190596, "grad_norm": 0.26308074593544006, "learning_rate": 4.999396239899916e-05, "loss": 0.2428, "step": 5959 }, { "epoch": 0.10630328541361966, "grad_norm": 0.3475037217140198, "learning_rate": 4.999392814456239e-05, "loss": 0.2759, "step": 5960 }, { "epoch": 0.10632112153533335, "grad_norm": 0.3121028244495392, "learning_rate": 4.999389379324052e-05, "loss": 0.2766, "step": 5961 }, { "epoch": 0.10633895765704705, "grad_norm": 0.19273342192173004, "learning_rate": 4.99938593450337e-05, "loss": 0.1833, "step": 5962 }, { "epoch": 0.10635679377876074, "grad_norm": 0.268231064081192, "learning_rate": 4.999382479994204e-05, "loss": 0.2217, "step": 5963 }, { "epoch": 0.10637462990047444, "grad_norm": 0.29513055086135864, "learning_rate": 4.999379015796567e-05, "loss": 0.2268, "step": 5964 }, { "epoch": 0.10639246602218813, "grad_norm": 0.3670935034751892, "learning_rate": 4.9993755419104746e-05, "loss": 0.2741, "step": 5965 }, { "epoch": 0.10641030214390183, "grad_norm": 0.3474009037017822, "learning_rate": 4.999372058335941e-05, "loss": 0.2813, "step": 5966 }, { "epoch": 0.10642813826561552, "grad_norm": 0.3425934910774231, "learning_rate": 4.999368565072976e-05, "loss": 0.2901, "step": 5967 }, { "epoch": 0.10644597438732922, "grad_norm": 0.29651206731796265, "learning_rate": 4.9993650621215954e-05, "loss": 0.2657, "step": 5968 }, { "epoch": 0.10646381050904291, "grad_norm": 0.3206726014614105, "learning_rate": 4.999361549481813e-05, "loss": 0.2541, "step": 5969 }, { "epoch": 0.10648164663075661, "grad_norm": 0.2874189615249634, "learning_rate": 4.999358027153642e-05, "loss": 0.2617, "step": 5970 }, { "epoch": 0.1064994827524703, "grad_norm": 0.36465299129486084, "learning_rate": 4.999354495137096e-05, "loss": 0.3122, "step": 5971 }, { "epoch": 0.106517318874184, "grad_norm": 0.26799142360687256, "learning_rate": 4.999350953432189e-05, "loss": 0.217, "step": 5972 }, { "epoch": 0.10653515499589769, "grad_norm": 0.2865329682826996, "learning_rate": 4.999347402038934e-05, "loss": 0.2723, "step": 5973 }, { "epoch": 0.10655299111761139, "grad_norm": 0.34854480624198914, "learning_rate": 4.999343840957345e-05, "loss": 0.256, "step": 5974 }, { "epoch": 0.10657082723932508, "grad_norm": 0.39281558990478516, "learning_rate": 4.9993402701874363e-05, "loss": 0.2716, "step": 5975 }, { "epoch": 0.10658866336103878, "grad_norm": 0.3169569671154022, "learning_rate": 4.999336689729222e-05, "loss": 0.2712, "step": 5976 }, { "epoch": 0.10660649948275247, "grad_norm": 0.4224412441253662, "learning_rate": 4.999333099582715e-05, "loss": 0.2605, "step": 5977 }, { "epoch": 0.10662433560446616, "grad_norm": 0.33814242482185364, "learning_rate": 4.99932949974793e-05, "loss": 0.2375, "step": 5978 }, { "epoch": 0.10664217172617986, "grad_norm": 0.27266937494277954, "learning_rate": 4.9993258902248795e-05, "loss": 0.2702, "step": 5979 }, { "epoch": 0.10666000784789355, "grad_norm": 0.35742419958114624, "learning_rate": 4.99932227101358e-05, "loss": 0.2282, "step": 5980 }, { "epoch": 0.10667784396960725, "grad_norm": 0.3397842049598694, "learning_rate": 4.9993186421140434e-05, "loss": 0.241, "step": 5981 }, { "epoch": 0.10669568009132094, "grad_norm": 0.3398186266422272, "learning_rate": 4.9993150035262846e-05, "loss": 0.2372, "step": 5982 }, { "epoch": 0.10671351621303464, "grad_norm": 0.395740270614624, "learning_rate": 4.9993113552503176e-05, "loss": 0.3187, "step": 5983 }, { "epoch": 0.10673135233474833, "grad_norm": 0.36592522263526917, "learning_rate": 4.9993076972861564e-05, "loss": 0.352, "step": 5984 }, { "epoch": 0.10674918845646203, "grad_norm": 0.28423792123794556, "learning_rate": 4.999304029633815e-05, "loss": 0.2569, "step": 5985 }, { "epoch": 0.10676702457817572, "grad_norm": 0.3130965232849121, "learning_rate": 4.999300352293309e-05, "loss": 0.233, "step": 5986 }, { "epoch": 0.10678486069988942, "grad_norm": 0.30348554253578186, "learning_rate": 4.999296665264651e-05, "loss": 0.2592, "step": 5987 }, { "epoch": 0.10680269682160311, "grad_norm": 0.3310333788394928, "learning_rate": 4.999292968547856e-05, "loss": 0.2513, "step": 5988 }, { "epoch": 0.10682053294331681, "grad_norm": 0.3381810784339905, "learning_rate": 4.9992892621429386e-05, "loss": 0.2422, "step": 5989 }, { "epoch": 0.1068383690650305, "grad_norm": 0.33122992515563965, "learning_rate": 4.999285546049912e-05, "loss": 0.2937, "step": 5990 }, { "epoch": 0.1068562051867442, "grad_norm": 0.25606706738471985, "learning_rate": 4.999281820268792e-05, "loss": 0.2294, "step": 5991 }, { "epoch": 0.10687404130845789, "grad_norm": 0.3229588568210602, "learning_rate": 4.999278084799592e-05, "loss": 0.2417, "step": 5992 }, { "epoch": 0.10689187743017159, "grad_norm": 0.3730687201023102, "learning_rate": 4.9992743396423275e-05, "loss": 0.3012, "step": 5993 }, { "epoch": 0.10690971355188528, "grad_norm": 0.361680805683136, "learning_rate": 4.9992705847970125e-05, "loss": 0.2948, "step": 5994 }, { "epoch": 0.10692754967359898, "grad_norm": 0.3156869411468506, "learning_rate": 4.9992668202636606e-05, "loss": 0.2573, "step": 5995 }, { "epoch": 0.10694538579531267, "grad_norm": 0.3901461362838745, "learning_rate": 4.999263046042288e-05, "loss": 0.2043, "step": 5996 }, { "epoch": 0.10696322191702637, "grad_norm": 0.26782023906707764, "learning_rate": 4.999259262132908e-05, "loss": 0.1929, "step": 5997 }, { "epoch": 0.10698105803874006, "grad_norm": 0.32201719284057617, "learning_rate": 4.9992554685355365e-05, "loss": 0.2242, "step": 5998 }, { "epoch": 0.10699889416045374, "grad_norm": 0.31323421001434326, "learning_rate": 4.999251665250187e-05, "loss": 0.2226, "step": 5999 }, { "epoch": 0.10701673028216745, "grad_norm": 0.28290367126464844, "learning_rate": 4.999247852276876e-05, "loss": 0.2689, "step": 6000 }, { "epoch": 0.10701673028216745, "eval_loss": 0.23532024025917053, "eval_runtime": 794.127, "eval_samples_per_second": 1.289, "eval_steps_per_second": 0.215, "step": 6000 }, { "epoch": 0.10703456640388113, "grad_norm": 0.2984603941440582, "learning_rate": 4.999244029615616e-05, "loss": 0.2576, "step": 6001 }, { "epoch": 0.10705240252559484, "grad_norm": 0.22015346586704254, "learning_rate": 4.999240197266424e-05, "loss": 0.1942, "step": 6002 }, { "epoch": 0.10707023864730852, "grad_norm": 0.29506194591522217, "learning_rate": 4.999236355229313e-05, "loss": 0.2088, "step": 6003 }, { "epoch": 0.10708807476902223, "grad_norm": 0.31577104330062866, "learning_rate": 4.999232503504298e-05, "loss": 0.2538, "step": 6004 }, { "epoch": 0.10710591089073591, "grad_norm": 0.3176822364330292, "learning_rate": 4.9992286420913956e-05, "loss": 0.2341, "step": 6005 }, { "epoch": 0.10712374701244962, "grad_norm": 0.2654493749141693, "learning_rate": 4.9992247709906195e-05, "loss": 0.2331, "step": 6006 }, { "epoch": 0.1071415831341633, "grad_norm": 0.3566538691520691, "learning_rate": 4.999220890201985e-05, "loss": 0.2523, "step": 6007 }, { "epoch": 0.107159419255877, "grad_norm": 0.3755199611186981, "learning_rate": 4.999216999725507e-05, "loss": 0.261, "step": 6008 }, { "epoch": 0.1071772553775907, "grad_norm": 0.3827899694442749, "learning_rate": 4.999213099561201e-05, "loss": 0.2879, "step": 6009 }, { "epoch": 0.1071950914993044, "grad_norm": 0.30096253752708435, "learning_rate": 4.999209189709081e-05, "loss": 0.2389, "step": 6010 }, { "epoch": 0.10721292762101808, "grad_norm": 0.30005961656570435, "learning_rate": 4.9992052701691635e-05, "loss": 0.2306, "step": 6011 }, { "epoch": 0.10723076374273179, "grad_norm": 0.267581045627594, "learning_rate": 4.999201340941464e-05, "loss": 0.2216, "step": 6012 }, { "epoch": 0.10724859986444547, "grad_norm": 0.298268586397171, "learning_rate": 4.999197402025996e-05, "loss": 0.2681, "step": 6013 }, { "epoch": 0.10726643598615918, "grad_norm": 0.3477075397968292, "learning_rate": 4.999193453422776e-05, "loss": 0.2782, "step": 6014 }, { "epoch": 0.10728427210787286, "grad_norm": 0.3017905354499817, "learning_rate": 4.999189495131819e-05, "loss": 0.2688, "step": 6015 }, { "epoch": 0.10730210822958657, "grad_norm": 0.3463447093963623, "learning_rate": 4.99918552715314e-05, "loss": 0.292, "step": 6016 }, { "epoch": 0.10731994435130025, "grad_norm": 0.339599609375, "learning_rate": 4.9991815494867547e-05, "loss": 0.2528, "step": 6017 }, { "epoch": 0.10733778047301394, "grad_norm": 0.36239829659461975, "learning_rate": 4.9991775621326785e-05, "loss": 0.2529, "step": 6018 }, { "epoch": 0.10735561659472764, "grad_norm": 0.3503287732601166, "learning_rate": 4.999173565090928e-05, "loss": 0.2379, "step": 6019 }, { "epoch": 0.10737345271644133, "grad_norm": 0.3132491409778595, "learning_rate": 4.9991695583615164e-05, "loss": 0.2603, "step": 6020 }, { "epoch": 0.10739128883815503, "grad_norm": 0.34829726815223694, "learning_rate": 4.9991655419444605e-05, "loss": 0.2233, "step": 6021 }, { "epoch": 0.10740912495986872, "grad_norm": 0.22812891006469727, "learning_rate": 4.999161515839776e-05, "loss": 0.2249, "step": 6022 }, { "epoch": 0.10742696108158242, "grad_norm": 0.3753577470779419, "learning_rate": 4.9991574800474785e-05, "loss": 0.217, "step": 6023 }, { "epoch": 0.10744479720329611, "grad_norm": 0.263096421957016, "learning_rate": 4.999153434567583e-05, "loss": 0.2259, "step": 6024 }, { "epoch": 0.10746263332500981, "grad_norm": 0.28593751788139343, "learning_rate": 4.999149379400105e-05, "loss": 0.2848, "step": 6025 }, { "epoch": 0.1074804694467235, "grad_norm": 0.31411415338516235, "learning_rate": 4.999145314545062e-05, "loss": 0.2286, "step": 6026 }, { "epoch": 0.1074983055684372, "grad_norm": 0.3467569649219513, "learning_rate": 4.999141240002468e-05, "loss": 0.2233, "step": 6027 }, { "epoch": 0.10751614169015089, "grad_norm": 0.26406314969062805, "learning_rate": 4.99913715577234e-05, "loss": 0.2314, "step": 6028 }, { "epoch": 0.10753397781186459, "grad_norm": 0.3606877326965332, "learning_rate": 4.999133061854693e-05, "loss": 0.2605, "step": 6029 }, { "epoch": 0.10755181393357828, "grad_norm": 0.39641469717025757, "learning_rate": 4.9991289582495424e-05, "loss": 0.2972, "step": 6030 }, { "epoch": 0.10756965005529198, "grad_norm": 0.2957558333873749, "learning_rate": 4.9991248449569054e-05, "loss": 0.2513, "step": 6031 }, { "epoch": 0.10758748617700567, "grad_norm": 0.3177221417427063, "learning_rate": 4.999120721976797e-05, "loss": 0.2681, "step": 6032 }, { "epoch": 0.10760532229871937, "grad_norm": 0.3491055369377136, "learning_rate": 4.999116589309234e-05, "loss": 0.2091, "step": 6033 }, { "epoch": 0.10762315842043306, "grad_norm": 0.4598277807235718, "learning_rate": 4.9991124469542315e-05, "loss": 0.2542, "step": 6034 }, { "epoch": 0.10764099454214676, "grad_norm": 0.5211182236671448, "learning_rate": 4.999108294911806e-05, "loss": 0.2541, "step": 6035 }, { "epoch": 0.10765883066386045, "grad_norm": 0.27361178398132324, "learning_rate": 4.999104133181974e-05, "loss": 0.2298, "step": 6036 }, { "epoch": 0.10767666678557415, "grad_norm": 0.333618700504303, "learning_rate": 4.999099961764751e-05, "loss": 0.2798, "step": 6037 }, { "epoch": 0.10769450290728784, "grad_norm": 0.32966190576553345, "learning_rate": 4.999095780660153e-05, "loss": 0.2935, "step": 6038 }, { "epoch": 0.10771233902900153, "grad_norm": 0.3095170259475708, "learning_rate": 4.9990915898681964e-05, "loss": 0.2609, "step": 6039 }, { "epoch": 0.10773017515071523, "grad_norm": 0.4958934783935547, "learning_rate": 4.999087389388899e-05, "loss": 0.2342, "step": 6040 }, { "epoch": 0.10774801127242892, "grad_norm": 0.2796764075756073, "learning_rate": 4.999083179222275e-05, "loss": 0.266, "step": 6041 }, { "epoch": 0.10776584739414262, "grad_norm": 0.4637889564037323, "learning_rate": 4.9990789593683406e-05, "loss": 0.2424, "step": 6042 }, { "epoch": 0.10778368351585631, "grad_norm": 0.3296448588371277, "learning_rate": 4.999074729827114e-05, "loss": 0.2743, "step": 6043 }, { "epoch": 0.10780151963757001, "grad_norm": 0.4007031321525574, "learning_rate": 4.99907049059861e-05, "loss": 0.2988, "step": 6044 }, { "epoch": 0.1078193557592837, "grad_norm": 0.2786741256713867, "learning_rate": 4.999066241682846e-05, "loss": 0.2043, "step": 6045 }, { "epoch": 0.1078371918809974, "grad_norm": 0.3915177285671234, "learning_rate": 4.999061983079838e-05, "loss": 0.2911, "step": 6046 }, { "epoch": 0.10785502800271109, "grad_norm": 0.3464531898498535, "learning_rate": 4.999057714789603e-05, "loss": 0.2519, "step": 6047 }, { "epoch": 0.10787286412442479, "grad_norm": 0.29628807306289673, "learning_rate": 4.9990534368121564e-05, "loss": 0.2497, "step": 6048 }, { "epoch": 0.10789070024613848, "grad_norm": 0.3597511649131775, "learning_rate": 4.9990491491475164e-05, "loss": 0.3015, "step": 6049 }, { "epoch": 0.10790853636785218, "grad_norm": 0.3681495487689972, "learning_rate": 4.999044851795698e-05, "loss": 0.3212, "step": 6050 }, { "epoch": 0.10792637248956587, "grad_norm": 0.3813852369785309, "learning_rate": 4.999040544756719e-05, "loss": 0.3013, "step": 6051 }, { "epoch": 0.10794420861127957, "grad_norm": 0.2874419093132019, "learning_rate": 4.9990362280305955e-05, "loss": 0.2736, "step": 6052 }, { "epoch": 0.10796204473299326, "grad_norm": 0.327522873878479, "learning_rate": 4.9990319016173444e-05, "loss": 0.2763, "step": 6053 }, { "epoch": 0.10797988085470696, "grad_norm": 0.31113898754119873, "learning_rate": 4.999027565516983e-05, "loss": 0.2377, "step": 6054 }, { "epoch": 0.10799771697642065, "grad_norm": 0.32192838191986084, "learning_rate": 4.9990232197295267e-05, "loss": 0.2245, "step": 6055 }, { "epoch": 0.10801555309813435, "grad_norm": 0.3355289399623871, "learning_rate": 4.999018864254994e-05, "loss": 0.2257, "step": 6056 }, { "epoch": 0.10803338921984804, "grad_norm": 0.30110621452331543, "learning_rate": 4.999014499093401e-05, "loss": 0.2119, "step": 6057 }, { "epoch": 0.10805122534156172, "grad_norm": 0.3693029582500458, "learning_rate": 4.999010124244764e-05, "loss": 0.2438, "step": 6058 }, { "epoch": 0.10806906146327543, "grad_norm": 0.27824169397354126, "learning_rate": 4.9990057397091014e-05, "loss": 0.219, "step": 6059 }, { "epoch": 0.10808689758498911, "grad_norm": 0.25682544708251953, "learning_rate": 4.999001345486429e-05, "loss": 0.2107, "step": 6060 }, { "epoch": 0.10810473370670282, "grad_norm": 0.35045406222343445, "learning_rate": 4.998996941576764e-05, "loss": 0.2276, "step": 6061 }, { "epoch": 0.1081225698284165, "grad_norm": 0.2425319403409958, "learning_rate": 4.998992527980125e-05, "loss": 0.1976, "step": 6062 }, { "epoch": 0.1081404059501302, "grad_norm": 0.3771149218082428, "learning_rate": 4.998988104696527e-05, "loss": 0.2801, "step": 6063 }, { "epoch": 0.1081582420718439, "grad_norm": 0.31544533371925354, "learning_rate": 4.9989836717259875e-05, "loss": 0.2673, "step": 6064 }, { "epoch": 0.1081760781935576, "grad_norm": 0.2958180904388428, "learning_rate": 4.998979229068525e-05, "loss": 0.2704, "step": 6065 }, { "epoch": 0.10819391431527128, "grad_norm": 0.318117618560791, "learning_rate": 4.998974776724156e-05, "loss": 0.2595, "step": 6066 }, { "epoch": 0.10821175043698499, "grad_norm": 0.28931739926338196, "learning_rate": 4.9989703146928966e-05, "loss": 0.2173, "step": 6067 }, { "epoch": 0.10822958655869867, "grad_norm": 0.2558846175670624, "learning_rate": 4.998965842974766e-05, "loss": 0.1761, "step": 6068 }, { "epoch": 0.10824742268041238, "grad_norm": 0.3012659549713135, "learning_rate": 4.99896136156978e-05, "loss": 0.2723, "step": 6069 }, { "epoch": 0.10826525880212606, "grad_norm": 0.4154328405857086, "learning_rate": 4.998956870477958e-05, "loss": 0.2006, "step": 6070 }, { "epoch": 0.10828309492383976, "grad_norm": 0.3246530294418335, "learning_rate": 4.9989523696993145e-05, "loss": 0.2338, "step": 6071 }, { "epoch": 0.10830093104555345, "grad_norm": 0.3683794140815735, "learning_rate": 4.998947859233869e-05, "loss": 0.2945, "step": 6072 }, { "epoch": 0.10831876716726715, "grad_norm": 0.35793155431747437, "learning_rate": 4.998943339081639e-05, "loss": 0.2587, "step": 6073 }, { "epoch": 0.10833660328898084, "grad_norm": 0.3311821222305298, "learning_rate": 4.998938809242641e-05, "loss": 0.2491, "step": 6074 }, { "epoch": 0.10835443941069454, "grad_norm": 0.30905959010124207, "learning_rate": 4.998934269716893e-05, "loss": 0.2262, "step": 6075 }, { "epoch": 0.10837227553240823, "grad_norm": 0.2520501911640167, "learning_rate": 4.998929720504413e-05, "loss": 0.2377, "step": 6076 }, { "epoch": 0.10839011165412193, "grad_norm": 0.34372684359550476, "learning_rate": 4.998925161605218e-05, "loss": 0.2369, "step": 6077 }, { "epoch": 0.10840794777583562, "grad_norm": 0.2450200766324997, "learning_rate": 4.998920593019326e-05, "loss": 0.2144, "step": 6078 }, { "epoch": 0.10842578389754931, "grad_norm": 0.31581997871398926, "learning_rate": 4.998916014746755e-05, "loss": 0.28, "step": 6079 }, { "epoch": 0.10844362001926301, "grad_norm": 0.42305368185043335, "learning_rate": 4.9989114267875224e-05, "loss": 0.304, "step": 6080 }, { "epoch": 0.1084614561409767, "grad_norm": 0.3959745168685913, "learning_rate": 4.998906829141646e-05, "loss": 0.3352, "step": 6081 }, { "epoch": 0.1084792922626904, "grad_norm": 0.28059807419776917, "learning_rate": 4.998902221809143e-05, "loss": 0.2479, "step": 6082 }, { "epoch": 0.10849712838440409, "grad_norm": 0.37664470076560974, "learning_rate": 4.998897604790033e-05, "loss": 0.2808, "step": 6083 }, { "epoch": 0.10851496450611779, "grad_norm": 0.33188992738723755, "learning_rate": 4.998892978084332e-05, "loss": 0.265, "step": 6084 }, { "epoch": 0.10853280062783148, "grad_norm": 0.24675920605659485, "learning_rate": 4.9988883416920586e-05, "loss": 0.2402, "step": 6085 }, { "epoch": 0.10855063674954518, "grad_norm": 0.2949751317501068, "learning_rate": 4.998883695613231e-05, "loss": 0.2281, "step": 6086 }, { "epoch": 0.10856847287125887, "grad_norm": 0.30466875433921814, "learning_rate": 4.998879039847868e-05, "loss": 0.2761, "step": 6087 }, { "epoch": 0.10858630899297257, "grad_norm": 0.26031750440597534, "learning_rate": 4.9988743743959855e-05, "loss": 0.269, "step": 6088 }, { "epoch": 0.10860414511468626, "grad_norm": 0.2275160551071167, "learning_rate": 4.998869699257604e-05, "loss": 0.2081, "step": 6089 }, { "epoch": 0.10862198123639996, "grad_norm": 0.3278014361858368, "learning_rate": 4.9988650144327395e-05, "loss": 0.2316, "step": 6090 }, { "epoch": 0.10863981735811365, "grad_norm": 0.27700796723365784, "learning_rate": 4.998860319921411e-05, "loss": 0.2219, "step": 6091 }, { "epoch": 0.10865765347982735, "grad_norm": 0.26910465955734253, "learning_rate": 4.998855615723638e-05, "loss": 0.2289, "step": 6092 }, { "epoch": 0.10867548960154104, "grad_norm": 0.31600767374038696, "learning_rate": 4.9988509018394366e-05, "loss": 0.2177, "step": 6093 }, { "epoch": 0.10869332572325474, "grad_norm": 0.34261491894721985, "learning_rate": 4.998846178268827e-05, "loss": 0.3016, "step": 6094 }, { "epoch": 0.10871116184496843, "grad_norm": 0.3501835763454437, "learning_rate": 4.998841445011826e-05, "loss": 0.23, "step": 6095 }, { "epoch": 0.10872899796668213, "grad_norm": 0.3551133871078491, "learning_rate": 4.998836702068451e-05, "loss": 0.2488, "step": 6096 }, { "epoch": 0.10874683408839582, "grad_norm": 0.3116627633571625, "learning_rate": 4.9988319494387235e-05, "loss": 0.2868, "step": 6097 }, { "epoch": 0.10876467021010951, "grad_norm": 0.2815439701080322, "learning_rate": 4.99882718712266e-05, "loss": 0.2173, "step": 6098 }, { "epoch": 0.10878250633182321, "grad_norm": 0.3250252306461334, "learning_rate": 4.99882241512028e-05, "loss": 0.2407, "step": 6099 }, { "epoch": 0.1088003424535369, "grad_norm": 0.3013324737548828, "learning_rate": 4.9988176334316e-05, "loss": 0.246, "step": 6100 }, { "epoch": 0.1088181785752506, "grad_norm": 0.27790069580078125, "learning_rate": 4.998812842056641e-05, "loss": 0.1996, "step": 6101 }, { "epoch": 0.10883601469696429, "grad_norm": 0.4440501630306244, "learning_rate": 4.998808040995419e-05, "loss": 0.2417, "step": 6102 }, { "epoch": 0.10885385081867799, "grad_norm": 0.2666701376438141, "learning_rate": 4.998803230247955e-05, "loss": 0.1837, "step": 6103 }, { "epoch": 0.10887168694039168, "grad_norm": 0.3506120443344116, "learning_rate": 4.998798409814266e-05, "loss": 0.2753, "step": 6104 }, { "epoch": 0.10888952306210538, "grad_norm": 0.28628668189048767, "learning_rate": 4.998793579694372e-05, "loss": 0.2452, "step": 6105 }, { "epoch": 0.10890735918381907, "grad_norm": 0.24965091049671173, "learning_rate": 4.9987887398882906e-05, "loss": 0.2357, "step": 6106 }, { "epoch": 0.10892519530553277, "grad_norm": 0.2993568778038025, "learning_rate": 4.9987838903960405e-05, "loss": 0.2715, "step": 6107 }, { "epoch": 0.10894303142724646, "grad_norm": 0.2944696247577667, "learning_rate": 4.9987790312176414e-05, "loss": 0.2335, "step": 6108 }, { "epoch": 0.10896086754896016, "grad_norm": 0.29676422476768494, "learning_rate": 4.9987741623531115e-05, "loss": 0.2433, "step": 6109 }, { "epoch": 0.10897870367067385, "grad_norm": 0.311562716960907, "learning_rate": 4.9987692838024705e-05, "loss": 0.2176, "step": 6110 }, { "epoch": 0.10899653979238755, "grad_norm": 0.23696637153625488, "learning_rate": 4.998764395565737e-05, "loss": 0.2096, "step": 6111 }, { "epoch": 0.10901437591410124, "grad_norm": 0.29208680987358093, "learning_rate": 4.9987594976429284e-05, "loss": 0.2599, "step": 6112 }, { "epoch": 0.10903221203581494, "grad_norm": 0.3510104715824127, "learning_rate": 4.998754590034066e-05, "loss": 0.2258, "step": 6113 }, { "epoch": 0.10905004815752863, "grad_norm": 0.2819032669067383, "learning_rate": 4.998749672739167e-05, "loss": 0.2605, "step": 6114 }, { "epoch": 0.10906788427924233, "grad_norm": 0.2493922859430313, "learning_rate": 4.9987447457582516e-05, "loss": 0.2415, "step": 6115 }, { "epoch": 0.10908572040095602, "grad_norm": 0.29971078038215637, "learning_rate": 4.9987398090913384e-05, "loss": 0.2315, "step": 6116 }, { "epoch": 0.10910355652266972, "grad_norm": 0.3297044634819031, "learning_rate": 4.998734862738447e-05, "loss": 0.2432, "step": 6117 }, { "epoch": 0.1091213926443834, "grad_norm": 0.31636524200439453, "learning_rate": 4.998729906699596e-05, "loss": 0.2606, "step": 6118 }, { "epoch": 0.1091392287660971, "grad_norm": 0.8150376081466675, "learning_rate": 4.998724940974805e-05, "loss": 0.2372, "step": 6119 }, { "epoch": 0.1091570648878108, "grad_norm": 0.2824878692626953, "learning_rate": 4.9987199655640925e-05, "loss": 0.2242, "step": 6120 }, { "epoch": 0.10917490100952448, "grad_norm": 0.3265892267227173, "learning_rate": 4.9987149804674797e-05, "loss": 0.2565, "step": 6121 }, { "epoch": 0.10919273713123819, "grad_norm": 0.3480130732059479, "learning_rate": 4.9987099856849834e-05, "loss": 0.23, "step": 6122 }, { "epoch": 0.10921057325295187, "grad_norm": 0.3382149040699005, "learning_rate": 4.998704981216624e-05, "loss": 0.2605, "step": 6123 }, { "epoch": 0.10922840937466557, "grad_norm": 0.24673400819301605, "learning_rate": 4.9986999670624226e-05, "loss": 0.222, "step": 6124 }, { "epoch": 0.10924624549637926, "grad_norm": 0.30811798572540283, "learning_rate": 4.9986949432223953e-05, "loss": 0.2254, "step": 6125 }, { "epoch": 0.10926408161809296, "grad_norm": 0.3038312792778015, "learning_rate": 4.9986899096965646e-05, "loss": 0.2588, "step": 6126 }, { "epoch": 0.10928191773980665, "grad_norm": 0.2873574197292328, "learning_rate": 4.998684866484948e-05, "loss": 0.2267, "step": 6127 }, { "epoch": 0.10929975386152035, "grad_norm": 0.4430965781211853, "learning_rate": 4.998679813587567e-05, "loss": 0.2601, "step": 6128 }, { "epoch": 0.10931758998323404, "grad_norm": 0.280263215303421, "learning_rate": 4.998674751004439e-05, "loss": 0.2569, "step": 6129 }, { "epoch": 0.10933542610494774, "grad_norm": 0.31165051460266113, "learning_rate": 4.998669678735585e-05, "loss": 0.2697, "step": 6130 }, { "epoch": 0.10935326222666143, "grad_norm": 0.2865162193775177, "learning_rate": 4.998664596781023e-05, "loss": 0.244, "step": 6131 }, { "epoch": 0.10937109834837513, "grad_norm": 0.2541183829307556, "learning_rate": 4.998659505140776e-05, "loss": 0.2101, "step": 6132 }, { "epoch": 0.10938893447008882, "grad_norm": 0.2579323947429657, "learning_rate": 4.998654403814861e-05, "loss": 0.2211, "step": 6133 }, { "epoch": 0.10940677059180252, "grad_norm": 0.410474956035614, "learning_rate": 4.998649292803298e-05, "loss": 0.2, "step": 6134 }, { "epoch": 0.10942460671351621, "grad_norm": 0.2245824933052063, "learning_rate": 4.998644172106108e-05, "loss": 0.2042, "step": 6135 }, { "epoch": 0.10944244283522991, "grad_norm": 0.2705850899219513, "learning_rate": 4.99863904172331e-05, "loss": 0.2172, "step": 6136 }, { "epoch": 0.1094602789569436, "grad_norm": 0.3629564940929413, "learning_rate": 4.998633901654924e-05, "loss": 0.2701, "step": 6137 }, { "epoch": 0.10947811507865729, "grad_norm": 0.2784716784954071, "learning_rate": 4.99862875190097e-05, "loss": 0.23, "step": 6138 }, { "epoch": 0.10949595120037099, "grad_norm": 0.28572648763656616, "learning_rate": 4.998623592461468e-05, "loss": 0.2295, "step": 6139 }, { "epoch": 0.10951378732208468, "grad_norm": 0.31702741980552673, "learning_rate": 4.998618423336439e-05, "loss": 0.2704, "step": 6140 }, { "epoch": 0.10953162344379838, "grad_norm": 0.27289608120918274, "learning_rate": 4.9986132445259e-05, "loss": 0.1875, "step": 6141 }, { "epoch": 0.10954945956551207, "grad_norm": 0.25540637969970703, "learning_rate": 4.998608056029874e-05, "loss": 0.2339, "step": 6142 }, { "epoch": 0.10956729568722577, "grad_norm": 0.31396380066871643, "learning_rate": 4.998602857848381e-05, "loss": 0.2224, "step": 6143 }, { "epoch": 0.10958513180893946, "grad_norm": 0.28415483236312866, "learning_rate": 4.998597649981439e-05, "loss": 0.2423, "step": 6144 }, { "epoch": 0.10960296793065316, "grad_norm": 0.41271770000457764, "learning_rate": 4.99859243242907e-05, "loss": 0.2975, "step": 6145 }, { "epoch": 0.10962080405236685, "grad_norm": 0.2675800323486328, "learning_rate": 4.9985872051912944e-05, "loss": 0.2519, "step": 6146 }, { "epoch": 0.10963864017408055, "grad_norm": 0.3569709062576294, "learning_rate": 4.998581968268131e-05, "loss": 0.2755, "step": 6147 }, { "epoch": 0.10965647629579424, "grad_norm": 0.25617632269859314, "learning_rate": 4.9985767216596016e-05, "loss": 0.2191, "step": 6148 }, { "epoch": 0.10967431241750794, "grad_norm": 0.2642269730567932, "learning_rate": 4.998571465365725e-05, "loss": 0.2284, "step": 6149 }, { "epoch": 0.10969214853922163, "grad_norm": 0.2254253327846527, "learning_rate": 4.998566199386523e-05, "loss": 0.18, "step": 6150 }, { "epoch": 0.10970998466093533, "grad_norm": 0.22765931487083435, "learning_rate": 4.998560923722015e-05, "loss": 0.2193, "step": 6151 }, { "epoch": 0.10972782078264902, "grad_norm": 0.3216729164123535, "learning_rate": 4.998555638372222e-05, "loss": 0.2509, "step": 6152 }, { "epoch": 0.10974565690436272, "grad_norm": 0.2652188241481781, "learning_rate": 4.998550343337165e-05, "loss": 0.2612, "step": 6153 }, { "epoch": 0.10976349302607641, "grad_norm": 0.22096742689609528, "learning_rate": 4.998545038616863e-05, "loss": 0.2263, "step": 6154 }, { "epoch": 0.10978132914779011, "grad_norm": 0.3716314435005188, "learning_rate": 4.998539724211338e-05, "loss": 0.3624, "step": 6155 }, { "epoch": 0.1097991652695038, "grad_norm": 0.328799843788147, "learning_rate": 4.9985344001206105e-05, "loss": 0.2156, "step": 6156 }, { "epoch": 0.1098170013912175, "grad_norm": 0.3530081510543823, "learning_rate": 4.9985290663446996e-05, "loss": 0.221, "step": 6157 }, { "epoch": 0.10983483751293119, "grad_norm": 0.2691895365715027, "learning_rate": 4.9985237228836276e-05, "loss": 0.223, "step": 6158 }, { "epoch": 0.10985267363464488, "grad_norm": 0.3150103688240051, "learning_rate": 4.998518369737415e-05, "loss": 0.2054, "step": 6159 }, { "epoch": 0.10987050975635858, "grad_norm": 0.25021952390670776, "learning_rate": 4.998513006906082e-05, "loss": 0.2369, "step": 6160 }, { "epoch": 0.10988834587807227, "grad_norm": 0.32279905676841736, "learning_rate": 4.998507634389649e-05, "loss": 0.2603, "step": 6161 }, { "epoch": 0.10990618199978597, "grad_norm": 0.3455301821231842, "learning_rate": 4.998502252188138e-05, "loss": 0.2754, "step": 6162 }, { "epoch": 0.10992401812149966, "grad_norm": 0.256946325302124, "learning_rate": 4.9984968603015694e-05, "loss": 0.2082, "step": 6163 }, { "epoch": 0.10994185424321336, "grad_norm": 0.3117446005344391, "learning_rate": 4.998491458729964e-05, "loss": 0.2522, "step": 6164 }, { "epoch": 0.10995969036492705, "grad_norm": 0.2743896245956421, "learning_rate": 4.998486047473343e-05, "loss": 0.2451, "step": 6165 }, { "epoch": 0.10997752648664075, "grad_norm": 0.2710326910018921, "learning_rate": 4.998480626531726e-05, "loss": 0.2303, "step": 6166 }, { "epoch": 0.10999536260835444, "grad_norm": 0.26594817638397217, "learning_rate": 4.998475195905137e-05, "loss": 0.2119, "step": 6167 }, { "epoch": 0.11001319873006814, "grad_norm": 0.3471861779689789, "learning_rate": 4.998469755593594e-05, "loss": 0.2587, "step": 6168 }, { "epoch": 0.11003103485178183, "grad_norm": 0.2982967495918274, "learning_rate": 4.998464305597119e-05, "loss": 0.1988, "step": 6169 }, { "epoch": 0.11004887097349553, "grad_norm": 0.4203164577484131, "learning_rate": 4.9984588459157346e-05, "loss": 0.2208, "step": 6170 }, { "epoch": 0.11006670709520922, "grad_norm": 0.4424651861190796, "learning_rate": 4.9984533765494594e-05, "loss": 0.2392, "step": 6171 }, { "epoch": 0.11008454321692292, "grad_norm": 0.4270980954170227, "learning_rate": 4.9984478974983165e-05, "loss": 0.2957, "step": 6172 }, { "epoch": 0.1101023793386366, "grad_norm": 0.24900686740875244, "learning_rate": 4.998442408762327e-05, "loss": 0.2188, "step": 6173 }, { "epoch": 0.11012021546035031, "grad_norm": 0.36748048663139343, "learning_rate": 4.998436910341512e-05, "loss": 0.2705, "step": 6174 }, { "epoch": 0.110138051582064, "grad_norm": 0.3921428322792053, "learning_rate": 4.998431402235891e-05, "loss": 0.2952, "step": 6175 }, { "epoch": 0.1101558877037777, "grad_norm": 0.29848167300224304, "learning_rate": 4.998425884445489e-05, "loss": 0.248, "step": 6176 }, { "epoch": 0.11017372382549138, "grad_norm": 0.3298819065093994, "learning_rate": 4.9984203569703244e-05, "loss": 0.2825, "step": 6177 }, { "epoch": 0.11019155994720509, "grad_norm": 0.3297257721424103, "learning_rate": 4.9984148198104194e-05, "loss": 0.2818, "step": 6178 }, { "epoch": 0.11020939606891877, "grad_norm": 0.29978567361831665, "learning_rate": 4.998409272965796e-05, "loss": 0.2187, "step": 6179 }, { "epoch": 0.11022723219063246, "grad_norm": 0.2707396149635315, "learning_rate": 4.998403716436475e-05, "loss": 0.2495, "step": 6180 }, { "epoch": 0.11024506831234616, "grad_norm": 0.4788021743297577, "learning_rate": 4.9983981502224783e-05, "loss": 0.267, "step": 6181 }, { "epoch": 0.11026290443405985, "grad_norm": 0.2787097692489624, "learning_rate": 4.998392574323827e-05, "loss": 0.2364, "step": 6182 }, { "epoch": 0.11028074055577355, "grad_norm": 0.3392464518547058, "learning_rate": 4.998386988740544e-05, "loss": 0.3185, "step": 6183 }, { "epoch": 0.11029857667748724, "grad_norm": 0.2750253677368164, "learning_rate": 4.9983813934726495e-05, "loss": 0.2046, "step": 6184 }, { "epoch": 0.11031641279920094, "grad_norm": 0.3816703259944916, "learning_rate": 4.9983757885201664e-05, "loss": 0.2903, "step": 6185 }, { "epoch": 0.11033424892091463, "grad_norm": 0.598724365234375, "learning_rate": 4.998370173883116e-05, "loss": 0.3125, "step": 6186 }, { "epoch": 0.11035208504262833, "grad_norm": 0.25722193717956543, "learning_rate": 4.9983645495615197e-05, "loss": 0.2287, "step": 6187 }, { "epoch": 0.11036992116434202, "grad_norm": 0.3098689317703247, "learning_rate": 4.998358915555399e-05, "loss": 0.1934, "step": 6188 }, { "epoch": 0.11038775728605572, "grad_norm": 0.29725563526153564, "learning_rate": 4.9983532718647765e-05, "loss": 0.2689, "step": 6189 }, { "epoch": 0.11040559340776941, "grad_norm": 0.29313573241233826, "learning_rate": 4.9983476184896736e-05, "loss": 0.2638, "step": 6190 }, { "epoch": 0.11042342952948311, "grad_norm": 0.26852619647979736, "learning_rate": 4.998341955430113e-05, "loss": 0.2294, "step": 6191 }, { "epoch": 0.1104412656511968, "grad_norm": 0.23123401403427124, "learning_rate": 4.998336282686116e-05, "loss": 0.2208, "step": 6192 }, { "epoch": 0.1104591017729105, "grad_norm": 0.2545979917049408, "learning_rate": 4.998330600257704e-05, "loss": 0.2482, "step": 6193 }, { "epoch": 0.11047693789462419, "grad_norm": 0.31205466389656067, "learning_rate": 4.9983249081449e-05, "loss": 0.2341, "step": 6194 }, { "epoch": 0.1104947740163379, "grad_norm": 0.3120054006576538, "learning_rate": 4.998319206347726e-05, "loss": 0.2513, "step": 6195 }, { "epoch": 0.11051261013805158, "grad_norm": 0.32524827122688293, "learning_rate": 4.998313494866204e-05, "loss": 0.2498, "step": 6196 }, { "epoch": 0.11053044625976528, "grad_norm": 0.29201456904411316, "learning_rate": 4.998307773700356e-05, "loss": 0.2354, "step": 6197 }, { "epoch": 0.11054828238147897, "grad_norm": 0.2925111651420593, "learning_rate": 4.9983020428502035e-05, "loss": 0.2252, "step": 6198 }, { "epoch": 0.11056611850319266, "grad_norm": 0.2674996852874756, "learning_rate": 4.99829630231577e-05, "loss": 0.2106, "step": 6199 }, { "epoch": 0.11058395462490636, "grad_norm": 0.4428156018257141, "learning_rate": 4.998290552097077e-05, "loss": 0.3163, "step": 6200 }, { "epoch": 0.11060179074662005, "grad_norm": 0.3531860411167145, "learning_rate": 4.9982847921941465e-05, "loss": 0.1911, "step": 6201 }, { "epoch": 0.11061962686833375, "grad_norm": 0.4287932813167572, "learning_rate": 4.998279022607002e-05, "loss": 0.2908, "step": 6202 }, { "epoch": 0.11063746299004744, "grad_norm": 0.25531309843063354, "learning_rate": 4.998273243335665e-05, "loss": 0.249, "step": 6203 }, { "epoch": 0.11065529911176114, "grad_norm": 0.2637823224067688, "learning_rate": 4.998267454380158e-05, "loss": 0.228, "step": 6204 }, { "epoch": 0.11067313523347483, "grad_norm": 0.23722325265407562, "learning_rate": 4.998261655740503e-05, "loss": 0.2065, "step": 6205 }, { "epoch": 0.11069097135518853, "grad_norm": 0.27698805928230286, "learning_rate": 4.998255847416724e-05, "loss": 0.2218, "step": 6206 }, { "epoch": 0.11070880747690222, "grad_norm": 0.22974862158298492, "learning_rate": 4.998250029408841e-05, "loss": 0.2292, "step": 6207 }, { "epoch": 0.11072664359861592, "grad_norm": 0.2588344216346741, "learning_rate": 4.998244201716879e-05, "loss": 0.244, "step": 6208 }, { "epoch": 0.11074447972032961, "grad_norm": 0.26613056659698486, "learning_rate": 4.998238364340859e-05, "loss": 0.2387, "step": 6209 }, { "epoch": 0.11076231584204331, "grad_norm": 0.2978186011314392, "learning_rate": 4.998232517280805e-05, "loss": 0.2729, "step": 6210 }, { "epoch": 0.110780151963757, "grad_norm": 0.3010079264640808, "learning_rate": 4.9982266605367384e-05, "loss": 0.2557, "step": 6211 }, { "epoch": 0.1107979880854707, "grad_norm": 0.22793611884117126, "learning_rate": 4.9982207941086825e-05, "loss": 0.2086, "step": 6212 }, { "epoch": 0.11081582420718439, "grad_norm": 0.3061627745628357, "learning_rate": 4.9982149179966594e-05, "loss": 0.2224, "step": 6213 }, { "epoch": 0.11083366032889809, "grad_norm": 0.3212285041809082, "learning_rate": 4.998209032200694e-05, "loss": 0.2814, "step": 6214 }, { "epoch": 0.11085149645061178, "grad_norm": 0.351762592792511, "learning_rate": 4.998203136720806e-05, "loss": 0.2228, "step": 6215 }, { "epoch": 0.11086933257232548, "grad_norm": 0.227890282869339, "learning_rate": 4.99819723155702e-05, "loss": 0.2452, "step": 6216 }, { "epoch": 0.11088716869403917, "grad_norm": 0.23476283252239227, "learning_rate": 4.99819131670936e-05, "loss": 0.2362, "step": 6217 }, { "epoch": 0.11090500481575287, "grad_norm": 0.25866883993148804, "learning_rate": 4.998185392177846e-05, "loss": 0.2064, "step": 6218 }, { "epoch": 0.11092284093746656, "grad_norm": 0.24904648959636688, "learning_rate": 4.998179457962503e-05, "loss": 0.2447, "step": 6219 }, { "epoch": 0.11094067705918025, "grad_norm": 0.22712509334087372, "learning_rate": 4.9981735140633536e-05, "loss": 0.2186, "step": 6220 }, { "epoch": 0.11095851318089395, "grad_norm": 0.2733319401741028, "learning_rate": 4.998167560480421e-05, "loss": 0.248, "step": 6221 }, { "epoch": 0.11097634930260764, "grad_norm": 0.341614693403244, "learning_rate": 4.9981615972137285e-05, "loss": 0.2572, "step": 6222 }, { "epoch": 0.11099418542432134, "grad_norm": 0.28420737385749817, "learning_rate": 4.998155624263298e-05, "loss": 0.2251, "step": 6223 }, { "epoch": 0.11101202154603503, "grad_norm": 0.24382448196411133, "learning_rate": 4.998149641629154e-05, "loss": 0.2384, "step": 6224 }, { "epoch": 0.11102985766774873, "grad_norm": 0.29594287276268005, "learning_rate": 4.998143649311319e-05, "loss": 0.1826, "step": 6225 }, { "epoch": 0.11104769378946241, "grad_norm": 0.27644747495651245, "learning_rate": 4.998137647309816e-05, "loss": 0.253, "step": 6226 }, { "epoch": 0.11106552991117612, "grad_norm": 0.36379367113113403, "learning_rate": 4.9981316356246695e-05, "loss": 0.2897, "step": 6227 }, { "epoch": 0.1110833660328898, "grad_norm": 0.40374189615249634, "learning_rate": 4.9981256142559015e-05, "loss": 0.254, "step": 6228 }, { "epoch": 0.1111012021546035, "grad_norm": 0.3151934742927551, "learning_rate": 4.9981195832035356e-05, "loss": 0.2596, "step": 6229 }, { "epoch": 0.1111190382763172, "grad_norm": 0.29207053780555725, "learning_rate": 4.998113542467596e-05, "loss": 0.2347, "step": 6230 }, { "epoch": 0.1111368743980309, "grad_norm": 0.2925536036491394, "learning_rate": 4.998107492048105e-05, "loss": 0.2415, "step": 6231 }, { "epoch": 0.11115471051974458, "grad_norm": 0.39146339893341064, "learning_rate": 4.998101431945086e-05, "loss": 0.2845, "step": 6232 }, { "epoch": 0.11117254664145829, "grad_norm": 0.2833290994167328, "learning_rate": 4.9980953621585634e-05, "loss": 0.1937, "step": 6233 }, { "epoch": 0.11119038276317197, "grad_norm": 0.37179383635520935, "learning_rate": 4.9980892826885604e-05, "loss": 0.284, "step": 6234 }, { "epoch": 0.11120821888488568, "grad_norm": 0.31555330753326416, "learning_rate": 4.998083193535101e-05, "loss": 0.2576, "step": 6235 }, { "epoch": 0.11122605500659936, "grad_norm": 0.2974212169647217, "learning_rate": 4.998077094698208e-05, "loss": 0.2481, "step": 6236 }, { "epoch": 0.11124389112831307, "grad_norm": 0.32543492317199707, "learning_rate": 4.998070986177906e-05, "loss": 0.2611, "step": 6237 }, { "epoch": 0.11126172725002675, "grad_norm": 0.24908842146396637, "learning_rate": 4.998064867974217e-05, "loss": 0.2637, "step": 6238 }, { "epoch": 0.11127956337174044, "grad_norm": 0.30848029255867004, "learning_rate": 4.998058740087166e-05, "loss": 0.275, "step": 6239 }, { "epoch": 0.11129739949345414, "grad_norm": 0.24796777963638306, "learning_rate": 4.998052602516777e-05, "loss": 0.2479, "step": 6240 }, { "epoch": 0.11131523561516783, "grad_norm": 0.24908721446990967, "learning_rate": 4.998046455263074e-05, "loss": 0.2378, "step": 6241 }, { "epoch": 0.11133307173688153, "grad_norm": 0.25761616230010986, "learning_rate": 4.998040298326079e-05, "loss": 0.2202, "step": 6242 }, { "epoch": 0.11135090785859522, "grad_norm": 0.3541248142719269, "learning_rate": 4.9980341317058166e-05, "loss": 0.2754, "step": 6243 }, { "epoch": 0.11136874398030892, "grad_norm": 0.2676992118358612, "learning_rate": 4.998027955402312e-05, "loss": 0.2242, "step": 6244 }, { "epoch": 0.11138658010202261, "grad_norm": 0.233119934797287, "learning_rate": 4.998021769415587e-05, "loss": 0.2297, "step": 6245 }, { "epoch": 0.11140441622373631, "grad_norm": 0.26920872926712036, "learning_rate": 4.998015573745668e-05, "loss": 0.2243, "step": 6246 }, { "epoch": 0.11142225234545, "grad_norm": 0.2761625647544861, "learning_rate": 4.998009368392578e-05, "loss": 0.2291, "step": 6247 }, { "epoch": 0.1114400884671637, "grad_norm": 0.3195796310901642, "learning_rate": 4.998003153356341e-05, "loss": 0.2158, "step": 6248 }, { "epoch": 0.11145792458887739, "grad_norm": 0.43256333470344543, "learning_rate": 4.99799692863698e-05, "loss": 0.2463, "step": 6249 }, { "epoch": 0.11147576071059109, "grad_norm": 0.4290931820869446, "learning_rate": 4.997990694234521e-05, "loss": 0.186, "step": 6250 }, { "epoch": 0.11149359683230478, "grad_norm": 0.3229944705963135, "learning_rate": 4.997984450148987e-05, "loss": 0.2246, "step": 6251 }, { "epoch": 0.11151143295401848, "grad_norm": 0.2988409399986267, "learning_rate": 4.997978196380402e-05, "loss": 0.2592, "step": 6252 }, { "epoch": 0.11152926907573217, "grad_norm": 0.3372195065021515, "learning_rate": 4.997971932928792e-05, "loss": 0.2119, "step": 6253 }, { "epoch": 0.11154710519744587, "grad_norm": 0.24260346591472626, "learning_rate": 4.9979656597941786e-05, "loss": 0.2259, "step": 6254 }, { "epoch": 0.11156494131915956, "grad_norm": 0.2870357036590576, "learning_rate": 4.997959376976589e-05, "loss": 0.2433, "step": 6255 }, { "epoch": 0.11158277744087326, "grad_norm": 0.32909926772117615, "learning_rate": 4.9979530844760446e-05, "loss": 0.2515, "step": 6256 }, { "epoch": 0.11160061356258695, "grad_norm": 0.35816195607185364, "learning_rate": 4.997946782292572e-05, "loss": 0.2196, "step": 6257 }, { "epoch": 0.11161844968430065, "grad_norm": 0.4002450704574585, "learning_rate": 4.997940470426195e-05, "loss": 0.212, "step": 6258 }, { "epoch": 0.11163628580601434, "grad_norm": 0.37571850419044495, "learning_rate": 4.9979341488769374e-05, "loss": 0.2869, "step": 6259 }, { "epoch": 0.11165412192772803, "grad_norm": 0.33898311853408813, "learning_rate": 4.997927817644825e-05, "loss": 0.2697, "step": 6260 }, { "epoch": 0.11167195804944173, "grad_norm": 0.29540374875068665, "learning_rate": 4.997921476729881e-05, "loss": 0.2543, "step": 6261 }, { "epoch": 0.11168979417115542, "grad_norm": 0.2542315125465393, "learning_rate": 4.997915126132131e-05, "loss": 0.2412, "step": 6262 }, { "epoch": 0.11170763029286912, "grad_norm": 0.29263821244239807, "learning_rate": 4.997908765851599e-05, "loss": 0.2449, "step": 6263 }, { "epoch": 0.11172546641458281, "grad_norm": 0.3407067358493805, "learning_rate": 4.99790239588831e-05, "loss": 0.253, "step": 6264 }, { "epoch": 0.11174330253629651, "grad_norm": 0.3167259693145752, "learning_rate": 4.997896016242289e-05, "loss": 0.2535, "step": 6265 }, { "epoch": 0.1117611386580102, "grad_norm": 0.21316608786582947, "learning_rate": 4.99788962691356e-05, "loss": 0.2159, "step": 6266 }, { "epoch": 0.1117789747797239, "grad_norm": 0.23873494565486908, "learning_rate": 4.997883227902147e-05, "loss": 0.2027, "step": 6267 }, { "epoch": 0.11179681090143759, "grad_norm": 0.2827610373497009, "learning_rate": 4.997876819208077e-05, "loss": 0.2453, "step": 6268 }, { "epoch": 0.11181464702315129, "grad_norm": 0.410043329000473, "learning_rate": 4.997870400831374e-05, "loss": 0.2306, "step": 6269 }, { "epoch": 0.11183248314486498, "grad_norm": 0.29575398564338684, "learning_rate": 4.997863972772062e-05, "loss": 0.251, "step": 6270 }, { "epoch": 0.11185031926657868, "grad_norm": 0.3259303569793701, "learning_rate": 4.9978575350301664e-05, "loss": 0.2596, "step": 6271 }, { "epoch": 0.11186815538829237, "grad_norm": 0.3475322425365448, "learning_rate": 4.9978510876057124e-05, "loss": 0.2582, "step": 6272 }, { "epoch": 0.11188599151000607, "grad_norm": 0.2831577956676483, "learning_rate": 4.9978446304987245e-05, "loss": 0.254, "step": 6273 }, { "epoch": 0.11190382763171976, "grad_norm": 0.3299480080604553, "learning_rate": 4.997838163709229e-05, "loss": 0.2024, "step": 6274 }, { "epoch": 0.11192166375343346, "grad_norm": 0.30874449014663696, "learning_rate": 4.99783168723725e-05, "loss": 0.226, "step": 6275 }, { "epoch": 0.11193949987514715, "grad_norm": 0.339728444814682, "learning_rate": 4.997825201082812e-05, "loss": 0.2566, "step": 6276 }, { "epoch": 0.11195733599686085, "grad_norm": 0.2576361894607544, "learning_rate": 4.9978187052459406e-05, "loss": 0.1971, "step": 6277 }, { "epoch": 0.11197517211857454, "grad_norm": 0.26656538248062134, "learning_rate": 4.997812199726662e-05, "loss": 0.187, "step": 6278 }, { "epoch": 0.11199300824028822, "grad_norm": 0.311737596988678, "learning_rate": 4.997805684525e-05, "loss": 0.2497, "step": 6279 }, { "epoch": 0.11201084436200193, "grad_norm": 0.23330265283584595, "learning_rate": 4.997799159640981e-05, "loss": 0.2234, "step": 6280 }, { "epoch": 0.11202868048371561, "grad_norm": 0.2843758463859558, "learning_rate": 4.997792625074629e-05, "loss": 0.2138, "step": 6281 }, { "epoch": 0.11204651660542932, "grad_norm": 0.3988863527774811, "learning_rate": 4.99778608082597e-05, "loss": 0.291, "step": 6282 }, { "epoch": 0.112064352727143, "grad_norm": 0.23501631617546082, "learning_rate": 4.99777952689503e-05, "loss": 0.2299, "step": 6283 }, { "epoch": 0.1120821888488567, "grad_norm": 0.25568756461143494, "learning_rate": 4.9977729632818335e-05, "loss": 0.1858, "step": 6284 }, { "epoch": 0.1121000249705704, "grad_norm": 0.27769866585731506, "learning_rate": 4.9977663899864066e-05, "loss": 0.2274, "step": 6285 }, { "epoch": 0.1121178610922841, "grad_norm": 0.28924545645713806, "learning_rate": 4.997759807008775e-05, "loss": 0.2104, "step": 6286 }, { "epoch": 0.11213569721399778, "grad_norm": 0.33430570363998413, "learning_rate": 4.997753214348963e-05, "loss": 0.2393, "step": 6287 }, { "epoch": 0.11215353333571149, "grad_norm": 0.29556894302368164, "learning_rate": 4.997746612006997e-05, "loss": 0.2698, "step": 6288 }, { "epoch": 0.11217136945742517, "grad_norm": 0.3289881646633148, "learning_rate": 4.997739999982902e-05, "loss": 0.2697, "step": 6289 }, { "epoch": 0.11218920557913888, "grad_norm": 0.4218645691871643, "learning_rate": 4.997733378276705e-05, "loss": 0.2174, "step": 6290 }, { "epoch": 0.11220704170085256, "grad_norm": 0.31306329369544983, "learning_rate": 4.99772674688843e-05, "loss": 0.2056, "step": 6291 }, { "epoch": 0.11222487782256627, "grad_norm": 0.27947649359703064, "learning_rate": 4.9977201058181036e-05, "loss": 0.2352, "step": 6292 }, { "epoch": 0.11224271394427995, "grad_norm": 0.30447328090667725, "learning_rate": 4.9977134550657514e-05, "loss": 0.2313, "step": 6293 }, { "epoch": 0.11226055006599366, "grad_norm": 0.3301723301410675, "learning_rate": 4.9977067946313993e-05, "loss": 0.2706, "step": 6294 }, { "epoch": 0.11227838618770734, "grad_norm": 0.3076240122318268, "learning_rate": 4.997700124515073e-05, "loss": 0.2819, "step": 6295 }, { "epoch": 0.11229622230942105, "grad_norm": 0.3603857457637787, "learning_rate": 4.997693444716798e-05, "loss": 0.2595, "step": 6296 }, { "epoch": 0.11231405843113473, "grad_norm": 0.27384883165359497, "learning_rate": 4.997686755236601e-05, "loss": 0.2458, "step": 6297 }, { "epoch": 0.11233189455284844, "grad_norm": 0.2910906672477722, "learning_rate": 4.997680056074507e-05, "loss": 0.2109, "step": 6298 }, { "epoch": 0.11234973067456212, "grad_norm": 0.27966779470443726, "learning_rate": 4.9976733472305425e-05, "loss": 0.2459, "step": 6299 }, { "epoch": 0.11236756679627581, "grad_norm": 0.328244686126709, "learning_rate": 4.9976666287047335e-05, "loss": 0.2739, "step": 6300 }, { "epoch": 0.11238540291798951, "grad_norm": 0.3239681124687195, "learning_rate": 4.997659900497106e-05, "loss": 0.2886, "step": 6301 }, { "epoch": 0.1124032390397032, "grad_norm": 0.2994845509529114, "learning_rate": 4.9976531626076864e-05, "loss": 0.2705, "step": 6302 }, { "epoch": 0.1124210751614169, "grad_norm": 0.25063931941986084, "learning_rate": 4.9976464150365e-05, "loss": 0.2299, "step": 6303 }, { "epoch": 0.11243891128313059, "grad_norm": 0.22160574793815613, "learning_rate": 4.997639657783574e-05, "loss": 0.2041, "step": 6304 }, { "epoch": 0.11245674740484429, "grad_norm": 0.2589114010334015, "learning_rate": 4.9976328908489336e-05, "loss": 0.2013, "step": 6305 }, { "epoch": 0.11247458352655798, "grad_norm": 0.23705771565437317, "learning_rate": 4.9976261142326056e-05, "loss": 0.2158, "step": 6306 }, { "epoch": 0.11249241964827168, "grad_norm": 0.2486175298690796, "learning_rate": 4.997619327934616e-05, "loss": 0.2093, "step": 6307 }, { "epoch": 0.11251025576998537, "grad_norm": 0.32091838121414185, "learning_rate": 4.997612531954991e-05, "loss": 0.2503, "step": 6308 }, { "epoch": 0.11252809189169907, "grad_norm": 0.2645072042942047, "learning_rate": 4.997605726293757e-05, "loss": 0.209, "step": 6309 }, { "epoch": 0.11254592801341276, "grad_norm": 0.20899713039398193, "learning_rate": 4.9975989109509414e-05, "loss": 0.2095, "step": 6310 }, { "epoch": 0.11256376413512646, "grad_norm": 0.315972238779068, "learning_rate": 4.9975920859265694e-05, "loss": 0.2751, "step": 6311 }, { "epoch": 0.11258160025684015, "grad_norm": 0.3038237988948822, "learning_rate": 4.997585251220668e-05, "loss": 0.2485, "step": 6312 }, { "epoch": 0.11259943637855385, "grad_norm": 0.27426764369010925, "learning_rate": 4.9975784068332634e-05, "loss": 0.2084, "step": 6313 }, { "epoch": 0.11261727250026754, "grad_norm": 0.31844186782836914, "learning_rate": 4.997571552764382e-05, "loss": 0.2872, "step": 6314 }, { "epoch": 0.11263510862198124, "grad_norm": 0.2916257381439209, "learning_rate": 4.997564689014051e-05, "loss": 0.2347, "step": 6315 }, { "epoch": 0.11265294474369493, "grad_norm": 0.2968656122684479, "learning_rate": 4.997557815582297e-05, "loss": 0.2968, "step": 6316 }, { "epoch": 0.11267078086540863, "grad_norm": 0.30372437834739685, "learning_rate": 4.9975509324691446e-05, "loss": 0.2675, "step": 6317 }, { "epoch": 0.11268861698712232, "grad_norm": 0.2827107012271881, "learning_rate": 4.997544039674624e-05, "loss": 0.2411, "step": 6318 }, { "epoch": 0.11270645310883601, "grad_norm": 0.26831698417663574, "learning_rate": 4.9975371371987594e-05, "loss": 0.2447, "step": 6319 }, { "epoch": 0.11272428923054971, "grad_norm": 0.3394215404987335, "learning_rate": 4.9975302250415785e-05, "loss": 0.3011, "step": 6320 }, { "epoch": 0.1127421253522634, "grad_norm": 0.23676671087741852, "learning_rate": 4.997523303203108e-05, "loss": 0.2029, "step": 6321 }, { "epoch": 0.1127599614739771, "grad_norm": 0.2872179448604584, "learning_rate": 4.997516371683373e-05, "loss": 0.2259, "step": 6322 }, { "epoch": 0.11277779759569079, "grad_norm": 0.28405171632766724, "learning_rate": 4.9975094304824036e-05, "loss": 0.2674, "step": 6323 }, { "epoch": 0.11279563371740449, "grad_norm": 0.25588372349739075, "learning_rate": 4.997502479600225e-05, "loss": 0.2451, "step": 6324 }, { "epoch": 0.11281346983911818, "grad_norm": 0.29073432087898254, "learning_rate": 4.9974955190368634e-05, "loss": 0.2812, "step": 6325 }, { "epoch": 0.11283130596083188, "grad_norm": 0.29777273535728455, "learning_rate": 4.9974885487923464e-05, "loss": 0.2973, "step": 6326 }, { "epoch": 0.11284914208254557, "grad_norm": 0.27667126059532166, "learning_rate": 4.997481568866702e-05, "loss": 0.2042, "step": 6327 }, { "epoch": 0.11286697820425927, "grad_norm": 0.3610670268535614, "learning_rate": 4.997474579259956e-05, "loss": 0.2552, "step": 6328 }, { "epoch": 0.11288481432597296, "grad_norm": 0.23689375817775726, "learning_rate": 4.997467579972136e-05, "loss": 0.2121, "step": 6329 }, { "epoch": 0.11290265044768666, "grad_norm": 0.27611005306243896, "learning_rate": 4.997460571003269e-05, "loss": 0.2419, "step": 6330 }, { "epoch": 0.11292048656940035, "grad_norm": 0.2606268525123596, "learning_rate": 4.9974535523533815e-05, "loss": 0.1892, "step": 6331 }, { "epoch": 0.11293832269111405, "grad_norm": 0.27849745750427246, "learning_rate": 4.997446524022503e-05, "loss": 0.2306, "step": 6332 }, { "epoch": 0.11295615881282774, "grad_norm": 0.33862075209617615, "learning_rate": 4.997439486010658e-05, "loss": 0.2262, "step": 6333 }, { "epoch": 0.11297399493454144, "grad_norm": 0.3877054452896118, "learning_rate": 4.997432438317876e-05, "loss": 0.2952, "step": 6334 }, { "epoch": 0.11299183105625513, "grad_norm": 0.30139851570129395, "learning_rate": 4.997425380944182e-05, "loss": 0.2475, "step": 6335 }, { "epoch": 0.11300966717796883, "grad_norm": 0.28107771277427673, "learning_rate": 4.9974183138896056e-05, "loss": 0.2732, "step": 6336 }, { "epoch": 0.11302750329968252, "grad_norm": 0.24334359169006348, "learning_rate": 4.9974112371541725e-05, "loss": 0.2249, "step": 6337 }, { "epoch": 0.11304533942139622, "grad_norm": 0.2737603485584259, "learning_rate": 4.997404150737912e-05, "loss": 0.22, "step": 6338 }, { "epoch": 0.1130631755431099, "grad_norm": 0.3706985116004944, "learning_rate": 4.997397054640849e-05, "loss": 0.3006, "step": 6339 }, { "epoch": 0.1130810116648236, "grad_norm": 0.2653593122959137, "learning_rate": 4.9973899488630136e-05, "loss": 0.2339, "step": 6340 }, { "epoch": 0.1130988477865373, "grad_norm": 0.33987531065940857, "learning_rate": 4.997382833404432e-05, "loss": 0.2428, "step": 6341 }, { "epoch": 0.11311668390825098, "grad_norm": 0.38109177350997925, "learning_rate": 4.9973757082651315e-05, "loss": 0.2732, "step": 6342 }, { "epoch": 0.11313452002996469, "grad_norm": 0.2975313663482666, "learning_rate": 4.9973685734451404e-05, "loss": 0.2383, "step": 6343 }, { "epoch": 0.11315235615167837, "grad_norm": 0.23836776614189148, "learning_rate": 4.997361428944486e-05, "loss": 0.2286, "step": 6344 }, { "epoch": 0.11317019227339208, "grad_norm": 0.36299118399620056, "learning_rate": 4.9973542747631964e-05, "loss": 0.2606, "step": 6345 }, { "epoch": 0.11318802839510576, "grad_norm": 0.3544904291629791, "learning_rate": 4.9973471109012984e-05, "loss": 0.2463, "step": 6346 }, { "epoch": 0.11320586451681947, "grad_norm": 0.2779907286167145, "learning_rate": 4.9973399373588214e-05, "loss": 0.1852, "step": 6347 }, { "epoch": 0.11322370063853315, "grad_norm": 0.33224770426750183, "learning_rate": 4.997332754135792e-05, "loss": 0.2596, "step": 6348 }, { "epoch": 0.11324153676024686, "grad_norm": 0.3348247706890106, "learning_rate": 4.9973255612322376e-05, "loss": 0.2257, "step": 6349 }, { "epoch": 0.11325937288196054, "grad_norm": 0.29954883456230164, "learning_rate": 4.997318358648188e-05, "loss": 0.2286, "step": 6350 }, { "epoch": 0.11327720900367425, "grad_norm": 0.30738696455955505, "learning_rate": 4.9973111463836686e-05, "loss": 0.2517, "step": 6351 }, { "epoch": 0.11329504512538793, "grad_norm": 0.2955269515514374, "learning_rate": 4.99730392443871e-05, "loss": 0.2784, "step": 6352 }, { "epoch": 0.11331288124710164, "grad_norm": 0.27649739384651184, "learning_rate": 4.997296692813338e-05, "loss": 0.2681, "step": 6353 }, { "epoch": 0.11333071736881532, "grad_norm": 0.22541755437850952, "learning_rate": 4.9972894515075816e-05, "loss": 0.1894, "step": 6354 }, { "epoch": 0.11334855349052902, "grad_norm": 0.3360641896724701, "learning_rate": 4.9972822005214684e-05, "loss": 0.2214, "step": 6355 }, { "epoch": 0.11336638961224271, "grad_norm": 0.381287544965744, "learning_rate": 4.997274939855027e-05, "loss": 0.2642, "step": 6356 }, { "epoch": 0.11338422573395641, "grad_norm": 0.45694929361343384, "learning_rate": 4.997267669508286e-05, "loss": 0.2628, "step": 6357 }, { "epoch": 0.1134020618556701, "grad_norm": 0.3429132401943207, "learning_rate": 4.9972603894812725e-05, "loss": 0.2534, "step": 6358 }, { "epoch": 0.1134198979773838, "grad_norm": 0.3763284385204315, "learning_rate": 4.997253099774015e-05, "loss": 0.2125, "step": 6359 }, { "epoch": 0.11343773409909749, "grad_norm": 0.26830655336380005, "learning_rate": 4.9972458003865426e-05, "loss": 0.2206, "step": 6360 }, { "epoch": 0.11345557022081118, "grad_norm": 0.30152490735054016, "learning_rate": 4.997238491318882e-05, "loss": 0.2296, "step": 6361 }, { "epoch": 0.11347340634252488, "grad_norm": 0.3029189109802246, "learning_rate": 4.9972311725710635e-05, "loss": 0.2644, "step": 6362 }, { "epoch": 0.11349124246423857, "grad_norm": 0.27133142948150635, "learning_rate": 4.997223844143114e-05, "loss": 0.2141, "step": 6363 }, { "epoch": 0.11350907858595227, "grad_norm": 0.3439355492591858, "learning_rate": 4.997216506035063e-05, "loss": 0.2409, "step": 6364 }, { "epoch": 0.11352691470766596, "grad_norm": 0.3204980492591858, "learning_rate": 4.997209158246937e-05, "loss": 0.2508, "step": 6365 }, { "epoch": 0.11354475082937966, "grad_norm": 0.431309312582016, "learning_rate": 4.9972018007787666e-05, "loss": 0.2948, "step": 6366 }, { "epoch": 0.11356258695109335, "grad_norm": 0.25124138593673706, "learning_rate": 4.99719443363058e-05, "loss": 0.2442, "step": 6367 }, { "epoch": 0.11358042307280705, "grad_norm": 0.3142741918563843, "learning_rate": 4.997187056802405e-05, "loss": 0.2233, "step": 6368 }, { "epoch": 0.11359825919452074, "grad_norm": 0.29613104462623596, "learning_rate": 4.99717967029427e-05, "loss": 0.2738, "step": 6369 }, { "epoch": 0.11361609531623444, "grad_norm": 0.3058638870716095, "learning_rate": 4.9971722741062046e-05, "loss": 0.2274, "step": 6370 }, { "epoch": 0.11363393143794813, "grad_norm": 0.6070004105567932, "learning_rate": 4.997164868238236e-05, "loss": 0.2278, "step": 6371 }, { "epoch": 0.11365176755966183, "grad_norm": 0.2776007056236267, "learning_rate": 4.997157452690395e-05, "loss": 0.2321, "step": 6372 }, { "epoch": 0.11366960368137552, "grad_norm": 0.3096480071544647, "learning_rate": 4.997150027462708e-05, "loss": 0.259, "step": 6373 }, { "epoch": 0.11368743980308922, "grad_norm": 0.2727145850658417, "learning_rate": 4.9971425925552064e-05, "loss": 0.2359, "step": 6374 }, { "epoch": 0.11370527592480291, "grad_norm": 0.3384600579738617, "learning_rate": 4.997135147967917e-05, "loss": 0.2703, "step": 6375 }, { "epoch": 0.11372311204651661, "grad_norm": 0.403394490480423, "learning_rate": 4.997127693700869e-05, "loss": 0.3012, "step": 6376 }, { "epoch": 0.1137409481682303, "grad_norm": 0.26332804560661316, "learning_rate": 4.997120229754092e-05, "loss": 0.2519, "step": 6377 }, { "epoch": 0.113758784289944, "grad_norm": 0.28521740436553955, "learning_rate": 4.9971127561276144e-05, "loss": 0.2077, "step": 6378 }, { "epoch": 0.11377662041165769, "grad_norm": 0.3364304006099701, "learning_rate": 4.997105272821465e-05, "loss": 0.2361, "step": 6379 }, { "epoch": 0.11379445653337138, "grad_norm": 0.2692857086658478, "learning_rate": 4.997097779835673e-05, "loss": 0.2087, "step": 6380 }, { "epoch": 0.11381229265508508, "grad_norm": 0.3222743570804596, "learning_rate": 4.997090277170269e-05, "loss": 0.2304, "step": 6381 }, { "epoch": 0.11383012877679877, "grad_norm": 0.34658974409103394, "learning_rate": 4.9970827648252787e-05, "loss": 0.2453, "step": 6382 }, { "epoch": 0.11384796489851247, "grad_norm": 0.3299717307090759, "learning_rate": 4.997075242800734e-05, "loss": 0.2882, "step": 6383 }, { "epoch": 0.11386580102022616, "grad_norm": 0.25349822640419006, "learning_rate": 4.997067711096663e-05, "loss": 0.2119, "step": 6384 }, { "epoch": 0.11388363714193986, "grad_norm": 0.31958621740341187, "learning_rate": 4.997060169713096e-05, "loss": 0.2819, "step": 6385 }, { "epoch": 0.11390147326365355, "grad_norm": 0.22673916816711426, "learning_rate": 4.99705261865006e-05, "loss": 0.2459, "step": 6386 }, { "epoch": 0.11391930938536725, "grad_norm": 0.3198469877243042, "learning_rate": 4.997045057907586e-05, "loss": 0.2673, "step": 6387 }, { "epoch": 0.11393714550708094, "grad_norm": 0.25635942816734314, "learning_rate": 4.997037487485703e-05, "loss": 0.2291, "step": 6388 }, { "epoch": 0.11395498162879464, "grad_norm": 0.2754196524620056, "learning_rate": 4.997029907384441e-05, "loss": 0.2331, "step": 6389 }, { "epoch": 0.11397281775050833, "grad_norm": 0.31777021288871765, "learning_rate": 4.9970223176038276e-05, "loss": 0.2586, "step": 6390 }, { "epoch": 0.11399065387222203, "grad_norm": 0.42584753036499023, "learning_rate": 4.997014718143893e-05, "loss": 0.1865, "step": 6391 }, { "epoch": 0.11400848999393572, "grad_norm": 0.3233788311481476, "learning_rate": 4.9970071090046675e-05, "loss": 0.224, "step": 6392 }, { "epoch": 0.11402632611564942, "grad_norm": 0.2840725779533386, "learning_rate": 4.9969994901861805e-05, "loss": 0.2431, "step": 6393 }, { "epoch": 0.1140441622373631, "grad_norm": 0.2885996997356415, "learning_rate": 4.99699186168846e-05, "loss": 0.2349, "step": 6394 }, { "epoch": 0.11406199835907681, "grad_norm": 0.3769633173942566, "learning_rate": 4.996984223511538e-05, "loss": 0.231, "step": 6395 }, { "epoch": 0.1140798344807905, "grad_norm": 0.3439430296421051, "learning_rate": 4.9969765756554414e-05, "loss": 0.2088, "step": 6396 }, { "epoch": 0.1140976706025042, "grad_norm": 0.3478592038154602, "learning_rate": 4.996968918120202e-05, "loss": 0.2713, "step": 6397 }, { "epoch": 0.11411550672421789, "grad_norm": 0.27024587988853455, "learning_rate": 4.996961250905848e-05, "loss": 0.2235, "step": 6398 }, { "epoch": 0.11413334284593159, "grad_norm": 0.35732901096343994, "learning_rate": 4.99695357401241e-05, "loss": 0.2179, "step": 6399 }, { "epoch": 0.11415117896764528, "grad_norm": 0.2081969529390335, "learning_rate": 4.996945887439918e-05, "loss": 0.2018, "step": 6400 }, { "epoch": 0.11416901508935896, "grad_norm": 0.29519927501678467, "learning_rate": 4.9969381911884e-05, "loss": 0.243, "step": 6401 }, { "epoch": 0.11418685121107267, "grad_norm": 0.35555705428123474, "learning_rate": 4.9969304852578886e-05, "loss": 0.3235, "step": 6402 }, { "epoch": 0.11420468733278635, "grad_norm": 0.3278159201145172, "learning_rate": 4.996922769648412e-05, "loss": 0.2316, "step": 6403 }, { "epoch": 0.11422252345450006, "grad_norm": 0.2941592037677765, "learning_rate": 4.99691504436e-05, "loss": 0.2137, "step": 6404 }, { "epoch": 0.11424035957621374, "grad_norm": 0.30727818608283997, "learning_rate": 4.996907309392683e-05, "loss": 0.2572, "step": 6405 }, { "epoch": 0.11425819569792744, "grad_norm": 0.2645432651042938, "learning_rate": 4.9968995647464906e-05, "loss": 0.2195, "step": 6406 }, { "epoch": 0.11427603181964113, "grad_norm": 0.35110896825790405, "learning_rate": 4.9968918104214534e-05, "loss": 0.2446, "step": 6407 }, { "epoch": 0.11429386794135483, "grad_norm": 0.26152899861335754, "learning_rate": 4.9968840464176004e-05, "loss": 0.2329, "step": 6408 }, { "epoch": 0.11431170406306852, "grad_norm": 0.3267683982849121, "learning_rate": 4.9968762727349636e-05, "loss": 0.2762, "step": 6409 }, { "epoch": 0.11432954018478222, "grad_norm": 0.352154940366745, "learning_rate": 4.996868489373571e-05, "loss": 0.2472, "step": 6410 }, { "epoch": 0.11434737630649591, "grad_norm": 0.29199671745300293, "learning_rate": 4.996860696333454e-05, "loss": 0.2135, "step": 6411 }, { "epoch": 0.11436521242820961, "grad_norm": 0.25746950507164, "learning_rate": 4.996852893614643e-05, "loss": 0.2302, "step": 6412 }, { "epoch": 0.1143830485499233, "grad_norm": 0.2342277467250824, "learning_rate": 4.996845081217168e-05, "loss": 0.2516, "step": 6413 }, { "epoch": 0.114400884671637, "grad_norm": 0.35752689838409424, "learning_rate": 4.9968372591410585e-05, "loss": 0.2359, "step": 6414 }, { "epoch": 0.11441872079335069, "grad_norm": 0.692800760269165, "learning_rate": 4.996829427386345e-05, "loss": 0.2611, "step": 6415 }, { "epoch": 0.1144365569150644, "grad_norm": 0.272035151720047, "learning_rate": 4.996821585953059e-05, "loss": 0.2189, "step": 6416 }, { "epoch": 0.11445439303677808, "grad_norm": 0.4375305771827698, "learning_rate": 4.99681373484123e-05, "loss": 0.2213, "step": 6417 }, { "epoch": 0.11447222915849178, "grad_norm": 0.4251265227794647, "learning_rate": 4.996805874050888e-05, "loss": 0.2138, "step": 6418 }, { "epoch": 0.11449006528020547, "grad_norm": 0.31208524107933044, "learning_rate": 4.996798003582065e-05, "loss": 0.2425, "step": 6419 }, { "epoch": 0.11450790140191916, "grad_norm": 0.32257500290870667, "learning_rate": 4.99679012343479e-05, "loss": 0.2843, "step": 6420 }, { "epoch": 0.11452573752363286, "grad_norm": 0.3965137004852295, "learning_rate": 4.9967822336090943e-05, "loss": 0.2367, "step": 6421 }, { "epoch": 0.11454357364534655, "grad_norm": 0.4175276756286621, "learning_rate": 4.996774334105008e-05, "loss": 0.3549, "step": 6422 }, { "epoch": 0.11456140976706025, "grad_norm": 0.3600330650806427, "learning_rate": 4.996766424922563e-05, "loss": 0.2308, "step": 6423 }, { "epoch": 0.11457924588877394, "grad_norm": 0.35123971104621887, "learning_rate": 4.996758506061788e-05, "loss": 0.2521, "step": 6424 }, { "epoch": 0.11459708201048764, "grad_norm": 0.20919214189052582, "learning_rate": 4.9967505775227145e-05, "loss": 0.2236, "step": 6425 }, { "epoch": 0.11461491813220133, "grad_norm": 0.25192296504974365, "learning_rate": 4.996742639305374e-05, "loss": 0.253, "step": 6426 }, { "epoch": 0.11463275425391503, "grad_norm": 0.3408343195915222, "learning_rate": 4.996734691409797e-05, "loss": 0.2927, "step": 6427 }, { "epoch": 0.11465059037562872, "grad_norm": 0.25281551480293274, "learning_rate": 4.996726733836013e-05, "loss": 0.1761, "step": 6428 }, { "epoch": 0.11466842649734242, "grad_norm": 0.48902568221092224, "learning_rate": 4.996718766584054e-05, "loss": 0.2263, "step": 6429 }, { "epoch": 0.11468626261905611, "grad_norm": 0.23247681558132172, "learning_rate": 4.996710789653952e-05, "loss": 0.2398, "step": 6430 }, { "epoch": 0.11470409874076981, "grad_norm": 0.24835175275802612, "learning_rate": 4.9967028030457354e-05, "loss": 0.2648, "step": 6431 }, { "epoch": 0.1147219348624835, "grad_norm": 0.40737810730934143, "learning_rate": 4.996694806759436e-05, "loss": 0.2457, "step": 6432 }, { "epoch": 0.1147397709841972, "grad_norm": 0.23159782588481903, "learning_rate": 4.9966868007950865e-05, "loss": 0.2263, "step": 6433 }, { "epoch": 0.11475760710591089, "grad_norm": 0.34643515944480896, "learning_rate": 4.9966787851527164e-05, "loss": 0.2836, "step": 6434 }, { "epoch": 0.11477544322762459, "grad_norm": 0.357815146446228, "learning_rate": 4.9966707598323565e-05, "loss": 0.2776, "step": 6435 }, { "epoch": 0.11479327934933828, "grad_norm": 0.28077706694602966, "learning_rate": 4.996662724834039e-05, "loss": 0.254, "step": 6436 }, { "epoch": 0.11481111547105198, "grad_norm": 0.29435211420059204, "learning_rate": 4.996654680157794e-05, "loss": 0.2312, "step": 6437 }, { "epoch": 0.11482895159276567, "grad_norm": 0.3501879870891571, "learning_rate": 4.996646625803653e-05, "loss": 0.3071, "step": 6438 }, { "epoch": 0.11484678771447937, "grad_norm": 0.29916924238204956, "learning_rate": 4.996638561771647e-05, "loss": 0.2341, "step": 6439 }, { "epoch": 0.11486462383619306, "grad_norm": 0.3069373071193695, "learning_rate": 4.996630488061809e-05, "loss": 0.2665, "step": 6440 }, { "epoch": 0.11488245995790675, "grad_norm": 0.32497838139533997, "learning_rate": 4.996622404674168e-05, "loss": 0.2189, "step": 6441 }, { "epoch": 0.11490029607962045, "grad_norm": 0.31551235914230347, "learning_rate": 4.996614311608756e-05, "loss": 0.2158, "step": 6442 }, { "epoch": 0.11491813220133414, "grad_norm": 0.32928216457366943, "learning_rate": 4.996606208865605e-05, "loss": 0.2308, "step": 6443 }, { "epoch": 0.11493596832304784, "grad_norm": 0.25787994265556335, "learning_rate": 4.9965980964447456e-05, "loss": 0.2259, "step": 6444 }, { "epoch": 0.11495380444476153, "grad_norm": 0.41256728768348694, "learning_rate": 4.996589974346211e-05, "loss": 0.3111, "step": 6445 }, { "epoch": 0.11497164056647523, "grad_norm": 0.3078208267688751, "learning_rate": 4.99658184257003e-05, "loss": 0.253, "step": 6446 }, { "epoch": 0.11498947668818892, "grad_norm": 0.30306053161621094, "learning_rate": 4.996573701116236e-05, "loss": 0.2293, "step": 6447 }, { "epoch": 0.11500731280990262, "grad_norm": 0.26847824454307556, "learning_rate": 4.9965655499848595e-05, "loss": 0.2103, "step": 6448 }, { "epoch": 0.1150251489316163, "grad_norm": 0.25561755895614624, "learning_rate": 4.996557389175933e-05, "loss": 0.2148, "step": 6449 }, { "epoch": 0.11504298505333001, "grad_norm": 0.38018563389778137, "learning_rate": 4.996549218689488e-05, "loss": 0.2907, "step": 6450 }, { "epoch": 0.1150608211750437, "grad_norm": 0.24978797137737274, "learning_rate": 4.9965410385255563e-05, "loss": 0.2327, "step": 6451 }, { "epoch": 0.1150786572967574, "grad_norm": 0.25283369421958923, "learning_rate": 4.996532848684169e-05, "loss": 0.2186, "step": 6452 }, { "epoch": 0.11509649341847109, "grad_norm": 0.36950114369392395, "learning_rate": 4.996524649165358e-05, "loss": 0.3156, "step": 6453 }, { "epoch": 0.11511432954018479, "grad_norm": 0.37075960636138916, "learning_rate": 4.9965164399691546e-05, "loss": 0.2716, "step": 6454 }, { "epoch": 0.11513216566189848, "grad_norm": 0.2191033512353897, "learning_rate": 4.996508221095592e-05, "loss": 0.1946, "step": 6455 }, { "epoch": 0.11515000178361218, "grad_norm": 0.2517928183078766, "learning_rate": 4.9964999925447006e-05, "loss": 0.1969, "step": 6456 }, { "epoch": 0.11516783790532586, "grad_norm": 0.34651434421539307, "learning_rate": 4.9964917543165136e-05, "loss": 0.2295, "step": 6457 }, { "epoch": 0.11518567402703957, "grad_norm": 0.39816907048225403, "learning_rate": 4.996483506411062e-05, "loss": 0.323, "step": 6458 }, { "epoch": 0.11520351014875325, "grad_norm": 0.2937268614768982, "learning_rate": 4.9964752488283786e-05, "loss": 0.2753, "step": 6459 }, { "epoch": 0.11522134627046694, "grad_norm": 0.26291102170944214, "learning_rate": 4.996466981568494e-05, "loss": 0.2612, "step": 6460 }, { "epoch": 0.11523918239218064, "grad_norm": 0.2093595564365387, "learning_rate": 4.996458704631442e-05, "loss": 0.1959, "step": 6461 }, { "epoch": 0.11525701851389433, "grad_norm": 0.25515130162239075, "learning_rate": 4.996450418017253e-05, "loss": 0.269, "step": 6462 }, { "epoch": 0.11527485463560803, "grad_norm": 0.3007342517375946, "learning_rate": 4.9964421217259604e-05, "loss": 0.2474, "step": 6463 }, { "epoch": 0.11529269075732172, "grad_norm": 0.396407812833786, "learning_rate": 4.9964338157575954e-05, "loss": 0.2741, "step": 6464 }, { "epoch": 0.11531052687903542, "grad_norm": 0.3987675905227661, "learning_rate": 4.9964255001121914e-05, "loss": 0.2597, "step": 6465 }, { "epoch": 0.11532836300074911, "grad_norm": 0.3393239974975586, "learning_rate": 4.996417174789779e-05, "loss": 0.3094, "step": 6466 }, { "epoch": 0.11534619912246281, "grad_norm": 0.28795576095581055, "learning_rate": 4.996408839790392e-05, "loss": 0.2929, "step": 6467 }, { "epoch": 0.1153640352441765, "grad_norm": 0.24852922558784485, "learning_rate": 4.996400495114062e-05, "loss": 0.2523, "step": 6468 }, { "epoch": 0.1153818713658902, "grad_norm": 0.32010847330093384, "learning_rate": 4.9963921407608214e-05, "loss": 0.2641, "step": 6469 }, { "epoch": 0.11539970748760389, "grad_norm": 0.3046504259109497, "learning_rate": 4.9963837767307034e-05, "loss": 0.2756, "step": 6470 }, { "epoch": 0.1154175436093176, "grad_norm": 0.3047195374965668, "learning_rate": 4.996375403023739e-05, "loss": 0.2424, "step": 6471 }, { "epoch": 0.11543537973103128, "grad_norm": 0.2599313259124756, "learning_rate": 4.9963670196399614e-05, "loss": 0.257, "step": 6472 }, { "epoch": 0.11545321585274498, "grad_norm": 0.32083696126937866, "learning_rate": 4.9963586265794025e-05, "loss": 0.2477, "step": 6473 }, { "epoch": 0.11547105197445867, "grad_norm": 0.33297500014305115, "learning_rate": 4.996350223842096e-05, "loss": 0.1974, "step": 6474 }, { "epoch": 0.11548888809617237, "grad_norm": 0.44310811161994934, "learning_rate": 4.9963418114280736e-05, "loss": 0.2823, "step": 6475 }, { "epoch": 0.11550672421788606, "grad_norm": 0.31379446387290955, "learning_rate": 4.996333389337368e-05, "loss": 0.2603, "step": 6476 }, { "epoch": 0.11552456033959976, "grad_norm": 0.3003830313682556, "learning_rate": 4.996324957570012e-05, "loss": 0.2681, "step": 6477 }, { "epoch": 0.11554239646131345, "grad_norm": 0.2535483241081238, "learning_rate": 4.996316516126038e-05, "loss": 0.2071, "step": 6478 }, { "epoch": 0.11556023258302715, "grad_norm": 0.41141194105148315, "learning_rate": 4.9963080650054807e-05, "loss": 0.1867, "step": 6479 }, { "epoch": 0.11557806870474084, "grad_norm": 0.3306072950363159, "learning_rate": 4.996299604208369e-05, "loss": 0.2536, "step": 6480 }, { "epoch": 0.11559590482645453, "grad_norm": 0.2728017270565033, "learning_rate": 4.996291133734739e-05, "loss": 0.244, "step": 6481 }, { "epoch": 0.11561374094816823, "grad_norm": 0.3277038335800171, "learning_rate": 4.996282653584622e-05, "loss": 0.1984, "step": 6482 }, { "epoch": 0.11563157706988192, "grad_norm": 0.26794102787971497, "learning_rate": 4.996274163758051e-05, "loss": 0.2643, "step": 6483 }, { "epoch": 0.11564941319159562, "grad_norm": 0.2872062921524048, "learning_rate": 4.9962656642550596e-05, "loss": 0.2108, "step": 6484 }, { "epoch": 0.11566724931330931, "grad_norm": 0.2878411114215851, "learning_rate": 4.9962571550756796e-05, "loss": 0.2314, "step": 6485 }, { "epoch": 0.11568508543502301, "grad_norm": 0.22507423162460327, "learning_rate": 4.996248636219946e-05, "loss": 0.2133, "step": 6486 }, { "epoch": 0.1157029215567367, "grad_norm": 0.29494550824165344, "learning_rate": 4.9962401076878896e-05, "loss": 0.2535, "step": 6487 }, { "epoch": 0.1157207576784504, "grad_norm": 0.30076175928115845, "learning_rate": 4.996231569479545e-05, "loss": 0.256, "step": 6488 }, { "epoch": 0.11573859380016409, "grad_norm": 0.29725104570388794, "learning_rate": 4.996223021594943e-05, "loss": 0.237, "step": 6489 }, { "epoch": 0.11575642992187779, "grad_norm": 0.2872684895992279, "learning_rate": 4.99621446403412e-05, "loss": 0.2464, "step": 6490 }, { "epoch": 0.11577426604359148, "grad_norm": 0.2946079671382904, "learning_rate": 4.996205896797107e-05, "loss": 0.2098, "step": 6491 }, { "epoch": 0.11579210216530518, "grad_norm": 0.32457631826400757, "learning_rate": 4.9961973198839385e-05, "loss": 0.2796, "step": 6492 }, { "epoch": 0.11580993828701887, "grad_norm": 0.2626686990261078, "learning_rate": 4.9961887332946464e-05, "loss": 0.2303, "step": 6493 }, { "epoch": 0.11582777440873257, "grad_norm": 0.376022070646286, "learning_rate": 4.9961801370292646e-05, "loss": 0.2298, "step": 6494 }, { "epoch": 0.11584561053044626, "grad_norm": 0.29716289043426514, "learning_rate": 4.9961715310878263e-05, "loss": 0.2096, "step": 6495 }, { "epoch": 0.11586344665215996, "grad_norm": 0.3023572862148285, "learning_rate": 4.9961629154703655e-05, "loss": 0.2592, "step": 6496 }, { "epoch": 0.11588128277387365, "grad_norm": 0.3096959590911865, "learning_rate": 4.9961542901769146e-05, "loss": 0.2651, "step": 6497 }, { "epoch": 0.11589911889558735, "grad_norm": 0.32270315289497375, "learning_rate": 4.996145655207508e-05, "loss": 0.2717, "step": 6498 }, { "epoch": 0.11591695501730104, "grad_norm": 0.2928166687488556, "learning_rate": 4.996137010562179e-05, "loss": 0.243, "step": 6499 }, { "epoch": 0.11593479113901474, "grad_norm": 0.31219711899757385, "learning_rate": 4.9961283562409595e-05, "loss": 0.3059, "step": 6500 }, { "epoch": 0.11595262726072843, "grad_norm": 0.36242711544036865, "learning_rate": 4.9961196922438843e-05, "loss": 0.3463, "step": 6501 }, { "epoch": 0.11597046338244212, "grad_norm": 0.3275732100009918, "learning_rate": 4.9961110185709886e-05, "loss": 0.2772, "step": 6502 }, { "epoch": 0.11598829950415582, "grad_norm": 0.3870683014392853, "learning_rate": 4.996102335222303e-05, "loss": 0.3145, "step": 6503 }, { "epoch": 0.1160061356258695, "grad_norm": 0.3099972605705261, "learning_rate": 4.996093642197864e-05, "loss": 0.2505, "step": 6504 }, { "epoch": 0.11602397174758321, "grad_norm": 0.273925244808197, "learning_rate": 4.996084939497703e-05, "loss": 0.2285, "step": 6505 }, { "epoch": 0.1160418078692969, "grad_norm": 0.33993473649024963, "learning_rate": 4.9960762271218554e-05, "loss": 0.3025, "step": 6506 }, { "epoch": 0.1160596439910106, "grad_norm": 0.29106321930885315, "learning_rate": 4.996067505070353e-05, "loss": 0.1543, "step": 6507 }, { "epoch": 0.11607748011272429, "grad_norm": 0.3417963981628418, "learning_rate": 4.996058773343232e-05, "loss": 0.2547, "step": 6508 }, { "epoch": 0.11609531623443799, "grad_norm": 0.2559162974357605, "learning_rate": 4.9960500319405246e-05, "loss": 0.2384, "step": 6509 }, { "epoch": 0.11611315235615167, "grad_norm": 0.523456871509552, "learning_rate": 4.996041280862265e-05, "loss": 0.2227, "step": 6510 }, { "epoch": 0.11613098847786538, "grad_norm": 0.2539406716823578, "learning_rate": 4.996032520108488e-05, "loss": 0.246, "step": 6511 }, { "epoch": 0.11614882459957906, "grad_norm": 0.2890263497829437, "learning_rate": 4.996023749679226e-05, "loss": 0.2105, "step": 6512 }, { "epoch": 0.11616666072129277, "grad_norm": 0.2841342091560364, "learning_rate": 4.996014969574514e-05, "loss": 0.2724, "step": 6513 }, { "epoch": 0.11618449684300645, "grad_norm": 0.3194986581802368, "learning_rate": 4.996006179794386e-05, "loss": 0.1871, "step": 6514 }, { "epoch": 0.11620233296472016, "grad_norm": 0.28453782200813293, "learning_rate": 4.995997380338876e-05, "loss": 0.2868, "step": 6515 }, { "epoch": 0.11622016908643384, "grad_norm": 0.26048335433006287, "learning_rate": 4.995988571208018e-05, "loss": 0.2275, "step": 6516 }, { "epoch": 0.11623800520814755, "grad_norm": 0.35794925689697266, "learning_rate": 4.995979752401847e-05, "loss": 0.2656, "step": 6517 }, { "epoch": 0.11625584132986123, "grad_norm": 0.2667000889778137, "learning_rate": 4.9959709239203954e-05, "loss": 0.2628, "step": 6518 }, { "epoch": 0.11627367745157494, "grad_norm": 0.3313361704349518, "learning_rate": 4.995962085763699e-05, "loss": 0.2469, "step": 6519 }, { "epoch": 0.11629151357328862, "grad_norm": 0.3026551306247711, "learning_rate": 4.995953237931791e-05, "loss": 0.2531, "step": 6520 }, { "epoch": 0.11630934969500231, "grad_norm": 0.2981475293636322, "learning_rate": 4.995944380424706e-05, "loss": 0.2563, "step": 6521 }, { "epoch": 0.11632718581671601, "grad_norm": 0.44038650393486023, "learning_rate": 4.995935513242479e-05, "loss": 0.296, "step": 6522 }, { "epoch": 0.1163450219384297, "grad_norm": 0.21879370510578156, "learning_rate": 4.995926636385144e-05, "loss": 0.198, "step": 6523 }, { "epoch": 0.1163628580601434, "grad_norm": 0.4341939389705658, "learning_rate": 4.995917749852735e-05, "loss": 0.2931, "step": 6524 }, { "epoch": 0.11638069418185709, "grad_norm": 0.21881984174251556, "learning_rate": 4.995908853645287e-05, "loss": 0.2055, "step": 6525 }, { "epoch": 0.1163985303035708, "grad_norm": 0.25374263525009155, "learning_rate": 4.995899947762834e-05, "loss": 0.2823, "step": 6526 }, { "epoch": 0.11641636642528448, "grad_norm": 0.4448148310184479, "learning_rate": 4.995891032205411e-05, "loss": 0.2673, "step": 6527 }, { "epoch": 0.11643420254699818, "grad_norm": 0.2964814305305481, "learning_rate": 4.9958821069730514e-05, "loss": 0.2506, "step": 6528 }, { "epoch": 0.11645203866871187, "grad_norm": 0.2353295385837555, "learning_rate": 4.995873172065791e-05, "loss": 0.1917, "step": 6529 }, { "epoch": 0.11646987479042557, "grad_norm": 0.3661505877971649, "learning_rate": 4.995864227483665e-05, "loss": 0.2805, "step": 6530 }, { "epoch": 0.11648771091213926, "grad_norm": 0.3254474103450775, "learning_rate": 4.995855273226707e-05, "loss": 0.2197, "step": 6531 }, { "epoch": 0.11650554703385296, "grad_norm": 0.28213346004486084, "learning_rate": 4.995846309294952e-05, "loss": 0.2391, "step": 6532 }, { "epoch": 0.11652338315556665, "grad_norm": 0.38951122760772705, "learning_rate": 4.9958373356884334e-05, "loss": 0.2189, "step": 6533 }, { "epoch": 0.11654121927728035, "grad_norm": 0.2582286596298218, "learning_rate": 4.995828352407189e-05, "loss": 0.2568, "step": 6534 }, { "epoch": 0.11655905539899404, "grad_norm": 0.3363356292247772, "learning_rate": 4.99581935945125e-05, "loss": 0.2397, "step": 6535 }, { "epoch": 0.11657689152070774, "grad_norm": 0.24810463190078735, "learning_rate": 4.995810356820655e-05, "loss": 0.2711, "step": 6536 }, { "epoch": 0.11659472764242143, "grad_norm": 0.26700448989868164, "learning_rate": 4.9958013445154365e-05, "loss": 0.2649, "step": 6537 }, { "epoch": 0.11661256376413513, "grad_norm": 0.3393983244895935, "learning_rate": 4.995792322535629e-05, "loss": 0.2674, "step": 6538 }, { "epoch": 0.11663039988584882, "grad_norm": 0.3047894835472107, "learning_rate": 4.99578329088127e-05, "loss": 0.2624, "step": 6539 }, { "epoch": 0.11664823600756252, "grad_norm": 0.37343868613243103, "learning_rate": 4.995774249552391e-05, "loss": 0.3026, "step": 6540 }, { "epoch": 0.11666607212927621, "grad_norm": 0.3277461528778076, "learning_rate": 4.9957651985490305e-05, "loss": 0.2905, "step": 6541 }, { "epoch": 0.1166839082509899, "grad_norm": 0.3397107720375061, "learning_rate": 4.9957561378712216e-05, "loss": 0.2455, "step": 6542 }, { "epoch": 0.1167017443727036, "grad_norm": 0.20923329889774323, "learning_rate": 4.995747067519e-05, "loss": 0.2026, "step": 6543 }, { "epoch": 0.11671958049441729, "grad_norm": 0.31039029359817505, "learning_rate": 4.995737987492401e-05, "loss": 0.2255, "step": 6544 }, { "epoch": 0.11673741661613099, "grad_norm": 0.23596099019050598, "learning_rate": 4.9957288977914585e-05, "loss": 0.2107, "step": 6545 }, { "epoch": 0.11675525273784468, "grad_norm": 0.4043689966201782, "learning_rate": 4.9957197984162094e-05, "loss": 0.2299, "step": 6546 }, { "epoch": 0.11677308885955838, "grad_norm": 0.2682308852672577, "learning_rate": 4.995710689366689e-05, "loss": 0.2398, "step": 6547 }, { "epoch": 0.11679092498127207, "grad_norm": 0.26397573947906494, "learning_rate": 4.995701570642931e-05, "loss": 0.2, "step": 6548 }, { "epoch": 0.11680876110298577, "grad_norm": 0.31530824303627014, "learning_rate": 4.995692442244972e-05, "loss": 0.3019, "step": 6549 }, { "epoch": 0.11682659722469946, "grad_norm": 0.2686672508716583, "learning_rate": 4.995683304172848e-05, "loss": 0.2578, "step": 6550 }, { "epoch": 0.11684443334641316, "grad_norm": 0.4198894202709198, "learning_rate": 4.995674156426593e-05, "loss": 0.1865, "step": 6551 }, { "epoch": 0.11686226946812685, "grad_norm": 0.3906414210796356, "learning_rate": 4.9956649990062425e-05, "loss": 0.2459, "step": 6552 }, { "epoch": 0.11688010558984055, "grad_norm": 0.303987056016922, "learning_rate": 4.995655831911833e-05, "loss": 0.2608, "step": 6553 }, { "epoch": 0.11689794171155424, "grad_norm": 0.24724708497524261, "learning_rate": 4.9956466551433996e-05, "loss": 0.2321, "step": 6554 }, { "epoch": 0.11691577783326794, "grad_norm": 0.3111753463745117, "learning_rate": 4.995637468700978e-05, "loss": 0.2368, "step": 6555 }, { "epoch": 0.11693361395498163, "grad_norm": 0.2826472818851471, "learning_rate": 4.9956282725846025e-05, "loss": 0.235, "step": 6556 }, { "epoch": 0.11695145007669533, "grad_norm": 0.21869373321533203, "learning_rate": 4.9956190667943105e-05, "loss": 0.2272, "step": 6557 }, { "epoch": 0.11696928619840902, "grad_norm": 0.21034951508045197, "learning_rate": 4.995609851330137e-05, "loss": 0.2254, "step": 6558 }, { "epoch": 0.11698712232012272, "grad_norm": 0.30858585238456726, "learning_rate": 4.995600626192118e-05, "loss": 0.2623, "step": 6559 }, { "epoch": 0.1170049584418364, "grad_norm": 0.2884853780269623, "learning_rate": 4.995591391380289e-05, "loss": 0.2639, "step": 6560 }, { "epoch": 0.1170227945635501, "grad_norm": 0.2359286993741989, "learning_rate": 4.995582146894685e-05, "loss": 0.2539, "step": 6561 }, { "epoch": 0.1170406306852638, "grad_norm": 0.27310407161712646, "learning_rate": 4.995572892735344e-05, "loss": 0.2584, "step": 6562 }, { "epoch": 0.11705846680697748, "grad_norm": 0.2923947274684906, "learning_rate": 4.9955636289023e-05, "loss": 0.2785, "step": 6563 }, { "epoch": 0.11707630292869119, "grad_norm": 0.23849262297153473, "learning_rate": 4.9955543553955886e-05, "loss": 0.2302, "step": 6564 }, { "epoch": 0.11709413905040487, "grad_norm": 0.33659827709198, "learning_rate": 4.995545072215248e-05, "loss": 0.2384, "step": 6565 }, { "epoch": 0.11711197517211858, "grad_norm": 0.222193643450737, "learning_rate": 4.9955357793613115e-05, "loss": 0.1611, "step": 6566 }, { "epoch": 0.11712981129383226, "grad_norm": 0.3508221209049225, "learning_rate": 4.995526476833817e-05, "loss": 0.2726, "step": 6567 }, { "epoch": 0.11714764741554597, "grad_norm": 0.26974058151245117, "learning_rate": 4.9955171646328e-05, "loss": 0.2534, "step": 6568 }, { "epoch": 0.11716548353725965, "grad_norm": 0.29008787870407104, "learning_rate": 4.995507842758296e-05, "loss": 0.2275, "step": 6569 }, { "epoch": 0.11718331965897336, "grad_norm": 0.3137807250022888, "learning_rate": 4.9954985112103426e-05, "loss": 0.2848, "step": 6570 }, { "epoch": 0.11720115578068704, "grad_norm": 0.3047144114971161, "learning_rate": 4.9954891699889745e-05, "loss": 0.269, "step": 6571 }, { "epoch": 0.11721899190240075, "grad_norm": 0.36958450078964233, "learning_rate": 4.9954798190942286e-05, "loss": 0.2438, "step": 6572 }, { "epoch": 0.11723682802411443, "grad_norm": 0.31683841347694397, "learning_rate": 4.99547045852614e-05, "loss": 0.177, "step": 6573 }, { "epoch": 0.11725466414582814, "grad_norm": 0.30695050954818726, "learning_rate": 4.995461088284748e-05, "loss": 0.254, "step": 6574 }, { "epoch": 0.11727250026754182, "grad_norm": 0.2589077651500702, "learning_rate": 4.995451708370085e-05, "loss": 0.242, "step": 6575 }, { "epoch": 0.11729033638925553, "grad_norm": 0.4255754053592682, "learning_rate": 4.9954423187821906e-05, "loss": 0.28, "step": 6576 }, { "epoch": 0.11730817251096921, "grad_norm": 0.32474592328071594, "learning_rate": 4.995432919521099e-05, "loss": 0.3075, "step": 6577 }, { "epoch": 0.11732600863268292, "grad_norm": 0.2682439982891083, "learning_rate": 4.9954235105868486e-05, "loss": 0.2371, "step": 6578 }, { "epoch": 0.1173438447543966, "grad_norm": 0.3613157868385315, "learning_rate": 4.995414091979474e-05, "loss": 0.2335, "step": 6579 }, { "epoch": 0.1173616808761103, "grad_norm": 0.226851224899292, "learning_rate": 4.9954046636990124e-05, "loss": 0.2178, "step": 6580 }, { "epoch": 0.117379516997824, "grad_norm": 0.3016261160373688, "learning_rate": 4.9953952257455005e-05, "loss": 0.2686, "step": 6581 }, { "epoch": 0.11739735311953768, "grad_norm": 0.26512303948402405, "learning_rate": 4.995385778118975e-05, "loss": 0.2245, "step": 6582 }, { "epoch": 0.11741518924125138, "grad_norm": 0.30120185017585754, "learning_rate": 4.995376320819472e-05, "loss": 0.2316, "step": 6583 }, { "epoch": 0.11743302536296507, "grad_norm": 0.38008370995521545, "learning_rate": 4.995366853847029e-05, "loss": 0.2354, "step": 6584 }, { "epoch": 0.11745086148467877, "grad_norm": 0.3327029049396515, "learning_rate": 4.995357377201682e-05, "loss": 0.2229, "step": 6585 }, { "epoch": 0.11746869760639246, "grad_norm": 0.27921929955482483, "learning_rate": 4.9953478908834675e-05, "loss": 0.211, "step": 6586 }, { "epoch": 0.11748653372810616, "grad_norm": 0.2631414234638214, "learning_rate": 4.995338394892423e-05, "loss": 0.2494, "step": 6587 }, { "epoch": 0.11750436984981985, "grad_norm": 0.2520674765110016, "learning_rate": 4.9953288892285854e-05, "loss": 0.2591, "step": 6588 }, { "epoch": 0.11752220597153355, "grad_norm": 0.22743797302246094, "learning_rate": 4.9953193738919914e-05, "loss": 0.1758, "step": 6589 }, { "epoch": 0.11754004209324724, "grad_norm": 0.353944331407547, "learning_rate": 4.995309848882677e-05, "loss": 0.2928, "step": 6590 }, { "epoch": 0.11755787821496094, "grad_norm": 0.26722222566604614, "learning_rate": 4.995300314200679e-05, "loss": 0.2199, "step": 6591 }, { "epoch": 0.11757571433667463, "grad_norm": 0.4017643928527832, "learning_rate": 4.9952907698460366e-05, "loss": 0.2431, "step": 6592 }, { "epoch": 0.11759355045838833, "grad_norm": 0.3151828944683075, "learning_rate": 4.995281215818785e-05, "loss": 0.2712, "step": 6593 }, { "epoch": 0.11761138658010202, "grad_norm": 0.21078132092952728, "learning_rate": 4.9952716521189616e-05, "loss": 0.2107, "step": 6594 }, { "epoch": 0.11762922270181572, "grad_norm": 0.2749013304710388, "learning_rate": 4.9952620787466034e-05, "loss": 0.2412, "step": 6595 }, { "epoch": 0.11764705882352941, "grad_norm": 0.26387470960617065, "learning_rate": 4.9952524957017464e-05, "loss": 0.1953, "step": 6596 }, { "epoch": 0.11766489494524311, "grad_norm": 0.3293147683143616, "learning_rate": 4.9952429029844304e-05, "loss": 0.2751, "step": 6597 }, { "epoch": 0.1176827310669568, "grad_norm": 0.25535276532173157, "learning_rate": 4.9952333005946906e-05, "loss": 0.2381, "step": 6598 }, { "epoch": 0.1177005671886705, "grad_norm": 0.30123427510261536, "learning_rate": 4.9952236885325644e-05, "loss": 0.2064, "step": 6599 }, { "epoch": 0.11771840331038419, "grad_norm": 0.3245164453983307, "learning_rate": 4.99521406679809e-05, "loss": 0.2512, "step": 6600 }, { "epoch": 0.11773623943209788, "grad_norm": 0.2973853051662445, "learning_rate": 4.995204435391303e-05, "loss": 0.2492, "step": 6601 }, { "epoch": 0.11775407555381158, "grad_norm": 0.4251580238342285, "learning_rate": 4.9951947943122425e-05, "loss": 0.3153, "step": 6602 }, { "epoch": 0.11777191167552527, "grad_norm": 0.23978105187416077, "learning_rate": 4.995185143560945e-05, "loss": 0.2151, "step": 6603 }, { "epoch": 0.11778974779723897, "grad_norm": 0.25439050793647766, "learning_rate": 4.9951754831374485e-05, "loss": 0.2507, "step": 6604 }, { "epoch": 0.11780758391895266, "grad_norm": 0.27356353402137756, "learning_rate": 4.9951658130417897e-05, "loss": 0.189, "step": 6605 }, { "epoch": 0.11782542004066636, "grad_norm": 0.3462202250957489, "learning_rate": 4.995156133274006e-05, "loss": 0.2398, "step": 6606 }, { "epoch": 0.11784325616238005, "grad_norm": 0.27835625410079956, "learning_rate": 4.995146443834136e-05, "loss": 0.2305, "step": 6607 }, { "epoch": 0.11786109228409375, "grad_norm": 0.42552027106285095, "learning_rate": 4.995136744722216e-05, "loss": 0.2283, "step": 6608 }, { "epoch": 0.11787892840580744, "grad_norm": 0.33396610617637634, "learning_rate": 4.9951270359382854e-05, "loss": 0.2483, "step": 6609 }, { "epoch": 0.11789676452752114, "grad_norm": 0.36073634028434753, "learning_rate": 4.995117317482379e-05, "loss": 0.2802, "step": 6610 }, { "epoch": 0.11791460064923483, "grad_norm": 0.3328351676464081, "learning_rate": 4.995107589354537e-05, "loss": 0.2249, "step": 6611 }, { "epoch": 0.11793243677094853, "grad_norm": 0.27836957573890686, "learning_rate": 4.9950978515547955e-05, "loss": 0.2508, "step": 6612 }, { "epoch": 0.11795027289266222, "grad_norm": 0.31090670824050903, "learning_rate": 4.995088104083194e-05, "loss": 0.247, "step": 6613 }, { "epoch": 0.11796810901437592, "grad_norm": 0.32610180974006653, "learning_rate": 4.995078346939769e-05, "loss": 0.2439, "step": 6614 }, { "epoch": 0.1179859451360896, "grad_norm": 0.2930186688899994, "learning_rate": 4.995068580124558e-05, "loss": 0.2517, "step": 6615 }, { "epoch": 0.11800378125780331, "grad_norm": 0.3002059757709503, "learning_rate": 4.9950588036375996e-05, "loss": 0.2864, "step": 6616 }, { "epoch": 0.118021617379517, "grad_norm": 0.25812119245529175, "learning_rate": 4.995049017478931e-05, "loss": 0.2364, "step": 6617 }, { "epoch": 0.1180394535012307, "grad_norm": 0.23621611297130585, "learning_rate": 4.995039221648592e-05, "loss": 0.2157, "step": 6618 }, { "epoch": 0.11805728962294439, "grad_norm": 0.3015047311782837, "learning_rate": 4.995029416146618e-05, "loss": 0.2681, "step": 6619 }, { "epoch": 0.11807512574465809, "grad_norm": 0.38616377115249634, "learning_rate": 4.995019600973049e-05, "loss": 0.2638, "step": 6620 }, { "epoch": 0.11809296186637178, "grad_norm": 0.2730218768119812, "learning_rate": 4.9950097761279216e-05, "loss": 0.2351, "step": 6621 }, { "epoch": 0.11811079798808546, "grad_norm": 0.3057653307914734, "learning_rate": 4.994999941611275e-05, "loss": 0.2449, "step": 6622 }, { "epoch": 0.11812863410979917, "grad_norm": 0.2960295081138611, "learning_rate": 4.9949900974231466e-05, "loss": 0.2147, "step": 6623 }, { "epoch": 0.11814647023151285, "grad_norm": 0.3486759066581726, "learning_rate": 4.994980243563575e-05, "loss": 0.2918, "step": 6624 }, { "epoch": 0.11816430635322656, "grad_norm": 0.335227906703949, "learning_rate": 4.994970380032599e-05, "loss": 0.2261, "step": 6625 }, { "epoch": 0.11818214247494024, "grad_norm": 0.2913608253002167, "learning_rate": 4.994960506830255e-05, "loss": 0.2308, "step": 6626 }, { "epoch": 0.11819997859665395, "grad_norm": 0.3032926023006439, "learning_rate": 4.9949506239565823e-05, "loss": 0.1946, "step": 6627 }, { "epoch": 0.11821781471836763, "grad_norm": 0.29746878147125244, "learning_rate": 4.99494073141162e-05, "loss": 0.2485, "step": 6628 }, { "epoch": 0.11823565084008134, "grad_norm": 0.24762023985385895, "learning_rate": 4.994930829195405e-05, "loss": 0.1897, "step": 6629 }, { "epoch": 0.11825348696179502, "grad_norm": 0.2895682156085968, "learning_rate": 4.994920917307977e-05, "loss": 0.2041, "step": 6630 }, { "epoch": 0.11827132308350873, "grad_norm": 0.36560821533203125, "learning_rate": 4.994910995749373e-05, "loss": 0.2905, "step": 6631 }, { "epoch": 0.11828915920522241, "grad_norm": 0.247646301984787, "learning_rate": 4.994901064519633e-05, "loss": 0.2463, "step": 6632 }, { "epoch": 0.11830699532693612, "grad_norm": 0.2746172547340393, "learning_rate": 4.994891123618794e-05, "loss": 0.2764, "step": 6633 }, { "epoch": 0.1183248314486498, "grad_norm": 0.2499716579914093, "learning_rate": 4.9948811730468964e-05, "loss": 0.2391, "step": 6634 }, { "epoch": 0.1183426675703635, "grad_norm": 0.2748628556728363, "learning_rate": 4.994871212803977e-05, "loss": 0.2461, "step": 6635 }, { "epoch": 0.11836050369207719, "grad_norm": 0.32685232162475586, "learning_rate": 4.9948612428900755e-05, "loss": 0.2476, "step": 6636 }, { "epoch": 0.1183783398137909, "grad_norm": 0.2630250155925751, "learning_rate": 4.99485126330523e-05, "loss": 0.245, "step": 6637 }, { "epoch": 0.11839617593550458, "grad_norm": 0.214598149061203, "learning_rate": 4.994841274049479e-05, "loss": 0.1883, "step": 6638 }, { "epoch": 0.11841401205721828, "grad_norm": 0.3095002770423889, "learning_rate": 4.994831275122862e-05, "loss": 0.2162, "step": 6639 }, { "epoch": 0.11843184817893197, "grad_norm": 0.3015035390853882, "learning_rate": 4.9948212665254164e-05, "loss": 0.2522, "step": 6640 }, { "epoch": 0.11844968430064566, "grad_norm": 0.22176417708396912, "learning_rate": 4.9948112482571824e-05, "loss": 0.2248, "step": 6641 }, { "epoch": 0.11846752042235936, "grad_norm": 0.17620062828063965, "learning_rate": 4.9948012203181984e-05, "loss": 0.1728, "step": 6642 }, { "epoch": 0.11848535654407305, "grad_norm": 0.266944944858551, "learning_rate": 4.994791182708503e-05, "loss": 0.2401, "step": 6643 }, { "epoch": 0.11850319266578675, "grad_norm": 0.35771605372428894, "learning_rate": 4.9947811354281356e-05, "loss": 0.2796, "step": 6644 }, { "epoch": 0.11852102878750044, "grad_norm": 0.27012234926223755, "learning_rate": 4.994771078477135e-05, "loss": 0.2315, "step": 6645 }, { "epoch": 0.11853886490921414, "grad_norm": 0.3303559124469757, "learning_rate": 4.99476101185554e-05, "loss": 0.2775, "step": 6646 }, { "epoch": 0.11855670103092783, "grad_norm": 0.29805228114128113, "learning_rate": 4.9947509355633885e-05, "loss": 0.2268, "step": 6647 }, { "epoch": 0.11857453715264153, "grad_norm": 0.2512591481208801, "learning_rate": 4.994740849600722e-05, "loss": 0.2129, "step": 6648 }, { "epoch": 0.11859237327435522, "grad_norm": 0.3391852378845215, "learning_rate": 4.994730753967578e-05, "loss": 0.2284, "step": 6649 }, { "epoch": 0.11861020939606892, "grad_norm": 0.22234192490577698, "learning_rate": 4.9947206486639956e-05, "loss": 0.2152, "step": 6650 }, { "epoch": 0.11862804551778261, "grad_norm": 0.2869161367416382, "learning_rate": 4.994710533690015e-05, "loss": 0.164, "step": 6651 }, { "epoch": 0.11864588163949631, "grad_norm": 0.4121025800704956, "learning_rate": 4.994700409045674e-05, "loss": 0.2403, "step": 6652 }, { "epoch": 0.11866371776121, "grad_norm": 0.2744717299938202, "learning_rate": 4.994690274731013e-05, "loss": 0.2896, "step": 6653 }, { "epoch": 0.1186815538829237, "grad_norm": 0.24404260516166687, "learning_rate": 4.9946801307460705e-05, "loss": 0.2179, "step": 6654 }, { "epoch": 0.11869939000463739, "grad_norm": 0.2969261705875397, "learning_rate": 4.994669977090887e-05, "loss": 0.2448, "step": 6655 }, { "epoch": 0.11871722612635109, "grad_norm": 0.31903931498527527, "learning_rate": 4.9946598137655e-05, "loss": 0.2262, "step": 6656 }, { "epoch": 0.11873506224806478, "grad_norm": 0.5626586675643921, "learning_rate": 4.9946496407699505e-05, "loss": 0.2645, "step": 6657 }, { "epoch": 0.11875289836977848, "grad_norm": 0.26961636543273926, "learning_rate": 4.9946394581042766e-05, "loss": 0.2242, "step": 6658 }, { "epoch": 0.11877073449149217, "grad_norm": 0.33832380175590515, "learning_rate": 4.9946292657685194e-05, "loss": 0.2846, "step": 6659 }, { "epoch": 0.11878857061320587, "grad_norm": 0.20829878747463226, "learning_rate": 4.994619063762718e-05, "loss": 0.2231, "step": 6660 }, { "epoch": 0.11880640673491956, "grad_norm": 0.27362382411956787, "learning_rate": 4.994608852086911e-05, "loss": 0.2139, "step": 6661 }, { "epoch": 0.11882424285663325, "grad_norm": 0.2556726634502411, "learning_rate": 4.994598630741137e-05, "loss": 0.241, "step": 6662 }, { "epoch": 0.11884207897834695, "grad_norm": 0.2956306040287018, "learning_rate": 4.9945883997254395e-05, "loss": 0.317, "step": 6663 }, { "epoch": 0.11885991510006064, "grad_norm": 0.29402780532836914, "learning_rate": 4.9945781590398546e-05, "loss": 0.242, "step": 6664 }, { "epoch": 0.11887775122177434, "grad_norm": 0.22185377776622772, "learning_rate": 4.994567908684423e-05, "loss": 0.2097, "step": 6665 }, { "epoch": 0.11889558734348803, "grad_norm": 0.3091468811035156, "learning_rate": 4.994557648659185e-05, "loss": 0.2363, "step": 6666 }, { "epoch": 0.11891342346520173, "grad_norm": 0.31103816628456116, "learning_rate": 4.9945473789641794e-05, "loss": 0.287, "step": 6667 }, { "epoch": 0.11893125958691542, "grad_norm": 0.30507156252861023, "learning_rate": 4.994537099599447e-05, "loss": 0.3044, "step": 6668 }, { "epoch": 0.11894909570862912, "grad_norm": 0.34385067224502563, "learning_rate": 4.9945268105650274e-05, "loss": 0.2604, "step": 6669 }, { "epoch": 0.1189669318303428, "grad_norm": 0.2492048591375351, "learning_rate": 4.99451651186096e-05, "loss": 0.2526, "step": 6670 }, { "epoch": 0.11898476795205651, "grad_norm": 0.32889285683631897, "learning_rate": 4.994506203487285e-05, "loss": 0.2379, "step": 6671 }, { "epoch": 0.1190026040737702, "grad_norm": 0.26086732745170593, "learning_rate": 4.994495885444043e-05, "loss": 0.2191, "step": 6672 }, { "epoch": 0.1190204401954839, "grad_norm": 0.3591679036617279, "learning_rate": 4.994485557731272e-05, "loss": 0.2758, "step": 6673 }, { "epoch": 0.11903827631719759, "grad_norm": 0.29329365491867065, "learning_rate": 4.9944752203490144e-05, "loss": 0.2618, "step": 6674 }, { "epoch": 0.11905611243891129, "grad_norm": 0.34661364555358887, "learning_rate": 4.994464873297309e-05, "loss": 0.2673, "step": 6675 }, { "epoch": 0.11907394856062498, "grad_norm": 0.2907252311706543, "learning_rate": 4.994454516576197e-05, "loss": 0.253, "step": 6676 }, { "epoch": 0.11909178468233868, "grad_norm": 0.3732890486717224, "learning_rate": 4.994444150185716e-05, "loss": 0.2695, "step": 6677 }, { "epoch": 0.11910962080405237, "grad_norm": 0.24741658568382263, "learning_rate": 4.9944337741259095e-05, "loss": 0.2255, "step": 6678 }, { "epoch": 0.11912745692576607, "grad_norm": 0.35463738441467285, "learning_rate": 4.9944233883968163e-05, "loss": 0.2489, "step": 6679 }, { "epoch": 0.11914529304747976, "grad_norm": 0.28461650013923645, "learning_rate": 4.994412992998475e-05, "loss": 0.2562, "step": 6680 }, { "epoch": 0.11916312916919346, "grad_norm": 0.4393540024757385, "learning_rate": 4.994402587930928e-05, "loss": 0.2461, "step": 6681 }, { "epoch": 0.11918096529090715, "grad_norm": 0.24490773677825928, "learning_rate": 4.9943921731942155e-05, "loss": 0.2249, "step": 6682 }, { "epoch": 0.11919880141262083, "grad_norm": 0.23941229283809662, "learning_rate": 4.994381748788377e-05, "loss": 0.1848, "step": 6683 }, { "epoch": 0.11921663753433454, "grad_norm": 0.3012526035308838, "learning_rate": 4.994371314713454e-05, "loss": 0.2049, "step": 6684 }, { "epoch": 0.11923447365604822, "grad_norm": 0.45927903056144714, "learning_rate": 4.994360870969486e-05, "loss": 0.2074, "step": 6685 }, { "epoch": 0.11925230977776193, "grad_norm": 0.5445613861083984, "learning_rate": 4.9943504175565134e-05, "loss": 0.245, "step": 6686 }, { "epoch": 0.11927014589947561, "grad_norm": 0.3541640043258667, "learning_rate": 4.9943399544745765e-05, "loss": 0.2455, "step": 6687 }, { "epoch": 0.11928798202118931, "grad_norm": 0.2198963761329651, "learning_rate": 4.994329481723717e-05, "loss": 0.2239, "step": 6688 }, { "epoch": 0.119305818142903, "grad_norm": 0.2399929016828537, "learning_rate": 4.994318999303975e-05, "loss": 0.1995, "step": 6689 }, { "epoch": 0.1193236542646167, "grad_norm": 0.30972346663475037, "learning_rate": 4.994308507215392e-05, "loss": 0.1912, "step": 6690 }, { "epoch": 0.11934149038633039, "grad_norm": 0.25458747148513794, "learning_rate": 4.994298005458006e-05, "loss": 0.2028, "step": 6691 }, { "epoch": 0.1193593265080441, "grad_norm": 0.31524714827537537, "learning_rate": 4.99428749403186e-05, "loss": 0.2667, "step": 6692 }, { "epoch": 0.11937716262975778, "grad_norm": 0.26774513721466064, "learning_rate": 4.994276972936994e-05, "loss": 0.259, "step": 6693 }, { "epoch": 0.11939499875147148, "grad_norm": 0.38334810733795166, "learning_rate": 4.99426644217345e-05, "loss": 0.2902, "step": 6694 }, { "epoch": 0.11941283487318517, "grad_norm": 0.2599673867225647, "learning_rate": 4.994255901741267e-05, "loss": 0.2221, "step": 6695 }, { "epoch": 0.11943067099489887, "grad_norm": 0.3482665419578552, "learning_rate": 4.994245351640486e-05, "loss": 0.2359, "step": 6696 }, { "epoch": 0.11944850711661256, "grad_norm": 0.34844645857810974, "learning_rate": 4.99423479187115e-05, "loss": 0.271, "step": 6697 }, { "epoch": 0.11946634323832626, "grad_norm": 0.3628528118133545, "learning_rate": 4.9942242224332975e-05, "loss": 0.2892, "step": 6698 }, { "epoch": 0.11948417936003995, "grad_norm": 0.2471059113740921, "learning_rate": 4.99421364332697e-05, "loss": 0.2482, "step": 6699 }, { "epoch": 0.11950201548175365, "grad_norm": 0.3259129226207733, "learning_rate": 4.99420305455221e-05, "loss": 0.3085, "step": 6700 }, { "epoch": 0.11951985160346734, "grad_norm": 0.3657534420490265, "learning_rate": 4.994192456109057e-05, "loss": 0.2476, "step": 6701 }, { "epoch": 0.11953768772518103, "grad_norm": 0.29605263471603394, "learning_rate": 4.9941818479975535e-05, "loss": 0.2237, "step": 6702 }, { "epoch": 0.11955552384689473, "grad_norm": 0.32068613171577454, "learning_rate": 4.994171230217738e-05, "loss": 0.2713, "step": 6703 }, { "epoch": 0.11957335996860842, "grad_norm": 0.27349868416786194, "learning_rate": 4.994160602769654e-05, "loss": 0.2632, "step": 6704 }, { "epoch": 0.11959119609032212, "grad_norm": 0.30878397822380066, "learning_rate": 4.994149965653343e-05, "loss": 0.2176, "step": 6705 }, { "epoch": 0.11960903221203581, "grad_norm": 0.27538296580314636, "learning_rate": 4.9941393188688444e-05, "loss": 0.2307, "step": 6706 }, { "epoch": 0.11962686833374951, "grad_norm": 0.2698029577732086, "learning_rate": 4.9941286624162e-05, "loss": 0.2221, "step": 6707 }, { "epoch": 0.1196447044554632, "grad_norm": 0.29200440645217896, "learning_rate": 4.994117996295452e-05, "loss": 0.1846, "step": 6708 }, { "epoch": 0.1196625405771769, "grad_norm": 0.21572646498680115, "learning_rate": 4.9941073205066414e-05, "loss": 0.2047, "step": 6709 }, { "epoch": 0.11968037669889059, "grad_norm": 0.3035578727722168, "learning_rate": 4.994096635049809e-05, "loss": 0.2245, "step": 6710 }, { "epoch": 0.11969821282060429, "grad_norm": 0.27503350377082825, "learning_rate": 4.9940859399249965e-05, "loss": 0.2405, "step": 6711 }, { "epoch": 0.11971604894231798, "grad_norm": 0.268382728099823, "learning_rate": 4.994075235132246e-05, "loss": 0.2561, "step": 6712 }, { "epoch": 0.11973388506403168, "grad_norm": 0.2590249478816986, "learning_rate": 4.994064520671598e-05, "loss": 0.2279, "step": 6713 }, { "epoch": 0.11975172118574537, "grad_norm": 0.2540910840034485, "learning_rate": 4.9940537965430943e-05, "loss": 0.2034, "step": 6714 }, { "epoch": 0.11976955730745907, "grad_norm": 0.24925297498703003, "learning_rate": 4.994043062746778e-05, "loss": 0.2024, "step": 6715 }, { "epoch": 0.11978739342917276, "grad_norm": 0.26008927822113037, "learning_rate": 4.994032319282688e-05, "loss": 0.2176, "step": 6716 }, { "epoch": 0.11980522955088646, "grad_norm": 0.35265469551086426, "learning_rate": 4.994021566150868e-05, "loss": 0.2135, "step": 6717 }, { "epoch": 0.11982306567260015, "grad_norm": 0.36726319789886475, "learning_rate": 4.9940108033513585e-05, "loss": 0.1925, "step": 6718 }, { "epoch": 0.11984090179431385, "grad_norm": 0.33205559849739075, "learning_rate": 4.9940000308842015e-05, "loss": 0.3123, "step": 6719 }, { "epoch": 0.11985873791602754, "grad_norm": 0.29143860936164856, "learning_rate": 4.99398924874944e-05, "loss": 0.2415, "step": 6720 }, { "epoch": 0.11987657403774124, "grad_norm": 0.2193698287010193, "learning_rate": 4.9939784569471135e-05, "loss": 0.2014, "step": 6721 }, { "epoch": 0.11989441015945493, "grad_norm": 0.37905389070510864, "learning_rate": 4.9939676554772665e-05, "loss": 0.2994, "step": 6722 }, { "epoch": 0.11991224628116862, "grad_norm": 0.3636080324649811, "learning_rate": 4.9939568443399384e-05, "loss": 0.2016, "step": 6723 }, { "epoch": 0.11993008240288232, "grad_norm": 0.27542924880981445, "learning_rate": 4.993946023535173e-05, "loss": 0.2301, "step": 6724 }, { "epoch": 0.119947918524596, "grad_norm": 0.30708178877830505, "learning_rate": 4.993935193063011e-05, "loss": 0.2058, "step": 6725 }, { "epoch": 0.11996575464630971, "grad_norm": 0.3631151020526886, "learning_rate": 4.993924352923495e-05, "loss": 0.1853, "step": 6726 }, { "epoch": 0.1199835907680234, "grad_norm": 0.31861039996147156, "learning_rate": 4.993913503116666e-05, "loss": 0.2565, "step": 6727 }, { "epoch": 0.1200014268897371, "grad_norm": 0.3508286774158478, "learning_rate": 4.993902643642568e-05, "loss": 0.2511, "step": 6728 }, { "epoch": 0.12001926301145079, "grad_norm": 0.26951679587364197, "learning_rate": 4.993891774501241e-05, "loss": 0.1625, "step": 6729 }, { "epoch": 0.12003709913316449, "grad_norm": 0.49868273735046387, "learning_rate": 4.993880895692729e-05, "loss": 0.2422, "step": 6730 }, { "epoch": 0.12005493525487818, "grad_norm": 0.27037322521209717, "learning_rate": 4.993870007217073e-05, "loss": 0.2366, "step": 6731 }, { "epoch": 0.12007277137659188, "grad_norm": 0.29594314098358154, "learning_rate": 4.993859109074315e-05, "loss": 0.2601, "step": 6732 }, { "epoch": 0.12009060749830557, "grad_norm": 0.32387587428092957, "learning_rate": 4.993848201264498e-05, "loss": 0.2454, "step": 6733 }, { "epoch": 0.12010844362001927, "grad_norm": 0.29027819633483887, "learning_rate": 4.993837283787664e-05, "loss": 0.1717, "step": 6734 }, { "epoch": 0.12012627974173296, "grad_norm": 0.24900208413600922, "learning_rate": 4.993826356643856e-05, "loss": 0.2277, "step": 6735 }, { "epoch": 0.12014411586344666, "grad_norm": 0.29552027583122253, "learning_rate": 4.9938154198331155e-05, "loss": 0.216, "step": 6736 }, { "epoch": 0.12016195198516035, "grad_norm": 0.39090508222579956, "learning_rate": 4.993804473355485e-05, "loss": 0.283, "step": 6737 }, { "epoch": 0.12017978810687405, "grad_norm": 0.4318279027938843, "learning_rate": 4.9937935172110065e-05, "loss": 0.2041, "step": 6738 }, { "epoch": 0.12019762422858773, "grad_norm": 0.36057740449905396, "learning_rate": 4.993782551399724e-05, "loss": 0.2527, "step": 6739 }, { "epoch": 0.12021546035030144, "grad_norm": 0.7333322167396545, "learning_rate": 4.993771575921678e-05, "loss": 0.225, "step": 6740 }, { "epoch": 0.12023329647201512, "grad_norm": 0.3208678364753723, "learning_rate": 4.993760590776913e-05, "loss": 0.2134, "step": 6741 }, { "epoch": 0.12025113259372881, "grad_norm": 0.33157041668891907, "learning_rate": 4.993749595965469e-05, "loss": 0.2635, "step": 6742 }, { "epoch": 0.12026896871544251, "grad_norm": 0.22630712389945984, "learning_rate": 4.9937385914873916e-05, "loss": 0.2186, "step": 6743 }, { "epoch": 0.1202868048371562, "grad_norm": 0.37185901403427124, "learning_rate": 4.993727577342722e-05, "loss": 0.1725, "step": 6744 }, { "epoch": 0.1203046409588699, "grad_norm": 0.2303929626941681, "learning_rate": 4.993716553531503e-05, "loss": 0.2277, "step": 6745 }, { "epoch": 0.12032247708058359, "grad_norm": 0.3440285325050354, "learning_rate": 4.993705520053777e-05, "loss": 0.2769, "step": 6746 }, { "epoch": 0.1203403132022973, "grad_norm": 0.39371126890182495, "learning_rate": 4.9936944769095874e-05, "loss": 0.2804, "step": 6747 }, { "epoch": 0.12035814932401098, "grad_norm": 0.3641282618045807, "learning_rate": 4.993683424098976e-05, "loss": 0.3108, "step": 6748 }, { "epoch": 0.12037598544572468, "grad_norm": 0.3169698715209961, "learning_rate": 4.993672361621987e-05, "loss": 0.3041, "step": 6749 }, { "epoch": 0.12039382156743837, "grad_norm": 0.42722922563552856, "learning_rate": 4.993661289478663e-05, "loss": 0.2841, "step": 6750 }, { "epoch": 0.12041165768915207, "grad_norm": 0.3565793037414551, "learning_rate": 4.993650207669046e-05, "loss": 0.2598, "step": 6751 }, { "epoch": 0.12042949381086576, "grad_norm": 0.4076734185218811, "learning_rate": 4.99363911619318e-05, "loss": 0.2926, "step": 6752 }, { "epoch": 0.12044732993257946, "grad_norm": 0.2621926963329315, "learning_rate": 4.993628015051107e-05, "loss": 0.2074, "step": 6753 }, { "epoch": 0.12046516605429315, "grad_norm": 0.2789039611816406, "learning_rate": 4.993616904242871e-05, "loss": 0.2603, "step": 6754 }, { "epoch": 0.12048300217600685, "grad_norm": 0.23423604667186737, "learning_rate": 4.993605783768514e-05, "loss": 0.221, "step": 6755 }, { "epoch": 0.12050083829772054, "grad_norm": 0.37556326389312744, "learning_rate": 4.99359465362808e-05, "loss": 0.2258, "step": 6756 }, { "epoch": 0.12051867441943424, "grad_norm": 0.29016271233558655, "learning_rate": 4.993583513821612e-05, "loss": 0.2374, "step": 6757 }, { "epoch": 0.12053651054114793, "grad_norm": 0.3432520925998688, "learning_rate": 4.9935723643491526e-05, "loss": 0.2683, "step": 6758 }, { "epoch": 0.12055434666286163, "grad_norm": 0.3250076174736023, "learning_rate": 4.9935612052107464e-05, "loss": 0.2431, "step": 6759 }, { "epoch": 0.12057218278457532, "grad_norm": 0.3131701350212097, "learning_rate": 4.9935500364064346e-05, "loss": 0.3196, "step": 6760 }, { "epoch": 0.12059001890628902, "grad_norm": 0.36178430914878845, "learning_rate": 4.9935388579362625e-05, "loss": 0.2442, "step": 6761 }, { "epoch": 0.12060785502800271, "grad_norm": 0.4066222906112671, "learning_rate": 4.993527669800272e-05, "loss": 0.3069, "step": 6762 }, { "epoch": 0.1206256911497164, "grad_norm": 0.21849587559700012, "learning_rate": 4.993516471998507e-05, "loss": 0.2223, "step": 6763 }, { "epoch": 0.1206435272714301, "grad_norm": 0.3107141852378845, "learning_rate": 4.993505264531012e-05, "loss": 0.2641, "step": 6764 }, { "epoch": 0.12066136339314379, "grad_norm": 0.37780794501304626, "learning_rate": 4.993494047397828e-05, "loss": 0.2627, "step": 6765 }, { "epoch": 0.12067919951485749, "grad_norm": 0.49505695700645447, "learning_rate": 4.993482820599e-05, "loss": 0.2632, "step": 6766 }, { "epoch": 0.12069703563657118, "grad_norm": 0.3290885090827942, "learning_rate": 4.993471584134573e-05, "loss": 0.2578, "step": 6767 }, { "epoch": 0.12071487175828488, "grad_norm": 0.3457907736301422, "learning_rate": 4.9934603380045865e-05, "loss": 0.2594, "step": 6768 }, { "epoch": 0.12073270787999857, "grad_norm": 0.3863312602043152, "learning_rate": 4.993449082209088e-05, "loss": 0.2666, "step": 6769 }, { "epoch": 0.12075054400171227, "grad_norm": 0.30412641167640686, "learning_rate": 4.993437816748119e-05, "loss": 0.2323, "step": 6770 }, { "epoch": 0.12076838012342596, "grad_norm": 0.6357429623603821, "learning_rate": 4.993426541621724e-05, "loss": 0.3115, "step": 6771 }, { "epoch": 0.12078621624513966, "grad_norm": 0.2708114981651306, "learning_rate": 4.993415256829947e-05, "loss": 0.2863, "step": 6772 }, { "epoch": 0.12080405236685335, "grad_norm": 0.20062050223350525, "learning_rate": 4.993403962372831e-05, "loss": 0.1997, "step": 6773 }, { "epoch": 0.12082188848856705, "grad_norm": 0.32945525646209717, "learning_rate": 4.9933926582504196e-05, "loss": 0.2578, "step": 6774 }, { "epoch": 0.12083972461028074, "grad_norm": 0.3725665807723999, "learning_rate": 4.993381344462757e-05, "loss": 0.2429, "step": 6775 }, { "epoch": 0.12085756073199444, "grad_norm": 0.229294091463089, "learning_rate": 4.9933700210098885e-05, "loss": 0.18, "step": 6776 }, { "epoch": 0.12087539685370813, "grad_norm": 0.36206942796707153, "learning_rate": 4.9933586878918555e-05, "loss": 0.3436, "step": 6777 }, { "epoch": 0.12089323297542183, "grad_norm": 0.26434171199798584, "learning_rate": 4.993347345108703e-05, "loss": 0.2589, "step": 6778 }, { "epoch": 0.12091106909713552, "grad_norm": 0.23119845986366272, "learning_rate": 4.9933359926604754e-05, "loss": 0.245, "step": 6779 }, { "epoch": 0.12092890521884922, "grad_norm": 0.23035800457000732, "learning_rate": 4.993324630547216e-05, "loss": 0.2051, "step": 6780 }, { "epoch": 0.12094674134056291, "grad_norm": 0.21824614703655243, "learning_rate": 4.993313258768969e-05, "loss": 0.1873, "step": 6781 }, { "epoch": 0.1209645774622766, "grad_norm": 0.24966119229793549, "learning_rate": 4.993301877325779e-05, "loss": 0.2338, "step": 6782 }, { "epoch": 0.1209824135839903, "grad_norm": 0.24788899719715118, "learning_rate": 4.99329048621769e-05, "loss": 0.2228, "step": 6783 }, { "epoch": 0.12100024970570399, "grad_norm": 0.336873322725296, "learning_rate": 4.993279085444745e-05, "loss": 0.1714, "step": 6784 }, { "epoch": 0.12101808582741769, "grad_norm": 0.26959362626075745, "learning_rate": 4.9932676750069906e-05, "loss": 0.2045, "step": 6785 }, { "epoch": 0.12103592194913138, "grad_norm": 0.24538543820381165, "learning_rate": 4.993256254904468e-05, "loss": 0.1682, "step": 6786 }, { "epoch": 0.12105375807084508, "grad_norm": 0.27018868923187256, "learning_rate": 4.993244825137224e-05, "loss": 0.2366, "step": 6787 }, { "epoch": 0.12107159419255877, "grad_norm": 0.30733153223991394, "learning_rate": 4.9932333857053015e-05, "loss": 0.2237, "step": 6788 }, { "epoch": 0.12108943031427247, "grad_norm": 0.25243696570396423, "learning_rate": 4.993221936608746e-05, "loss": 0.2187, "step": 6789 }, { "epoch": 0.12110726643598616, "grad_norm": 0.4344727098941803, "learning_rate": 4.9932104778476005e-05, "loss": 0.2699, "step": 6790 }, { "epoch": 0.12112510255769986, "grad_norm": 0.3126193583011627, "learning_rate": 4.9931990094219095e-05, "loss": 0.2161, "step": 6791 }, { "epoch": 0.12114293867941354, "grad_norm": 0.34603577852249146, "learning_rate": 4.993187531331719e-05, "loss": 0.2325, "step": 6792 }, { "epoch": 0.12116077480112725, "grad_norm": 0.46902620792388916, "learning_rate": 4.993176043577072e-05, "loss": 0.3418, "step": 6793 }, { "epoch": 0.12117861092284093, "grad_norm": 0.25489768385887146, "learning_rate": 4.993164546158013e-05, "loss": 0.2325, "step": 6794 }, { "epoch": 0.12119644704455464, "grad_norm": 0.27331244945526123, "learning_rate": 4.9931530390745884e-05, "loss": 0.2092, "step": 6795 }, { "epoch": 0.12121428316626832, "grad_norm": 0.28268101811408997, "learning_rate": 4.993141522326841e-05, "loss": 0.1951, "step": 6796 }, { "epoch": 0.12123211928798203, "grad_norm": 0.24887113273143768, "learning_rate": 4.993129995914816e-05, "loss": 0.1944, "step": 6797 }, { "epoch": 0.12124995540969571, "grad_norm": 0.2970426380634308, "learning_rate": 4.9931184598385575e-05, "loss": 0.2731, "step": 6798 }, { "epoch": 0.12126779153140942, "grad_norm": 0.2415713667869568, "learning_rate": 4.9931069140981115e-05, "loss": 0.2219, "step": 6799 }, { "epoch": 0.1212856276531231, "grad_norm": 0.3172830641269684, "learning_rate": 4.9930953586935216e-05, "loss": 0.2126, "step": 6800 }, { "epoch": 0.1213034637748368, "grad_norm": 0.3285074830055237, "learning_rate": 4.993083793624833e-05, "loss": 0.2399, "step": 6801 }, { "epoch": 0.1213212998965505, "grad_norm": 0.2567670941352844, "learning_rate": 4.99307221889209e-05, "loss": 0.1948, "step": 6802 }, { "epoch": 0.12133913601826418, "grad_norm": 0.23109138011932373, "learning_rate": 4.993060634495339e-05, "loss": 0.2142, "step": 6803 }, { "epoch": 0.12135697213997788, "grad_norm": 0.21992535889148712, "learning_rate": 4.993049040434623e-05, "loss": 0.1925, "step": 6804 }, { "epoch": 0.12137480826169157, "grad_norm": 0.276203453540802, "learning_rate": 4.9930374367099886e-05, "loss": 0.2182, "step": 6805 }, { "epoch": 0.12139264438340527, "grad_norm": 0.24764001369476318, "learning_rate": 4.99302582332148e-05, "loss": 0.2153, "step": 6806 }, { "epoch": 0.12141048050511896, "grad_norm": 0.35662516951560974, "learning_rate": 4.9930142002691416e-05, "loss": 0.2543, "step": 6807 }, { "epoch": 0.12142831662683266, "grad_norm": 0.3767130672931671, "learning_rate": 4.99300256755302e-05, "loss": 0.2175, "step": 6808 }, { "epoch": 0.12144615274854635, "grad_norm": 0.33303138613700867, "learning_rate": 4.992990925173159e-05, "loss": 0.2623, "step": 6809 }, { "epoch": 0.12146398887026005, "grad_norm": 0.24306745827198029, "learning_rate": 4.9929792731296035e-05, "loss": 0.2644, "step": 6810 }, { "epoch": 0.12148182499197374, "grad_norm": 0.45018666982650757, "learning_rate": 4.9929676114224e-05, "loss": 0.2324, "step": 6811 }, { "epoch": 0.12149966111368744, "grad_norm": 0.2308064103126526, "learning_rate": 4.992955940051593e-05, "loss": 0.1957, "step": 6812 }, { "epoch": 0.12151749723540113, "grad_norm": 0.24440710246562958, "learning_rate": 4.992944259017227e-05, "loss": 0.2413, "step": 6813 }, { "epoch": 0.12153533335711483, "grad_norm": 0.2904827296733856, "learning_rate": 4.992932568319349e-05, "loss": 0.2265, "step": 6814 }, { "epoch": 0.12155316947882852, "grad_norm": 0.29666444659233093, "learning_rate": 4.9929208679580034e-05, "loss": 0.2269, "step": 6815 }, { "epoch": 0.12157100560054222, "grad_norm": 0.24752987921237946, "learning_rate": 4.992909157933234e-05, "loss": 0.2141, "step": 6816 }, { "epoch": 0.12158884172225591, "grad_norm": 0.30989548563957214, "learning_rate": 4.992897438245089e-05, "loss": 0.2451, "step": 6817 }, { "epoch": 0.12160667784396961, "grad_norm": 0.353629469871521, "learning_rate": 4.992885708893612e-05, "loss": 0.2336, "step": 6818 }, { "epoch": 0.1216245139656833, "grad_norm": 0.2712937593460083, "learning_rate": 4.9928739698788495e-05, "loss": 0.2644, "step": 6819 }, { "epoch": 0.121642350087397, "grad_norm": 0.33031079173088074, "learning_rate": 4.992862221200846e-05, "loss": 0.2104, "step": 6820 }, { "epoch": 0.12166018620911069, "grad_norm": 0.3419361412525177, "learning_rate": 4.992850462859647e-05, "loss": 0.231, "step": 6821 }, { "epoch": 0.12167802233082439, "grad_norm": 0.4428279995918274, "learning_rate": 4.992838694855299e-05, "loss": 0.2102, "step": 6822 }, { "epoch": 0.12169585845253808, "grad_norm": 0.3489314615726471, "learning_rate": 4.9928269171878485e-05, "loss": 0.2683, "step": 6823 }, { "epoch": 0.12171369457425177, "grad_norm": 0.2731943726539612, "learning_rate": 4.992815129857339e-05, "loss": 0.2125, "step": 6824 }, { "epoch": 0.12173153069596547, "grad_norm": 0.31702354550361633, "learning_rate": 4.992803332863817e-05, "loss": 0.229, "step": 6825 }, { "epoch": 0.12174936681767916, "grad_norm": 0.2625712454319, "learning_rate": 4.9927915262073276e-05, "loss": 0.2298, "step": 6826 }, { "epoch": 0.12176720293939286, "grad_norm": 0.30082428455352783, "learning_rate": 4.992779709887918e-05, "loss": 0.222, "step": 6827 }, { "epoch": 0.12178503906110655, "grad_norm": 0.28463712334632874, "learning_rate": 4.9927678839056336e-05, "loss": 0.2292, "step": 6828 }, { "epoch": 0.12180287518282025, "grad_norm": 0.20966772735118866, "learning_rate": 4.992756048260519e-05, "loss": 0.2277, "step": 6829 }, { "epoch": 0.12182071130453394, "grad_norm": 0.31907060742378235, "learning_rate": 4.9927442029526214e-05, "loss": 0.1992, "step": 6830 }, { "epoch": 0.12183854742624764, "grad_norm": 0.33759158849716187, "learning_rate": 4.992732347981987e-05, "loss": 0.2195, "step": 6831 }, { "epoch": 0.12185638354796133, "grad_norm": 0.21873730421066284, "learning_rate": 4.9927204833486596e-05, "loss": 0.2156, "step": 6832 }, { "epoch": 0.12187421966967503, "grad_norm": 0.40703877806663513, "learning_rate": 4.992708609052688e-05, "loss": 0.2315, "step": 6833 }, { "epoch": 0.12189205579138872, "grad_norm": 0.2686084806919098, "learning_rate": 4.992696725094116e-05, "loss": 0.2249, "step": 6834 }, { "epoch": 0.12190989191310242, "grad_norm": 0.3546275496482849, "learning_rate": 4.9926848314729914e-05, "loss": 0.2807, "step": 6835 }, { "epoch": 0.12192772803481611, "grad_norm": 0.28341612219810486, "learning_rate": 4.992672928189358e-05, "loss": 0.2633, "step": 6836 }, { "epoch": 0.12194556415652981, "grad_norm": 0.2698984444141388, "learning_rate": 4.9926610152432644e-05, "loss": 0.2086, "step": 6837 }, { "epoch": 0.1219634002782435, "grad_norm": 0.3039058744907379, "learning_rate": 4.992649092634756e-05, "loss": 0.2717, "step": 6838 }, { "epoch": 0.1219812363999572, "grad_norm": 0.19431540369987488, "learning_rate": 4.9926371603638786e-05, "loss": 0.1906, "step": 6839 }, { "epoch": 0.12199907252167089, "grad_norm": 0.3405723571777344, "learning_rate": 4.9926252184306785e-05, "loss": 0.2554, "step": 6840 }, { "epoch": 0.12201690864338459, "grad_norm": 0.2744424343109131, "learning_rate": 4.992613266835202e-05, "loss": 0.1976, "step": 6841 }, { "epoch": 0.12203474476509828, "grad_norm": 0.4421294033527374, "learning_rate": 4.9926013055774956e-05, "loss": 0.2431, "step": 6842 }, { "epoch": 0.12205258088681196, "grad_norm": 0.2360885590314865, "learning_rate": 4.9925893346576056e-05, "loss": 0.2236, "step": 6843 }, { "epoch": 0.12207041700852567, "grad_norm": 0.364916056394577, "learning_rate": 4.992577354075578e-05, "loss": 0.2304, "step": 6844 }, { "epoch": 0.12208825313023935, "grad_norm": 0.33794131875038147, "learning_rate": 4.9925653638314603e-05, "loss": 0.2657, "step": 6845 }, { "epoch": 0.12210608925195306, "grad_norm": 0.32369041442871094, "learning_rate": 4.9925533639252986e-05, "loss": 0.2263, "step": 6846 }, { "epoch": 0.12212392537366674, "grad_norm": 0.351128488779068, "learning_rate": 4.992541354357138e-05, "loss": 0.2544, "step": 6847 }, { "epoch": 0.12214176149538045, "grad_norm": 0.2790156602859497, "learning_rate": 4.992529335127028e-05, "loss": 0.2653, "step": 6848 }, { "epoch": 0.12215959761709413, "grad_norm": 0.24617858231067657, "learning_rate": 4.992517306235012e-05, "loss": 0.2477, "step": 6849 }, { "epoch": 0.12217743373880784, "grad_norm": 0.24819330871105194, "learning_rate": 4.992505267681139e-05, "loss": 0.2145, "step": 6850 }, { "epoch": 0.12219526986052152, "grad_norm": 0.2965717613697052, "learning_rate": 4.992493219465454e-05, "loss": 0.1964, "step": 6851 }, { "epoch": 0.12221310598223523, "grad_norm": 0.4467228651046753, "learning_rate": 4.992481161588004e-05, "loss": 0.2555, "step": 6852 }, { "epoch": 0.12223094210394891, "grad_norm": 0.36978498101234436, "learning_rate": 4.9924690940488375e-05, "loss": 0.3208, "step": 6853 }, { "epoch": 0.12224877822566262, "grad_norm": 0.40889060497283936, "learning_rate": 4.992457016847999e-05, "loss": 0.2466, "step": 6854 }, { "epoch": 0.1222666143473763, "grad_norm": 0.27608317136764526, "learning_rate": 4.9924449299855355e-05, "loss": 0.2297, "step": 6855 }, { "epoch": 0.12228445046909, "grad_norm": 0.4811840355396271, "learning_rate": 4.9924328334614954e-05, "loss": 0.2224, "step": 6856 }, { "epoch": 0.1223022865908037, "grad_norm": 0.3006702661514282, "learning_rate": 4.992420727275926e-05, "loss": 0.2652, "step": 6857 }, { "epoch": 0.1223201227125174, "grad_norm": 0.3643052279949188, "learning_rate": 4.992408611428871e-05, "loss": 0.2831, "step": 6858 }, { "epoch": 0.12233795883423108, "grad_norm": 0.3541288375854492, "learning_rate": 4.99239648592038e-05, "loss": 0.31, "step": 6859 }, { "epoch": 0.12235579495594479, "grad_norm": 0.2782037556171417, "learning_rate": 4.9923843507505e-05, "loss": 0.2041, "step": 6860 }, { "epoch": 0.12237363107765847, "grad_norm": 0.3100832998752594, "learning_rate": 4.992372205919277e-05, "loss": 0.2858, "step": 6861 }, { "epoch": 0.12239146719937218, "grad_norm": 0.3420276641845703, "learning_rate": 4.992360051426759e-05, "loss": 0.2443, "step": 6862 }, { "epoch": 0.12240930332108586, "grad_norm": 0.3606860637664795, "learning_rate": 4.992347887272991e-05, "loss": 0.309, "step": 6863 }, { "epoch": 0.12242713944279955, "grad_norm": 0.3299979865550995, "learning_rate": 4.992335713458023e-05, "loss": 0.277, "step": 6864 }, { "epoch": 0.12244497556451325, "grad_norm": 0.3540804088115692, "learning_rate": 4.992323529981901e-05, "loss": 0.2813, "step": 6865 }, { "epoch": 0.12246281168622694, "grad_norm": 0.315603643655777, "learning_rate": 4.992311336844672e-05, "loss": 0.2417, "step": 6866 }, { "epoch": 0.12248064780794064, "grad_norm": 0.3033362627029419, "learning_rate": 4.9922991340463834e-05, "loss": 0.2342, "step": 6867 }, { "epoch": 0.12249848392965433, "grad_norm": 0.2897791266441345, "learning_rate": 4.9922869215870824e-05, "loss": 0.2639, "step": 6868 }, { "epoch": 0.12251632005136803, "grad_norm": 0.3022953271865845, "learning_rate": 4.992274699466817e-05, "loss": 0.1963, "step": 6869 }, { "epoch": 0.12253415617308172, "grad_norm": 0.24022029340267181, "learning_rate": 4.992262467685633e-05, "loss": 0.2159, "step": 6870 }, { "epoch": 0.12255199229479542, "grad_norm": 0.2967076897621155, "learning_rate": 4.99225022624358e-05, "loss": 0.2484, "step": 6871 }, { "epoch": 0.12256982841650911, "grad_norm": 0.3520475924015045, "learning_rate": 4.9922379751407045e-05, "loss": 0.2256, "step": 6872 }, { "epoch": 0.12258766453822281, "grad_norm": 0.30049577355384827, "learning_rate": 4.992225714377053e-05, "loss": 0.1818, "step": 6873 }, { "epoch": 0.1226055006599365, "grad_norm": 0.2451341450214386, "learning_rate": 4.992213443952674e-05, "loss": 0.1951, "step": 6874 }, { "epoch": 0.1226233367816502, "grad_norm": 0.3067533075809479, "learning_rate": 4.992201163867615e-05, "loss": 0.2347, "step": 6875 }, { "epoch": 0.12264117290336389, "grad_norm": 0.2550610899925232, "learning_rate": 4.9921888741219234e-05, "loss": 0.2227, "step": 6876 }, { "epoch": 0.12265900902507759, "grad_norm": 0.36997440457344055, "learning_rate": 4.992176574715647e-05, "loss": 0.2883, "step": 6877 }, { "epoch": 0.12267684514679128, "grad_norm": 0.24751408398151398, "learning_rate": 4.9921642656488334e-05, "loss": 0.2605, "step": 6878 }, { "epoch": 0.12269468126850498, "grad_norm": 0.3025315999984741, "learning_rate": 4.99215194692153e-05, "loss": 0.2219, "step": 6879 }, { "epoch": 0.12271251739021867, "grad_norm": 0.29911789298057556, "learning_rate": 4.992139618533785e-05, "loss": 0.1984, "step": 6880 }, { "epoch": 0.12273035351193237, "grad_norm": 0.37772804498672485, "learning_rate": 4.992127280485647e-05, "loss": 0.2645, "step": 6881 }, { "epoch": 0.12274818963364606, "grad_norm": 0.2674390971660614, "learning_rate": 4.992114932777162e-05, "loss": 0.2172, "step": 6882 }, { "epoch": 0.12276602575535975, "grad_norm": 0.3375721871852875, "learning_rate": 4.9921025754083794e-05, "loss": 0.3013, "step": 6883 }, { "epoch": 0.12278386187707345, "grad_norm": 0.23008044064044952, "learning_rate": 4.992090208379346e-05, "loss": 0.2178, "step": 6884 }, { "epoch": 0.12280169799878714, "grad_norm": 0.3182332217693329, "learning_rate": 4.9920778316901105e-05, "loss": 0.2497, "step": 6885 }, { "epoch": 0.12281953412050084, "grad_norm": 0.293599009513855, "learning_rate": 4.99206544534072e-05, "loss": 0.2041, "step": 6886 }, { "epoch": 0.12283737024221453, "grad_norm": 0.22377248108386993, "learning_rate": 4.992053049331224e-05, "loss": 0.2083, "step": 6887 }, { "epoch": 0.12285520636392823, "grad_norm": 0.3267717957496643, "learning_rate": 4.992040643661669e-05, "loss": 0.2569, "step": 6888 }, { "epoch": 0.12287304248564192, "grad_norm": 0.40441974997520447, "learning_rate": 4.992028228332104e-05, "loss": 0.2593, "step": 6889 }, { "epoch": 0.12289087860735562, "grad_norm": 0.27668124437332153, "learning_rate": 4.9920158033425765e-05, "loss": 0.3579, "step": 6890 }, { "epoch": 0.12290871472906931, "grad_norm": 0.2906394600868225, "learning_rate": 4.9920033686931354e-05, "loss": 0.2355, "step": 6891 }, { "epoch": 0.12292655085078301, "grad_norm": 0.21560589969158173, "learning_rate": 4.991990924383829e-05, "loss": 0.1665, "step": 6892 }, { "epoch": 0.1229443869724967, "grad_norm": 0.31353211402893066, "learning_rate": 4.9919784704147046e-05, "loss": 0.2615, "step": 6893 }, { "epoch": 0.1229622230942104, "grad_norm": 0.2735927999019623, "learning_rate": 4.9919660067858106e-05, "loss": 0.2205, "step": 6894 }, { "epoch": 0.12298005921592409, "grad_norm": 0.2787560522556305, "learning_rate": 4.991953533497196e-05, "loss": 0.2643, "step": 6895 }, { "epoch": 0.12299789533763779, "grad_norm": 0.2606993019580841, "learning_rate": 4.9919410505489086e-05, "loss": 0.2131, "step": 6896 }, { "epoch": 0.12301573145935148, "grad_norm": 0.2543543875217438, "learning_rate": 4.991928557940997e-05, "loss": 0.2143, "step": 6897 }, { "epoch": 0.12303356758106518, "grad_norm": 0.2402089387178421, "learning_rate": 4.9919160556735104e-05, "loss": 0.2459, "step": 6898 }, { "epoch": 0.12305140370277887, "grad_norm": 0.30288493633270264, "learning_rate": 4.9919035437464955e-05, "loss": 0.2601, "step": 6899 }, { "epoch": 0.12306923982449257, "grad_norm": 0.2556219696998596, "learning_rate": 4.9918910221600016e-05, "loss": 0.2384, "step": 6900 }, { "epoch": 0.12308707594620626, "grad_norm": 0.38939225673675537, "learning_rate": 4.991878490914078e-05, "loss": 0.2902, "step": 6901 }, { "epoch": 0.12310491206791996, "grad_norm": 0.23063524067401886, "learning_rate": 4.9918659500087725e-05, "loss": 0.225, "step": 6902 }, { "epoch": 0.12312274818963365, "grad_norm": 0.30427998304367065, "learning_rate": 4.991853399444134e-05, "loss": 0.1915, "step": 6903 }, { "epoch": 0.12314058431134733, "grad_norm": 0.29430699348449707, "learning_rate": 4.9918408392202114e-05, "loss": 0.2301, "step": 6904 }, { "epoch": 0.12315842043306104, "grad_norm": 0.3004782497882843, "learning_rate": 4.9918282693370535e-05, "loss": 0.1757, "step": 6905 }, { "epoch": 0.12317625655477472, "grad_norm": 0.24297845363616943, "learning_rate": 4.991815689794708e-05, "loss": 0.2394, "step": 6906 }, { "epoch": 0.12319409267648843, "grad_norm": 0.32428014278411865, "learning_rate": 4.9918031005932236e-05, "loss": 0.2479, "step": 6907 }, { "epoch": 0.12321192879820211, "grad_norm": 0.1973564326763153, "learning_rate": 4.9917905017326505e-05, "loss": 0.2077, "step": 6908 }, { "epoch": 0.12322976491991582, "grad_norm": 0.2838943302631378, "learning_rate": 4.991777893213037e-05, "loss": 0.2143, "step": 6909 }, { "epoch": 0.1232476010416295, "grad_norm": 0.3316414952278137, "learning_rate": 4.99176527503443e-05, "loss": 0.224, "step": 6910 }, { "epoch": 0.1232654371633432, "grad_norm": 0.2882927358150482, "learning_rate": 4.991752647196882e-05, "loss": 0.2262, "step": 6911 }, { "epoch": 0.1232832732850569, "grad_norm": 0.4223480522632599, "learning_rate": 4.99174000970044e-05, "loss": 0.2436, "step": 6912 }, { "epoch": 0.1233011094067706, "grad_norm": 0.31423330307006836, "learning_rate": 4.9917273625451524e-05, "loss": 0.2832, "step": 6913 }, { "epoch": 0.12331894552848428, "grad_norm": 0.3034530580043793, "learning_rate": 4.991714705731069e-05, "loss": 0.26, "step": 6914 }, { "epoch": 0.12333678165019799, "grad_norm": 0.24791258573532104, "learning_rate": 4.9917020392582395e-05, "loss": 0.1659, "step": 6915 }, { "epoch": 0.12335461777191167, "grad_norm": 0.23711775243282318, "learning_rate": 4.991689363126712e-05, "loss": 0.1974, "step": 6916 }, { "epoch": 0.12337245389362538, "grad_norm": 0.3169557452201843, "learning_rate": 4.991676677336535e-05, "loss": 0.2164, "step": 6917 }, { "epoch": 0.12339029001533906, "grad_norm": 0.2168099284172058, "learning_rate": 4.9916639818877595e-05, "loss": 0.2195, "step": 6918 }, { "epoch": 0.12340812613705276, "grad_norm": 0.27511683106422424, "learning_rate": 4.991651276780433e-05, "loss": 0.2062, "step": 6919 }, { "epoch": 0.12342596225876645, "grad_norm": 0.3275053799152374, "learning_rate": 4.9916385620146066e-05, "loss": 0.2376, "step": 6920 }, { "epoch": 0.12344379838048015, "grad_norm": 0.29005566239356995, "learning_rate": 4.991625837590328e-05, "loss": 0.2192, "step": 6921 }, { "epoch": 0.12346163450219384, "grad_norm": 0.2789865732192993, "learning_rate": 4.9916131035076474e-05, "loss": 0.1732, "step": 6922 }, { "epoch": 0.12347947062390753, "grad_norm": 0.35607048869132996, "learning_rate": 4.991600359766614e-05, "loss": 0.3044, "step": 6923 }, { "epoch": 0.12349730674562123, "grad_norm": 0.3326689600944519, "learning_rate": 4.991587606367276e-05, "loss": 0.1585, "step": 6924 }, { "epoch": 0.12351514286733492, "grad_norm": 0.2452046424150467, "learning_rate": 4.991574843309685e-05, "loss": 0.2547, "step": 6925 }, { "epoch": 0.12353297898904862, "grad_norm": 1.0032877922058105, "learning_rate": 4.991562070593889e-05, "loss": 0.2402, "step": 6926 }, { "epoch": 0.12355081511076231, "grad_norm": 0.29741159081459045, "learning_rate": 4.9915492882199375e-05, "loss": 0.2421, "step": 6927 }, { "epoch": 0.12356865123247601, "grad_norm": 0.24511712789535522, "learning_rate": 4.9915364961878805e-05, "loss": 0.2252, "step": 6928 }, { "epoch": 0.1235864873541897, "grad_norm": 0.29859545826911926, "learning_rate": 4.9915236944977676e-05, "loss": 0.2824, "step": 6929 }, { "epoch": 0.1236043234759034, "grad_norm": 0.26726216077804565, "learning_rate": 4.991510883149649e-05, "loss": 0.229, "step": 6930 }, { "epoch": 0.12362215959761709, "grad_norm": 0.2911757230758667, "learning_rate": 4.991498062143573e-05, "loss": 0.2572, "step": 6931 }, { "epoch": 0.12363999571933079, "grad_norm": 0.4179574251174927, "learning_rate": 4.99148523147959e-05, "loss": 0.2272, "step": 6932 }, { "epoch": 0.12365783184104448, "grad_norm": 0.3357955813407898, "learning_rate": 4.9914723911577496e-05, "loss": 0.1862, "step": 6933 }, { "epoch": 0.12367566796275818, "grad_norm": 0.4733642339706421, "learning_rate": 4.991459541178102e-05, "loss": 0.2488, "step": 6934 }, { "epoch": 0.12369350408447187, "grad_norm": 0.32170218229293823, "learning_rate": 4.9914466815406965e-05, "loss": 0.217, "step": 6935 }, { "epoch": 0.12371134020618557, "grad_norm": 0.32162222266197205, "learning_rate": 4.9914338122455826e-05, "loss": 0.2519, "step": 6936 }, { "epoch": 0.12372917632789926, "grad_norm": 0.257475882768631, "learning_rate": 4.991420933292812e-05, "loss": 0.25, "step": 6937 }, { "epoch": 0.12374701244961296, "grad_norm": 0.2553541362285614, "learning_rate": 4.9914080446824315e-05, "loss": 0.2234, "step": 6938 }, { "epoch": 0.12376484857132665, "grad_norm": 0.31391093134880066, "learning_rate": 4.991395146414495e-05, "loss": 0.2811, "step": 6939 }, { "epoch": 0.12378268469304035, "grad_norm": 0.21910032629966736, "learning_rate": 4.9913822384890494e-05, "loss": 0.2291, "step": 6940 }, { "epoch": 0.12380052081475404, "grad_norm": 0.27430328726768494, "learning_rate": 4.991369320906146e-05, "loss": 0.2178, "step": 6941 }, { "epoch": 0.12381835693646774, "grad_norm": 0.2509065866470337, "learning_rate": 4.9913563936658345e-05, "loss": 0.2475, "step": 6942 }, { "epoch": 0.12383619305818143, "grad_norm": 0.38076311349868774, "learning_rate": 4.991343456768165e-05, "loss": 0.2779, "step": 6943 }, { "epoch": 0.12385402917989512, "grad_norm": 0.2753581702709198, "learning_rate": 4.9913305102131874e-05, "loss": 0.189, "step": 6944 }, { "epoch": 0.12387186530160882, "grad_norm": 0.29255151748657227, "learning_rate": 4.9913175540009525e-05, "loss": 0.1742, "step": 6945 }, { "epoch": 0.1238897014233225, "grad_norm": 0.22305609285831451, "learning_rate": 4.9913045881315106e-05, "loss": 0.2015, "step": 6946 }, { "epoch": 0.12390753754503621, "grad_norm": 0.265197217464447, "learning_rate": 4.991291612604911e-05, "loss": 0.2021, "step": 6947 }, { "epoch": 0.1239253736667499, "grad_norm": 0.3825306296348572, "learning_rate": 4.991278627421206e-05, "loss": 0.2325, "step": 6948 }, { "epoch": 0.1239432097884636, "grad_norm": 0.2112373560667038, "learning_rate": 4.991265632580444e-05, "loss": 0.2315, "step": 6949 }, { "epoch": 0.12396104591017729, "grad_norm": 0.28901270031929016, "learning_rate": 4.991252628082675e-05, "loss": 0.235, "step": 6950 }, { "epoch": 0.12397888203189099, "grad_norm": 0.3946826159954071, "learning_rate": 4.9912396139279514e-05, "loss": 0.2183, "step": 6951 }, { "epoch": 0.12399671815360468, "grad_norm": 0.23901917040348053, "learning_rate": 4.991226590116322e-05, "loss": 0.1935, "step": 6952 }, { "epoch": 0.12401455427531838, "grad_norm": 0.24910520017147064, "learning_rate": 4.9912135566478385e-05, "loss": 0.2386, "step": 6953 }, { "epoch": 0.12403239039703207, "grad_norm": 0.3099457919597626, "learning_rate": 4.99120051352255e-05, "loss": 0.2173, "step": 6954 }, { "epoch": 0.12405022651874577, "grad_norm": 0.2548370361328125, "learning_rate": 4.991187460740509e-05, "loss": 0.2487, "step": 6955 }, { "epoch": 0.12406806264045946, "grad_norm": 0.2176741361618042, "learning_rate": 4.9911743983017646e-05, "loss": 0.2101, "step": 6956 }, { "epoch": 0.12408589876217316, "grad_norm": 0.30715304613113403, "learning_rate": 4.991161326206367e-05, "loss": 0.2348, "step": 6957 }, { "epoch": 0.12410373488388685, "grad_norm": 0.29987630248069763, "learning_rate": 4.991148244454369e-05, "loss": 0.2031, "step": 6958 }, { "epoch": 0.12412157100560055, "grad_norm": 0.3162432610988617, "learning_rate": 4.991135153045819e-05, "loss": 0.2179, "step": 6959 }, { "epoch": 0.12413940712731424, "grad_norm": 0.24059271812438965, "learning_rate": 4.9911220519807686e-05, "loss": 0.2392, "step": 6960 }, { "epoch": 0.12415724324902794, "grad_norm": 0.6025384664535522, "learning_rate": 4.991108941259269e-05, "loss": 0.2131, "step": 6961 }, { "epoch": 0.12417507937074163, "grad_norm": 0.23318976163864136, "learning_rate": 4.991095820881371e-05, "loss": 0.2249, "step": 6962 }, { "epoch": 0.12419291549245531, "grad_norm": 0.19816309213638306, "learning_rate": 4.991082690847125e-05, "loss": 0.2005, "step": 6963 }, { "epoch": 0.12421075161416902, "grad_norm": 0.30013400316238403, "learning_rate": 4.991069551156582e-05, "loss": 0.2298, "step": 6964 }, { "epoch": 0.1242285877358827, "grad_norm": 0.33225423097610474, "learning_rate": 4.991056401809794e-05, "loss": 0.2784, "step": 6965 }, { "epoch": 0.1242464238575964, "grad_norm": 0.23370786011219025, "learning_rate": 4.99104324280681e-05, "loss": 0.1799, "step": 6966 }, { "epoch": 0.1242642599793101, "grad_norm": 0.29927730560302734, "learning_rate": 4.9910300741476815e-05, "loss": 0.2371, "step": 6967 }, { "epoch": 0.1242820961010238, "grad_norm": 0.2729722261428833, "learning_rate": 4.991016895832461e-05, "loss": 0.2465, "step": 6968 }, { "epoch": 0.12429993222273748, "grad_norm": 0.24219760298728943, "learning_rate": 4.991003707861198e-05, "loss": 0.2032, "step": 6969 }, { "epoch": 0.12431776834445118, "grad_norm": 0.3124775290489197, "learning_rate": 4.990990510233945e-05, "loss": 0.2507, "step": 6970 }, { "epoch": 0.12433560446616487, "grad_norm": 0.29919275641441345, "learning_rate": 4.9909773029507525e-05, "loss": 0.2153, "step": 6971 }, { "epoch": 0.12435344058787857, "grad_norm": 0.3777317702770233, "learning_rate": 4.990964086011671e-05, "loss": 0.2294, "step": 6972 }, { "epoch": 0.12437127670959226, "grad_norm": 0.3543206751346588, "learning_rate": 4.990950859416752e-05, "loss": 0.2033, "step": 6973 }, { "epoch": 0.12438911283130596, "grad_norm": 0.2996242940425873, "learning_rate": 4.990937623166048e-05, "loss": 0.2033, "step": 6974 }, { "epoch": 0.12440694895301965, "grad_norm": 0.27193644642829895, "learning_rate": 4.9909243772596096e-05, "loss": 0.2304, "step": 6975 }, { "epoch": 0.12442478507473335, "grad_norm": 0.2828409969806671, "learning_rate": 4.990911121697487e-05, "loss": 0.1804, "step": 6976 }, { "epoch": 0.12444262119644704, "grad_norm": 0.29215842485427856, "learning_rate": 4.990897856479733e-05, "loss": 0.1992, "step": 6977 }, { "epoch": 0.12446045731816074, "grad_norm": 0.2817901074886322, "learning_rate": 4.990884581606399e-05, "loss": 0.2286, "step": 6978 }, { "epoch": 0.12447829343987443, "grad_norm": 0.35553768277168274, "learning_rate": 4.990871297077535e-05, "loss": 0.2617, "step": 6979 }, { "epoch": 0.12449612956158813, "grad_norm": 0.3343108296394348, "learning_rate": 4.9908580028931946e-05, "loss": 0.2858, "step": 6980 }, { "epoch": 0.12451396568330182, "grad_norm": 0.2859632968902588, "learning_rate": 4.990844699053427e-05, "loss": 0.2155, "step": 6981 }, { "epoch": 0.12453180180501552, "grad_norm": 0.38963982462882996, "learning_rate": 4.9908313855582864e-05, "loss": 0.2182, "step": 6982 }, { "epoch": 0.12454963792672921, "grad_norm": 0.30357739329338074, "learning_rate": 4.9908180624078225e-05, "loss": 0.2206, "step": 6983 }, { "epoch": 0.1245674740484429, "grad_norm": 0.26114726066589355, "learning_rate": 4.990804729602088e-05, "loss": 0.234, "step": 6984 }, { "epoch": 0.1245853101701566, "grad_norm": 0.2825394570827484, "learning_rate": 4.9907913871411334e-05, "loss": 0.1947, "step": 6985 }, { "epoch": 0.12460314629187029, "grad_norm": 0.30148234963417053, "learning_rate": 4.990778035025011e-05, "loss": 0.25, "step": 6986 }, { "epoch": 0.12462098241358399, "grad_norm": 0.3360712230205536, "learning_rate": 4.990764673253773e-05, "loss": 0.26, "step": 6987 }, { "epoch": 0.12463881853529768, "grad_norm": 0.2792559266090393, "learning_rate": 4.990751301827471e-05, "loss": 0.2438, "step": 6988 }, { "epoch": 0.12465665465701138, "grad_norm": 0.2513696551322937, "learning_rate": 4.990737920746156e-05, "loss": 0.1989, "step": 6989 }, { "epoch": 0.12467449077872507, "grad_norm": 0.2617587447166443, "learning_rate": 4.990724530009881e-05, "loss": 0.188, "step": 6990 }, { "epoch": 0.12469232690043877, "grad_norm": 0.32672059535980225, "learning_rate": 4.990711129618698e-05, "loss": 0.1809, "step": 6991 }, { "epoch": 0.12471016302215246, "grad_norm": 0.2739046514034271, "learning_rate": 4.990697719572658e-05, "loss": 0.2026, "step": 6992 }, { "epoch": 0.12472799914386616, "grad_norm": 0.3025754690170288, "learning_rate": 4.9906842998718126e-05, "loss": 0.236, "step": 6993 }, { "epoch": 0.12474583526557985, "grad_norm": 0.3255358040332794, "learning_rate": 4.9906708705162155e-05, "loss": 0.2637, "step": 6994 }, { "epoch": 0.12476367138729355, "grad_norm": 0.2542930245399475, "learning_rate": 4.990657431505917e-05, "loss": 0.1842, "step": 6995 }, { "epoch": 0.12478150750900724, "grad_norm": 0.29506662487983704, "learning_rate": 4.9906439828409715e-05, "loss": 0.2241, "step": 6996 }, { "epoch": 0.12479934363072094, "grad_norm": 0.22836719453334808, "learning_rate": 4.9906305245214284e-05, "loss": 0.2128, "step": 6997 }, { "epoch": 0.12481717975243463, "grad_norm": 0.24722233414649963, "learning_rate": 4.990617056547342e-05, "loss": 0.1961, "step": 6998 }, { "epoch": 0.12483501587414833, "grad_norm": 0.2995939552783966, "learning_rate": 4.9906035789187626e-05, "loss": 0.2237, "step": 6999 }, { "epoch": 0.12485285199586202, "grad_norm": 0.2700544595718384, "learning_rate": 4.9905900916357437e-05, "loss": 0.2316, "step": 7000 }, { "epoch": 0.12485285199586202, "eval_loss": 0.230218306183815, "eval_runtime": 113.194, "eval_samples_per_second": 9.046, "eval_steps_per_second": 1.511, "step": 7000 }, { "epoch": 0.12487068811757572, "grad_norm": 0.3256397843360901, "learning_rate": 4.990576594698339e-05, "loss": 0.2307, "step": 7001 }, { "epoch": 0.12488852423928941, "grad_norm": 0.3033009469509125, "learning_rate": 4.990563088106598e-05, "loss": 0.2325, "step": 7002 }, { "epoch": 0.12490636036100311, "grad_norm": 0.2994910478591919, "learning_rate": 4.9905495718605735e-05, "loss": 0.2616, "step": 7003 }, { "epoch": 0.1249241964827168, "grad_norm": 0.23377302289009094, "learning_rate": 4.9905360459603196e-05, "loss": 0.1878, "step": 7004 }, { "epoch": 0.12494203260443049, "grad_norm": 0.9831739664077759, "learning_rate": 4.990522510405887e-05, "loss": 0.3694, "step": 7005 }, { "epoch": 0.12495986872614419, "grad_norm": 0.36338818073272705, "learning_rate": 4.99050896519733e-05, "loss": 0.2196, "step": 7006 }, { "epoch": 0.12497770484785788, "grad_norm": 0.307064026594162, "learning_rate": 4.9904954103347e-05, "loss": 0.2506, "step": 7007 }, { "epoch": 0.12499554096957158, "grad_norm": 0.3263118267059326, "learning_rate": 4.990481845818049e-05, "loss": 0.2595, "step": 7008 }, { "epoch": 0.12501337709128527, "grad_norm": 0.35931575298309326, "learning_rate": 4.9904682716474305e-05, "loss": 0.2843, "step": 7009 }, { "epoch": 0.12503121321299895, "grad_norm": 0.3789559602737427, "learning_rate": 4.990454687822896e-05, "loss": 0.3291, "step": 7010 }, { "epoch": 0.12504904933471267, "grad_norm": 0.3388417065143585, "learning_rate": 4.9904410943445e-05, "loss": 0.2229, "step": 7011 }, { "epoch": 0.12506688545642636, "grad_norm": 0.20887786149978638, "learning_rate": 4.990427491212294e-05, "loss": 0.2237, "step": 7012 }, { "epoch": 0.12508472157814005, "grad_norm": 0.3833588659763336, "learning_rate": 4.990413878426331e-05, "loss": 0.2421, "step": 7013 }, { "epoch": 0.12510255769985373, "grad_norm": 0.3767065405845642, "learning_rate": 4.990400255986663e-05, "loss": 0.2479, "step": 7014 }, { "epoch": 0.12512039382156745, "grad_norm": 0.34950730204582214, "learning_rate": 4.990386623893344e-05, "loss": 0.2592, "step": 7015 }, { "epoch": 0.12513822994328114, "grad_norm": 0.2206435352563858, "learning_rate": 4.9903729821464255e-05, "loss": 0.1807, "step": 7016 }, { "epoch": 0.12515606606499483, "grad_norm": 0.4904520809650421, "learning_rate": 4.990359330745962e-05, "loss": 0.3227, "step": 7017 }, { "epoch": 0.1251739021867085, "grad_norm": 0.22506429255008698, "learning_rate": 4.9903456696920054e-05, "loss": 0.2118, "step": 7018 }, { "epoch": 0.12519173830842223, "grad_norm": 0.2756589949131012, "learning_rate": 4.990331998984609e-05, "loss": 0.2696, "step": 7019 }, { "epoch": 0.12520957443013592, "grad_norm": 0.20561277866363525, "learning_rate": 4.990318318623825e-05, "loss": 0.1979, "step": 7020 }, { "epoch": 0.1252274105518496, "grad_norm": 0.34968605637550354, "learning_rate": 4.990304628609708e-05, "loss": 0.2748, "step": 7021 }, { "epoch": 0.1252452466735633, "grad_norm": 0.2604430019855499, "learning_rate": 4.990290928942309e-05, "loss": 0.232, "step": 7022 }, { "epoch": 0.125263082795277, "grad_norm": 0.3240351974964142, "learning_rate": 4.990277219621683e-05, "loss": 0.2242, "step": 7023 }, { "epoch": 0.1252809189169907, "grad_norm": 0.3275964558124542, "learning_rate": 4.990263500647883e-05, "loss": 0.2374, "step": 7024 }, { "epoch": 0.12529875503870438, "grad_norm": 0.611990749835968, "learning_rate": 4.99024977202096e-05, "loss": 0.2739, "step": 7025 }, { "epoch": 0.12531659116041807, "grad_norm": 0.28065142035484314, "learning_rate": 4.9902360337409695e-05, "loss": 0.2148, "step": 7026 }, { "epoch": 0.12533442728213176, "grad_norm": 0.2256687432527542, "learning_rate": 4.990222285807964e-05, "loss": 0.22, "step": 7027 }, { "epoch": 0.12535226340384548, "grad_norm": 0.3032439351081848, "learning_rate": 4.990208528221997e-05, "loss": 0.2012, "step": 7028 }, { "epoch": 0.12537009952555916, "grad_norm": 0.37160810828208923, "learning_rate": 4.9901947609831216e-05, "loss": 0.2148, "step": 7029 }, { "epoch": 0.12538793564727285, "grad_norm": 0.33610060811042786, "learning_rate": 4.9901809840913915e-05, "loss": 0.2868, "step": 7030 }, { "epoch": 0.12540577176898654, "grad_norm": 0.3161230683326721, "learning_rate": 4.990167197546859e-05, "loss": 0.2118, "step": 7031 }, { "epoch": 0.12542360789070026, "grad_norm": 0.3094927668571472, "learning_rate": 4.990153401349579e-05, "loss": 0.2372, "step": 7032 }, { "epoch": 0.12544144401241394, "grad_norm": 0.31055590510368347, "learning_rate": 4.990139595499605e-05, "loss": 0.2715, "step": 7033 }, { "epoch": 0.12545928013412763, "grad_norm": 0.2605281472206116, "learning_rate": 4.9901257799969884e-05, "loss": 0.2313, "step": 7034 }, { "epoch": 0.12547711625584132, "grad_norm": 0.30338236689567566, "learning_rate": 4.990111954841785e-05, "loss": 0.2343, "step": 7035 }, { "epoch": 0.12549495237755504, "grad_norm": 0.3016177713871002, "learning_rate": 4.990098120034048e-05, "loss": 0.2303, "step": 7036 }, { "epoch": 0.12551278849926872, "grad_norm": 0.3031337559223175, "learning_rate": 4.9900842755738296e-05, "loss": 0.2328, "step": 7037 }, { "epoch": 0.1255306246209824, "grad_norm": 0.4175161123275757, "learning_rate": 4.9900704214611856e-05, "loss": 0.2587, "step": 7038 }, { "epoch": 0.1255484607426961, "grad_norm": 0.3712256848812103, "learning_rate": 4.9900565576961675e-05, "loss": 0.2534, "step": 7039 }, { "epoch": 0.12556629686440982, "grad_norm": 0.28539612889289856, "learning_rate": 4.990042684278831e-05, "loss": 0.2157, "step": 7040 }, { "epoch": 0.1255841329861235, "grad_norm": 0.3292580544948578, "learning_rate": 4.990028801209229e-05, "loss": 0.2726, "step": 7041 }, { "epoch": 0.1256019691078372, "grad_norm": 0.3154231607913971, "learning_rate": 4.9900149084874146e-05, "loss": 0.2791, "step": 7042 }, { "epoch": 0.12561980522955088, "grad_norm": 0.25210949778556824, "learning_rate": 4.990001006113443e-05, "loss": 0.2359, "step": 7043 }, { "epoch": 0.12563764135126457, "grad_norm": 0.32752400636672974, "learning_rate": 4.989987094087367e-05, "loss": 0.2439, "step": 7044 }, { "epoch": 0.12565547747297828, "grad_norm": 0.2529580891132355, "learning_rate": 4.9899731724092414e-05, "loss": 0.2527, "step": 7045 }, { "epoch": 0.12567331359469197, "grad_norm": 0.26535871624946594, "learning_rate": 4.9899592410791203e-05, "loss": 0.2259, "step": 7046 }, { "epoch": 0.12569114971640566, "grad_norm": 0.270537793636322, "learning_rate": 4.989945300097056e-05, "loss": 0.2232, "step": 7047 }, { "epoch": 0.12570898583811935, "grad_norm": 0.46519288420677185, "learning_rate": 4.989931349463105e-05, "loss": 0.2533, "step": 7048 }, { "epoch": 0.12572682195983306, "grad_norm": 0.2930956482887268, "learning_rate": 4.9899173891773185e-05, "loss": 0.2248, "step": 7049 }, { "epoch": 0.12574465808154675, "grad_norm": 0.21747002005577087, "learning_rate": 4.9899034192397534e-05, "loss": 0.1907, "step": 7050 }, { "epoch": 0.12576249420326044, "grad_norm": 0.2502320110797882, "learning_rate": 4.989889439650463e-05, "loss": 0.1992, "step": 7051 }, { "epoch": 0.12578033032497413, "grad_norm": 0.2994558811187744, "learning_rate": 4.9898754504095e-05, "loss": 0.2814, "step": 7052 }, { "epoch": 0.12579816644668784, "grad_norm": 0.2771264910697937, "learning_rate": 4.989861451516921e-05, "loss": 0.2194, "step": 7053 }, { "epoch": 0.12581600256840153, "grad_norm": 0.2600449025630951, "learning_rate": 4.989847442972778e-05, "loss": 0.2088, "step": 7054 }, { "epoch": 0.12583383869011522, "grad_norm": 0.26924750208854675, "learning_rate": 4.989833424777127e-05, "loss": 0.2414, "step": 7055 }, { "epoch": 0.1258516748118289, "grad_norm": 0.2907157242298126, "learning_rate": 4.989819396930021e-05, "loss": 0.243, "step": 7056 }, { "epoch": 0.12586951093354262, "grad_norm": 0.26334866881370544, "learning_rate": 4.989805359431515e-05, "loss": 0.288, "step": 7057 }, { "epoch": 0.1258873470552563, "grad_norm": 0.26659277081489563, "learning_rate": 4.9897913122816644e-05, "loss": 0.2132, "step": 7058 }, { "epoch": 0.12590518317697, "grad_norm": 0.2721278667449951, "learning_rate": 4.9897772554805224e-05, "loss": 0.2501, "step": 7059 }, { "epoch": 0.12592301929868369, "grad_norm": 0.23906442523002625, "learning_rate": 4.989763189028144e-05, "loss": 0.1896, "step": 7060 }, { "epoch": 0.1259408554203974, "grad_norm": 0.25772127509117126, "learning_rate": 4.989749112924583e-05, "loss": 0.2225, "step": 7061 }, { "epoch": 0.1259586915421111, "grad_norm": 0.28775516152381897, "learning_rate": 4.989735027169895e-05, "loss": 0.2317, "step": 7062 }, { "epoch": 0.12597652766382478, "grad_norm": 0.22797401249408722, "learning_rate": 4.989720931764134e-05, "loss": 0.2131, "step": 7063 }, { "epoch": 0.12599436378553847, "grad_norm": 0.2809610366821289, "learning_rate": 4.989706826707354e-05, "loss": 0.2476, "step": 7064 }, { "epoch": 0.12601219990725215, "grad_norm": 0.2797238826751709, "learning_rate": 4.989692711999611e-05, "loss": 0.2934, "step": 7065 }, { "epoch": 0.12603003602896587, "grad_norm": 0.2841593325138092, "learning_rate": 4.9896785876409594e-05, "loss": 0.233, "step": 7066 }, { "epoch": 0.12604787215067956, "grad_norm": 0.2755345404148102, "learning_rate": 4.989664453631453e-05, "loss": 0.2209, "step": 7067 }, { "epoch": 0.12606570827239325, "grad_norm": 0.2395692616701126, "learning_rate": 4.989650309971148e-05, "loss": 0.2216, "step": 7068 }, { "epoch": 0.12608354439410693, "grad_norm": 0.2653280198574066, "learning_rate": 4.989636156660098e-05, "loss": 0.2229, "step": 7069 }, { "epoch": 0.12610138051582065, "grad_norm": 0.3702566623687744, "learning_rate": 4.989621993698359e-05, "loss": 0.2473, "step": 7070 }, { "epoch": 0.12611921663753434, "grad_norm": 0.39347711205482483, "learning_rate": 4.9896078210859845e-05, "loss": 0.2803, "step": 7071 }, { "epoch": 0.12613705275924803, "grad_norm": 0.21345220506191254, "learning_rate": 4.9895936388230304e-05, "loss": 0.1963, "step": 7072 }, { "epoch": 0.1261548888809617, "grad_norm": 0.28686854243278503, "learning_rate": 4.9895794469095514e-05, "loss": 0.2214, "step": 7073 }, { "epoch": 0.12617272500267543, "grad_norm": 0.2529907822608948, "learning_rate": 4.989565245345603e-05, "loss": 0.2153, "step": 7074 }, { "epoch": 0.12619056112438912, "grad_norm": 0.28283044695854187, "learning_rate": 4.98955103413124e-05, "loss": 0.2356, "step": 7075 }, { "epoch": 0.1262083972461028, "grad_norm": 0.35509192943573, "learning_rate": 4.989536813266516e-05, "loss": 0.2731, "step": 7076 }, { "epoch": 0.1262262333678165, "grad_norm": 0.33380529284477234, "learning_rate": 4.9895225827514894e-05, "loss": 0.1978, "step": 7077 }, { "epoch": 0.1262440694895302, "grad_norm": 0.3438994586467743, "learning_rate": 4.989508342586212e-05, "loss": 0.2279, "step": 7078 }, { "epoch": 0.1262619056112439, "grad_norm": 0.31843382120132446, "learning_rate": 4.989494092770741e-05, "loss": 0.2683, "step": 7079 }, { "epoch": 0.12627974173295758, "grad_norm": 0.28281909227371216, "learning_rate": 4.9894798333051305e-05, "loss": 0.2026, "step": 7080 }, { "epoch": 0.12629757785467127, "grad_norm": 0.3684866726398468, "learning_rate": 4.989465564189437e-05, "loss": 0.2174, "step": 7081 }, { "epoch": 0.126315413976385, "grad_norm": 0.27203404903411865, "learning_rate": 4.989451285423715e-05, "loss": 0.2944, "step": 7082 }, { "epoch": 0.12633325009809868, "grad_norm": 0.19522850215435028, "learning_rate": 4.98943699700802e-05, "loss": 0.2127, "step": 7083 }, { "epoch": 0.12635108621981236, "grad_norm": 0.2942343056201935, "learning_rate": 4.989422698942407e-05, "loss": 0.2893, "step": 7084 }, { "epoch": 0.12636892234152605, "grad_norm": 0.1972874253988266, "learning_rate": 4.989408391226932e-05, "loss": 0.2022, "step": 7085 }, { "epoch": 0.12638675846323974, "grad_norm": 0.24227045476436615, "learning_rate": 4.989394073861651e-05, "loss": 0.2418, "step": 7086 }, { "epoch": 0.12640459458495346, "grad_norm": 0.2634616792201996, "learning_rate": 4.989379746846617e-05, "loss": 0.2009, "step": 7087 }, { "epoch": 0.12642243070666714, "grad_norm": 0.327183336019516, "learning_rate": 4.989365410181889e-05, "loss": 0.297, "step": 7088 }, { "epoch": 0.12644026682838083, "grad_norm": 0.2170710265636444, "learning_rate": 4.98935106386752e-05, "loss": 0.2102, "step": 7089 }, { "epoch": 0.12645810295009452, "grad_norm": 0.2536414861679077, "learning_rate": 4.989336707903567e-05, "loss": 0.2414, "step": 7090 }, { "epoch": 0.12647593907180824, "grad_norm": 0.36148250102996826, "learning_rate": 4.9893223422900845e-05, "loss": 0.2368, "step": 7091 }, { "epoch": 0.12649377519352192, "grad_norm": 0.19189855456352234, "learning_rate": 4.9893079670271294e-05, "loss": 0.1983, "step": 7092 }, { "epoch": 0.1265116113152356, "grad_norm": 0.25199437141418457, "learning_rate": 4.989293582114756e-05, "loss": 0.2555, "step": 7093 }, { "epoch": 0.1265294474369493, "grad_norm": 0.30870717763900757, "learning_rate": 4.989279187553022e-05, "loss": 0.2599, "step": 7094 }, { "epoch": 0.12654728355866302, "grad_norm": 0.22379128634929657, "learning_rate": 4.989264783341981e-05, "loss": 0.2159, "step": 7095 }, { "epoch": 0.1265651196803767, "grad_norm": 0.2508992850780487, "learning_rate": 4.989250369481691e-05, "loss": 0.2514, "step": 7096 }, { "epoch": 0.1265829558020904, "grad_norm": 0.2804580330848694, "learning_rate": 4.989235945972206e-05, "loss": 0.272, "step": 7097 }, { "epoch": 0.12660079192380408, "grad_norm": 0.27964669466018677, "learning_rate": 4.9892215128135835e-05, "loss": 0.2291, "step": 7098 }, { "epoch": 0.1266186280455178, "grad_norm": 0.26933884620666504, "learning_rate": 4.989207070005878e-05, "loss": 0.2207, "step": 7099 }, { "epoch": 0.12663646416723148, "grad_norm": 0.21618296205997467, "learning_rate": 4.9891926175491464e-05, "loss": 0.1993, "step": 7100 }, { "epoch": 0.12665430028894517, "grad_norm": 0.46692606806755066, "learning_rate": 4.9891781554434445e-05, "loss": 0.1942, "step": 7101 }, { "epoch": 0.12667213641065886, "grad_norm": 0.49683696031570435, "learning_rate": 4.9891636836888285e-05, "loss": 0.2264, "step": 7102 }, { "epoch": 0.12668997253237257, "grad_norm": 0.4323539435863495, "learning_rate": 4.989149202285354e-05, "loss": 0.2789, "step": 7103 }, { "epoch": 0.12670780865408626, "grad_norm": 0.3104749023914337, "learning_rate": 4.9891347112330775e-05, "loss": 0.2795, "step": 7104 }, { "epoch": 0.12672564477579995, "grad_norm": 0.4278308153152466, "learning_rate": 4.989120210532056e-05, "loss": 0.2591, "step": 7105 }, { "epoch": 0.12674348089751364, "grad_norm": 0.27192848920822144, "learning_rate": 4.9891057001823435e-05, "loss": 0.197, "step": 7106 }, { "epoch": 0.12676131701922733, "grad_norm": 0.3221433460712433, "learning_rate": 4.9890911801839976e-05, "loss": 0.2084, "step": 7107 }, { "epoch": 0.12677915314094104, "grad_norm": 0.3191899061203003, "learning_rate": 4.9890766505370757e-05, "loss": 0.2936, "step": 7108 }, { "epoch": 0.12679698926265473, "grad_norm": 0.23041144013404846, "learning_rate": 4.9890621112416326e-05, "loss": 0.2087, "step": 7109 }, { "epoch": 0.12681482538436842, "grad_norm": 0.2721308171749115, "learning_rate": 4.9890475622977253e-05, "loss": 0.2475, "step": 7110 }, { "epoch": 0.1268326615060821, "grad_norm": 0.3211425244808197, "learning_rate": 4.989033003705409e-05, "loss": 0.2238, "step": 7111 }, { "epoch": 0.12685049762779582, "grad_norm": 0.43620380759239197, "learning_rate": 4.989018435464742e-05, "loss": 0.2644, "step": 7112 }, { "epoch": 0.1268683337495095, "grad_norm": 0.24428436160087585, "learning_rate": 4.9890038575757794e-05, "loss": 0.2495, "step": 7113 }, { "epoch": 0.1268861698712232, "grad_norm": 0.3536229133605957, "learning_rate": 4.9889892700385784e-05, "loss": 0.2727, "step": 7114 }, { "epoch": 0.12690400599293689, "grad_norm": 0.33278393745422363, "learning_rate": 4.988974672853195e-05, "loss": 0.254, "step": 7115 }, { "epoch": 0.1269218421146506, "grad_norm": 0.3001064956188202, "learning_rate": 4.9889600660196863e-05, "loss": 0.2742, "step": 7116 }, { "epoch": 0.1269396782363643, "grad_norm": 0.32525044679641724, "learning_rate": 4.988945449538109e-05, "loss": 0.2575, "step": 7117 }, { "epoch": 0.12695751435807798, "grad_norm": 0.323993444442749, "learning_rate": 4.988930823408519e-05, "loss": 0.214, "step": 7118 }, { "epoch": 0.12697535047979167, "grad_norm": 0.23346802592277527, "learning_rate": 4.9889161876309736e-05, "loss": 0.2648, "step": 7119 }, { "epoch": 0.12699318660150538, "grad_norm": 0.24181324243545532, "learning_rate": 4.98890154220553e-05, "loss": 0.183, "step": 7120 }, { "epoch": 0.12701102272321907, "grad_norm": 0.29001834988594055, "learning_rate": 4.988886887132244e-05, "loss": 0.2229, "step": 7121 }, { "epoch": 0.12702885884493276, "grad_norm": 0.2402995377779007, "learning_rate": 4.988872222411172e-05, "loss": 0.2124, "step": 7122 }, { "epoch": 0.12704669496664645, "grad_norm": 0.3174564242362976, "learning_rate": 4.9888575480423725e-05, "loss": 0.2638, "step": 7123 }, { "epoch": 0.12706453108836013, "grad_norm": 0.30612319707870483, "learning_rate": 4.9888428640259016e-05, "loss": 0.2281, "step": 7124 }, { "epoch": 0.12708236721007385, "grad_norm": 0.2363874316215515, "learning_rate": 4.9888281703618156e-05, "loss": 0.1912, "step": 7125 }, { "epoch": 0.12710020333178754, "grad_norm": 0.3057478666305542, "learning_rate": 4.988813467050173e-05, "loss": 0.2342, "step": 7126 }, { "epoch": 0.12711803945350122, "grad_norm": 0.24093061685562134, "learning_rate": 4.9887987540910285e-05, "loss": 0.2417, "step": 7127 }, { "epoch": 0.1271358755752149, "grad_norm": 0.3051645755767822, "learning_rate": 4.988784031484441e-05, "loss": 0.2063, "step": 7128 }, { "epoch": 0.12715371169692863, "grad_norm": 0.3925570845603943, "learning_rate": 4.9887692992304666e-05, "loss": 0.3614, "step": 7129 }, { "epoch": 0.12717154781864232, "grad_norm": 0.25032737851142883, "learning_rate": 4.988754557329164e-05, "loss": 0.2481, "step": 7130 }, { "epoch": 0.127189383940356, "grad_norm": 0.3387826979160309, "learning_rate": 4.988739805780588e-05, "loss": 0.2183, "step": 7131 }, { "epoch": 0.1272072200620697, "grad_norm": 0.1918492615222931, "learning_rate": 4.988725044584798e-05, "loss": 0.2358, "step": 7132 }, { "epoch": 0.1272250561837834, "grad_norm": 0.2590310871601105, "learning_rate": 4.988710273741849e-05, "loss": 0.2327, "step": 7133 }, { "epoch": 0.1272428923054971, "grad_norm": 0.273307740688324, "learning_rate": 4.9886954932518e-05, "loss": 0.212, "step": 7134 }, { "epoch": 0.12726072842721078, "grad_norm": 0.4322851002216339, "learning_rate": 4.988680703114708e-05, "loss": 0.2269, "step": 7135 }, { "epoch": 0.12727856454892447, "grad_norm": 0.28264355659484863, "learning_rate": 4.988665903330629e-05, "loss": 0.2379, "step": 7136 }, { "epoch": 0.1272964006706382, "grad_norm": 0.40426623821258545, "learning_rate": 4.988651093899622e-05, "loss": 0.3116, "step": 7137 }, { "epoch": 0.12731423679235188, "grad_norm": 0.203849658370018, "learning_rate": 4.988636274821744e-05, "loss": 0.206, "step": 7138 }, { "epoch": 0.12733207291406556, "grad_norm": 0.371700257062912, "learning_rate": 4.988621446097052e-05, "loss": 0.2767, "step": 7139 }, { "epoch": 0.12734990903577925, "grad_norm": 0.24924980103969574, "learning_rate": 4.9886066077256036e-05, "loss": 0.2138, "step": 7140 }, { "epoch": 0.12736774515749297, "grad_norm": 0.31650349497795105, "learning_rate": 4.9885917597074564e-05, "loss": 0.2502, "step": 7141 }, { "epoch": 0.12738558127920666, "grad_norm": 0.37491849064826965, "learning_rate": 4.9885769020426685e-05, "loss": 0.2849, "step": 7142 }, { "epoch": 0.12740341740092034, "grad_norm": 0.23761944472789764, "learning_rate": 4.988562034731297e-05, "loss": 0.2161, "step": 7143 }, { "epoch": 0.12742125352263403, "grad_norm": 0.24542593955993652, "learning_rate": 4.988547157773399e-05, "loss": 0.2216, "step": 7144 }, { "epoch": 0.12743908964434772, "grad_norm": 0.3461378216743469, "learning_rate": 4.988532271169033e-05, "loss": 0.2744, "step": 7145 }, { "epoch": 0.12745692576606144, "grad_norm": 0.24887950718402863, "learning_rate": 4.988517374918257e-05, "loss": 0.2093, "step": 7146 }, { "epoch": 0.12747476188777512, "grad_norm": 0.4498194754123688, "learning_rate": 4.988502469021127e-05, "loss": 0.2834, "step": 7147 }, { "epoch": 0.1274925980094888, "grad_norm": 0.24465256929397583, "learning_rate": 4.988487553477702e-05, "loss": 0.2407, "step": 7148 }, { "epoch": 0.1275104341312025, "grad_norm": 0.2679983675479889, "learning_rate": 4.9884726282880407e-05, "loss": 0.2291, "step": 7149 }, { "epoch": 0.12752827025291621, "grad_norm": 0.26163923740386963, "learning_rate": 4.988457693452199e-05, "loss": 0.2076, "step": 7150 }, { "epoch": 0.1275461063746299, "grad_norm": 0.2710815370082855, "learning_rate": 4.988442748970237e-05, "loss": 0.2657, "step": 7151 }, { "epoch": 0.1275639424963436, "grad_norm": 0.3420710265636444, "learning_rate": 4.98842779484221e-05, "loss": 0.2604, "step": 7152 }, { "epoch": 0.12758177861805728, "grad_norm": 0.26653480529785156, "learning_rate": 4.988412831068179e-05, "loss": 0.2263, "step": 7153 }, { "epoch": 0.127599614739771, "grad_norm": 0.42929571866989136, "learning_rate": 4.9883978576481996e-05, "loss": 0.2385, "step": 7154 }, { "epoch": 0.12761745086148468, "grad_norm": 0.28003376722335815, "learning_rate": 4.98838287458233e-05, "loss": 0.2799, "step": 7155 }, { "epoch": 0.12763528698319837, "grad_norm": 0.2639767825603485, "learning_rate": 4.988367881870629e-05, "loss": 0.2215, "step": 7156 }, { "epoch": 0.12765312310491206, "grad_norm": 0.214997336268425, "learning_rate": 4.988352879513155e-05, "loss": 0.2058, "step": 7157 }, { "epoch": 0.12767095922662577, "grad_norm": 0.40716752409935, "learning_rate": 4.988337867509967e-05, "loss": 0.2574, "step": 7158 }, { "epoch": 0.12768879534833946, "grad_norm": 0.2493445724248886, "learning_rate": 4.9883228458611204e-05, "loss": 0.2082, "step": 7159 }, { "epoch": 0.12770663147005315, "grad_norm": 0.3026892840862274, "learning_rate": 4.988307814566675e-05, "loss": 0.2066, "step": 7160 }, { "epoch": 0.12772446759176684, "grad_norm": 0.22813519835472107, "learning_rate": 4.98829277362669e-05, "loss": 0.2255, "step": 7161 }, { "epoch": 0.12774230371348055, "grad_norm": 0.31025686860084534, "learning_rate": 4.9882777230412227e-05, "loss": 0.2771, "step": 7162 }, { "epoch": 0.12776013983519424, "grad_norm": 0.2800917327404022, "learning_rate": 4.988262662810331e-05, "loss": 0.2425, "step": 7163 }, { "epoch": 0.12777797595690793, "grad_norm": 0.30579638481140137, "learning_rate": 4.988247592934074e-05, "loss": 0.2653, "step": 7164 }, { "epoch": 0.12779581207862162, "grad_norm": 0.2706877589225769, "learning_rate": 4.9882325134125096e-05, "loss": 0.2159, "step": 7165 }, { "epoch": 0.1278136482003353, "grad_norm": 0.3356277048587799, "learning_rate": 4.988217424245697e-05, "loss": 0.2746, "step": 7166 }, { "epoch": 0.12783148432204902, "grad_norm": 0.2686861455440521, "learning_rate": 4.988202325433694e-05, "loss": 0.2254, "step": 7167 }, { "epoch": 0.1278493204437627, "grad_norm": 0.24877183139324188, "learning_rate": 4.98818721697656e-05, "loss": 0.2136, "step": 7168 }, { "epoch": 0.1278671565654764, "grad_norm": 0.2516845166683197, "learning_rate": 4.988172098874352e-05, "loss": 0.2355, "step": 7169 }, { "epoch": 0.12788499268719009, "grad_norm": 0.27416130900382996, "learning_rate": 4.98815697112713e-05, "loss": 0.2415, "step": 7170 }, { "epoch": 0.1279028288089038, "grad_norm": 0.3243381083011627, "learning_rate": 4.988141833734952e-05, "loss": 0.2601, "step": 7171 }, { "epoch": 0.1279206649306175, "grad_norm": 0.2594567537307739, "learning_rate": 4.988126686697877e-05, "loss": 0.2234, "step": 7172 }, { "epoch": 0.12793850105233118, "grad_norm": 0.23373720049858093, "learning_rate": 4.9881115300159644e-05, "loss": 0.2238, "step": 7173 }, { "epoch": 0.12795633717404487, "grad_norm": 0.2778187692165375, "learning_rate": 4.988096363689271e-05, "loss": 0.2632, "step": 7174 }, { "epoch": 0.12797417329575858, "grad_norm": 0.2085629254579544, "learning_rate": 4.988081187717857e-05, "loss": 0.2305, "step": 7175 }, { "epoch": 0.12799200941747227, "grad_norm": 0.272589772939682, "learning_rate": 4.988066002101781e-05, "loss": 0.1807, "step": 7176 }, { "epoch": 0.12800984553918596, "grad_norm": 0.2666608989238739, "learning_rate": 4.988050806841102e-05, "loss": 0.2161, "step": 7177 }, { "epoch": 0.12802768166089964, "grad_norm": 0.24305440485477448, "learning_rate": 4.9880356019358786e-05, "loss": 0.2268, "step": 7178 }, { "epoch": 0.12804551778261336, "grad_norm": 0.25793445110321045, "learning_rate": 4.9880203873861705e-05, "loss": 0.2286, "step": 7179 }, { "epoch": 0.12806335390432705, "grad_norm": 0.33685505390167236, "learning_rate": 4.9880051631920355e-05, "loss": 0.2512, "step": 7180 }, { "epoch": 0.12808119002604074, "grad_norm": 0.422828733921051, "learning_rate": 4.9879899293535325e-05, "loss": 0.2657, "step": 7181 }, { "epoch": 0.12809902614775442, "grad_norm": 0.3008039593696594, "learning_rate": 4.987974685870722e-05, "loss": 0.2593, "step": 7182 }, { "epoch": 0.12811686226946814, "grad_norm": 0.2825548052787781, "learning_rate": 4.9879594327436625e-05, "loss": 0.2266, "step": 7183 }, { "epoch": 0.12813469839118183, "grad_norm": 0.331137090921402, "learning_rate": 4.987944169972413e-05, "loss": 0.2633, "step": 7184 }, { "epoch": 0.12815253451289552, "grad_norm": 0.19249378144741058, "learning_rate": 4.9879288975570315e-05, "loss": 0.1942, "step": 7185 }, { "epoch": 0.1281703706346092, "grad_norm": 0.3144652843475342, "learning_rate": 4.987913615497579e-05, "loss": 0.2052, "step": 7186 }, { "epoch": 0.1281882067563229, "grad_norm": 0.3076115548610687, "learning_rate": 4.987898323794114e-05, "loss": 0.3222, "step": 7187 }, { "epoch": 0.1282060428780366, "grad_norm": 0.3074822723865509, "learning_rate": 4.987883022446696e-05, "loss": 0.2512, "step": 7188 }, { "epoch": 0.1282238789997503, "grad_norm": 0.32042768597602844, "learning_rate": 4.987867711455384e-05, "loss": 0.2065, "step": 7189 }, { "epoch": 0.12824171512146398, "grad_norm": 0.27149438858032227, "learning_rate": 4.987852390820237e-05, "loss": 0.2664, "step": 7190 }, { "epoch": 0.12825955124317767, "grad_norm": 0.47419872879981995, "learning_rate": 4.987837060541316e-05, "loss": 0.2312, "step": 7191 }, { "epoch": 0.1282773873648914, "grad_norm": 0.3739066421985626, "learning_rate": 4.987821720618678e-05, "loss": 0.2824, "step": 7192 }, { "epoch": 0.12829522348660508, "grad_norm": 0.25958433747291565, "learning_rate": 4.987806371052384e-05, "loss": 0.2229, "step": 7193 }, { "epoch": 0.12831305960831876, "grad_norm": 0.38448628783226013, "learning_rate": 4.9877910118424936e-05, "loss": 0.2412, "step": 7194 }, { "epoch": 0.12833089573003245, "grad_norm": 0.29795241355895996, "learning_rate": 4.987775642989066e-05, "loss": 0.1777, "step": 7195 }, { "epoch": 0.12834873185174617, "grad_norm": 0.3055592477321625, "learning_rate": 4.9877602644921606e-05, "loss": 0.2968, "step": 7196 }, { "epoch": 0.12836656797345986, "grad_norm": 0.29176947474479675, "learning_rate": 4.9877448763518374e-05, "loss": 0.2482, "step": 7197 }, { "epoch": 0.12838440409517354, "grad_norm": 0.342585951089859, "learning_rate": 4.987729478568155e-05, "loss": 0.2275, "step": 7198 }, { "epoch": 0.12840224021688723, "grad_norm": 0.38080471754074097, "learning_rate": 4.987714071141175e-05, "loss": 0.2813, "step": 7199 }, { "epoch": 0.12842007633860095, "grad_norm": 0.3040217459201813, "learning_rate": 4.987698654070956e-05, "loss": 0.2595, "step": 7200 }, { "epoch": 0.12843791246031463, "grad_norm": 0.25592151284217834, "learning_rate": 4.9876832273575566e-05, "loss": 0.2186, "step": 7201 }, { "epoch": 0.12845574858202832, "grad_norm": 0.4482082426548004, "learning_rate": 4.987667791001039e-05, "loss": 0.2359, "step": 7202 }, { "epoch": 0.128473584703742, "grad_norm": 0.3252483010292053, "learning_rate": 4.987652345001461e-05, "loss": 0.26, "step": 7203 }, { "epoch": 0.12849142082545573, "grad_norm": 0.5447587966918945, "learning_rate": 4.987636889358884e-05, "loss": 0.2533, "step": 7204 }, { "epoch": 0.12850925694716941, "grad_norm": 0.2592175006866455, "learning_rate": 4.987621424073366e-05, "loss": 0.2131, "step": 7205 }, { "epoch": 0.1285270930688831, "grad_norm": 0.32026323676109314, "learning_rate": 4.98760594914497e-05, "loss": 0.2803, "step": 7206 }, { "epoch": 0.1285449291905968, "grad_norm": 0.2672712206840515, "learning_rate": 4.987590464573753e-05, "loss": 0.1968, "step": 7207 }, { "epoch": 0.12856276531231048, "grad_norm": 0.27520185708999634, "learning_rate": 4.9875749703597765e-05, "loss": 0.2167, "step": 7208 }, { "epoch": 0.1285806014340242, "grad_norm": 0.297823041677475, "learning_rate": 4.9875594665031e-05, "loss": 0.2811, "step": 7209 }, { "epoch": 0.12859843755573788, "grad_norm": 0.2841673493385315, "learning_rate": 4.987543953003784e-05, "loss": 0.2356, "step": 7210 }, { "epoch": 0.12861627367745157, "grad_norm": 0.24720583856105804, "learning_rate": 4.987528429861889e-05, "loss": 0.2153, "step": 7211 }, { "epoch": 0.12863410979916526, "grad_norm": 0.2523522675037384, "learning_rate": 4.987512897077474e-05, "loss": 0.2502, "step": 7212 }, { "epoch": 0.12865194592087897, "grad_norm": 0.38946402072906494, "learning_rate": 4.9874973546506e-05, "loss": 0.335, "step": 7213 }, { "epoch": 0.12866978204259266, "grad_norm": 0.2724445164203644, "learning_rate": 4.987481802581326e-05, "loss": 0.2455, "step": 7214 }, { "epoch": 0.12868761816430635, "grad_norm": 0.2129668891429901, "learning_rate": 4.9874662408697146e-05, "loss": 0.2358, "step": 7215 }, { "epoch": 0.12870545428602004, "grad_norm": 0.2719738483428955, "learning_rate": 4.9874506695158254e-05, "loss": 0.2488, "step": 7216 }, { "epoch": 0.12872329040773375, "grad_norm": 0.22912661731243134, "learning_rate": 4.987435088519718e-05, "loss": 0.2124, "step": 7217 }, { "epoch": 0.12874112652944744, "grad_norm": 0.35899364948272705, "learning_rate": 4.987419497881452e-05, "loss": 0.2554, "step": 7218 }, { "epoch": 0.12875896265116113, "grad_norm": 0.23842033743858337, "learning_rate": 4.987403897601089e-05, "loss": 0.2106, "step": 7219 }, { "epoch": 0.12877679877287482, "grad_norm": 0.27896079421043396, "learning_rate": 4.9873882876786905e-05, "loss": 0.2493, "step": 7220 }, { "epoch": 0.12879463489458853, "grad_norm": 0.31804925203323364, "learning_rate": 4.987372668114315e-05, "loss": 0.2306, "step": 7221 }, { "epoch": 0.12881247101630222, "grad_norm": 0.3121148943901062, "learning_rate": 4.987357038908024e-05, "loss": 0.2749, "step": 7222 }, { "epoch": 0.1288303071380159, "grad_norm": 0.28705453872680664, "learning_rate": 4.9873414000598785e-05, "loss": 0.2367, "step": 7223 }, { "epoch": 0.1288481432597296, "grad_norm": 0.24653789401054382, "learning_rate": 4.9873257515699386e-05, "loss": 0.1778, "step": 7224 }, { "epoch": 0.12886597938144329, "grad_norm": 0.3480568528175354, "learning_rate": 4.9873100934382646e-05, "loss": 0.2243, "step": 7225 }, { "epoch": 0.128883815503157, "grad_norm": 0.30845171213150024, "learning_rate": 4.9872944256649176e-05, "loss": 0.2111, "step": 7226 }, { "epoch": 0.1289016516248707, "grad_norm": 0.30911576747894287, "learning_rate": 4.9872787482499585e-05, "loss": 0.2733, "step": 7227 }, { "epoch": 0.12891948774658438, "grad_norm": 0.26741185784339905, "learning_rate": 4.9872630611934477e-05, "loss": 0.2411, "step": 7228 }, { "epoch": 0.12893732386829806, "grad_norm": 0.2957748472690582, "learning_rate": 4.987247364495447e-05, "loss": 0.2229, "step": 7229 }, { "epoch": 0.12895515999001178, "grad_norm": 0.2056843489408493, "learning_rate": 4.9872316581560154e-05, "loss": 0.2201, "step": 7230 }, { "epoch": 0.12897299611172547, "grad_norm": 0.21183530986309052, "learning_rate": 4.987215942175215e-05, "loss": 0.2334, "step": 7231 }, { "epoch": 0.12899083223343916, "grad_norm": 0.2882821261882782, "learning_rate": 4.987200216553107e-05, "loss": 0.2005, "step": 7232 }, { "epoch": 0.12900866835515284, "grad_norm": 0.2475908249616623, "learning_rate": 4.987184481289752e-05, "loss": 0.2269, "step": 7233 }, { "epoch": 0.12902650447686656, "grad_norm": 0.2058449685573578, "learning_rate": 4.9871687363852106e-05, "loss": 0.2194, "step": 7234 }, { "epoch": 0.12904434059858025, "grad_norm": 0.2869996130466461, "learning_rate": 4.987152981839544e-05, "loss": 0.2365, "step": 7235 }, { "epoch": 0.12906217672029394, "grad_norm": 0.5378527641296387, "learning_rate": 4.987137217652814e-05, "loss": 0.2745, "step": 7236 }, { "epoch": 0.12908001284200762, "grad_norm": 0.28777915239334106, "learning_rate": 4.9871214438250804e-05, "loss": 0.1801, "step": 7237 }, { "epoch": 0.12909784896372134, "grad_norm": 0.20621678233146667, "learning_rate": 4.987105660356405e-05, "loss": 0.1761, "step": 7238 }, { "epoch": 0.12911568508543503, "grad_norm": 0.2804788053035736, "learning_rate": 4.9870898672468495e-05, "loss": 0.2099, "step": 7239 }, { "epoch": 0.12913352120714872, "grad_norm": 0.39618542790412903, "learning_rate": 4.987074064496474e-05, "loss": 0.2601, "step": 7240 }, { "epoch": 0.1291513573288624, "grad_norm": 0.27285629510879517, "learning_rate": 4.9870582521053416e-05, "loss": 0.2128, "step": 7241 }, { "epoch": 0.12916919345057612, "grad_norm": 0.24478517472743988, "learning_rate": 4.987042430073512e-05, "loss": 0.2281, "step": 7242 }, { "epoch": 0.1291870295722898, "grad_norm": 0.3979249596595764, "learning_rate": 4.987026598401047e-05, "loss": 0.3359, "step": 7243 }, { "epoch": 0.1292048656940035, "grad_norm": 0.2648313045501709, "learning_rate": 4.987010757088007e-05, "loss": 0.2329, "step": 7244 }, { "epoch": 0.12922270181571718, "grad_norm": 0.27400505542755127, "learning_rate": 4.986994906134455e-05, "loss": 0.2202, "step": 7245 }, { "epoch": 0.12924053793743087, "grad_norm": 0.2718895375728607, "learning_rate": 4.986979045540452e-05, "loss": 0.1965, "step": 7246 }, { "epoch": 0.1292583740591446, "grad_norm": 0.2907407283782959, "learning_rate": 4.9869631753060584e-05, "loss": 0.2692, "step": 7247 }, { "epoch": 0.12927621018085828, "grad_norm": 0.284167617559433, "learning_rate": 4.9869472954313374e-05, "loss": 0.2336, "step": 7248 }, { "epoch": 0.12929404630257196, "grad_norm": 0.2803809642791748, "learning_rate": 4.9869314059163495e-05, "loss": 0.2546, "step": 7249 }, { "epoch": 0.12931188242428565, "grad_norm": 0.3222905695438385, "learning_rate": 4.986915506761156e-05, "loss": 0.2377, "step": 7250 }, { "epoch": 0.12932971854599937, "grad_norm": 0.28519296646118164, "learning_rate": 4.9868995979658194e-05, "loss": 0.2338, "step": 7251 }, { "epoch": 0.12934755466771305, "grad_norm": 0.26038432121276855, "learning_rate": 4.986883679530401e-05, "loss": 0.2282, "step": 7252 }, { "epoch": 0.12936539078942674, "grad_norm": 0.30088332295417786, "learning_rate": 4.9868677514549635e-05, "loss": 0.2655, "step": 7253 }, { "epoch": 0.12938322691114043, "grad_norm": 0.2853420078754425, "learning_rate": 4.9868518137395664e-05, "loss": 0.2086, "step": 7254 }, { "epoch": 0.12940106303285415, "grad_norm": 0.28434690833091736, "learning_rate": 4.986835866384273e-05, "loss": 0.2432, "step": 7255 }, { "epoch": 0.12941889915456783, "grad_norm": 0.2156800478696823, "learning_rate": 4.986819909389145e-05, "loss": 0.1931, "step": 7256 }, { "epoch": 0.12943673527628152, "grad_norm": 0.3005669414997101, "learning_rate": 4.986803942754244e-05, "loss": 0.2059, "step": 7257 }, { "epoch": 0.1294545713979952, "grad_norm": 0.27690404653549194, "learning_rate": 4.986787966479632e-05, "loss": 0.2131, "step": 7258 }, { "epoch": 0.12947240751970893, "grad_norm": 0.26980459690093994, "learning_rate": 4.986771980565371e-05, "loss": 0.2851, "step": 7259 }, { "epoch": 0.12949024364142261, "grad_norm": 0.23820079863071442, "learning_rate": 4.986755985011523e-05, "loss": 0.1944, "step": 7260 }, { "epoch": 0.1295080797631363, "grad_norm": 0.25404220819473267, "learning_rate": 4.98673997981815e-05, "loss": 0.2101, "step": 7261 }, { "epoch": 0.12952591588485, "grad_norm": 0.2757260799407959, "learning_rate": 4.986723964985314e-05, "loss": 0.2174, "step": 7262 }, { "epoch": 0.1295437520065637, "grad_norm": 0.28552815318107605, "learning_rate": 4.986707940513077e-05, "loss": 0.2209, "step": 7263 }, { "epoch": 0.1295615881282774, "grad_norm": 0.3079882562160492, "learning_rate": 4.9866919064015003e-05, "loss": 0.254, "step": 7264 }, { "epoch": 0.12957942424999108, "grad_norm": 0.365155965089798, "learning_rate": 4.9866758626506476e-05, "loss": 0.1941, "step": 7265 }, { "epoch": 0.12959726037170477, "grad_norm": 0.22253163158893585, "learning_rate": 4.98665980926058e-05, "loss": 0.2079, "step": 7266 }, { "epoch": 0.12961509649341846, "grad_norm": 0.2523843050003052, "learning_rate": 4.986643746231361e-05, "loss": 0.2262, "step": 7267 }, { "epoch": 0.12963293261513217, "grad_norm": 0.29638391733169556, "learning_rate": 4.986627673563051e-05, "loss": 0.1368, "step": 7268 }, { "epoch": 0.12965076873684586, "grad_norm": 0.2818562984466553, "learning_rate": 4.986611591255714e-05, "loss": 0.2181, "step": 7269 }, { "epoch": 0.12966860485855955, "grad_norm": 0.29367595911026, "learning_rate": 4.986595499309411e-05, "loss": 0.2396, "step": 7270 }, { "epoch": 0.12968644098027324, "grad_norm": 0.339464008808136, "learning_rate": 4.986579397724205e-05, "loss": 0.2391, "step": 7271 }, { "epoch": 0.12970427710198695, "grad_norm": 0.27732816338539124, "learning_rate": 4.986563286500159e-05, "loss": 0.2258, "step": 7272 }, { "epoch": 0.12972211322370064, "grad_norm": 0.2927684783935547, "learning_rate": 4.9865471656373345e-05, "loss": 0.2061, "step": 7273 }, { "epoch": 0.12973994934541433, "grad_norm": 0.2760705053806305, "learning_rate": 4.9865310351357946e-05, "loss": 0.2343, "step": 7274 }, { "epoch": 0.12975778546712802, "grad_norm": 0.2814476788043976, "learning_rate": 4.986514894995602e-05, "loss": 0.2495, "step": 7275 }, { "epoch": 0.12977562158884173, "grad_norm": 0.2642858922481537, "learning_rate": 4.986498745216818e-05, "loss": 0.2121, "step": 7276 }, { "epoch": 0.12979345771055542, "grad_norm": 0.23780542612075806, "learning_rate": 4.9864825857995064e-05, "loss": 0.2167, "step": 7277 }, { "epoch": 0.1298112938322691, "grad_norm": 0.301698237657547, "learning_rate": 4.986466416743729e-05, "loss": 0.2434, "step": 7278 }, { "epoch": 0.1298291299539828, "grad_norm": 0.3107871413230896, "learning_rate": 4.9864502380495496e-05, "loss": 0.2982, "step": 7279 }, { "epoch": 0.1298469660756965, "grad_norm": 0.22600796818733215, "learning_rate": 4.9864340497170304e-05, "loss": 0.2023, "step": 7280 }, { "epoch": 0.1298648021974102, "grad_norm": 0.2335529625415802, "learning_rate": 4.986417851746234e-05, "loss": 0.224, "step": 7281 }, { "epoch": 0.1298826383191239, "grad_norm": 0.24890993535518646, "learning_rate": 4.986401644137223e-05, "loss": 0.2266, "step": 7282 }, { "epoch": 0.12990047444083758, "grad_norm": 0.2737363576889038, "learning_rate": 4.98638542689006e-05, "loss": 0.2667, "step": 7283 }, { "epoch": 0.1299183105625513, "grad_norm": 0.20195892453193665, "learning_rate": 4.986369200004809e-05, "loss": 0.2062, "step": 7284 }, { "epoch": 0.12993614668426498, "grad_norm": 0.2856735587120056, "learning_rate": 4.986352963481532e-05, "loss": 0.234, "step": 7285 }, { "epoch": 0.12995398280597867, "grad_norm": 0.25961220264434814, "learning_rate": 4.986336717320292e-05, "loss": 0.2041, "step": 7286 }, { "epoch": 0.12997181892769236, "grad_norm": 0.25032082200050354, "learning_rate": 4.986320461521152e-05, "loss": 0.2153, "step": 7287 }, { "epoch": 0.12998965504940604, "grad_norm": 0.4232363700866699, "learning_rate": 4.986304196084176e-05, "loss": 0.2961, "step": 7288 }, { "epoch": 0.13000749117111976, "grad_norm": 0.25651246309280396, "learning_rate": 4.9862879210094247e-05, "loss": 0.1977, "step": 7289 }, { "epoch": 0.13002532729283345, "grad_norm": 0.33551570773124695, "learning_rate": 4.986271636296964e-05, "loss": 0.2081, "step": 7290 }, { "epoch": 0.13004316341454714, "grad_norm": 0.30205658078193665, "learning_rate": 4.986255341946855e-05, "loss": 0.2803, "step": 7291 }, { "epoch": 0.13006099953626082, "grad_norm": 0.3281456232070923, "learning_rate": 4.986239037959162e-05, "loss": 0.2086, "step": 7292 }, { "epoch": 0.13007883565797454, "grad_norm": 0.34779486060142517, "learning_rate": 4.986222724333948e-05, "loss": 0.2819, "step": 7293 }, { "epoch": 0.13009667177968823, "grad_norm": 0.3936409652233124, "learning_rate": 4.986206401071275e-05, "loss": 0.2155, "step": 7294 }, { "epoch": 0.13011450790140192, "grad_norm": 0.2572975158691406, "learning_rate": 4.986190068171208e-05, "loss": 0.1798, "step": 7295 }, { "epoch": 0.1301323440231156, "grad_norm": 0.29663142561912537, "learning_rate": 4.9861737256338094e-05, "loss": 0.2346, "step": 7296 }, { "epoch": 0.13015018014482932, "grad_norm": 0.24904395639896393, "learning_rate": 4.986157373459143e-05, "loss": 0.2422, "step": 7297 }, { "epoch": 0.130168016266543, "grad_norm": 0.28781792521476746, "learning_rate": 4.986141011647272e-05, "loss": 0.193, "step": 7298 }, { "epoch": 0.1301858523882567, "grad_norm": 0.3377923369407654, "learning_rate": 4.9861246401982594e-05, "loss": 0.299, "step": 7299 }, { "epoch": 0.13020368850997038, "grad_norm": 0.2569024860858917, "learning_rate": 4.986108259112169e-05, "loss": 0.2206, "step": 7300 }, { "epoch": 0.1302215246316841, "grad_norm": 0.24787509441375732, "learning_rate": 4.9860918683890646e-05, "loss": 0.213, "step": 7301 }, { "epoch": 0.1302393607533978, "grad_norm": 0.3531358242034912, "learning_rate": 4.986075468029009e-05, "loss": 0.2745, "step": 7302 }, { "epoch": 0.13025719687511148, "grad_norm": 0.30264678597450256, "learning_rate": 4.9860590580320664e-05, "loss": 0.2203, "step": 7303 }, { "epoch": 0.13027503299682516, "grad_norm": 0.33674877882003784, "learning_rate": 4.9860426383983006e-05, "loss": 0.2484, "step": 7304 }, { "epoch": 0.13029286911853888, "grad_norm": 0.3663444519042969, "learning_rate": 4.9860262091277745e-05, "loss": 0.2497, "step": 7305 }, { "epoch": 0.13031070524025257, "grad_norm": 0.3186067044734955, "learning_rate": 4.986009770220552e-05, "loss": 0.2653, "step": 7306 }, { "epoch": 0.13032854136196625, "grad_norm": 0.243214949965477, "learning_rate": 4.985993321676698e-05, "loss": 0.1699, "step": 7307 }, { "epoch": 0.13034637748367994, "grad_norm": 0.3463163673877716, "learning_rate": 4.9859768634962744e-05, "loss": 0.2308, "step": 7308 }, { "epoch": 0.13036421360539363, "grad_norm": 0.3058823049068451, "learning_rate": 4.9859603956793456e-05, "loss": 0.2483, "step": 7309 }, { "epoch": 0.13038204972710735, "grad_norm": 0.2753719985485077, "learning_rate": 4.985943918225976e-05, "loss": 0.2232, "step": 7310 }, { "epoch": 0.13039988584882103, "grad_norm": 0.30290111899375916, "learning_rate": 4.985927431136229e-05, "loss": 0.2378, "step": 7311 }, { "epoch": 0.13041772197053472, "grad_norm": 0.23741087317466736, "learning_rate": 4.9859109344101697e-05, "loss": 0.1981, "step": 7312 }, { "epoch": 0.1304355580922484, "grad_norm": 0.30447036027908325, "learning_rate": 4.98589442804786e-05, "loss": 0.2011, "step": 7313 }, { "epoch": 0.13045339421396213, "grad_norm": 0.3168881833553314, "learning_rate": 4.985877912049365e-05, "loss": 0.2594, "step": 7314 }, { "epoch": 0.13047123033567581, "grad_norm": 0.21747729182243347, "learning_rate": 4.9858613864147485e-05, "loss": 0.1846, "step": 7315 }, { "epoch": 0.1304890664573895, "grad_norm": 0.23899520933628082, "learning_rate": 4.9858448511440745e-05, "loss": 0.2008, "step": 7316 }, { "epoch": 0.1305069025791032, "grad_norm": 0.3052922189235687, "learning_rate": 4.985828306237409e-05, "loss": 0.2549, "step": 7317 }, { "epoch": 0.1305247387008169, "grad_norm": 0.2646942138671875, "learning_rate": 4.985811751694812e-05, "loss": 0.2114, "step": 7318 }, { "epoch": 0.1305425748225306, "grad_norm": 0.24101893603801727, "learning_rate": 4.985795187516351e-05, "loss": 0.2271, "step": 7319 }, { "epoch": 0.13056041094424428, "grad_norm": 0.2978793680667877, "learning_rate": 4.98577861370209e-05, "loss": 0.2594, "step": 7320 }, { "epoch": 0.13057824706595797, "grad_norm": 0.3433240056037903, "learning_rate": 4.985762030252092e-05, "loss": 0.2729, "step": 7321 }, { "epoch": 0.13059608318767169, "grad_norm": 0.27056026458740234, "learning_rate": 4.9857454371664216e-05, "loss": 0.2169, "step": 7322 }, { "epoch": 0.13061391930938537, "grad_norm": 0.24161820113658905, "learning_rate": 4.985728834445144e-05, "loss": 0.2506, "step": 7323 }, { "epoch": 0.13063175543109906, "grad_norm": 0.381404310464859, "learning_rate": 4.985712222088322e-05, "loss": 0.322, "step": 7324 }, { "epoch": 0.13064959155281275, "grad_norm": 0.35411256551742554, "learning_rate": 4.985695600096022e-05, "loss": 0.2483, "step": 7325 }, { "epoch": 0.13066742767452644, "grad_norm": 0.3423224091529846, "learning_rate": 4.985678968468306e-05, "loss": 0.2482, "step": 7326 }, { "epoch": 0.13068526379624015, "grad_norm": 0.24416756629943848, "learning_rate": 4.98566232720524e-05, "loss": 0.2548, "step": 7327 }, { "epoch": 0.13070309991795384, "grad_norm": 0.3216147720813751, "learning_rate": 4.9856456763068895e-05, "loss": 0.2331, "step": 7328 }, { "epoch": 0.13072093603966753, "grad_norm": 0.32868513464927673, "learning_rate": 4.9856290157733166e-05, "loss": 0.2664, "step": 7329 }, { "epoch": 0.13073877216138122, "grad_norm": 0.2403426617383957, "learning_rate": 4.985612345604588e-05, "loss": 0.2157, "step": 7330 }, { "epoch": 0.13075660828309493, "grad_norm": 0.3066485822200775, "learning_rate": 4.985595665800767e-05, "loss": 0.2603, "step": 7331 }, { "epoch": 0.13077444440480862, "grad_norm": 0.33103078603744507, "learning_rate": 4.985578976361919e-05, "loss": 0.2256, "step": 7332 }, { "epoch": 0.1307922805265223, "grad_norm": 0.2382347583770752, "learning_rate": 4.985562277288108e-05, "loss": 0.2273, "step": 7333 }, { "epoch": 0.130810116648236, "grad_norm": 0.2508715093135834, "learning_rate": 4.985545568579399e-05, "loss": 0.2027, "step": 7334 }, { "epoch": 0.1308279527699497, "grad_norm": 0.2651994824409485, "learning_rate": 4.985528850235857e-05, "loss": 0.2797, "step": 7335 }, { "epoch": 0.1308457888916634, "grad_norm": 0.442849338054657, "learning_rate": 4.985512122257547e-05, "loss": 0.2864, "step": 7336 }, { "epoch": 0.1308636250133771, "grad_norm": 0.39108017086982727, "learning_rate": 4.985495384644534e-05, "loss": 0.1999, "step": 7337 }, { "epoch": 0.13088146113509078, "grad_norm": 0.36775991320610046, "learning_rate": 4.985478637396881e-05, "loss": 0.2797, "step": 7338 }, { "epoch": 0.1308992972568045, "grad_norm": 0.25715216994285583, "learning_rate": 4.9854618805146556e-05, "loss": 0.2472, "step": 7339 }, { "epoch": 0.13091713337851818, "grad_norm": 0.2287001609802246, "learning_rate": 4.985445113997921e-05, "loss": 0.2388, "step": 7340 }, { "epoch": 0.13093496950023187, "grad_norm": 0.21074619889259338, "learning_rate": 4.985428337846743e-05, "loss": 0.2302, "step": 7341 }, { "epoch": 0.13095280562194556, "grad_norm": 0.29683929681777954, "learning_rate": 4.9854115520611856e-05, "loss": 0.1928, "step": 7342 }, { "epoch": 0.13097064174365927, "grad_norm": 0.208047017455101, "learning_rate": 4.985394756641315e-05, "loss": 0.2079, "step": 7343 }, { "epoch": 0.13098847786537296, "grad_norm": 0.19809278845787048, "learning_rate": 4.985377951587196e-05, "loss": 0.1892, "step": 7344 }, { "epoch": 0.13100631398708665, "grad_norm": 0.3061544895172119, "learning_rate": 4.985361136898894e-05, "loss": 0.2173, "step": 7345 }, { "epoch": 0.13102415010880034, "grad_norm": 0.30910956859588623, "learning_rate": 4.985344312576473e-05, "loss": 0.2203, "step": 7346 }, { "epoch": 0.13104198623051402, "grad_norm": 0.28544309735298157, "learning_rate": 4.98532747862e-05, "loss": 0.232, "step": 7347 }, { "epoch": 0.13105982235222774, "grad_norm": 0.30717405676841736, "learning_rate": 4.985310635029538e-05, "loss": 0.219, "step": 7348 }, { "epoch": 0.13107765847394143, "grad_norm": 0.3384716212749481, "learning_rate": 4.9852937818051545e-05, "loss": 0.2279, "step": 7349 }, { "epoch": 0.13109549459565512, "grad_norm": 0.23273895680904388, "learning_rate": 4.985276918946914e-05, "loss": 0.2464, "step": 7350 }, { "epoch": 0.1311133307173688, "grad_norm": 0.22510305047035217, "learning_rate": 4.9852600464548814e-05, "loss": 0.1801, "step": 7351 }, { "epoch": 0.13113116683908252, "grad_norm": 0.2647697329521179, "learning_rate": 4.985243164329123e-05, "loss": 0.2144, "step": 7352 }, { "epoch": 0.1311490029607962, "grad_norm": 0.24195782840251923, "learning_rate": 4.985226272569703e-05, "loss": 0.2408, "step": 7353 }, { "epoch": 0.1311668390825099, "grad_norm": 0.2554917633533478, "learning_rate": 4.985209371176688e-05, "loss": 0.2075, "step": 7354 }, { "epoch": 0.13118467520422358, "grad_norm": 0.35380035638809204, "learning_rate": 4.9851924601501435e-05, "loss": 0.2639, "step": 7355 }, { "epoch": 0.1312025113259373, "grad_norm": 0.2879059910774231, "learning_rate": 4.985175539490134e-05, "loss": 0.2594, "step": 7356 }, { "epoch": 0.131220347447651, "grad_norm": 0.3154323697090149, "learning_rate": 4.9851586091967264e-05, "loss": 0.1967, "step": 7357 }, { "epoch": 0.13123818356936467, "grad_norm": 0.34300410747528076, "learning_rate": 4.985141669269985e-05, "loss": 0.2231, "step": 7358 }, { "epoch": 0.13125601969107836, "grad_norm": 0.2851123809814453, "learning_rate": 4.985124719709976e-05, "loss": 0.1936, "step": 7359 }, { "epoch": 0.13127385581279208, "grad_norm": 0.26627442240715027, "learning_rate": 4.9851077605167654e-05, "loss": 0.2074, "step": 7360 }, { "epoch": 0.13129169193450577, "grad_norm": 0.2769263982772827, "learning_rate": 4.9850907916904193e-05, "loss": 0.1697, "step": 7361 }, { "epoch": 0.13130952805621945, "grad_norm": 0.3018946051597595, "learning_rate": 4.985073813231003e-05, "loss": 0.2341, "step": 7362 }, { "epoch": 0.13132736417793314, "grad_norm": 0.3470059335231781, "learning_rate": 4.985056825138582e-05, "loss": 0.2934, "step": 7363 }, { "epoch": 0.13134520029964686, "grad_norm": 0.25130894780158997, "learning_rate": 4.985039827413222e-05, "loss": 0.2158, "step": 7364 }, { "epoch": 0.13136303642136055, "grad_norm": 0.2852213382720947, "learning_rate": 4.98502282005499e-05, "loss": 0.2334, "step": 7365 }, { "epoch": 0.13138087254307423, "grad_norm": 0.30131983757019043, "learning_rate": 4.985005803063951e-05, "loss": 0.2024, "step": 7366 }, { "epoch": 0.13139870866478792, "grad_norm": 0.32201939821243286, "learning_rate": 4.9849887764401715e-05, "loss": 0.2665, "step": 7367 }, { "epoch": 0.1314165447865016, "grad_norm": 0.31907594203948975, "learning_rate": 4.9849717401837165e-05, "loss": 0.2239, "step": 7368 }, { "epoch": 0.13143438090821533, "grad_norm": 0.23806947469711304, "learning_rate": 4.984954694294654e-05, "loss": 0.2133, "step": 7369 }, { "epoch": 0.131452217029929, "grad_norm": 0.29021796584129333, "learning_rate": 4.9849376387730475e-05, "loss": 0.2284, "step": 7370 }, { "epoch": 0.1314700531516427, "grad_norm": 0.2276151478290558, "learning_rate": 4.9849205736189644e-05, "loss": 0.1933, "step": 7371 }, { "epoch": 0.1314878892733564, "grad_norm": 0.3081069886684418, "learning_rate": 4.9849034988324714e-05, "loss": 0.2603, "step": 7372 }, { "epoch": 0.1315057253950701, "grad_norm": 0.2812741696834564, "learning_rate": 4.984886414413634e-05, "loss": 0.2767, "step": 7373 }, { "epoch": 0.1315235615167838, "grad_norm": 0.2832390069961548, "learning_rate": 4.98486932036252e-05, "loss": 0.2416, "step": 7374 }, { "epoch": 0.13154139763849748, "grad_norm": 0.2827257812023163, "learning_rate": 4.984852216679192e-05, "loss": 0.244, "step": 7375 }, { "epoch": 0.13155923376021117, "grad_norm": 0.29743492603302, "learning_rate": 4.9848351033637204e-05, "loss": 0.2342, "step": 7376 }, { "epoch": 0.13157706988192489, "grad_norm": 0.21614915132522583, "learning_rate": 4.984817980416169e-05, "loss": 0.2135, "step": 7377 }, { "epoch": 0.13159490600363857, "grad_norm": 0.31758689880371094, "learning_rate": 4.984800847836605e-05, "loss": 0.2247, "step": 7378 }, { "epoch": 0.13161274212535226, "grad_norm": 0.27685463428497314, "learning_rate": 4.984783705625094e-05, "loss": 0.2143, "step": 7379 }, { "epoch": 0.13163057824706595, "grad_norm": 0.2874879240989685, "learning_rate": 4.9847665537817036e-05, "loss": 0.2186, "step": 7380 }, { "epoch": 0.13164841436877966, "grad_norm": 0.21807777881622314, "learning_rate": 4.9847493923065004e-05, "loss": 0.2182, "step": 7381 }, { "epoch": 0.13166625049049335, "grad_norm": 0.2918378710746765, "learning_rate": 4.9847322211995494e-05, "loss": 0.2186, "step": 7382 }, { "epoch": 0.13168408661220704, "grad_norm": 0.36302876472473145, "learning_rate": 4.984715040460919e-05, "loss": 0.2428, "step": 7383 }, { "epoch": 0.13170192273392073, "grad_norm": 0.2821221947669983, "learning_rate": 4.984697850090674e-05, "loss": 0.2846, "step": 7384 }, { "epoch": 0.13171975885563444, "grad_norm": 0.348736435174942, "learning_rate": 4.984680650088883e-05, "loss": 0.2693, "step": 7385 }, { "epoch": 0.13173759497734813, "grad_norm": 0.2931061089038849, "learning_rate": 4.9846634404556106e-05, "loss": 0.2384, "step": 7386 }, { "epoch": 0.13175543109906182, "grad_norm": 0.23608984053134918, "learning_rate": 4.984646221190925e-05, "loss": 0.1999, "step": 7387 }, { "epoch": 0.1317732672207755, "grad_norm": 0.27440783381462097, "learning_rate": 4.9846289922948926e-05, "loss": 0.2103, "step": 7388 }, { "epoch": 0.1317911033424892, "grad_norm": 0.4285084009170532, "learning_rate": 4.98461175376758e-05, "loss": 0.2412, "step": 7389 }, { "epoch": 0.1318089394642029, "grad_norm": 0.2917060852050781, "learning_rate": 4.9845945056090535e-05, "loss": 0.2005, "step": 7390 }, { "epoch": 0.1318267755859166, "grad_norm": 0.35007357597351074, "learning_rate": 4.984577247819381e-05, "loss": 0.2758, "step": 7391 }, { "epoch": 0.1318446117076303, "grad_norm": 0.3258071839809418, "learning_rate": 4.984559980398629e-05, "loss": 0.2322, "step": 7392 }, { "epoch": 0.13186244782934398, "grad_norm": 0.3649417459964752, "learning_rate": 4.984542703346865e-05, "loss": 0.219, "step": 7393 }, { "epoch": 0.1318802839510577, "grad_norm": 0.31614258885383606, "learning_rate": 4.9845254166641545e-05, "loss": 0.2399, "step": 7394 }, { "epoch": 0.13189812007277138, "grad_norm": 0.2513994574546814, "learning_rate": 4.984508120350566e-05, "loss": 0.2293, "step": 7395 }, { "epoch": 0.13191595619448507, "grad_norm": 0.31215736269950867, "learning_rate": 4.984490814406165e-05, "loss": 0.2178, "step": 7396 }, { "epoch": 0.13193379231619876, "grad_norm": 0.2803609371185303, "learning_rate": 4.9844734988310196e-05, "loss": 0.1633, "step": 7397 }, { "epoch": 0.13195162843791247, "grad_norm": 0.4034261405467987, "learning_rate": 4.9844561736251975e-05, "loss": 0.3008, "step": 7398 }, { "epoch": 0.13196946455962616, "grad_norm": 0.2939809560775757, "learning_rate": 4.984438838788765e-05, "loss": 0.2614, "step": 7399 }, { "epoch": 0.13198730068133985, "grad_norm": 0.29928505420684814, "learning_rate": 4.9844214943217893e-05, "loss": 0.2184, "step": 7400 }, { "epoch": 0.13200513680305354, "grad_norm": 0.2603811025619507, "learning_rate": 4.984404140224338e-05, "loss": 0.2246, "step": 7401 }, { "epoch": 0.13202297292476725, "grad_norm": 0.34333038330078125, "learning_rate": 4.984386776496478e-05, "loss": 0.2713, "step": 7402 }, { "epoch": 0.13204080904648094, "grad_norm": 0.23444755375385284, "learning_rate": 4.984369403138276e-05, "loss": 0.2139, "step": 7403 }, { "epoch": 0.13205864516819463, "grad_norm": 0.2562881112098694, "learning_rate": 4.984352020149801e-05, "loss": 0.2343, "step": 7404 }, { "epoch": 0.13207648128990832, "grad_norm": 0.21762588620185852, "learning_rate": 4.98433462753112e-05, "loss": 0.2157, "step": 7405 }, { "epoch": 0.132094317411622, "grad_norm": 0.23702339828014374, "learning_rate": 4.984317225282299e-05, "loss": 0.1997, "step": 7406 }, { "epoch": 0.13211215353333572, "grad_norm": 0.2891445457935333, "learning_rate": 4.984299813403407e-05, "loss": 0.2365, "step": 7407 }, { "epoch": 0.1321299896550494, "grad_norm": 0.2948136031627655, "learning_rate": 4.98428239189451e-05, "loss": 0.2539, "step": 7408 }, { "epoch": 0.1321478257767631, "grad_norm": 0.4793187975883484, "learning_rate": 4.984264960755677e-05, "loss": 0.2017, "step": 7409 }, { "epoch": 0.13216566189847678, "grad_norm": 0.2965013086795807, "learning_rate": 4.984247519986975e-05, "loss": 0.2459, "step": 7410 }, { "epoch": 0.1321834980201905, "grad_norm": 0.3114519417285919, "learning_rate": 4.984230069588471e-05, "loss": 0.2457, "step": 7411 }, { "epoch": 0.1322013341419042, "grad_norm": 0.2890632152557373, "learning_rate": 4.9842126095602345e-05, "loss": 0.236, "step": 7412 }, { "epoch": 0.13221917026361787, "grad_norm": 0.2622167468070984, "learning_rate": 4.984195139902331e-05, "loss": 0.2601, "step": 7413 }, { "epoch": 0.13223700638533156, "grad_norm": 0.355807900428772, "learning_rate": 4.984177660614829e-05, "loss": 0.3013, "step": 7414 }, { "epoch": 0.13225484250704528, "grad_norm": 0.38276156783103943, "learning_rate": 4.984160171697797e-05, "loss": 0.3612, "step": 7415 }, { "epoch": 0.13227267862875897, "grad_norm": 0.3555077016353607, "learning_rate": 4.984142673151302e-05, "loss": 0.2059, "step": 7416 }, { "epoch": 0.13229051475047265, "grad_norm": 0.19492195546627045, "learning_rate": 4.9841251649754115e-05, "loss": 0.2003, "step": 7417 }, { "epoch": 0.13230835087218634, "grad_norm": 0.23308365046977997, "learning_rate": 4.984107647170194e-05, "loss": 0.2407, "step": 7418 }, { "epoch": 0.13232618699390006, "grad_norm": 0.4899159371852875, "learning_rate": 4.9840901197357174e-05, "loss": 0.2352, "step": 7419 }, { "epoch": 0.13234402311561375, "grad_norm": 0.24222806096076965, "learning_rate": 4.9840725826720495e-05, "loss": 0.2122, "step": 7420 }, { "epoch": 0.13236185923732743, "grad_norm": 0.31503376364707947, "learning_rate": 4.984055035979258e-05, "loss": 0.2196, "step": 7421 }, { "epoch": 0.13237969535904112, "grad_norm": 0.3184950649738312, "learning_rate": 4.984037479657412e-05, "loss": 0.2498, "step": 7422 }, { "epoch": 0.13239753148075484, "grad_norm": 0.3167721927165985, "learning_rate": 4.984019913706578e-05, "loss": 0.2236, "step": 7423 }, { "epoch": 0.13241536760246853, "grad_norm": 0.26916879415512085, "learning_rate": 4.984002338126826e-05, "loss": 0.1883, "step": 7424 }, { "epoch": 0.1324332037241822, "grad_norm": 0.38445138931274414, "learning_rate": 4.983984752918221e-05, "loss": 0.2736, "step": 7425 }, { "epoch": 0.1324510398458959, "grad_norm": 0.22318536043167114, "learning_rate": 4.9839671580808355e-05, "loss": 0.202, "step": 7426 }, { "epoch": 0.1324688759676096, "grad_norm": 0.30396726727485657, "learning_rate": 4.983949553614734e-05, "loss": 0.2405, "step": 7427 }, { "epoch": 0.1324867120893233, "grad_norm": 0.3115949034690857, "learning_rate": 4.9839319395199865e-05, "loss": 0.2934, "step": 7428 }, { "epoch": 0.132504548211037, "grad_norm": 0.2768704295158386, "learning_rate": 4.98391431579666e-05, "loss": 0.2351, "step": 7429 }, { "epoch": 0.13252238433275068, "grad_norm": 0.2526929974555969, "learning_rate": 4.983896682444825e-05, "loss": 0.1906, "step": 7430 }, { "epoch": 0.13254022045446437, "grad_norm": 0.28604450821876526, "learning_rate": 4.983879039464548e-05, "loss": 0.2842, "step": 7431 }, { "epoch": 0.13255805657617808, "grad_norm": 0.2940399646759033, "learning_rate": 4.983861386855898e-05, "loss": 0.2227, "step": 7432 }, { "epoch": 0.13257589269789177, "grad_norm": 0.3287406265735626, "learning_rate": 4.983843724618943e-05, "loss": 0.2872, "step": 7433 }, { "epoch": 0.13259372881960546, "grad_norm": 0.41262704133987427, "learning_rate": 4.9838260527537524e-05, "loss": 0.2893, "step": 7434 }, { "epoch": 0.13261156494131915, "grad_norm": 0.27647820115089417, "learning_rate": 4.983808371260393e-05, "loss": 0.2182, "step": 7435 }, { "epoch": 0.13262940106303286, "grad_norm": 0.2574402987957001, "learning_rate": 4.983790680138935e-05, "loss": 0.1768, "step": 7436 }, { "epoch": 0.13264723718474655, "grad_norm": 0.2551569640636444, "learning_rate": 4.9837729793894476e-05, "loss": 0.1964, "step": 7437 }, { "epoch": 0.13266507330646024, "grad_norm": 0.2723551094532013, "learning_rate": 4.983755269011998e-05, "loss": 0.2523, "step": 7438 }, { "epoch": 0.13268290942817393, "grad_norm": 0.32283326983451843, "learning_rate": 4.983737549006654e-05, "loss": 0.2537, "step": 7439 }, { "epoch": 0.13270074554988764, "grad_norm": 0.23678213357925415, "learning_rate": 4.983719819373486e-05, "loss": 0.2307, "step": 7440 }, { "epoch": 0.13271858167160133, "grad_norm": 0.31286126375198364, "learning_rate": 4.9837020801125624e-05, "loss": 0.1984, "step": 7441 }, { "epoch": 0.13273641779331502, "grad_norm": 0.37980055809020996, "learning_rate": 4.9836843312239514e-05, "loss": 0.245, "step": 7442 }, { "epoch": 0.1327542539150287, "grad_norm": 0.2871837019920349, "learning_rate": 4.983666572707721e-05, "loss": 0.2573, "step": 7443 }, { "epoch": 0.13277209003674242, "grad_norm": 0.3354865312576294, "learning_rate": 4.9836488045639426e-05, "loss": 0.2576, "step": 7444 }, { "epoch": 0.1327899261584561, "grad_norm": 0.2894785404205322, "learning_rate": 4.983631026792683e-05, "loss": 0.228, "step": 7445 }, { "epoch": 0.1328077622801698, "grad_norm": 0.29659420251846313, "learning_rate": 4.9836132393940126e-05, "loss": 0.2526, "step": 7446 }, { "epoch": 0.1328255984018835, "grad_norm": 0.45775067806243896, "learning_rate": 4.983595442367999e-05, "loss": 0.2359, "step": 7447 }, { "epoch": 0.13284343452359718, "grad_norm": 0.2509719431400299, "learning_rate": 4.9835776357147115e-05, "loss": 0.2034, "step": 7448 }, { "epoch": 0.1328612706453109, "grad_norm": 0.30304911732673645, "learning_rate": 4.9835598194342185e-05, "loss": 0.2317, "step": 7449 }, { "epoch": 0.13287910676702458, "grad_norm": 0.35082000494003296, "learning_rate": 4.983541993526591e-05, "loss": 0.2846, "step": 7450 }, { "epoch": 0.13289694288873827, "grad_norm": 0.33010074496269226, "learning_rate": 4.9835241579918965e-05, "loss": 0.197, "step": 7451 }, { "epoch": 0.13291477901045196, "grad_norm": 0.33244559168815613, "learning_rate": 4.9835063128302044e-05, "loss": 0.2421, "step": 7452 }, { "epoch": 0.13293261513216567, "grad_norm": 0.24155063927173615, "learning_rate": 4.9834884580415845e-05, "loss": 0.1957, "step": 7453 }, { "epoch": 0.13295045125387936, "grad_norm": 0.2793540358543396, "learning_rate": 4.983470593626105e-05, "loss": 0.2112, "step": 7454 }, { "epoch": 0.13296828737559305, "grad_norm": 0.24453821778297424, "learning_rate": 4.983452719583837e-05, "loss": 0.2217, "step": 7455 }, { "epoch": 0.13298612349730674, "grad_norm": 0.34428685903549194, "learning_rate": 4.9834348359148464e-05, "loss": 0.2909, "step": 7456 }, { "epoch": 0.13300395961902045, "grad_norm": 0.32234349846839905, "learning_rate": 4.983416942619206e-05, "loss": 0.2448, "step": 7457 }, { "epoch": 0.13302179574073414, "grad_norm": 0.3137282431125641, "learning_rate": 4.9833990396969834e-05, "loss": 0.2445, "step": 7458 }, { "epoch": 0.13303963186244783, "grad_norm": 0.36944276094436646, "learning_rate": 4.983381127148249e-05, "loss": 0.2005, "step": 7459 }, { "epoch": 0.13305746798416151, "grad_norm": 0.29638609290122986, "learning_rate": 4.983363204973071e-05, "loss": 0.2449, "step": 7460 }, { "epoch": 0.13307530410587523, "grad_norm": 0.23793640732765198, "learning_rate": 4.98334527317152e-05, "loss": 0.2209, "step": 7461 }, { "epoch": 0.13309314022758892, "grad_norm": 0.34974244236946106, "learning_rate": 4.9833273317436645e-05, "loss": 0.2812, "step": 7462 }, { "epoch": 0.1331109763493026, "grad_norm": 0.33365777134895325, "learning_rate": 4.9833093806895745e-05, "loss": 0.2071, "step": 7463 }, { "epoch": 0.1331288124710163, "grad_norm": 0.3364517390727997, "learning_rate": 4.98329142000932e-05, "loss": 0.2148, "step": 7464 }, { "epoch": 0.13314664859273, "grad_norm": 0.27121469378471375, "learning_rate": 4.98327344970297e-05, "loss": 0.1943, "step": 7465 }, { "epoch": 0.1331644847144437, "grad_norm": 0.3351459205150604, "learning_rate": 4.983255469770595e-05, "loss": 0.2306, "step": 7466 }, { "epoch": 0.1331823208361574, "grad_norm": 0.34545639157295227, "learning_rate": 4.9832374802122626e-05, "loss": 0.279, "step": 7467 }, { "epoch": 0.13320015695787107, "grad_norm": 0.29428547620773315, "learning_rate": 4.983219481028045e-05, "loss": 0.2129, "step": 7468 }, { "epoch": 0.13321799307958476, "grad_norm": 0.24164363741874695, "learning_rate": 4.983201472218011e-05, "loss": 0.2321, "step": 7469 }, { "epoch": 0.13323582920129848, "grad_norm": 0.25894299149513245, "learning_rate": 4.983183453782231e-05, "loss": 0.2383, "step": 7470 }, { "epoch": 0.13325366532301217, "grad_norm": 0.28912675380706787, "learning_rate": 4.983165425720774e-05, "loss": 0.2316, "step": 7471 }, { "epoch": 0.13327150144472585, "grad_norm": 0.3120078146457672, "learning_rate": 4.9831473880337095e-05, "loss": 0.2464, "step": 7472 }, { "epoch": 0.13328933756643954, "grad_norm": 0.2935456931591034, "learning_rate": 4.983129340721109e-05, "loss": 0.2744, "step": 7473 }, { "epoch": 0.13330717368815326, "grad_norm": 0.2654741108417511, "learning_rate": 4.9831112837830406e-05, "loss": 0.1975, "step": 7474 }, { "epoch": 0.13332500980986695, "grad_norm": 0.26337653398513794, "learning_rate": 4.983093217219575e-05, "loss": 0.2111, "step": 7475 }, { "epoch": 0.13334284593158063, "grad_norm": 0.2705751359462738, "learning_rate": 4.983075141030784e-05, "loss": 0.219, "step": 7476 }, { "epoch": 0.13336068205329432, "grad_norm": 0.26234325766563416, "learning_rate": 4.9830570552167356e-05, "loss": 0.2491, "step": 7477 }, { "epoch": 0.13337851817500804, "grad_norm": 0.31031814217567444, "learning_rate": 4.9830389597774996e-05, "loss": 0.2297, "step": 7478 }, { "epoch": 0.13339635429672173, "grad_norm": 0.23101654648780823, "learning_rate": 4.983020854713147e-05, "loss": 0.2085, "step": 7479 }, { "epoch": 0.1334141904184354, "grad_norm": 0.34721609950065613, "learning_rate": 4.983002740023749e-05, "loss": 0.2502, "step": 7480 }, { "epoch": 0.1334320265401491, "grad_norm": 0.2588844895362854, "learning_rate": 4.982984615709374e-05, "loss": 0.2182, "step": 7481 }, { "epoch": 0.13344986266186282, "grad_norm": 0.3425954282283783, "learning_rate": 4.982966481770093e-05, "loss": 0.2143, "step": 7482 }, { "epoch": 0.1334676987835765, "grad_norm": 0.297648549079895, "learning_rate": 4.982948338205977e-05, "loss": 0.2721, "step": 7483 }, { "epoch": 0.1334855349052902, "grad_norm": 0.3153221011161804, "learning_rate": 4.982930185017095e-05, "loss": 0.2366, "step": 7484 }, { "epoch": 0.13350337102700388, "grad_norm": 0.46999824047088623, "learning_rate": 4.982912022203519e-05, "loss": 0.2137, "step": 7485 }, { "epoch": 0.1335212071487176, "grad_norm": 0.21449771523475647, "learning_rate": 4.9828938497653165e-05, "loss": 0.1918, "step": 7486 }, { "epoch": 0.13353904327043128, "grad_norm": 0.27725961804389954, "learning_rate": 4.9828756677025614e-05, "loss": 0.2502, "step": 7487 }, { "epoch": 0.13355687939214497, "grad_norm": 0.39225447177886963, "learning_rate": 4.9828574760153227e-05, "loss": 0.2417, "step": 7488 }, { "epoch": 0.13357471551385866, "grad_norm": 0.22374741733074188, "learning_rate": 4.98283927470367e-05, "loss": 0.2059, "step": 7489 }, { "epoch": 0.13359255163557235, "grad_norm": 0.29595109820365906, "learning_rate": 4.982821063767675e-05, "loss": 0.2066, "step": 7490 }, { "epoch": 0.13361038775728606, "grad_norm": 0.2838314175605774, "learning_rate": 4.982802843207408e-05, "loss": 0.2573, "step": 7491 }, { "epoch": 0.13362822387899975, "grad_norm": 0.18106932938098907, "learning_rate": 4.982784613022941e-05, "loss": 0.2278, "step": 7492 }, { "epoch": 0.13364606000071344, "grad_norm": 0.33832937479019165, "learning_rate": 4.9827663732143414e-05, "loss": 0.2553, "step": 7493 }, { "epoch": 0.13366389612242713, "grad_norm": 0.23699772357940674, "learning_rate": 4.9827481237816824e-05, "loss": 0.1986, "step": 7494 }, { "epoch": 0.13368173224414084, "grad_norm": 0.30046558380126953, "learning_rate": 4.9827298647250344e-05, "loss": 0.2143, "step": 7495 }, { "epoch": 0.13369956836585453, "grad_norm": 0.2665005624294281, "learning_rate": 4.982711596044468e-05, "loss": 0.2324, "step": 7496 }, { "epoch": 0.13371740448756822, "grad_norm": 0.30308958888053894, "learning_rate": 4.982693317740053e-05, "loss": 0.2719, "step": 7497 }, { "epoch": 0.1337352406092819, "grad_norm": 0.4038073420524597, "learning_rate": 4.982675029811863e-05, "loss": 0.3033, "step": 7498 }, { "epoch": 0.13375307673099562, "grad_norm": 0.24031807482242584, "learning_rate": 4.982656732259966e-05, "loss": 0.1977, "step": 7499 }, { "epoch": 0.1337709128527093, "grad_norm": 0.3382944166660309, "learning_rate": 4.9826384250844346e-05, "loss": 0.2559, "step": 7500 }, { "epoch": 0.133788748974423, "grad_norm": 0.3220905363559723, "learning_rate": 4.9826201082853385e-05, "loss": 0.2957, "step": 7501 }, { "epoch": 0.1338065850961367, "grad_norm": 0.3390187919139862, "learning_rate": 4.9826017818627494e-05, "loss": 0.2495, "step": 7502 }, { "epoch": 0.1338244212178504, "grad_norm": 0.2769834101200104, "learning_rate": 4.9825834458167385e-05, "loss": 0.2327, "step": 7503 }, { "epoch": 0.1338422573395641, "grad_norm": 0.36122724413871765, "learning_rate": 4.9825651001473775e-05, "loss": 0.3026, "step": 7504 }, { "epoch": 0.13386009346127778, "grad_norm": 0.25237777829170227, "learning_rate": 4.982546744854736e-05, "loss": 0.2315, "step": 7505 }, { "epoch": 0.13387792958299147, "grad_norm": 0.37935492396354675, "learning_rate": 4.9825283799388854e-05, "loss": 0.2855, "step": 7506 }, { "epoch": 0.13389576570470516, "grad_norm": 0.29708775877952576, "learning_rate": 4.982510005399897e-05, "loss": 0.2491, "step": 7507 }, { "epoch": 0.13391360182641887, "grad_norm": 0.4350215792655945, "learning_rate": 4.9824916212378436e-05, "loss": 0.286, "step": 7508 }, { "epoch": 0.13393143794813256, "grad_norm": 0.3282392621040344, "learning_rate": 4.982473227452795e-05, "loss": 0.2853, "step": 7509 }, { "epoch": 0.13394927406984625, "grad_norm": 0.6081443428993225, "learning_rate": 4.9824548240448234e-05, "loss": 0.2892, "step": 7510 }, { "epoch": 0.13396711019155993, "grad_norm": 0.24356110394001007, "learning_rate": 4.9824364110139984e-05, "loss": 0.2276, "step": 7511 }, { "epoch": 0.13398494631327365, "grad_norm": 0.2384781390428543, "learning_rate": 4.9824179883603926e-05, "loss": 0.2432, "step": 7512 }, { "epoch": 0.13400278243498734, "grad_norm": 0.3611038029193878, "learning_rate": 4.982399556084078e-05, "loss": 0.3191, "step": 7513 }, { "epoch": 0.13402061855670103, "grad_norm": 0.2158660739660263, "learning_rate": 4.982381114185124e-05, "loss": 0.1981, "step": 7514 }, { "epoch": 0.13403845467841471, "grad_norm": 0.22014988958835602, "learning_rate": 4.9823626626636045e-05, "loss": 0.2359, "step": 7515 }, { "epoch": 0.13405629080012843, "grad_norm": 0.26914459466934204, "learning_rate": 4.9823442015195896e-05, "loss": 0.2023, "step": 7516 }, { "epoch": 0.13407412692184212, "grad_norm": 0.36341536045074463, "learning_rate": 4.982325730753151e-05, "loss": 0.3064, "step": 7517 }, { "epoch": 0.1340919630435558, "grad_norm": 0.29143232107162476, "learning_rate": 4.9823072503643606e-05, "loss": 0.2266, "step": 7518 }, { "epoch": 0.1341097991652695, "grad_norm": 0.3108413815498352, "learning_rate": 4.98228876035329e-05, "loss": 0.2183, "step": 7519 }, { "epoch": 0.1341276352869832, "grad_norm": 0.3253674805164337, "learning_rate": 4.9822702607200114e-05, "loss": 0.2183, "step": 7520 }, { "epoch": 0.1341454714086969, "grad_norm": 0.2737044095993042, "learning_rate": 4.982251751464595e-05, "loss": 0.2016, "step": 7521 }, { "epoch": 0.13416330753041059, "grad_norm": 0.2607121467590332, "learning_rate": 4.982233232587114e-05, "loss": 0.2314, "step": 7522 }, { "epoch": 0.13418114365212427, "grad_norm": 0.2589547634124756, "learning_rate": 4.98221470408764e-05, "loss": 0.224, "step": 7523 }, { "epoch": 0.134198979773838, "grad_norm": 0.24485012888908386, "learning_rate": 4.9821961659662434e-05, "loss": 0.2448, "step": 7524 }, { "epoch": 0.13421681589555168, "grad_norm": 0.3238263428211212, "learning_rate": 4.9821776182229976e-05, "loss": 0.2345, "step": 7525 }, { "epoch": 0.13423465201726537, "grad_norm": 0.28708145022392273, "learning_rate": 4.982159060857975e-05, "loss": 0.2222, "step": 7526 }, { "epoch": 0.13425248813897905, "grad_norm": 0.3221355378627777, "learning_rate": 4.9821404938712454e-05, "loss": 0.2758, "step": 7527 }, { "epoch": 0.13427032426069274, "grad_norm": 0.31099021434783936, "learning_rate": 4.982121917262882e-05, "loss": 0.2624, "step": 7528 }, { "epoch": 0.13428816038240646, "grad_norm": 0.18360298871994019, "learning_rate": 4.982103331032957e-05, "loss": 0.1803, "step": 7529 }, { "epoch": 0.13430599650412015, "grad_norm": 0.2785327136516571, "learning_rate": 4.9820847351815424e-05, "loss": 0.2526, "step": 7530 }, { "epoch": 0.13432383262583383, "grad_norm": 0.1713157743215561, "learning_rate": 4.98206612970871e-05, "loss": 0.1979, "step": 7531 }, { "epoch": 0.13434166874754752, "grad_norm": 0.3337853252887726, "learning_rate": 4.9820475146145317e-05, "loss": 0.2219, "step": 7532 }, { "epoch": 0.13435950486926124, "grad_norm": 0.4755903482437134, "learning_rate": 4.9820288898990804e-05, "loss": 0.2267, "step": 7533 }, { "epoch": 0.13437734099097493, "grad_norm": 0.2882138192653656, "learning_rate": 4.982010255562428e-05, "loss": 0.248, "step": 7534 }, { "epoch": 0.1343951771126886, "grad_norm": 0.3054235577583313, "learning_rate": 4.981991611604646e-05, "loss": 0.2103, "step": 7535 }, { "epoch": 0.1344130132344023, "grad_norm": 0.33048173785209656, "learning_rate": 4.981972958025807e-05, "loss": 0.2469, "step": 7536 }, { "epoch": 0.13443084935611602, "grad_norm": 0.25596049427986145, "learning_rate": 4.9819542948259843e-05, "loss": 0.209, "step": 7537 }, { "epoch": 0.1344486854778297, "grad_norm": 0.2825992703437805, "learning_rate": 4.981935622005249e-05, "loss": 0.1947, "step": 7538 }, { "epoch": 0.1344665215995434, "grad_norm": 0.28847551345825195, "learning_rate": 4.9819169395636744e-05, "loss": 0.2466, "step": 7539 }, { "epoch": 0.13448435772125708, "grad_norm": 0.2828240394592285, "learning_rate": 4.981898247501333e-05, "loss": 0.2332, "step": 7540 }, { "epoch": 0.1345021938429708, "grad_norm": 0.28144776821136475, "learning_rate": 4.9818795458182955e-05, "loss": 0.1825, "step": 7541 }, { "epoch": 0.13452002996468448, "grad_norm": 0.29975757002830505, "learning_rate": 4.981860834514637e-05, "loss": 0.2202, "step": 7542 }, { "epoch": 0.13453786608639817, "grad_norm": 0.33452877402305603, "learning_rate": 4.9818421135904276e-05, "loss": 0.2115, "step": 7543 }, { "epoch": 0.13455570220811186, "grad_norm": 0.2965460419654846, "learning_rate": 4.9818233830457414e-05, "loss": 0.2238, "step": 7544 }, { "epoch": 0.13457353832982558, "grad_norm": 0.41199344396591187, "learning_rate": 4.981804642880651e-05, "loss": 0.2071, "step": 7545 }, { "epoch": 0.13459137445153926, "grad_norm": 0.39077064394950867, "learning_rate": 4.981785893095228e-05, "loss": 0.1899, "step": 7546 }, { "epoch": 0.13460921057325295, "grad_norm": 0.30126917362213135, "learning_rate": 4.981767133689545e-05, "loss": 0.2044, "step": 7547 }, { "epoch": 0.13462704669496664, "grad_norm": 0.5434653162956238, "learning_rate": 4.981748364663677e-05, "loss": 0.2414, "step": 7548 }, { "epoch": 0.13464488281668033, "grad_norm": 0.35855361819267273, "learning_rate": 4.981729586017694e-05, "loss": 0.2518, "step": 7549 }, { "epoch": 0.13466271893839404, "grad_norm": 0.3158375322818756, "learning_rate": 4.9817107977516706e-05, "loss": 0.2736, "step": 7550 }, { "epoch": 0.13468055506010773, "grad_norm": 0.2712560296058655, "learning_rate": 4.9816919998656784e-05, "loss": 0.1824, "step": 7551 }, { "epoch": 0.13469839118182142, "grad_norm": 0.35339856147766113, "learning_rate": 4.9816731923597914e-05, "loss": 0.2988, "step": 7552 }, { "epoch": 0.1347162273035351, "grad_norm": 0.25982826948165894, "learning_rate": 4.981654375234082e-05, "loss": 0.2368, "step": 7553 }, { "epoch": 0.13473406342524882, "grad_norm": 0.24519726634025574, "learning_rate": 4.9816355484886237e-05, "loss": 0.2228, "step": 7554 }, { "epoch": 0.1347518995469625, "grad_norm": 0.2974936068058014, "learning_rate": 4.981616712123488e-05, "loss": 0.1965, "step": 7555 }, { "epoch": 0.1347697356686762, "grad_norm": 0.26997777819633484, "learning_rate": 4.981597866138749e-05, "loss": 0.2227, "step": 7556 }, { "epoch": 0.1347875717903899, "grad_norm": 0.2788987457752228, "learning_rate": 4.981579010534479e-05, "loss": 0.2542, "step": 7557 }, { "epoch": 0.1348054079121036, "grad_norm": 0.28224748373031616, "learning_rate": 4.9815601453107516e-05, "loss": 0.2572, "step": 7558 }, { "epoch": 0.1348232440338173, "grad_norm": 0.30463093519210815, "learning_rate": 4.981541270467641e-05, "loss": 0.2796, "step": 7559 }, { "epoch": 0.13484108015553098, "grad_norm": 0.3485594093799591, "learning_rate": 4.981522386005219e-05, "loss": 0.2443, "step": 7560 }, { "epoch": 0.13485891627724467, "grad_norm": 0.2835558354854584, "learning_rate": 4.981503491923559e-05, "loss": 0.2386, "step": 7561 }, { "epoch": 0.13487675239895838, "grad_norm": 0.23558658361434937, "learning_rate": 4.981484588222735e-05, "loss": 0.1589, "step": 7562 }, { "epoch": 0.13489458852067207, "grad_norm": 0.25932517647743225, "learning_rate": 4.981465674902818e-05, "loss": 0.1913, "step": 7563 }, { "epoch": 0.13491242464238576, "grad_norm": 0.2826513946056366, "learning_rate": 4.981446751963884e-05, "loss": 0.1794, "step": 7564 }, { "epoch": 0.13493026076409945, "grad_norm": 0.24743780493736267, "learning_rate": 4.981427819406006e-05, "loss": 0.2051, "step": 7565 }, { "epoch": 0.13494809688581316, "grad_norm": 0.28147047758102417, "learning_rate": 4.981408877229256e-05, "loss": 0.229, "step": 7566 }, { "epoch": 0.13496593300752685, "grad_norm": 0.2662518322467804, "learning_rate": 4.9813899254337084e-05, "loss": 0.2272, "step": 7567 }, { "epoch": 0.13498376912924054, "grad_norm": 0.26303184032440186, "learning_rate": 4.981370964019436e-05, "loss": 0.1733, "step": 7568 }, { "epoch": 0.13500160525095423, "grad_norm": 0.2362370789051056, "learning_rate": 4.9813519929865125e-05, "loss": 0.2038, "step": 7569 }, { "epoch": 0.13501944137266791, "grad_norm": 0.2295181155204773, "learning_rate": 4.981333012335012e-05, "loss": 0.2052, "step": 7570 }, { "epoch": 0.13503727749438163, "grad_norm": 0.29024702310562134, "learning_rate": 4.981314022065008e-05, "loss": 0.253, "step": 7571 }, { "epoch": 0.13505511361609532, "grad_norm": 0.31106990575790405, "learning_rate": 4.981295022176573e-05, "loss": 0.1461, "step": 7572 }, { "epoch": 0.135072949737809, "grad_norm": 0.31030508875846863, "learning_rate": 4.981276012669782e-05, "loss": 0.2392, "step": 7573 }, { "epoch": 0.1350907858595227, "grad_norm": 0.2728276252746582, "learning_rate": 4.9812569935447087e-05, "loss": 0.2436, "step": 7574 }, { "epoch": 0.1351086219812364, "grad_norm": 0.24788887798786163, "learning_rate": 4.981237964801426e-05, "loss": 0.1683, "step": 7575 }, { "epoch": 0.1351264581029501, "grad_norm": 0.3057432472705841, "learning_rate": 4.9812189264400075e-05, "loss": 0.2349, "step": 7576 }, { "epoch": 0.13514429422466379, "grad_norm": 0.44676804542541504, "learning_rate": 4.981199878460528e-05, "loss": 0.2308, "step": 7577 }, { "epoch": 0.13516213034637747, "grad_norm": 0.41504162549972534, "learning_rate": 4.98118082086306e-05, "loss": 0.2349, "step": 7578 }, { "epoch": 0.1351799664680912, "grad_norm": 0.3064153492450714, "learning_rate": 4.98116175364768e-05, "loss": 0.219, "step": 7579 }, { "epoch": 0.13519780258980488, "grad_norm": 0.35112741589546204, "learning_rate": 4.9811426768144574e-05, "loss": 0.2371, "step": 7580 }, { "epoch": 0.13521563871151857, "grad_norm": 0.2610100507736206, "learning_rate": 4.981123590363471e-05, "loss": 0.2202, "step": 7581 }, { "epoch": 0.13523347483323225, "grad_norm": 0.3716852068901062, "learning_rate": 4.981104494294792e-05, "loss": 0.1981, "step": 7582 }, { "epoch": 0.13525131095494597, "grad_norm": 0.339006632566452, "learning_rate": 4.981085388608494e-05, "loss": 0.2599, "step": 7583 }, { "epoch": 0.13526914707665966, "grad_norm": 0.27571389079093933, "learning_rate": 4.9810662733046534e-05, "loss": 0.1884, "step": 7584 }, { "epoch": 0.13528698319837335, "grad_norm": 0.24952851235866547, "learning_rate": 4.9810471483833426e-05, "loss": 0.2138, "step": 7585 }, { "epoch": 0.13530481932008703, "grad_norm": 0.24076996743679047, "learning_rate": 4.981028013844636e-05, "loss": 0.2029, "step": 7586 }, { "epoch": 0.13532265544180072, "grad_norm": 0.569031834602356, "learning_rate": 4.9810088696886084e-05, "loss": 0.3176, "step": 7587 }, { "epoch": 0.13534049156351444, "grad_norm": 0.2556706368923187, "learning_rate": 4.980989715915333e-05, "loss": 0.2467, "step": 7588 }, { "epoch": 0.13535832768522812, "grad_norm": 0.2645498216152191, "learning_rate": 4.980970552524884e-05, "loss": 0.2695, "step": 7589 }, { "epoch": 0.1353761638069418, "grad_norm": 0.26399847865104675, "learning_rate": 4.980951379517337e-05, "loss": 0.2065, "step": 7590 }, { "epoch": 0.1353939999286555, "grad_norm": 0.2496369630098343, "learning_rate": 4.980932196892766e-05, "loss": 0.2078, "step": 7591 }, { "epoch": 0.13541183605036922, "grad_norm": 0.19830262660980225, "learning_rate": 4.980913004651244e-05, "loss": 0.2083, "step": 7592 }, { "epoch": 0.1354296721720829, "grad_norm": 0.28163546323776245, "learning_rate": 4.9808938027928466e-05, "loss": 0.2439, "step": 7593 }, { "epoch": 0.1354475082937966, "grad_norm": 0.2291107177734375, "learning_rate": 4.980874591317648e-05, "loss": 0.1775, "step": 7594 }, { "epoch": 0.13546534441551028, "grad_norm": 0.2885727882385254, "learning_rate": 4.980855370225722e-05, "loss": 0.1927, "step": 7595 }, { "epoch": 0.135483180537224, "grad_norm": 0.21624629199504852, "learning_rate": 4.980836139517145e-05, "loss": 0.1885, "step": 7596 }, { "epoch": 0.13550101665893768, "grad_norm": 0.44790226221084595, "learning_rate": 4.98081689919199e-05, "loss": 0.2218, "step": 7597 }, { "epoch": 0.13551885278065137, "grad_norm": 0.3004307746887207, "learning_rate": 4.980797649250331e-05, "loss": 0.2285, "step": 7598 }, { "epoch": 0.13553668890236506, "grad_norm": 0.29081809520721436, "learning_rate": 4.980778389692244e-05, "loss": 0.225, "step": 7599 }, { "epoch": 0.13555452502407878, "grad_norm": 0.2907610237598419, "learning_rate": 4.980759120517803e-05, "loss": 0.2349, "step": 7600 }, { "epoch": 0.13557236114579246, "grad_norm": 0.21427428722381592, "learning_rate": 4.980739841727083e-05, "loss": 0.2029, "step": 7601 }, { "epoch": 0.13559019726750615, "grad_norm": 0.42423343658447266, "learning_rate": 4.980720553320158e-05, "loss": 0.3132, "step": 7602 }, { "epoch": 0.13560803338921984, "grad_norm": 0.29754114151000977, "learning_rate": 4.9807012552971045e-05, "loss": 0.2514, "step": 7603 }, { "epoch": 0.13562586951093356, "grad_norm": 0.3172772526741028, "learning_rate": 4.980681947657995e-05, "loss": 0.2296, "step": 7604 }, { "epoch": 0.13564370563264724, "grad_norm": 0.30040034651756287, "learning_rate": 4.9806626304029056e-05, "loss": 0.2615, "step": 7605 }, { "epoch": 0.13566154175436093, "grad_norm": 0.3222219944000244, "learning_rate": 4.9806433035319114e-05, "loss": 0.2498, "step": 7606 }, { "epoch": 0.13567937787607462, "grad_norm": 0.25147029757499695, "learning_rate": 4.980623967045087e-05, "loss": 0.2258, "step": 7607 }, { "epoch": 0.1356972139977883, "grad_norm": 0.3038370609283447, "learning_rate": 4.980604620942507e-05, "loss": 0.2091, "step": 7608 }, { "epoch": 0.13571505011950202, "grad_norm": 0.35255715250968933, "learning_rate": 4.980585265224247e-05, "loss": 0.1884, "step": 7609 }, { "epoch": 0.1357328862412157, "grad_norm": 0.3303506076335907, "learning_rate": 4.9805658998903815e-05, "loss": 0.189, "step": 7610 }, { "epoch": 0.1357507223629294, "grad_norm": 0.3681982755661011, "learning_rate": 4.980546524940987e-05, "loss": 0.2408, "step": 7611 }, { "epoch": 0.1357685584846431, "grad_norm": 0.3078673481941223, "learning_rate": 4.980527140376136e-05, "loss": 0.2295, "step": 7612 }, { "epoch": 0.1357863946063568, "grad_norm": 0.2225552201271057, "learning_rate": 4.980507746195905e-05, "loss": 0.2092, "step": 7613 }, { "epoch": 0.1358042307280705, "grad_norm": 0.2969847023487091, "learning_rate": 4.98048834240037e-05, "loss": 0.2222, "step": 7614 }, { "epoch": 0.13582206684978418, "grad_norm": 0.43571245670318604, "learning_rate": 4.980468928989605e-05, "loss": 0.2177, "step": 7615 }, { "epoch": 0.13583990297149787, "grad_norm": 0.2678923010826111, "learning_rate": 4.980449505963686e-05, "loss": 0.2913, "step": 7616 }, { "epoch": 0.13585773909321158, "grad_norm": 0.22254163026809692, "learning_rate": 4.9804300733226875e-05, "loss": 0.1995, "step": 7617 }, { "epoch": 0.13587557521492527, "grad_norm": 0.29656386375427246, "learning_rate": 4.980410631066686e-05, "loss": 0.2393, "step": 7618 }, { "epoch": 0.13589341133663896, "grad_norm": 0.30731868743896484, "learning_rate": 4.980391179195756e-05, "loss": 0.249, "step": 7619 }, { "epoch": 0.13591124745835265, "grad_norm": 0.3457026481628418, "learning_rate": 4.980371717709973e-05, "loss": 0.2449, "step": 7620 }, { "epoch": 0.13592908358006636, "grad_norm": 0.33114808797836304, "learning_rate": 4.980352246609412e-05, "loss": 0.2788, "step": 7621 }, { "epoch": 0.13594691970178005, "grad_norm": 0.3019861578941345, "learning_rate": 4.9803327658941494e-05, "loss": 0.2316, "step": 7622 }, { "epoch": 0.13596475582349374, "grad_norm": 0.24484869837760925, "learning_rate": 4.9803132755642604e-05, "loss": 0.2464, "step": 7623 }, { "epoch": 0.13598259194520743, "grad_norm": 0.26004162430763245, "learning_rate": 4.980293775619821e-05, "loss": 0.2382, "step": 7624 }, { "epoch": 0.13600042806692114, "grad_norm": 0.2276809811592102, "learning_rate": 4.980274266060905e-05, "loss": 0.2028, "step": 7625 }, { "epoch": 0.13601826418863483, "grad_norm": 0.30349066853523254, "learning_rate": 4.9802547468875906e-05, "loss": 0.2134, "step": 7626 }, { "epoch": 0.13603610031034852, "grad_norm": 0.4561436176300049, "learning_rate": 4.9802352180999514e-05, "loss": 0.3011, "step": 7627 }, { "epoch": 0.1360539364320622, "grad_norm": 0.29373860359191895, "learning_rate": 4.9802156796980634e-05, "loss": 0.2635, "step": 7628 }, { "epoch": 0.1360717725537759, "grad_norm": 0.43702778220176697, "learning_rate": 4.980196131682004e-05, "loss": 0.2278, "step": 7629 }, { "epoch": 0.1360896086754896, "grad_norm": 0.2533094584941864, "learning_rate": 4.980176574051847e-05, "loss": 0.2398, "step": 7630 }, { "epoch": 0.1361074447972033, "grad_norm": 0.29270872473716736, "learning_rate": 4.9801570068076694e-05, "loss": 0.2301, "step": 7631 }, { "epoch": 0.13612528091891699, "grad_norm": 0.22814325988292694, "learning_rate": 4.9801374299495464e-05, "loss": 0.1466, "step": 7632 }, { "epoch": 0.13614311704063067, "grad_norm": 0.2394150346517563, "learning_rate": 4.980117843477554e-05, "loss": 0.2213, "step": 7633 }, { "epoch": 0.1361609531623444, "grad_norm": 0.38309991359710693, "learning_rate": 4.980098247391768e-05, "loss": 0.2326, "step": 7634 }, { "epoch": 0.13617878928405808, "grad_norm": 0.35889971256256104, "learning_rate": 4.980078641692265e-05, "loss": 0.2485, "step": 7635 }, { "epoch": 0.13619662540577177, "grad_norm": 0.22773735225200653, "learning_rate": 4.9800590263791205e-05, "loss": 0.2159, "step": 7636 }, { "epoch": 0.13621446152748545, "grad_norm": 0.26242002844810486, "learning_rate": 4.980039401452411e-05, "loss": 0.2011, "step": 7637 }, { "epoch": 0.13623229764919917, "grad_norm": 0.33753737807273865, "learning_rate": 4.9800197669122116e-05, "loss": 0.2791, "step": 7638 }, { "epoch": 0.13625013377091286, "grad_norm": 0.2737777829170227, "learning_rate": 4.980000122758599e-05, "loss": 0.1997, "step": 7639 }, { "epoch": 0.13626796989262654, "grad_norm": 0.2658303380012512, "learning_rate": 4.9799804689916496e-05, "loss": 0.2115, "step": 7640 }, { "epoch": 0.13628580601434023, "grad_norm": 0.3175022304058075, "learning_rate": 4.979960805611439e-05, "loss": 0.2359, "step": 7641 }, { "epoch": 0.13630364213605395, "grad_norm": 0.3396710455417633, "learning_rate": 4.979941132618045e-05, "loss": 0.2294, "step": 7642 }, { "epoch": 0.13632147825776764, "grad_norm": 0.24313685297966003, "learning_rate": 4.979921450011541e-05, "loss": 0.2103, "step": 7643 }, { "epoch": 0.13633931437948132, "grad_norm": 0.26737284660339355, "learning_rate": 4.979901757792006e-05, "loss": 0.2164, "step": 7644 }, { "epoch": 0.136357150501195, "grad_norm": 0.2784193158149719, "learning_rate": 4.979882055959515e-05, "loss": 0.2089, "step": 7645 }, { "epoch": 0.13637498662290873, "grad_norm": 0.3830283582210541, "learning_rate": 4.9798623445141446e-05, "loss": 0.2285, "step": 7646 }, { "epoch": 0.13639282274462242, "grad_norm": 0.39115971326828003, "learning_rate": 4.979842623455971e-05, "loss": 0.2719, "step": 7647 }, { "epoch": 0.1364106588663361, "grad_norm": 0.24941574037075043, "learning_rate": 4.979822892785071e-05, "loss": 0.2265, "step": 7648 }, { "epoch": 0.1364284949880498, "grad_norm": 0.27254605293273926, "learning_rate": 4.979803152501521e-05, "loss": 0.2409, "step": 7649 }, { "epoch": 0.13644633110976348, "grad_norm": 0.3258862793445587, "learning_rate": 4.979783402605398e-05, "loss": 0.2733, "step": 7650 }, { "epoch": 0.1364641672314772, "grad_norm": 0.3291133642196655, "learning_rate": 4.979763643096777e-05, "loss": 0.2678, "step": 7651 }, { "epoch": 0.13648200335319088, "grad_norm": 0.3363911211490631, "learning_rate": 4.9797438739757366e-05, "loss": 0.28, "step": 7652 }, { "epoch": 0.13649983947490457, "grad_norm": 0.25408124923706055, "learning_rate": 4.979724095242352e-05, "loss": 0.194, "step": 7653 }, { "epoch": 0.13651767559661826, "grad_norm": 0.3379240930080414, "learning_rate": 4.9797043068967e-05, "loss": 0.2662, "step": 7654 }, { "epoch": 0.13653551171833198, "grad_norm": 0.2658340632915497, "learning_rate": 4.979684508938858e-05, "loss": 0.1696, "step": 7655 }, { "epoch": 0.13655334784004566, "grad_norm": 0.29358401894569397, "learning_rate": 4.979664701368903e-05, "loss": 0.243, "step": 7656 }, { "epoch": 0.13657118396175935, "grad_norm": 0.231268510222435, "learning_rate": 4.9796448841869104e-05, "loss": 0.2119, "step": 7657 }, { "epoch": 0.13658902008347304, "grad_norm": 0.3786660134792328, "learning_rate": 4.979625057392958e-05, "loss": 0.1902, "step": 7658 }, { "epoch": 0.13660685620518676, "grad_norm": 0.35064491629600525, "learning_rate": 4.979605220987122e-05, "loss": 0.2257, "step": 7659 }, { "epoch": 0.13662469232690044, "grad_norm": 0.2789657413959503, "learning_rate": 4.97958537496948e-05, "loss": 0.2233, "step": 7660 }, { "epoch": 0.13664252844861413, "grad_norm": 0.24864362180233002, "learning_rate": 4.979565519340109e-05, "loss": 0.2041, "step": 7661 }, { "epoch": 0.13666036457032782, "grad_norm": 0.2274325042963028, "learning_rate": 4.979545654099086e-05, "loss": 0.217, "step": 7662 }, { "epoch": 0.13667820069204153, "grad_norm": 0.6578096151351929, "learning_rate": 4.9795257792464865e-05, "loss": 0.251, "step": 7663 }, { "epoch": 0.13669603681375522, "grad_norm": 0.2294447273015976, "learning_rate": 4.979505894782389e-05, "loss": 0.2561, "step": 7664 }, { "epoch": 0.1367138729354689, "grad_norm": 0.3409743010997772, "learning_rate": 4.979486000706871e-05, "loss": 0.2623, "step": 7665 }, { "epoch": 0.1367317090571826, "grad_norm": 0.3147277534008026, "learning_rate": 4.979466097020008e-05, "loss": 0.2224, "step": 7666 }, { "epoch": 0.13674954517889631, "grad_norm": 0.28334489464759827, "learning_rate": 4.979446183721879e-05, "loss": 0.2328, "step": 7667 }, { "epoch": 0.13676738130061, "grad_norm": 0.25945356488227844, "learning_rate": 4.97942626081256e-05, "loss": 0.2445, "step": 7668 }, { "epoch": 0.1367852174223237, "grad_norm": 0.23040388524532318, "learning_rate": 4.979406328292128e-05, "loss": 0.2049, "step": 7669 }, { "epoch": 0.13680305354403738, "grad_norm": 0.22734519839286804, "learning_rate": 4.9793863861606606e-05, "loss": 0.2041, "step": 7670 }, { "epoch": 0.13682088966575107, "grad_norm": 0.3171316683292389, "learning_rate": 4.979366434418235e-05, "loss": 0.2241, "step": 7671 }, { "epoch": 0.13683872578746478, "grad_norm": 0.28010791540145874, "learning_rate": 4.97934647306493e-05, "loss": 0.2299, "step": 7672 }, { "epoch": 0.13685656190917847, "grad_norm": 0.317367821931839, "learning_rate": 4.979326502100821e-05, "loss": 0.2972, "step": 7673 }, { "epoch": 0.13687439803089216, "grad_norm": 0.23120209574699402, "learning_rate": 4.979306521525985e-05, "loss": 0.2046, "step": 7674 }, { "epoch": 0.13689223415260585, "grad_norm": 0.2688504755496979, "learning_rate": 4.9792865313405016e-05, "loss": 0.2804, "step": 7675 }, { "epoch": 0.13691007027431956, "grad_norm": 0.27711114287376404, "learning_rate": 4.9792665315444474e-05, "loss": 0.2628, "step": 7676 }, { "epoch": 0.13692790639603325, "grad_norm": 0.29201340675354004, "learning_rate": 4.9792465221379005e-05, "loss": 0.2444, "step": 7677 }, { "epoch": 0.13694574251774694, "grad_norm": 0.3418673276901245, "learning_rate": 4.979226503120936e-05, "loss": 0.2169, "step": 7678 }, { "epoch": 0.13696357863946063, "grad_norm": 0.2988224923610687, "learning_rate": 4.979206474493635e-05, "loss": 0.2612, "step": 7679 }, { "epoch": 0.13698141476117434, "grad_norm": 0.28798702359199524, "learning_rate": 4.9791864362560725e-05, "loss": 0.1973, "step": 7680 }, { "epoch": 0.13699925088288803, "grad_norm": 0.33267301321029663, "learning_rate": 4.979166388408327e-05, "loss": 0.2744, "step": 7681 }, { "epoch": 0.13701708700460172, "grad_norm": 0.22727416455745697, "learning_rate": 4.979146330950477e-05, "loss": 0.2341, "step": 7682 }, { "epoch": 0.1370349231263154, "grad_norm": 0.21520079672336578, "learning_rate": 4.979126263882599e-05, "loss": 0.1871, "step": 7683 }, { "epoch": 0.13705275924802912, "grad_norm": 0.3009205162525177, "learning_rate": 4.979106187204772e-05, "loss": 0.2557, "step": 7684 }, { "epoch": 0.1370705953697428, "grad_norm": 0.2567964494228363, "learning_rate": 4.979086100917072e-05, "loss": 0.2346, "step": 7685 }, { "epoch": 0.1370884314914565, "grad_norm": 0.25454699993133545, "learning_rate": 4.979066005019579e-05, "loss": 0.2332, "step": 7686 }, { "epoch": 0.13710626761317019, "grad_norm": 0.21537166833877563, "learning_rate": 4.97904589951237e-05, "loss": 0.2111, "step": 7687 }, { "epoch": 0.13712410373488387, "grad_norm": 0.3063955307006836, "learning_rate": 4.979025784395522e-05, "loss": 0.2375, "step": 7688 }, { "epoch": 0.1371419398565976, "grad_norm": 0.28575992584228516, "learning_rate": 4.979005659669114e-05, "loss": 0.2595, "step": 7689 }, { "epoch": 0.13715977597831128, "grad_norm": 0.29159319400787354, "learning_rate": 4.978985525333224e-05, "loss": 0.2096, "step": 7690 }, { "epoch": 0.13717761210002496, "grad_norm": 0.27432194352149963, "learning_rate": 4.9789653813879305e-05, "loss": 0.2223, "step": 7691 }, { "epoch": 0.13719544822173865, "grad_norm": 0.25467804074287415, "learning_rate": 4.9789452278333106e-05, "loss": 0.1564, "step": 7692 }, { "epoch": 0.13721328434345237, "grad_norm": 0.20438627898693085, "learning_rate": 4.978925064669443e-05, "loss": 0.1789, "step": 7693 }, { "epoch": 0.13723112046516606, "grad_norm": 0.36302071809768677, "learning_rate": 4.978904891896405e-05, "loss": 0.2068, "step": 7694 }, { "epoch": 0.13724895658687974, "grad_norm": 0.2433769255876541, "learning_rate": 4.9788847095142754e-05, "loss": 0.2283, "step": 7695 }, { "epoch": 0.13726679270859343, "grad_norm": 0.27924856543540955, "learning_rate": 4.978864517523133e-05, "loss": 0.2749, "step": 7696 }, { "epoch": 0.13728462883030715, "grad_norm": 0.29090026021003723, "learning_rate": 4.9788443159230556e-05, "loss": 0.2267, "step": 7697 }, { "epoch": 0.13730246495202084, "grad_norm": 0.38446855545043945, "learning_rate": 4.9788241047141216e-05, "loss": 0.2827, "step": 7698 }, { "epoch": 0.13732030107373452, "grad_norm": 0.2920713722705841, "learning_rate": 4.9788038838964093e-05, "loss": 0.2333, "step": 7699 }, { "epoch": 0.1373381371954482, "grad_norm": 0.20829983055591583, "learning_rate": 4.978783653469996e-05, "loss": 0.2014, "step": 7700 }, { "epoch": 0.13735597331716193, "grad_norm": 0.2887932360172272, "learning_rate": 4.9787634134349614e-05, "loss": 0.1782, "step": 7701 }, { "epoch": 0.13737380943887562, "grad_norm": 0.3210797607898712, "learning_rate": 4.978743163791384e-05, "loss": 0.2622, "step": 7702 }, { "epoch": 0.1373916455605893, "grad_norm": 0.24034082889556885, "learning_rate": 4.978722904539343e-05, "loss": 0.2119, "step": 7703 }, { "epoch": 0.137409481682303, "grad_norm": 0.27551499009132385, "learning_rate": 4.978702635678914e-05, "loss": 0.1916, "step": 7704 }, { "epoch": 0.1374273178040167, "grad_norm": 0.4206904172897339, "learning_rate": 4.9786823572101786e-05, "loss": 0.3473, "step": 7705 }, { "epoch": 0.1374451539257304, "grad_norm": 0.2999703884124756, "learning_rate": 4.978662069133214e-05, "loss": 0.213, "step": 7706 }, { "epoch": 0.13746299004744408, "grad_norm": 0.24993175268173218, "learning_rate": 4.978641771448099e-05, "loss": 0.2125, "step": 7707 }, { "epoch": 0.13748082616915777, "grad_norm": 0.2945472002029419, "learning_rate": 4.978621464154913e-05, "loss": 0.211, "step": 7708 }, { "epoch": 0.13749866229087146, "grad_norm": 0.2602582573890686, "learning_rate": 4.978601147253733e-05, "loss": 0.2164, "step": 7709 }, { "epoch": 0.13751649841258518, "grad_norm": 0.3148273229598999, "learning_rate": 4.97858082074464e-05, "loss": 0.2348, "step": 7710 }, { "epoch": 0.13753433453429886, "grad_norm": 0.22988027334213257, "learning_rate": 4.9785604846277113e-05, "loss": 0.2005, "step": 7711 }, { "epoch": 0.13755217065601255, "grad_norm": 0.23377783596515656, "learning_rate": 4.978540138903026e-05, "loss": 0.2024, "step": 7712 }, { "epoch": 0.13757000677772624, "grad_norm": 0.27650436758995056, "learning_rate": 4.978519783570663e-05, "loss": 0.1741, "step": 7713 }, { "epoch": 0.13758784289943995, "grad_norm": 0.30671361088752747, "learning_rate": 4.978499418630701e-05, "loss": 0.2339, "step": 7714 }, { "epoch": 0.13760567902115364, "grad_norm": 0.3299501836299896, "learning_rate": 4.9784790440832196e-05, "loss": 0.1843, "step": 7715 }, { "epoch": 0.13762351514286733, "grad_norm": 0.25257813930511475, "learning_rate": 4.978458659928297e-05, "loss": 0.2184, "step": 7716 }, { "epoch": 0.13764135126458102, "grad_norm": 0.3795832097530365, "learning_rate": 4.9784382661660134e-05, "loss": 0.2324, "step": 7717 }, { "epoch": 0.13765918738629473, "grad_norm": 0.2929432690143585, "learning_rate": 4.978417862796446e-05, "loss": 0.2189, "step": 7718 }, { "epoch": 0.13767702350800842, "grad_norm": 0.2690240144729614, "learning_rate": 4.978397449819676e-05, "loss": 0.242, "step": 7719 }, { "epoch": 0.1376948596297221, "grad_norm": 0.25798019766807556, "learning_rate": 4.9783770272357814e-05, "loss": 0.2546, "step": 7720 }, { "epoch": 0.1377126957514358, "grad_norm": 0.25778549909591675, "learning_rate": 4.9783565950448406e-05, "loss": 0.2008, "step": 7721 }, { "epoch": 0.13773053187314951, "grad_norm": 0.4554760158061981, "learning_rate": 4.978336153246934e-05, "loss": 0.2811, "step": 7722 }, { "epoch": 0.1377483679948632, "grad_norm": 0.28381386399269104, "learning_rate": 4.9783157018421405e-05, "loss": 0.2299, "step": 7723 }, { "epoch": 0.1377662041165769, "grad_norm": 0.36203667521476746, "learning_rate": 4.978295240830539e-05, "loss": 0.2807, "step": 7724 }, { "epoch": 0.13778404023829058, "grad_norm": 0.2515256404876709, "learning_rate": 4.97827477021221e-05, "loss": 0.1768, "step": 7725 }, { "epoch": 0.1378018763600043, "grad_norm": 0.2921491265296936, "learning_rate": 4.9782542899872314e-05, "loss": 0.1776, "step": 7726 }, { "epoch": 0.13781971248171798, "grad_norm": 0.40256834030151367, "learning_rate": 4.9782338001556836e-05, "loss": 0.2834, "step": 7727 }, { "epoch": 0.13783754860343167, "grad_norm": 0.2674209177494049, "learning_rate": 4.978213300717646e-05, "loss": 0.2224, "step": 7728 }, { "epoch": 0.13785538472514536, "grad_norm": 0.25338393449783325, "learning_rate": 4.978192791673196e-05, "loss": 0.1888, "step": 7729 }, { "epoch": 0.13787322084685905, "grad_norm": 0.3655220866203308, "learning_rate": 4.978172273022417e-05, "loss": 0.2599, "step": 7730 }, { "epoch": 0.13789105696857276, "grad_norm": 0.28567010164260864, "learning_rate": 4.978151744765385e-05, "loss": 0.2238, "step": 7731 }, { "epoch": 0.13790889309028645, "grad_norm": 0.2943632900714874, "learning_rate": 4.978131206902181e-05, "loss": 0.2445, "step": 7732 }, { "epoch": 0.13792672921200014, "grad_norm": 0.23185555636882782, "learning_rate": 4.9781106594328846e-05, "loss": 0.2087, "step": 7733 }, { "epoch": 0.13794456533371383, "grad_norm": 0.3514593541622162, "learning_rate": 4.978090102357575e-05, "loss": 0.2235, "step": 7734 }, { "epoch": 0.13796240145542754, "grad_norm": 0.26353880763053894, "learning_rate": 4.978069535676333e-05, "loss": 0.1979, "step": 7735 }, { "epoch": 0.13798023757714123, "grad_norm": 0.2900046110153198, "learning_rate": 4.978048959389238e-05, "loss": 0.2148, "step": 7736 }, { "epoch": 0.13799807369885492, "grad_norm": 0.23146869242191315, "learning_rate": 4.978028373496369e-05, "loss": 0.1841, "step": 7737 }, { "epoch": 0.1380159098205686, "grad_norm": 0.2509578466415405, "learning_rate": 4.978007777997805e-05, "loss": 0.2163, "step": 7738 }, { "epoch": 0.13803374594228232, "grad_norm": 0.30609941482543945, "learning_rate": 4.977987172893628e-05, "loss": 0.2252, "step": 7739 }, { "epoch": 0.138051582063996, "grad_norm": 0.31992587447166443, "learning_rate": 4.977966558183916e-05, "loss": 0.2674, "step": 7740 }, { "epoch": 0.1380694181857097, "grad_norm": 0.24072711169719696, "learning_rate": 4.977945933868751e-05, "loss": 0.196, "step": 7741 }, { "epoch": 0.13808725430742338, "grad_norm": 0.31340426206588745, "learning_rate": 4.977925299948211e-05, "loss": 0.2514, "step": 7742 }, { "epoch": 0.1381050904291371, "grad_norm": 0.2519441843032837, "learning_rate": 4.977904656422376e-05, "loss": 0.2283, "step": 7743 }, { "epoch": 0.1381229265508508, "grad_norm": 0.3220392167568207, "learning_rate": 4.977884003291328e-05, "loss": 0.2451, "step": 7744 }, { "epoch": 0.13814076267256448, "grad_norm": 0.3158110976219177, "learning_rate": 4.9778633405551455e-05, "loss": 0.2628, "step": 7745 }, { "epoch": 0.13815859879427816, "grad_norm": 0.3030248284339905, "learning_rate": 4.977842668213909e-05, "loss": 0.189, "step": 7746 }, { "epoch": 0.13817643491599188, "grad_norm": 0.47189828753471375, "learning_rate": 4.977821986267698e-05, "loss": 0.2288, "step": 7747 }, { "epoch": 0.13819427103770557, "grad_norm": 0.34288308024406433, "learning_rate": 4.977801294716593e-05, "loss": 0.2301, "step": 7748 }, { "epoch": 0.13821210715941926, "grad_norm": 0.29675331711769104, "learning_rate": 4.9777805935606746e-05, "loss": 0.267, "step": 7749 }, { "epoch": 0.13822994328113294, "grad_norm": 0.2849023938179016, "learning_rate": 4.977759882800023e-05, "loss": 0.2276, "step": 7750 }, { "epoch": 0.13824777940284663, "grad_norm": 0.2039736807346344, "learning_rate": 4.977739162434718e-05, "loss": 0.2152, "step": 7751 }, { "epoch": 0.13826561552456035, "grad_norm": 0.3245219886302948, "learning_rate": 4.977718432464841e-05, "loss": 0.2591, "step": 7752 }, { "epoch": 0.13828345164627404, "grad_norm": 0.2827920913696289, "learning_rate": 4.977697692890471e-05, "loss": 0.2193, "step": 7753 }, { "epoch": 0.13830128776798772, "grad_norm": 0.26571762561798096, "learning_rate": 4.9776769437116885e-05, "loss": 0.2147, "step": 7754 }, { "epoch": 0.1383191238897014, "grad_norm": 0.3618394136428833, "learning_rate": 4.977656184928575e-05, "loss": 0.2503, "step": 7755 }, { "epoch": 0.13833696001141513, "grad_norm": 0.29902219772338867, "learning_rate": 4.97763541654121e-05, "loss": 0.2391, "step": 7756 }, { "epoch": 0.13835479613312882, "grad_norm": 0.27607762813568115, "learning_rate": 4.977614638549675e-05, "loss": 0.1884, "step": 7757 }, { "epoch": 0.1383726322548425, "grad_norm": 0.3239946961402893, "learning_rate": 4.977593850954049e-05, "loss": 0.2456, "step": 7758 }, { "epoch": 0.1383904683765562, "grad_norm": 0.27721700072288513, "learning_rate": 4.977573053754414e-05, "loss": 0.2462, "step": 7759 }, { "epoch": 0.1384083044982699, "grad_norm": 0.388680100440979, "learning_rate": 4.9775522469508504e-05, "loss": 0.3074, "step": 7760 }, { "epoch": 0.1384261406199836, "grad_norm": 0.30190929770469666, "learning_rate": 4.9775314305434385e-05, "loss": 0.2169, "step": 7761 }, { "epoch": 0.13844397674169728, "grad_norm": 0.35949015617370605, "learning_rate": 4.977510604532259e-05, "loss": 0.2478, "step": 7762 }, { "epoch": 0.13846181286341097, "grad_norm": 0.6132022738456726, "learning_rate": 4.9774897689173926e-05, "loss": 0.2179, "step": 7763 }, { "epoch": 0.1384796489851247, "grad_norm": 0.3205450475215912, "learning_rate": 4.97746892369892e-05, "loss": 0.2549, "step": 7764 }, { "epoch": 0.13849748510683837, "grad_norm": 0.2332857996225357, "learning_rate": 4.977448068876922e-05, "loss": 0.2082, "step": 7765 }, { "epoch": 0.13851532122855206, "grad_norm": 0.3490433990955353, "learning_rate": 4.9774272044514806e-05, "loss": 0.3034, "step": 7766 }, { "epoch": 0.13853315735026575, "grad_norm": 0.26735496520996094, "learning_rate": 4.977406330422675e-05, "loss": 0.2018, "step": 7767 }, { "epoch": 0.13855099347197944, "grad_norm": 0.34351831674575806, "learning_rate": 4.977385446790587e-05, "loss": 0.2448, "step": 7768 }, { "epoch": 0.13856882959369315, "grad_norm": 0.3086816668510437, "learning_rate": 4.977364553555296e-05, "loss": 0.2432, "step": 7769 }, { "epoch": 0.13858666571540684, "grad_norm": 0.25710898637771606, "learning_rate": 4.9773436507168857e-05, "loss": 0.2239, "step": 7770 }, { "epoch": 0.13860450183712053, "grad_norm": 0.3609481453895569, "learning_rate": 4.977322738275436e-05, "loss": 0.2575, "step": 7771 }, { "epoch": 0.13862233795883422, "grad_norm": 0.3354337513446808, "learning_rate": 4.977301816231027e-05, "loss": 0.2455, "step": 7772 }, { "epoch": 0.13864017408054793, "grad_norm": 0.260863333940506, "learning_rate": 4.977280884583741e-05, "loss": 0.2296, "step": 7773 }, { "epoch": 0.13865801020226162, "grad_norm": 0.43223699927330017, "learning_rate": 4.977259943333658e-05, "loss": 0.2654, "step": 7774 }, { "epoch": 0.1386758463239753, "grad_norm": 0.280205100774765, "learning_rate": 4.9772389924808605e-05, "loss": 0.2126, "step": 7775 }, { "epoch": 0.138693682445689, "grad_norm": 0.2625616490840912, "learning_rate": 4.977218032025429e-05, "loss": 0.2647, "step": 7776 }, { "epoch": 0.13871151856740271, "grad_norm": 0.3067643642425537, "learning_rate": 4.9771970619674446e-05, "loss": 0.231, "step": 7777 }, { "epoch": 0.1387293546891164, "grad_norm": 0.24626529216766357, "learning_rate": 4.977176082306989e-05, "loss": 0.2554, "step": 7778 }, { "epoch": 0.1387471908108301, "grad_norm": 0.2416291981935501, "learning_rate": 4.9771550930441426e-05, "loss": 0.2272, "step": 7779 }, { "epoch": 0.13876502693254378, "grad_norm": 0.35257646441459656, "learning_rate": 4.9771340941789884e-05, "loss": 0.2731, "step": 7780 }, { "epoch": 0.1387828630542575, "grad_norm": 0.3278643488883972, "learning_rate": 4.9771130857116065e-05, "loss": 0.2786, "step": 7781 }, { "epoch": 0.13880069917597118, "grad_norm": 0.23713479936122894, "learning_rate": 4.977092067642078e-05, "loss": 0.2336, "step": 7782 }, { "epoch": 0.13881853529768487, "grad_norm": 0.2497008889913559, "learning_rate": 4.977071039970487e-05, "loss": 0.1848, "step": 7783 }, { "epoch": 0.13883637141939856, "grad_norm": 0.38271504640579224, "learning_rate": 4.9770500026969116e-05, "loss": 0.2249, "step": 7784 }, { "epoch": 0.13885420754111227, "grad_norm": 0.28771165013313293, "learning_rate": 4.977028955821435e-05, "loss": 0.2435, "step": 7785 }, { "epoch": 0.13887204366282596, "grad_norm": 0.3053930699825287, "learning_rate": 4.9770078993441386e-05, "loss": 0.2176, "step": 7786 }, { "epoch": 0.13888987978453965, "grad_norm": 0.2448943704366684, "learning_rate": 4.9769868332651047e-05, "loss": 0.2293, "step": 7787 }, { "epoch": 0.13890771590625334, "grad_norm": 0.2562117576599121, "learning_rate": 4.9769657575844136e-05, "loss": 0.2296, "step": 7788 }, { "epoch": 0.13892555202796703, "grad_norm": 0.2721194922924042, "learning_rate": 4.976944672302148e-05, "loss": 0.2138, "step": 7789 }, { "epoch": 0.13894338814968074, "grad_norm": 0.36899375915527344, "learning_rate": 4.976923577418389e-05, "loss": 0.2031, "step": 7790 }, { "epoch": 0.13896122427139443, "grad_norm": 0.26053762435913086, "learning_rate": 4.976902472933219e-05, "loss": 0.2323, "step": 7791 }, { "epoch": 0.13897906039310812, "grad_norm": 0.27441224455833435, "learning_rate": 4.97688135884672e-05, "loss": 0.2217, "step": 7792 }, { "epoch": 0.1389968965148218, "grad_norm": 0.31387025117874146, "learning_rate": 4.9768602351589724e-05, "loss": 0.2297, "step": 7793 }, { "epoch": 0.13901473263653552, "grad_norm": 0.3134457767009735, "learning_rate": 4.97683910187006e-05, "loss": 0.2938, "step": 7794 }, { "epoch": 0.1390325687582492, "grad_norm": 0.3367738723754883, "learning_rate": 4.9768179589800634e-05, "loss": 0.252, "step": 7795 }, { "epoch": 0.1390504048799629, "grad_norm": 0.39340123534202576, "learning_rate": 4.9767968064890646e-05, "loss": 0.2329, "step": 7796 }, { "epoch": 0.13906824100167658, "grad_norm": 0.3973371684551239, "learning_rate": 4.976775644397146e-05, "loss": 0.2732, "step": 7797 }, { "epoch": 0.1390860771233903, "grad_norm": 0.22739411890506744, "learning_rate": 4.97675447270439e-05, "loss": 0.1817, "step": 7798 }, { "epoch": 0.139103913245104, "grad_norm": 0.3370538353919983, "learning_rate": 4.9767332914108776e-05, "loss": 0.2702, "step": 7799 }, { "epoch": 0.13912174936681768, "grad_norm": 0.2768855094909668, "learning_rate": 4.976712100516692e-05, "loss": 0.2037, "step": 7800 }, { "epoch": 0.13913958548853136, "grad_norm": 0.4047466814517975, "learning_rate": 4.976690900021915e-05, "loss": 0.2317, "step": 7801 }, { "epoch": 0.13915742161024508, "grad_norm": 0.25438666343688965, "learning_rate": 4.976669689926628e-05, "loss": 0.2062, "step": 7802 }, { "epoch": 0.13917525773195877, "grad_norm": 0.3366991877555847, "learning_rate": 4.9766484702309143e-05, "loss": 0.2606, "step": 7803 }, { "epoch": 0.13919309385367246, "grad_norm": 0.29640814661979675, "learning_rate": 4.9766272409348555e-05, "loss": 0.2453, "step": 7804 }, { "epoch": 0.13921092997538614, "grad_norm": 0.24976986646652222, "learning_rate": 4.976606002038534e-05, "loss": 0.1908, "step": 7805 }, { "epoch": 0.13922876609709986, "grad_norm": 0.24931728839874268, "learning_rate": 4.9765847535420326e-05, "loss": 0.2028, "step": 7806 }, { "epoch": 0.13924660221881355, "grad_norm": 0.340193510055542, "learning_rate": 4.976563495445433e-05, "loss": 0.1855, "step": 7807 }, { "epoch": 0.13926443834052724, "grad_norm": 0.3073176145553589, "learning_rate": 4.9765422277488186e-05, "loss": 0.2306, "step": 7808 }, { "epoch": 0.13928227446224092, "grad_norm": 0.2590597867965698, "learning_rate": 4.976520950452271e-05, "loss": 0.2141, "step": 7809 }, { "epoch": 0.1393001105839546, "grad_norm": 0.31174999475479126, "learning_rate": 4.976499663555872e-05, "loss": 0.2089, "step": 7810 }, { "epoch": 0.13931794670566833, "grad_norm": 0.221009761095047, "learning_rate": 4.976478367059706e-05, "loss": 0.2174, "step": 7811 }, { "epoch": 0.13933578282738202, "grad_norm": 0.3522317111492157, "learning_rate": 4.976457060963854e-05, "loss": 0.3125, "step": 7812 }, { "epoch": 0.1393536189490957, "grad_norm": 0.5266415476799011, "learning_rate": 4.976435745268398e-05, "loss": 0.2487, "step": 7813 }, { "epoch": 0.1393714550708094, "grad_norm": 0.28174644708633423, "learning_rate": 4.976414419973424e-05, "loss": 0.2332, "step": 7814 }, { "epoch": 0.1393892911925231, "grad_norm": 0.2874157130718231, "learning_rate": 4.976393085079011e-05, "loss": 0.2329, "step": 7815 }, { "epoch": 0.1394071273142368, "grad_norm": 0.28394871950149536, "learning_rate": 4.976371740585243e-05, "loss": 0.1899, "step": 7816 }, { "epoch": 0.13942496343595048, "grad_norm": 0.3517719507217407, "learning_rate": 4.976350386492203e-05, "loss": 0.2816, "step": 7817 }, { "epoch": 0.13944279955766417, "grad_norm": 0.2708662152290344, "learning_rate": 4.976329022799974e-05, "loss": 0.1945, "step": 7818 }, { "epoch": 0.1394606356793779, "grad_norm": 0.42597663402557373, "learning_rate": 4.976307649508638e-05, "loss": 0.2259, "step": 7819 }, { "epoch": 0.13947847180109157, "grad_norm": 0.28468161821365356, "learning_rate": 4.976286266618279e-05, "loss": 0.2786, "step": 7820 }, { "epoch": 0.13949630792280526, "grad_norm": 0.3473394513130188, "learning_rate": 4.9762648741289776e-05, "loss": 0.225, "step": 7821 }, { "epoch": 0.13951414404451895, "grad_norm": 0.2951386868953705, "learning_rate": 4.976243472040819e-05, "loss": 0.2848, "step": 7822 }, { "epoch": 0.13953198016623267, "grad_norm": 0.246065154671669, "learning_rate": 4.9762220603538857e-05, "loss": 0.2411, "step": 7823 }, { "epoch": 0.13954981628794635, "grad_norm": 0.20123934745788574, "learning_rate": 4.97620063906826e-05, "loss": 0.1367, "step": 7824 }, { "epoch": 0.13956765240966004, "grad_norm": 0.2690652012825012, "learning_rate": 4.976179208184026e-05, "loss": 0.2411, "step": 7825 }, { "epoch": 0.13958548853137373, "grad_norm": 0.27102038264274597, "learning_rate": 4.9761577677012664e-05, "loss": 0.2195, "step": 7826 }, { "epoch": 0.13960332465308745, "grad_norm": 0.28155240416526794, "learning_rate": 4.9761363176200634e-05, "loss": 0.221, "step": 7827 }, { "epoch": 0.13962116077480113, "grad_norm": 0.23990245163440704, "learning_rate": 4.976114857940501e-05, "loss": 0.2387, "step": 7828 }, { "epoch": 0.13963899689651482, "grad_norm": 0.27962687611579895, "learning_rate": 4.976093388662662e-05, "loss": 0.222, "step": 7829 }, { "epoch": 0.1396568330182285, "grad_norm": 0.27067622542381287, "learning_rate": 4.97607190978663e-05, "loss": 0.2328, "step": 7830 }, { "epoch": 0.1396746691399422, "grad_norm": 0.2322358638048172, "learning_rate": 4.9760504213124884e-05, "loss": 0.2285, "step": 7831 }, { "epoch": 0.1396925052616559, "grad_norm": 0.30207064747810364, "learning_rate": 4.976028923240319e-05, "loss": 0.2724, "step": 7832 }, { "epoch": 0.1397103413833696, "grad_norm": 0.265759140253067, "learning_rate": 4.976007415570207e-05, "loss": 0.241, "step": 7833 }, { "epoch": 0.1397281775050833, "grad_norm": 0.2388290911912918, "learning_rate": 4.9759858983022355e-05, "loss": 0.2088, "step": 7834 }, { "epoch": 0.13974601362679698, "grad_norm": 0.22784021496772766, "learning_rate": 4.975964371436487e-05, "loss": 0.1872, "step": 7835 }, { "epoch": 0.1397638497485107, "grad_norm": 0.2823319137096405, "learning_rate": 4.975942834973045e-05, "loss": 0.2361, "step": 7836 }, { "epoch": 0.13978168587022438, "grad_norm": 0.3873385787010193, "learning_rate": 4.975921288911994e-05, "loss": 0.2146, "step": 7837 }, { "epoch": 0.13979952199193807, "grad_norm": 0.3117041289806366, "learning_rate": 4.975899733253417e-05, "loss": 0.2604, "step": 7838 }, { "epoch": 0.13981735811365176, "grad_norm": 0.21867047250270844, "learning_rate": 4.975878167997398e-05, "loss": 0.2108, "step": 7839 }, { "epoch": 0.13983519423536547, "grad_norm": 0.2233334183692932, "learning_rate": 4.975856593144019e-05, "loss": 0.1897, "step": 7840 }, { "epoch": 0.13985303035707916, "grad_norm": 0.29016876220703125, "learning_rate": 4.975835008693365e-05, "loss": 0.2198, "step": 7841 }, { "epoch": 0.13987086647879285, "grad_norm": 0.23145824670791626, "learning_rate": 4.9758134146455195e-05, "loss": 0.2077, "step": 7842 }, { "epoch": 0.13988870260050654, "grad_norm": 0.3653275966644287, "learning_rate": 4.975791811000566e-05, "loss": 0.2354, "step": 7843 }, { "epoch": 0.13990653872222025, "grad_norm": 0.2752903401851654, "learning_rate": 4.9757701977585894e-05, "loss": 0.2082, "step": 7844 }, { "epoch": 0.13992437484393394, "grad_norm": 0.32940614223480225, "learning_rate": 4.975748574919671e-05, "loss": 0.2427, "step": 7845 }, { "epoch": 0.13994221096564763, "grad_norm": 0.3125225603580475, "learning_rate": 4.975726942483896e-05, "loss": 0.1942, "step": 7846 }, { "epoch": 0.13996004708736132, "grad_norm": 0.31455323100090027, "learning_rate": 4.975705300451349e-05, "loss": 0.2438, "step": 7847 }, { "epoch": 0.13997788320907503, "grad_norm": 0.46753013134002686, "learning_rate": 4.975683648822113e-05, "loss": 0.218, "step": 7848 }, { "epoch": 0.13999571933078872, "grad_norm": 0.28788918256759644, "learning_rate": 4.9756619875962716e-05, "loss": 0.2348, "step": 7849 }, { "epoch": 0.1400135554525024, "grad_norm": 0.21361945569515228, "learning_rate": 4.9756403167739105e-05, "loss": 0.1712, "step": 7850 }, { "epoch": 0.1400313915742161, "grad_norm": 0.2623341679573059, "learning_rate": 4.975618636355111e-05, "loss": 0.2154, "step": 7851 }, { "epoch": 0.14004922769592978, "grad_norm": 0.37504589557647705, "learning_rate": 4.97559694633996e-05, "loss": 0.2406, "step": 7852 }, { "epoch": 0.1400670638176435, "grad_norm": 0.3687654137611389, "learning_rate": 4.975575246728539e-05, "loss": 0.203, "step": 7853 }, { "epoch": 0.1400848999393572, "grad_norm": 0.3115758001804352, "learning_rate": 4.975553537520934e-05, "loss": 0.2691, "step": 7854 }, { "epoch": 0.14010273606107088, "grad_norm": 0.254865437746048, "learning_rate": 4.975531818717228e-05, "loss": 0.1961, "step": 7855 }, { "epoch": 0.14012057218278456, "grad_norm": 0.28733110427856445, "learning_rate": 4.975510090317506e-05, "loss": 0.2301, "step": 7856 }, { "epoch": 0.14013840830449828, "grad_norm": 0.3322754502296448, "learning_rate": 4.975488352321852e-05, "loss": 0.2262, "step": 7857 }, { "epoch": 0.14015624442621197, "grad_norm": 0.43327051401138306, "learning_rate": 4.97546660473035e-05, "loss": 0.2336, "step": 7858 }, { "epoch": 0.14017408054792566, "grad_norm": 0.256805419921875, "learning_rate": 4.9754448475430835e-05, "loss": 0.1908, "step": 7859 }, { "epoch": 0.14019191666963934, "grad_norm": 0.2885371744632721, "learning_rate": 4.975423080760139e-05, "loss": 0.2392, "step": 7860 }, { "epoch": 0.14020975279135306, "grad_norm": 0.21325603127479553, "learning_rate": 4.975401304381599e-05, "loss": 0.1862, "step": 7861 }, { "epoch": 0.14022758891306675, "grad_norm": 0.26652243733406067, "learning_rate": 4.975379518407549e-05, "loss": 0.2251, "step": 7862 }, { "epoch": 0.14024542503478044, "grad_norm": 0.2744951844215393, "learning_rate": 4.975357722838073e-05, "loss": 0.24, "step": 7863 }, { "epoch": 0.14026326115649412, "grad_norm": 0.2505877614021301, "learning_rate": 4.9753359176732555e-05, "loss": 0.2052, "step": 7864 }, { "epoch": 0.14028109727820784, "grad_norm": 0.3701929450035095, "learning_rate": 4.975314102913181e-05, "loss": 0.2578, "step": 7865 }, { "epoch": 0.14029893339992153, "grad_norm": 0.34019145369529724, "learning_rate": 4.9752922785579334e-05, "loss": 0.2317, "step": 7866 }, { "epoch": 0.14031676952163522, "grad_norm": 0.23121345043182373, "learning_rate": 4.975270444607599e-05, "loss": 0.2122, "step": 7867 }, { "epoch": 0.1403346056433489, "grad_norm": 0.20442263782024384, "learning_rate": 4.975248601062261e-05, "loss": 0.1995, "step": 7868 }, { "epoch": 0.1403524417650626, "grad_norm": 0.30495911836624146, "learning_rate": 4.975226747922005e-05, "loss": 0.2616, "step": 7869 }, { "epoch": 0.1403702778867763, "grad_norm": 0.3029687702655792, "learning_rate": 4.975204885186915e-05, "loss": 0.2401, "step": 7870 }, { "epoch": 0.14038811400849, "grad_norm": 0.2812706530094147, "learning_rate": 4.9751830128570754e-05, "loss": 0.2493, "step": 7871 }, { "epoch": 0.14040595013020368, "grad_norm": 0.2909778356552124, "learning_rate": 4.9751611309325716e-05, "loss": 0.2208, "step": 7872 }, { "epoch": 0.14042378625191737, "grad_norm": 0.32993146777153015, "learning_rate": 4.975139239413489e-05, "loss": 0.2599, "step": 7873 }, { "epoch": 0.1404416223736311, "grad_norm": 0.32004687190055847, "learning_rate": 4.975117338299911e-05, "loss": 0.2658, "step": 7874 }, { "epoch": 0.14045945849534477, "grad_norm": 0.27156245708465576, "learning_rate": 4.975095427591924e-05, "loss": 0.2032, "step": 7875 }, { "epoch": 0.14047729461705846, "grad_norm": 0.2504098415374756, "learning_rate": 4.9750735072896117e-05, "loss": 0.2203, "step": 7876 }, { "epoch": 0.14049513073877215, "grad_norm": 0.2697783410549164, "learning_rate": 4.97505157739306e-05, "loss": 0.2577, "step": 7877 }, { "epoch": 0.14051296686048587, "grad_norm": 0.34915924072265625, "learning_rate": 4.975029637902353e-05, "loss": 0.1898, "step": 7878 }, { "epoch": 0.14053080298219955, "grad_norm": 0.3016009032726288, "learning_rate": 4.975007688817577e-05, "loss": 0.2329, "step": 7879 }, { "epoch": 0.14054863910391324, "grad_norm": 0.2796032130718231, "learning_rate": 4.974985730138816e-05, "loss": 0.1957, "step": 7880 }, { "epoch": 0.14056647522562693, "grad_norm": 0.5127027034759521, "learning_rate": 4.974963761866156e-05, "loss": 0.2184, "step": 7881 }, { "epoch": 0.14058431134734065, "grad_norm": 0.49412110447883606, "learning_rate": 4.974941783999681e-05, "loss": 0.2192, "step": 7882 }, { "epoch": 0.14060214746905433, "grad_norm": 0.21453262865543365, "learning_rate": 4.974919796539477e-05, "loss": 0.1925, "step": 7883 }, { "epoch": 0.14061998359076802, "grad_norm": 0.29166239500045776, "learning_rate": 4.974897799485629e-05, "loss": 0.2405, "step": 7884 }, { "epoch": 0.1406378197124817, "grad_norm": 0.3181530833244324, "learning_rate": 4.9748757928382225e-05, "loss": 0.2747, "step": 7885 }, { "epoch": 0.14065565583419543, "grad_norm": 0.27765437960624695, "learning_rate": 4.974853776597343e-05, "loss": 0.2084, "step": 7886 }, { "epoch": 0.1406734919559091, "grad_norm": 0.24031805992126465, "learning_rate": 4.974831750763074e-05, "loss": 0.22, "step": 7887 }, { "epoch": 0.1406913280776228, "grad_norm": 0.21924948692321777, "learning_rate": 4.974809715335504e-05, "loss": 0.1956, "step": 7888 }, { "epoch": 0.1407091641993365, "grad_norm": 0.278427392244339, "learning_rate": 4.9747876703147155e-05, "loss": 0.2104, "step": 7889 }, { "epoch": 0.14072700032105018, "grad_norm": 0.27412453293800354, "learning_rate": 4.974765615700796e-05, "loss": 0.2294, "step": 7890 }, { "epoch": 0.1407448364427639, "grad_norm": 0.2860864996910095, "learning_rate": 4.97474355149383e-05, "loss": 0.1772, "step": 7891 }, { "epoch": 0.14076267256447758, "grad_norm": 0.3259214162826538, "learning_rate": 4.9747214776939035e-05, "loss": 0.1863, "step": 7892 }, { "epoch": 0.14078050868619127, "grad_norm": 0.3208634555339813, "learning_rate": 4.9746993943011014e-05, "loss": 0.2001, "step": 7893 }, { "epoch": 0.14079834480790496, "grad_norm": 0.2717551290988922, "learning_rate": 4.97467730131551e-05, "loss": 0.1538, "step": 7894 }, { "epoch": 0.14081618092961867, "grad_norm": 0.28506383299827576, "learning_rate": 4.9746551987372146e-05, "loss": 0.2135, "step": 7895 }, { "epoch": 0.14083401705133236, "grad_norm": 0.35007163882255554, "learning_rate": 4.9746330865663014e-05, "loss": 0.2224, "step": 7896 }, { "epoch": 0.14085185317304605, "grad_norm": 0.2527383267879486, "learning_rate": 4.974610964802855e-05, "loss": 0.2415, "step": 7897 }, { "epoch": 0.14086968929475974, "grad_norm": 0.30727824568748474, "learning_rate": 4.974588833446962e-05, "loss": 0.2302, "step": 7898 }, { "epoch": 0.14088752541647345, "grad_norm": 0.3137305974960327, "learning_rate": 4.974566692498708e-05, "loss": 0.2473, "step": 7899 }, { "epoch": 0.14090536153818714, "grad_norm": 0.267914354801178, "learning_rate": 4.9745445419581785e-05, "loss": 0.2019, "step": 7900 }, { "epoch": 0.14092319765990083, "grad_norm": 0.31420591473579407, "learning_rate": 4.9745223818254605e-05, "loss": 0.2311, "step": 7901 }, { "epoch": 0.14094103378161452, "grad_norm": 0.3154907524585724, "learning_rate": 4.974500212100638e-05, "loss": 0.2104, "step": 7902 }, { "epoch": 0.14095886990332823, "grad_norm": 0.2309712916612625, "learning_rate": 4.974478032783799e-05, "loss": 0.1947, "step": 7903 }, { "epoch": 0.14097670602504192, "grad_norm": 0.24380679428577423, "learning_rate": 4.9744558438750276e-05, "loss": 0.2208, "step": 7904 }, { "epoch": 0.1409945421467556, "grad_norm": 0.23298384249210358, "learning_rate": 4.9744336453744114e-05, "loss": 0.1853, "step": 7905 }, { "epoch": 0.1410123782684693, "grad_norm": 0.28253084421157837, "learning_rate": 4.974411437282035e-05, "loss": 0.2184, "step": 7906 }, { "epoch": 0.141030214390183, "grad_norm": 0.30858737230300903, "learning_rate": 4.974389219597986e-05, "loss": 0.2265, "step": 7907 }, { "epoch": 0.1410480505118967, "grad_norm": 0.36670511960983276, "learning_rate": 4.97436699232235e-05, "loss": 0.2287, "step": 7908 }, { "epoch": 0.1410658866336104, "grad_norm": 0.2474767416715622, "learning_rate": 4.974344755455212e-05, "loss": 0.2146, "step": 7909 }, { "epoch": 0.14108372275532408, "grad_norm": 0.27468055486679077, "learning_rate": 4.974322508996659e-05, "loss": 0.1993, "step": 7910 }, { "epoch": 0.14110155887703776, "grad_norm": 0.2542925477027893, "learning_rate": 4.9743002529467786e-05, "loss": 0.2377, "step": 7911 }, { "epoch": 0.14111939499875148, "grad_norm": 0.5516005158424377, "learning_rate": 4.974277987305655e-05, "loss": 0.294, "step": 7912 }, { "epoch": 0.14113723112046517, "grad_norm": 0.24601012468338013, "learning_rate": 4.9742557120733755e-05, "loss": 0.2118, "step": 7913 }, { "epoch": 0.14115506724217886, "grad_norm": 0.2901665270328522, "learning_rate": 4.9742334272500256e-05, "loss": 0.2164, "step": 7914 }, { "epoch": 0.14117290336389254, "grad_norm": 0.25811460614204407, "learning_rate": 4.974211132835693e-05, "loss": 0.2268, "step": 7915 }, { "epoch": 0.14119073948560626, "grad_norm": 0.31770452857017517, "learning_rate": 4.974188828830464e-05, "loss": 0.22, "step": 7916 }, { "epoch": 0.14120857560731995, "grad_norm": 0.2723231315612793, "learning_rate": 4.974166515234424e-05, "loss": 0.2514, "step": 7917 }, { "epoch": 0.14122641172903364, "grad_norm": 0.30405065417289734, "learning_rate": 4.9741441920476596e-05, "loss": 0.2547, "step": 7918 }, { "epoch": 0.14124424785074732, "grad_norm": 0.26237574219703674, "learning_rate": 4.9741218592702584e-05, "loss": 0.2056, "step": 7919 }, { "epoch": 0.14126208397246104, "grad_norm": 0.22551776468753815, "learning_rate": 4.9740995169023056e-05, "loss": 0.1652, "step": 7920 }, { "epoch": 0.14127992009417473, "grad_norm": 0.2911512553691864, "learning_rate": 4.9740771649438894e-05, "loss": 0.2436, "step": 7921 }, { "epoch": 0.14129775621588841, "grad_norm": 0.2582857310771942, "learning_rate": 4.974054803395095e-05, "loss": 0.2375, "step": 7922 }, { "epoch": 0.1413155923376021, "grad_norm": 0.31233006715774536, "learning_rate": 4.97403243225601e-05, "loss": 0.2474, "step": 7923 }, { "epoch": 0.14133342845931582, "grad_norm": 0.22808755934238434, "learning_rate": 4.9740100515267206e-05, "loss": 0.2234, "step": 7924 }, { "epoch": 0.1413512645810295, "grad_norm": 0.2825382649898529, "learning_rate": 4.9739876612073145e-05, "loss": 0.2038, "step": 7925 }, { "epoch": 0.1413691007027432, "grad_norm": 0.34420591592788696, "learning_rate": 4.973965261297877e-05, "loss": 0.2381, "step": 7926 }, { "epoch": 0.14138693682445688, "grad_norm": 0.2703482508659363, "learning_rate": 4.973942851798496e-05, "loss": 0.227, "step": 7927 }, { "epoch": 0.1414047729461706, "grad_norm": 0.3449389338493347, "learning_rate": 4.973920432709257e-05, "loss": 0.3174, "step": 7928 }, { "epoch": 0.1414226090678843, "grad_norm": 0.2196127325296402, "learning_rate": 4.973898004030249e-05, "loss": 0.2498, "step": 7929 }, { "epoch": 0.14144044518959797, "grad_norm": 0.3597094714641571, "learning_rate": 4.973875565761558e-05, "loss": 0.2552, "step": 7930 }, { "epoch": 0.14145828131131166, "grad_norm": 0.2061515748500824, "learning_rate": 4.97385311790327e-05, "loss": 0.1896, "step": 7931 }, { "epoch": 0.14147611743302535, "grad_norm": 0.2318653017282486, "learning_rate": 4.973830660455474e-05, "loss": 0.2182, "step": 7932 }, { "epoch": 0.14149395355473907, "grad_norm": 0.2656973898410797, "learning_rate": 4.9738081934182546e-05, "loss": 0.1992, "step": 7933 }, { "epoch": 0.14151178967645275, "grad_norm": 0.1819588840007782, "learning_rate": 4.973785716791701e-05, "loss": 0.2088, "step": 7934 }, { "epoch": 0.14152962579816644, "grad_norm": 0.2581210434436798, "learning_rate": 4.9737632305758996e-05, "loss": 0.1868, "step": 7935 }, { "epoch": 0.14154746191988013, "grad_norm": 0.25400230288505554, "learning_rate": 4.973740734770938e-05, "loss": 0.1907, "step": 7936 }, { "epoch": 0.14156529804159385, "grad_norm": 0.30852246284484863, "learning_rate": 4.9737182293769017e-05, "loss": 0.2392, "step": 7937 }, { "epoch": 0.14158313416330753, "grad_norm": 0.3192148506641388, "learning_rate": 4.973695714393879e-05, "loss": 0.2806, "step": 7938 }, { "epoch": 0.14160097028502122, "grad_norm": 0.25666457414627075, "learning_rate": 4.973673189821959e-05, "loss": 0.2046, "step": 7939 }, { "epoch": 0.1416188064067349, "grad_norm": 0.30235546827316284, "learning_rate": 4.973650655661226e-05, "loss": 0.2737, "step": 7940 }, { "epoch": 0.14163664252844863, "grad_norm": 0.32992446422576904, "learning_rate": 4.973628111911769e-05, "loss": 0.2122, "step": 7941 }, { "epoch": 0.1416544786501623, "grad_norm": 0.3310461938381195, "learning_rate": 4.973605558573675e-05, "loss": 0.2673, "step": 7942 }, { "epoch": 0.141672314771876, "grad_norm": 0.2232588827610016, "learning_rate": 4.9735829956470314e-05, "loss": 0.1908, "step": 7943 }, { "epoch": 0.1416901508935897, "grad_norm": 0.21040025353431702, "learning_rate": 4.9735604231319266e-05, "loss": 0.2036, "step": 7944 }, { "epoch": 0.1417079870153034, "grad_norm": 0.30268794298171997, "learning_rate": 4.973537841028446e-05, "loss": 0.2791, "step": 7945 }, { "epoch": 0.1417258231370171, "grad_norm": 0.3545149862766266, "learning_rate": 4.9735152493366795e-05, "loss": 0.2817, "step": 7946 }, { "epoch": 0.14174365925873078, "grad_norm": 0.2459118813276291, "learning_rate": 4.9734926480567126e-05, "loss": 0.2244, "step": 7947 }, { "epoch": 0.14176149538044447, "grad_norm": 0.34598618745803833, "learning_rate": 4.973470037188634e-05, "loss": 0.3079, "step": 7948 }, { "epoch": 0.14177933150215818, "grad_norm": 0.25589612126350403, "learning_rate": 4.9734474167325326e-05, "loss": 0.2909, "step": 7949 }, { "epoch": 0.14179716762387187, "grad_norm": 0.26901867985725403, "learning_rate": 4.973424786688494e-05, "loss": 0.1569, "step": 7950 }, { "epoch": 0.14181500374558556, "grad_norm": 0.24347417056560516, "learning_rate": 4.973402147056606e-05, "loss": 0.2038, "step": 7951 }, { "epoch": 0.14183283986729925, "grad_norm": 0.2705577611923218, "learning_rate": 4.973379497836957e-05, "loss": 0.2513, "step": 7952 }, { "epoch": 0.14185067598901294, "grad_norm": 0.2895198166370392, "learning_rate": 4.9733568390296357e-05, "loss": 0.1899, "step": 7953 }, { "epoch": 0.14186851211072665, "grad_norm": 0.24842306971549988, "learning_rate": 4.973334170634728e-05, "loss": 0.2473, "step": 7954 }, { "epoch": 0.14188634823244034, "grad_norm": 0.18200337886810303, "learning_rate": 4.9733114926523233e-05, "loss": 0.1884, "step": 7955 }, { "epoch": 0.14190418435415403, "grad_norm": 0.23742805421352386, "learning_rate": 4.973288805082509e-05, "loss": 0.2358, "step": 7956 }, { "epoch": 0.14192202047586772, "grad_norm": 0.2698855400085449, "learning_rate": 4.973266107925373e-05, "loss": 0.2343, "step": 7957 }, { "epoch": 0.14193985659758143, "grad_norm": 0.2611949145793915, "learning_rate": 4.9732434011810036e-05, "loss": 0.2233, "step": 7958 }, { "epoch": 0.14195769271929512, "grad_norm": 0.29039445519447327, "learning_rate": 4.973220684849488e-05, "loss": 0.2236, "step": 7959 }, { "epoch": 0.1419755288410088, "grad_norm": 0.2685015797615051, "learning_rate": 4.973197958930915e-05, "loss": 0.2361, "step": 7960 }, { "epoch": 0.1419933649627225, "grad_norm": 0.2829175293445587, "learning_rate": 4.9731752234253723e-05, "loss": 0.2004, "step": 7961 }, { "epoch": 0.1420112010844362, "grad_norm": 0.43400296568870544, "learning_rate": 4.9731524783329485e-05, "loss": 0.2381, "step": 7962 }, { "epoch": 0.1420290372061499, "grad_norm": 0.2866727113723755, "learning_rate": 4.973129723653732e-05, "loss": 0.2553, "step": 7963 }, { "epoch": 0.1420468733278636, "grad_norm": 0.3433106243610382, "learning_rate": 4.97310695938781e-05, "loss": 0.2442, "step": 7964 }, { "epoch": 0.14206470944957728, "grad_norm": 0.2611335813999176, "learning_rate": 4.973084185535271e-05, "loss": 0.2685, "step": 7965 }, { "epoch": 0.142082545571291, "grad_norm": 0.2822869122028351, "learning_rate": 4.973061402096204e-05, "loss": 0.2473, "step": 7966 }, { "epoch": 0.14210038169300468, "grad_norm": 0.24985238909721375, "learning_rate": 4.9730386090706965e-05, "loss": 0.205, "step": 7967 }, { "epoch": 0.14211821781471837, "grad_norm": 0.3009773790836334, "learning_rate": 4.9730158064588375e-05, "loss": 0.2491, "step": 7968 }, { "epoch": 0.14213605393643206, "grad_norm": 0.31487444043159485, "learning_rate": 4.972992994260714e-05, "loss": 0.2181, "step": 7969 }, { "epoch": 0.14215389005814574, "grad_norm": 0.3135034739971161, "learning_rate": 4.9729701724764163e-05, "loss": 0.2353, "step": 7970 }, { "epoch": 0.14217172617985946, "grad_norm": 0.28377479314804077, "learning_rate": 4.972947341106033e-05, "loss": 0.2243, "step": 7971 }, { "epoch": 0.14218956230157315, "grad_norm": 0.2449411153793335, "learning_rate": 4.9729245001496505e-05, "loss": 0.2249, "step": 7972 }, { "epoch": 0.14220739842328683, "grad_norm": 0.36053651571273804, "learning_rate": 4.972901649607359e-05, "loss": 0.2622, "step": 7973 }, { "epoch": 0.14222523454500052, "grad_norm": 0.24852022528648376, "learning_rate": 4.972878789479246e-05, "loss": 0.2031, "step": 7974 }, { "epoch": 0.14224307066671424, "grad_norm": 0.27910172939300537, "learning_rate": 4.972855919765402e-05, "loss": 0.2611, "step": 7975 }, { "epoch": 0.14226090678842793, "grad_norm": 0.32723069190979004, "learning_rate": 4.9728330404659126e-05, "loss": 0.1865, "step": 7976 }, { "epoch": 0.14227874291014161, "grad_norm": 0.2713997960090637, "learning_rate": 4.97281015158087e-05, "loss": 0.2426, "step": 7977 }, { "epoch": 0.1422965790318553, "grad_norm": 0.22855231165885925, "learning_rate": 4.97278725311036e-05, "loss": 0.2, "step": 7978 }, { "epoch": 0.14231441515356902, "grad_norm": 0.2307584434747696, "learning_rate": 4.9727643450544725e-05, "loss": 0.2072, "step": 7979 }, { "epoch": 0.1423322512752827, "grad_norm": 0.25341737270355225, "learning_rate": 4.972741427413297e-05, "loss": 0.1912, "step": 7980 }, { "epoch": 0.1423500873969964, "grad_norm": 0.26412492990493774, "learning_rate": 4.972718500186921e-05, "loss": 0.2425, "step": 7981 }, { "epoch": 0.14236792351871008, "grad_norm": 0.33935558795928955, "learning_rate": 4.9726955633754344e-05, "loss": 0.2557, "step": 7982 }, { "epoch": 0.1423857596404238, "grad_norm": 0.240280881524086, "learning_rate": 4.972672616978926e-05, "loss": 0.1984, "step": 7983 }, { "epoch": 0.14240359576213749, "grad_norm": 0.36089375615119934, "learning_rate": 4.972649660997485e-05, "loss": 0.2615, "step": 7984 }, { "epoch": 0.14242143188385117, "grad_norm": 0.29607707262039185, "learning_rate": 4.9726266954311986e-05, "loss": 0.2109, "step": 7985 }, { "epoch": 0.14243926800556486, "grad_norm": 0.24391315877437592, "learning_rate": 4.972603720280158e-05, "loss": 0.1981, "step": 7986 }, { "epoch": 0.14245710412727858, "grad_norm": 0.23113210499286652, "learning_rate": 4.972580735544451e-05, "loss": 0.1997, "step": 7987 }, { "epoch": 0.14247494024899227, "grad_norm": 0.3148064613342285, "learning_rate": 4.9725577412241666e-05, "loss": 0.2365, "step": 7988 }, { "epoch": 0.14249277637070595, "grad_norm": 0.8902646899223328, "learning_rate": 4.972534737319395e-05, "loss": 0.2688, "step": 7989 }, { "epoch": 0.14251061249241964, "grad_norm": 0.22924846410751343, "learning_rate": 4.9725117238302246e-05, "loss": 0.2286, "step": 7990 }, { "epoch": 0.14252844861413333, "grad_norm": 0.23769497871398926, "learning_rate": 4.972488700756745e-05, "loss": 0.1631, "step": 7991 }, { "epoch": 0.14254628473584705, "grad_norm": 0.31016814708709717, "learning_rate": 4.972465668099045e-05, "loss": 0.2464, "step": 7992 }, { "epoch": 0.14256412085756073, "grad_norm": 0.279687762260437, "learning_rate": 4.972442625857214e-05, "loss": 0.2219, "step": 7993 }, { "epoch": 0.14258195697927442, "grad_norm": 0.27037692070007324, "learning_rate": 4.972419574031342e-05, "loss": 0.2399, "step": 7994 }, { "epoch": 0.1425997931009881, "grad_norm": 0.2630898058414459, "learning_rate": 4.972396512621517e-05, "loss": 0.2321, "step": 7995 }, { "epoch": 0.14261762922270182, "grad_norm": 0.21923020482063293, "learning_rate": 4.97237344162783e-05, "loss": 0.1847, "step": 7996 }, { "epoch": 0.1426354653444155, "grad_norm": 0.30596184730529785, "learning_rate": 4.9723503610503693e-05, "loss": 0.2455, "step": 7997 }, { "epoch": 0.1426533014661292, "grad_norm": 0.33181026577949524, "learning_rate": 4.9723272708892243e-05, "loss": 0.1955, "step": 7998 }, { "epoch": 0.1426711375878429, "grad_norm": 0.42547452449798584, "learning_rate": 4.972304171144485e-05, "loss": 0.2604, "step": 7999 }, { "epoch": 0.1426889737095566, "grad_norm": 0.3572325110435486, "learning_rate": 4.972281061816241e-05, "loss": 0.2575, "step": 8000 }, { "epoch": 0.1426889737095566, "eval_loss": 0.21591931581497192, "eval_runtime": 106.6657, "eval_samples_per_second": 9.6, "eval_steps_per_second": 1.603, "step": 8000 }, { "epoch": 0.1427068098312703, "grad_norm": 0.1958993822336197, "learning_rate": 4.9722579429045816e-05, "loss": 0.2162, "step": 8001 }, { "epoch": 0.14272464595298398, "grad_norm": 0.33424729108810425, "learning_rate": 4.9722348144095966e-05, "loss": 0.1894, "step": 8002 }, { "epoch": 0.14274248207469767, "grad_norm": 0.2643277645111084, "learning_rate": 4.9722116763313756e-05, "loss": 0.2133, "step": 8003 }, { "epoch": 0.14276031819641138, "grad_norm": 0.33820122480392456, "learning_rate": 4.972188528670009e-05, "loss": 0.241, "step": 8004 }, { "epoch": 0.14277815431812507, "grad_norm": 0.28121674060821533, "learning_rate": 4.9721653714255844e-05, "loss": 0.2235, "step": 8005 }, { "epoch": 0.14279599043983876, "grad_norm": 0.31005755066871643, "learning_rate": 4.972142204598194e-05, "loss": 0.2206, "step": 8006 }, { "epoch": 0.14281382656155245, "grad_norm": 0.25871145725250244, "learning_rate": 4.9721190281879256e-05, "loss": 0.1956, "step": 8007 }, { "epoch": 0.14283166268326616, "grad_norm": 0.3964315950870514, "learning_rate": 4.97209584219487e-05, "loss": 0.2176, "step": 8008 }, { "epoch": 0.14284949880497985, "grad_norm": 0.2921893298625946, "learning_rate": 4.9720726466191184e-05, "loss": 0.2438, "step": 8009 }, { "epoch": 0.14286733492669354, "grad_norm": 0.44086408615112305, "learning_rate": 4.972049441460759e-05, "loss": 0.2577, "step": 8010 }, { "epoch": 0.14288517104840723, "grad_norm": 0.32219594717025757, "learning_rate": 4.9720262267198815e-05, "loss": 0.1987, "step": 8011 }, { "epoch": 0.14290300717012092, "grad_norm": 0.2792239785194397, "learning_rate": 4.972003002396577e-05, "loss": 0.1722, "step": 8012 }, { "epoch": 0.14292084329183463, "grad_norm": 0.18763373792171478, "learning_rate": 4.971979768490935e-05, "loss": 0.2042, "step": 8013 }, { "epoch": 0.14293867941354832, "grad_norm": 0.2346906065940857, "learning_rate": 4.971956525003045e-05, "loss": 0.1704, "step": 8014 }, { "epoch": 0.142956515535262, "grad_norm": 0.29033222794532776, "learning_rate": 4.971933271932999e-05, "loss": 0.2116, "step": 8015 }, { "epoch": 0.1429743516569757, "grad_norm": 0.25404953956604004, "learning_rate": 4.971910009280885e-05, "loss": 0.2469, "step": 8016 }, { "epoch": 0.1429921877786894, "grad_norm": 0.28456249833106995, "learning_rate": 4.971886737046794e-05, "loss": 0.2165, "step": 8017 }, { "epoch": 0.1430100239004031, "grad_norm": 0.20949894189834595, "learning_rate": 4.971863455230816e-05, "loss": 0.2139, "step": 8018 }, { "epoch": 0.1430278600221168, "grad_norm": 0.2785196006298065, "learning_rate": 4.971840163833042e-05, "loss": 0.2459, "step": 8019 }, { "epoch": 0.14304569614383048, "grad_norm": 0.23433707654476166, "learning_rate": 4.9718168628535615e-05, "loss": 0.2369, "step": 8020 }, { "epoch": 0.1430635322655442, "grad_norm": 0.289565771818161, "learning_rate": 4.971793552292465e-05, "loss": 0.2314, "step": 8021 }, { "epoch": 0.14308136838725788, "grad_norm": 0.2865285277366638, "learning_rate": 4.9717702321498435e-05, "loss": 0.2114, "step": 8022 }, { "epoch": 0.14309920450897157, "grad_norm": 0.248923197388649, "learning_rate": 4.9717469024257866e-05, "loss": 0.2595, "step": 8023 }, { "epoch": 0.14311704063068525, "grad_norm": 0.23164553940296173, "learning_rate": 4.971723563120385e-05, "loss": 0.2634, "step": 8024 }, { "epoch": 0.14313487675239897, "grad_norm": 0.3079147934913635, "learning_rate": 4.971700214233729e-05, "loss": 0.2527, "step": 8025 }, { "epoch": 0.14315271287411266, "grad_norm": 0.21224114298820496, "learning_rate": 4.97167685576591e-05, "loss": 0.2148, "step": 8026 }, { "epoch": 0.14317054899582635, "grad_norm": 0.4156232476234436, "learning_rate": 4.9716534877170167e-05, "loss": 0.2609, "step": 8027 }, { "epoch": 0.14318838511754003, "grad_norm": 0.20538099110126495, "learning_rate": 4.971630110087141e-05, "loss": 0.1823, "step": 8028 }, { "epoch": 0.14320622123925375, "grad_norm": 0.2794420123100281, "learning_rate": 4.971606722876374e-05, "loss": 0.2258, "step": 8029 }, { "epoch": 0.14322405736096744, "grad_norm": 0.2170548290014267, "learning_rate": 4.971583326084806e-05, "loss": 0.1697, "step": 8030 }, { "epoch": 0.14324189348268113, "grad_norm": 0.2593088448047638, "learning_rate": 4.971559919712526e-05, "loss": 0.2506, "step": 8031 }, { "epoch": 0.14325972960439481, "grad_norm": 0.3047870099544525, "learning_rate": 4.971536503759627e-05, "loss": 0.2401, "step": 8032 }, { "epoch": 0.1432775657261085, "grad_norm": 0.23515376448631287, "learning_rate": 4.971513078226199e-05, "loss": 0.2025, "step": 8033 }, { "epoch": 0.14329540184782222, "grad_norm": 0.2998775839805603, "learning_rate": 4.9714896431123314e-05, "loss": 0.2554, "step": 8034 }, { "epoch": 0.1433132379695359, "grad_norm": 0.28107354044914246, "learning_rate": 4.9714661984181175e-05, "loss": 0.2352, "step": 8035 }, { "epoch": 0.1433310740912496, "grad_norm": 0.28603890538215637, "learning_rate": 4.971442744143647e-05, "loss": 0.2584, "step": 8036 }, { "epoch": 0.14334891021296328, "grad_norm": 0.2731328010559082, "learning_rate": 4.97141928028901e-05, "loss": 0.2455, "step": 8037 }, { "epoch": 0.143366746334677, "grad_norm": 0.33348676562309265, "learning_rate": 4.9713958068542994e-05, "loss": 0.2973, "step": 8038 }, { "epoch": 0.14338458245639069, "grad_norm": 0.3898952603340149, "learning_rate": 4.971372323839605e-05, "loss": 0.2013, "step": 8039 }, { "epoch": 0.14340241857810437, "grad_norm": 0.22789378464221954, "learning_rate": 4.971348831245017e-05, "loss": 0.2235, "step": 8040 }, { "epoch": 0.14342025469981806, "grad_norm": 0.3405819833278656, "learning_rate": 4.971325329070628e-05, "loss": 0.2106, "step": 8041 }, { "epoch": 0.14343809082153178, "grad_norm": 0.3461797833442688, "learning_rate": 4.9713018173165274e-05, "loss": 0.2119, "step": 8042 }, { "epoch": 0.14345592694324547, "grad_norm": 0.27884143590927124, "learning_rate": 4.971278295982808e-05, "loss": 0.2193, "step": 8043 }, { "epoch": 0.14347376306495915, "grad_norm": 0.3599877655506134, "learning_rate": 4.971254765069561e-05, "loss": 0.2579, "step": 8044 }, { "epoch": 0.14349159918667284, "grad_norm": 0.2900587022304535, "learning_rate": 4.9712312245768766e-05, "loss": 0.2459, "step": 8045 }, { "epoch": 0.14350943530838656, "grad_norm": 0.35157376527786255, "learning_rate": 4.971207674504846e-05, "loss": 0.2461, "step": 8046 }, { "epoch": 0.14352727143010025, "grad_norm": 0.3435586392879486, "learning_rate": 4.971184114853562e-05, "loss": 0.2141, "step": 8047 }, { "epoch": 0.14354510755181393, "grad_norm": 0.32010671496391296, "learning_rate": 4.9711605456231136e-05, "loss": 0.2176, "step": 8048 }, { "epoch": 0.14356294367352762, "grad_norm": 0.3224741518497467, "learning_rate": 4.9711369668135945e-05, "loss": 0.2333, "step": 8049 }, { "epoch": 0.1435807797952413, "grad_norm": 0.24018892645835876, "learning_rate": 4.971113378425094e-05, "loss": 0.2011, "step": 8050 }, { "epoch": 0.14359861591695502, "grad_norm": 0.3418002128601074, "learning_rate": 4.971089780457705e-05, "loss": 0.2582, "step": 8051 }, { "epoch": 0.1436164520386687, "grad_norm": 0.23992377519607544, "learning_rate": 4.9710661729115185e-05, "loss": 0.1967, "step": 8052 }, { "epoch": 0.1436342881603824, "grad_norm": 0.36270955204963684, "learning_rate": 4.9710425557866266e-05, "loss": 0.3147, "step": 8053 }, { "epoch": 0.1436521242820961, "grad_norm": 0.23312827944755554, "learning_rate": 4.97101892908312e-05, "loss": 0.1954, "step": 8054 }, { "epoch": 0.1436699604038098, "grad_norm": 0.2479989379644394, "learning_rate": 4.97099529280109e-05, "loss": 0.1936, "step": 8055 }, { "epoch": 0.1436877965255235, "grad_norm": 0.4056938886642456, "learning_rate": 4.9709716469406286e-05, "loss": 0.2886, "step": 8056 }, { "epoch": 0.14370563264723718, "grad_norm": 0.3030484914779663, "learning_rate": 4.970947991501829e-05, "loss": 0.2702, "step": 8057 }, { "epoch": 0.14372346876895087, "grad_norm": 0.3851870894432068, "learning_rate": 4.97092432648478e-05, "loss": 0.2449, "step": 8058 }, { "epoch": 0.14374130489066458, "grad_norm": 0.2610694468021393, "learning_rate": 4.970900651889576e-05, "loss": 0.2187, "step": 8059 }, { "epoch": 0.14375914101237827, "grad_norm": 0.24890683591365814, "learning_rate": 4.970876967716307e-05, "loss": 0.2262, "step": 8060 }, { "epoch": 0.14377697713409196, "grad_norm": 0.25924643874168396, "learning_rate": 4.970853273965066e-05, "loss": 0.2189, "step": 8061 }, { "epoch": 0.14379481325580565, "grad_norm": 0.2735474705696106, "learning_rate": 4.970829570635944e-05, "loss": 0.2177, "step": 8062 }, { "epoch": 0.14381264937751936, "grad_norm": 0.23441441357135773, "learning_rate": 4.9708058577290336e-05, "loss": 0.1935, "step": 8063 }, { "epoch": 0.14383048549923305, "grad_norm": 0.2908290922641754, "learning_rate": 4.970782135244426e-05, "loss": 0.223, "step": 8064 }, { "epoch": 0.14384832162094674, "grad_norm": 0.3465617001056671, "learning_rate": 4.970758403182213e-05, "loss": 0.2106, "step": 8065 }, { "epoch": 0.14386615774266043, "grad_norm": 0.28953370451927185, "learning_rate": 4.9707346615424876e-05, "loss": 0.2513, "step": 8066 }, { "epoch": 0.14388399386437414, "grad_norm": 0.24587005376815796, "learning_rate": 4.970710910325341e-05, "loss": 0.229, "step": 8067 }, { "epoch": 0.14390182998608783, "grad_norm": 0.2227289229631424, "learning_rate": 4.970687149530866e-05, "loss": 0.2105, "step": 8068 }, { "epoch": 0.14391966610780152, "grad_norm": 0.4306078553199768, "learning_rate": 4.970663379159154e-05, "loss": 0.2362, "step": 8069 }, { "epoch": 0.1439375022295152, "grad_norm": 0.2689933478832245, "learning_rate": 4.9706395992102974e-05, "loss": 0.1784, "step": 8070 }, { "epoch": 0.1439553383512289, "grad_norm": 0.27545493841171265, "learning_rate": 4.970615809684388e-05, "loss": 0.2588, "step": 8071 }, { "epoch": 0.1439731744729426, "grad_norm": 0.2528339922428131, "learning_rate": 4.970592010581519e-05, "loss": 0.2213, "step": 8072 }, { "epoch": 0.1439910105946563, "grad_norm": 0.27702268958091736, "learning_rate": 4.970568201901781e-05, "loss": 0.246, "step": 8073 }, { "epoch": 0.14400884671637, "grad_norm": 0.23985956609249115, "learning_rate": 4.9705443836452684e-05, "loss": 0.2214, "step": 8074 }, { "epoch": 0.14402668283808367, "grad_norm": 0.2800622880458832, "learning_rate": 4.970520555812073e-05, "loss": 0.2471, "step": 8075 }, { "epoch": 0.1440445189597974, "grad_norm": 0.2803814113140106, "learning_rate": 4.970496718402286e-05, "loss": 0.2139, "step": 8076 }, { "epoch": 0.14406235508151108, "grad_norm": 0.30929356813430786, "learning_rate": 4.9704728714159995e-05, "loss": 0.2391, "step": 8077 }, { "epoch": 0.14408019120322477, "grad_norm": 0.20972399413585663, "learning_rate": 4.970449014853308e-05, "loss": 0.1837, "step": 8078 }, { "epoch": 0.14409802732493845, "grad_norm": 0.33398327231407166, "learning_rate": 4.9704251487143024e-05, "loss": 0.2284, "step": 8079 }, { "epoch": 0.14411586344665217, "grad_norm": 0.24110308289527893, "learning_rate": 4.970401272999076e-05, "loss": 0.225, "step": 8080 }, { "epoch": 0.14413369956836586, "grad_norm": 0.2563042938709259, "learning_rate": 4.97037738770772e-05, "loss": 0.246, "step": 8081 }, { "epoch": 0.14415153569007955, "grad_norm": 0.22172963619232178, "learning_rate": 4.970353492840329e-05, "loss": 0.2462, "step": 8082 }, { "epoch": 0.14416937181179323, "grad_norm": 0.2786707878112793, "learning_rate": 4.970329588396995e-05, "loss": 0.187, "step": 8083 }, { "epoch": 0.14418720793350695, "grad_norm": 0.35980361700057983, "learning_rate": 4.9703056743778094e-05, "loss": 0.2348, "step": 8084 }, { "epoch": 0.14420504405522064, "grad_norm": 0.2369632124900818, "learning_rate": 4.970281750782866e-05, "loss": 0.1732, "step": 8085 }, { "epoch": 0.14422288017693433, "grad_norm": 0.25025275349617004, "learning_rate": 4.970257817612257e-05, "loss": 0.2125, "step": 8086 }, { "epoch": 0.14424071629864801, "grad_norm": 0.2807694673538208, "learning_rate": 4.970233874866076e-05, "loss": 0.2635, "step": 8087 }, { "epoch": 0.14425855242036173, "grad_norm": 0.28084516525268555, "learning_rate": 4.970209922544415e-05, "loss": 0.2589, "step": 8088 }, { "epoch": 0.14427638854207542, "grad_norm": 0.213764950633049, "learning_rate": 4.970185960647368e-05, "loss": 0.2026, "step": 8089 }, { "epoch": 0.1442942246637891, "grad_norm": 0.20018413662910461, "learning_rate": 4.970161989175026e-05, "loss": 0.227, "step": 8090 }, { "epoch": 0.1443120607855028, "grad_norm": 0.26697149872779846, "learning_rate": 4.970138008127484e-05, "loss": 0.2787, "step": 8091 }, { "epoch": 0.14432989690721648, "grad_norm": 0.24063348770141602, "learning_rate": 4.970114017504833e-05, "loss": 0.2127, "step": 8092 }, { "epoch": 0.1443477330289302, "grad_norm": 0.31096041202545166, "learning_rate": 4.970090017307167e-05, "loss": 0.1976, "step": 8093 }, { "epoch": 0.14436556915064389, "grad_norm": 0.36545073986053467, "learning_rate": 4.97006600753458e-05, "loss": 0.2472, "step": 8094 }, { "epoch": 0.14438340527235757, "grad_norm": 0.2593143880367279, "learning_rate": 4.970041988187163e-05, "loss": 0.2508, "step": 8095 }, { "epoch": 0.14440124139407126, "grad_norm": 0.29695287346839905, "learning_rate": 4.97001795926501e-05, "loss": 0.2252, "step": 8096 }, { "epoch": 0.14441907751578498, "grad_norm": 0.4915003776550293, "learning_rate": 4.9699939207682147e-05, "loss": 0.1764, "step": 8097 }, { "epoch": 0.14443691363749867, "grad_norm": 0.2949909567832947, "learning_rate": 4.96996987269687e-05, "loss": 0.246, "step": 8098 }, { "epoch": 0.14445474975921235, "grad_norm": 0.24923904240131378, "learning_rate": 4.969945815051068e-05, "loss": 0.2175, "step": 8099 }, { "epoch": 0.14447258588092604, "grad_norm": 0.3403671979904175, "learning_rate": 4.969921747830904e-05, "loss": 0.2788, "step": 8100 }, { "epoch": 0.14449042200263976, "grad_norm": 0.21889542043209076, "learning_rate": 4.96989767103647e-05, "loss": 0.1927, "step": 8101 }, { "epoch": 0.14450825812435344, "grad_norm": 0.3265772759914398, "learning_rate": 4.969873584667859e-05, "loss": 0.2157, "step": 8102 }, { "epoch": 0.14452609424606713, "grad_norm": 0.2654820680618286, "learning_rate": 4.969849488725166e-05, "loss": 0.2121, "step": 8103 }, { "epoch": 0.14454393036778082, "grad_norm": 0.32656437158584595, "learning_rate": 4.9698253832084825e-05, "loss": 0.2258, "step": 8104 }, { "epoch": 0.14456176648949454, "grad_norm": 0.358965128660202, "learning_rate": 4.969801268117903e-05, "loss": 0.2774, "step": 8105 }, { "epoch": 0.14457960261120822, "grad_norm": 0.23094293475151062, "learning_rate": 4.96977714345352e-05, "loss": 0.2035, "step": 8106 }, { "epoch": 0.1445974387329219, "grad_norm": 0.22926008701324463, "learning_rate": 4.969753009215429e-05, "loss": 0.1865, "step": 8107 }, { "epoch": 0.1446152748546356, "grad_norm": 0.26557493209838867, "learning_rate": 4.969728865403722e-05, "loss": 0.1873, "step": 8108 }, { "epoch": 0.14463311097634932, "grad_norm": 0.21748287975788116, "learning_rate": 4.9697047120184926e-05, "loss": 0.2328, "step": 8109 }, { "epoch": 0.144650947098063, "grad_norm": 0.25423404574394226, "learning_rate": 4.969680549059834e-05, "loss": 0.1797, "step": 8110 }, { "epoch": 0.1446687832197767, "grad_norm": 0.3078209161758423, "learning_rate": 4.969656376527842e-05, "loss": 0.203, "step": 8111 }, { "epoch": 0.14468661934149038, "grad_norm": 0.2998126447200775, "learning_rate": 4.969632194422608e-05, "loss": 0.2082, "step": 8112 }, { "epoch": 0.14470445546320407, "grad_norm": 0.3039228618144989, "learning_rate": 4.9696080027442274e-05, "loss": 0.2659, "step": 8113 }, { "epoch": 0.14472229158491778, "grad_norm": 0.3542231321334839, "learning_rate": 4.969583801492792e-05, "loss": 0.2618, "step": 8114 }, { "epoch": 0.14474012770663147, "grad_norm": 0.37329572439193726, "learning_rate": 4.969559590668398e-05, "loss": 0.2969, "step": 8115 }, { "epoch": 0.14475796382834516, "grad_norm": 0.22694751620292664, "learning_rate": 4.969535370271138e-05, "loss": 0.2274, "step": 8116 }, { "epoch": 0.14477579995005885, "grad_norm": 0.2826818823814392, "learning_rate": 4.9695111403011054e-05, "loss": 0.234, "step": 8117 }, { "epoch": 0.14479363607177256, "grad_norm": 0.2796265184879303, "learning_rate": 4.9694869007583946e-05, "loss": 0.1865, "step": 8118 }, { "epoch": 0.14481147219348625, "grad_norm": 0.2666398286819458, "learning_rate": 4.9694626516430995e-05, "loss": 0.2224, "step": 8119 }, { "epoch": 0.14482930831519994, "grad_norm": 0.29951611161231995, "learning_rate": 4.969438392955314e-05, "loss": 0.2108, "step": 8120 }, { "epoch": 0.14484714443691363, "grad_norm": 0.2740534543991089, "learning_rate": 4.969414124695134e-05, "loss": 0.2134, "step": 8121 }, { "epoch": 0.14486498055862734, "grad_norm": 0.29929760098457336, "learning_rate": 4.969389846862651e-05, "loss": 0.2098, "step": 8122 }, { "epoch": 0.14488281668034103, "grad_norm": 0.3080112040042877, "learning_rate": 4.9693655594579594e-05, "loss": 0.1933, "step": 8123 }, { "epoch": 0.14490065280205472, "grad_norm": 0.2736315131187439, "learning_rate": 4.969341262481155e-05, "loss": 0.1723, "step": 8124 }, { "epoch": 0.1449184889237684, "grad_norm": 0.26007893681526184, "learning_rate": 4.969316955932331e-05, "loss": 0.1735, "step": 8125 }, { "epoch": 0.14493632504548212, "grad_norm": 0.2956458330154419, "learning_rate": 4.96929263981158e-05, "loss": 0.2097, "step": 8126 }, { "epoch": 0.1449541611671958, "grad_norm": 0.2500307559967041, "learning_rate": 4.969268314119e-05, "loss": 0.2197, "step": 8127 }, { "epoch": 0.1449719972889095, "grad_norm": 0.3087429106235504, "learning_rate": 4.969243978854682e-05, "loss": 0.2027, "step": 8128 }, { "epoch": 0.1449898334106232, "grad_norm": 0.29210174083709717, "learning_rate": 4.9692196340187216e-05, "loss": 0.2017, "step": 8129 }, { "epoch": 0.1450076695323369, "grad_norm": 0.27657708525657654, "learning_rate": 4.969195279611213e-05, "loss": 0.2153, "step": 8130 }, { "epoch": 0.1450255056540506, "grad_norm": 0.3296692967414856, "learning_rate": 4.969170915632251e-05, "loss": 0.2376, "step": 8131 }, { "epoch": 0.14504334177576428, "grad_norm": 0.2788555920124054, "learning_rate": 4.969146542081929e-05, "loss": 0.2657, "step": 8132 }, { "epoch": 0.14506117789747797, "grad_norm": 0.3158535659313202, "learning_rate": 4.969122158960343e-05, "loss": 0.2651, "step": 8133 }, { "epoch": 0.14507901401919165, "grad_norm": 0.23868532478809357, "learning_rate": 4.969097766267587e-05, "loss": 0.2108, "step": 8134 }, { "epoch": 0.14509685014090537, "grad_norm": 0.46661341190338135, "learning_rate": 4.969073364003754e-05, "loss": 0.1925, "step": 8135 }, { "epoch": 0.14511468626261906, "grad_norm": 0.23679344356060028, "learning_rate": 4.9690489521689406e-05, "loss": 0.237, "step": 8136 }, { "epoch": 0.14513252238433275, "grad_norm": 0.2594318091869354, "learning_rate": 4.96902453076324e-05, "loss": 0.2708, "step": 8137 }, { "epoch": 0.14515035850604643, "grad_norm": 0.34159931540489197, "learning_rate": 4.9690000997867486e-05, "loss": 0.261, "step": 8138 }, { "epoch": 0.14516819462776015, "grad_norm": 0.32327941060066223, "learning_rate": 4.96897565923956e-05, "loss": 0.2556, "step": 8139 }, { "epoch": 0.14518603074947384, "grad_norm": 0.5102737545967102, "learning_rate": 4.968951209121768e-05, "loss": 0.3226, "step": 8140 }, { "epoch": 0.14520386687118753, "grad_norm": 0.24807362258434296, "learning_rate": 4.968926749433469e-05, "loss": 0.2064, "step": 8141 }, { "epoch": 0.1452217029929012, "grad_norm": 0.2608893811702728, "learning_rate": 4.9689022801747576e-05, "loss": 0.2039, "step": 8142 }, { "epoch": 0.14523953911461493, "grad_norm": 0.2564334571361542, "learning_rate": 4.968877801345727e-05, "loss": 0.2266, "step": 8143 }, { "epoch": 0.14525737523632862, "grad_norm": 0.27542316913604736, "learning_rate": 4.9688533129464745e-05, "loss": 0.2699, "step": 8144 }, { "epoch": 0.1452752113580423, "grad_norm": 0.2978516221046448, "learning_rate": 4.968828814977093e-05, "loss": 0.2244, "step": 8145 }, { "epoch": 0.145293047479756, "grad_norm": 0.29370105266571045, "learning_rate": 4.968804307437679e-05, "loss": 0.2232, "step": 8146 }, { "epoch": 0.1453108836014697, "grad_norm": 0.21740269660949707, "learning_rate": 4.968779790328326e-05, "loss": 0.1728, "step": 8147 }, { "epoch": 0.1453287197231834, "grad_norm": 0.3440368175506592, "learning_rate": 4.968755263649131e-05, "loss": 0.2197, "step": 8148 }, { "epoch": 0.14534655584489709, "grad_norm": 0.32539790868759155, "learning_rate": 4.968730727400187e-05, "loss": 0.2727, "step": 8149 }, { "epoch": 0.14536439196661077, "grad_norm": 0.2705410420894623, "learning_rate": 4.968706181581591e-05, "loss": 0.2623, "step": 8150 }, { "epoch": 0.14538222808832446, "grad_norm": 0.23159928619861603, "learning_rate": 4.968681626193436e-05, "loss": 0.1949, "step": 8151 }, { "epoch": 0.14540006421003818, "grad_norm": 0.2809099555015564, "learning_rate": 4.9686570612358184e-05, "loss": 0.2459, "step": 8152 }, { "epoch": 0.14541790033175186, "grad_norm": 0.37867191433906555, "learning_rate": 4.968632486708834e-05, "loss": 0.2524, "step": 8153 }, { "epoch": 0.14543573645346555, "grad_norm": 0.1915334165096283, "learning_rate": 4.968607902612578e-05, "loss": 0.1841, "step": 8154 }, { "epoch": 0.14545357257517924, "grad_norm": 0.2534467577934265, "learning_rate": 4.968583308947144e-05, "loss": 0.228, "step": 8155 }, { "epoch": 0.14547140869689296, "grad_norm": 0.2592531740665436, "learning_rate": 4.968558705712629e-05, "loss": 0.215, "step": 8156 }, { "epoch": 0.14548924481860664, "grad_norm": 0.23502813279628754, "learning_rate": 4.9685340929091276e-05, "loss": 0.1788, "step": 8157 }, { "epoch": 0.14550708094032033, "grad_norm": 0.2555530369281769, "learning_rate": 4.968509470536735e-05, "loss": 0.2124, "step": 8158 }, { "epoch": 0.14552491706203402, "grad_norm": 0.40521240234375, "learning_rate": 4.968484838595548e-05, "loss": 0.1925, "step": 8159 }, { "epoch": 0.14554275318374774, "grad_norm": 0.2218252420425415, "learning_rate": 4.9684601970856606e-05, "loss": 0.2149, "step": 8160 }, { "epoch": 0.14556058930546142, "grad_norm": 0.32937249541282654, "learning_rate": 4.96843554600717e-05, "loss": 0.2082, "step": 8161 }, { "epoch": 0.1455784254271751, "grad_norm": 0.23088762164115906, "learning_rate": 4.96841088536017e-05, "loss": 0.1849, "step": 8162 }, { "epoch": 0.1455962615488888, "grad_norm": 0.2395429164171219, "learning_rate": 4.9683862151447556e-05, "loss": 0.1946, "step": 8163 }, { "epoch": 0.14561409767060252, "grad_norm": 0.2747701108455658, "learning_rate": 4.968361535361025e-05, "loss": 0.1932, "step": 8164 }, { "epoch": 0.1456319337923162, "grad_norm": 0.36645421385765076, "learning_rate": 4.9683368460090725e-05, "loss": 0.25, "step": 8165 }, { "epoch": 0.1456497699140299, "grad_norm": 0.22646576166152954, "learning_rate": 4.968312147088994e-05, "loss": 0.2238, "step": 8166 }, { "epoch": 0.14566760603574358, "grad_norm": 0.2774210274219513, "learning_rate": 4.968287438600884e-05, "loss": 0.2193, "step": 8167 }, { "epoch": 0.1456854421574573, "grad_norm": 0.2596759498119354, "learning_rate": 4.96826272054484e-05, "loss": 0.1979, "step": 8168 }, { "epoch": 0.14570327827917098, "grad_norm": 0.3014243245124817, "learning_rate": 4.9682379929209576e-05, "loss": 0.2679, "step": 8169 }, { "epoch": 0.14572111440088467, "grad_norm": 0.22725193202495575, "learning_rate": 4.968213255729332e-05, "loss": 0.2007, "step": 8170 }, { "epoch": 0.14573895052259836, "grad_norm": 0.2004374861717224, "learning_rate": 4.968188508970059e-05, "loss": 0.1861, "step": 8171 }, { "epoch": 0.14575678664431205, "grad_norm": 0.27653491497039795, "learning_rate": 4.968163752643235e-05, "loss": 0.2265, "step": 8172 }, { "epoch": 0.14577462276602576, "grad_norm": 0.24558009207248688, "learning_rate": 4.968138986748956e-05, "loss": 0.2089, "step": 8173 }, { "epoch": 0.14579245888773945, "grad_norm": 0.23888957500457764, "learning_rate": 4.968114211287318e-05, "loss": 0.2124, "step": 8174 }, { "epoch": 0.14581029500945314, "grad_norm": 0.27592718601226807, "learning_rate": 4.968089426258417e-05, "loss": 0.2321, "step": 8175 }, { "epoch": 0.14582813113116683, "grad_norm": 0.262919157743454, "learning_rate": 4.968064631662348e-05, "loss": 0.2093, "step": 8176 }, { "epoch": 0.14584596725288054, "grad_norm": 0.3246628940105438, "learning_rate": 4.9680398274992086e-05, "loss": 0.2108, "step": 8177 }, { "epoch": 0.14586380337459423, "grad_norm": 0.22410131990909576, "learning_rate": 4.968015013769094e-05, "loss": 0.2301, "step": 8178 }, { "epoch": 0.14588163949630792, "grad_norm": 0.23250728845596313, "learning_rate": 4.9679901904721015e-05, "loss": 0.1826, "step": 8179 }, { "epoch": 0.1458994756180216, "grad_norm": 0.29271814227104187, "learning_rate": 4.9679653576083265e-05, "loss": 0.2291, "step": 8180 }, { "epoch": 0.14591731173973532, "grad_norm": 0.3109009861946106, "learning_rate": 4.9679405151778646e-05, "loss": 0.259, "step": 8181 }, { "epoch": 0.145935147861449, "grad_norm": 0.23242051899433136, "learning_rate": 4.967915663180813e-05, "loss": 0.2082, "step": 8182 }, { "epoch": 0.1459529839831627, "grad_norm": 0.3276471197605133, "learning_rate": 4.967890801617269e-05, "loss": 0.1778, "step": 8183 }, { "epoch": 0.1459708201048764, "grad_norm": 0.21032224595546722, "learning_rate": 4.9678659304873264e-05, "loss": 0.2045, "step": 8184 }, { "epoch": 0.1459886562265901, "grad_norm": 0.2781781256198883, "learning_rate": 4.9678410497910845e-05, "loss": 0.2736, "step": 8185 }, { "epoch": 0.1460064923483038, "grad_norm": 0.32890549302101135, "learning_rate": 4.9678161595286375e-05, "loss": 0.2146, "step": 8186 }, { "epoch": 0.14602432847001748, "grad_norm": 0.3681790828704834, "learning_rate": 4.967791259700082e-05, "loss": 0.1741, "step": 8187 }, { "epoch": 0.14604216459173117, "grad_norm": 0.32718828320503235, "learning_rate": 4.9677663503055164e-05, "loss": 0.2018, "step": 8188 }, { "epoch": 0.14606000071344488, "grad_norm": 0.3910665810108185, "learning_rate": 4.967741431345036e-05, "loss": 0.2411, "step": 8189 }, { "epoch": 0.14607783683515857, "grad_norm": 0.3346827030181885, "learning_rate": 4.967716502818737e-05, "loss": 0.2197, "step": 8190 }, { "epoch": 0.14609567295687226, "grad_norm": 0.24446497857570648, "learning_rate": 4.967691564726717e-05, "loss": 0.1766, "step": 8191 }, { "epoch": 0.14611350907858595, "grad_norm": 0.2619584798812866, "learning_rate": 4.9676666170690724e-05, "loss": 0.2152, "step": 8192 }, { "epoch": 0.14613134520029963, "grad_norm": 0.2953864634037018, "learning_rate": 4.967641659845899e-05, "loss": 0.2223, "step": 8193 }, { "epoch": 0.14614918132201335, "grad_norm": 0.28771674633026123, "learning_rate": 4.9676166930572945e-05, "loss": 0.2306, "step": 8194 }, { "epoch": 0.14616701744372704, "grad_norm": 0.32447558641433716, "learning_rate": 4.967591716703355e-05, "loss": 0.216, "step": 8195 }, { "epoch": 0.14618485356544073, "grad_norm": 0.3141137659549713, "learning_rate": 4.9675667307841784e-05, "loss": 0.1908, "step": 8196 }, { "epoch": 0.1462026896871544, "grad_norm": 0.20078648626804352, "learning_rate": 4.9675417352998604e-05, "loss": 0.1952, "step": 8197 }, { "epoch": 0.14622052580886813, "grad_norm": 0.28396427631378174, "learning_rate": 4.967516730250499e-05, "loss": 0.2495, "step": 8198 }, { "epoch": 0.14623836193058182, "grad_norm": 0.41971689462661743, "learning_rate": 4.96749171563619e-05, "loss": 0.2914, "step": 8199 }, { "epoch": 0.1462561980522955, "grad_norm": 0.19014528393745422, "learning_rate": 4.967466691457031e-05, "loss": 0.193, "step": 8200 }, { "epoch": 0.1462740341740092, "grad_norm": 0.2847881019115448, "learning_rate": 4.967441657713118e-05, "loss": 0.2313, "step": 8201 }, { "epoch": 0.1462918702957229, "grad_norm": 0.32855483889579773, "learning_rate": 4.9674166144045495e-05, "loss": 0.2555, "step": 8202 }, { "epoch": 0.1463097064174366, "grad_norm": 0.2652437686920166, "learning_rate": 4.967391561531422e-05, "loss": 0.2204, "step": 8203 }, { "epoch": 0.14632754253915028, "grad_norm": 0.2823733389377594, "learning_rate": 4.967366499093834e-05, "loss": 0.1949, "step": 8204 }, { "epoch": 0.14634537866086397, "grad_norm": 0.28654277324676514, "learning_rate": 4.9673414270918795e-05, "loss": 0.2389, "step": 8205 }, { "epoch": 0.1463632147825777, "grad_norm": 0.38240379095077515, "learning_rate": 4.9673163455256576e-05, "loss": 0.259, "step": 8206 }, { "epoch": 0.14638105090429138, "grad_norm": 0.2780166268348694, "learning_rate": 4.967291254395266e-05, "loss": 0.1985, "step": 8207 }, { "epoch": 0.14639888702600506, "grad_norm": 0.26061293482780457, "learning_rate": 4.9672661537008016e-05, "loss": 0.2017, "step": 8208 }, { "epoch": 0.14641672314771875, "grad_norm": 0.35266759991645813, "learning_rate": 4.9672410434423605e-05, "loss": 0.214, "step": 8209 }, { "epoch": 0.14643455926943247, "grad_norm": 0.28910812735557556, "learning_rate": 4.967215923620041e-05, "loss": 0.2458, "step": 8210 }, { "epoch": 0.14645239539114616, "grad_norm": 0.24122272431850433, "learning_rate": 4.9671907942339414e-05, "loss": 0.1887, "step": 8211 }, { "epoch": 0.14647023151285984, "grad_norm": 0.2747018039226532, "learning_rate": 4.967165655284157e-05, "loss": 0.2361, "step": 8212 }, { "epoch": 0.14648806763457353, "grad_norm": 0.29957759380340576, "learning_rate": 4.967140506770787e-05, "loss": 0.2312, "step": 8213 }, { "epoch": 0.14650590375628722, "grad_norm": 0.3302885890007019, "learning_rate": 4.967115348693928e-05, "loss": 0.2114, "step": 8214 }, { "epoch": 0.14652373987800094, "grad_norm": 0.6019874215126038, "learning_rate": 4.9670901810536786e-05, "loss": 0.1988, "step": 8215 }, { "epoch": 0.14654157599971462, "grad_norm": 0.22647647559642792, "learning_rate": 4.967065003850134e-05, "loss": 0.2315, "step": 8216 }, { "epoch": 0.1465594121214283, "grad_norm": 0.37151867151260376, "learning_rate": 4.967039817083395e-05, "loss": 0.207, "step": 8217 }, { "epoch": 0.146577248243142, "grad_norm": 0.4023069739341736, "learning_rate": 4.9670146207535564e-05, "loss": 0.1972, "step": 8218 }, { "epoch": 0.14659508436485572, "grad_norm": 0.2935321629047394, "learning_rate": 4.9669894148607174e-05, "loss": 0.2346, "step": 8219 }, { "epoch": 0.1466129204865694, "grad_norm": 0.3261707127094269, "learning_rate": 4.9669641994049754e-05, "loss": 0.2257, "step": 8220 }, { "epoch": 0.1466307566082831, "grad_norm": 0.26666054129600525, "learning_rate": 4.9669389743864286e-05, "loss": 0.195, "step": 8221 }, { "epoch": 0.14664859272999678, "grad_norm": 0.2738647758960724, "learning_rate": 4.966913739805173e-05, "loss": 0.2008, "step": 8222 }, { "epoch": 0.1466664288517105, "grad_norm": 0.2955527603626251, "learning_rate": 4.9668884956613096e-05, "loss": 0.2421, "step": 8223 }, { "epoch": 0.14668426497342418, "grad_norm": 0.24047254025936127, "learning_rate": 4.966863241954933e-05, "loss": 0.2012, "step": 8224 }, { "epoch": 0.14670210109513787, "grad_norm": 0.23851563036441803, "learning_rate": 4.966837978686143e-05, "loss": 0.2016, "step": 8225 }, { "epoch": 0.14671993721685156, "grad_norm": 0.28972363471984863, "learning_rate": 4.966812705855036e-05, "loss": 0.2711, "step": 8226 }, { "epoch": 0.14673777333856527, "grad_norm": 0.23951609432697296, "learning_rate": 4.9667874234617116e-05, "loss": 0.1954, "step": 8227 }, { "epoch": 0.14675560946027896, "grad_norm": 0.24366194009780884, "learning_rate": 4.9667621315062676e-05, "loss": 0.2251, "step": 8228 }, { "epoch": 0.14677344558199265, "grad_norm": 0.4206940233707428, "learning_rate": 4.9667368299888014e-05, "loss": 0.2047, "step": 8229 }, { "epoch": 0.14679128170370634, "grad_norm": 0.3422740399837494, "learning_rate": 4.966711518909411e-05, "loss": 0.2524, "step": 8230 }, { "epoch": 0.14680911782542003, "grad_norm": 0.41899481415748596, "learning_rate": 4.9666861982681944e-05, "loss": 0.2174, "step": 8231 }, { "epoch": 0.14682695394713374, "grad_norm": 0.31520822644233704, "learning_rate": 4.966660868065251e-05, "loss": 0.2663, "step": 8232 }, { "epoch": 0.14684479006884743, "grad_norm": 0.29547446966171265, "learning_rate": 4.966635528300677e-05, "loss": 0.2506, "step": 8233 }, { "epoch": 0.14686262619056112, "grad_norm": 0.2587685286998749, "learning_rate": 4.9666101789745735e-05, "loss": 0.1836, "step": 8234 }, { "epoch": 0.1468804623122748, "grad_norm": 0.257411926984787, "learning_rate": 4.966584820087036e-05, "loss": 0.2155, "step": 8235 }, { "epoch": 0.14689829843398852, "grad_norm": 0.299124151468277, "learning_rate": 4.9665594516381633e-05, "loss": 0.1628, "step": 8236 }, { "epoch": 0.1469161345557022, "grad_norm": 0.27042296528816223, "learning_rate": 4.966534073628054e-05, "loss": 0.2252, "step": 8237 }, { "epoch": 0.1469339706774159, "grad_norm": 0.27317970991134644, "learning_rate": 4.9665086860568075e-05, "loss": 0.2202, "step": 8238 }, { "epoch": 0.1469518067991296, "grad_norm": 0.26989591121673584, "learning_rate": 4.9664832889245214e-05, "loss": 0.1972, "step": 8239 }, { "epoch": 0.1469696429208433, "grad_norm": 0.24562902748584747, "learning_rate": 4.9664578822312947e-05, "loss": 0.2044, "step": 8240 }, { "epoch": 0.146987479042557, "grad_norm": 0.3071196973323822, "learning_rate": 4.966432465977224e-05, "loss": 0.2394, "step": 8241 }, { "epoch": 0.14700531516427068, "grad_norm": 0.28153204917907715, "learning_rate": 4.966407040162411e-05, "loss": 0.157, "step": 8242 }, { "epoch": 0.14702315128598437, "grad_norm": 0.3015204966068268, "learning_rate": 4.9663816047869505e-05, "loss": 0.1976, "step": 8243 }, { "epoch": 0.14704098740769808, "grad_norm": 0.35817933082580566, "learning_rate": 4.9663561598509445e-05, "loss": 0.2379, "step": 8244 }, { "epoch": 0.14705882352941177, "grad_norm": 0.4818767309188843, "learning_rate": 4.966330705354489e-05, "loss": 0.2459, "step": 8245 }, { "epoch": 0.14707665965112546, "grad_norm": 0.29964667558670044, "learning_rate": 4.966305241297685e-05, "loss": 0.2077, "step": 8246 }, { "epoch": 0.14709449577283915, "grad_norm": 0.2904871106147766, "learning_rate": 4.9662797676806294e-05, "loss": 0.1811, "step": 8247 }, { "epoch": 0.14711233189455286, "grad_norm": 0.40816056728363037, "learning_rate": 4.9662542845034216e-05, "loss": 0.1932, "step": 8248 }, { "epoch": 0.14713016801626655, "grad_norm": 0.2841852307319641, "learning_rate": 4.96622879176616e-05, "loss": 0.1828, "step": 8249 }, { "epoch": 0.14714800413798024, "grad_norm": 0.26207995414733887, "learning_rate": 4.966203289468945e-05, "loss": 0.2171, "step": 8250 }, { "epoch": 0.14716584025969393, "grad_norm": 0.29089948534965515, "learning_rate": 4.9661777776118736e-05, "loss": 0.2864, "step": 8251 }, { "epoch": 0.1471836763814076, "grad_norm": 0.265689492225647, "learning_rate": 4.966152256195045e-05, "loss": 0.2163, "step": 8252 }, { "epoch": 0.14720151250312133, "grad_norm": 0.3602275550365448, "learning_rate": 4.966126725218558e-05, "loss": 0.2683, "step": 8253 }, { "epoch": 0.14721934862483502, "grad_norm": 0.24762341380119324, "learning_rate": 4.9661011846825134e-05, "loss": 0.1996, "step": 8254 }, { "epoch": 0.1472371847465487, "grad_norm": 0.23596131801605225, "learning_rate": 4.966075634587008e-05, "loss": 0.1925, "step": 8255 }, { "epoch": 0.1472550208682624, "grad_norm": 0.2957203984260559, "learning_rate": 4.966050074932141e-05, "loss": 0.2074, "step": 8256 }, { "epoch": 0.1472728569899761, "grad_norm": 0.2252059429883957, "learning_rate": 4.966024505718014e-05, "loss": 0.1966, "step": 8257 }, { "epoch": 0.1472906931116898, "grad_norm": 0.2354527711868286, "learning_rate": 4.965998926944723e-05, "loss": 0.1968, "step": 8258 }, { "epoch": 0.14730852923340348, "grad_norm": 0.26098114252090454, "learning_rate": 4.9659733386123694e-05, "loss": 0.2569, "step": 8259 }, { "epoch": 0.14732636535511717, "grad_norm": 0.2737635672092438, "learning_rate": 4.9659477407210505e-05, "loss": 0.2287, "step": 8260 }, { "epoch": 0.1473442014768309, "grad_norm": 0.3221443295478821, "learning_rate": 4.9659221332708665e-05, "loss": 0.1841, "step": 8261 }, { "epoch": 0.14736203759854458, "grad_norm": 0.3214664161205292, "learning_rate": 4.965896516261917e-05, "loss": 0.1934, "step": 8262 }, { "epoch": 0.14737987372025826, "grad_norm": 0.38391464948654175, "learning_rate": 4.965870889694301e-05, "loss": 0.244, "step": 8263 }, { "epoch": 0.14739770984197195, "grad_norm": 0.2779027819633484, "learning_rate": 4.9658452535681174e-05, "loss": 0.213, "step": 8264 }, { "epoch": 0.14741554596368567, "grad_norm": 0.26770997047424316, "learning_rate": 4.965819607883466e-05, "loss": 0.1298, "step": 8265 }, { "epoch": 0.14743338208539936, "grad_norm": 0.37342795729637146, "learning_rate": 4.965793952640446e-05, "loss": 0.2392, "step": 8266 }, { "epoch": 0.14745121820711304, "grad_norm": 0.32738959789276123, "learning_rate": 4.965768287839157e-05, "loss": 0.2381, "step": 8267 }, { "epoch": 0.14746905432882673, "grad_norm": 0.241044819355011, "learning_rate": 4.965742613479699e-05, "loss": 0.2188, "step": 8268 }, { "epoch": 0.14748689045054045, "grad_norm": 0.25374168157577515, "learning_rate": 4.96571692956217e-05, "loss": 0.2453, "step": 8269 }, { "epoch": 0.14750472657225414, "grad_norm": 0.1898292899131775, "learning_rate": 4.9656912360866713e-05, "loss": 0.1853, "step": 8270 }, { "epoch": 0.14752256269396782, "grad_norm": 0.2697450518608093, "learning_rate": 4.965665533053302e-05, "loss": 0.1873, "step": 8271 }, { "epoch": 0.1475403988156815, "grad_norm": 0.32905855774879456, "learning_rate": 4.965639820462161e-05, "loss": 0.2453, "step": 8272 }, { "epoch": 0.1475582349373952, "grad_norm": 0.38826000690460205, "learning_rate": 4.965614098313348e-05, "loss": 0.2626, "step": 8273 }, { "epoch": 0.14757607105910892, "grad_norm": 0.2632378339767456, "learning_rate": 4.965588366606964e-05, "loss": 0.2363, "step": 8274 }, { "epoch": 0.1475939071808226, "grad_norm": 0.27714109420776367, "learning_rate": 4.9655626253431076e-05, "loss": 0.188, "step": 8275 }, { "epoch": 0.1476117433025363, "grad_norm": 0.2937466502189636, "learning_rate": 4.9655368745218785e-05, "loss": 0.2229, "step": 8276 }, { "epoch": 0.14762957942424998, "grad_norm": 0.32056376338005066, "learning_rate": 4.9655111141433775e-05, "loss": 0.2606, "step": 8277 }, { "epoch": 0.1476474155459637, "grad_norm": 0.26838433742523193, "learning_rate": 4.9654853442077035e-05, "loss": 0.2063, "step": 8278 }, { "epoch": 0.14766525166767738, "grad_norm": 0.35123705863952637, "learning_rate": 4.9654595647149574e-05, "loss": 0.2585, "step": 8279 }, { "epoch": 0.14768308778939107, "grad_norm": 0.34772756695747375, "learning_rate": 4.9654337756652375e-05, "loss": 0.2656, "step": 8280 }, { "epoch": 0.14770092391110476, "grad_norm": 0.3487825393676758, "learning_rate": 4.9654079770586454e-05, "loss": 0.2803, "step": 8281 }, { "epoch": 0.14771876003281847, "grad_norm": 0.23886552453041077, "learning_rate": 4.965382168895281e-05, "loss": 0.2179, "step": 8282 }, { "epoch": 0.14773659615453216, "grad_norm": 0.35290834307670593, "learning_rate": 4.965356351175242e-05, "loss": 0.2081, "step": 8283 }, { "epoch": 0.14775443227624585, "grad_norm": 0.2803749144077301, "learning_rate": 4.9653305238986316e-05, "loss": 0.2845, "step": 8284 }, { "epoch": 0.14777226839795954, "grad_norm": 0.4249259829521179, "learning_rate": 4.965304687065548e-05, "loss": 0.2111, "step": 8285 }, { "epoch": 0.14779010451967325, "grad_norm": 0.2126653790473938, "learning_rate": 4.965278840676093e-05, "loss": 0.2064, "step": 8286 }, { "epoch": 0.14780794064138694, "grad_norm": 0.28971126675605774, "learning_rate": 4.9652529847303644e-05, "loss": 0.1976, "step": 8287 }, { "epoch": 0.14782577676310063, "grad_norm": 0.3742590844631195, "learning_rate": 4.965227119228465e-05, "loss": 0.2615, "step": 8288 }, { "epoch": 0.14784361288481432, "grad_norm": 0.24936038255691528, "learning_rate": 4.965201244170493e-05, "loss": 0.2163, "step": 8289 }, { "epoch": 0.14786144900652803, "grad_norm": 0.2307882308959961, "learning_rate": 4.96517535955655e-05, "loss": 0.1838, "step": 8290 }, { "epoch": 0.14787928512824172, "grad_norm": 0.2994025945663452, "learning_rate": 4.9651494653867356e-05, "loss": 0.2127, "step": 8291 }, { "epoch": 0.1478971212499554, "grad_norm": 0.21554845571517944, "learning_rate": 4.9651235616611504e-05, "loss": 0.1681, "step": 8292 }, { "epoch": 0.1479149573716691, "grad_norm": 0.25892943143844604, "learning_rate": 4.9650976483798945e-05, "loss": 0.2094, "step": 8293 }, { "epoch": 0.14793279349338279, "grad_norm": 0.2690183222293854, "learning_rate": 4.965071725543069e-05, "loss": 0.23, "step": 8294 }, { "epoch": 0.1479506296150965, "grad_norm": 0.24941860139369965, "learning_rate": 4.9650457931507744e-05, "loss": 0.2253, "step": 8295 }, { "epoch": 0.1479684657368102, "grad_norm": 0.3571094274520874, "learning_rate": 4.965019851203111e-05, "loss": 0.2332, "step": 8296 }, { "epoch": 0.14798630185852388, "grad_norm": 0.29016420245170593, "learning_rate": 4.964993899700179e-05, "loss": 0.2428, "step": 8297 }, { "epoch": 0.14800413798023757, "grad_norm": 0.3337816298007965, "learning_rate": 4.964967938642079e-05, "loss": 0.2693, "step": 8298 }, { "epoch": 0.14802197410195128, "grad_norm": 0.2792920470237732, "learning_rate": 4.9649419680289116e-05, "loss": 0.2064, "step": 8299 }, { "epoch": 0.14803981022366497, "grad_norm": 0.27864938974380493, "learning_rate": 4.964915987860779e-05, "loss": 0.2242, "step": 8300 }, { "epoch": 0.14805764634537866, "grad_norm": 0.25790777802467346, "learning_rate": 4.9648899981377796e-05, "loss": 0.2249, "step": 8301 }, { "epoch": 0.14807548246709235, "grad_norm": 0.35313209891319275, "learning_rate": 4.964863998860016e-05, "loss": 0.2099, "step": 8302 }, { "epoch": 0.14809331858880606, "grad_norm": 0.2688464820384979, "learning_rate": 4.964837990027587e-05, "loss": 0.2186, "step": 8303 }, { "epoch": 0.14811115471051975, "grad_norm": 0.3255918025970459, "learning_rate": 4.9648119716405954e-05, "loss": 0.2578, "step": 8304 }, { "epoch": 0.14812899083223344, "grad_norm": 0.27829429507255554, "learning_rate": 4.964785943699141e-05, "loss": 0.2015, "step": 8305 }, { "epoch": 0.14814682695394712, "grad_norm": 0.29929518699645996, "learning_rate": 4.964759906203326e-05, "loss": 0.2974, "step": 8306 }, { "epoch": 0.14816466307566084, "grad_norm": 0.3907299041748047, "learning_rate": 4.964733859153249e-05, "loss": 0.2528, "step": 8307 }, { "epoch": 0.14818249919737453, "grad_norm": 0.3519734740257263, "learning_rate": 4.964707802549013e-05, "loss": 0.1722, "step": 8308 }, { "epoch": 0.14820033531908822, "grad_norm": 0.2800590395927429, "learning_rate": 4.964681736390718e-05, "loss": 0.227, "step": 8309 }, { "epoch": 0.1482181714408019, "grad_norm": 0.2709098160266876, "learning_rate": 4.9646556606784655e-05, "loss": 0.2277, "step": 8310 }, { "epoch": 0.14823600756251562, "grad_norm": 0.25083452463150024, "learning_rate": 4.964629575412356e-05, "loss": 0.2256, "step": 8311 }, { "epoch": 0.1482538436842293, "grad_norm": 0.23127196729183197, "learning_rate": 4.964603480592491e-05, "loss": 0.2213, "step": 8312 }, { "epoch": 0.148271679805943, "grad_norm": 0.2816026508808136, "learning_rate": 4.964577376218972e-05, "loss": 0.2505, "step": 8313 }, { "epoch": 0.14828951592765668, "grad_norm": 0.2292989045381546, "learning_rate": 4.9645512622919e-05, "loss": 0.1753, "step": 8314 }, { "epoch": 0.14830735204937037, "grad_norm": 0.3542279899120331, "learning_rate": 4.964525138811376e-05, "loss": 0.2513, "step": 8315 }, { "epoch": 0.1483251881710841, "grad_norm": 0.2088947743177414, "learning_rate": 4.9644990057775015e-05, "loss": 0.2277, "step": 8316 }, { "epoch": 0.14834302429279778, "grad_norm": 0.30650970339775085, "learning_rate": 4.964472863190377e-05, "loss": 0.2214, "step": 8317 }, { "epoch": 0.14836086041451146, "grad_norm": 0.27056947350502014, "learning_rate": 4.964446711050105e-05, "loss": 0.2224, "step": 8318 }, { "epoch": 0.14837869653622515, "grad_norm": 0.3205641806125641, "learning_rate": 4.964420549356786e-05, "loss": 0.1694, "step": 8319 }, { "epoch": 0.14839653265793887, "grad_norm": 0.29826006293296814, "learning_rate": 4.964394378110522e-05, "loss": 0.2263, "step": 8320 }, { "epoch": 0.14841436877965256, "grad_norm": 0.36686989665031433, "learning_rate": 4.964368197311414e-05, "loss": 0.2471, "step": 8321 }, { "epoch": 0.14843220490136624, "grad_norm": 0.24641509354114532, "learning_rate": 4.964342006959565e-05, "loss": 0.1845, "step": 8322 }, { "epoch": 0.14845004102307993, "grad_norm": 0.31873640418052673, "learning_rate": 4.9643158070550734e-05, "loss": 0.277, "step": 8323 }, { "epoch": 0.14846787714479365, "grad_norm": 0.27908164262771606, "learning_rate": 4.964289597598044e-05, "loss": 0.2519, "step": 8324 }, { "epoch": 0.14848571326650734, "grad_norm": 0.3104163706302643, "learning_rate": 4.964263378588576e-05, "loss": 0.2318, "step": 8325 }, { "epoch": 0.14850354938822102, "grad_norm": 0.2784401476383209, "learning_rate": 4.964237150026772e-05, "loss": 0.2207, "step": 8326 }, { "epoch": 0.1485213855099347, "grad_norm": 0.4940575361251831, "learning_rate": 4.9642109119127345e-05, "loss": 0.2583, "step": 8327 }, { "epoch": 0.14853922163164843, "grad_norm": 0.20831309258937836, "learning_rate": 4.9641846642465636e-05, "loss": 0.1777, "step": 8328 }, { "epoch": 0.14855705775336212, "grad_norm": 0.20303252339363098, "learning_rate": 4.964158407028362e-05, "loss": 0.2176, "step": 8329 }, { "epoch": 0.1485748938750758, "grad_norm": 0.33788809180259705, "learning_rate": 4.964132140258231e-05, "loss": 0.2748, "step": 8330 }, { "epoch": 0.1485927299967895, "grad_norm": 0.26124975085258484, "learning_rate": 4.9641058639362735e-05, "loss": 0.2575, "step": 8331 }, { "epoch": 0.14861056611850318, "grad_norm": 0.2390952706336975, "learning_rate": 4.9640795780625903e-05, "loss": 0.215, "step": 8332 }, { "epoch": 0.1486284022402169, "grad_norm": 0.28409543633461, "learning_rate": 4.964053282637284e-05, "loss": 0.2635, "step": 8333 }, { "epoch": 0.14864623836193058, "grad_norm": 0.34645968675613403, "learning_rate": 4.964026977660455e-05, "loss": 0.3206, "step": 8334 }, { "epoch": 0.14866407448364427, "grad_norm": 0.2699476480484009, "learning_rate": 4.964000663132208e-05, "loss": 0.2449, "step": 8335 }, { "epoch": 0.14868191060535796, "grad_norm": 0.2880427837371826, "learning_rate": 4.963974339052641e-05, "loss": 0.2121, "step": 8336 }, { "epoch": 0.14869974672707167, "grad_norm": 0.31315329670906067, "learning_rate": 4.963948005421861e-05, "loss": 0.2072, "step": 8337 }, { "epoch": 0.14871758284878536, "grad_norm": 0.3538931906223297, "learning_rate": 4.963921662239966e-05, "loss": 0.2264, "step": 8338 }, { "epoch": 0.14873541897049905, "grad_norm": 0.2767057418823242, "learning_rate": 4.963895309507061e-05, "loss": 0.2501, "step": 8339 }, { "epoch": 0.14875325509221274, "grad_norm": 0.2637615203857422, "learning_rate": 4.9638689472232455e-05, "loss": 0.2457, "step": 8340 }, { "epoch": 0.14877109121392645, "grad_norm": 0.2188502848148346, "learning_rate": 4.963842575388623e-05, "loss": 0.1833, "step": 8341 }, { "epoch": 0.14878892733564014, "grad_norm": 0.2582019567489624, "learning_rate": 4.963816194003296e-05, "loss": 0.2158, "step": 8342 }, { "epoch": 0.14880676345735383, "grad_norm": 0.31333044171333313, "learning_rate": 4.963789803067366e-05, "loss": 0.2446, "step": 8343 }, { "epoch": 0.14882459957906752, "grad_norm": 0.25765636563301086, "learning_rate": 4.9637634025809365e-05, "loss": 0.2173, "step": 8344 }, { "epoch": 0.14884243570078123, "grad_norm": 0.2975947856903076, "learning_rate": 4.963736992544109e-05, "loss": 0.1715, "step": 8345 }, { "epoch": 0.14886027182249492, "grad_norm": 0.267433762550354, "learning_rate": 4.9637105729569854e-05, "loss": 0.2244, "step": 8346 }, { "epoch": 0.1488781079442086, "grad_norm": 0.23204271495342255, "learning_rate": 4.963684143819669e-05, "loss": 0.2341, "step": 8347 }, { "epoch": 0.1488959440659223, "grad_norm": 0.35042837262153625, "learning_rate": 4.9636577051322616e-05, "loss": 0.2745, "step": 8348 }, { "epoch": 0.148913780187636, "grad_norm": 0.21348612010478973, "learning_rate": 4.9636312568948674e-05, "loss": 0.1458, "step": 8349 }, { "epoch": 0.1489316163093497, "grad_norm": 0.30840039253234863, "learning_rate": 4.963604799107586e-05, "loss": 0.2059, "step": 8350 }, { "epoch": 0.1489494524310634, "grad_norm": 0.25926488637924194, "learning_rate": 4.963578331770522e-05, "loss": 0.1971, "step": 8351 }, { "epoch": 0.14896728855277708, "grad_norm": 0.29957008361816406, "learning_rate": 4.9635518548837775e-05, "loss": 0.2229, "step": 8352 }, { "epoch": 0.14898512467449077, "grad_norm": 0.29102393984794617, "learning_rate": 4.963525368447455e-05, "loss": 0.2005, "step": 8353 }, { "epoch": 0.14900296079620448, "grad_norm": 0.29465726017951965, "learning_rate": 4.963498872461658e-05, "loss": 0.2216, "step": 8354 }, { "epoch": 0.14902079691791817, "grad_norm": 0.3072602152824402, "learning_rate": 4.963472366926488e-05, "loss": 0.2175, "step": 8355 }, { "epoch": 0.14903863303963186, "grad_norm": 0.2767748534679413, "learning_rate": 4.963445851842048e-05, "loss": 0.2509, "step": 8356 }, { "epoch": 0.14905646916134555, "grad_norm": 0.2600916028022766, "learning_rate": 4.963419327208441e-05, "loss": 0.1951, "step": 8357 }, { "epoch": 0.14907430528305926, "grad_norm": 0.3140299618244171, "learning_rate": 4.963392793025771e-05, "loss": 0.2379, "step": 8358 }, { "epoch": 0.14909214140477295, "grad_norm": 0.21253187954425812, "learning_rate": 4.963366249294138e-05, "loss": 0.2285, "step": 8359 }, { "epoch": 0.14910997752648664, "grad_norm": 0.2381104677915573, "learning_rate": 4.963339696013647e-05, "loss": 0.2235, "step": 8360 }, { "epoch": 0.14912781364820032, "grad_norm": 0.26216456294059753, "learning_rate": 4.963313133184402e-05, "loss": 0.1895, "step": 8361 }, { "epoch": 0.14914564976991404, "grad_norm": 0.2712250351905823, "learning_rate": 4.9632865608065025e-05, "loss": 0.2727, "step": 8362 }, { "epoch": 0.14916348589162773, "grad_norm": 0.22147129476070404, "learning_rate": 4.963259978880055e-05, "loss": 0.1657, "step": 8363 }, { "epoch": 0.14918132201334142, "grad_norm": 0.39221853017807007, "learning_rate": 4.9632333874051604e-05, "loss": 0.2286, "step": 8364 }, { "epoch": 0.1491991581350551, "grad_norm": 0.2558264434337616, "learning_rate": 4.963206786381922e-05, "loss": 0.2123, "step": 8365 }, { "epoch": 0.14921699425676882, "grad_norm": 0.3101685643196106, "learning_rate": 4.963180175810444e-05, "loss": 0.1932, "step": 8366 }, { "epoch": 0.1492348303784825, "grad_norm": 0.3007619380950928, "learning_rate": 4.963153555690829e-05, "loss": 0.2591, "step": 8367 }, { "epoch": 0.1492526665001962, "grad_norm": 0.2865447998046875, "learning_rate": 4.963126926023179e-05, "loss": 0.2352, "step": 8368 }, { "epoch": 0.14927050262190988, "grad_norm": 0.24834208190441132, "learning_rate": 4.9631002868075995e-05, "loss": 0.2512, "step": 8369 }, { "epoch": 0.1492883387436236, "grad_norm": 0.27149853110313416, "learning_rate": 4.9630736380441925e-05, "loss": 0.237, "step": 8370 }, { "epoch": 0.1493061748653373, "grad_norm": 0.26262322068214417, "learning_rate": 4.9630469797330606e-05, "loss": 0.2102, "step": 8371 }, { "epoch": 0.14932401098705098, "grad_norm": 0.30290353298187256, "learning_rate": 4.963020311874308e-05, "loss": 0.2413, "step": 8372 }, { "epoch": 0.14934184710876466, "grad_norm": 0.3651910722255707, "learning_rate": 4.962993634468038e-05, "loss": 0.2062, "step": 8373 }, { "epoch": 0.14935968323047835, "grad_norm": 0.28044047951698303, "learning_rate": 4.962966947514355e-05, "loss": 0.2423, "step": 8374 }, { "epoch": 0.14937751935219207, "grad_norm": 0.2301790416240692, "learning_rate": 4.9629402510133604e-05, "loss": 0.2353, "step": 8375 }, { "epoch": 0.14939535547390576, "grad_norm": 0.4232882857322693, "learning_rate": 4.9629135449651585e-05, "loss": 0.2332, "step": 8376 }, { "epoch": 0.14941319159561944, "grad_norm": 0.23570029437541962, "learning_rate": 4.962886829369854e-05, "loss": 0.1951, "step": 8377 }, { "epoch": 0.14943102771733313, "grad_norm": 0.3388097882270813, "learning_rate": 4.962860104227549e-05, "loss": 0.2622, "step": 8378 }, { "epoch": 0.14944886383904685, "grad_norm": 0.29123201966285706, "learning_rate": 4.962833369538348e-05, "loss": 0.225, "step": 8379 }, { "epoch": 0.14946669996076054, "grad_norm": 0.2710510790348053, "learning_rate": 4.962806625302353e-05, "loss": 0.2378, "step": 8380 }, { "epoch": 0.14948453608247422, "grad_norm": 0.2576589584350586, "learning_rate": 4.96277987151967e-05, "loss": 0.2127, "step": 8381 }, { "epoch": 0.1495023722041879, "grad_norm": 0.2736780643463135, "learning_rate": 4.962753108190401e-05, "loss": 0.2326, "step": 8382 }, { "epoch": 0.14952020832590163, "grad_norm": 0.2831859588623047, "learning_rate": 4.962726335314651e-05, "loss": 0.2421, "step": 8383 }, { "epoch": 0.14953804444761531, "grad_norm": 0.27426156401634216, "learning_rate": 4.962699552892522e-05, "loss": 0.2091, "step": 8384 }, { "epoch": 0.149555880569329, "grad_norm": 0.24105580151081085, "learning_rate": 4.962672760924121e-05, "loss": 0.2285, "step": 8385 }, { "epoch": 0.1495737166910427, "grad_norm": 0.33097365498542786, "learning_rate": 4.962645959409548e-05, "loss": 0.1814, "step": 8386 }, { "epoch": 0.1495915528127564, "grad_norm": 0.2840391993522644, "learning_rate": 4.9626191483489094e-05, "loss": 0.1809, "step": 8387 }, { "epoch": 0.1496093889344701, "grad_norm": 0.25599801540374756, "learning_rate": 4.962592327742308e-05, "loss": 0.2323, "step": 8388 }, { "epoch": 0.14962722505618378, "grad_norm": 0.29342347383499146, "learning_rate": 4.962565497589848e-05, "loss": 0.2408, "step": 8389 }, { "epoch": 0.14964506117789747, "grad_norm": 0.25567734241485596, "learning_rate": 4.9625386578916346e-05, "loss": 0.263, "step": 8390 }, { "epoch": 0.1496628972996112, "grad_norm": 0.23582299053668976, "learning_rate": 4.96251180864777e-05, "loss": 0.1918, "step": 8391 }, { "epoch": 0.14968073342132487, "grad_norm": 0.31972259283065796, "learning_rate": 4.962484949858359e-05, "loss": 0.1738, "step": 8392 }, { "epoch": 0.14969856954303856, "grad_norm": 0.21748530864715576, "learning_rate": 4.9624580815235064e-05, "loss": 0.1819, "step": 8393 }, { "epoch": 0.14971640566475225, "grad_norm": 0.26137396693229675, "learning_rate": 4.9624312036433155e-05, "loss": 0.1671, "step": 8394 }, { "epoch": 0.14973424178646594, "grad_norm": 0.3161908686161041, "learning_rate": 4.9624043162178914e-05, "loss": 0.1966, "step": 8395 }, { "epoch": 0.14975207790817965, "grad_norm": 0.3270423710346222, "learning_rate": 4.962377419247337e-05, "loss": 0.213, "step": 8396 }, { "epoch": 0.14976991402989334, "grad_norm": 0.25357145071029663, "learning_rate": 4.9623505127317574e-05, "loss": 0.2075, "step": 8397 }, { "epoch": 0.14978775015160703, "grad_norm": 0.30495455861091614, "learning_rate": 4.962323596671257e-05, "loss": 0.2164, "step": 8398 }, { "epoch": 0.14980558627332072, "grad_norm": 0.32474029064178467, "learning_rate": 4.962296671065939e-05, "loss": 0.2119, "step": 8399 }, { "epoch": 0.14982342239503443, "grad_norm": 0.31686919927597046, "learning_rate": 4.962269735915909e-05, "loss": 0.227, "step": 8400 }, { "epoch": 0.14984125851674812, "grad_norm": 0.27647504210472107, "learning_rate": 4.962242791221271e-05, "loss": 0.2417, "step": 8401 }, { "epoch": 0.1498590946384618, "grad_norm": 0.2595807611942291, "learning_rate": 4.9622158369821306e-05, "loss": 0.1925, "step": 8402 }, { "epoch": 0.1498769307601755, "grad_norm": 0.21945913136005402, "learning_rate": 4.96218887319859e-05, "loss": 0.1882, "step": 8403 }, { "epoch": 0.1498947668818892, "grad_norm": 0.45307475328445435, "learning_rate": 4.9621618998707554e-05, "loss": 0.21, "step": 8404 }, { "epoch": 0.1499126030036029, "grad_norm": 0.280192106962204, "learning_rate": 4.962134916998731e-05, "loss": 0.2048, "step": 8405 }, { "epoch": 0.1499304391253166, "grad_norm": 0.306252121925354, "learning_rate": 4.962107924582621e-05, "loss": 0.2351, "step": 8406 }, { "epoch": 0.14994827524703028, "grad_norm": 0.31264379620552063, "learning_rate": 4.96208092262253e-05, "loss": 0.2696, "step": 8407 }, { "epoch": 0.149966111368744, "grad_norm": 0.24853278696537018, "learning_rate": 4.962053911118563e-05, "loss": 0.2416, "step": 8408 }, { "epoch": 0.14998394749045768, "grad_norm": 0.24951770901679993, "learning_rate": 4.962026890070825e-05, "loss": 0.1975, "step": 8409 }, { "epoch": 0.15000178361217137, "grad_norm": 0.3052162230014801, "learning_rate": 4.96199985947942e-05, "loss": 0.1841, "step": 8410 }, { "epoch": 0.15001961973388506, "grad_norm": 0.26822376251220703, "learning_rate": 4.9619728193444536e-05, "loss": 0.2704, "step": 8411 }, { "epoch": 0.15003745585559874, "grad_norm": 0.3441866636276245, "learning_rate": 4.961945769666031e-05, "loss": 0.2713, "step": 8412 }, { "epoch": 0.15005529197731246, "grad_norm": 0.2854105830192566, "learning_rate": 4.961918710444255e-05, "loss": 0.3004, "step": 8413 }, { "epoch": 0.15007312809902615, "grad_norm": 0.26902419328689575, "learning_rate": 4.9618916416792324e-05, "loss": 0.2319, "step": 8414 }, { "epoch": 0.15009096422073984, "grad_norm": 0.2367042899131775, "learning_rate": 4.961864563371067e-05, "loss": 0.2256, "step": 8415 }, { "epoch": 0.15010880034245352, "grad_norm": 0.3011890649795532, "learning_rate": 4.9618374755198646e-05, "loss": 0.2273, "step": 8416 }, { "epoch": 0.15012663646416724, "grad_norm": 0.2846713066101074, "learning_rate": 4.9618103781257295e-05, "loss": 0.232, "step": 8417 }, { "epoch": 0.15014447258588093, "grad_norm": 0.30780264735221863, "learning_rate": 4.961783271188768e-05, "loss": 0.2468, "step": 8418 }, { "epoch": 0.15016230870759462, "grad_norm": 0.24429358541965485, "learning_rate": 4.961756154709083e-05, "loss": 0.2227, "step": 8419 }, { "epoch": 0.1501801448293083, "grad_norm": 0.36014044284820557, "learning_rate": 4.961729028686782e-05, "loss": 0.2634, "step": 8420 }, { "epoch": 0.15019798095102202, "grad_norm": 0.3677643835544586, "learning_rate": 4.9617018931219686e-05, "loss": 0.262, "step": 8421 }, { "epoch": 0.1502158170727357, "grad_norm": 0.29291391372680664, "learning_rate": 4.961674748014749e-05, "loss": 0.2256, "step": 8422 }, { "epoch": 0.1502336531944494, "grad_norm": 0.2308768481016159, "learning_rate": 4.9616475933652264e-05, "loss": 0.2112, "step": 8423 }, { "epoch": 0.15025148931616308, "grad_norm": 0.21656841039657593, "learning_rate": 4.9616204291735086e-05, "loss": 0.2052, "step": 8424 }, { "epoch": 0.1502693254378768, "grad_norm": 0.3243447542190552, "learning_rate": 4.961593255439699e-05, "loss": 0.2141, "step": 8425 }, { "epoch": 0.1502871615595905, "grad_norm": 0.34292158484458923, "learning_rate": 4.961566072163905e-05, "loss": 0.2355, "step": 8426 }, { "epoch": 0.15030499768130418, "grad_norm": 0.21832285821437836, "learning_rate": 4.9615388793462294e-05, "loss": 0.2206, "step": 8427 }, { "epoch": 0.15032283380301786, "grad_norm": 0.23595459759235382, "learning_rate": 4.96151167698678e-05, "loss": 0.2027, "step": 8428 }, { "epoch": 0.15034066992473158, "grad_norm": 0.3123375475406647, "learning_rate": 4.9614844650856605e-05, "loss": 0.2355, "step": 8429 }, { "epoch": 0.15035850604644527, "grad_norm": 0.23871618509292603, "learning_rate": 4.9614572436429777e-05, "loss": 0.1664, "step": 8430 }, { "epoch": 0.15037634216815896, "grad_norm": 0.25301218032836914, "learning_rate": 4.961430012658835e-05, "loss": 0.1872, "step": 8431 }, { "epoch": 0.15039417828987264, "grad_norm": 0.3113987445831299, "learning_rate": 4.961402772133341e-05, "loss": 0.2002, "step": 8432 }, { "epoch": 0.15041201441158633, "grad_norm": 0.26478689908981323, "learning_rate": 4.9613755220665994e-05, "loss": 0.2332, "step": 8433 }, { "epoch": 0.15042985053330005, "grad_norm": 0.4453291893005371, "learning_rate": 4.961348262458715e-05, "loss": 0.2066, "step": 8434 }, { "epoch": 0.15044768665501373, "grad_norm": 0.33681079745292664, "learning_rate": 4.961320993309796e-05, "loss": 0.2064, "step": 8435 }, { "epoch": 0.15046552277672742, "grad_norm": 0.28030309081077576, "learning_rate": 4.961293714619946e-05, "loss": 0.2421, "step": 8436 }, { "epoch": 0.1504833588984411, "grad_norm": 0.2608387768268585, "learning_rate": 4.961266426389272e-05, "loss": 0.2318, "step": 8437 }, { "epoch": 0.15050119502015483, "grad_norm": 0.29991066455841064, "learning_rate": 4.9612391286178784e-05, "loss": 0.2629, "step": 8438 }, { "epoch": 0.15051903114186851, "grad_norm": 0.34991273283958435, "learning_rate": 4.9612118213058725e-05, "loss": 0.267, "step": 8439 }, { "epoch": 0.1505368672635822, "grad_norm": 0.2911750376224518, "learning_rate": 4.961184504453359e-05, "loss": 0.2172, "step": 8440 }, { "epoch": 0.1505547033852959, "grad_norm": 0.3051919937133789, "learning_rate": 4.961157178060445e-05, "loss": 0.2735, "step": 8441 }, { "epoch": 0.1505725395070096, "grad_norm": 0.2562267482280731, "learning_rate": 4.9611298421272356e-05, "loss": 0.1978, "step": 8442 }, { "epoch": 0.1505903756287233, "grad_norm": 0.45183566212654114, "learning_rate": 4.961102496653837e-05, "loss": 0.1959, "step": 8443 }, { "epoch": 0.15060821175043698, "grad_norm": 0.2823636531829834, "learning_rate": 4.961075141640355e-05, "loss": 0.2414, "step": 8444 }, { "epoch": 0.15062604787215067, "grad_norm": 0.27796629071235657, "learning_rate": 4.961047777086894e-05, "loss": 0.2348, "step": 8445 }, { "epoch": 0.15064388399386439, "grad_norm": 0.40828168392181396, "learning_rate": 4.9610204029935634e-05, "loss": 0.2373, "step": 8446 }, { "epoch": 0.15066172011557807, "grad_norm": 0.25054875016212463, "learning_rate": 4.9609930193604684e-05, "loss": 0.2241, "step": 8447 }, { "epoch": 0.15067955623729176, "grad_norm": 0.23433774709701538, "learning_rate": 4.960965626187713e-05, "loss": 0.1971, "step": 8448 }, { "epoch": 0.15069739235900545, "grad_norm": 0.26952916383743286, "learning_rate": 4.9609382234754054e-05, "loss": 0.2388, "step": 8449 }, { "epoch": 0.15071522848071917, "grad_norm": 0.3398025631904602, "learning_rate": 4.9609108112236515e-05, "loss": 0.2012, "step": 8450 }, { "epoch": 0.15073306460243285, "grad_norm": 0.28553298115730286, "learning_rate": 4.960883389432557e-05, "loss": 0.2339, "step": 8451 }, { "epoch": 0.15075090072414654, "grad_norm": 0.32566359639167786, "learning_rate": 4.960855958102228e-05, "loss": 0.2237, "step": 8452 }, { "epoch": 0.15076873684586023, "grad_norm": 0.300100713968277, "learning_rate": 4.960828517232773e-05, "loss": 0.236, "step": 8453 }, { "epoch": 0.15078657296757392, "grad_norm": 0.31294363737106323, "learning_rate": 4.960801066824295e-05, "loss": 0.2633, "step": 8454 }, { "epoch": 0.15080440908928763, "grad_norm": 0.2673596739768982, "learning_rate": 4.9607736068769034e-05, "loss": 0.213, "step": 8455 }, { "epoch": 0.15082224521100132, "grad_norm": 0.2582441568374634, "learning_rate": 4.960746137390703e-05, "loss": 0.2209, "step": 8456 }, { "epoch": 0.150840081332715, "grad_norm": 0.25271373987197876, "learning_rate": 4.9607186583658e-05, "loss": 0.2171, "step": 8457 }, { "epoch": 0.1508579174544287, "grad_norm": 0.22569482028484344, "learning_rate": 4.9606911698023024e-05, "loss": 0.2073, "step": 8458 }, { "epoch": 0.1508757535761424, "grad_norm": 0.26054930686950684, "learning_rate": 4.960663671700315e-05, "loss": 0.1471, "step": 8459 }, { "epoch": 0.1508935896978561, "grad_norm": 0.23638111352920532, "learning_rate": 4.9606361640599464e-05, "loss": 0.2183, "step": 8460 }, { "epoch": 0.1509114258195698, "grad_norm": 0.4223468005657196, "learning_rate": 4.960608646881302e-05, "loss": 0.2378, "step": 8461 }, { "epoch": 0.15092926194128348, "grad_norm": 0.2853187322616577, "learning_rate": 4.960581120164488e-05, "loss": 0.2367, "step": 8462 }, { "epoch": 0.1509470980629972, "grad_norm": 0.2830091416835785, "learning_rate": 4.960553583909612e-05, "loss": 0.2337, "step": 8463 }, { "epoch": 0.15096493418471088, "grad_norm": 0.288740873336792, "learning_rate": 4.960526038116781e-05, "loss": 0.1913, "step": 8464 }, { "epoch": 0.15098277030642457, "grad_norm": 0.35857534408569336, "learning_rate": 4.9604984827861e-05, "loss": 0.2053, "step": 8465 }, { "epoch": 0.15100060642813826, "grad_norm": 0.3122760057449341, "learning_rate": 4.9604709179176777e-05, "loss": 0.2189, "step": 8466 }, { "epoch": 0.15101844254985197, "grad_norm": 0.2901649475097656, "learning_rate": 4.96044334351162e-05, "loss": 0.2098, "step": 8467 }, { "epoch": 0.15103627867156566, "grad_norm": 0.21733441948890686, "learning_rate": 4.9604157595680356e-05, "loss": 0.2342, "step": 8468 }, { "epoch": 0.15105411479327935, "grad_norm": 0.2657352387905121, "learning_rate": 4.960388166087028e-05, "loss": 0.1948, "step": 8469 }, { "epoch": 0.15107195091499304, "grad_norm": 0.243385449051857, "learning_rate": 4.960360563068707e-05, "loss": 0.2416, "step": 8470 }, { "epoch": 0.15108978703670675, "grad_norm": 0.39115190505981445, "learning_rate": 4.960332950513179e-05, "loss": 0.1748, "step": 8471 }, { "epoch": 0.15110762315842044, "grad_norm": 0.2807091772556305, "learning_rate": 4.9603053284205504e-05, "loss": 0.2212, "step": 8472 }, { "epoch": 0.15112545928013413, "grad_norm": 0.22286827862262726, "learning_rate": 4.960277696790928e-05, "loss": 0.2162, "step": 8473 }, { "epoch": 0.15114329540184782, "grad_norm": 0.3109433054924011, "learning_rate": 4.9602500556244204e-05, "loss": 0.249, "step": 8474 }, { "epoch": 0.1511611315235615, "grad_norm": 0.24084334075450897, "learning_rate": 4.960222404921133e-05, "loss": 0.166, "step": 8475 }, { "epoch": 0.15117896764527522, "grad_norm": 0.35053345561027527, "learning_rate": 4.960194744681174e-05, "loss": 0.2813, "step": 8476 }, { "epoch": 0.1511968037669889, "grad_norm": 0.30203041434288025, "learning_rate": 4.960167074904651e-05, "loss": 0.241, "step": 8477 }, { "epoch": 0.1512146398887026, "grad_norm": 0.2878948450088501, "learning_rate": 4.96013939559167e-05, "loss": 0.225, "step": 8478 }, { "epoch": 0.15123247601041628, "grad_norm": 0.26000845432281494, "learning_rate": 4.96011170674234e-05, "loss": 0.2151, "step": 8479 }, { "epoch": 0.15125031213213, "grad_norm": 0.2977293133735657, "learning_rate": 4.9600840083567665e-05, "loss": 0.2014, "step": 8480 }, { "epoch": 0.1512681482538437, "grad_norm": 0.29933255910873413, "learning_rate": 4.960056300435058e-05, "loss": 0.1762, "step": 8481 }, { "epoch": 0.15128598437555738, "grad_norm": 0.2834800183773041, "learning_rate": 4.960028582977321e-05, "loss": 0.2121, "step": 8482 }, { "epoch": 0.15130382049727106, "grad_norm": 0.2429545670747757, "learning_rate": 4.960000855983664e-05, "loss": 0.1709, "step": 8483 }, { "epoch": 0.15132165661898478, "grad_norm": 0.31816548109054565, "learning_rate": 4.9599731194541943e-05, "loss": 0.2205, "step": 8484 }, { "epoch": 0.15133949274069847, "grad_norm": 0.4332368075847626, "learning_rate": 4.9599453733890186e-05, "loss": 0.2089, "step": 8485 }, { "epoch": 0.15135732886241215, "grad_norm": 0.27083703875541687, "learning_rate": 4.9599176177882454e-05, "loss": 0.2379, "step": 8486 }, { "epoch": 0.15137516498412584, "grad_norm": 0.35265836119651794, "learning_rate": 4.959889852651982e-05, "loss": 0.216, "step": 8487 }, { "epoch": 0.15139300110583956, "grad_norm": 0.3029022216796875, "learning_rate": 4.959862077980335e-05, "loss": 0.2287, "step": 8488 }, { "epoch": 0.15141083722755325, "grad_norm": 0.2494308054447174, "learning_rate": 4.959834293773414e-05, "loss": 0.187, "step": 8489 }, { "epoch": 0.15142867334926693, "grad_norm": 0.639824628829956, "learning_rate": 4.959806500031325e-05, "loss": 0.2199, "step": 8490 }, { "epoch": 0.15144650947098062, "grad_norm": 0.300088107585907, "learning_rate": 4.959778696754177e-05, "loss": 0.2404, "step": 8491 }, { "epoch": 0.15146434559269434, "grad_norm": 0.2985757887363434, "learning_rate": 4.959750883942077e-05, "loss": 0.2502, "step": 8492 }, { "epoch": 0.15148218171440803, "grad_norm": 0.46329471468925476, "learning_rate": 4.959723061595133e-05, "loss": 0.2459, "step": 8493 }, { "epoch": 0.15150001783612171, "grad_norm": 0.3448359966278076, "learning_rate": 4.9596952297134525e-05, "loss": 0.2431, "step": 8494 }, { "epoch": 0.1515178539578354, "grad_norm": 0.3522936701774597, "learning_rate": 4.959667388297144e-05, "loss": 0.2975, "step": 8495 }, { "epoch": 0.1515356900795491, "grad_norm": 0.27203306555747986, "learning_rate": 4.9596395373463153e-05, "loss": 0.202, "step": 8496 }, { "epoch": 0.1515535262012628, "grad_norm": 0.3497348129749298, "learning_rate": 4.959611676861074e-05, "loss": 0.2551, "step": 8497 }, { "epoch": 0.1515713623229765, "grad_norm": 0.2883908450603485, "learning_rate": 4.959583806841529e-05, "loss": 0.1995, "step": 8498 }, { "epoch": 0.15158919844469018, "grad_norm": 0.3639731705188751, "learning_rate": 4.959555927287787e-05, "loss": 0.2438, "step": 8499 }, { "epoch": 0.15160703456640387, "grad_norm": 0.30356094241142273, "learning_rate": 4.959528038199956e-05, "loss": 0.2124, "step": 8500 }, { "epoch": 0.15162487068811759, "grad_norm": 0.26965057849884033, "learning_rate": 4.959500139578146e-05, "loss": 0.1731, "step": 8501 }, { "epoch": 0.15164270680983127, "grad_norm": 0.31213638186454773, "learning_rate": 4.959472231422464e-05, "loss": 0.2119, "step": 8502 }, { "epoch": 0.15166054293154496, "grad_norm": 0.30166947841644287, "learning_rate": 4.9594443137330175e-05, "loss": 0.2041, "step": 8503 }, { "epoch": 0.15167837905325865, "grad_norm": 0.2721315622329712, "learning_rate": 4.9594163865099156e-05, "loss": 0.2055, "step": 8504 }, { "epoch": 0.15169621517497237, "grad_norm": 0.45368692278862, "learning_rate": 4.959388449753266e-05, "loss": 0.3265, "step": 8505 }, { "epoch": 0.15171405129668605, "grad_norm": 0.2886175215244293, "learning_rate": 4.959360503463178e-05, "loss": 0.1992, "step": 8506 }, { "epoch": 0.15173188741839974, "grad_norm": 0.31494712829589844, "learning_rate": 4.959332547639759e-05, "loss": 0.2155, "step": 8507 }, { "epoch": 0.15174972354011343, "grad_norm": 0.33003467321395874, "learning_rate": 4.959304582283118e-05, "loss": 0.2715, "step": 8508 }, { "epoch": 0.15176755966182714, "grad_norm": 0.29312780499458313, "learning_rate": 4.959276607393362e-05, "loss": 0.1529, "step": 8509 }, { "epoch": 0.15178539578354083, "grad_norm": 0.2924138903617859, "learning_rate": 4.9592486229706016e-05, "loss": 0.1973, "step": 8510 }, { "epoch": 0.15180323190525452, "grad_norm": 0.3075374662876129, "learning_rate": 4.9592206290149434e-05, "loss": 0.2165, "step": 8511 }, { "epoch": 0.1518210680269682, "grad_norm": 0.27690669894218445, "learning_rate": 4.9591926255264966e-05, "loss": 0.2144, "step": 8512 }, { "epoch": 0.1518389041486819, "grad_norm": 0.3529800772666931, "learning_rate": 4.95916461250537e-05, "loss": 0.2546, "step": 8513 }, { "epoch": 0.1518567402703956, "grad_norm": 0.2664533257484436, "learning_rate": 4.959136589951672e-05, "loss": 0.2501, "step": 8514 }, { "epoch": 0.1518745763921093, "grad_norm": 0.27176034450531006, "learning_rate": 4.959108557865512e-05, "loss": 0.2364, "step": 8515 }, { "epoch": 0.151892412513823, "grad_norm": 0.2882852852344513, "learning_rate": 4.9590805162469966e-05, "loss": 0.218, "step": 8516 }, { "epoch": 0.15191024863553668, "grad_norm": 0.27222636342048645, "learning_rate": 4.959052465096237e-05, "loss": 0.221, "step": 8517 }, { "epoch": 0.1519280847572504, "grad_norm": 0.4094749689102173, "learning_rate": 4.95902440441334e-05, "loss": 0.2974, "step": 8518 }, { "epoch": 0.15194592087896408, "grad_norm": 0.26940155029296875, "learning_rate": 4.958996334198415e-05, "loss": 0.2223, "step": 8519 }, { "epoch": 0.15196375700067777, "grad_norm": 0.2948804497718811, "learning_rate": 4.95896825445157e-05, "loss": 0.2379, "step": 8520 }, { "epoch": 0.15198159312239146, "grad_norm": 0.3114687204360962, "learning_rate": 4.958940165172916e-05, "loss": 0.2183, "step": 8521 }, { "epoch": 0.15199942924410517, "grad_norm": 0.477892130613327, "learning_rate": 4.958912066362561e-05, "loss": 0.2554, "step": 8522 }, { "epoch": 0.15201726536581886, "grad_norm": 0.23657453060150146, "learning_rate": 4.9588839580206126e-05, "loss": 0.2047, "step": 8523 }, { "epoch": 0.15203510148753255, "grad_norm": 0.2781616449356079, "learning_rate": 4.958855840147181e-05, "loss": 0.2234, "step": 8524 }, { "epoch": 0.15205293760924624, "grad_norm": 0.26945939660072327, "learning_rate": 4.958827712742375e-05, "loss": 0.2486, "step": 8525 }, { "epoch": 0.15207077373095995, "grad_norm": 0.2888774275779724, "learning_rate": 4.958799575806303e-05, "loss": 0.2455, "step": 8526 }, { "epoch": 0.15208860985267364, "grad_norm": 0.24447181820869446, "learning_rate": 4.958771429339076e-05, "loss": 0.2261, "step": 8527 }, { "epoch": 0.15210644597438733, "grad_norm": 0.3367098569869995, "learning_rate": 4.9587432733408004e-05, "loss": 0.2687, "step": 8528 }, { "epoch": 0.15212428209610102, "grad_norm": 0.29054486751556396, "learning_rate": 4.958715107811587e-05, "loss": 0.21, "step": 8529 }, { "epoch": 0.15214211821781473, "grad_norm": 0.40333035588264465, "learning_rate": 4.958686932751545e-05, "loss": 0.1971, "step": 8530 }, { "epoch": 0.15215995433952842, "grad_norm": 0.38966500759124756, "learning_rate": 4.9586587481607824e-05, "loss": 0.2542, "step": 8531 }, { "epoch": 0.1521777904612421, "grad_norm": 0.2995615303516388, "learning_rate": 4.95863055403941e-05, "loss": 0.2149, "step": 8532 }, { "epoch": 0.1521956265829558, "grad_norm": 0.23772743344306946, "learning_rate": 4.958602350387537e-05, "loss": 0.1994, "step": 8533 }, { "epoch": 0.15221346270466948, "grad_norm": 0.2952633202075958, "learning_rate": 4.958574137205271e-05, "loss": 0.288, "step": 8534 }, { "epoch": 0.1522312988263832, "grad_norm": 0.22035880386829376, "learning_rate": 4.9585459144927225e-05, "loss": 0.2107, "step": 8535 }, { "epoch": 0.1522491349480969, "grad_norm": 0.44636270403862, "learning_rate": 4.9585176822500015e-05, "loss": 0.2507, "step": 8536 }, { "epoch": 0.15226697106981057, "grad_norm": 0.27889424562454224, "learning_rate": 4.958489440477217e-05, "loss": 0.2453, "step": 8537 }, { "epoch": 0.15228480719152426, "grad_norm": 0.40132936835289, "learning_rate": 4.958461189174477e-05, "loss": 0.2846, "step": 8538 }, { "epoch": 0.15230264331323798, "grad_norm": 0.20691967010498047, "learning_rate": 4.958432928341893e-05, "loss": 0.1841, "step": 8539 }, { "epoch": 0.15232047943495167, "grad_norm": 0.21933163702487946, "learning_rate": 4.958404657979574e-05, "loss": 0.1895, "step": 8540 }, { "epoch": 0.15233831555666535, "grad_norm": 0.25566959381103516, "learning_rate": 4.9583763780876296e-05, "loss": 0.2703, "step": 8541 }, { "epoch": 0.15235615167837904, "grad_norm": 0.25642478466033936, "learning_rate": 4.958348088666169e-05, "loss": 0.214, "step": 8542 }, { "epoch": 0.15237398780009276, "grad_norm": 0.2996155321598053, "learning_rate": 4.958319789715302e-05, "loss": 0.2258, "step": 8543 }, { "epoch": 0.15239182392180645, "grad_norm": 0.3043200373649597, "learning_rate": 4.958291481235139e-05, "loss": 0.2875, "step": 8544 }, { "epoch": 0.15240966004352013, "grad_norm": 0.4334576427936554, "learning_rate": 4.9582631632257884e-05, "loss": 0.2147, "step": 8545 }, { "epoch": 0.15242749616523382, "grad_norm": 0.4442044794559479, "learning_rate": 4.9582348356873615e-05, "loss": 0.2965, "step": 8546 }, { "epoch": 0.15244533228694754, "grad_norm": 0.3632372319698334, "learning_rate": 4.958206498619966e-05, "loss": 0.2019, "step": 8547 }, { "epoch": 0.15246316840866123, "grad_norm": 0.3032687306404114, "learning_rate": 4.958178152023715e-05, "loss": 0.2347, "step": 8548 }, { "epoch": 0.15248100453037491, "grad_norm": 0.3779764175415039, "learning_rate": 4.958149795898715e-05, "loss": 0.3051, "step": 8549 }, { "epoch": 0.1524988406520886, "grad_norm": 0.3018413782119751, "learning_rate": 4.958121430245078e-05, "loss": 0.2445, "step": 8550 }, { "epoch": 0.15251667677380232, "grad_norm": 0.31004467606544495, "learning_rate": 4.9580930550629136e-05, "loss": 0.2038, "step": 8551 }, { "epoch": 0.152534512895516, "grad_norm": 0.2716842591762543, "learning_rate": 4.95806467035233e-05, "loss": 0.179, "step": 8552 }, { "epoch": 0.1525523490172297, "grad_norm": 0.4797303080558777, "learning_rate": 4.95803627611344e-05, "loss": 0.2629, "step": 8553 }, { "epoch": 0.15257018513894338, "grad_norm": 0.27326005697250366, "learning_rate": 4.958007872346353e-05, "loss": 0.2147, "step": 8554 }, { "epoch": 0.15258802126065707, "grad_norm": 0.30851417779922485, "learning_rate": 4.9579794590511777e-05, "loss": 0.2648, "step": 8555 }, { "epoch": 0.15260585738237079, "grad_norm": 0.332461416721344, "learning_rate": 4.957951036228024e-05, "loss": 0.2503, "step": 8556 }, { "epoch": 0.15262369350408447, "grad_norm": 0.3649390637874603, "learning_rate": 4.957922603877005e-05, "loss": 0.2272, "step": 8557 }, { "epoch": 0.15264152962579816, "grad_norm": 0.3612349033355713, "learning_rate": 4.957894161998228e-05, "loss": 0.1984, "step": 8558 }, { "epoch": 0.15265936574751185, "grad_norm": 0.6173525452613831, "learning_rate": 4.9578657105918044e-05, "loss": 0.2221, "step": 8559 }, { "epoch": 0.15267720186922557, "grad_norm": 0.2857859134674072, "learning_rate": 4.957837249657845e-05, "loss": 0.2298, "step": 8560 }, { "epoch": 0.15269503799093925, "grad_norm": 0.35880544781684875, "learning_rate": 4.957808779196459e-05, "loss": 0.2385, "step": 8561 }, { "epoch": 0.15271287411265294, "grad_norm": 0.3190654516220093, "learning_rate": 4.957780299207758e-05, "loss": 0.2148, "step": 8562 }, { "epoch": 0.15273071023436663, "grad_norm": 0.3299749195575714, "learning_rate": 4.9577518096918506e-05, "loss": 0.2325, "step": 8563 }, { "epoch": 0.15274854635608034, "grad_norm": 0.28279489278793335, "learning_rate": 4.957723310648849e-05, "loss": 0.2081, "step": 8564 }, { "epoch": 0.15276638247779403, "grad_norm": 0.49218663573265076, "learning_rate": 4.957694802078863e-05, "loss": 0.1838, "step": 8565 }, { "epoch": 0.15278421859950772, "grad_norm": 0.24212224781513214, "learning_rate": 4.957666283982002e-05, "loss": 0.2183, "step": 8566 }, { "epoch": 0.1528020547212214, "grad_norm": 0.34885886311531067, "learning_rate": 4.9576377563583786e-05, "loss": 0.2463, "step": 8567 }, { "epoch": 0.15281989084293512, "grad_norm": 0.3379933536052704, "learning_rate": 4.9576092192081024e-05, "loss": 0.2377, "step": 8568 }, { "epoch": 0.1528377269646488, "grad_norm": 0.3034881353378296, "learning_rate": 4.957580672531283e-05, "loss": 0.2469, "step": 8569 }, { "epoch": 0.1528555630863625, "grad_norm": 0.2592678368091583, "learning_rate": 4.9575521163280336e-05, "loss": 0.1855, "step": 8570 }, { "epoch": 0.1528733992080762, "grad_norm": 0.41172993183135986, "learning_rate": 4.9575235505984626e-05, "loss": 0.2534, "step": 8571 }, { "epoch": 0.1528912353297899, "grad_norm": 0.39557528495788574, "learning_rate": 4.957494975342682e-05, "loss": 0.2813, "step": 8572 }, { "epoch": 0.1529090714515036, "grad_norm": 0.2432795763015747, "learning_rate": 4.9574663905608024e-05, "loss": 0.2221, "step": 8573 }, { "epoch": 0.15292690757321728, "grad_norm": 0.2979724407196045, "learning_rate": 4.9574377962529327e-05, "loss": 0.2153, "step": 8574 }, { "epoch": 0.15294474369493097, "grad_norm": 0.3295513987541199, "learning_rate": 4.957409192419187e-05, "loss": 0.2493, "step": 8575 }, { "epoch": 0.15296257981664466, "grad_norm": 0.242191880941391, "learning_rate": 4.957380579059673e-05, "loss": 0.1908, "step": 8576 }, { "epoch": 0.15298041593835837, "grad_norm": 0.22380758821964264, "learning_rate": 4.957351956174504e-05, "loss": 0.2212, "step": 8577 }, { "epoch": 0.15299825206007206, "grad_norm": 0.24662764370441437, "learning_rate": 4.9573233237637904e-05, "loss": 0.1852, "step": 8578 }, { "epoch": 0.15301608818178575, "grad_norm": 0.25940418243408203, "learning_rate": 4.957294681827642e-05, "loss": 0.2088, "step": 8579 }, { "epoch": 0.15303392430349944, "grad_norm": 0.30162614583969116, "learning_rate": 4.9572660303661716e-05, "loss": 0.2102, "step": 8580 }, { "epoch": 0.15305176042521315, "grad_norm": 0.2159837931394577, "learning_rate": 4.957237369379489e-05, "loss": 0.203, "step": 8581 }, { "epoch": 0.15306959654692684, "grad_norm": 0.3033880889415741, "learning_rate": 4.957208698867706e-05, "loss": 0.2548, "step": 8582 }, { "epoch": 0.15308743266864053, "grad_norm": 0.24734680354595184, "learning_rate": 4.957180018830933e-05, "loss": 0.194, "step": 8583 }, { "epoch": 0.15310526879035422, "grad_norm": 0.259811669588089, "learning_rate": 4.9571513292692804e-05, "loss": 0.2127, "step": 8584 }, { "epoch": 0.15312310491206793, "grad_norm": 0.23779602348804474, "learning_rate": 4.957122630182862e-05, "loss": 0.1758, "step": 8585 }, { "epoch": 0.15314094103378162, "grad_norm": 0.27816519141197205, "learning_rate": 4.957093921571787e-05, "loss": 0.161, "step": 8586 }, { "epoch": 0.1531587771554953, "grad_norm": 0.5385149717330933, "learning_rate": 4.957065203436168e-05, "loss": 0.1855, "step": 8587 }, { "epoch": 0.153176613277209, "grad_norm": 0.48837822675704956, "learning_rate": 4.9570364757761154e-05, "loss": 0.2055, "step": 8588 }, { "epoch": 0.1531944493989227, "grad_norm": 0.25603777170181274, "learning_rate": 4.9570077385917405e-05, "loss": 0.2018, "step": 8589 }, { "epoch": 0.1532122855206364, "grad_norm": 0.32549938559532166, "learning_rate": 4.956978991883156e-05, "loss": 0.2484, "step": 8590 }, { "epoch": 0.1532301216423501, "grad_norm": 0.24236349761486053, "learning_rate": 4.95695023565047e-05, "loss": 0.2225, "step": 8591 }, { "epoch": 0.15324795776406377, "grad_norm": 0.3066392242908478, "learning_rate": 4.956921469893798e-05, "loss": 0.2101, "step": 8592 }, { "epoch": 0.1532657938857775, "grad_norm": 0.3377484083175659, "learning_rate": 4.95689269461325e-05, "loss": 0.2184, "step": 8593 }, { "epoch": 0.15328363000749118, "grad_norm": 0.3868796229362488, "learning_rate": 4.956863909808936e-05, "loss": 0.2844, "step": 8594 }, { "epoch": 0.15330146612920487, "grad_norm": 0.2308701127767563, "learning_rate": 4.95683511548097e-05, "loss": 0.2011, "step": 8595 }, { "epoch": 0.15331930225091855, "grad_norm": 0.17837509512901306, "learning_rate": 4.9568063116294625e-05, "loss": 0.1902, "step": 8596 }, { "epoch": 0.15333713837263224, "grad_norm": 0.25052610039711, "learning_rate": 4.956777498254525e-05, "loss": 0.2347, "step": 8597 }, { "epoch": 0.15335497449434596, "grad_norm": 0.2298162877559662, "learning_rate": 4.9567486753562697e-05, "loss": 0.2386, "step": 8598 }, { "epoch": 0.15337281061605965, "grad_norm": 0.33065974712371826, "learning_rate": 4.9567198429348075e-05, "loss": 0.2868, "step": 8599 }, { "epoch": 0.15339064673777333, "grad_norm": 0.229557603597641, "learning_rate": 4.956691000990251e-05, "loss": 0.2111, "step": 8600 }, { "epoch": 0.15340848285948702, "grad_norm": 0.2160930037498474, "learning_rate": 4.9566621495227115e-05, "loss": 0.2264, "step": 8601 }, { "epoch": 0.15342631898120074, "grad_norm": 0.25786328315734863, "learning_rate": 4.9566332885323005e-05, "loss": 0.1941, "step": 8602 }, { "epoch": 0.15344415510291443, "grad_norm": 0.3277515172958374, "learning_rate": 4.9566044180191304e-05, "loss": 0.2087, "step": 8603 }, { "epoch": 0.1534619912246281, "grad_norm": 0.3133716285228729, "learning_rate": 4.956575537983314e-05, "loss": 0.1982, "step": 8604 }, { "epoch": 0.1534798273463418, "grad_norm": 0.3015703558921814, "learning_rate": 4.9565466484249616e-05, "loss": 0.2515, "step": 8605 }, { "epoch": 0.15349766346805552, "grad_norm": 0.3521009087562561, "learning_rate": 4.9565177493441864e-05, "loss": 0.2298, "step": 8606 }, { "epoch": 0.1535154995897692, "grad_norm": 0.26029446721076965, "learning_rate": 4.9564888407411e-05, "loss": 0.2074, "step": 8607 }, { "epoch": 0.1535333357114829, "grad_norm": 0.4189286231994629, "learning_rate": 4.9564599226158136e-05, "loss": 0.2077, "step": 8608 }, { "epoch": 0.15355117183319658, "grad_norm": 0.22298909723758698, "learning_rate": 4.956430994968441e-05, "loss": 0.1952, "step": 8609 }, { "epoch": 0.1535690079549103, "grad_norm": 0.3025025725364685, "learning_rate": 4.956402057799093e-05, "loss": 0.2466, "step": 8610 }, { "epoch": 0.15358684407662399, "grad_norm": 0.2786144018173218, "learning_rate": 4.956373111107883e-05, "loss": 0.2594, "step": 8611 }, { "epoch": 0.15360468019833767, "grad_norm": 0.3745969235897064, "learning_rate": 4.9563441548949205e-05, "loss": 0.2655, "step": 8612 }, { "epoch": 0.15362251632005136, "grad_norm": 0.2170204520225525, "learning_rate": 4.956315189160322e-05, "loss": 0.1745, "step": 8613 }, { "epoch": 0.15364035244176505, "grad_norm": 0.2866086959838867, "learning_rate": 4.956286213904196e-05, "loss": 0.2301, "step": 8614 }, { "epoch": 0.15365818856347876, "grad_norm": 0.5086783170700073, "learning_rate": 4.9562572291266565e-05, "loss": 0.2374, "step": 8615 }, { "epoch": 0.15367602468519245, "grad_norm": 0.22385302186012268, "learning_rate": 4.956228234827816e-05, "loss": 0.2082, "step": 8616 }, { "epoch": 0.15369386080690614, "grad_norm": 0.20260019600391388, "learning_rate": 4.956199231007786e-05, "loss": 0.1938, "step": 8617 }, { "epoch": 0.15371169692861983, "grad_norm": 0.3071066737174988, "learning_rate": 4.9561702176666796e-05, "loss": 0.1806, "step": 8618 }, { "epoch": 0.15372953305033354, "grad_norm": 0.2761852741241455, "learning_rate": 4.956141194804609e-05, "loss": 0.2204, "step": 8619 }, { "epoch": 0.15374736917204723, "grad_norm": 0.31176427006721497, "learning_rate": 4.956112162421687e-05, "loss": 0.2642, "step": 8620 }, { "epoch": 0.15376520529376092, "grad_norm": 0.43502193689346313, "learning_rate": 4.956083120518026e-05, "loss": 0.2575, "step": 8621 }, { "epoch": 0.1537830414154746, "grad_norm": 0.25103381276130676, "learning_rate": 4.956054069093738e-05, "loss": 0.2386, "step": 8622 }, { "epoch": 0.15380087753718832, "grad_norm": 0.21322041749954224, "learning_rate": 4.956025008148937e-05, "loss": 0.2087, "step": 8623 }, { "epoch": 0.153818713658902, "grad_norm": 0.3090284764766693, "learning_rate": 4.955995937683734e-05, "loss": 0.2988, "step": 8624 }, { "epoch": 0.1538365497806157, "grad_norm": 0.27395325899124146, "learning_rate": 4.955966857698243e-05, "loss": 0.226, "step": 8625 }, { "epoch": 0.1538543859023294, "grad_norm": 0.30051928758621216, "learning_rate": 4.9559377681925764e-05, "loss": 0.2205, "step": 8626 }, { "epoch": 0.1538722220240431, "grad_norm": 0.33471882343292236, "learning_rate": 4.955908669166846e-05, "loss": 0.192, "step": 8627 }, { "epoch": 0.1538900581457568, "grad_norm": 0.2831500768661499, "learning_rate": 4.955879560621166e-05, "loss": 0.235, "step": 8628 }, { "epoch": 0.15390789426747048, "grad_norm": 0.28826627135276794, "learning_rate": 4.955850442555648e-05, "loss": 0.2041, "step": 8629 }, { "epoch": 0.15392573038918417, "grad_norm": 0.3237340450286865, "learning_rate": 4.955821314970406e-05, "loss": 0.2274, "step": 8630 }, { "epoch": 0.15394356651089788, "grad_norm": 0.3359971046447754, "learning_rate": 4.955792177865553e-05, "loss": 0.225, "step": 8631 }, { "epoch": 0.15396140263261157, "grad_norm": 0.22938977181911469, "learning_rate": 4.9557630312412e-05, "loss": 0.2399, "step": 8632 }, { "epoch": 0.15397923875432526, "grad_norm": 0.18498247861862183, "learning_rate": 4.955733875097461e-05, "loss": 0.1935, "step": 8633 }, { "epoch": 0.15399707487603895, "grad_norm": 0.29871103167533875, "learning_rate": 4.9557047094344504e-05, "loss": 0.2009, "step": 8634 }, { "epoch": 0.15401491099775264, "grad_norm": 0.314211368560791, "learning_rate": 4.95567553425228e-05, "loss": 0.216, "step": 8635 }, { "epoch": 0.15403274711946635, "grad_norm": 0.2440318912267685, "learning_rate": 4.955646349551063e-05, "loss": 0.2551, "step": 8636 }, { "epoch": 0.15405058324118004, "grad_norm": 0.2110043466091156, "learning_rate": 4.955617155330913e-05, "loss": 0.1894, "step": 8637 }, { "epoch": 0.15406841936289373, "grad_norm": 0.21493402123451233, "learning_rate": 4.955587951591941e-05, "loss": 0.2011, "step": 8638 }, { "epoch": 0.15408625548460742, "grad_norm": 0.21289654076099396, "learning_rate": 4.955558738334264e-05, "loss": 0.2037, "step": 8639 }, { "epoch": 0.15410409160632113, "grad_norm": 0.3226395845413208, "learning_rate": 4.9555295155579925e-05, "loss": 0.2882, "step": 8640 }, { "epoch": 0.15412192772803482, "grad_norm": 0.32725533843040466, "learning_rate": 4.95550028326324e-05, "loss": 0.3093, "step": 8641 }, { "epoch": 0.1541397638497485, "grad_norm": 0.3094915449619293, "learning_rate": 4.955471041450121e-05, "loss": 0.2093, "step": 8642 }, { "epoch": 0.1541575999714622, "grad_norm": 0.27979058027267456, "learning_rate": 4.955441790118748e-05, "loss": 0.2234, "step": 8643 }, { "epoch": 0.1541754360931759, "grad_norm": 0.22268089652061462, "learning_rate": 4.9554125292692346e-05, "loss": 0.2318, "step": 8644 }, { "epoch": 0.1541932722148896, "grad_norm": 0.20991800725460052, "learning_rate": 4.9553832589016934e-05, "loss": 0.2301, "step": 8645 }, { "epoch": 0.1542111083366033, "grad_norm": 0.2542620301246643, "learning_rate": 4.9553539790162395e-05, "loss": 0.2449, "step": 8646 }, { "epoch": 0.15422894445831697, "grad_norm": 0.30696237087249756, "learning_rate": 4.9553246896129854e-05, "loss": 0.2621, "step": 8647 }, { "epoch": 0.1542467805800307, "grad_norm": 0.29050058126449585, "learning_rate": 4.955295390692044e-05, "loss": 0.2537, "step": 8648 }, { "epoch": 0.15426461670174438, "grad_norm": 0.2910822927951813, "learning_rate": 4.9552660822535306e-05, "loss": 0.2586, "step": 8649 }, { "epoch": 0.15428245282345807, "grad_norm": 0.26228010654449463, "learning_rate": 4.9552367642975575e-05, "loss": 0.2458, "step": 8650 }, { "epoch": 0.15430028894517175, "grad_norm": 0.24198441207408905, "learning_rate": 4.955207436824239e-05, "loss": 0.195, "step": 8651 }, { "epoch": 0.15431812506688547, "grad_norm": 0.3051553964614868, "learning_rate": 4.9551780998336885e-05, "loss": 0.2308, "step": 8652 }, { "epoch": 0.15433596118859916, "grad_norm": 0.33055949211120605, "learning_rate": 4.955148753326019e-05, "loss": 0.2798, "step": 8653 }, { "epoch": 0.15435379731031285, "grad_norm": 0.3392762839794159, "learning_rate": 4.9551193973013453e-05, "loss": 0.2468, "step": 8654 }, { "epoch": 0.15437163343202653, "grad_norm": 0.3386043608188629, "learning_rate": 4.95509003175978e-05, "loss": 0.231, "step": 8655 }, { "epoch": 0.15438946955374022, "grad_norm": 0.31472453474998474, "learning_rate": 4.955060656701439e-05, "loss": 0.2748, "step": 8656 }, { "epoch": 0.15440730567545394, "grad_norm": 0.2320673018693924, "learning_rate": 4.955031272126435e-05, "loss": 0.2129, "step": 8657 }, { "epoch": 0.15442514179716763, "grad_norm": 0.2811758816242218, "learning_rate": 4.955001878034881e-05, "loss": 0.2266, "step": 8658 }, { "epoch": 0.1544429779188813, "grad_norm": 0.32722583413124084, "learning_rate": 4.954972474426892e-05, "loss": 0.2144, "step": 8659 }, { "epoch": 0.154460814040595, "grad_norm": 0.2794470489025116, "learning_rate": 4.9549430613025824e-05, "loss": 0.2236, "step": 8660 }, { "epoch": 0.15447865016230872, "grad_norm": 0.2867196202278137, "learning_rate": 4.9549136386620655e-05, "loss": 0.2413, "step": 8661 }, { "epoch": 0.1544964862840224, "grad_norm": 0.2107728123664856, "learning_rate": 4.954884206505455e-05, "loss": 0.173, "step": 8662 }, { "epoch": 0.1545143224057361, "grad_norm": 0.21892587840557098, "learning_rate": 4.954854764832865e-05, "loss": 0.1816, "step": 8663 }, { "epoch": 0.15453215852744978, "grad_norm": 0.2340811938047409, "learning_rate": 4.9548253136444105e-05, "loss": 0.1745, "step": 8664 }, { "epoch": 0.1545499946491635, "grad_norm": 0.26641198992729187, "learning_rate": 4.9547958529402056e-05, "loss": 0.2024, "step": 8665 }, { "epoch": 0.15456783077087718, "grad_norm": 0.40584680438041687, "learning_rate": 4.954766382720364e-05, "loss": 0.2996, "step": 8666 }, { "epoch": 0.15458566689259087, "grad_norm": 0.26639845967292786, "learning_rate": 4.954736902985e-05, "loss": 0.2264, "step": 8667 }, { "epoch": 0.15460350301430456, "grad_norm": 0.4409404993057251, "learning_rate": 4.954707413734227e-05, "loss": 0.3571, "step": 8668 }, { "epoch": 0.15462133913601828, "grad_norm": 0.2202620804309845, "learning_rate": 4.954677914968161e-05, "loss": 0.1992, "step": 8669 }, { "epoch": 0.15463917525773196, "grad_norm": 0.23582911491394043, "learning_rate": 4.954648406686916e-05, "loss": 0.2166, "step": 8670 }, { "epoch": 0.15465701137944565, "grad_norm": 0.32048580050468445, "learning_rate": 4.954618888890605e-05, "loss": 0.2881, "step": 8671 }, { "epoch": 0.15467484750115934, "grad_norm": 0.26223132014274597, "learning_rate": 4.9545893615793444e-05, "loss": 0.1733, "step": 8672 }, { "epoch": 0.15469268362287306, "grad_norm": 0.25206106901168823, "learning_rate": 4.9545598247532473e-05, "loss": 0.2346, "step": 8673 }, { "epoch": 0.15471051974458674, "grad_norm": 0.3384833037853241, "learning_rate": 4.954530278412428e-05, "loss": 0.2749, "step": 8674 }, { "epoch": 0.15472835586630043, "grad_norm": 0.2313491553068161, "learning_rate": 4.9545007225570024e-05, "loss": 0.224, "step": 8675 }, { "epoch": 0.15474619198801412, "grad_norm": 0.307544082403183, "learning_rate": 4.954471157187084e-05, "loss": 0.2255, "step": 8676 }, { "epoch": 0.1547640281097278, "grad_norm": 0.24871240556240082, "learning_rate": 4.954441582302787e-05, "loss": 0.201, "step": 8677 }, { "epoch": 0.15478186423144152, "grad_norm": 0.31050002574920654, "learning_rate": 4.954411997904228e-05, "loss": 0.2032, "step": 8678 }, { "epoch": 0.1547997003531552, "grad_norm": 0.3050120174884796, "learning_rate": 4.9543824039915185e-05, "loss": 0.2329, "step": 8679 }, { "epoch": 0.1548175364748689, "grad_norm": 0.31380385160446167, "learning_rate": 4.9543528005647766e-05, "loss": 0.2206, "step": 8680 }, { "epoch": 0.1548353725965826, "grad_norm": 0.2867913246154785, "learning_rate": 4.9543231876241145e-05, "loss": 0.207, "step": 8681 }, { "epoch": 0.1548532087182963, "grad_norm": 0.30340704321861267, "learning_rate": 4.9542935651696496e-05, "loss": 0.2325, "step": 8682 }, { "epoch": 0.15487104484001, "grad_norm": 0.2605895400047302, "learning_rate": 4.954263933201494e-05, "loss": 0.1886, "step": 8683 }, { "epoch": 0.15488888096172368, "grad_norm": 0.2807618975639343, "learning_rate": 4.9542342917197636e-05, "loss": 0.2215, "step": 8684 }, { "epoch": 0.15490671708343737, "grad_norm": 0.25563374161720276, "learning_rate": 4.9542046407245737e-05, "loss": 0.2062, "step": 8685 }, { "epoch": 0.15492455320515108, "grad_norm": 0.3591375946998596, "learning_rate": 4.954174980216039e-05, "loss": 0.2117, "step": 8686 }, { "epoch": 0.15494238932686477, "grad_norm": 0.27902495861053467, "learning_rate": 4.9541453101942744e-05, "loss": 0.1791, "step": 8687 }, { "epoch": 0.15496022544857846, "grad_norm": 0.26965099573135376, "learning_rate": 4.954115630659395e-05, "loss": 0.2185, "step": 8688 }, { "epoch": 0.15497806157029215, "grad_norm": 0.2934694290161133, "learning_rate": 4.954085941611516e-05, "loss": 0.2054, "step": 8689 }, { "epoch": 0.15499589769200586, "grad_norm": 0.31233468651771545, "learning_rate": 4.954056243050752e-05, "loss": 0.2922, "step": 8690 }, { "epoch": 0.15501373381371955, "grad_norm": 0.259063184261322, "learning_rate": 4.954026534977218e-05, "loss": 0.2171, "step": 8691 }, { "epoch": 0.15503156993543324, "grad_norm": 0.24388599395751953, "learning_rate": 4.95399681739103e-05, "loss": 0.2172, "step": 8692 }, { "epoch": 0.15504940605714693, "grad_norm": 0.2819579541683197, "learning_rate": 4.953967090292303e-05, "loss": 0.2178, "step": 8693 }, { "epoch": 0.15506724217886061, "grad_norm": 0.25104451179504395, "learning_rate": 4.9539373536811516e-05, "loss": 0.1745, "step": 8694 }, { "epoch": 0.15508507830057433, "grad_norm": 0.31823596358299255, "learning_rate": 4.953907607557692e-05, "loss": 0.1805, "step": 8695 }, { "epoch": 0.15510291442228802, "grad_norm": 0.2884424924850464, "learning_rate": 4.953877851922038e-05, "loss": 0.2039, "step": 8696 }, { "epoch": 0.1551207505440017, "grad_norm": 0.2584998309612274, "learning_rate": 4.9538480867743064e-05, "loss": 0.264, "step": 8697 }, { "epoch": 0.1551385866657154, "grad_norm": 0.3246391713619232, "learning_rate": 4.953818312114612e-05, "loss": 0.2266, "step": 8698 }, { "epoch": 0.1551564227874291, "grad_norm": 0.26628631353378296, "learning_rate": 4.9537885279430705e-05, "loss": 0.2587, "step": 8699 }, { "epoch": 0.1551742589091428, "grad_norm": 0.1852973997592926, "learning_rate": 4.953758734259797e-05, "loss": 0.1619, "step": 8700 }, { "epoch": 0.1551920950308565, "grad_norm": 0.19855822622776031, "learning_rate": 4.953728931064907e-05, "loss": 0.1836, "step": 8701 }, { "epoch": 0.15520993115257017, "grad_norm": 0.2494380623102188, "learning_rate": 4.953699118358517e-05, "loss": 0.2222, "step": 8702 }, { "epoch": 0.1552277672742839, "grad_norm": 0.2721339166164398, "learning_rate": 4.953669296140741e-05, "loss": 0.2114, "step": 8703 }, { "epoch": 0.15524560339599758, "grad_norm": 0.25268349051475525, "learning_rate": 4.9536394644116954e-05, "loss": 0.2186, "step": 8704 }, { "epoch": 0.15526343951771127, "grad_norm": 0.3157144784927368, "learning_rate": 4.9536096231714954e-05, "loss": 0.2638, "step": 8705 }, { "epoch": 0.15528127563942495, "grad_norm": 0.22628776729106903, "learning_rate": 4.953579772420258e-05, "loss": 0.1914, "step": 8706 }, { "epoch": 0.15529911176113867, "grad_norm": 0.2234720140695572, "learning_rate": 4.953549912158097e-05, "loss": 0.1698, "step": 8707 }, { "epoch": 0.15531694788285236, "grad_norm": 0.3426528573036194, "learning_rate": 4.9535200423851295e-05, "loss": 0.2192, "step": 8708 }, { "epoch": 0.15533478400456605, "grad_norm": 0.24046680331230164, "learning_rate": 4.9534901631014706e-05, "loss": 0.2183, "step": 8709 }, { "epoch": 0.15535262012627973, "grad_norm": 0.31615275144577026, "learning_rate": 4.953460274307237e-05, "loss": 0.2088, "step": 8710 }, { "epoch": 0.15537045624799345, "grad_norm": 0.22079584002494812, "learning_rate": 4.953430376002544e-05, "loss": 0.1927, "step": 8711 }, { "epoch": 0.15538829236970714, "grad_norm": 0.24755722284317017, "learning_rate": 4.953400468187507e-05, "loss": 0.2278, "step": 8712 }, { "epoch": 0.15540612849142083, "grad_norm": 0.22813691198825836, "learning_rate": 4.953370550862243e-05, "loss": 0.1993, "step": 8713 }, { "epoch": 0.1554239646131345, "grad_norm": 0.2970469892024994, "learning_rate": 4.953340624026867e-05, "loss": 0.1896, "step": 8714 }, { "epoch": 0.1554418007348482, "grad_norm": 0.36117640137672424, "learning_rate": 4.953310687681495e-05, "loss": 0.1771, "step": 8715 }, { "epoch": 0.15545963685656192, "grad_norm": 0.2168557047843933, "learning_rate": 4.953280741826244e-05, "loss": 0.1855, "step": 8716 }, { "epoch": 0.1554774729782756, "grad_norm": 0.24977099895477295, "learning_rate": 4.9532507864612296e-05, "loss": 0.231, "step": 8717 }, { "epoch": 0.1554953090999893, "grad_norm": 0.1853153258562088, "learning_rate": 4.953220821586567e-05, "loss": 0.1602, "step": 8718 }, { "epoch": 0.15551314522170298, "grad_norm": 0.25104716420173645, "learning_rate": 4.953190847202374e-05, "loss": 0.2043, "step": 8719 }, { "epoch": 0.1555309813434167, "grad_norm": 0.23357023298740387, "learning_rate": 4.953160863308766e-05, "loss": 0.1792, "step": 8720 }, { "epoch": 0.15554881746513038, "grad_norm": 0.24363559484481812, "learning_rate": 4.953130869905859e-05, "loss": 0.2322, "step": 8721 }, { "epoch": 0.15556665358684407, "grad_norm": 0.3597864508628845, "learning_rate": 4.953100866993769e-05, "loss": 0.2694, "step": 8722 }, { "epoch": 0.15558448970855776, "grad_norm": 0.3142867684364319, "learning_rate": 4.9530708545726135e-05, "loss": 0.2261, "step": 8723 }, { "epoch": 0.15560232583027148, "grad_norm": 0.24061615765094757, "learning_rate": 4.953040832642507e-05, "loss": 0.2014, "step": 8724 }, { "epoch": 0.15562016195198516, "grad_norm": 0.294924259185791, "learning_rate": 4.953010801203568e-05, "loss": 0.256, "step": 8725 }, { "epoch": 0.15563799807369885, "grad_norm": 0.35685548186302185, "learning_rate": 4.952980760255912e-05, "loss": 0.2613, "step": 8726 }, { "epoch": 0.15565583419541254, "grad_norm": 0.36911293864250183, "learning_rate": 4.952950709799655e-05, "loss": 0.1945, "step": 8727 }, { "epoch": 0.15567367031712626, "grad_norm": 0.27103596925735474, "learning_rate": 4.9529206498349134e-05, "loss": 0.2065, "step": 8728 }, { "epoch": 0.15569150643883994, "grad_norm": 0.32939764857292175, "learning_rate": 4.952890580361804e-05, "loss": 0.2353, "step": 8729 }, { "epoch": 0.15570934256055363, "grad_norm": 0.2824895977973938, "learning_rate": 4.952860501380445e-05, "loss": 0.256, "step": 8730 }, { "epoch": 0.15572717868226732, "grad_norm": 0.31564801931381226, "learning_rate": 4.95283041289095e-05, "loss": 0.3028, "step": 8731 }, { "epoch": 0.15574501480398104, "grad_norm": 0.2352769374847412, "learning_rate": 4.952800314893438e-05, "loss": 0.2005, "step": 8732 }, { "epoch": 0.15576285092569472, "grad_norm": 0.36282840371131897, "learning_rate": 4.952770207388024e-05, "loss": 0.1704, "step": 8733 }, { "epoch": 0.1557806870474084, "grad_norm": 0.2992383539676666, "learning_rate": 4.9527400903748264e-05, "loss": 0.2129, "step": 8734 }, { "epoch": 0.1557985231691221, "grad_norm": 0.27786487340927124, "learning_rate": 4.952709963853961e-05, "loss": 0.2069, "step": 8735 }, { "epoch": 0.1558163592908358, "grad_norm": 0.2970375120639801, "learning_rate": 4.9526798278255435e-05, "loss": 0.2419, "step": 8736 }, { "epoch": 0.1558341954125495, "grad_norm": 0.2086101770401001, "learning_rate": 4.952649682289693e-05, "loss": 0.1731, "step": 8737 }, { "epoch": 0.1558520315342632, "grad_norm": 0.40671852231025696, "learning_rate": 4.952619527246525e-05, "loss": 0.1869, "step": 8738 }, { "epoch": 0.15586986765597688, "grad_norm": 0.2768620550632477, "learning_rate": 4.9525893626961564e-05, "loss": 0.2335, "step": 8739 }, { "epoch": 0.15588770377769057, "grad_norm": 0.22819559276103973, "learning_rate": 4.952559188638705e-05, "loss": 0.1749, "step": 8740 }, { "epoch": 0.15590553989940428, "grad_norm": 0.2739728093147278, "learning_rate": 4.9525290050742855e-05, "loss": 0.2235, "step": 8741 }, { "epoch": 0.15592337602111797, "grad_norm": 0.3490871787071228, "learning_rate": 4.952498812003018e-05, "loss": 0.2354, "step": 8742 }, { "epoch": 0.15594121214283166, "grad_norm": 0.287117600440979, "learning_rate": 4.9524686094250175e-05, "loss": 0.254, "step": 8743 }, { "epoch": 0.15595904826454535, "grad_norm": 0.29424506425857544, "learning_rate": 4.952438397340402e-05, "loss": 0.2015, "step": 8744 }, { "epoch": 0.15597688438625906, "grad_norm": 0.31124168634414673, "learning_rate": 4.952408175749288e-05, "loss": 0.2054, "step": 8745 }, { "epoch": 0.15599472050797275, "grad_norm": 0.2352152317762375, "learning_rate": 4.952377944651793e-05, "loss": 0.2381, "step": 8746 }, { "epoch": 0.15601255662968644, "grad_norm": 0.28440725803375244, "learning_rate": 4.952347704048033e-05, "loss": 0.239, "step": 8747 }, { "epoch": 0.15603039275140013, "grad_norm": 0.25761833786964417, "learning_rate": 4.952317453938127e-05, "loss": 0.2086, "step": 8748 }, { "epoch": 0.15604822887311384, "grad_norm": 0.21963690221309662, "learning_rate": 4.9522871943221914e-05, "loss": 0.1989, "step": 8749 }, { "epoch": 0.15606606499482753, "grad_norm": 0.3100791871547699, "learning_rate": 4.952256925200345e-05, "loss": 0.2065, "step": 8750 }, { "epoch": 0.15608390111654122, "grad_norm": 0.34078601002693176, "learning_rate": 4.952226646572702e-05, "loss": 0.2479, "step": 8751 }, { "epoch": 0.1561017372382549, "grad_norm": 0.30791711807250977, "learning_rate": 4.9521963584393824e-05, "loss": 0.2446, "step": 8752 }, { "epoch": 0.15611957335996862, "grad_norm": 0.2543742060661316, "learning_rate": 4.952166060800503e-05, "loss": 0.2336, "step": 8753 }, { "epoch": 0.1561374094816823, "grad_norm": 0.23772947490215302, "learning_rate": 4.95213575365618e-05, "loss": 0.1987, "step": 8754 }, { "epoch": 0.156155245603396, "grad_norm": 0.27661585807800293, "learning_rate": 4.9521054370065324e-05, "loss": 0.238, "step": 8755 }, { "epoch": 0.15617308172510969, "grad_norm": 0.2552585005760193, "learning_rate": 4.9520751108516773e-05, "loss": 0.2462, "step": 8756 }, { "epoch": 0.15619091784682337, "grad_norm": 0.2204442024230957, "learning_rate": 4.9520447751917323e-05, "loss": 0.2097, "step": 8757 }, { "epoch": 0.1562087539685371, "grad_norm": 0.2437412589788437, "learning_rate": 4.9520144300268146e-05, "loss": 0.1955, "step": 8758 }, { "epoch": 0.15622659009025078, "grad_norm": 0.3021933138370514, "learning_rate": 4.9519840753570426e-05, "loss": 0.3068, "step": 8759 }, { "epoch": 0.15624442621196447, "grad_norm": 0.33262425661087036, "learning_rate": 4.9519537111825324e-05, "loss": 0.2585, "step": 8760 }, { "epoch": 0.15626226233367815, "grad_norm": 0.33257779479026794, "learning_rate": 4.951923337503404e-05, "loss": 0.1946, "step": 8761 }, { "epoch": 0.15628009845539187, "grad_norm": 0.2478998601436615, "learning_rate": 4.951892954319772e-05, "loss": 0.2019, "step": 8762 }, { "epoch": 0.15629793457710556, "grad_norm": 0.23337683081626892, "learning_rate": 4.9518625616317583e-05, "loss": 0.1918, "step": 8763 }, { "epoch": 0.15631577069881925, "grad_norm": 0.27422499656677246, "learning_rate": 4.9518321594394767e-05, "loss": 0.1997, "step": 8764 }, { "epoch": 0.15633360682053293, "grad_norm": 0.2050887942314148, "learning_rate": 4.9518017477430476e-05, "loss": 0.1976, "step": 8765 }, { "epoch": 0.15635144294224665, "grad_norm": 0.34559714794158936, "learning_rate": 4.951771326542588e-05, "loss": 0.1783, "step": 8766 }, { "epoch": 0.15636927906396034, "grad_norm": 0.3659738600254059, "learning_rate": 4.951740895838216e-05, "loss": 0.1905, "step": 8767 }, { "epoch": 0.15638711518567402, "grad_norm": 0.42823851108551025, "learning_rate": 4.95171045563005e-05, "loss": 0.2344, "step": 8768 }, { "epoch": 0.1564049513073877, "grad_norm": 0.2627209424972534, "learning_rate": 4.951680005918207e-05, "loss": 0.2262, "step": 8769 }, { "epoch": 0.15642278742910143, "grad_norm": 0.27858999371528625, "learning_rate": 4.951649546702805e-05, "loss": 0.2307, "step": 8770 }, { "epoch": 0.15644062355081512, "grad_norm": 0.3153055012226105, "learning_rate": 4.951619077983963e-05, "loss": 0.2738, "step": 8771 }, { "epoch": 0.1564584596725288, "grad_norm": 0.4113086760044098, "learning_rate": 4.951588599761798e-05, "loss": 0.2621, "step": 8772 }, { "epoch": 0.1564762957942425, "grad_norm": 0.2547321319580078, "learning_rate": 4.9515581120364295e-05, "loss": 0.2166, "step": 8773 }, { "epoch": 0.1564941319159562, "grad_norm": 0.28523120284080505, "learning_rate": 4.9515276148079754e-05, "loss": 0.2276, "step": 8774 }, { "epoch": 0.1565119680376699, "grad_norm": 0.21435360610485077, "learning_rate": 4.951497108076553e-05, "loss": 0.2089, "step": 8775 }, { "epoch": 0.15652980415938358, "grad_norm": 0.2475617676973343, "learning_rate": 4.9514665918422815e-05, "loss": 0.2407, "step": 8776 }, { "epoch": 0.15654764028109727, "grad_norm": 0.2178918570280075, "learning_rate": 4.951436066105278e-05, "loss": 0.2188, "step": 8777 }, { "epoch": 0.15656547640281096, "grad_norm": 0.31759148836135864, "learning_rate": 4.951405530865663e-05, "loss": 0.244, "step": 8778 }, { "epoch": 0.15658331252452468, "grad_norm": 0.2582131624221802, "learning_rate": 4.951374986123553e-05, "loss": 0.2115, "step": 8779 }, { "epoch": 0.15660114864623836, "grad_norm": 0.23167286813259125, "learning_rate": 4.951344431879066e-05, "loss": 0.1681, "step": 8780 }, { "epoch": 0.15661898476795205, "grad_norm": 1.0556530952453613, "learning_rate": 4.951313868132321e-05, "loss": 0.1988, "step": 8781 }, { "epoch": 0.15663682088966574, "grad_norm": 0.35775575041770935, "learning_rate": 4.951283294883438e-05, "loss": 0.3155, "step": 8782 }, { "epoch": 0.15665465701137946, "grad_norm": 0.18764452636241913, "learning_rate": 4.9512527121325345e-05, "loss": 0.2122, "step": 8783 }, { "epoch": 0.15667249313309314, "grad_norm": 0.33498260378837585, "learning_rate": 4.9512221198797285e-05, "loss": 0.2391, "step": 8784 }, { "epoch": 0.15669032925480683, "grad_norm": 0.3173202574253082, "learning_rate": 4.951191518125138e-05, "loss": 0.2411, "step": 8785 }, { "epoch": 0.15670816537652052, "grad_norm": 0.2543131709098816, "learning_rate": 4.9511609068688836e-05, "loss": 0.2016, "step": 8786 }, { "epoch": 0.15672600149823424, "grad_norm": 0.26163366436958313, "learning_rate": 4.951130286111082e-05, "loss": 0.223, "step": 8787 }, { "epoch": 0.15674383761994792, "grad_norm": 0.3173080086708069, "learning_rate": 4.951099655851854e-05, "loss": 0.1949, "step": 8788 }, { "epoch": 0.1567616737416616, "grad_norm": 0.336243212223053, "learning_rate": 4.9510690160913166e-05, "loss": 0.2066, "step": 8789 }, { "epoch": 0.1567795098633753, "grad_norm": 0.49204471707344055, "learning_rate": 4.951038366829589e-05, "loss": 0.1743, "step": 8790 }, { "epoch": 0.15679734598508901, "grad_norm": 0.2887970805168152, "learning_rate": 4.95100770806679e-05, "loss": 0.1905, "step": 8791 }, { "epoch": 0.1568151821068027, "grad_norm": 0.2841673791408539, "learning_rate": 4.950977039803039e-05, "loss": 0.2525, "step": 8792 }, { "epoch": 0.1568330182285164, "grad_norm": 0.353541761636734, "learning_rate": 4.950946362038454e-05, "loss": 0.1895, "step": 8793 }, { "epoch": 0.15685085435023008, "grad_norm": 0.2435857206583023, "learning_rate": 4.9509156747731544e-05, "loss": 0.2216, "step": 8794 }, { "epoch": 0.15686869047194377, "grad_norm": 0.2575038969516754, "learning_rate": 4.95088497800726e-05, "loss": 0.2111, "step": 8795 }, { "epoch": 0.15688652659365748, "grad_norm": 0.3675071597099304, "learning_rate": 4.9508542717408877e-05, "loss": 0.2019, "step": 8796 }, { "epoch": 0.15690436271537117, "grad_norm": 0.3092038333415985, "learning_rate": 4.950823555974158e-05, "loss": 0.2541, "step": 8797 }, { "epoch": 0.15692219883708486, "grad_norm": 0.23880967497825623, "learning_rate": 4.9507928307071904e-05, "loss": 0.2112, "step": 8798 }, { "epoch": 0.15694003495879855, "grad_norm": 0.21204820275306702, "learning_rate": 4.9507620959401024e-05, "loss": 0.1796, "step": 8799 }, { "epoch": 0.15695787108051226, "grad_norm": 0.21364718675613403, "learning_rate": 4.950731351673015e-05, "loss": 0.2262, "step": 8800 }, { "epoch": 0.15697570720222595, "grad_norm": 0.2173614203929901, "learning_rate": 4.950700597906046e-05, "loss": 0.2114, "step": 8801 }, { "epoch": 0.15699354332393964, "grad_norm": 0.26080960035324097, "learning_rate": 4.950669834639315e-05, "loss": 0.2279, "step": 8802 }, { "epoch": 0.15701137944565333, "grad_norm": 0.3522777557373047, "learning_rate": 4.9506390618729416e-05, "loss": 0.2678, "step": 8803 }, { "epoch": 0.15702921556736704, "grad_norm": 0.24790038168430328, "learning_rate": 4.950608279607044e-05, "loss": 0.2021, "step": 8804 }, { "epoch": 0.15704705168908073, "grad_norm": 0.23251527547836304, "learning_rate": 4.9505774878417434e-05, "loss": 0.1912, "step": 8805 }, { "epoch": 0.15706488781079442, "grad_norm": 0.2686958909034729, "learning_rate": 4.950546686577157e-05, "loss": 0.2239, "step": 8806 }, { "epoch": 0.1570827239325081, "grad_norm": 0.24929532408714294, "learning_rate": 4.9505158758134054e-05, "loss": 0.2136, "step": 8807 }, { "epoch": 0.15710056005422182, "grad_norm": 0.3674381375312805, "learning_rate": 4.9504850555506085e-05, "loss": 0.1945, "step": 8808 }, { "epoch": 0.1571183961759355, "grad_norm": 0.35281965136528015, "learning_rate": 4.9504542257888845e-05, "loss": 0.1781, "step": 8809 }, { "epoch": 0.1571362322976492, "grad_norm": 0.25824156403541565, "learning_rate": 4.950423386528354e-05, "loss": 0.2593, "step": 8810 }, { "epoch": 0.15715406841936289, "grad_norm": 0.21956311166286469, "learning_rate": 4.950392537769136e-05, "loss": 0.1892, "step": 8811 }, { "epoch": 0.1571719045410766, "grad_norm": 0.2829541265964508, "learning_rate": 4.95036167951135e-05, "loss": 0.2587, "step": 8812 }, { "epoch": 0.1571897406627903, "grad_norm": 0.3168048560619354, "learning_rate": 4.9503308117551164e-05, "loss": 0.2331, "step": 8813 }, { "epoch": 0.15720757678450398, "grad_norm": 0.3431147038936615, "learning_rate": 4.950299934500553e-05, "loss": 0.2068, "step": 8814 }, { "epoch": 0.15722541290621767, "grad_norm": 0.2998479902744293, "learning_rate": 4.950269047747782e-05, "loss": 0.2285, "step": 8815 }, { "epoch": 0.15724324902793135, "grad_norm": 0.2590060830116272, "learning_rate": 4.9502381514969215e-05, "loss": 0.2303, "step": 8816 }, { "epoch": 0.15726108514964507, "grad_norm": 0.2475730925798416, "learning_rate": 4.950207245748092e-05, "loss": 0.2162, "step": 8817 }, { "epoch": 0.15727892127135876, "grad_norm": 0.23853184282779694, "learning_rate": 4.9501763305014125e-05, "loss": 0.1879, "step": 8818 }, { "epoch": 0.15729675739307244, "grad_norm": 0.30825313925743103, "learning_rate": 4.950145405757003e-05, "loss": 0.2148, "step": 8819 }, { "epoch": 0.15731459351478613, "grad_norm": 0.2983599305152893, "learning_rate": 4.9501144715149836e-05, "loss": 0.232, "step": 8820 }, { "epoch": 0.15733242963649985, "grad_norm": 0.3923456072807312, "learning_rate": 4.9500835277754756e-05, "loss": 0.2835, "step": 8821 }, { "epoch": 0.15735026575821354, "grad_norm": 0.22664014995098114, "learning_rate": 4.9500525745385964e-05, "loss": 0.201, "step": 8822 }, { "epoch": 0.15736810187992722, "grad_norm": 0.28232407569885254, "learning_rate": 4.9500216118044674e-05, "loss": 0.2129, "step": 8823 }, { "epoch": 0.1573859380016409, "grad_norm": 0.2689587473869324, "learning_rate": 4.9499906395732085e-05, "loss": 0.2052, "step": 8824 }, { "epoch": 0.15740377412335463, "grad_norm": 0.2931232750415802, "learning_rate": 4.949959657844939e-05, "loss": 0.1753, "step": 8825 }, { "epoch": 0.15742161024506832, "grad_norm": 0.3985549807548523, "learning_rate": 4.949928666619781e-05, "loss": 0.2974, "step": 8826 }, { "epoch": 0.157439446366782, "grad_norm": 0.2937762439250946, "learning_rate": 4.9498976658978524e-05, "loss": 0.2571, "step": 8827 }, { "epoch": 0.1574572824884957, "grad_norm": 0.232209250330925, "learning_rate": 4.9498666556792745e-05, "loss": 0.1929, "step": 8828 }, { "epoch": 0.1574751186102094, "grad_norm": 0.49438947439193726, "learning_rate": 4.949835635964167e-05, "loss": 0.2251, "step": 8829 }, { "epoch": 0.1574929547319231, "grad_norm": 0.2629947066307068, "learning_rate": 4.949804606752651e-05, "loss": 0.2425, "step": 8830 }, { "epoch": 0.15751079085363678, "grad_norm": 0.2666455805301666, "learning_rate": 4.9497735680448456e-05, "loss": 0.2106, "step": 8831 }, { "epoch": 0.15752862697535047, "grad_norm": 0.32069793343544006, "learning_rate": 4.949742519840872e-05, "loss": 0.2422, "step": 8832 }, { "epoch": 0.1575464630970642, "grad_norm": 0.25603896379470825, "learning_rate": 4.9497114621408506e-05, "loss": 0.2203, "step": 8833 }, { "epoch": 0.15756429921877788, "grad_norm": 0.252055287361145, "learning_rate": 4.9496803949449014e-05, "loss": 0.2054, "step": 8834 }, { "epoch": 0.15758213534049156, "grad_norm": 0.32936811447143555, "learning_rate": 4.949649318253144e-05, "loss": 0.2401, "step": 8835 }, { "epoch": 0.15759997146220525, "grad_norm": 0.25010862946510315, "learning_rate": 4.9496182320657014e-05, "loss": 0.2547, "step": 8836 }, { "epoch": 0.15761780758391894, "grad_norm": 0.386197954416275, "learning_rate": 4.949587136382691e-05, "loss": 0.2993, "step": 8837 }, { "epoch": 0.15763564370563266, "grad_norm": 0.2307082712650299, "learning_rate": 4.9495560312042355e-05, "loss": 0.1814, "step": 8838 }, { "epoch": 0.15765347982734634, "grad_norm": 0.37932249903678894, "learning_rate": 4.9495249165304545e-05, "loss": 0.2772, "step": 8839 }, { "epoch": 0.15767131594906003, "grad_norm": 0.22730407118797302, "learning_rate": 4.9494937923614694e-05, "loss": 0.2157, "step": 8840 }, { "epoch": 0.15768915207077372, "grad_norm": 0.22970502078533173, "learning_rate": 4.9494626586973995e-05, "loss": 0.2247, "step": 8841 }, { "epoch": 0.15770698819248744, "grad_norm": 0.5269384384155273, "learning_rate": 4.949431515538367e-05, "loss": 0.2016, "step": 8842 }, { "epoch": 0.15772482431420112, "grad_norm": 0.27919816970825195, "learning_rate": 4.9494003628844916e-05, "loss": 0.2272, "step": 8843 }, { "epoch": 0.1577426604359148, "grad_norm": 0.26930591464042664, "learning_rate": 4.949369200735894e-05, "loss": 0.1959, "step": 8844 }, { "epoch": 0.1577604965576285, "grad_norm": 0.2881614863872528, "learning_rate": 4.949338029092696e-05, "loss": 0.25, "step": 8845 }, { "epoch": 0.15777833267934221, "grad_norm": 0.3047850430011749, "learning_rate": 4.949306847955018e-05, "loss": 0.2708, "step": 8846 }, { "epoch": 0.1577961688010559, "grad_norm": 0.2956016957759857, "learning_rate": 4.9492756573229806e-05, "loss": 0.2368, "step": 8847 }, { "epoch": 0.1578140049227696, "grad_norm": 0.27399855852127075, "learning_rate": 4.949244457196704e-05, "loss": 0.2314, "step": 8848 }, { "epoch": 0.15783184104448328, "grad_norm": 0.2946315407752991, "learning_rate": 4.9492132475763107e-05, "loss": 0.1979, "step": 8849 }, { "epoch": 0.157849677166197, "grad_norm": 0.28162893652915955, "learning_rate": 4.949182028461921e-05, "loss": 0.226, "step": 8850 }, { "epoch": 0.15786751328791068, "grad_norm": 0.2386331856250763, "learning_rate": 4.949150799853656e-05, "loss": 0.2156, "step": 8851 }, { "epoch": 0.15788534940962437, "grad_norm": 0.1961589753627777, "learning_rate": 4.9491195617516364e-05, "loss": 0.1616, "step": 8852 }, { "epoch": 0.15790318553133806, "grad_norm": 0.2792550027370453, "learning_rate": 4.949088314155983e-05, "loss": 0.2126, "step": 8853 }, { "epoch": 0.15792102165305177, "grad_norm": 0.2500261664390564, "learning_rate": 4.949057057066817e-05, "loss": 0.1943, "step": 8854 }, { "epoch": 0.15793885777476546, "grad_norm": 0.26533347368240356, "learning_rate": 4.9490257904842606e-05, "loss": 0.2237, "step": 8855 }, { "epoch": 0.15795669389647915, "grad_norm": 0.22932292520999908, "learning_rate": 4.948994514408435e-05, "loss": 0.1762, "step": 8856 }, { "epoch": 0.15797453001819284, "grad_norm": 0.32544583082199097, "learning_rate": 4.94896322883946e-05, "loss": 0.2228, "step": 8857 }, { "epoch": 0.15799236613990653, "grad_norm": 0.2862977981567383, "learning_rate": 4.9489319337774573e-05, "loss": 0.2136, "step": 8858 }, { "epoch": 0.15801020226162024, "grad_norm": 0.35817044973373413, "learning_rate": 4.9489006292225496e-05, "loss": 0.1899, "step": 8859 }, { "epoch": 0.15802803838333393, "grad_norm": 0.2702390253543854, "learning_rate": 4.948869315174857e-05, "loss": 0.1696, "step": 8860 }, { "epoch": 0.15804587450504762, "grad_norm": 0.25291597843170166, "learning_rate": 4.9488379916345004e-05, "loss": 0.2307, "step": 8861 }, { "epoch": 0.1580637106267613, "grad_norm": 0.27768197655677795, "learning_rate": 4.948806658601603e-05, "loss": 0.2101, "step": 8862 }, { "epoch": 0.15808154674847502, "grad_norm": 0.2693616449832916, "learning_rate": 4.948775316076285e-05, "loss": 0.2381, "step": 8863 }, { "epoch": 0.1580993828701887, "grad_norm": 0.29252687096595764, "learning_rate": 4.9487439640586674e-05, "loss": 0.2301, "step": 8864 }, { "epoch": 0.1581172189919024, "grad_norm": 0.30311325192451477, "learning_rate": 4.9487126025488726e-05, "loss": 0.1958, "step": 8865 }, { "epoch": 0.15813505511361609, "grad_norm": 0.2934456467628479, "learning_rate": 4.9486812315470226e-05, "loss": 0.191, "step": 8866 }, { "epoch": 0.1581528912353298, "grad_norm": 0.2761151194572449, "learning_rate": 4.948649851053238e-05, "loss": 0.2083, "step": 8867 }, { "epoch": 0.1581707273570435, "grad_norm": 0.23584337532520294, "learning_rate": 4.948618461067641e-05, "loss": 0.189, "step": 8868 }, { "epoch": 0.15818856347875718, "grad_norm": 0.25781938433647156, "learning_rate": 4.948587061590353e-05, "loss": 0.2331, "step": 8869 }, { "epoch": 0.15820639960047087, "grad_norm": 0.27441632747650146, "learning_rate": 4.9485556526214955e-05, "loss": 0.1956, "step": 8870 }, { "epoch": 0.15822423572218458, "grad_norm": 0.29231467843055725, "learning_rate": 4.948524234161191e-05, "loss": 0.2419, "step": 8871 }, { "epoch": 0.15824207184389827, "grad_norm": 0.2262069433927536, "learning_rate": 4.94849280620956e-05, "loss": 0.1842, "step": 8872 }, { "epoch": 0.15825990796561196, "grad_norm": 0.38599586486816406, "learning_rate": 4.948461368766727e-05, "loss": 0.2499, "step": 8873 }, { "epoch": 0.15827774408732564, "grad_norm": 0.414283812046051, "learning_rate": 4.948429921832811e-05, "loss": 0.1976, "step": 8874 }, { "epoch": 0.15829558020903933, "grad_norm": 0.22861029207706451, "learning_rate": 4.948398465407935e-05, "loss": 0.2088, "step": 8875 }, { "epoch": 0.15831341633075305, "grad_norm": 0.2521374821662903, "learning_rate": 4.948366999492221e-05, "loss": 0.1988, "step": 8876 }, { "epoch": 0.15833125245246674, "grad_norm": 0.23936320841312408, "learning_rate": 4.94833552408579e-05, "loss": 0.161, "step": 8877 }, { "epoch": 0.15834908857418042, "grad_norm": 0.28193894028663635, "learning_rate": 4.948304039188766e-05, "loss": 0.1982, "step": 8878 }, { "epoch": 0.1583669246958941, "grad_norm": 0.3436919152736664, "learning_rate": 4.9482725448012695e-05, "loss": 0.2975, "step": 8879 }, { "epoch": 0.15838476081760783, "grad_norm": 0.29465392231941223, "learning_rate": 4.948241040923423e-05, "loss": 0.2351, "step": 8880 }, { "epoch": 0.15840259693932152, "grad_norm": 0.3796325922012329, "learning_rate": 4.9482095275553484e-05, "loss": 0.2295, "step": 8881 }, { "epoch": 0.1584204330610352, "grad_norm": 0.3656231164932251, "learning_rate": 4.948178004697169e-05, "loss": 0.3035, "step": 8882 }, { "epoch": 0.1584382691827489, "grad_norm": 0.26530107855796814, "learning_rate": 4.948146472349005e-05, "loss": 0.225, "step": 8883 }, { "epoch": 0.1584561053044626, "grad_norm": 0.30532997846603394, "learning_rate": 4.948114930510981e-05, "loss": 0.2209, "step": 8884 }, { "epoch": 0.1584739414261763, "grad_norm": 0.23127897083759308, "learning_rate": 4.948083379183217e-05, "loss": 0.2262, "step": 8885 }, { "epoch": 0.15849177754788998, "grad_norm": 0.37624356150627136, "learning_rate": 4.9480518183658364e-05, "loss": 0.1997, "step": 8886 }, { "epoch": 0.15850961366960367, "grad_norm": 0.2639813721179962, "learning_rate": 4.948020248058961e-05, "loss": 0.2645, "step": 8887 }, { "epoch": 0.1585274497913174, "grad_norm": 0.2535322606563568, "learning_rate": 4.947988668262714e-05, "loss": 0.2417, "step": 8888 }, { "epoch": 0.15854528591303108, "grad_norm": 0.25185152888298035, "learning_rate": 4.9479570789772176e-05, "loss": 0.1838, "step": 8889 }, { "epoch": 0.15856312203474476, "grad_norm": 0.1991785615682602, "learning_rate": 4.947925480202594e-05, "loss": 0.1901, "step": 8890 }, { "epoch": 0.15858095815645845, "grad_norm": 0.3542342782020569, "learning_rate": 4.947893871938966e-05, "loss": 0.2594, "step": 8891 }, { "epoch": 0.15859879427817217, "grad_norm": 0.2714672088623047, "learning_rate": 4.947862254186455e-05, "loss": 0.2104, "step": 8892 }, { "epoch": 0.15861663039988586, "grad_norm": 0.282929003238678, "learning_rate": 4.947830626945186e-05, "loss": 0.2695, "step": 8893 }, { "epoch": 0.15863446652159954, "grad_norm": 0.2924908995628357, "learning_rate": 4.947798990215278e-05, "loss": 0.2223, "step": 8894 }, { "epoch": 0.15865230264331323, "grad_norm": 0.34512925148010254, "learning_rate": 4.947767343996857e-05, "loss": 0.2111, "step": 8895 }, { "epoch": 0.15867013876502692, "grad_norm": 0.3300144672393799, "learning_rate": 4.947735688290044e-05, "loss": 0.2437, "step": 8896 }, { "epoch": 0.15868797488674063, "grad_norm": 0.2518937289714813, "learning_rate": 4.947704023094962e-05, "loss": 0.2384, "step": 8897 }, { "epoch": 0.15870581100845432, "grad_norm": 0.2370232343673706, "learning_rate": 4.947672348411734e-05, "loss": 0.2191, "step": 8898 }, { "epoch": 0.158723647130168, "grad_norm": 0.29083213210105896, "learning_rate": 4.9476406642404826e-05, "loss": 0.1869, "step": 8899 }, { "epoch": 0.1587414832518817, "grad_norm": 0.2743731141090393, "learning_rate": 4.9476089705813306e-05, "loss": 0.1975, "step": 8900 }, { "epoch": 0.15875931937359541, "grad_norm": 0.33588162064552307, "learning_rate": 4.9475772674344005e-05, "loss": 0.2459, "step": 8901 }, { "epoch": 0.1587771554953091, "grad_norm": 0.33835238218307495, "learning_rate": 4.9475455547998165e-05, "loss": 0.2301, "step": 8902 }, { "epoch": 0.1587949916170228, "grad_norm": 0.22800800204277039, "learning_rate": 4.947513832677699e-05, "loss": 0.1953, "step": 8903 }, { "epoch": 0.15881282773873648, "grad_norm": 0.25396639108657837, "learning_rate": 4.9474821010681736e-05, "loss": 0.2107, "step": 8904 }, { "epoch": 0.1588306638604502, "grad_norm": 0.24167804419994354, "learning_rate": 4.947450359971362e-05, "loss": 0.1658, "step": 8905 }, { "epoch": 0.15884849998216388, "grad_norm": 0.29995760321617126, "learning_rate": 4.947418609387387e-05, "loss": 0.241, "step": 8906 }, { "epoch": 0.15886633610387757, "grad_norm": 0.3503248393535614, "learning_rate": 4.947386849316373e-05, "loss": 0.1706, "step": 8907 }, { "epoch": 0.15888417222559126, "grad_norm": 0.29240545630455017, "learning_rate": 4.947355079758442e-05, "loss": 0.2675, "step": 8908 }, { "epoch": 0.15890200834730497, "grad_norm": 0.2589358687400818, "learning_rate": 4.947323300713718e-05, "loss": 0.1867, "step": 8909 }, { "epoch": 0.15891984446901866, "grad_norm": 0.2756797969341278, "learning_rate": 4.9472915121823226e-05, "loss": 0.1965, "step": 8910 }, { "epoch": 0.15893768059073235, "grad_norm": 0.4475850462913513, "learning_rate": 4.947259714164381e-05, "loss": 0.2285, "step": 8911 }, { "epoch": 0.15895551671244604, "grad_norm": 0.27526500821113586, "learning_rate": 4.947227906660015e-05, "loss": 0.2318, "step": 8912 }, { "epoch": 0.15897335283415975, "grad_norm": 0.2726994752883911, "learning_rate": 4.947196089669348e-05, "loss": 0.2049, "step": 8913 }, { "epoch": 0.15899118895587344, "grad_norm": 0.2614360451698303, "learning_rate": 4.9471642631925045e-05, "loss": 0.1859, "step": 8914 }, { "epoch": 0.15900902507758713, "grad_norm": 0.4405480623245239, "learning_rate": 4.947132427229606e-05, "loss": 0.2847, "step": 8915 }, { "epoch": 0.15902686119930082, "grad_norm": 0.41811949014663696, "learning_rate": 4.947100581780778e-05, "loss": 0.2068, "step": 8916 }, { "epoch": 0.1590446973210145, "grad_norm": 0.2581080198287964, "learning_rate": 4.9470687268461426e-05, "loss": 0.1841, "step": 8917 }, { "epoch": 0.15906253344272822, "grad_norm": 0.255830317735672, "learning_rate": 4.9470368624258226e-05, "loss": 0.2252, "step": 8918 }, { "epoch": 0.1590803695644419, "grad_norm": 0.2948690354824066, "learning_rate": 4.9470049885199445e-05, "loss": 0.2325, "step": 8919 }, { "epoch": 0.1590982056861556, "grad_norm": 0.3190224766731262, "learning_rate": 4.946973105128628e-05, "loss": 0.1824, "step": 8920 }, { "epoch": 0.15911604180786929, "grad_norm": 0.2818789780139923, "learning_rate": 4.946941212252e-05, "loss": 0.2352, "step": 8921 }, { "epoch": 0.159133877929583, "grad_norm": 0.3696920871734619, "learning_rate": 4.946909309890182e-05, "loss": 0.2749, "step": 8922 }, { "epoch": 0.1591517140512967, "grad_norm": 0.2734166979789734, "learning_rate": 4.946877398043299e-05, "loss": 0.1973, "step": 8923 }, { "epoch": 0.15916955017301038, "grad_norm": 0.32807162404060364, "learning_rate": 4.9468454767114735e-05, "loss": 0.2532, "step": 8924 }, { "epoch": 0.15918738629472406, "grad_norm": 0.28743776679039, "learning_rate": 4.946813545894829e-05, "loss": 0.2206, "step": 8925 }, { "epoch": 0.15920522241643778, "grad_norm": 0.2819172143936157, "learning_rate": 4.9467816055934916e-05, "loss": 0.2572, "step": 8926 }, { "epoch": 0.15922305853815147, "grad_norm": 0.3044775426387787, "learning_rate": 4.946749655807583e-05, "loss": 0.2425, "step": 8927 }, { "epoch": 0.15924089465986516, "grad_norm": 0.292955219745636, "learning_rate": 4.946717696537228e-05, "loss": 0.2514, "step": 8928 }, { "epoch": 0.15925873078157884, "grad_norm": 0.2505530118942261, "learning_rate": 4.94668572778255e-05, "loss": 0.2392, "step": 8929 }, { "epoch": 0.15927656690329256, "grad_norm": 0.22183232009410858, "learning_rate": 4.9466537495436726e-05, "loss": 0.1846, "step": 8930 }, { "epoch": 0.15929440302500625, "grad_norm": 0.26471900939941406, "learning_rate": 4.946621761820721e-05, "loss": 0.217, "step": 8931 }, { "epoch": 0.15931223914671994, "grad_norm": 0.3160739243030548, "learning_rate": 4.946589764613818e-05, "loss": 0.1824, "step": 8932 }, { "epoch": 0.15933007526843362, "grad_norm": 0.2557903826236725, "learning_rate": 4.9465577579230884e-05, "loss": 0.1999, "step": 8933 }, { "epoch": 0.15934791139014734, "grad_norm": 0.24554532766342163, "learning_rate": 4.946525741748655e-05, "loss": 0.2405, "step": 8934 }, { "epoch": 0.15936574751186103, "grad_norm": 0.29524701833724976, "learning_rate": 4.9464937160906433e-05, "loss": 0.2606, "step": 8935 }, { "epoch": 0.15938358363357472, "grad_norm": 0.23595523834228516, "learning_rate": 4.9464616809491774e-05, "loss": 0.2311, "step": 8936 }, { "epoch": 0.1594014197552884, "grad_norm": 0.268909752368927, "learning_rate": 4.946429636324381e-05, "loss": 0.1478, "step": 8937 }, { "epoch": 0.1594192558770021, "grad_norm": 0.26586514711380005, "learning_rate": 4.946397582216378e-05, "loss": 0.2411, "step": 8938 }, { "epoch": 0.1594370919987158, "grad_norm": 0.2992939352989197, "learning_rate": 4.9463655186252925e-05, "loss": 0.1826, "step": 8939 }, { "epoch": 0.1594549281204295, "grad_norm": 0.3257649540901184, "learning_rate": 4.94633344555125e-05, "loss": 0.2712, "step": 8940 }, { "epoch": 0.15947276424214318, "grad_norm": 0.3885219693183899, "learning_rate": 4.946301362994374e-05, "loss": 0.2507, "step": 8941 }, { "epoch": 0.15949060036385687, "grad_norm": 0.24294568598270416, "learning_rate": 4.946269270954789e-05, "loss": 0.1777, "step": 8942 }, { "epoch": 0.1595084364855706, "grad_norm": 0.33604273200035095, "learning_rate": 4.946237169432619e-05, "loss": 0.2024, "step": 8943 }, { "epoch": 0.15952627260728428, "grad_norm": 0.34399721026420593, "learning_rate": 4.94620505842799e-05, "loss": 0.2638, "step": 8944 }, { "epoch": 0.15954410872899796, "grad_norm": 0.2487800121307373, "learning_rate": 4.9461729379410235e-05, "loss": 0.2206, "step": 8945 }, { "epoch": 0.15956194485071165, "grad_norm": 0.29507145285606384, "learning_rate": 4.9461408079718474e-05, "loss": 0.2007, "step": 8946 }, { "epoch": 0.15957978097242537, "grad_norm": 0.25678563117980957, "learning_rate": 4.946108668520584e-05, "loss": 0.2522, "step": 8947 }, { "epoch": 0.15959761709413905, "grad_norm": 0.42071107029914856, "learning_rate": 4.946076519587359e-05, "loss": 0.2379, "step": 8948 }, { "epoch": 0.15961545321585274, "grad_norm": 0.24987351894378662, "learning_rate": 4.946044361172296e-05, "loss": 0.2132, "step": 8949 }, { "epoch": 0.15963328933756643, "grad_norm": 0.28699952363967896, "learning_rate": 4.94601219327552e-05, "loss": 0.1839, "step": 8950 }, { "epoch": 0.15965112545928015, "grad_norm": 0.2963704764842987, "learning_rate": 4.945980015897157e-05, "loss": 0.2226, "step": 8951 }, { "epoch": 0.15966896158099383, "grad_norm": 0.2969897985458374, "learning_rate": 4.94594782903733e-05, "loss": 0.2807, "step": 8952 }, { "epoch": 0.15968679770270752, "grad_norm": 0.29011672735214233, "learning_rate": 4.9459156326961645e-05, "loss": 0.2143, "step": 8953 }, { "epoch": 0.1597046338244212, "grad_norm": 0.26859965920448303, "learning_rate": 4.945883426873784e-05, "loss": 0.2292, "step": 8954 }, { "epoch": 0.15972246994613493, "grad_norm": 0.25487181544303894, "learning_rate": 4.945851211570316e-05, "loss": 0.2219, "step": 8955 }, { "epoch": 0.15974030606784861, "grad_norm": 0.31263524293899536, "learning_rate": 4.945818986785884e-05, "loss": 0.1598, "step": 8956 }, { "epoch": 0.1597581421895623, "grad_norm": 0.47453898191452026, "learning_rate": 4.945786752520612e-05, "loss": 0.2524, "step": 8957 }, { "epoch": 0.159775978311276, "grad_norm": 0.3323827385902405, "learning_rate": 4.945754508774626e-05, "loss": 0.2851, "step": 8958 }, { "epoch": 0.15979381443298968, "grad_norm": 0.3283613920211792, "learning_rate": 4.945722255548051e-05, "loss": 0.2461, "step": 8959 }, { "epoch": 0.1598116505547034, "grad_norm": 0.29481279850006104, "learning_rate": 4.945689992841012e-05, "loss": 0.2443, "step": 8960 }, { "epoch": 0.15982948667641708, "grad_norm": 0.26263076066970825, "learning_rate": 4.9456577206536333e-05, "loss": 0.2342, "step": 8961 }, { "epoch": 0.15984732279813077, "grad_norm": 0.2603324353694916, "learning_rate": 4.945625438986041e-05, "loss": 0.2133, "step": 8962 }, { "epoch": 0.15986515891984446, "grad_norm": 0.2865920960903168, "learning_rate": 4.94559314783836e-05, "loss": 0.2091, "step": 8963 }, { "epoch": 0.15988299504155817, "grad_norm": 0.40341323614120483, "learning_rate": 4.945560847210715e-05, "loss": 0.2199, "step": 8964 }, { "epoch": 0.15990083116327186, "grad_norm": 0.3902820944786072, "learning_rate": 4.945528537103232e-05, "loss": 0.1633, "step": 8965 }, { "epoch": 0.15991866728498555, "grad_norm": 0.353304922580719, "learning_rate": 4.9454962175160345e-05, "loss": 0.27, "step": 8966 }, { "epoch": 0.15993650340669924, "grad_norm": 0.3476138114929199, "learning_rate": 4.94546388844925e-05, "loss": 0.2563, "step": 8967 }, { "epoch": 0.15995433952841295, "grad_norm": 0.2467448115348816, "learning_rate": 4.945431549903003e-05, "loss": 0.2318, "step": 8968 }, { "epoch": 0.15997217565012664, "grad_norm": 0.22938476502895355, "learning_rate": 4.945399201877418e-05, "loss": 0.1865, "step": 8969 }, { "epoch": 0.15999001177184033, "grad_norm": 0.2441449910402298, "learning_rate": 4.945366844372622e-05, "loss": 0.2405, "step": 8970 }, { "epoch": 0.16000784789355402, "grad_norm": 0.22523269057273865, "learning_rate": 4.945334477388739e-05, "loss": 0.1787, "step": 8971 }, { "epoch": 0.16002568401526773, "grad_norm": 0.26215478777885437, "learning_rate": 4.9453021009258944e-05, "loss": 0.2547, "step": 8972 }, { "epoch": 0.16004352013698142, "grad_norm": 0.2577087879180908, "learning_rate": 4.945269714984215e-05, "loss": 0.1961, "step": 8973 }, { "epoch": 0.1600613562586951, "grad_norm": 0.310244619846344, "learning_rate": 4.9452373195638245e-05, "loss": 0.2473, "step": 8974 }, { "epoch": 0.1600791923804088, "grad_norm": 0.3036620616912842, "learning_rate": 4.945204914664851e-05, "loss": 0.2243, "step": 8975 }, { "epoch": 0.16009702850212248, "grad_norm": 0.3738292455673218, "learning_rate": 4.945172500287418e-05, "loss": 0.1934, "step": 8976 }, { "epoch": 0.1601148646238362, "grad_norm": 0.3114437758922577, "learning_rate": 4.9451400764316526e-05, "loss": 0.2377, "step": 8977 }, { "epoch": 0.1601327007455499, "grad_norm": 0.2868841886520386, "learning_rate": 4.945107643097679e-05, "loss": 0.2644, "step": 8978 }, { "epoch": 0.16015053686726358, "grad_norm": 0.3465786874294281, "learning_rate": 4.9450752002856235e-05, "loss": 0.2069, "step": 8979 }, { "epoch": 0.16016837298897726, "grad_norm": 0.26838719844818115, "learning_rate": 4.9450427479956126e-05, "loss": 0.1961, "step": 8980 }, { "epoch": 0.16018620911069098, "grad_norm": 0.2795334756374359, "learning_rate": 4.9450102862277706e-05, "loss": 0.2156, "step": 8981 }, { "epoch": 0.16020404523240467, "grad_norm": 0.30109819769859314, "learning_rate": 4.9449778149822255e-05, "loss": 0.245, "step": 8982 }, { "epoch": 0.16022188135411836, "grad_norm": 0.33533725142478943, "learning_rate": 4.944945334259101e-05, "loss": 0.2278, "step": 8983 }, { "epoch": 0.16023971747583204, "grad_norm": 0.44862592220306396, "learning_rate": 4.944912844058525e-05, "loss": 0.1847, "step": 8984 }, { "epoch": 0.16025755359754576, "grad_norm": 0.19114886224269867, "learning_rate": 4.9448803443806214e-05, "loss": 0.1796, "step": 8985 }, { "epoch": 0.16027538971925945, "grad_norm": 0.3102222979068756, "learning_rate": 4.944847835225517e-05, "loss": 0.2716, "step": 8986 }, { "epoch": 0.16029322584097314, "grad_norm": 0.21888549625873566, "learning_rate": 4.9448153165933385e-05, "loss": 0.2119, "step": 8987 }, { "epoch": 0.16031106196268682, "grad_norm": 0.26361963152885437, "learning_rate": 4.944782788484211e-05, "loss": 0.2145, "step": 8988 }, { "epoch": 0.16032889808440054, "grad_norm": 0.32678624987602234, "learning_rate": 4.9447502508982616e-05, "loss": 0.2564, "step": 8989 }, { "epoch": 0.16034673420611423, "grad_norm": 0.25996658205986023, "learning_rate": 4.944717703835615e-05, "loss": 0.1867, "step": 8990 }, { "epoch": 0.16036457032782792, "grad_norm": 0.23481029272079468, "learning_rate": 4.944685147296399e-05, "loss": 0.198, "step": 8991 }, { "epoch": 0.1603824064495416, "grad_norm": 0.2839064300060272, "learning_rate": 4.9446525812807385e-05, "loss": 0.2736, "step": 8992 }, { "epoch": 0.16040024257125532, "grad_norm": 0.2369505614042282, "learning_rate": 4.94462000578876e-05, "loss": 0.2307, "step": 8993 }, { "epoch": 0.160418078692969, "grad_norm": 0.31546878814697266, "learning_rate": 4.944587420820591e-05, "loss": 0.2291, "step": 8994 }, { "epoch": 0.1604359148146827, "grad_norm": 0.34085386991500854, "learning_rate": 4.9445548263763564e-05, "loss": 0.2389, "step": 8995 }, { "epoch": 0.16045375093639638, "grad_norm": 0.285194993019104, "learning_rate": 4.944522222456183e-05, "loss": 0.2312, "step": 8996 }, { "epoch": 0.16047158705811007, "grad_norm": 0.2984153628349304, "learning_rate": 4.9444896090601965e-05, "loss": 0.2711, "step": 8997 }, { "epoch": 0.1604894231798238, "grad_norm": 0.19079795479774475, "learning_rate": 4.944456986188525e-05, "loss": 0.208, "step": 8998 }, { "epoch": 0.16050725930153747, "grad_norm": 0.23399192094802856, "learning_rate": 4.944424353841293e-05, "loss": 0.1868, "step": 8999 }, { "epoch": 0.16052509542325116, "grad_norm": 0.3381305932998657, "learning_rate": 4.944391712018629e-05, "loss": 0.2717, "step": 9000 }, { "epoch": 0.16052509542325116, "eval_loss": 0.2077334076166153, "eval_runtime": 107.6389, "eval_samples_per_second": 9.513, "eval_steps_per_second": 1.589, "step": 9000 }, { "epoch": 0.16054293154496485, "grad_norm": 0.261714905500412, "learning_rate": 4.944359060720657e-05, "loss": 0.2021, "step": 9001 }, { "epoch": 0.16056076766667857, "grad_norm": 0.30212825536727905, "learning_rate": 4.9443263999475066e-05, "loss": 0.2212, "step": 9002 }, { "epoch": 0.16057860378839225, "grad_norm": 0.28419339656829834, "learning_rate": 4.9442937296993016e-05, "loss": 0.213, "step": 9003 }, { "epoch": 0.16059643991010594, "grad_norm": 0.32400792837142944, "learning_rate": 4.94426104997617e-05, "loss": 0.2554, "step": 9004 }, { "epoch": 0.16061427603181963, "grad_norm": 0.28723564743995667, "learning_rate": 4.944228360778239e-05, "loss": 0.235, "step": 9005 }, { "epoch": 0.16063211215353335, "grad_norm": 0.2636263966560364, "learning_rate": 4.944195662105634e-05, "loss": 0.2128, "step": 9006 }, { "epoch": 0.16064994827524703, "grad_norm": 0.26810839772224426, "learning_rate": 4.944162953958483e-05, "loss": 0.195, "step": 9007 }, { "epoch": 0.16066778439696072, "grad_norm": 0.18495428562164307, "learning_rate": 4.944130236336913e-05, "loss": 0.1662, "step": 9008 }, { "epoch": 0.1606856205186744, "grad_norm": 0.2508189380168915, "learning_rate": 4.944097509241048e-05, "loss": 0.1853, "step": 9009 }, { "epoch": 0.16070345664038813, "grad_norm": 0.30176711082458496, "learning_rate": 4.944064772671017e-05, "loss": 0.1929, "step": 9010 }, { "epoch": 0.16072129276210181, "grad_norm": 0.319826602935791, "learning_rate": 4.9440320266269486e-05, "loss": 0.2475, "step": 9011 }, { "epoch": 0.1607391288838155, "grad_norm": 0.28028926253318787, "learning_rate": 4.943999271108967e-05, "loss": 0.2455, "step": 9012 }, { "epoch": 0.1607569650055292, "grad_norm": 0.252003014087677, "learning_rate": 4.9439665061172006e-05, "loss": 0.1783, "step": 9013 }, { "epoch": 0.1607748011272429, "grad_norm": 0.2615832984447479, "learning_rate": 4.943933731651775e-05, "loss": 0.1914, "step": 9014 }, { "epoch": 0.1607926372489566, "grad_norm": 0.2725923955440521, "learning_rate": 4.943900947712819e-05, "loss": 0.2566, "step": 9015 }, { "epoch": 0.16081047337067028, "grad_norm": 0.2708735167980194, "learning_rate": 4.943868154300458e-05, "loss": 0.2271, "step": 9016 }, { "epoch": 0.16082830949238397, "grad_norm": 0.2694780230522156, "learning_rate": 4.94383535141482e-05, "loss": 0.2171, "step": 9017 }, { "epoch": 0.16084614561409766, "grad_norm": 0.3065152168273926, "learning_rate": 4.9438025390560324e-05, "loss": 0.2566, "step": 9018 }, { "epoch": 0.16086398173581137, "grad_norm": 0.27773818373680115, "learning_rate": 4.9437697172242227e-05, "loss": 0.2278, "step": 9019 }, { "epoch": 0.16088181785752506, "grad_norm": 0.2347675859928131, "learning_rate": 4.943736885919516e-05, "loss": 0.1888, "step": 9020 }, { "epoch": 0.16089965397923875, "grad_norm": 0.2529999911785126, "learning_rate": 4.9437040451420426e-05, "loss": 0.1808, "step": 9021 }, { "epoch": 0.16091749010095244, "grad_norm": 0.2778419554233551, "learning_rate": 4.943671194891928e-05, "loss": 0.2236, "step": 9022 }, { "epoch": 0.16093532622266615, "grad_norm": 0.22367282211780548, "learning_rate": 4.943638335169299e-05, "loss": 0.18, "step": 9023 }, { "epoch": 0.16095316234437984, "grad_norm": 0.3940797746181488, "learning_rate": 4.943605465974285e-05, "loss": 0.2755, "step": 9024 }, { "epoch": 0.16097099846609353, "grad_norm": 0.2517111599445343, "learning_rate": 4.943572587307012e-05, "loss": 0.1823, "step": 9025 }, { "epoch": 0.16098883458780722, "grad_norm": 0.30851662158966064, "learning_rate": 4.943539699167606e-05, "loss": 0.2598, "step": 9026 }, { "epoch": 0.16100667070952093, "grad_norm": 0.27914607524871826, "learning_rate": 4.9435068015561984e-05, "loss": 0.2495, "step": 9027 }, { "epoch": 0.16102450683123462, "grad_norm": 0.3053116500377655, "learning_rate": 4.943473894472913e-05, "loss": 0.2166, "step": 9028 }, { "epoch": 0.1610423429529483, "grad_norm": 0.42158517241477966, "learning_rate": 4.943440977917879e-05, "loss": 0.2609, "step": 9029 }, { "epoch": 0.161060179074662, "grad_norm": 0.26858243346214294, "learning_rate": 4.943408051891224e-05, "loss": 0.2538, "step": 9030 }, { "epoch": 0.1610780151963757, "grad_norm": 0.22635705769062042, "learning_rate": 4.9433751163930766e-05, "loss": 0.2091, "step": 9031 }, { "epoch": 0.1610958513180894, "grad_norm": 0.21643264591693878, "learning_rate": 4.9433421714235614e-05, "loss": 0.1997, "step": 9032 }, { "epoch": 0.1611136874398031, "grad_norm": 0.32239702343940735, "learning_rate": 4.943309216982809e-05, "loss": 0.2162, "step": 9033 }, { "epoch": 0.16113152356151678, "grad_norm": 0.21508969366550446, "learning_rate": 4.9432762530709464e-05, "loss": 0.1844, "step": 9034 }, { "epoch": 0.1611493596832305, "grad_norm": 0.2988855242729187, "learning_rate": 4.943243279688101e-05, "loss": 0.1863, "step": 9035 }, { "epoch": 0.16116719580494418, "grad_norm": 0.35321035981178284, "learning_rate": 4.9432102968344006e-05, "loss": 0.3203, "step": 9036 }, { "epoch": 0.16118503192665787, "grad_norm": 0.29631587862968445, "learning_rate": 4.943177304509974e-05, "loss": 0.2274, "step": 9037 }, { "epoch": 0.16120286804837156, "grad_norm": 0.2676721215248108, "learning_rate": 4.943144302714947e-05, "loss": 0.2211, "step": 9038 }, { "epoch": 0.16122070417008524, "grad_norm": 0.2809307277202606, "learning_rate": 4.94311129144945e-05, "loss": 0.1732, "step": 9039 }, { "epoch": 0.16123854029179896, "grad_norm": 0.3126406669616699, "learning_rate": 4.943078270713609e-05, "loss": 0.2488, "step": 9040 }, { "epoch": 0.16125637641351265, "grad_norm": 0.25174424052238464, "learning_rate": 4.943045240507553e-05, "loss": 0.1848, "step": 9041 }, { "epoch": 0.16127421253522634, "grad_norm": 0.28735512495040894, "learning_rate": 4.94301220083141e-05, "loss": 0.219, "step": 9042 }, { "epoch": 0.16129204865694002, "grad_norm": 0.2653440833091736, "learning_rate": 4.942979151685309e-05, "loss": 0.2425, "step": 9043 }, { "epoch": 0.16130988477865374, "grad_norm": 0.2919001877307892, "learning_rate": 4.942946093069375e-05, "loss": 0.2239, "step": 9044 }, { "epoch": 0.16132772090036743, "grad_norm": 0.3311496376991272, "learning_rate": 4.9429130249837395e-05, "loss": 0.2193, "step": 9045 }, { "epoch": 0.16134555702208112, "grad_norm": 0.3010842502117157, "learning_rate": 4.9428799474285285e-05, "loss": 0.2165, "step": 9046 }, { "epoch": 0.1613633931437948, "grad_norm": 0.29573285579681396, "learning_rate": 4.942846860403872e-05, "loss": 0.2065, "step": 9047 }, { "epoch": 0.16138122926550852, "grad_norm": 0.36145737767219543, "learning_rate": 4.942813763909897e-05, "loss": 0.2555, "step": 9048 }, { "epoch": 0.1613990653872222, "grad_norm": 0.31323644518852234, "learning_rate": 4.942780657946732e-05, "loss": 0.1722, "step": 9049 }, { "epoch": 0.1614169015089359, "grad_norm": 0.19427388906478882, "learning_rate": 4.942747542514505e-05, "loss": 0.1989, "step": 9050 }, { "epoch": 0.16143473763064958, "grad_norm": 0.2875831127166748, "learning_rate": 4.9427144176133454e-05, "loss": 0.2038, "step": 9051 }, { "epoch": 0.1614525737523633, "grad_norm": 0.25899288058280945, "learning_rate": 4.942681283243381e-05, "loss": 0.2161, "step": 9052 }, { "epoch": 0.161470409874077, "grad_norm": 0.2957363426685333, "learning_rate": 4.9426481394047404e-05, "loss": 0.2435, "step": 9053 }, { "epoch": 0.16148824599579067, "grad_norm": 0.38003870844841003, "learning_rate": 4.9426149860975514e-05, "loss": 0.2859, "step": 9054 }, { "epoch": 0.16150608211750436, "grad_norm": 0.2551209330558777, "learning_rate": 4.9425818233219436e-05, "loss": 0.2494, "step": 9055 }, { "epoch": 0.16152391823921805, "grad_norm": 0.2630995512008667, "learning_rate": 4.942548651078045e-05, "loss": 0.2529, "step": 9056 }, { "epoch": 0.16154175436093177, "grad_norm": 0.30915892124176025, "learning_rate": 4.9425154693659834e-05, "loss": 0.245, "step": 9057 }, { "epoch": 0.16155959048264545, "grad_norm": 0.40903642773628235, "learning_rate": 4.942482278185889e-05, "loss": 0.2441, "step": 9058 }, { "epoch": 0.16157742660435914, "grad_norm": 0.308986634016037, "learning_rate": 4.942449077537889e-05, "loss": 0.2346, "step": 9059 }, { "epoch": 0.16159526272607283, "grad_norm": 0.2930994927883148, "learning_rate": 4.942415867422114e-05, "loss": 0.2378, "step": 9060 }, { "epoch": 0.16161309884778655, "grad_norm": 0.30246230959892273, "learning_rate": 4.942382647838691e-05, "loss": 0.2361, "step": 9061 }, { "epoch": 0.16163093496950023, "grad_norm": 0.25455141067504883, "learning_rate": 4.9423494187877494e-05, "loss": 0.1872, "step": 9062 }, { "epoch": 0.16164877109121392, "grad_norm": 0.2597738206386566, "learning_rate": 4.942316180269417e-05, "loss": 0.2298, "step": 9063 }, { "epoch": 0.1616666072129276, "grad_norm": 0.34653058648109436, "learning_rate": 4.942282932283825e-05, "loss": 0.3185, "step": 9064 }, { "epoch": 0.16168444333464133, "grad_norm": 0.27348095178604126, "learning_rate": 4.9422496748311e-05, "loss": 0.1916, "step": 9065 }, { "epoch": 0.161702279456355, "grad_norm": 0.23623839020729065, "learning_rate": 4.942216407911371e-05, "loss": 0.2325, "step": 9066 }, { "epoch": 0.1617201155780687, "grad_norm": 0.21392683684825897, "learning_rate": 4.9421831315247685e-05, "loss": 0.2109, "step": 9067 }, { "epoch": 0.1617379516997824, "grad_norm": 0.3376295864582062, "learning_rate": 4.942149845671421e-05, "loss": 0.2582, "step": 9068 }, { "epoch": 0.1617557878214961, "grad_norm": 0.2988283038139343, "learning_rate": 4.9421165503514566e-05, "loss": 0.2613, "step": 9069 }, { "epoch": 0.1617736239432098, "grad_norm": 0.23068830370903015, "learning_rate": 4.942083245565005e-05, "loss": 0.2025, "step": 9070 }, { "epoch": 0.16179146006492348, "grad_norm": 0.19631221890449524, "learning_rate": 4.9420499313121954e-05, "loss": 0.1988, "step": 9071 }, { "epoch": 0.16180929618663717, "grad_norm": 0.24286353588104248, "learning_rate": 4.9420166075931576e-05, "loss": 0.1754, "step": 9072 }, { "epoch": 0.16182713230835089, "grad_norm": 0.34894347190856934, "learning_rate": 4.9419832744080184e-05, "loss": 0.2563, "step": 9073 }, { "epoch": 0.16184496843006457, "grad_norm": 0.2975502014160156, "learning_rate": 4.94194993175691e-05, "loss": 0.2278, "step": 9074 }, { "epoch": 0.16186280455177826, "grad_norm": 0.23034453392028809, "learning_rate": 4.941916579639959e-05, "loss": 0.1912, "step": 9075 }, { "epoch": 0.16188064067349195, "grad_norm": 0.25986409187316895, "learning_rate": 4.9418832180572973e-05, "loss": 0.2014, "step": 9076 }, { "epoch": 0.16189847679520564, "grad_norm": 0.2645118236541748, "learning_rate": 4.9418498470090515e-05, "loss": 0.2078, "step": 9077 }, { "epoch": 0.16191631291691935, "grad_norm": 0.4214165210723877, "learning_rate": 4.9418164664953534e-05, "loss": 0.2373, "step": 9078 }, { "epoch": 0.16193414903863304, "grad_norm": 0.24069081246852875, "learning_rate": 4.9417830765163305e-05, "loss": 0.222, "step": 9079 }, { "epoch": 0.16195198516034673, "grad_norm": 0.2954716086387634, "learning_rate": 4.9417496770721135e-05, "loss": 0.2126, "step": 9080 }, { "epoch": 0.16196982128206042, "grad_norm": 0.4046162962913513, "learning_rate": 4.941716268162831e-05, "loss": 0.3247, "step": 9081 }, { "epoch": 0.16198765740377413, "grad_norm": 0.26268166303634644, "learning_rate": 4.941682849788614e-05, "loss": 0.2403, "step": 9082 }, { "epoch": 0.16200549352548782, "grad_norm": 0.2406916320323944, "learning_rate": 4.941649421949589e-05, "loss": 0.2215, "step": 9083 }, { "epoch": 0.1620233296472015, "grad_norm": 0.2639237940311432, "learning_rate": 4.941615984645889e-05, "loss": 0.1992, "step": 9084 }, { "epoch": 0.1620411657689152, "grad_norm": 0.317731648683548, "learning_rate": 4.9415825378776414e-05, "loss": 0.2707, "step": 9085 }, { "epoch": 0.1620590018906289, "grad_norm": 0.2969341576099396, "learning_rate": 4.941549081644977e-05, "loss": 0.1779, "step": 9086 }, { "epoch": 0.1620768380123426, "grad_norm": 0.3448761999607086, "learning_rate": 4.941515615948025e-05, "loss": 0.1679, "step": 9087 }, { "epoch": 0.1620946741340563, "grad_norm": 0.29252657294273376, "learning_rate": 4.941482140786916e-05, "loss": 0.194, "step": 9088 }, { "epoch": 0.16211251025576998, "grad_norm": 0.227921724319458, "learning_rate": 4.941448656161778e-05, "loss": 0.2128, "step": 9089 }, { "epoch": 0.1621303463774837, "grad_norm": 0.2951878309249878, "learning_rate": 4.941415162072742e-05, "loss": 0.2238, "step": 9090 }, { "epoch": 0.16214818249919738, "grad_norm": 0.35384219884872437, "learning_rate": 4.941381658519937e-05, "loss": 0.2157, "step": 9091 }, { "epoch": 0.16216601862091107, "grad_norm": 0.25139087438583374, "learning_rate": 4.941348145503494e-05, "loss": 0.1804, "step": 9092 }, { "epoch": 0.16218385474262476, "grad_norm": 0.34730130434036255, "learning_rate": 4.941314623023543e-05, "loss": 0.2003, "step": 9093 }, { "epoch": 0.16220169086433847, "grad_norm": 0.35382080078125, "learning_rate": 4.9412810910802124e-05, "loss": 0.2623, "step": 9094 }, { "epoch": 0.16221952698605216, "grad_norm": 0.31246358156204224, "learning_rate": 4.941247549673633e-05, "loss": 0.1946, "step": 9095 }, { "epoch": 0.16223736310776585, "grad_norm": 0.27841705083847046, "learning_rate": 4.9412139988039356e-05, "loss": 0.2081, "step": 9096 }, { "epoch": 0.16225519922947954, "grad_norm": 0.28796371817588806, "learning_rate": 4.941180438471249e-05, "loss": 0.2546, "step": 9097 }, { "epoch": 0.16227303535119322, "grad_norm": 0.2595517933368683, "learning_rate": 4.9411468686757046e-05, "loss": 0.2342, "step": 9098 }, { "epoch": 0.16229087147290694, "grad_norm": 0.28407278656959534, "learning_rate": 4.941113289417431e-05, "loss": 0.2107, "step": 9099 }, { "epoch": 0.16230870759462063, "grad_norm": 0.2532673478126526, "learning_rate": 4.9410797006965596e-05, "loss": 0.2051, "step": 9100 }, { "epoch": 0.16232654371633432, "grad_norm": 0.2700106203556061, "learning_rate": 4.9410461025132203e-05, "loss": 0.1779, "step": 9101 }, { "epoch": 0.162344379838048, "grad_norm": 0.26635727286338806, "learning_rate": 4.9410124948675426e-05, "loss": 0.2286, "step": 9102 }, { "epoch": 0.16236221595976172, "grad_norm": 0.30911654233932495, "learning_rate": 4.940978877759658e-05, "loss": 0.2352, "step": 9103 }, { "epoch": 0.1623800520814754, "grad_norm": 0.23911771178245544, "learning_rate": 4.940945251189696e-05, "loss": 0.188, "step": 9104 }, { "epoch": 0.1623978882031891, "grad_norm": 0.31234505772590637, "learning_rate": 4.940911615157787e-05, "loss": 0.2039, "step": 9105 }, { "epoch": 0.16241572432490278, "grad_norm": 0.3038794994354248, "learning_rate": 4.940877969664062e-05, "loss": 0.1957, "step": 9106 }, { "epoch": 0.1624335604466165, "grad_norm": 0.4138329029083252, "learning_rate": 4.94084431470865e-05, "loss": 0.2636, "step": 9107 }, { "epoch": 0.1624513965683302, "grad_norm": 0.29021209478378296, "learning_rate": 4.940810650291683e-05, "loss": 0.2374, "step": 9108 }, { "epoch": 0.16246923269004387, "grad_norm": 0.23764866590499878, "learning_rate": 4.9407769764132904e-05, "loss": 0.2121, "step": 9109 }, { "epoch": 0.16248706881175756, "grad_norm": 0.3777204155921936, "learning_rate": 4.940743293073604e-05, "loss": 0.2393, "step": 9110 }, { "epoch": 0.16250490493347128, "grad_norm": 0.22937625646591187, "learning_rate": 4.940709600272753e-05, "loss": 0.1655, "step": 9111 }, { "epoch": 0.16252274105518497, "grad_norm": 0.290792852640152, "learning_rate": 4.940675898010869e-05, "loss": 0.2217, "step": 9112 }, { "epoch": 0.16254057717689865, "grad_norm": 0.42406579852104187, "learning_rate": 4.940642186288082e-05, "loss": 0.2039, "step": 9113 }, { "epoch": 0.16255841329861234, "grad_norm": 0.38261035084724426, "learning_rate": 4.940608465104523e-05, "loss": 0.2746, "step": 9114 }, { "epoch": 0.16257624942032606, "grad_norm": 0.27044060826301575, "learning_rate": 4.940574734460323e-05, "loss": 0.2171, "step": 9115 }, { "epoch": 0.16259408554203975, "grad_norm": 0.27975448966026306, "learning_rate": 4.940540994355612e-05, "loss": 0.2406, "step": 9116 }, { "epoch": 0.16261192166375343, "grad_norm": 0.2940988838672638, "learning_rate": 4.9405072447905204e-05, "loss": 0.2433, "step": 9117 }, { "epoch": 0.16262975778546712, "grad_norm": 0.2927718758583069, "learning_rate": 4.9404734857651804e-05, "loss": 0.2238, "step": 9118 }, { "epoch": 0.1626475939071808, "grad_norm": 0.22832036018371582, "learning_rate": 4.9404397172797224e-05, "loss": 0.219, "step": 9119 }, { "epoch": 0.16266543002889453, "grad_norm": 0.25635266304016113, "learning_rate": 4.940405939334277e-05, "loss": 0.1817, "step": 9120 }, { "epoch": 0.1626832661506082, "grad_norm": 0.27498435974121094, "learning_rate": 4.940372151928976e-05, "loss": 0.1861, "step": 9121 }, { "epoch": 0.1627011022723219, "grad_norm": 0.32306236028671265, "learning_rate": 4.9403383550639485e-05, "loss": 0.2662, "step": 9122 }, { "epoch": 0.1627189383940356, "grad_norm": 0.28174206614494324, "learning_rate": 4.940304548739327e-05, "loss": 0.2218, "step": 9123 }, { "epoch": 0.1627367745157493, "grad_norm": 0.25538182258605957, "learning_rate": 4.940270732955243e-05, "loss": 0.2307, "step": 9124 }, { "epoch": 0.162754610637463, "grad_norm": 0.27320006489753723, "learning_rate": 4.940236907711826e-05, "loss": 0.2023, "step": 9125 }, { "epoch": 0.16277244675917668, "grad_norm": 0.25064462423324585, "learning_rate": 4.940203073009209e-05, "loss": 0.2175, "step": 9126 }, { "epoch": 0.16279028288089037, "grad_norm": 0.23444148898124695, "learning_rate": 4.940169228847521e-05, "loss": 0.2256, "step": 9127 }, { "epoch": 0.16280811900260408, "grad_norm": 0.28661078214645386, "learning_rate": 4.940135375226894e-05, "loss": 0.1922, "step": 9128 }, { "epoch": 0.16282595512431777, "grad_norm": 0.22695735096931458, "learning_rate": 4.9401015121474606e-05, "loss": 0.2143, "step": 9129 }, { "epoch": 0.16284379124603146, "grad_norm": 0.24954082071781158, "learning_rate": 4.94006763960935e-05, "loss": 0.2431, "step": 9130 }, { "epoch": 0.16286162736774515, "grad_norm": 0.27113354206085205, "learning_rate": 4.9400337576126945e-05, "loss": 0.191, "step": 9131 }, { "epoch": 0.16287946348945886, "grad_norm": 0.3145564794540405, "learning_rate": 4.9399998661576265e-05, "loss": 0.2514, "step": 9132 }, { "epoch": 0.16289729961117255, "grad_norm": 0.25162550806999207, "learning_rate": 4.9399659652442756e-05, "loss": 0.1994, "step": 9133 }, { "epoch": 0.16291513573288624, "grad_norm": 0.329497754573822, "learning_rate": 4.939932054872773e-05, "loss": 0.2718, "step": 9134 }, { "epoch": 0.16293297185459993, "grad_norm": 0.26434576511383057, "learning_rate": 4.939898135043251e-05, "loss": 0.2875, "step": 9135 }, { "epoch": 0.16295080797631364, "grad_norm": 0.42806506156921387, "learning_rate": 4.939864205755843e-05, "loss": 0.2814, "step": 9136 }, { "epoch": 0.16296864409802733, "grad_norm": 0.23605118691921234, "learning_rate": 4.9398302670106775e-05, "loss": 0.2328, "step": 9137 }, { "epoch": 0.16298648021974102, "grad_norm": 0.27913331985473633, "learning_rate": 4.939796318807887e-05, "loss": 0.2526, "step": 9138 }, { "epoch": 0.1630043163414547, "grad_norm": 0.2679133713245392, "learning_rate": 4.939762361147604e-05, "loss": 0.2347, "step": 9139 }, { "epoch": 0.1630221524631684, "grad_norm": 0.2884804606437683, "learning_rate": 4.9397283940299585e-05, "loss": 0.2805, "step": 9140 }, { "epoch": 0.1630399885848821, "grad_norm": 0.24972495436668396, "learning_rate": 4.939694417455083e-05, "loss": 0.204, "step": 9141 }, { "epoch": 0.1630578247065958, "grad_norm": 0.2075093686580658, "learning_rate": 4.93966043142311e-05, "loss": 0.2175, "step": 9142 }, { "epoch": 0.1630756608283095, "grad_norm": 0.24308235943317413, "learning_rate": 4.939626435934171e-05, "loss": 0.1705, "step": 9143 }, { "epoch": 0.16309349695002318, "grad_norm": 0.22188474237918854, "learning_rate": 4.9395924309883966e-05, "loss": 0.2176, "step": 9144 }, { "epoch": 0.1631113330717369, "grad_norm": 0.23017635941505432, "learning_rate": 4.9395584165859197e-05, "loss": 0.2217, "step": 9145 }, { "epoch": 0.16312916919345058, "grad_norm": 0.2875329852104187, "learning_rate": 4.939524392726871e-05, "loss": 0.1913, "step": 9146 }, { "epoch": 0.16314700531516427, "grad_norm": 0.24955126643180847, "learning_rate": 4.939490359411384e-05, "loss": 0.217, "step": 9147 }, { "epoch": 0.16316484143687796, "grad_norm": 0.3250877559185028, "learning_rate": 4.939456316639589e-05, "loss": 0.2772, "step": 9148 }, { "epoch": 0.16318267755859167, "grad_norm": 0.22584985196590424, "learning_rate": 4.939422264411619e-05, "loss": 0.1957, "step": 9149 }, { "epoch": 0.16320051368030536, "grad_norm": 0.24119976162910461, "learning_rate": 4.939388202727606e-05, "loss": 0.2039, "step": 9150 }, { "epoch": 0.16321834980201905, "grad_norm": 0.5231024026870728, "learning_rate": 4.939354131587682e-05, "loss": 0.18, "step": 9151 }, { "epoch": 0.16323618592373274, "grad_norm": 0.26688915491104126, "learning_rate": 4.939320050991979e-05, "loss": 0.2165, "step": 9152 }, { "epoch": 0.16325402204544645, "grad_norm": 0.23372162878513336, "learning_rate": 4.9392859609406284e-05, "loss": 0.2203, "step": 9153 }, { "epoch": 0.16327185816716014, "grad_norm": 0.2783568203449249, "learning_rate": 4.939251861433763e-05, "loss": 0.2346, "step": 9154 }, { "epoch": 0.16328969428887383, "grad_norm": 0.27722346782684326, "learning_rate": 4.939217752471515e-05, "loss": 0.2122, "step": 9155 }, { "epoch": 0.16330753041058751, "grad_norm": 0.2658185362815857, "learning_rate": 4.9391836340540166e-05, "loss": 0.2288, "step": 9156 }, { "epoch": 0.1633253665323012, "grad_norm": 0.29888370633125305, "learning_rate": 4.9391495061813994e-05, "loss": 0.1779, "step": 9157 }, { "epoch": 0.16334320265401492, "grad_norm": 0.448290079832077, "learning_rate": 4.939115368853797e-05, "loss": 0.2321, "step": 9158 }, { "epoch": 0.1633610387757286, "grad_norm": 0.16972346603870392, "learning_rate": 4.939081222071341e-05, "loss": 0.1749, "step": 9159 }, { "epoch": 0.1633788748974423, "grad_norm": 0.2823371887207031, "learning_rate": 4.939047065834164e-05, "loss": 0.195, "step": 9160 }, { "epoch": 0.16339671101915598, "grad_norm": 0.3754192590713501, "learning_rate": 4.939012900142397e-05, "loss": 0.2495, "step": 9161 }, { "epoch": 0.1634145471408697, "grad_norm": 0.40936192870140076, "learning_rate": 4.938978724996174e-05, "loss": 0.1623, "step": 9162 }, { "epoch": 0.16343238326258339, "grad_norm": 0.27821943163871765, "learning_rate": 4.9389445403956275e-05, "loss": 0.2047, "step": 9163 }, { "epoch": 0.16345021938429707, "grad_norm": 0.28809860348701477, "learning_rate": 4.938910346340889e-05, "loss": 0.1842, "step": 9164 }, { "epoch": 0.16346805550601076, "grad_norm": 0.26269033551216125, "learning_rate": 4.938876142832092e-05, "loss": 0.1676, "step": 9165 }, { "epoch": 0.16348589162772448, "grad_norm": 0.3284805417060852, "learning_rate": 4.938841929869369e-05, "loss": 0.257, "step": 9166 }, { "epoch": 0.16350372774943817, "grad_norm": 0.27647194266319275, "learning_rate": 4.938807707452852e-05, "loss": 0.2268, "step": 9167 }, { "epoch": 0.16352156387115185, "grad_norm": 0.405900776386261, "learning_rate": 4.9387734755826745e-05, "loss": 0.2674, "step": 9168 }, { "epoch": 0.16353939999286554, "grad_norm": 0.2592333257198334, "learning_rate": 4.938739234258968e-05, "loss": 0.2214, "step": 9169 }, { "epoch": 0.16355723611457926, "grad_norm": 0.22996890544891357, "learning_rate": 4.938704983481866e-05, "loss": 0.2299, "step": 9170 }, { "epoch": 0.16357507223629295, "grad_norm": 0.3244262933731079, "learning_rate": 4.9386707232515015e-05, "loss": 0.2111, "step": 9171 }, { "epoch": 0.16359290835800663, "grad_norm": 0.2678900957107544, "learning_rate": 4.938636453568006e-05, "loss": 0.1915, "step": 9172 }, { "epoch": 0.16361074447972032, "grad_norm": 0.25056418776512146, "learning_rate": 4.938602174431515e-05, "loss": 0.1759, "step": 9173 }, { "epoch": 0.16362858060143404, "grad_norm": 0.32993167638778687, "learning_rate": 4.938567885842158e-05, "loss": 0.2133, "step": 9174 }, { "epoch": 0.16364641672314773, "grad_norm": 0.2732590138912201, "learning_rate": 4.938533587800071e-05, "loss": 0.2263, "step": 9175 }, { "epoch": 0.1636642528448614, "grad_norm": 0.2834323048591614, "learning_rate": 4.9384992803053845e-05, "loss": 0.1817, "step": 9176 }, { "epoch": 0.1636820889665751, "grad_norm": 0.27836138010025024, "learning_rate": 4.9384649633582334e-05, "loss": 0.229, "step": 9177 }, { "epoch": 0.1636999250882888, "grad_norm": 0.261381596326828, "learning_rate": 4.9384306369587496e-05, "loss": 0.2145, "step": 9178 }, { "epoch": 0.1637177612100025, "grad_norm": 0.27303367853164673, "learning_rate": 4.9383963011070665e-05, "loss": 0.1928, "step": 9179 }, { "epoch": 0.1637355973317162, "grad_norm": 0.2684634029865265, "learning_rate": 4.9383619558033164e-05, "loss": 0.2303, "step": 9180 }, { "epoch": 0.16375343345342988, "grad_norm": 0.28900694847106934, "learning_rate": 4.938327601047634e-05, "loss": 0.2435, "step": 9181 }, { "epoch": 0.16377126957514357, "grad_norm": 0.24638424813747406, "learning_rate": 4.9382932368401516e-05, "loss": 0.1906, "step": 9182 }, { "epoch": 0.16378910569685728, "grad_norm": 0.28836917877197266, "learning_rate": 4.9382588631810025e-05, "loss": 0.1684, "step": 9183 }, { "epoch": 0.16380694181857097, "grad_norm": 0.24633167684078217, "learning_rate": 4.938224480070319e-05, "loss": 0.1984, "step": 9184 }, { "epoch": 0.16382477794028466, "grad_norm": 0.2866842746734619, "learning_rate": 4.9381900875082364e-05, "loss": 0.2271, "step": 9185 }, { "epoch": 0.16384261406199835, "grad_norm": 0.3088231086730957, "learning_rate": 4.9381556854948864e-05, "loss": 0.214, "step": 9186 }, { "epoch": 0.16386045018371206, "grad_norm": 0.29733720421791077, "learning_rate": 4.938121274030403e-05, "loss": 0.2241, "step": 9187 }, { "epoch": 0.16387828630542575, "grad_norm": 0.3455222249031067, "learning_rate": 4.9380868531149193e-05, "loss": 0.1825, "step": 9188 }, { "epoch": 0.16389612242713944, "grad_norm": 0.2591247260570526, "learning_rate": 4.938052422748569e-05, "loss": 0.207, "step": 9189 }, { "epoch": 0.16391395854885313, "grad_norm": 0.3061193823814392, "learning_rate": 4.9380179829314856e-05, "loss": 0.219, "step": 9190 }, { "epoch": 0.16393179467056684, "grad_norm": 0.2616443336009979, "learning_rate": 4.9379835336638026e-05, "loss": 0.2156, "step": 9191 }, { "epoch": 0.16394963079228053, "grad_norm": 0.29381993412971497, "learning_rate": 4.9379490749456523e-05, "loss": 0.2207, "step": 9192 }, { "epoch": 0.16396746691399422, "grad_norm": 0.24452641606330872, "learning_rate": 4.9379146067771705e-05, "loss": 0.2204, "step": 9193 }, { "epoch": 0.1639853030357079, "grad_norm": 0.26373422145843506, "learning_rate": 4.937880129158488e-05, "loss": 0.1731, "step": 9194 }, { "epoch": 0.16400313915742162, "grad_norm": 0.21541623771190643, "learning_rate": 4.937845642089742e-05, "loss": 0.1844, "step": 9195 }, { "epoch": 0.1640209752791353, "grad_norm": 0.27089133858680725, "learning_rate": 4.937811145571064e-05, "loss": 0.1617, "step": 9196 }, { "epoch": 0.164038811400849, "grad_norm": 0.19446557760238647, "learning_rate": 4.937776639602587e-05, "loss": 0.177, "step": 9197 }, { "epoch": 0.1640566475225627, "grad_norm": 0.3105425536632538, "learning_rate": 4.937742124184447e-05, "loss": 0.2382, "step": 9198 }, { "epoch": 0.16407448364427638, "grad_norm": 0.2988620400428772, "learning_rate": 4.937707599316776e-05, "loss": 0.2219, "step": 9199 }, { "epoch": 0.1640923197659901, "grad_norm": 0.36146506667137146, "learning_rate": 4.9376730649997085e-05, "loss": 0.212, "step": 9200 }, { "epoch": 0.16411015588770378, "grad_norm": 0.21937191486358643, "learning_rate": 4.9376385212333775e-05, "loss": 0.1952, "step": 9201 }, { "epoch": 0.16412799200941747, "grad_norm": 0.24174143373966217, "learning_rate": 4.937603968017918e-05, "loss": 0.2014, "step": 9202 }, { "epoch": 0.16414582813113116, "grad_norm": 0.3496522009372711, "learning_rate": 4.9375694053534636e-05, "loss": 0.2198, "step": 9203 }, { "epoch": 0.16416366425284487, "grad_norm": 0.2990228235721588, "learning_rate": 4.937534833240149e-05, "loss": 0.227, "step": 9204 }, { "epoch": 0.16418150037455856, "grad_norm": 0.24706658720970154, "learning_rate": 4.937500251678107e-05, "loss": 0.2213, "step": 9205 }, { "epoch": 0.16419933649627225, "grad_norm": 0.2717100977897644, "learning_rate": 4.937465660667472e-05, "loss": 0.1839, "step": 9206 }, { "epoch": 0.16421717261798593, "grad_norm": 0.40295538306236267, "learning_rate": 4.9374310602083785e-05, "loss": 0.2493, "step": 9207 }, { "epoch": 0.16423500873969965, "grad_norm": 0.35368722677230835, "learning_rate": 4.9373964503009594e-05, "loss": 0.2407, "step": 9208 }, { "epoch": 0.16425284486141334, "grad_norm": 0.24363276362419128, "learning_rate": 4.937361830945351e-05, "loss": 0.2023, "step": 9209 }, { "epoch": 0.16427068098312703, "grad_norm": 0.30939704179763794, "learning_rate": 4.937327202141686e-05, "loss": 0.2017, "step": 9210 }, { "epoch": 0.16428851710484071, "grad_norm": 0.30299273133277893, "learning_rate": 4.937292563890099e-05, "loss": 0.2789, "step": 9211 }, { "epoch": 0.16430635322655443, "grad_norm": 0.36112216114997864, "learning_rate": 4.937257916190724e-05, "loss": 0.2697, "step": 9212 }, { "epoch": 0.16432418934826812, "grad_norm": 0.20341669023036957, "learning_rate": 4.937223259043695e-05, "loss": 0.2112, "step": 9213 }, { "epoch": 0.1643420254699818, "grad_norm": 0.19208763539791107, "learning_rate": 4.937188592449148e-05, "loss": 0.1909, "step": 9214 }, { "epoch": 0.1643598615916955, "grad_norm": 0.26996225118637085, "learning_rate": 4.937153916407216e-05, "loss": 0.243, "step": 9215 }, { "epoch": 0.1643776977134092, "grad_norm": 0.24019834399223328, "learning_rate": 4.9371192309180325e-05, "loss": 0.2197, "step": 9216 }, { "epoch": 0.1643955338351229, "grad_norm": 0.28452110290527344, "learning_rate": 4.937084535981734e-05, "loss": 0.2213, "step": 9217 }, { "epoch": 0.16441336995683659, "grad_norm": 0.2400692254304886, "learning_rate": 4.937049831598454e-05, "loss": 0.1732, "step": 9218 }, { "epoch": 0.16443120607855027, "grad_norm": 0.3314850330352783, "learning_rate": 4.937015117768328e-05, "loss": 0.2573, "step": 9219 }, { "epoch": 0.16444904220026396, "grad_norm": 0.3499220013618469, "learning_rate": 4.936980394491488e-05, "loss": 0.2428, "step": 9220 }, { "epoch": 0.16446687832197768, "grad_norm": 0.20323525369167328, "learning_rate": 4.9369456617680706e-05, "loss": 0.1806, "step": 9221 }, { "epoch": 0.16448471444369137, "grad_norm": 0.27988317608833313, "learning_rate": 4.936910919598211e-05, "loss": 0.2443, "step": 9222 }, { "epoch": 0.16450255056540505, "grad_norm": 0.33889874815940857, "learning_rate": 4.936876167982043e-05, "loss": 0.1574, "step": 9223 }, { "epoch": 0.16452038668711874, "grad_norm": 0.26385030150413513, "learning_rate": 4.936841406919701e-05, "loss": 0.19, "step": 9224 }, { "epoch": 0.16453822280883246, "grad_norm": 0.22243303060531616, "learning_rate": 4.93680663641132e-05, "loss": 0.1955, "step": 9225 }, { "epoch": 0.16455605893054615, "grad_norm": 0.33228254318237305, "learning_rate": 4.9367718564570344e-05, "loss": 0.2159, "step": 9226 }, { "epoch": 0.16457389505225983, "grad_norm": 0.40989211201667786, "learning_rate": 4.93673706705698e-05, "loss": 0.1839, "step": 9227 }, { "epoch": 0.16459173117397352, "grad_norm": 0.2205306887626648, "learning_rate": 4.9367022682112905e-05, "loss": 0.2204, "step": 9228 }, { "epoch": 0.16460956729568724, "grad_norm": 0.25861892104148865, "learning_rate": 4.9366674599201026e-05, "loss": 0.1944, "step": 9229 }, { "epoch": 0.16462740341740092, "grad_norm": 0.297453910112381, "learning_rate": 4.936632642183549e-05, "loss": 0.1802, "step": 9230 }, { "epoch": 0.1646452395391146, "grad_norm": 0.3584047853946686, "learning_rate": 4.936597815001766e-05, "loss": 0.1766, "step": 9231 }, { "epoch": 0.1646630756608283, "grad_norm": 0.26932886242866516, "learning_rate": 4.9365629783748886e-05, "loss": 0.191, "step": 9232 }, { "epoch": 0.16468091178254202, "grad_norm": 0.23983994126319885, "learning_rate": 4.936528132303051e-05, "loss": 0.1901, "step": 9233 }, { "epoch": 0.1646987479042557, "grad_norm": 0.30796483159065247, "learning_rate": 4.9364932767863895e-05, "loss": 0.1925, "step": 9234 }, { "epoch": 0.1647165840259694, "grad_norm": 0.2939542829990387, "learning_rate": 4.936458411825038e-05, "loss": 0.221, "step": 9235 }, { "epoch": 0.16473442014768308, "grad_norm": 0.22121989727020264, "learning_rate": 4.936423537419131e-05, "loss": 0.144, "step": 9236 }, { "epoch": 0.1647522562693968, "grad_norm": 0.40346765518188477, "learning_rate": 4.936388653568806e-05, "loss": 0.2865, "step": 9237 }, { "epoch": 0.16477009239111048, "grad_norm": 0.23672229051589966, "learning_rate": 4.936353760274198e-05, "loss": 0.2238, "step": 9238 }, { "epoch": 0.16478792851282417, "grad_norm": 0.2538418173789978, "learning_rate": 4.93631885753544e-05, "loss": 0.2314, "step": 9239 }, { "epoch": 0.16480576463453786, "grad_norm": 0.2791252136230469, "learning_rate": 4.9362839453526696e-05, "loss": 0.1656, "step": 9240 }, { "epoch": 0.16482360075625155, "grad_norm": 0.3458743095397949, "learning_rate": 4.93624902372602e-05, "loss": 0.2176, "step": 9241 }, { "epoch": 0.16484143687796526, "grad_norm": 0.2711387872695923, "learning_rate": 4.9362140926556286e-05, "loss": 0.2272, "step": 9242 }, { "epoch": 0.16485927299967895, "grad_norm": 0.2151830792427063, "learning_rate": 4.93617915214163e-05, "loss": 0.1776, "step": 9243 }, { "epoch": 0.16487710912139264, "grad_norm": 0.22689075767993927, "learning_rate": 4.93614420218416e-05, "loss": 0.236, "step": 9244 }, { "epoch": 0.16489494524310633, "grad_norm": 0.2275635004043579, "learning_rate": 4.9361092427833525e-05, "loss": 0.1879, "step": 9245 }, { "epoch": 0.16491278136482004, "grad_norm": 0.25142842531204224, "learning_rate": 4.936074273939345e-05, "loss": 0.2016, "step": 9246 }, { "epoch": 0.16493061748653373, "grad_norm": 0.2320721447467804, "learning_rate": 4.936039295652272e-05, "loss": 0.1782, "step": 9247 }, { "epoch": 0.16494845360824742, "grad_norm": 0.24124976992607117, "learning_rate": 4.9360043079222686e-05, "loss": 0.1985, "step": 9248 }, { "epoch": 0.1649662897299611, "grad_norm": 0.22440405189990997, "learning_rate": 4.935969310749472e-05, "loss": 0.1928, "step": 9249 }, { "epoch": 0.16498412585167482, "grad_norm": 0.32603585720062256, "learning_rate": 4.9359343041340166e-05, "loss": 0.2157, "step": 9250 }, { "epoch": 0.1650019619733885, "grad_norm": 0.31903767585754395, "learning_rate": 4.935899288076038e-05, "loss": 0.2224, "step": 9251 }, { "epoch": 0.1650197980951022, "grad_norm": 0.3269581198692322, "learning_rate": 4.935864262575674e-05, "loss": 0.2286, "step": 9252 }, { "epoch": 0.1650376342168159, "grad_norm": 0.359083354473114, "learning_rate": 4.935829227633058e-05, "loss": 0.2729, "step": 9253 }, { "epoch": 0.1650554703385296, "grad_norm": 0.30463945865631104, "learning_rate": 4.9357941832483255e-05, "loss": 0.2887, "step": 9254 }, { "epoch": 0.1650733064602433, "grad_norm": 0.3253450393676758, "learning_rate": 4.935759129421614e-05, "loss": 0.1632, "step": 9255 }, { "epoch": 0.16509114258195698, "grad_norm": 0.24560873210430145, "learning_rate": 4.9357240661530596e-05, "loss": 0.2428, "step": 9256 }, { "epoch": 0.16510897870367067, "grad_norm": 0.2875535488128662, "learning_rate": 4.9356889934427975e-05, "loss": 0.1914, "step": 9257 }, { "epoch": 0.16512681482538435, "grad_norm": 0.2681664824485779, "learning_rate": 4.935653911290963e-05, "loss": 0.2437, "step": 9258 }, { "epoch": 0.16514465094709807, "grad_norm": 0.22050854563713074, "learning_rate": 4.935618819697693e-05, "loss": 0.1901, "step": 9259 }, { "epoch": 0.16516248706881176, "grad_norm": 0.2631033957004547, "learning_rate": 4.935583718663123e-05, "loss": 0.229, "step": 9260 }, { "epoch": 0.16518032319052545, "grad_norm": 0.22643926739692688, "learning_rate": 4.935548608187389e-05, "loss": 0.2473, "step": 9261 }, { "epoch": 0.16519815931223913, "grad_norm": 0.283899188041687, "learning_rate": 4.935513488270628e-05, "loss": 0.2028, "step": 9262 }, { "epoch": 0.16521599543395285, "grad_norm": 0.33085399866104126, "learning_rate": 4.935478358912975e-05, "loss": 0.1895, "step": 9263 }, { "epoch": 0.16523383155566654, "grad_norm": 0.3649313449859619, "learning_rate": 4.935443220114567e-05, "loss": 0.2429, "step": 9264 }, { "epoch": 0.16525166767738023, "grad_norm": 0.23980526626110077, "learning_rate": 4.93540807187554e-05, "loss": 0.1669, "step": 9265 }, { "epoch": 0.16526950379909391, "grad_norm": 0.3209800124168396, "learning_rate": 4.93537291419603e-05, "loss": 0.1951, "step": 9266 }, { "epoch": 0.16528733992080763, "grad_norm": 0.3084694743156433, "learning_rate": 4.935337747076173e-05, "loss": 0.1937, "step": 9267 }, { "epoch": 0.16530517604252132, "grad_norm": 0.4329877197742462, "learning_rate": 4.935302570516106e-05, "loss": 0.271, "step": 9268 }, { "epoch": 0.165323012164235, "grad_norm": 0.30714884400367737, "learning_rate": 4.935267384515966e-05, "loss": 0.2226, "step": 9269 }, { "epoch": 0.1653408482859487, "grad_norm": 0.2898719608783722, "learning_rate": 4.935232189075887e-05, "loss": 0.2108, "step": 9270 }, { "epoch": 0.1653586844076624, "grad_norm": 0.2915104627609253, "learning_rate": 4.935196984196008e-05, "loss": 0.2004, "step": 9271 }, { "epoch": 0.1653765205293761, "grad_norm": 0.2621375620365143, "learning_rate": 4.935161769876464e-05, "loss": 0.1941, "step": 9272 }, { "epoch": 0.16539435665108979, "grad_norm": 0.285562127828598, "learning_rate": 4.9351265461173926e-05, "loss": 0.2176, "step": 9273 }, { "epoch": 0.16541219277280347, "grad_norm": 0.32798701524734497, "learning_rate": 4.935091312918929e-05, "loss": 0.2213, "step": 9274 }, { "epoch": 0.1654300288945172, "grad_norm": 0.24334648251533508, "learning_rate": 4.93505607028121e-05, "loss": 0.2243, "step": 9275 }, { "epoch": 0.16544786501623088, "grad_norm": 0.3257021903991699, "learning_rate": 4.9350208182043734e-05, "loss": 0.2943, "step": 9276 }, { "epoch": 0.16546570113794457, "grad_norm": 0.34047359228134155, "learning_rate": 4.934985556688555e-05, "loss": 0.2667, "step": 9277 }, { "epoch": 0.16548353725965825, "grad_norm": 0.2405623346567154, "learning_rate": 4.934950285733892e-05, "loss": 0.2246, "step": 9278 }, { "epoch": 0.16550137338137194, "grad_norm": 0.2989078462123871, "learning_rate": 4.934915005340519e-05, "loss": 0.1881, "step": 9279 }, { "epoch": 0.16551920950308566, "grad_norm": 0.2507303059101105, "learning_rate": 4.934879715508576e-05, "loss": 0.2, "step": 9280 }, { "epoch": 0.16553704562479934, "grad_norm": 0.2618032991886139, "learning_rate": 4.934844416238197e-05, "loss": 0.1976, "step": 9281 }, { "epoch": 0.16555488174651303, "grad_norm": 0.31148505210876465, "learning_rate": 4.934809107529521e-05, "loss": 0.2888, "step": 9282 }, { "epoch": 0.16557271786822672, "grad_norm": 0.21653443574905396, "learning_rate": 4.934773789382684e-05, "loss": 0.2061, "step": 9283 }, { "epoch": 0.16559055398994044, "grad_norm": 0.2662765681743622, "learning_rate": 4.934738461797823e-05, "loss": 0.2127, "step": 9284 }, { "epoch": 0.16560839011165412, "grad_norm": 0.2488943487405777, "learning_rate": 4.934703124775074e-05, "loss": 0.1938, "step": 9285 }, { "epoch": 0.1656262262333678, "grad_norm": 0.24388650059700012, "learning_rate": 4.934667778314575e-05, "loss": 0.1947, "step": 9286 }, { "epoch": 0.1656440623550815, "grad_norm": 0.2611672282218933, "learning_rate": 4.934632422416463e-05, "loss": 0.2194, "step": 9287 }, { "epoch": 0.16566189847679522, "grad_norm": 0.32193028926849365, "learning_rate": 4.934597057080875e-05, "loss": 0.163, "step": 9288 }, { "epoch": 0.1656797345985089, "grad_norm": 0.38710469007492065, "learning_rate": 4.934561682307948e-05, "loss": 0.215, "step": 9289 }, { "epoch": 0.1656975707202226, "grad_norm": 0.24965006113052368, "learning_rate": 4.9345262980978176e-05, "loss": 0.1624, "step": 9290 }, { "epoch": 0.16571540684193628, "grad_norm": 0.21670930087566376, "learning_rate": 4.934490904450624e-05, "loss": 0.1711, "step": 9291 }, { "epoch": 0.16573324296365, "grad_norm": 0.16519004106521606, "learning_rate": 4.9344555013665025e-05, "loss": 0.1714, "step": 9292 }, { "epoch": 0.16575107908536368, "grad_norm": 0.2520431876182556, "learning_rate": 4.93442008884559e-05, "loss": 0.2168, "step": 9293 }, { "epoch": 0.16576891520707737, "grad_norm": 0.21662983298301697, "learning_rate": 4.9343846668880254e-05, "loss": 0.2019, "step": 9294 }, { "epoch": 0.16578675132879106, "grad_norm": 0.2764663100242615, "learning_rate": 4.9343492354939444e-05, "loss": 0.2365, "step": 9295 }, { "epoch": 0.16580458745050478, "grad_norm": 0.26178547739982605, "learning_rate": 4.934313794663485e-05, "loss": 0.1978, "step": 9296 }, { "epoch": 0.16582242357221846, "grad_norm": 0.25380316376686096, "learning_rate": 4.934278344396785e-05, "loss": 0.2209, "step": 9297 }, { "epoch": 0.16584025969393215, "grad_norm": 0.2863463759422302, "learning_rate": 4.9342428846939815e-05, "loss": 0.2536, "step": 9298 }, { "epoch": 0.16585809581564584, "grad_norm": 0.29264238476753235, "learning_rate": 4.934207415555211e-05, "loss": 0.1947, "step": 9299 }, { "epoch": 0.16587593193735953, "grad_norm": 0.27421027421951294, "learning_rate": 4.934171936980612e-05, "loss": 0.1866, "step": 9300 }, { "epoch": 0.16589376805907324, "grad_norm": 0.24602636694908142, "learning_rate": 4.9341364489703225e-05, "loss": 0.1794, "step": 9301 }, { "epoch": 0.16591160418078693, "grad_norm": 0.3985421061515808, "learning_rate": 4.934100951524479e-05, "loss": 0.2384, "step": 9302 }, { "epoch": 0.16592944030250062, "grad_norm": 0.2679640054702759, "learning_rate": 4.9340654446432185e-05, "loss": 0.2029, "step": 9303 }, { "epoch": 0.1659472764242143, "grad_norm": 0.319214403629303, "learning_rate": 4.934029928326681e-05, "loss": 0.2091, "step": 9304 }, { "epoch": 0.16596511254592802, "grad_norm": 0.2522352337837219, "learning_rate": 4.9339944025750024e-05, "loss": 0.2142, "step": 9305 }, { "epoch": 0.1659829486676417, "grad_norm": 0.4243687689304352, "learning_rate": 4.933958867388321e-05, "loss": 0.1222, "step": 9306 }, { "epoch": 0.1660007847893554, "grad_norm": 0.34678542613983154, "learning_rate": 4.933923322766774e-05, "loss": 0.2195, "step": 9307 }, { "epoch": 0.1660186209110691, "grad_norm": 0.2175455540418625, "learning_rate": 4.9338877687104995e-05, "loss": 0.1939, "step": 9308 }, { "epoch": 0.1660364570327828, "grad_norm": 0.34130173921585083, "learning_rate": 4.9338522052196354e-05, "loss": 0.1974, "step": 9309 }, { "epoch": 0.1660542931544965, "grad_norm": 0.2852207124233246, "learning_rate": 4.9338166322943205e-05, "loss": 0.2181, "step": 9310 }, { "epoch": 0.16607212927621018, "grad_norm": 0.2939755320549011, "learning_rate": 4.93378104993469e-05, "loss": 0.2153, "step": 9311 }, { "epoch": 0.16608996539792387, "grad_norm": 0.29781708121299744, "learning_rate": 4.933745458140885e-05, "loss": 0.2711, "step": 9312 }, { "epoch": 0.16610780151963758, "grad_norm": 0.2629675567150116, "learning_rate": 4.9337098569130416e-05, "loss": 0.1836, "step": 9313 }, { "epoch": 0.16612563764135127, "grad_norm": 0.28269559144973755, "learning_rate": 4.9336742462512976e-05, "loss": 0.2018, "step": 9314 }, { "epoch": 0.16614347376306496, "grad_norm": 0.23797199130058289, "learning_rate": 4.933638626155792e-05, "loss": 0.1891, "step": 9315 }, { "epoch": 0.16616130988477865, "grad_norm": 0.31651830673217773, "learning_rate": 4.933602996626663e-05, "loss": 0.219, "step": 9316 }, { "epoch": 0.16617914600649236, "grad_norm": 0.24222183227539062, "learning_rate": 4.9335673576640476e-05, "loss": 0.1917, "step": 9317 }, { "epoch": 0.16619698212820605, "grad_norm": 0.2895123362541199, "learning_rate": 4.9335317092680856e-05, "loss": 0.1995, "step": 9318 }, { "epoch": 0.16621481824991974, "grad_norm": 0.32943058013916016, "learning_rate": 4.933496051438913e-05, "loss": 0.1907, "step": 9319 }, { "epoch": 0.16623265437163343, "grad_norm": 0.3121800124645233, "learning_rate": 4.9334603841766695e-05, "loss": 0.2735, "step": 9320 }, { "epoch": 0.16625049049334711, "grad_norm": 0.2764334976673126, "learning_rate": 4.933424707481493e-05, "loss": 0.1593, "step": 9321 }, { "epoch": 0.16626832661506083, "grad_norm": 0.2478717565536499, "learning_rate": 4.933389021353523e-05, "loss": 0.2118, "step": 9322 }, { "epoch": 0.16628616273677452, "grad_norm": 0.25999343395233154, "learning_rate": 4.9333533257928954e-05, "loss": 0.2372, "step": 9323 }, { "epoch": 0.1663039988584882, "grad_norm": 0.2991454601287842, "learning_rate": 4.93331762079975e-05, "loss": 0.2344, "step": 9324 }, { "epoch": 0.1663218349802019, "grad_norm": 0.390278160572052, "learning_rate": 4.933281906374225e-05, "loss": 0.2002, "step": 9325 }, { "epoch": 0.1663396711019156, "grad_norm": 0.2732143998146057, "learning_rate": 4.93324618251646e-05, "loss": 0.2408, "step": 9326 }, { "epoch": 0.1663575072236293, "grad_norm": 0.2051214575767517, "learning_rate": 4.9332104492265915e-05, "loss": 0.1909, "step": 9327 }, { "epoch": 0.16637534334534299, "grad_norm": 0.39487937092781067, "learning_rate": 4.933174706504759e-05, "loss": 0.2202, "step": 9328 }, { "epoch": 0.16639317946705667, "grad_norm": 0.3209646940231323, "learning_rate": 4.9331389543511006e-05, "loss": 0.2248, "step": 9329 }, { "epoch": 0.1664110155887704, "grad_norm": 0.21034787595272064, "learning_rate": 4.933103192765756e-05, "loss": 0.1834, "step": 9330 }, { "epoch": 0.16642885171048408, "grad_norm": 0.22905397415161133, "learning_rate": 4.9330674217488626e-05, "loss": 0.2089, "step": 9331 }, { "epoch": 0.16644668783219776, "grad_norm": 0.26188793778419495, "learning_rate": 4.9330316413005596e-05, "loss": 0.2152, "step": 9332 }, { "epoch": 0.16646452395391145, "grad_norm": 0.32350656390190125, "learning_rate": 4.932995851420985e-05, "loss": 0.2177, "step": 9333 }, { "epoch": 0.16648236007562517, "grad_norm": 0.27724212408065796, "learning_rate": 4.932960052110279e-05, "loss": 0.222, "step": 9334 }, { "epoch": 0.16650019619733886, "grad_norm": 0.26598259806632996, "learning_rate": 4.93292424336858e-05, "loss": 0.2034, "step": 9335 }, { "epoch": 0.16651803231905254, "grad_norm": 0.26200130581855774, "learning_rate": 4.932888425196025e-05, "loss": 0.208, "step": 9336 }, { "epoch": 0.16653586844076623, "grad_norm": 0.22515469789505005, "learning_rate": 4.9328525975927545e-05, "loss": 0.2054, "step": 9337 }, { "epoch": 0.16655370456247992, "grad_norm": 0.2405278980731964, "learning_rate": 4.932816760558907e-05, "loss": 0.2007, "step": 9338 }, { "epoch": 0.16657154068419364, "grad_norm": 0.2784559726715088, "learning_rate": 4.9327809140946225e-05, "loss": 0.2017, "step": 9339 }, { "epoch": 0.16658937680590732, "grad_norm": 0.25360438227653503, "learning_rate": 4.932745058200038e-05, "loss": 0.2042, "step": 9340 }, { "epoch": 0.166607212927621, "grad_norm": 0.24294637143611908, "learning_rate": 4.932709192875293e-05, "loss": 0.2125, "step": 9341 }, { "epoch": 0.1666250490493347, "grad_norm": 0.2221154272556305, "learning_rate": 4.9326733181205284e-05, "loss": 0.1764, "step": 9342 }, { "epoch": 0.16664288517104842, "grad_norm": 0.23242053389549255, "learning_rate": 4.932637433935881e-05, "loss": 0.1794, "step": 9343 }, { "epoch": 0.1666607212927621, "grad_norm": 0.27005764842033386, "learning_rate": 4.93260154032149e-05, "loss": 0.1828, "step": 9344 }, { "epoch": 0.1666785574144758, "grad_norm": 0.3330439329147339, "learning_rate": 4.9325656372774966e-05, "loss": 0.2613, "step": 9345 }, { "epoch": 0.16669639353618948, "grad_norm": 0.29532188177108765, "learning_rate": 4.932529724804037e-05, "loss": 0.2227, "step": 9346 }, { "epoch": 0.1667142296579032, "grad_norm": 0.2895061671733856, "learning_rate": 4.9324938029012535e-05, "loss": 0.2338, "step": 9347 }, { "epoch": 0.16673206577961688, "grad_norm": 0.23465098440647125, "learning_rate": 4.932457871569282e-05, "loss": 0.1812, "step": 9348 }, { "epoch": 0.16674990190133057, "grad_norm": 0.2588139474391937, "learning_rate": 4.932421930808266e-05, "loss": 0.1632, "step": 9349 }, { "epoch": 0.16676773802304426, "grad_norm": 0.2403249442577362, "learning_rate": 4.932385980618341e-05, "loss": 0.1964, "step": 9350 }, { "epoch": 0.16678557414475798, "grad_norm": 0.2389722466468811, "learning_rate": 4.9323500209996486e-05, "loss": 0.1993, "step": 9351 }, { "epoch": 0.16680341026647166, "grad_norm": 0.2843533158302307, "learning_rate": 4.932314051952327e-05, "loss": 0.2609, "step": 9352 }, { "epoch": 0.16682124638818535, "grad_norm": 0.30103957653045654, "learning_rate": 4.932278073476516e-05, "loss": 0.2318, "step": 9353 }, { "epoch": 0.16683908250989904, "grad_norm": 0.28298187255859375, "learning_rate": 4.932242085572355e-05, "loss": 0.2628, "step": 9354 }, { "epoch": 0.16685691863161276, "grad_norm": 0.3170102834701538, "learning_rate": 4.9322060882399836e-05, "loss": 0.2369, "step": 9355 }, { "epoch": 0.16687475475332644, "grad_norm": 0.30096954107284546, "learning_rate": 4.9321700814795414e-05, "loss": 0.2139, "step": 9356 }, { "epoch": 0.16689259087504013, "grad_norm": 0.26875007152557373, "learning_rate": 4.9321340652911686e-05, "loss": 0.1966, "step": 9357 }, { "epoch": 0.16691042699675382, "grad_norm": 0.356659471988678, "learning_rate": 4.932098039675003e-05, "loss": 0.1979, "step": 9358 }, { "epoch": 0.1669282631184675, "grad_norm": 0.22786852717399597, "learning_rate": 4.932062004631186e-05, "loss": 0.2311, "step": 9359 }, { "epoch": 0.16694609924018122, "grad_norm": 0.2709221839904785, "learning_rate": 4.932025960159857e-05, "loss": 0.2024, "step": 9360 }, { "epoch": 0.1669639353618949, "grad_norm": 0.2221805602312088, "learning_rate": 4.931989906261155e-05, "loss": 0.1716, "step": 9361 }, { "epoch": 0.1669817714836086, "grad_norm": 0.2553713619709015, "learning_rate": 4.93195384293522e-05, "loss": 0.1863, "step": 9362 }, { "epoch": 0.1669996076053223, "grad_norm": 0.32181254029273987, "learning_rate": 4.931917770182192e-05, "loss": 0.2268, "step": 9363 }, { "epoch": 0.167017443727036, "grad_norm": 0.23647813498973846, "learning_rate": 4.9318816880022106e-05, "loss": 0.1974, "step": 9364 }, { "epoch": 0.1670352798487497, "grad_norm": 0.2741914689540863, "learning_rate": 4.931845596395416e-05, "loss": 0.2122, "step": 9365 }, { "epoch": 0.16705311597046338, "grad_norm": 0.3124186098575592, "learning_rate": 4.931809495361948e-05, "loss": 0.2283, "step": 9366 }, { "epoch": 0.16707095209217707, "grad_norm": 0.4242728650569916, "learning_rate": 4.9317733849019464e-05, "loss": 0.2164, "step": 9367 }, { "epoch": 0.16708878821389078, "grad_norm": 0.23358015716075897, "learning_rate": 4.9317372650155514e-05, "loss": 0.1944, "step": 9368 }, { "epoch": 0.16710662433560447, "grad_norm": 0.24014799296855927, "learning_rate": 4.931701135702903e-05, "loss": 0.1961, "step": 9369 }, { "epoch": 0.16712446045731816, "grad_norm": 0.2570462226867676, "learning_rate": 4.93166499696414e-05, "loss": 0.2115, "step": 9370 }, { "epoch": 0.16714229657903185, "grad_norm": 0.24118487536907196, "learning_rate": 4.931628848799405e-05, "loss": 0.2014, "step": 9371 }, { "epoch": 0.16716013270074556, "grad_norm": 0.3396017849445343, "learning_rate": 4.931592691208836e-05, "loss": 0.1809, "step": 9372 }, { "epoch": 0.16717796882245925, "grad_norm": 0.37545526027679443, "learning_rate": 4.9315565241925746e-05, "loss": 0.2441, "step": 9373 }, { "epoch": 0.16719580494417294, "grad_norm": 0.27863696217536926, "learning_rate": 4.93152034775076e-05, "loss": 0.2231, "step": 9374 }, { "epoch": 0.16721364106588663, "grad_norm": 0.27867764234542847, "learning_rate": 4.931484161883532e-05, "loss": 0.2506, "step": 9375 }, { "epoch": 0.16723147718760034, "grad_norm": 0.2119695246219635, "learning_rate": 4.9314479665910326e-05, "loss": 0.194, "step": 9376 }, { "epoch": 0.16724931330931403, "grad_norm": 0.2885444462299347, "learning_rate": 4.9314117618734e-05, "loss": 0.2483, "step": 9377 }, { "epoch": 0.16726714943102772, "grad_norm": 0.2301822006702423, "learning_rate": 4.931375547730777e-05, "loss": 0.1828, "step": 9378 }, { "epoch": 0.1672849855527414, "grad_norm": 0.3811148405075073, "learning_rate": 4.931339324163301e-05, "loss": 0.2558, "step": 9379 }, { "epoch": 0.1673028216744551, "grad_norm": 0.24624472856521606, "learning_rate": 4.931303091171115e-05, "loss": 0.2141, "step": 9380 }, { "epoch": 0.1673206577961688, "grad_norm": 0.3223497271537781, "learning_rate": 4.9312668487543584e-05, "loss": 0.2806, "step": 9381 }, { "epoch": 0.1673384939178825, "grad_norm": 0.31021153926849365, "learning_rate": 4.9312305969131716e-05, "loss": 0.2328, "step": 9382 }, { "epoch": 0.16735633003959619, "grad_norm": 0.2613779306411743, "learning_rate": 4.9311943356476956e-05, "loss": 0.2023, "step": 9383 }, { "epoch": 0.16737416616130987, "grad_norm": 0.3111892640590668, "learning_rate": 4.93115806495807e-05, "loss": 0.2871, "step": 9384 }, { "epoch": 0.1673920022830236, "grad_norm": 0.3307771384716034, "learning_rate": 4.931121784844437e-05, "loss": 0.2521, "step": 9385 }, { "epoch": 0.16740983840473728, "grad_norm": 0.2541097104549408, "learning_rate": 4.931085495306935e-05, "loss": 0.209, "step": 9386 }, { "epoch": 0.16742767452645096, "grad_norm": 0.29149866104125977, "learning_rate": 4.9310491963457074e-05, "loss": 0.2009, "step": 9387 }, { "epoch": 0.16744551064816465, "grad_norm": 0.2406076043844223, "learning_rate": 4.9310128879608924e-05, "loss": 0.189, "step": 9388 }, { "epoch": 0.16746334676987837, "grad_norm": 0.3291756212711334, "learning_rate": 4.9309765701526325e-05, "loss": 0.2816, "step": 9389 }, { "epoch": 0.16748118289159206, "grad_norm": 0.1909375935792923, "learning_rate": 4.9309402429210674e-05, "loss": 0.1709, "step": 9390 }, { "epoch": 0.16749901901330574, "grad_norm": 0.23396065831184387, "learning_rate": 4.9309039062663374e-05, "loss": 0.1379, "step": 9391 }, { "epoch": 0.16751685513501943, "grad_norm": 0.2677765190601349, "learning_rate": 4.9308675601885853e-05, "loss": 0.2063, "step": 9392 }, { "epoch": 0.16753469125673315, "grad_norm": 0.3470064401626587, "learning_rate": 4.930831204687951e-05, "loss": 0.2567, "step": 9393 }, { "epoch": 0.16755252737844684, "grad_norm": 0.2177836298942566, "learning_rate": 4.930794839764575e-05, "loss": 0.194, "step": 9394 }, { "epoch": 0.16757036350016052, "grad_norm": 0.26476576924324036, "learning_rate": 4.930758465418599e-05, "loss": 0.2522, "step": 9395 }, { "epoch": 0.1675881996218742, "grad_norm": 0.19955436885356903, "learning_rate": 4.9307220816501634e-05, "loss": 0.18, "step": 9396 }, { "epoch": 0.16760603574358793, "grad_norm": 0.41708457469940186, "learning_rate": 4.93068568845941e-05, "loss": 0.2566, "step": 9397 }, { "epoch": 0.16762387186530162, "grad_norm": 0.2114209681749344, "learning_rate": 4.930649285846478e-05, "loss": 0.1735, "step": 9398 }, { "epoch": 0.1676417079870153, "grad_norm": 0.2221326380968094, "learning_rate": 4.930612873811511e-05, "loss": 0.2334, "step": 9399 }, { "epoch": 0.167659544108729, "grad_norm": 0.22841550409793854, "learning_rate": 4.930576452354649e-05, "loss": 0.2472, "step": 9400 }, { "epoch": 0.16767738023044268, "grad_norm": 0.2357344925403595, "learning_rate": 4.930540021476032e-05, "loss": 0.1603, "step": 9401 }, { "epoch": 0.1676952163521564, "grad_norm": 0.33083274960517883, "learning_rate": 4.9305035811758035e-05, "loss": 0.1738, "step": 9402 }, { "epoch": 0.16771305247387008, "grad_norm": 0.2918514311313629, "learning_rate": 4.930467131454104e-05, "loss": 0.2345, "step": 9403 }, { "epoch": 0.16773088859558377, "grad_norm": 0.3187107443809509, "learning_rate": 4.930430672311074e-05, "loss": 0.2117, "step": 9404 }, { "epoch": 0.16774872471729746, "grad_norm": 0.21408715844154358, "learning_rate": 4.9303942037468545e-05, "loss": 0.208, "step": 9405 }, { "epoch": 0.16776656083901118, "grad_norm": 0.2944090962409973, "learning_rate": 4.9303577257615886e-05, "loss": 0.2434, "step": 9406 }, { "epoch": 0.16778439696072486, "grad_norm": 0.29843562841415405, "learning_rate": 4.930321238355416e-05, "loss": 0.1755, "step": 9407 }, { "epoch": 0.16780223308243855, "grad_norm": 0.26216739416122437, "learning_rate": 4.930284741528479e-05, "loss": 0.1796, "step": 9408 }, { "epoch": 0.16782006920415224, "grad_norm": 0.27436238527297974, "learning_rate": 4.93024823528092e-05, "loss": 0.2511, "step": 9409 }, { "epoch": 0.16783790532586595, "grad_norm": 0.23143106698989868, "learning_rate": 4.930211719612878e-05, "loss": 0.2406, "step": 9410 }, { "epoch": 0.16785574144757964, "grad_norm": 0.21253831684589386, "learning_rate": 4.930175194524497e-05, "loss": 0.1675, "step": 9411 }, { "epoch": 0.16787357756929333, "grad_norm": 0.2958551347255707, "learning_rate": 4.9301386600159174e-05, "loss": 0.2471, "step": 9412 }, { "epoch": 0.16789141369100702, "grad_norm": 0.2436182200908661, "learning_rate": 4.9301021160872806e-05, "loss": 0.1959, "step": 9413 }, { "epoch": 0.16790924981272073, "grad_norm": 0.4054587185382843, "learning_rate": 4.930065562738729e-05, "loss": 0.2134, "step": 9414 }, { "epoch": 0.16792708593443442, "grad_norm": 0.2374972403049469, "learning_rate": 4.930028999970404e-05, "loss": 0.1883, "step": 9415 }, { "epoch": 0.1679449220561481, "grad_norm": 0.3502541780471802, "learning_rate": 4.929992427782447e-05, "loss": 0.2559, "step": 9416 }, { "epoch": 0.1679627581778618, "grad_norm": 0.2598343789577484, "learning_rate": 4.9299558461750006e-05, "loss": 0.2454, "step": 9417 }, { "epoch": 0.16798059429957551, "grad_norm": 0.4071826934814453, "learning_rate": 4.929919255148205e-05, "loss": 0.2169, "step": 9418 }, { "epoch": 0.1679984304212892, "grad_norm": 0.247068390250206, "learning_rate": 4.929882654702205e-05, "loss": 0.2036, "step": 9419 }, { "epoch": 0.1680162665430029, "grad_norm": 0.2935871481895447, "learning_rate": 4.929846044837139e-05, "loss": 0.1573, "step": 9420 }, { "epoch": 0.16803410266471658, "grad_norm": 0.40864014625549316, "learning_rate": 4.929809425553151e-05, "loss": 0.2359, "step": 9421 }, { "epoch": 0.16805193878643027, "grad_norm": 0.2923342287540436, "learning_rate": 4.929772796850382e-05, "loss": 0.1903, "step": 9422 }, { "epoch": 0.16806977490814398, "grad_norm": 0.3476279079914093, "learning_rate": 4.9297361587289745e-05, "loss": 0.1812, "step": 9423 }, { "epoch": 0.16808761102985767, "grad_norm": 0.3068753778934479, "learning_rate": 4.9296995111890707e-05, "loss": 0.1785, "step": 9424 }, { "epoch": 0.16810544715157136, "grad_norm": 0.2985677719116211, "learning_rate": 4.929662854230813e-05, "loss": 0.2138, "step": 9425 }, { "epoch": 0.16812328327328505, "grad_norm": 0.2700032889842987, "learning_rate": 4.929626187854342e-05, "loss": 0.2015, "step": 9426 }, { "epoch": 0.16814111939499876, "grad_norm": 0.31977972388267517, "learning_rate": 4.929589512059801e-05, "loss": 0.2467, "step": 9427 }, { "epoch": 0.16815895551671245, "grad_norm": 0.28535133600234985, "learning_rate": 4.929552826847332e-05, "loss": 0.2166, "step": 9428 }, { "epoch": 0.16817679163842614, "grad_norm": 0.4397616386413574, "learning_rate": 4.9295161322170766e-05, "loss": 0.2034, "step": 9429 }, { "epoch": 0.16819462776013983, "grad_norm": 0.245170459151268, "learning_rate": 4.929479428169178e-05, "loss": 0.2384, "step": 9430 }, { "epoch": 0.16821246388185354, "grad_norm": 0.3002978265285492, "learning_rate": 4.929442714703778e-05, "loss": 0.1601, "step": 9431 }, { "epoch": 0.16823030000356723, "grad_norm": 0.3409084677696228, "learning_rate": 4.929405991821019e-05, "loss": 0.1642, "step": 9432 }, { "epoch": 0.16824813612528092, "grad_norm": 0.2634219229221344, "learning_rate": 4.9293692595210435e-05, "loss": 0.27, "step": 9433 }, { "epoch": 0.1682659722469946, "grad_norm": 0.3201228678226471, "learning_rate": 4.929332517803993e-05, "loss": 0.2406, "step": 9434 }, { "epoch": 0.16828380836870832, "grad_norm": 0.21339882910251617, "learning_rate": 4.9292957666700113e-05, "loss": 0.1946, "step": 9435 }, { "epoch": 0.168301644490422, "grad_norm": 0.2794226109981537, "learning_rate": 4.929259006119239e-05, "loss": 0.2107, "step": 9436 }, { "epoch": 0.1683194806121357, "grad_norm": 0.2748399078845978, "learning_rate": 4.929222236151821e-05, "loss": 0.2781, "step": 9437 }, { "epoch": 0.16833731673384938, "grad_norm": 0.293453186750412, "learning_rate": 4.929185456767898e-05, "loss": 0.1632, "step": 9438 }, { "epoch": 0.16835515285556307, "grad_norm": 0.31061115860939026, "learning_rate": 4.929148667967613e-05, "loss": 0.1794, "step": 9439 }, { "epoch": 0.1683729889772768, "grad_norm": 0.35332250595092773, "learning_rate": 4.9291118697511096e-05, "loss": 0.1995, "step": 9440 }, { "epoch": 0.16839082509899048, "grad_norm": 0.4127959907054901, "learning_rate": 4.929075062118529e-05, "loss": 0.2784, "step": 9441 }, { "epoch": 0.16840866122070416, "grad_norm": 0.24581199884414673, "learning_rate": 4.929038245070015e-05, "loss": 0.1807, "step": 9442 }, { "epoch": 0.16842649734241785, "grad_norm": 0.2739531993865967, "learning_rate": 4.929001418605709e-05, "loss": 0.263, "step": 9443 }, { "epoch": 0.16844433346413157, "grad_norm": 0.23609992861747742, "learning_rate": 4.928964582725755e-05, "loss": 0.1981, "step": 9444 }, { "epoch": 0.16846216958584526, "grad_norm": 0.2891889810562134, "learning_rate": 4.928927737430296e-05, "loss": 0.2169, "step": 9445 }, { "epoch": 0.16848000570755894, "grad_norm": 0.2761732339859009, "learning_rate": 4.928890882719472e-05, "loss": 0.2447, "step": 9446 }, { "epoch": 0.16849784182927263, "grad_norm": 0.23232130706310272, "learning_rate": 4.9288540185934307e-05, "loss": 0.1814, "step": 9447 }, { "epoch": 0.16851567795098635, "grad_norm": 0.23173119127750397, "learning_rate": 4.928817145052311e-05, "loss": 0.2003, "step": 9448 }, { "epoch": 0.16853351407270004, "grad_norm": 0.2666068971157074, "learning_rate": 4.928780262096257e-05, "loss": 0.2011, "step": 9449 }, { "epoch": 0.16855135019441372, "grad_norm": 0.2718893885612488, "learning_rate": 4.928743369725412e-05, "loss": 0.2074, "step": 9450 }, { "epoch": 0.1685691863161274, "grad_norm": 0.3470384180545807, "learning_rate": 4.9287064679399184e-05, "loss": 0.1858, "step": 9451 }, { "epoch": 0.16858702243784113, "grad_norm": 0.3397728502750397, "learning_rate": 4.928669556739921e-05, "loss": 0.2754, "step": 9452 }, { "epoch": 0.16860485855955482, "grad_norm": 0.2192605584859848, "learning_rate": 4.92863263612556e-05, "loss": 0.2343, "step": 9453 }, { "epoch": 0.1686226946812685, "grad_norm": 0.2916926145553589, "learning_rate": 4.928595706096981e-05, "loss": 0.2249, "step": 9454 }, { "epoch": 0.1686405308029822, "grad_norm": 0.3425137996673584, "learning_rate": 4.928558766654326e-05, "loss": 0.26, "step": 9455 }, { "epoch": 0.1686583669246959, "grad_norm": 0.23316988348960876, "learning_rate": 4.928521817797739e-05, "loss": 0.1864, "step": 9456 }, { "epoch": 0.1686762030464096, "grad_norm": 0.34133097529411316, "learning_rate": 4.928484859527362e-05, "loss": 0.2389, "step": 9457 }, { "epoch": 0.16869403916812328, "grad_norm": 0.34185847640037537, "learning_rate": 4.9284478918433385e-05, "loss": 0.172, "step": 9458 }, { "epoch": 0.16871187528983697, "grad_norm": 0.34840038418769836, "learning_rate": 4.928410914745813e-05, "loss": 0.2312, "step": 9459 }, { "epoch": 0.16872971141155066, "grad_norm": 0.4255453944206238, "learning_rate": 4.928373928234928e-05, "loss": 0.2366, "step": 9460 }, { "epoch": 0.16874754753326437, "grad_norm": 0.2785493731498718, "learning_rate": 4.9283369323108265e-05, "loss": 0.219, "step": 9461 }, { "epoch": 0.16876538365497806, "grad_norm": 0.21891143918037415, "learning_rate": 4.928299926973653e-05, "loss": 0.1987, "step": 9462 }, { "epoch": 0.16878321977669175, "grad_norm": 0.2747426927089691, "learning_rate": 4.9282629122235495e-05, "loss": 0.233, "step": 9463 }, { "epoch": 0.16880105589840544, "grad_norm": 0.266500860452652, "learning_rate": 4.928225888060661e-05, "loss": 0.1999, "step": 9464 }, { "epoch": 0.16881889202011915, "grad_norm": 0.308432936668396, "learning_rate": 4.9281888544851294e-05, "loss": 0.2153, "step": 9465 }, { "epoch": 0.16883672814183284, "grad_norm": 0.3269672691822052, "learning_rate": 4.9281518114970996e-05, "loss": 0.1926, "step": 9466 }, { "epoch": 0.16885456426354653, "grad_norm": 0.30079373717308044, "learning_rate": 4.928114759096715e-05, "loss": 0.226, "step": 9467 }, { "epoch": 0.16887240038526022, "grad_norm": 0.41163352131843567, "learning_rate": 4.9280776972841184e-05, "loss": 0.2061, "step": 9468 }, { "epoch": 0.16889023650697393, "grad_norm": 0.24012619256973267, "learning_rate": 4.9280406260594545e-05, "loss": 0.1878, "step": 9469 }, { "epoch": 0.16890807262868762, "grad_norm": 0.24753014743328094, "learning_rate": 4.928003545422866e-05, "loss": 0.2083, "step": 9470 }, { "epoch": 0.1689259087504013, "grad_norm": 0.24844524264335632, "learning_rate": 4.9279664553744974e-05, "loss": 0.1917, "step": 9471 }, { "epoch": 0.168943744872115, "grad_norm": 0.2850281298160553, "learning_rate": 4.927929355914492e-05, "loss": 0.2142, "step": 9472 }, { "epoch": 0.1689615809938287, "grad_norm": 0.35059431195259094, "learning_rate": 4.927892247042994e-05, "loss": 0.1987, "step": 9473 }, { "epoch": 0.1689794171155424, "grad_norm": 0.22267958521842957, "learning_rate": 4.9278551287601484e-05, "loss": 0.2018, "step": 9474 }, { "epoch": 0.1689972532372561, "grad_norm": 0.23396602272987366, "learning_rate": 4.927818001066096e-05, "loss": 0.2111, "step": 9475 }, { "epoch": 0.16901508935896978, "grad_norm": 0.3829486668109894, "learning_rate": 4.927780863960984e-05, "loss": 0.1948, "step": 9476 }, { "epoch": 0.1690329254806835, "grad_norm": 0.3118758201599121, "learning_rate": 4.927743717444953e-05, "loss": 0.1669, "step": 9477 }, { "epoch": 0.16905076160239718, "grad_norm": 0.3104802966117859, "learning_rate": 4.92770656151815e-05, "loss": 0.204, "step": 9478 }, { "epoch": 0.16906859772411087, "grad_norm": 0.3120456337928772, "learning_rate": 4.9276693961807175e-05, "loss": 0.2165, "step": 9479 }, { "epoch": 0.16908643384582456, "grad_norm": 0.23897507786750793, "learning_rate": 4.9276322214327994e-05, "loss": 0.1911, "step": 9480 }, { "epoch": 0.16910426996753825, "grad_norm": 0.3869253098964691, "learning_rate": 4.927595037274542e-05, "loss": 0.2543, "step": 9481 }, { "epoch": 0.16912210608925196, "grad_norm": 0.46600019931793213, "learning_rate": 4.927557843706086e-05, "loss": 0.2489, "step": 9482 }, { "epoch": 0.16913994221096565, "grad_norm": 0.26517653465270996, "learning_rate": 4.9275206407275784e-05, "loss": 0.2443, "step": 9483 }, { "epoch": 0.16915777833267934, "grad_norm": 0.20588456094264984, "learning_rate": 4.927483428339162e-05, "loss": 0.1974, "step": 9484 }, { "epoch": 0.16917561445439303, "grad_norm": 0.2681596279144287, "learning_rate": 4.927446206540981e-05, "loss": 0.2339, "step": 9485 }, { "epoch": 0.16919345057610674, "grad_norm": 0.2703278660774231, "learning_rate": 4.927408975333181e-05, "loss": 0.2253, "step": 9486 }, { "epoch": 0.16921128669782043, "grad_norm": 0.3100188076496124, "learning_rate": 4.927371734715905e-05, "loss": 0.2053, "step": 9487 }, { "epoch": 0.16922912281953412, "grad_norm": 0.2778243124485016, "learning_rate": 4.927334484689298e-05, "loss": 0.2267, "step": 9488 }, { "epoch": 0.1692469589412478, "grad_norm": 0.27329525351524353, "learning_rate": 4.927297225253503e-05, "loss": 0.1882, "step": 9489 }, { "epoch": 0.16926479506296152, "grad_norm": 0.3512638509273529, "learning_rate": 4.927259956408667e-05, "loss": 0.2209, "step": 9490 }, { "epoch": 0.1692826311846752, "grad_norm": 0.23508402705192566, "learning_rate": 4.927222678154932e-05, "loss": 0.2112, "step": 9491 }, { "epoch": 0.1693004673063889, "grad_norm": 0.24606750905513763, "learning_rate": 4.927185390492445e-05, "loss": 0.22, "step": 9492 }, { "epoch": 0.16931830342810258, "grad_norm": 0.2985594570636749, "learning_rate": 4.927148093421348e-05, "loss": 0.1979, "step": 9493 }, { "epoch": 0.1693361395498163, "grad_norm": 0.4615451395511627, "learning_rate": 4.927110786941786e-05, "loss": 0.2043, "step": 9494 }, { "epoch": 0.16935397567153, "grad_norm": 0.2040075808763504, "learning_rate": 4.927073471053906e-05, "loss": 0.214, "step": 9495 }, { "epoch": 0.16937181179324368, "grad_norm": 0.2320394665002823, "learning_rate": 4.9270361457578496e-05, "loss": 0.231, "step": 9496 }, { "epoch": 0.16938964791495736, "grad_norm": 0.17597275972366333, "learning_rate": 4.926998811053763e-05, "loss": 0.1999, "step": 9497 }, { "epoch": 0.16940748403667108, "grad_norm": 0.2441011518239975, "learning_rate": 4.9269614669417916e-05, "loss": 0.1758, "step": 9498 }, { "epoch": 0.16942532015838477, "grad_norm": 0.23479744791984558, "learning_rate": 4.926924113422079e-05, "loss": 0.2184, "step": 9499 }, { "epoch": 0.16944315628009846, "grad_norm": 0.2610747516155243, "learning_rate": 4.92688675049477e-05, "loss": 0.2098, "step": 9500 }, { "epoch": 0.16946099240181214, "grad_norm": 0.2525850236415863, "learning_rate": 4.92684937816001e-05, "loss": 0.1759, "step": 9501 }, { "epoch": 0.16947882852352583, "grad_norm": 0.30792132019996643, "learning_rate": 4.9268119964179436e-05, "loss": 0.2316, "step": 9502 }, { "epoch": 0.16949666464523955, "grad_norm": 0.31012576818466187, "learning_rate": 4.926774605268715e-05, "loss": 0.2273, "step": 9503 }, { "epoch": 0.16951450076695324, "grad_norm": 0.2511320412158966, "learning_rate": 4.9267372047124704e-05, "loss": 0.1761, "step": 9504 }, { "epoch": 0.16953233688866692, "grad_norm": 0.27153435349464417, "learning_rate": 4.926699794749354e-05, "loss": 0.2474, "step": 9505 }, { "epoch": 0.1695501730103806, "grad_norm": 0.3075025975704193, "learning_rate": 4.9266623753795114e-05, "loss": 0.2206, "step": 9506 }, { "epoch": 0.16956800913209433, "grad_norm": 0.29549577832221985, "learning_rate": 4.926624946603087e-05, "loss": 0.2054, "step": 9507 }, { "epoch": 0.16958584525380802, "grad_norm": 0.33228665590286255, "learning_rate": 4.926587508420227e-05, "loss": 0.2292, "step": 9508 }, { "epoch": 0.1696036813755217, "grad_norm": 0.30444657802581787, "learning_rate": 4.926550060831074e-05, "loss": 0.1954, "step": 9509 }, { "epoch": 0.1696215174972354, "grad_norm": 0.2660526931285858, "learning_rate": 4.9265126038357767e-05, "loss": 0.192, "step": 9510 }, { "epoch": 0.1696393536189491, "grad_norm": 0.33575963973999023, "learning_rate": 4.926475137434478e-05, "loss": 0.219, "step": 9511 }, { "epoch": 0.1696571897406628, "grad_norm": 0.38330385088920593, "learning_rate": 4.926437661627323e-05, "loss": 0.259, "step": 9512 }, { "epoch": 0.16967502586237648, "grad_norm": 0.26194047927856445, "learning_rate": 4.926400176414458e-05, "loss": 0.2118, "step": 9513 }, { "epoch": 0.16969286198409017, "grad_norm": 0.37973135709762573, "learning_rate": 4.9263626817960274e-05, "loss": 0.2161, "step": 9514 }, { "epoch": 0.1697106981058039, "grad_norm": 0.2892526388168335, "learning_rate": 4.926325177772177e-05, "loss": 0.2438, "step": 9515 }, { "epoch": 0.16972853422751757, "grad_norm": 0.2314528524875641, "learning_rate": 4.926287664343052e-05, "loss": 0.2021, "step": 9516 }, { "epoch": 0.16974637034923126, "grad_norm": 0.27935171127319336, "learning_rate": 4.926250141508799e-05, "loss": 0.2135, "step": 9517 }, { "epoch": 0.16976420647094495, "grad_norm": 0.3179842233657837, "learning_rate": 4.926212609269562e-05, "loss": 0.2195, "step": 9518 }, { "epoch": 0.16978204259265864, "grad_norm": 0.31895214319229126, "learning_rate": 4.926175067625487e-05, "loss": 0.1969, "step": 9519 }, { "epoch": 0.16979987871437235, "grad_norm": 0.24014034867286682, "learning_rate": 4.926137516576719e-05, "loss": 0.201, "step": 9520 }, { "epoch": 0.16981771483608604, "grad_norm": 0.3354727029800415, "learning_rate": 4.926099956123404e-05, "loss": 0.2192, "step": 9521 }, { "epoch": 0.16983555095779973, "grad_norm": 0.4670446515083313, "learning_rate": 4.926062386265689e-05, "loss": 0.2549, "step": 9522 }, { "epoch": 0.16985338707951342, "grad_norm": 0.22529637813568115, "learning_rate": 4.926024807003716e-05, "loss": 0.2162, "step": 9523 }, { "epoch": 0.16987122320122713, "grad_norm": 0.28081199526786804, "learning_rate": 4.925987218337635e-05, "loss": 0.249, "step": 9524 }, { "epoch": 0.16988905932294082, "grad_norm": 0.21510134637355804, "learning_rate": 4.925949620267589e-05, "loss": 0.1642, "step": 9525 }, { "epoch": 0.1699068954446545, "grad_norm": 0.23330473899841309, "learning_rate": 4.925912012793724e-05, "loss": 0.1789, "step": 9526 }, { "epoch": 0.1699247315663682, "grad_norm": 0.2610885798931122, "learning_rate": 4.9258743959161855e-05, "loss": 0.2157, "step": 9527 }, { "epoch": 0.1699425676880819, "grad_norm": 0.1969381719827652, "learning_rate": 4.925836769635121e-05, "loss": 0.2244, "step": 9528 }, { "epoch": 0.1699604038097956, "grad_norm": 0.289852112531662, "learning_rate": 4.9257991339506754e-05, "loss": 0.2412, "step": 9529 }, { "epoch": 0.1699782399315093, "grad_norm": 0.32819539308547974, "learning_rate": 4.925761488862994e-05, "loss": 0.2586, "step": 9530 }, { "epoch": 0.16999607605322298, "grad_norm": 0.3206731677055359, "learning_rate": 4.9257238343722233e-05, "loss": 0.23, "step": 9531 }, { "epoch": 0.1700139121749367, "grad_norm": 0.28485774993896484, "learning_rate": 4.925686170478509e-05, "loss": 0.1996, "step": 9532 }, { "epoch": 0.17003174829665038, "grad_norm": 0.3539358377456665, "learning_rate": 4.9256484971819984e-05, "loss": 0.2612, "step": 9533 }, { "epoch": 0.17004958441836407, "grad_norm": 0.23671498894691467, "learning_rate": 4.9256108144828356e-05, "loss": 0.1673, "step": 9534 }, { "epoch": 0.17006742054007776, "grad_norm": 0.3517628014087677, "learning_rate": 4.925573122381167e-05, "loss": 0.2546, "step": 9535 }, { "epoch": 0.17008525666179147, "grad_norm": 0.24914629757404327, "learning_rate": 4.9255354208771406e-05, "loss": 0.1779, "step": 9536 }, { "epoch": 0.17010309278350516, "grad_norm": 0.46196451783180237, "learning_rate": 4.9254977099709e-05, "loss": 0.1753, "step": 9537 }, { "epoch": 0.17012092890521885, "grad_norm": 0.21600893139839172, "learning_rate": 4.925459989662593e-05, "loss": 0.2025, "step": 9538 }, { "epoch": 0.17013876502693254, "grad_norm": 0.290483295917511, "learning_rate": 4.925422259952365e-05, "loss": 0.2166, "step": 9539 }, { "epoch": 0.17015660114864622, "grad_norm": 0.38096922636032104, "learning_rate": 4.925384520840363e-05, "loss": 0.23, "step": 9540 }, { "epoch": 0.17017443727035994, "grad_norm": 0.2617246210575104, "learning_rate": 4.925346772326733e-05, "loss": 0.1999, "step": 9541 }, { "epoch": 0.17019227339207363, "grad_norm": 0.26019787788391113, "learning_rate": 4.925309014411621e-05, "loss": 0.2342, "step": 9542 }, { "epoch": 0.17021010951378732, "grad_norm": 0.2577129900455475, "learning_rate": 4.925271247095173e-05, "loss": 0.1996, "step": 9543 }, { "epoch": 0.170227945635501, "grad_norm": 0.31563353538513184, "learning_rate": 4.925233470377537e-05, "loss": 0.1928, "step": 9544 }, { "epoch": 0.17024578175721472, "grad_norm": 0.3143184781074524, "learning_rate": 4.9251956842588574e-05, "loss": 0.2377, "step": 9545 }, { "epoch": 0.1702636178789284, "grad_norm": 0.22418881952762604, "learning_rate": 4.925157888739282e-05, "loss": 0.1899, "step": 9546 }, { "epoch": 0.1702814540006421, "grad_norm": 0.40662509202957153, "learning_rate": 4.9251200838189585e-05, "loss": 0.1679, "step": 9547 }, { "epoch": 0.17029929012235578, "grad_norm": 0.1995219886302948, "learning_rate": 4.92508226949803e-05, "loss": 0.1832, "step": 9548 }, { "epoch": 0.1703171262440695, "grad_norm": 0.24337337911128998, "learning_rate": 4.925044445776646e-05, "loss": 0.2133, "step": 9549 }, { "epoch": 0.1703349623657832, "grad_norm": 0.35046741366386414, "learning_rate": 4.925006612654952e-05, "loss": 0.2338, "step": 9550 }, { "epoch": 0.17035279848749688, "grad_norm": 0.264070600271225, "learning_rate": 4.924968770133094e-05, "loss": 0.1708, "step": 9551 }, { "epoch": 0.17037063460921056, "grad_norm": 0.2133130431175232, "learning_rate": 4.92493091821122e-05, "loss": 0.1851, "step": 9552 }, { "epoch": 0.17038847073092428, "grad_norm": 0.28974494338035583, "learning_rate": 4.924893056889477e-05, "loss": 0.2417, "step": 9553 }, { "epoch": 0.17040630685263797, "grad_norm": 0.2992122173309326, "learning_rate": 4.9248551861680106e-05, "loss": 0.2528, "step": 9554 }, { "epoch": 0.17042414297435166, "grad_norm": 0.22294269502162933, "learning_rate": 4.924817306046967e-05, "loss": 0.2189, "step": 9555 }, { "epoch": 0.17044197909606534, "grad_norm": 0.3522729277610779, "learning_rate": 4.924779416526495e-05, "loss": 0.2156, "step": 9556 }, { "epoch": 0.17045981521777906, "grad_norm": 0.25028425455093384, "learning_rate": 4.92474151760674e-05, "loss": 0.2003, "step": 9557 }, { "epoch": 0.17047765133949275, "grad_norm": 0.34904852509498596, "learning_rate": 4.92470360928785e-05, "loss": 0.1727, "step": 9558 }, { "epoch": 0.17049548746120644, "grad_norm": 0.2682909667491913, "learning_rate": 4.924665691569971e-05, "loss": 0.1896, "step": 9559 }, { "epoch": 0.17051332358292012, "grad_norm": 0.3951048254966736, "learning_rate": 4.92462776445325e-05, "loss": 0.2083, "step": 9560 }, { "epoch": 0.1705311597046338, "grad_norm": 0.28123539686203003, "learning_rate": 4.924589827937835e-05, "loss": 0.2124, "step": 9561 }, { "epoch": 0.17054899582634753, "grad_norm": 0.2618462145328522, "learning_rate": 4.9245518820238724e-05, "loss": 0.2757, "step": 9562 }, { "epoch": 0.17056683194806121, "grad_norm": 0.23455531895160675, "learning_rate": 4.9245139267115086e-05, "loss": 0.2121, "step": 9563 }, { "epoch": 0.1705846680697749, "grad_norm": 0.2572258412837982, "learning_rate": 4.9244759620008915e-05, "loss": 0.2257, "step": 9564 }, { "epoch": 0.1706025041914886, "grad_norm": 0.2199053168296814, "learning_rate": 4.9244379878921686e-05, "loss": 0.2304, "step": 9565 }, { "epoch": 0.1706203403132023, "grad_norm": 0.2827913761138916, "learning_rate": 4.9244000043854865e-05, "loss": 0.2141, "step": 9566 }, { "epoch": 0.170638176434916, "grad_norm": 0.224032923579216, "learning_rate": 4.924362011480993e-05, "loss": 0.2007, "step": 9567 }, { "epoch": 0.17065601255662968, "grad_norm": 0.2857799828052521, "learning_rate": 4.9243240091788345e-05, "loss": 0.1962, "step": 9568 }, { "epoch": 0.17067384867834337, "grad_norm": 0.29973313212394714, "learning_rate": 4.924285997479159e-05, "loss": 0.2459, "step": 9569 }, { "epoch": 0.1706916848000571, "grad_norm": 0.3320271670818329, "learning_rate": 4.924247976382114e-05, "loss": 0.2526, "step": 9570 }, { "epoch": 0.17070952092177077, "grad_norm": 0.2696791887283325, "learning_rate": 4.924209945887846e-05, "loss": 0.1771, "step": 9571 }, { "epoch": 0.17072735704348446, "grad_norm": 0.31899896264076233, "learning_rate": 4.924171905996503e-05, "loss": 0.2624, "step": 9572 }, { "epoch": 0.17074519316519815, "grad_norm": 0.2947900891304016, "learning_rate": 4.924133856708232e-05, "loss": 0.1881, "step": 9573 }, { "epoch": 0.17076302928691187, "grad_norm": 0.25045454502105713, "learning_rate": 4.924095798023182e-05, "loss": 0.2334, "step": 9574 }, { "epoch": 0.17078086540862555, "grad_norm": 0.3101300597190857, "learning_rate": 4.924057729941499e-05, "loss": 0.2129, "step": 9575 }, { "epoch": 0.17079870153033924, "grad_norm": 0.2634217143058777, "learning_rate": 4.92401965246333e-05, "loss": 0.227, "step": 9576 }, { "epoch": 0.17081653765205293, "grad_norm": 0.4140169620513916, "learning_rate": 4.923981565588824e-05, "loss": 0.1585, "step": 9577 }, { "epoch": 0.17083437377376665, "grad_norm": 0.3452723026275635, "learning_rate": 4.92394346931813e-05, "loss": 0.2064, "step": 9578 }, { "epoch": 0.17085220989548033, "grad_norm": 0.46921178698539734, "learning_rate": 4.923905363651392e-05, "loss": 0.2256, "step": 9579 }, { "epoch": 0.17087004601719402, "grad_norm": 0.366527259349823, "learning_rate": 4.9238672485887606e-05, "loss": 0.2719, "step": 9580 }, { "epoch": 0.1708878821389077, "grad_norm": 0.3229540288448334, "learning_rate": 4.923829124130382e-05, "loss": 0.2027, "step": 9581 }, { "epoch": 0.1709057182606214, "grad_norm": 0.2804720401763916, "learning_rate": 4.9237909902764045e-05, "loss": 0.2224, "step": 9582 }, { "epoch": 0.1709235543823351, "grad_norm": 0.28612813353538513, "learning_rate": 4.923752847026976e-05, "loss": 0.2143, "step": 9583 }, { "epoch": 0.1709413905040488, "grad_norm": 0.3355180323123932, "learning_rate": 4.923714694382245e-05, "loss": 0.1881, "step": 9584 }, { "epoch": 0.1709592266257625, "grad_norm": 0.3040127754211426, "learning_rate": 4.9236765323423587e-05, "loss": 0.2305, "step": 9585 }, { "epoch": 0.17097706274747618, "grad_norm": 0.29329100251197815, "learning_rate": 4.9236383609074635e-05, "loss": 0.295, "step": 9586 }, { "epoch": 0.1709948988691899, "grad_norm": 0.2795793116092682, "learning_rate": 4.9236001800777105e-05, "loss": 0.2712, "step": 9587 }, { "epoch": 0.17101273499090358, "grad_norm": 0.2780926525592804, "learning_rate": 4.923561989853246e-05, "loss": 0.2072, "step": 9588 }, { "epoch": 0.17103057111261727, "grad_norm": 0.2929146885871887, "learning_rate": 4.923523790234217e-05, "loss": 0.212, "step": 9589 }, { "epoch": 0.17104840723433096, "grad_norm": 0.27513065934181213, "learning_rate": 4.923485581220774e-05, "loss": 0.2167, "step": 9590 }, { "epoch": 0.17106624335604467, "grad_norm": 0.2884487807750702, "learning_rate": 4.923447362813063e-05, "loss": 0.2433, "step": 9591 }, { "epoch": 0.17108407947775836, "grad_norm": 0.41791215538978577, "learning_rate": 4.923409135011233e-05, "loss": 0.2539, "step": 9592 }, { "epoch": 0.17110191559947205, "grad_norm": 0.38883331418037415, "learning_rate": 4.923370897815433e-05, "loss": 0.1835, "step": 9593 }, { "epoch": 0.17111975172118574, "grad_norm": 0.2700742483139038, "learning_rate": 4.9233326512258096e-05, "loss": 0.2083, "step": 9594 }, { "epoch": 0.17113758784289945, "grad_norm": 0.27792683243751526, "learning_rate": 4.923294395242513e-05, "loss": 0.2313, "step": 9595 }, { "epoch": 0.17115542396461314, "grad_norm": 0.26268866658210754, "learning_rate": 4.9232561298656885e-05, "loss": 0.186, "step": 9596 }, { "epoch": 0.17117326008632683, "grad_norm": 0.2692398130893707, "learning_rate": 4.923217855095488e-05, "loss": 0.1767, "step": 9597 }, { "epoch": 0.17119109620804052, "grad_norm": 0.41611286997795105, "learning_rate": 4.9231795709320574e-05, "loss": 0.2298, "step": 9598 }, { "epoch": 0.17120893232975423, "grad_norm": 0.27804091572761536, "learning_rate": 4.923141277375546e-05, "loss": 0.2003, "step": 9599 }, { "epoch": 0.17122676845146792, "grad_norm": 0.2379477322101593, "learning_rate": 4.923102974426101e-05, "loss": 0.1769, "step": 9600 }, { "epoch": 0.1712446045731816, "grad_norm": 0.2880176901817322, "learning_rate": 4.9230646620838736e-05, "loss": 0.2089, "step": 9601 }, { "epoch": 0.1712624406948953, "grad_norm": 0.36888349056243896, "learning_rate": 4.9230263403490095e-05, "loss": 0.1884, "step": 9602 }, { "epoch": 0.17128027681660898, "grad_norm": 0.2614987790584564, "learning_rate": 4.92298800922166e-05, "loss": 0.2062, "step": 9603 }, { "epoch": 0.1712981129383227, "grad_norm": 0.30136510729789734, "learning_rate": 4.9229496687019705e-05, "loss": 0.2217, "step": 9604 }, { "epoch": 0.1713159490600364, "grad_norm": 0.2660360038280487, "learning_rate": 4.922911318790092e-05, "loss": 0.2118, "step": 9605 }, { "epoch": 0.17133378518175008, "grad_norm": 0.3715563714504242, "learning_rate": 4.9228729594861716e-05, "loss": 0.2264, "step": 9606 }, { "epoch": 0.17135162130346376, "grad_norm": 0.27642402052879333, "learning_rate": 4.9228345907903595e-05, "loss": 0.2029, "step": 9607 }, { "epoch": 0.17136945742517748, "grad_norm": 0.21892401576042175, "learning_rate": 4.922796212702804e-05, "loss": 0.2302, "step": 9608 }, { "epoch": 0.17138729354689117, "grad_norm": 0.23819518089294434, "learning_rate": 4.922757825223653e-05, "loss": 0.1883, "step": 9609 }, { "epoch": 0.17140512966860486, "grad_norm": 0.2200346291065216, "learning_rate": 4.9227194283530556e-05, "loss": 0.1766, "step": 9610 }, { "epoch": 0.17142296579031854, "grad_norm": 0.37342411279678345, "learning_rate": 4.9226810220911615e-05, "loss": 0.2511, "step": 9611 }, { "epoch": 0.17144080191203226, "grad_norm": 0.3035658895969391, "learning_rate": 4.922642606438118e-05, "loss": 0.243, "step": 9612 }, { "epoch": 0.17145863803374595, "grad_norm": 0.27800440788269043, "learning_rate": 4.922604181394076e-05, "loss": 0.2063, "step": 9613 }, { "epoch": 0.17147647415545964, "grad_norm": 0.22545850276947021, "learning_rate": 4.9225657469591835e-05, "loss": 0.1697, "step": 9614 }, { "epoch": 0.17149431027717332, "grad_norm": 0.25454989075660706, "learning_rate": 4.9225273031335886e-05, "loss": 0.2046, "step": 9615 }, { "epoch": 0.17151214639888704, "grad_norm": 0.2833195626735687, "learning_rate": 4.922488849917442e-05, "loss": 0.2352, "step": 9616 }, { "epoch": 0.17152998252060073, "grad_norm": 0.24875544011592865, "learning_rate": 4.9224503873108915e-05, "loss": 0.2119, "step": 9617 }, { "epoch": 0.17154781864231441, "grad_norm": 0.3346499800682068, "learning_rate": 4.922411915314087e-05, "loss": 0.2307, "step": 9618 }, { "epoch": 0.1715656547640281, "grad_norm": 0.2831631302833557, "learning_rate": 4.922373433927176e-05, "loss": 0.2582, "step": 9619 }, { "epoch": 0.1715834908857418, "grad_norm": 0.2854125201702118, "learning_rate": 4.922334943150311e-05, "loss": 0.2082, "step": 9620 }, { "epoch": 0.1716013270074555, "grad_norm": 0.3862302303314209, "learning_rate": 4.922296442983638e-05, "loss": 0.2358, "step": 9621 }, { "epoch": 0.1716191631291692, "grad_norm": 0.2591022849082947, "learning_rate": 4.922257933427307e-05, "loss": 0.2111, "step": 9622 }, { "epoch": 0.17163699925088288, "grad_norm": 0.2876119017601013, "learning_rate": 4.922219414481468e-05, "loss": 0.2475, "step": 9623 }, { "epoch": 0.17165483537259657, "grad_norm": 0.24456492066383362, "learning_rate": 4.92218088614627e-05, "loss": 0.2087, "step": 9624 }, { "epoch": 0.17167267149431029, "grad_norm": 0.31571707129478455, "learning_rate": 4.922142348421862e-05, "loss": 0.2666, "step": 9625 }, { "epoch": 0.17169050761602397, "grad_norm": 0.39414793252944946, "learning_rate": 4.922103801308394e-05, "loss": 0.2134, "step": 9626 }, { "epoch": 0.17170834373773766, "grad_norm": 0.4650692939758301, "learning_rate": 4.9220652448060144e-05, "loss": 0.2645, "step": 9627 }, { "epoch": 0.17172617985945135, "grad_norm": 0.2746374011039734, "learning_rate": 4.922026678914874e-05, "loss": 0.2298, "step": 9628 }, { "epoch": 0.17174401598116507, "grad_norm": 0.2523934543132782, "learning_rate": 4.9219881036351226e-05, "loss": 0.2227, "step": 9629 }, { "epoch": 0.17176185210287875, "grad_norm": 0.2765379548072815, "learning_rate": 4.921949518966907e-05, "loss": 0.2579, "step": 9630 }, { "epoch": 0.17177968822459244, "grad_norm": 0.27917927503585815, "learning_rate": 4.921910924910379e-05, "loss": 0.1976, "step": 9631 }, { "epoch": 0.17179752434630613, "grad_norm": 0.28405430912971497, "learning_rate": 4.9218723214656885e-05, "loss": 0.2388, "step": 9632 }, { "epoch": 0.17181536046801985, "grad_norm": 0.3750477731227875, "learning_rate": 4.921833708632984e-05, "loss": 0.1991, "step": 9633 }, { "epoch": 0.17183319658973353, "grad_norm": 0.4381203353404999, "learning_rate": 4.921795086412416e-05, "loss": 0.2514, "step": 9634 }, { "epoch": 0.17185103271144722, "grad_norm": 0.3596727252006531, "learning_rate": 4.921756454804133e-05, "loss": 0.2688, "step": 9635 }, { "epoch": 0.1718688688331609, "grad_norm": 0.28607720136642456, "learning_rate": 4.921717813808286e-05, "loss": 0.2055, "step": 9636 }, { "epoch": 0.17188670495487463, "grad_norm": 0.32283514738082886, "learning_rate": 4.9216791634250236e-05, "loss": 0.2551, "step": 9637 }, { "epoch": 0.1719045410765883, "grad_norm": 0.2867508828639984, "learning_rate": 4.921640503654497e-05, "loss": 0.2092, "step": 9638 }, { "epoch": 0.171922377198302, "grad_norm": 0.25139811635017395, "learning_rate": 4.9216018344968554e-05, "loss": 0.1677, "step": 9639 }, { "epoch": 0.1719402133200157, "grad_norm": 0.1929500699043274, "learning_rate": 4.921563155952249e-05, "loss": 0.1734, "step": 9640 }, { "epoch": 0.17195804944172938, "grad_norm": 0.22774456441402435, "learning_rate": 4.921524468020827e-05, "loss": 0.1664, "step": 9641 }, { "epoch": 0.1719758855634431, "grad_norm": 0.2855418026447296, "learning_rate": 4.92148577070274e-05, "loss": 0.2406, "step": 9642 }, { "epoch": 0.17199372168515678, "grad_norm": 0.2703842222690582, "learning_rate": 4.921447063998137e-05, "loss": 0.2062, "step": 9643 }, { "epoch": 0.17201155780687047, "grad_norm": 0.5362157225608826, "learning_rate": 4.92140834790717e-05, "loss": 0.2785, "step": 9644 }, { "epoch": 0.17202939392858416, "grad_norm": 0.21611298620700836, "learning_rate": 4.921369622429987e-05, "loss": 0.2096, "step": 9645 }, { "epoch": 0.17204723005029787, "grad_norm": 0.24363186955451965, "learning_rate": 4.92133088756674e-05, "loss": 0.223, "step": 9646 }, { "epoch": 0.17206506617201156, "grad_norm": 0.22381004691123962, "learning_rate": 4.9212921433175775e-05, "loss": 0.1981, "step": 9647 }, { "epoch": 0.17208290229372525, "grad_norm": 0.3383592963218689, "learning_rate": 4.9212533896826505e-05, "loss": 0.2343, "step": 9648 }, { "epoch": 0.17210073841543894, "grad_norm": 0.2760606110095978, "learning_rate": 4.9212146266621084e-05, "loss": 0.1813, "step": 9649 }, { "epoch": 0.17211857453715265, "grad_norm": 0.3160358965396881, "learning_rate": 4.921175854256103e-05, "loss": 0.2898, "step": 9650 }, { "epoch": 0.17213641065886634, "grad_norm": 0.23598331212997437, "learning_rate": 4.921137072464784e-05, "loss": 0.187, "step": 9651 }, { "epoch": 0.17215424678058003, "grad_norm": 0.2617507576942444, "learning_rate": 4.9210982812883e-05, "loss": 0.2486, "step": 9652 }, { "epoch": 0.17217208290229372, "grad_norm": 0.3494400680065155, "learning_rate": 4.921059480726805e-05, "loss": 0.197, "step": 9653 }, { "epoch": 0.17218991902400743, "grad_norm": 0.26766592264175415, "learning_rate": 4.9210206707804456e-05, "loss": 0.2161, "step": 9654 }, { "epoch": 0.17220775514572112, "grad_norm": 0.2603318691253662, "learning_rate": 4.920981851449374e-05, "loss": 0.234, "step": 9655 }, { "epoch": 0.1722255912674348, "grad_norm": 0.2517866790294647, "learning_rate": 4.9209430227337415e-05, "loss": 0.2215, "step": 9656 }, { "epoch": 0.1722434273891485, "grad_norm": 0.21566170454025269, "learning_rate": 4.9209041846336965e-05, "loss": 0.1644, "step": 9657 }, { "epoch": 0.1722612635108622, "grad_norm": 0.31020960211753845, "learning_rate": 4.920865337149392e-05, "loss": 0.2661, "step": 9658 }, { "epoch": 0.1722790996325759, "grad_norm": 0.24349386990070343, "learning_rate": 4.9208264802809766e-05, "loss": 0.2034, "step": 9659 }, { "epoch": 0.1722969357542896, "grad_norm": 0.29610615968704224, "learning_rate": 4.920787614028601e-05, "loss": 0.2042, "step": 9660 }, { "epoch": 0.17231477187600328, "grad_norm": 0.3083944618701935, "learning_rate": 4.920748738392418e-05, "loss": 0.1733, "step": 9661 }, { "epoch": 0.17233260799771696, "grad_norm": 0.3282853364944458, "learning_rate": 4.920709853372576e-05, "loss": 0.2216, "step": 9662 }, { "epoch": 0.17235044411943068, "grad_norm": 0.333893358707428, "learning_rate": 4.9206709589692265e-05, "loss": 0.2421, "step": 9663 }, { "epoch": 0.17236828024114437, "grad_norm": 0.32559463381767273, "learning_rate": 4.9206320551825206e-05, "loss": 0.2711, "step": 9664 }, { "epoch": 0.17238611636285806, "grad_norm": 0.26360490918159485, "learning_rate": 4.920593142012608e-05, "loss": 0.2177, "step": 9665 }, { "epoch": 0.17240395248457174, "grad_norm": 0.2548786997795105, "learning_rate": 4.920554219459641e-05, "loss": 0.2029, "step": 9666 }, { "epoch": 0.17242178860628546, "grad_norm": 0.25616228580474854, "learning_rate": 4.920515287523769e-05, "loss": 0.204, "step": 9667 }, { "epoch": 0.17243962472799915, "grad_norm": 0.30703577399253845, "learning_rate": 4.920476346205145e-05, "loss": 0.2718, "step": 9668 }, { "epoch": 0.17245746084971283, "grad_norm": 0.31625422835350037, "learning_rate": 4.9204373955039174e-05, "loss": 0.2013, "step": 9669 }, { "epoch": 0.17247529697142652, "grad_norm": 0.22914358973503113, "learning_rate": 4.9203984354202393e-05, "loss": 0.1981, "step": 9670 }, { "epoch": 0.17249313309314024, "grad_norm": 0.3993740379810333, "learning_rate": 4.9203594659542605e-05, "loss": 0.2443, "step": 9671 }, { "epoch": 0.17251096921485393, "grad_norm": 0.36154547333717346, "learning_rate": 4.920320487106133e-05, "loss": 0.2278, "step": 9672 }, { "epoch": 0.17252880533656761, "grad_norm": 0.2840382158756256, "learning_rate": 4.920281498876007e-05, "loss": 0.1785, "step": 9673 }, { "epoch": 0.1725466414582813, "grad_norm": 0.2900967597961426, "learning_rate": 4.9202425012640344e-05, "loss": 0.2081, "step": 9674 }, { "epoch": 0.17256447757999502, "grad_norm": 0.4035519063472748, "learning_rate": 4.920203494270365e-05, "loss": 0.2242, "step": 9675 }, { "epoch": 0.1725823137017087, "grad_norm": 0.26229366660118103, "learning_rate": 4.9201644778951516e-05, "loss": 0.1973, "step": 9676 }, { "epoch": 0.1726001498234224, "grad_norm": 0.24187491834163666, "learning_rate": 4.9201254521385446e-05, "loss": 0.2187, "step": 9677 }, { "epoch": 0.17261798594513608, "grad_norm": 0.30757611989974976, "learning_rate": 4.920086417000695e-05, "loss": 0.2842, "step": 9678 }, { "epoch": 0.1726358220668498, "grad_norm": 0.44807401299476624, "learning_rate": 4.9200473724817555e-05, "loss": 0.2652, "step": 9679 }, { "epoch": 0.17265365818856349, "grad_norm": 0.3205125331878662, "learning_rate": 4.920008318581876e-05, "loss": 0.2036, "step": 9680 }, { "epoch": 0.17267149431027717, "grad_norm": 0.36433398723602295, "learning_rate": 4.919969255301209e-05, "loss": 0.1986, "step": 9681 }, { "epoch": 0.17268933043199086, "grad_norm": 0.2050919532775879, "learning_rate": 4.9199301826399046e-05, "loss": 0.1881, "step": 9682 }, { "epoch": 0.17270716655370455, "grad_norm": 0.430519700050354, "learning_rate": 4.9198911005981155e-05, "loss": 0.2074, "step": 9683 }, { "epoch": 0.17272500267541827, "grad_norm": 0.32934385538101196, "learning_rate": 4.919852009175992e-05, "loss": 0.212, "step": 9684 }, { "epoch": 0.17274283879713195, "grad_norm": 0.2581077814102173, "learning_rate": 4.919812908373686e-05, "loss": 0.1845, "step": 9685 }, { "epoch": 0.17276067491884564, "grad_norm": 0.32681527733802795, "learning_rate": 4.9197737981913504e-05, "loss": 0.2646, "step": 9686 }, { "epoch": 0.17277851104055933, "grad_norm": 0.2916010916233063, "learning_rate": 4.919734678629136e-05, "loss": 0.24, "step": 9687 }, { "epoch": 0.17279634716227305, "grad_norm": 0.2567000389099121, "learning_rate": 4.919695549687193e-05, "loss": 0.2023, "step": 9688 }, { "epoch": 0.17281418328398673, "grad_norm": 0.3838261365890503, "learning_rate": 4.9196564113656755e-05, "loss": 0.2245, "step": 9689 }, { "epoch": 0.17283201940570042, "grad_norm": 0.2686448097229004, "learning_rate": 4.919617263664734e-05, "loss": 0.1613, "step": 9690 }, { "epoch": 0.1728498555274141, "grad_norm": 0.3094487190246582, "learning_rate": 4.91957810658452e-05, "loss": 0.1849, "step": 9691 }, { "epoch": 0.17286769164912782, "grad_norm": 0.28329363465309143, "learning_rate": 4.919538940125185e-05, "loss": 0.2214, "step": 9692 }, { "epoch": 0.1728855277708415, "grad_norm": 0.24060699343681335, "learning_rate": 4.9194997642868826e-05, "loss": 0.1737, "step": 9693 }, { "epoch": 0.1729033638925552, "grad_norm": 0.26238158345222473, "learning_rate": 4.9194605790697625e-05, "loss": 0.1841, "step": 9694 }, { "epoch": 0.1729212000142689, "grad_norm": 0.20694534480571747, "learning_rate": 4.919421384473977e-05, "loss": 0.165, "step": 9695 }, { "epoch": 0.1729390361359826, "grad_norm": 0.27723363041877747, "learning_rate": 4.9193821804996797e-05, "loss": 0.1745, "step": 9696 }, { "epoch": 0.1729568722576963, "grad_norm": 0.2610498070716858, "learning_rate": 4.919342967147021e-05, "loss": 0.2022, "step": 9697 }, { "epoch": 0.17297470837940998, "grad_norm": 0.4002273678779602, "learning_rate": 4.9193037444161536e-05, "loss": 0.168, "step": 9698 }, { "epoch": 0.17299254450112367, "grad_norm": 0.2577795088291168, "learning_rate": 4.919264512307229e-05, "loss": 0.1844, "step": 9699 }, { "epoch": 0.17301038062283736, "grad_norm": 0.20801451802253723, "learning_rate": 4.9192252708204e-05, "loss": 0.1781, "step": 9700 }, { "epoch": 0.17302821674455107, "grad_norm": 0.2729775309562683, "learning_rate": 4.9191860199558174e-05, "loss": 0.2275, "step": 9701 }, { "epoch": 0.17304605286626476, "grad_norm": 0.3288865089416504, "learning_rate": 4.919146759713634e-05, "loss": 0.2058, "step": 9702 }, { "epoch": 0.17306388898797845, "grad_norm": 0.24803242087364197, "learning_rate": 4.9191074900940034e-05, "loss": 0.2055, "step": 9703 }, { "epoch": 0.17308172510969214, "grad_norm": 0.3133016526699066, "learning_rate": 4.919068211097076e-05, "loss": 0.2303, "step": 9704 }, { "epoch": 0.17309956123140585, "grad_norm": 0.27445313334465027, "learning_rate": 4.919028922723005e-05, "loss": 0.1938, "step": 9705 }, { "epoch": 0.17311739735311954, "grad_norm": 0.244804248213768, "learning_rate": 4.9189896249719425e-05, "loss": 0.2076, "step": 9706 }, { "epoch": 0.17313523347483323, "grad_norm": 0.3342616558074951, "learning_rate": 4.918950317844041e-05, "loss": 0.2105, "step": 9707 }, { "epoch": 0.17315306959654692, "grad_norm": 0.35800743103027344, "learning_rate": 4.918911001339451e-05, "loss": 0.2382, "step": 9708 }, { "epoch": 0.17317090571826063, "grad_norm": 0.42986416816711426, "learning_rate": 4.918871675458328e-05, "loss": 0.2172, "step": 9709 }, { "epoch": 0.17318874183997432, "grad_norm": 0.2927144169807434, "learning_rate": 4.918832340200823e-05, "loss": 0.2592, "step": 9710 }, { "epoch": 0.173206577961688, "grad_norm": 0.27397555112838745, "learning_rate": 4.9187929955670875e-05, "loss": 0.2169, "step": 9711 }, { "epoch": 0.1732244140834017, "grad_norm": 0.2059878408908844, "learning_rate": 4.9187536415572756e-05, "loss": 0.1558, "step": 9712 }, { "epoch": 0.1732422502051154, "grad_norm": 0.4312272071838379, "learning_rate": 4.918714278171539e-05, "loss": 0.1652, "step": 9713 }, { "epoch": 0.1732600863268291, "grad_norm": 0.27202436327934265, "learning_rate": 4.91867490541003e-05, "loss": 0.2212, "step": 9714 }, { "epoch": 0.1732779224485428, "grad_norm": 0.3257104158401489, "learning_rate": 4.918635523272902e-05, "loss": 0.2062, "step": 9715 }, { "epoch": 0.17329575857025648, "grad_norm": 0.3060513436794281, "learning_rate": 4.9185961317603074e-05, "loss": 0.2417, "step": 9716 }, { "epoch": 0.1733135946919702, "grad_norm": 0.5047610998153687, "learning_rate": 4.918556730872399e-05, "loss": 0.1946, "step": 9717 }, { "epoch": 0.17333143081368388, "grad_norm": 0.21300067007541656, "learning_rate": 4.918517320609329e-05, "loss": 0.1606, "step": 9718 }, { "epoch": 0.17334926693539757, "grad_norm": 0.2857724726200104, "learning_rate": 4.918477900971251e-05, "loss": 0.2572, "step": 9719 }, { "epoch": 0.17336710305711125, "grad_norm": 0.22725330293178558, "learning_rate": 4.9184384719583165e-05, "loss": 0.1677, "step": 9720 }, { "epoch": 0.17338493917882494, "grad_norm": 0.2861236333847046, "learning_rate": 4.91839903357068e-05, "loss": 0.2036, "step": 9721 }, { "epoch": 0.17340277530053866, "grad_norm": 0.26781216263771057, "learning_rate": 4.918359585808493e-05, "loss": 0.1937, "step": 9722 }, { "epoch": 0.17342061142225235, "grad_norm": 0.3556922674179077, "learning_rate": 4.91832012867191e-05, "loss": 0.1568, "step": 9723 }, { "epoch": 0.17343844754396603, "grad_norm": 0.27563250064849854, "learning_rate": 4.9182806621610825e-05, "loss": 0.2472, "step": 9724 }, { "epoch": 0.17345628366567972, "grad_norm": 0.3423020541667938, "learning_rate": 4.918241186276163e-05, "loss": 0.1946, "step": 9725 }, { "epoch": 0.17347411978739344, "grad_norm": 0.334852010011673, "learning_rate": 4.9182017010173067e-05, "loss": 0.1823, "step": 9726 }, { "epoch": 0.17349195590910713, "grad_norm": 0.3619869351387024, "learning_rate": 4.9181622063846644e-05, "loss": 0.2479, "step": 9727 }, { "epoch": 0.17350979203082081, "grad_norm": 0.37388190627098083, "learning_rate": 4.91812270237839e-05, "loss": 0.2479, "step": 9728 }, { "epoch": 0.1735276281525345, "grad_norm": 0.2579343020915985, "learning_rate": 4.918083188998638e-05, "loss": 0.2453, "step": 9729 }, { "epoch": 0.17354546427424822, "grad_norm": 0.2560495436191559, "learning_rate": 4.918043666245559e-05, "loss": 0.215, "step": 9730 }, { "epoch": 0.1735633003959619, "grad_norm": 0.35689017176628113, "learning_rate": 4.918004134119308e-05, "loss": 0.2404, "step": 9731 }, { "epoch": 0.1735811365176756, "grad_norm": 0.3746400475502014, "learning_rate": 4.917964592620039e-05, "loss": 0.2109, "step": 9732 }, { "epoch": 0.17359897263938928, "grad_norm": 0.23508204519748688, "learning_rate": 4.917925041747903e-05, "loss": 0.178, "step": 9733 }, { "epoch": 0.173616808761103, "grad_norm": 0.36890703439712524, "learning_rate": 4.9178854815030543e-05, "loss": 0.2254, "step": 9734 }, { "epoch": 0.17363464488281669, "grad_norm": 0.321085125207901, "learning_rate": 4.917845911885647e-05, "loss": 0.215, "step": 9735 }, { "epoch": 0.17365248100453037, "grad_norm": 0.3148922622203827, "learning_rate": 4.917806332895833e-05, "loss": 0.2464, "step": 9736 }, { "epoch": 0.17367031712624406, "grad_norm": 0.2999875545501709, "learning_rate": 4.9177667445337674e-05, "loss": 0.2248, "step": 9737 }, { "epoch": 0.17368815324795778, "grad_norm": 0.23620140552520752, "learning_rate": 4.9177271467996025e-05, "loss": 0.2219, "step": 9738 }, { "epoch": 0.17370598936967147, "grad_norm": 0.24593178927898407, "learning_rate": 4.9176875396934925e-05, "loss": 0.1783, "step": 9739 }, { "epoch": 0.17372382549138515, "grad_norm": 0.31903770565986633, "learning_rate": 4.91764792321559e-05, "loss": 0.2441, "step": 9740 }, { "epoch": 0.17374166161309884, "grad_norm": 0.26004692912101746, "learning_rate": 4.91760829736605e-05, "loss": 0.2603, "step": 9741 }, { "epoch": 0.17375949773481253, "grad_norm": 0.29169684648513794, "learning_rate": 4.917568662145024e-05, "loss": 0.2386, "step": 9742 }, { "epoch": 0.17377733385652624, "grad_norm": 0.36901918053627014, "learning_rate": 4.9175290175526676e-05, "loss": 0.1731, "step": 9743 }, { "epoch": 0.17379516997823993, "grad_norm": 0.35805922746658325, "learning_rate": 4.9174893635891333e-05, "loss": 0.2204, "step": 9744 }, { "epoch": 0.17381300609995362, "grad_norm": 0.5005916357040405, "learning_rate": 4.9174497002545754e-05, "loss": 0.2167, "step": 9745 }, { "epoch": 0.1738308422216673, "grad_norm": 0.5021663904190063, "learning_rate": 4.9174100275491477e-05, "loss": 0.2218, "step": 9746 }, { "epoch": 0.17384867834338102, "grad_norm": 0.350599080324173, "learning_rate": 4.917370345473004e-05, "loss": 0.3009, "step": 9747 }, { "epoch": 0.1738665144650947, "grad_norm": 0.305866539478302, "learning_rate": 4.917330654026297e-05, "loss": 0.1872, "step": 9748 }, { "epoch": 0.1738843505868084, "grad_norm": 0.3086687922477722, "learning_rate": 4.917290953209183e-05, "loss": 0.2482, "step": 9749 }, { "epoch": 0.1739021867085221, "grad_norm": 0.34263765811920166, "learning_rate": 4.9172512430218134e-05, "loss": 0.2292, "step": 9750 }, { "epoch": 0.1739200228302358, "grad_norm": 0.24874238669872284, "learning_rate": 4.9172115234643425e-05, "loss": 0.1788, "step": 9751 }, { "epoch": 0.1739378589519495, "grad_norm": 0.3459385633468628, "learning_rate": 4.917171794536925e-05, "loss": 0.2341, "step": 9752 }, { "epoch": 0.17395569507366318, "grad_norm": 0.27681493759155273, "learning_rate": 4.9171320562397164e-05, "loss": 0.2004, "step": 9753 }, { "epoch": 0.17397353119537687, "grad_norm": 0.2709489166736603, "learning_rate": 4.9170923085728676e-05, "loss": 0.2219, "step": 9754 }, { "epoch": 0.17399136731709058, "grad_norm": 0.20481276512145996, "learning_rate": 4.9170525515365346e-05, "loss": 0.1868, "step": 9755 }, { "epoch": 0.17400920343880427, "grad_norm": 0.23008392751216888, "learning_rate": 4.9170127851308715e-05, "loss": 0.1872, "step": 9756 }, { "epoch": 0.17402703956051796, "grad_norm": 0.3555377125740051, "learning_rate": 4.9169730093560305e-05, "loss": 0.2022, "step": 9757 }, { "epoch": 0.17404487568223165, "grad_norm": 0.2266392856836319, "learning_rate": 4.916933224212169e-05, "loss": 0.2072, "step": 9758 }, { "epoch": 0.17406271180394536, "grad_norm": 0.25155770778656006, "learning_rate": 4.9168934296994386e-05, "loss": 0.1901, "step": 9759 }, { "epoch": 0.17408054792565905, "grad_norm": 0.247994065284729, "learning_rate": 4.9168536258179946e-05, "loss": 0.1749, "step": 9760 }, { "epoch": 0.17409838404737274, "grad_norm": 0.21949608623981476, "learning_rate": 4.916813812567992e-05, "loss": 0.1891, "step": 9761 }, { "epoch": 0.17411622016908643, "grad_norm": 0.2974618077278137, "learning_rate": 4.916773989949584e-05, "loss": 0.2353, "step": 9762 }, { "epoch": 0.17413405629080012, "grad_norm": 0.35332390666007996, "learning_rate": 4.9167341579629245e-05, "loss": 0.2217, "step": 9763 }, { "epoch": 0.17415189241251383, "grad_norm": 0.3531196117401123, "learning_rate": 4.916694316608169e-05, "loss": 0.2382, "step": 9764 }, { "epoch": 0.17416972853422752, "grad_norm": 0.4063680171966553, "learning_rate": 4.9166544658854717e-05, "loss": 0.1759, "step": 9765 }, { "epoch": 0.1741875646559412, "grad_norm": 0.2495127022266388, "learning_rate": 4.916614605794988e-05, "loss": 0.1589, "step": 9766 }, { "epoch": 0.1742054007776549, "grad_norm": 0.369315505027771, "learning_rate": 4.9165747363368696e-05, "loss": 0.2126, "step": 9767 }, { "epoch": 0.1742232368993686, "grad_norm": 0.28651097416877747, "learning_rate": 4.916534857511274e-05, "loss": 0.1435, "step": 9768 }, { "epoch": 0.1742410730210823, "grad_norm": 0.3717736601829529, "learning_rate": 4.916494969318355e-05, "loss": 0.2426, "step": 9769 }, { "epoch": 0.174258909142796, "grad_norm": 0.39979088306427, "learning_rate": 4.916455071758266e-05, "loss": 0.2567, "step": 9770 }, { "epoch": 0.17427674526450967, "grad_norm": 0.24432671070098877, "learning_rate": 4.916415164831163e-05, "loss": 0.194, "step": 9771 }, { "epoch": 0.1742945813862234, "grad_norm": 0.39139798283576965, "learning_rate": 4.9163752485372e-05, "loss": 0.1943, "step": 9772 }, { "epoch": 0.17431241750793708, "grad_norm": 0.3452328145503998, "learning_rate": 4.916335322876532e-05, "loss": 0.1947, "step": 9773 }, { "epoch": 0.17433025362965077, "grad_norm": 0.3301299810409546, "learning_rate": 4.9162953878493135e-05, "loss": 0.2463, "step": 9774 }, { "epoch": 0.17434808975136445, "grad_norm": 0.35334160923957825, "learning_rate": 4.9162554434556996e-05, "loss": 0.1241, "step": 9775 }, { "epoch": 0.17436592587307817, "grad_norm": 0.2236124724149704, "learning_rate": 4.9162154896958454e-05, "loss": 0.211, "step": 9776 }, { "epoch": 0.17438376199479186, "grad_norm": 0.23963242769241333, "learning_rate": 4.916175526569905e-05, "loss": 0.2092, "step": 9777 }, { "epoch": 0.17440159811650555, "grad_norm": 0.36571216583251953, "learning_rate": 4.916135554078034e-05, "loss": 0.2641, "step": 9778 }, { "epoch": 0.17441943423821923, "grad_norm": 0.2502310574054718, "learning_rate": 4.9160955722203875e-05, "loss": 0.217, "step": 9779 }, { "epoch": 0.17443727035993295, "grad_norm": 0.2844808101654053, "learning_rate": 4.916055580997119e-05, "loss": 0.182, "step": 9780 }, { "epoch": 0.17445510648164664, "grad_norm": 0.2635055184364319, "learning_rate": 4.916015580408386e-05, "loss": 0.2371, "step": 9781 }, { "epoch": 0.17447294260336033, "grad_norm": 0.2510976493358612, "learning_rate": 4.915975570454341e-05, "loss": 0.2193, "step": 9782 }, { "epoch": 0.174490778725074, "grad_norm": 0.2558085024356842, "learning_rate": 4.9159355511351404e-05, "loss": 0.1912, "step": 9783 }, { "epoch": 0.1745086148467877, "grad_norm": 0.2219114452600479, "learning_rate": 4.9158955224509395e-05, "loss": 0.2049, "step": 9784 }, { "epoch": 0.17452645096850142, "grad_norm": 0.23177124559879303, "learning_rate": 4.9158554844018934e-05, "loss": 0.1954, "step": 9785 }, { "epoch": 0.1745442870902151, "grad_norm": 0.303877055644989, "learning_rate": 4.915815436988156e-05, "loss": 0.224, "step": 9786 }, { "epoch": 0.1745621232119288, "grad_norm": 0.3392929434776306, "learning_rate": 4.915775380209884e-05, "loss": 0.2373, "step": 9787 }, { "epoch": 0.17457995933364248, "grad_norm": 0.30069419741630554, "learning_rate": 4.915735314067233e-05, "loss": 0.2341, "step": 9788 }, { "epoch": 0.1745977954553562, "grad_norm": 0.2342492789030075, "learning_rate": 4.915695238560357e-05, "loss": 0.1689, "step": 9789 }, { "epoch": 0.17461563157706989, "grad_norm": 0.28799715638160706, "learning_rate": 4.915655153689412e-05, "loss": 0.1907, "step": 9790 }, { "epoch": 0.17463346769878357, "grad_norm": 0.23690320551395416, "learning_rate": 4.915615059454553e-05, "loss": 0.2085, "step": 9791 }, { "epoch": 0.17465130382049726, "grad_norm": 0.35785195231437683, "learning_rate": 4.915574955855936e-05, "loss": 0.1599, "step": 9792 }, { "epoch": 0.17466913994221098, "grad_norm": 0.3013131022453308, "learning_rate": 4.915534842893716e-05, "loss": 0.2044, "step": 9793 }, { "epoch": 0.17468697606392466, "grad_norm": 0.23619958758354187, "learning_rate": 4.9154947205680485e-05, "loss": 0.2028, "step": 9794 }, { "epoch": 0.17470481218563835, "grad_norm": 0.21933041512966156, "learning_rate": 4.9154545888790894e-05, "loss": 0.1891, "step": 9795 }, { "epoch": 0.17472264830735204, "grad_norm": 0.3393298089504242, "learning_rate": 4.9154144478269935e-05, "loss": 0.176, "step": 9796 }, { "epoch": 0.17474048442906576, "grad_norm": 0.37045788764953613, "learning_rate": 4.9153742974119174e-05, "loss": 0.2525, "step": 9797 }, { "epoch": 0.17475832055077944, "grad_norm": 0.3073008358478546, "learning_rate": 4.915334137634017e-05, "loss": 0.2141, "step": 9798 }, { "epoch": 0.17477615667249313, "grad_norm": 0.22247996926307678, "learning_rate": 4.9152939684934465e-05, "loss": 0.2147, "step": 9799 }, { "epoch": 0.17479399279420682, "grad_norm": 0.24229590594768524, "learning_rate": 4.915253789990363e-05, "loss": 0.125, "step": 9800 }, { "epoch": 0.1748118289159205, "grad_norm": 0.20457574725151062, "learning_rate": 4.915213602124921e-05, "loss": 0.1612, "step": 9801 }, { "epoch": 0.17482966503763422, "grad_norm": 0.3060280382633209, "learning_rate": 4.915173404897277e-05, "loss": 0.1487, "step": 9802 }, { "epoch": 0.1748475011593479, "grad_norm": 0.28459033370018005, "learning_rate": 4.915133198307586e-05, "loss": 0.2066, "step": 9803 }, { "epoch": 0.1748653372810616, "grad_norm": 0.28581908345222473, "learning_rate": 4.9150929823560055e-05, "loss": 0.264, "step": 9804 }, { "epoch": 0.1748831734027753, "grad_norm": 0.33398017287254333, "learning_rate": 4.9150527570426895e-05, "loss": 0.1408, "step": 9805 }, { "epoch": 0.174901009524489, "grad_norm": 0.28528156876564026, "learning_rate": 4.915012522367796e-05, "loss": 0.2305, "step": 9806 }, { "epoch": 0.1749188456462027, "grad_norm": 0.30481860041618347, "learning_rate": 4.9149722783314794e-05, "loss": 0.2607, "step": 9807 }, { "epoch": 0.17493668176791638, "grad_norm": 0.2669420838356018, "learning_rate": 4.9149320249338956e-05, "loss": 0.2067, "step": 9808 }, { "epoch": 0.17495451788963007, "grad_norm": 0.21455597877502441, "learning_rate": 4.914891762175202e-05, "loss": 0.2008, "step": 9809 }, { "epoch": 0.17497235401134378, "grad_norm": 0.24768298864364624, "learning_rate": 4.914851490055554e-05, "loss": 0.2354, "step": 9810 }, { "epoch": 0.17499019013305747, "grad_norm": 0.30151158571243286, "learning_rate": 4.914811208575107e-05, "loss": 0.1939, "step": 9811 }, { "epoch": 0.17500802625477116, "grad_norm": 0.3170900046825409, "learning_rate": 4.914770917734018e-05, "loss": 0.2702, "step": 9812 }, { "epoch": 0.17502586237648485, "grad_norm": 0.3405921757221222, "learning_rate": 4.914730617532443e-05, "loss": 0.1717, "step": 9813 }, { "epoch": 0.17504369849819856, "grad_norm": 0.2955649495124817, "learning_rate": 4.914690307970538e-05, "loss": 0.2227, "step": 9814 }, { "epoch": 0.17506153461991225, "grad_norm": 0.25658777356147766, "learning_rate": 4.914649989048459e-05, "loss": 0.2099, "step": 9815 }, { "epoch": 0.17507937074162594, "grad_norm": 0.37147191166877747, "learning_rate": 4.914609660766363e-05, "loss": 0.1566, "step": 9816 }, { "epoch": 0.17509720686333963, "grad_norm": 0.2716359794139862, "learning_rate": 4.914569323124405e-05, "loss": 0.2056, "step": 9817 }, { "epoch": 0.17511504298505334, "grad_norm": 0.25064435601234436, "learning_rate": 4.9145289761227434e-05, "loss": 0.228, "step": 9818 }, { "epoch": 0.17513287910676703, "grad_norm": 0.27970239520072937, "learning_rate": 4.9144886197615334e-05, "loss": 0.2028, "step": 9819 }, { "epoch": 0.17515071522848072, "grad_norm": 0.280620276927948, "learning_rate": 4.914448254040931e-05, "loss": 0.2091, "step": 9820 }, { "epoch": 0.1751685513501944, "grad_norm": 0.7011124491691589, "learning_rate": 4.914407878961094e-05, "loss": 0.2389, "step": 9821 }, { "epoch": 0.1751863874719081, "grad_norm": 0.31332695484161377, "learning_rate": 4.9143674945221776e-05, "loss": 0.1929, "step": 9822 }, { "epoch": 0.1752042235936218, "grad_norm": 0.23674538731575012, "learning_rate": 4.9143271007243394e-05, "loss": 0.1476, "step": 9823 }, { "epoch": 0.1752220597153355, "grad_norm": 0.24798457324504852, "learning_rate": 4.914286697567736e-05, "loss": 0.1976, "step": 9824 }, { "epoch": 0.1752398958370492, "grad_norm": 0.2565896511077881, "learning_rate": 4.9142462850525225e-05, "loss": 0.1953, "step": 9825 }, { "epoch": 0.17525773195876287, "grad_norm": 0.35916703939437866, "learning_rate": 4.914205863178857e-05, "loss": 0.2273, "step": 9826 }, { "epoch": 0.1752755680804766, "grad_norm": 0.2522268295288086, "learning_rate": 4.914165431946895e-05, "loss": 0.2018, "step": 9827 }, { "epoch": 0.17529340420219028, "grad_norm": 0.2315714806318283, "learning_rate": 4.9141249913567945e-05, "loss": 0.1948, "step": 9828 }, { "epoch": 0.17531124032390397, "grad_norm": 0.2239033430814743, "learning_rate": 4.914084541408712e-05, "loss": 0.1864, "step": 9829 }, { "epoch": 0.17532907644561765, "grad_norm": 0.26858869194984436, "learning_rate": 4.914044082102803e-05, "loss": 0.2481, "step": 9830 }, { "epoch": 0.17534691256733137, "grad_norm": 0.3861125111579895, "learning_rate": 4.9140036134392266e-05, "loss": 0.3285, "step": 9831 }, { "epoch": 0.17536474868904506, "grad_norm": 0.3304365575313568, "learning_rate": 4.9139631354181376e-05, "loss": 0.2086, "step": 9832 }, { "epoch": 0.17538258481075875, "grad_norm": 0.24423335492610931, "learning_rate": 4.913922648039694e-05, "loss": 0.2261, "step": 9833 }, { "epoch": 0.17540042093247243, "grad_norm": 0.2607804834842682, "learning_rate": 4.913882151304052e-05, "loss": 0.2353, "step": 9834 }, { "epoch": 0.17541825705418615, "grad_norm": 0.29175958037376404, "learning_rate": 4.91384164521137e-05, "loss": 0.2022, "step": 9835 }, { "epoch": 0.17543609317589984, "grad_norm": 0.25276750326156616, "learning_rate": 4.9138011297618036e-05, "loss": 0.2201, "step": 9836 }, { "epoch": 0.17545392929761353, "grad_norm": 0.308044970035553, "learning_rate": 4.9137606049555105e-05, "loss": 0.1551, "step": 9837 }, { "epoch": 0.1754717654193272, "grad_norm": 0.24433742463588715, "learning_rate": 4.9137200707926476e-05, "loss": 0.1797, "step": 9838 }, { "epoch": 0.17548960154104093, "grad_norm": 0.1911228746175766, "learning_rate": 4.913679527273371e-05, "loss": 0.1688, "step": 9839 }, { "epoch": 0.17550743766275462, "grad_norm": 0.28093472123146057, "learning_rate": 4.913638974397841e-05, "loss": 0.2009, "step": 9840 }, { "epoch": 0.1755252737844683, "grad_norm": 0.27896586060523987, "learning_rate": 4.913598412166211e-05, "loss": 0.1798, "step": 9841 }, { "epoch": 0.175543109906182, "grad_norm": 0.20525510609149933, "learning_rate": 4.9135578405786404e-05, "loss": 0.1791, "step": 9842 }, { "epoch": 0.17556094602789568, "grad_norm": 0.32102063298225403, "learning_rate": 4.913517259635286e-05, "loss": 0.1919, "step": 9843 }, { "epoch": 0.1755787821496094, "grad_norm": 0.34964942932128906, "learning_rate": 4.913476669336305e-05, "loss": 0.2439, "step": 9844 }, { "epoch": 0.17559661827132308, "grad_norm": 0.33790168166160583, "learning_rate": 4.913436069681855e-05, "loss": 0.2195, "step": 9845 }, { "epoch": 0.17561445439303677, "grad_norm": 0.2906341850757599, "learning_rate": 4.913395460672093e-05, "loss": 0.2097, "step": 9846 }, { "epoch": 0.17563229051475046, "grad_norm": 0.35009142756462097, "learning_rate": 4.9133548423071765e-05, "loss": 0.2547, "step": 9847 }, { "epoch": 0.17565012663646418, "grad_norm": 0.2403053641319275, "learning_rate": 4.913314214587263e-05, "loss": 0.1929, "step": 9848 }, { "epoch": 0.17566796275817786, "grad_norm": 0.2491363137960434, "learning_rate": 4.913273577512511e-05, "loss": 0.1813, "step": 9849 }, { "epoch": 0.17568579887989155, "grad_norm": 0.3104622960090637, "learning_rate": 4.9132329310830764e-05, "loss": 0.2335, "step": 9850 }, { "epoch": 0.17570363500160524, "grad_norm": 0.3666343688964844, "learning_rate": 4.913192275299118e-05, "loss": 0.2267, "step": 9851 }, { "epoch": 0.17572147112331896, "grad_norm": 0.2799401879310608, "learning_rate": 4.9131516101607923e-05, "loss": 0.246, "step": 9852 }, { "epoch": 0.17573930724503264, "grad_norm": 0.316927045583725, "learning_rate": 4.913110935668258e-05, "loss": 0.2505, "step": 9853 }, { "epoch": 0.17575714336674633, "grad_norm": 0.277109831571579, "learning_rate": 4.9130702518216715e-05, "loss": 0.2282, "step": 9854 }, { "epoch": 0.17577497948846002, "grad_norm": 0.2826685309410095, "learning_rate": 4.913029558621192e-05, "loss": 0.2573, "step": 9855 }, { "epoch": 0.17579281561017374, "grad_norm": 0.32976076006889343, "learning_rate": 4.9129888560669755e-05, "loss": 0.2104, "step": 9856 }, { "epoch": 0.17581065173188742, "grad_norm": 0.214805006980896, "learning_rate": 4.912948144159182e-05, "loss": 0.2194, "step": 9857 }, { "epoch": 0.1758284878536011, "grad_norm": 0.36780327558517456, "learning_rate": 4.912907422897967e-05, "loss": 0.1797, "step": 9858 }, { "epoch": 0.1758463239753148, "grad_norm": 0.30478477478027344, "learning_rate": 4.91286669228349e-05, "loss": 0.229, "step": 9859 }, { "epoch": 0.17586416009702852, "grad_norm": 0.23440249264240265, "learning_rate": 4.912825952315908e-05, "loss": 0.1987, "step": 9860 }, { "epoch": 0.1758819962187422, "grad_norm": 0.3610669672489166, "learning_rate": 4.91278520299538e-05, "loss": 0.2409, "step": 9861 }, { "epoch": 0.1758998323404559, "grad_norm": 0.26612475514411926, "learning_rate": 4.912744444322063e-05, "loss": 0.2077, "step": 9862 }, { "epoch": 0.17591766846216958, "grad_norm": 0.28233879804611206, "learning_rate": 4.9127036762961144e-05, "loss": 0.2312, "step": 9863 }, { "epoch": 0.17593550458388327, "grad_norm": 0.23805402219295502, "learning_rate": 4.9126628989176936e-05, "loss": 0.1833, "step": 9864 }, { "epoch": 0.17595334070559698, "grad_norm": 0.32132792472839355, "learning_rate": 4.912622112186958e-05, "loss": 0.2008, "step": 9865 }, { "epoch": 0.17597117682731067, "grad_norm": 0.3585755228996277, "learning_rate": 4.9125813161040654e-05, "loss": 0.2293, "step": 9866 }, { "epoch": 0.17598901294902436, "grad_norm": 0.3080814778804779, "learning_rate": 4.912540510669175e-05, "loss": 0.2971, "step": 9867 }, { "epoch": 0.17600684907073805, "grad_norm": 0.33391812443733215, "learning_rate": 4.912499695882444e-05, "loss": 0.2254, "step": 9868 }, { "epoch": 0.17602468519245176, "grad_norm": 0.2802722156047821, "learning_rate": 4.912458871744031e-05, "loss": 0.2181, "step": 9869 }, { "epoch": 0.17604252131416545, "grad_norm": 0.24930240213871002, "learning_rate": 4.912418038254094e-05, "loss": 0.2178, "step": 9870 }, { "epoch": 0.17606035743587914, "grad_norm": 0.25119373202323914, "learning_rate": 4.9123771954127917e-05, "loss": 0.1792, "step": 9871 }, { "epoch": 0.17607819355759283, "grad_norm": 0.339108943939209, "learning_rate": 4.912336343220283e-05, "loss": 0.1594, "step": 9872 }, { "epoch": 0.17609602967930654, "grad_norm": 0.3193516135215759, "learning_rate": 4.912295481676724e-05, "loss": 0.2209, "step": 9873 }, { "epoch": 0.17611386580102023, "grad_norm": 0.28487035632133484, "learning_rate": 4.9122546107822756e-05, "loss": 0.2196, "step": 9874 }, { "epoch": 0.17613170192273392, "grad_norm": 0.3017159700393677, "learning_rate": 4.912213730537094e-05, "loss": 0.1577, "step": 9875 }, { "epoch": 0.1761495380444476, "grad_norm": 0.36447104811668396, "learning_rate": 4.91217284094134e-05, "loss": 0.2475, "step": 9876 }, { "epoch": 0.17616737416616132, "grad_norm": 0.3028956949710846, "learning_rate": 4.91213194199517e-05, "loss": 0.2058, "step": 9877 }, { "epoch": 0.176185210287875, "grad_norm": 0.21894517540931702, "learning_rate": 4.912091033698744e-05, "loss": 0.1685, "step": 9878 }, { "epoch": 0.1762030464095887, "grad_norm": 0.24029791355133057, "learning_rate": 4.9120501160522204e-05, "loss": 0.1988, "step": 9879 }, { "epoch": 0.1762208825313024, "grad_norm": 0.39944303035736084, "learning_rate": 4.9120091890557565e-05, "loss": 0.2806, "step": 9880 }, { "epoch": 0.17623871865301607, "grad_norm": 0.33641016483306885, "learning_rate": 4.9119682527095126e-05, "loss": 0.1639, "step": 9881 }, { "epoch": 0.1762565547747298, "grad_norm": 0.3470108211040497, "learning_rate": 4.9119273070136465e-05, "loss": 0.2263, "step": 9882 }, { "epoch": 0.17627439089644348, "grad_norm": 0.33996477723121643, "learning_rate": 4.911886351968317e-05, "loss": 0.1611, "step": 9883 }, { "epoch": 0.17629222701815717, "grad_norm": 0.3267709016799927, "learning_rate": 4.911845387573683e-05, "loss": 0.2091, "step": 9884 }, { "epoch": 0.17631006313987085, "grad_norm": 0.33487847447395325, "learning_rate": 4.9118044138299033e-05, "loss": 0.2619, "step": 9885 }, { "epoch": 0.17632789926158457, "grad_norm": 0.29962268471717834, "learning_rate": 4.911763430737136e-05, "loss": 0.2807, "step": 9886 }, { "epoch": 0.17634573538329826, "grad_norm": 0.3162447214126587, "learning_rate": 4.911722438295542e-05, "loss": 0.2039, "step": 9887 }, { "epoch": 0.17636357150501195, "grad_norm": 0.32478970289230347, "learning_rate": 4.9116814365052784e-05, "loss": 0.2566, "step": 9888 }, { "epoch": 0.17638140762672563, "grad_norm": 0.2739572525024414, "learning_rate": 4.9116404253665037e-05, "loss": 0.2126, "step": 9889 }, { "epoch": 0.17639924374843935, "grad_norm": 0.26051992177963257, "learning_rate": 4.911599404879379e-05, "loss": 0.2655, "step": 9890 }, { "epoch": 0.17641707987015304, "grad_norm": 0.26293838024139404, "learning_rate": 4.911558375044061e-05, "loss": 0.2522, "step": 9891 }, { "epoch": 0.17643491599186673, "grad_norm": 0.29335278272628784, "learning_rate": 4.9115173358607105e-05, "loss": 0.1907, "step": 9892 }, { "epoch": 0.1764527521135804, "grad_norm": 0.26845383644104004, "learning_rate": 4.911476287329486e-05, "loss": 0.1828, "step": 9893 }, { "epoch": 0.17647058823529413, "grad_norm": 0.30866870284080505, "learning_rate": 4.911435229450546e-05, "loss": 0.2407, "step": 9894 }, { "epoch": 0.17648842435700782, "grad_norm": 0.24196955561637878, "learning_rate": 4.91139416222405e-05, "loss": 0.235, "step": 9895 }, { "epoch": 0.1765062604787215, "grad_norm": 0.328097939491272, "learning_rate": 4.9113530856501575e-05, "loss": 0.1971, "step": 9896 }, { "epoch": 0.1765240966004352, "grad_norm": 0.47702354192733765, "learning_rate": 4.9113119997290284e-05, "loss": 0.3136, "step": 9897 }, { "epoch": 0.1765419327221489, "grad_norm": 0.2346765697002411, "learning_rate": 4.911270904460821e-05, "loss": 0.1882, "step": 9898 }, { "epoch": 0.1765597688438626, "grad_norm": 0.2766011357307434, "learning_rate": 4.911229799845694e-05, "loss": 0.2342, "step": 9899 }, { "epoch": 0.17657760496557628, "grad_norm": 0.24140343070030212, "learning_rate": 4.9111886858838074e-05, "loss": 0.2126, "step": 9900 }, { "epoch": 0.17659544108728997, "grad_norm": 0.29035061597824097, "learning_rate": 4.911147562575321e-05, "loss": 0.1827, "step": 9901 }, { "epoch": 0.17661327720900366, "grad_norm": 0.4507565200328827, "learning_rate": 4.9111064299203946e-05, "loss": 0.2255, "step": 9902 }, { "epoch": 0.17663111333071738, "grad_norm": 0.2248343527317047, "learning_rate": 4.911065287919186e-05, "loss": 0.2027, "step": 9903 }, { "epoch": 0.17664894945243106, "grad_norm": 0.23875249922275543, "learning_rate": 4.911024136571856e-05, "loss": 0.2375, "step": 9904 }, { "epoch": 0.17666678557414475, "grad_norm": 0.3332054316997528, "learning_rate": 4.910982975878563e-05, "loss": 0.2603, "step": 9905 }, { "epoch": 0.17668462169585844, "grad_norm": 0.30601832270622253, "learning_rate": 4.910941805839468e-05, "loss": 0.253, "step": 9906 }, { "epoch": 0.17670245781757216, "grad_norm": 0.2917705476284027, "learning_rate": 4.91090062645473e-05, "loss": 0.2622, "step": 9907 }, { "epoch": 0.17672029393928584, "grad_norm": 0.31659191846847534, "learning_rate": 4.910859437724508e-05, "loss": 0.226, "step": 9908 }, { "epoch": 0.17673813006099953, "grad_norm": 0.23532883822917938, "learning_rate": 4.910818239648962e-05, "loss": 0.1455, "step": 9909 }, { "epoch": 0.17675596618271322, "grad_norm": 0.24646751582622528, "learning_rate": 4.910777032228252e-05, "loss": 0.2111, "step": 9910 }, { "epoch": 0.17677380230442694, "grad_norm": 0.2357739359140396, "learning_rate": 4.910735815462538e-05, "loss": 0.2006, "step": 9911 }, { "epoch": 0.17679163842614062, "grad_norm": 0.23476216197013855, "learning_rate": 4.910694589351979e-05, "loss": 0.1557, "step": 9912 }, { "epoch": 0.1768094745478543, "grad_norm": 0.30611780285835266, "learning_rate": 4.910653353896735e-05, "loss": 0.1899, "step": 9913 }, { "epoch": 0.176827310669568, "grad_norm": 0.3897404968738556, "learning_rate": 4.9106121090969656e-05, "loss": 0.2269, "step": 9914 }, { "epoch": 0.17684514679128172, "grad_norm": 0.42323267459869385, "learning_rate": 4.910570854952832e-05, "loss": 0.2138, "step": 9915 }, { "epoch": 0.1768629829129954, "grad_norm": 0.304720938205719, "learning_rate": 4.9105295914644925e-05, "loss": 0.229, "step": 9916 }, { "epoch": 0.1768808190347091, "grad_norm": 0.3408209979534149, "learning_rate": 4.9104883186321083e-05, "loss": 0.1995, "step": 9917 }, { "epoch": 0.17689865515642278, "grad_norm": 0.4280528724193573, "learning_rate": 4.910447036455839e-05, "loss": 0.2414, "step": 9918 }, { "epoch": 0.1769164912781365, "grad_norm": 0.30985012650489807, "learning_rate": 4.910405744935843e-05, "loss": 0.216, "step": 9919 }, { "epoch": 0.17693432739985018, "grad_norm": 0.37898531556129456, "learning_rate": 4.910364444072283e-05, "loss": 0.217, "step": 9920 }, { "epoch": 0.17695216352156387, "grad_norm": 0.4527721405029297, "learning_rate": 4.910323133865318e-05, "loss": 0.2087, "step": 9921 }, { "epoch": 0.17696999964327756, "grad_norm": 0.3203611969947815, "learning_rate": 4.910281814315108e-05, "loss": 0.2423, "step": 9922 }, { "epoch": 0.17698783576499125, "grad_norm": 0.3138802647590637, "learning_rate": 4.910240485421812e-05, "loss": 0.2013, "step": 9923 }, { "epoch": 0.17700567188670496, "grad_norm": 0.2812381386756897, "learning_rate": 4.9101991471855926e-05, "loss": 0.2333, "step": 9924 }, { "epoch": 0.17702350800841865, "grad_norm": 0.28126227855682373, "learning_rate": 4.9101577996066085e-05, "loss": 0.1604, "step": 9925 }, { "epoch": 0.17704134413013234, "grad_norm": 0.29395556449890137, "learning_rate": 4.91011644268502e-05, "loss": 0.2431, "step": 9926 }, { "epoch": 0.17705918025184603, "grad_norm": 0.3164561688899994, "learning_rate": 4.910075076420988e-05, "loss": 0.2506, "step": 9927 }, { "epoch": 0.17707701637355974, "grad_norm": 0.25460055470466614, "learning_rate": 4.910033700814673e-05, "loss": 0.1756, "step": 9928 }, { "epoch": 0.17709485249527343, "grad_norm": 0.3828481137752533, "learning_rate": 4.9099923158662346e-05, "loss": 0.1819, "step": 9929 }, { "epoch": 0.17711268861698712, "grad_norm": 0.3121713697910309, "learning_rate": 4.909950921575834e-05, "loss": 0.1679, "step": 9930 }, { "epoch": 0.1771305247387008, "grad_norm": 0.24948590993881226, "learning_rate": 4.9099095179436305e-05, "loss": 0.2032, "step": 9931 }, { "epoch": 0.17714836086041452, "grad_norm": 0.35499536991119385, "learning_rate": 4.909868104969786e-05, "loss": 0.2371, "step": 9932 }, { "epoch": 0.1771661969821282, "grad_norm": 0.3047904074192047, "learning_rate": 4.90982668265446e-05, "loss": 0.2402, "step": 9933 }, { "epoch": 0.1771840331038419, "grad_norm": 0.2975825369358063, "learning_rate": 4.909785250997814e-05, "loss": 0.1914, "step": 9934 }, { "epoch": 0.17720186922555559, "grad_norm": 0.382649689912796, "learning_rate": 4.909743810000008e-05, "loss": 0.2156, "step": 9935 }, { "epoch": 0.1772197053472693, "grad_norm": 0.33098697662353516, "learning_rate": 4.9097023596612024e-05, "loss": 0.2515, "step": 9936 }, { "epoch": 0.177237541468983, "grad_norm": 0.2597023844718933, "learning_rate": 4.9096608999815575e-05, "loss": 0.1875, "step": 9937 }, { "epoch": 0.17725537759069668, "grad_norm": 0.3416253626346588, "learning_rate": 4.909619430961235e-05, "loss": 0.2768, "step": 9938 }, { "epoch": 0.17727321371241037, "grad_norm": 0.2221720963716507, "learning_rate": 4.909577952600396e-05, "loss": 0.2055, "step": 9939 }, { "epoch": 0.17729104983412408, "grad_norm": 0.325078547000885, "learning_rate": 4.9095364648992e-05, "loss": 0.2108, "step": 9940 }, { "epoch": 0.17730888595583777, "grad_norm": 0.2751140594482422, "learning_rate": 4.9094949678578095e-05, "loss": 0.1819, "step": 9941 }, { "epoch": 0.17732672207755146, "grad_norm": 0.3418734669685364, "learning_rate": 4.909453461476383e-05, "loss": 0.2249, "step": 9942 }, { "epoch": 0.17734455819926515, "grad_norm": 0.28655219078063965, "learning_rate": 4.9094119457550834e-05, "loss": 0.2366, "step": 9943 }, { "epoch": 0.17736239432097883, "grad_norm": 0.25974804162979126, "learning_rate": 4.9093704206940705e-05, "loss": 0.2074, "step": 9944 }, { "epoch": 0.17738023044269255, "grad_norm": 0.31325864791870117, "learning_rate": 4.909328886293506e-05, "loss": 0.2014, "step": 9945 }, { "epoch": 0.17739806656440624, "grad_norm": 0.27111369371414185, "learning_rate": 4.909287342553551e-05, "loss": 0.2426, "step": 9946 }, { "epoch": 0.17741590268611993, "grad_norm": 0.35351452231407166, "learning_rate": 4.909245789474365e-05, "loss": 0.2444, "step": 9947 }, { "epoch": 0.1774337388078336, "grad_norm": 0.27385374903678894, "learning_rate": 4.909204227056111e-05, "loss": 0.1869, "step": 9948 }, { "epoch": 0.17745157492954733, "grad_norm": 0.3400082588195801, "learning_rate": 4.909162655298949e-05, "loss": 0.2009, "step": 9949 }, { "epoch": 0.17746941105126102, "grad_norm": 0.30281776189804077, "learning_rate": 4.90912107420304e-05, "loss": 0.2664, "step": 9950 }, { "epoch": 0.1774872471729747, "grad_norm": 0.3745337128639221, "learning_rate": 4.909079483768547e-05, "loss": 0.247, "step": 9951 }, { "epoch": 0.1775050832946884, "grad_norm": 0.38315799832344055, "learning_rate": 4.9090378839956285e-05, "loss": 0.2328, "step": 9952 }, { "epoch": 0.1775229194164021, "grad_norm": 0.30493584275245667, "learning_rate": 4.908996274884448e-05, "loss": 0.1915, "step": 9953 }, { "epoch": 0.1775407555381158, "grad_norm": 0.26232585310935974, "learning_rate": 4.908954656435165e-05, "loss": 0.2097, "step": 9954 }, { "epoch": 0.17755859165982948, "grad_norm": 0.35629040002822876, "learning_rate": 4.9089130286479424e-05, "loss": 0.1797, "step": 9955 }, { "epoch": 0.17757642778154317, "grad_norm": 0.27860766649246216, "learning_rate": 4.908871391522941e-05, "loss": 0.2176, "step": 9956 }, { "epoch": 0.1775942639032569, "grad_norm": 0.2514987587928772, "learning_rate": 4.908829745060321e-05, "loss": 0.1759, "step": 9957 }, { "epoch": 0.17761210002497058, "grad_norm": 0.3102920353412628, "learning_rate": 4.9087880892602466e-05, "loss": 0.2128, "step": 9958 }, { "epoch": 0.17762993614668426, "grad_norm": 0.24939922988414764, "learning_rate": 4.9087464241228765e-05, "loss": 0.1897, "step": 9959 }, { "epoch": 0.17764777226839795, "grad_norm": 0.326032429933548, "learning_rate": 4.908704749648374e-05, "loss": 0.2266, "step": 9960 }, { "epoch": 0.17766560839011167, "grad_norm": 0.3501764237880707, "learning_rate": 4.908663065836899e-05, "loss": 0.2301, "step": 9961 }, { "epoch": 0.17768344451182536, "grad_norm": 0.23558185994625092, "learning_rate": 4.908621372688614e-05, "loss": 0.2011, "step": 9962 }, { "epoch": 0.17770128063353904, "grad_norm": 0.30565160512924194, "learning_rate": 4.9085796702036817e-05, "loss": 0.1886, "step": 9963 }, { "epoch": 0.17771911675525273, "grad_norm": 0.2684495449066162, "learning_rate": 4.908537958382262e-05, "loss": 0.1652, "step": 9964 }, { "epoch": 0.17773695287696642, "grad_norm": 0.63982093334198, "learning_rate": 4.908496237224518e-05, "loss": 0.1859, "step": 9965 }, { "epoch": 0.17775478899868014, "grad_norm": 0.3628515899181366, "learning_rate": 4.9084545067306096e-05, "loss": 0.2356, "step": 9966 }, { "epoch": 0.17777262512039382, "grad_norm": 0.38683274388313293, "learning_rate": 4.9084127669007005e-05, "loss": 0.2076, "step": 9967 }, { "epoch": 0.1777904612421075, "grad_norm": 0.2974708378314972, "learning_rate": 4.9083710177349515e-05, "loss": 0.2423, "step": 9968 }, { "epoch": 0.1778082973638212, "grad_norm": 0.4412638247013092, "learning_rate": 4.908329259233525e-05, "loss": 0.2604, "step": 9969 }, { "epoch": 0.17782613348553492, "grad_norm": 0.29436057806015015, "learning_rate": 4.9082874913965815e-05, "loss": 0.1318, "step": 9970 }, { "epoch": 0.1778439696072486, "grad_norm": 0.4153296947479248, "learning_rate": 4.9082457142242845e-05, "loss": 0.1986, "step": 9971 }, { "epoch": 0.1778618057289623, "grad_norm": 0.26473212242126465, "learning_rate": 4.908203927716796e-05, "loss": 0.2138, "step": 9972 }, { "epoch": 0.17787964185067598, "grad_norm": 0.34248077869415283, "learning_rate": 4.908162131874276e-05, "loss": 0.2183, "step": 9973 }, { "epoch": 0.1778974779723897, "grad_norm": 0.2785325348377228, "learning_rate": 4.908120326696888e-05, "loss": 0.1947, "step": 9974 }, { "epoch": 0.17791531409410338, "grad_norm": 0.2864294648170471, "learning_rate": 4.908078512184795e-05, "loss": 0.1924, "step": 9975 }, { "epoch": 0.17793315021581707, "grad_norm": 0.584975004196167, "learning_rate": 4.908036688338157e-05, "loss": 0.1898, "step": 9976 }, { "epoch": 0.17795098633753076, "grad_norm": 0.25670674443244934, "learning_rate": 4.907994855157138e-05, "loss": 0.1677, "step": 9977 }, { "epoch": 0.17796882245924447, "grad_norm": 0.24501079320907593, "learning_rate": 4.9079530126418975e-05, "loss": 0.1651, "step": 9978 }, { "epoch": 0.17798665858095816, "grad_norm": 0.29451894760131836, "learning_rate": 4.907911160792601e-05, "loss": 0.2531, "step": 9979 }, { "epoch": 0.17800449470267185, "grad_norm": 0.3314656615257263, "learning_rate": 4.907869299609408e-05, "loss": 0.2115, "step": 9980 }, { "epoch": 0.17802233082438554, "grad_norm": 0.2793998718261719, "learning_rate": 4.907827429092483e-05, "loss": 0.2081, "step": 9981 }, { "epoch": 0.17804016694609923, "grad_norm": 0.2776472866535187, "learning_rate": 4.907785549241987e-05, "loss": 0.2439, "step": 9982 }, { "epoch": 0.17805800306781294, "grad_norm": 0.2417910248041153, "learning_rate": 4.907743660058082e-05, "loss": 0.2136, "step": 9983 }, { "epoch": 0.17807583918952663, "grad_norm": 0.2986113727092743, "learning_rate": 4.907701761540931e-05, "loss": 0.2501, "step": 9984 }, { "epoch": 0.17809367531124032, "grad_norm": 0.2245412915945053, "learning_rate": 4.907659853690698e-05, "loss": 0.2006, "step": 9985 }, { "epoch": 0.178111511432954, "grad_norm": 0.2672586143016815, "learning_rate": 4.907617936507543e-05, "loss": 0.2609, "step": 9986 }, { "epoch": 0.17812934755466772, "grad_norm": 0.2883603572845459, "learning_rate": 4.907576009991628e-05, "loss": 0.2264, "step": 9987 }, { "epoch": 0.1781471836763814, "grad_norm": 0.253579318523407, "learning_rate": 4.907534074143118e-05, "loss": 0.218, "step": 9988 }, { "epoch": 0.1781650197980951, "grad_norm": 0.2399081587791443, "learning_rate": 4.9074921289621745e-05, "loss": 0.2039, "step": 9989 }, { "epoch": 0.17818285591980879, "grad_norm": 0.2588498890399933, "learning_rate": 4.9074501744489596e-05, "loss": 0.1713, "step": 9990 }, { "epoch": 0.1782006920415225, "grad_norm": 0.2734178602695465, "learning_rate": 4.907408210603636e-05, "loss": 0.1993, "step": 9991 }, { "epoch": 0.1782185281632362, "grad_norm": 0.2253803014755249, "learning_rate": 4.9073662374263676e-05, "loss": 0.2069, "step": 9992 }, { "epoch": 0.17823636428494988, "grad_norm": 0.2691268026828766, "learning_rate": 4.9073242549173145e-05, "loss": 0.2307, "step": 9993 }, { "epoch": 0.17825420040666357, "grad_norm": 0.38317370414733887, "learning_rate": 4.907282263076643e-05, "loss": 0.1795, "step": 9994 }, { "epoch": 0.17827203652837728, "grad_norm": 0.31615981459617615, "learning_rate": 4.907240261904513e-05, "loss": 0.2273, "step": 9995 }, { "epoch": 0.17828987265009097, "grad_norm": 0.319078654050827, "learning_rate": 4.907198251401089e-05, "loss": 0.165, "step": 9996 }, { "epoch": 0.17830770877180466, "grad_norm": 0.31444546580314636, "learning_rate": 4.907156231566532e-05, "loss": 0.1997, "step": 9997 }, { "epoch": 0.17832554489351835, "grad_norm": 0.30600807070732117, "learning_rate": 4.907114202401008e-05, "loss": 0.2123, "step": 9998 }, { "epoch": 0.17834338101523206, "grad_norm": 0.3261357247829437, "learning_rate": 4.907072163904676e-05, "loss": 0.2054, "step": 9999 }, { "epoch": 0.17836121713694575, "grad_norm": 0.34018146991729736, "learning_rate": 4.907030116077702e-05, "loss": 0.2091, "step": 10000 }, { "epoch": 0.17836121713694575, "eval_loss": 0.20239697396755219, "eval_runtime": 106.9886, "eval_samples_per_second": 9.571, "eval_steps_per_second": 1.598, "step": 10000 }, { "epoch": 0.17837905325865944, "grad_norm": 0.40059399604797363, "learning_rate": 4.906988058920247e-05, "loss": 0.2009, "step": 10001 }, { "epoch": 0.17839688938037312, "grad_norm": 0.2519589960575104, "learning_rate": 4.9069459924324754e-05, "loss": 0.2261, "step": 10002 }, { "epoch": 0.1784147255020868, "grad_norm": 0.3123573064804077, "learning_rate": 4.90690391661455e-05, "loss": 0.2269, "step": 10003 }, { "epoch": 0.17843256162380053, "grad_norm": 0.319833904504776, "learning_rate": 4.906861831466634e-05, "loss": 0.1499, "step": 10004 }, { "epoch": 0.17845039774551422, "grad_norm": 0.31864285469055176, "learning_rate": 4.90681973698889e-05, "loss": 0.2233, "step": 10005 }, { "epoch": 0.1784682338672279, "grad_norm": 0.38282573223114014, "learning_rate": 4.906777633181481e-05, "loss": 0.1963, "step": 10006 }, { "epoch": 0.1784860699889416, "grad_norm": 0.2704548239707947, "learning_rate": 4.9067355200445706e-05, "loss": 0.1841, "step": 10007 }, { "epoch": 0.1785039061106553, "grad_norm": 0.33108240365982056, "learning_rate": 4.906693397578322e-05, "loss": 0.2074, "step": 10008 }, { "epoch": 0.178521742232369, "grad_norm": 0.3164540231227875, "learning_rate": 4.9066512657829e-05, "loss": 0.2126, "step": 10009 }, { "epoch": 0.17853957835408268, "grad_norm": 0.302031010389328, "learning_rate": 4.906609124658464e-05, "loss": 0.1862, "step": 10010 }, { "epoch": 0.17855741447579637, "grad_norm": 0.23535902798175812, "learning_rate": 4.906566974205182e-05, "loss": 0.1513, "step": 10011 }, { "epoch": 0.1785752505975101, "grad_norm": 0.28384605050086975, "learning_rate": 4.9065248144232144e-05, "loss": 0.2258, "step": 10012 }, { "epoch": 0.17859308671922378, "grad_norm": 0.36058810353279114, "learning_rate": 4.906482645312726e-05, "loss": 0.1631, "step": 10013 }, { "epoch": 0.17861092284093746, "grad_norm": 0.20574362576007843, "learning_rate": 4.906440466873878e-05, "loss": 0.2168, "step": 10014 }, { "epoch": 0.17862875896265115, "grad_norm": 0.23759403824806213, "learning_rate": 4.9063982791068377e-05, "loss": 0.1958, "step": 10015 }, { "epoch": 0.17864659508436487, "grad_norm": 0.2946540117263794, "learning_rate": 4.906356082011765e-05, "loss": 0.2133, "step": 10016 }, { "epoch": 0.17866443120607856, "grad_norm": 0.26436474919319153, "learning_rate": 4.906313875588826e-05, "loss": 0.1651, "step": 10017 }, { "epoch": 0.17868226732779224, "grad_norm": 0.3037585914134979, "learning_rate": 4.906271659838182e-05, "loss": 0.184, "step": 10018 }, { "epoch": 0.17870010344950593, "grad_norm": 0.4259195327758789, "learning_rate": 4.906229434759999e-05, "loss": 0.2351, "step": 10019 }, { "epoch": 0.17871793957121965, "grad_norm": 0.30201098322868347, "learning_rate": 4.9061872003544395e-05, "loss": 0.2287, "step": 10020 }, { "epoch": 0.17873577569293334, "grad_norm": 0.31891515851020813, "learning_rate": 4.9061449566216675e-05, "loss": 0.2143, "step": 10021 }, { "epoch": 0.17875361181464702, "grad_norm": 0.2972477972507477, "learning_rate": 4.906102703561846e-05, "loss": 0.1957, "step": 10022 }, { "epoch": 0.1787714479363607, "grad_norm": 0.2640993297100067, "learning_rate": 4.9060604411751396e-05, "loss": 0.1779, "step": 10023 }, { "epoch": 0.1787892840580744, "grad_norm": 0.21846936643123627, "learning_rate": 4.9060181694617123e-05, "loss": 0.1865, "step": 10024 }, { "epoch": 0.17880712017978811, "grad_norm": 0.28230080008506775, "learning_rate": 4.905975888421727e-05, "loss": 0.2395, "step": 10025 }, { "epoch": 0.1788249563015018, "grad_norm": 0.22010251879692078, "learning_rate": 4.905933598055349e-05, "loss": 0.1691, "step": 10026 }, { "epoch": 0.1788427924232155, "grad_norm": 0.3181862533092499, "learning_rate": 4.90589129836274e-05, "loss": 0.2341, "step": 10027 }, { "epoch": 0.17886062854492918, "grad_norm": 0.3197450041770935, "learning_rate": 4.9058489893440664e-05, "loss": 0.2166, "step": 10028 }, { "epoch": 0.1788784646666429, "grad_norm": 0.21327778697013855, "learning_rate": 4.905806670999491e-05, "loss": 0.1914, "step": 10029 }, { "epoch": 0.17889630078835658, "grad_norm": 0.20844419300556183, "learning_rate": 4.9057643433291776e-05, "loss": 0.1759, "step": 10030 }, { "epoch": 0.17891413691007027, "grad_norm": 0.21894694864749908, "learning_rate": 4.9057220063332914e-05, "loss": 0.2039, "step": 10031 }, { "epoch": 0.17893197303178396, "grad_norm": 0.25392717123031616, "learning_rate": 4.905679660011996e-05, "loss": 0.2122, "step": 10032 }, { "epoch": 0.17894980915349767, "grad_norm": 0.2689146399497986, "learning_rate": 4.9056373043654546e-05, "loss": 0.2041, "step": 10033 }, { "epoch": 0.17896764527521136, "grad_norm": 0.2774428427219391, "learning_rate": 4.905594939393831e-05, "loss": 0.2238, "step": 10034 }, { "epoch": 0.17898548139692505, "grad_norm": 0.23594151437282562, "learning_rate": 4.905552565097293e-05, "loss": 0.1941, "step": 10035 }, { "epoch": 0.17900331751863874, "grad_norm": 0.24410341680049896, "learning_rate": 4.905510181476001e-05, "loss": 0.1933, "step": 10036 }, { "epoch": 0.17902115364035245, "grad_norm": 0.3463740944862366, "learning_rate": 4.905467788530121e-05, "loss": 0.2435, "step": 10037 }, { "epoch": 0.17903898976206614, "grad_norm": 0.20690517127513885, "learning_rate": 4.905425386259817e-05, "loss": 0.1466, "step": 10038 }, { "epoch": 0.17905682588377983, "grad_norm": 0.44511640071868896, "learning_rate": 4.905382974665254e-05, "loss": 0.2505, "step": 10039 }, { "epoch": 0.17907466200549352, "grad_norm": 0.38292738795280457, "learning_rate": 4.9053405537465946e-05, "loss": 0.2146, "step": 10040 }, { "epoch": 0.17909249812720723, "grad_norm": 0.2860642075538635, "learning_rate": 4.905298123504005e-05, "loss": 0.2118, "step": 10041 }, { "epoch": 0.17911033424892092, "grad_norm": 0.2530413269996643, "learning_rate": 4.905255683937649e-05, "loss": 0.1631, "step": 10042 }, { "epoch": 0.1791281703706346, "grad_norm": 0.24118545651435852, "learning_rate": 4.9052132350476916e-05, "loss": 0.2145, "step": 10043 }, { "epoch": 0.1791460064923483, "grad_norm": 0.2952115535736084, "learning_rate": 4.9051707768342966e-05, "loss": 0.2511, "step": 10044 }, { "epoch": 0.17916384261406199, "grad_norm": 0.26165199279785156, "learning_rate": 4.905128309297629e-05, "loss": 0.1825, "step": 10045 }, { "epoch": 0.1791816787357757, "grad_norm": 0.35493025183677673, "learning_rate": 4.905085832437853e-05, "loss": 0.286, "step": 10046 }, { "epoch": 0.1791995148574894, "grad_norm": 0.21601396799087524, "learning_rate": 4.905043346255135e-05, "loss": 0.1767, "step": 10047 }, { "epoch": 0.17921735097920308, "grad_norm": 0.3951280117034912, "learning_rate": 4.905000850749637e-05, "loss": 0.187, "step": 10048 }, { "epoch": 0.17923518710091677, "grad_norm": 0.19908763468265533, "learning_rate": 4.904958345921525e-05, "loss": 0.2055, "step": 10049 }, { "epoch": 0.17925302322263048, "grad_norm": 0.3498621881008148, "learning_rate": 4.904915831770964e-05, "loss": 0.2689, "step": 10050 }, { "epoch": 0.17927085934434417, "grad_norm": 0.19761058688163757, "learning_rate": 4.904873308298119e-05, "loss": 0.2118, "step": 10051 }, { "epoch": 0.17928869546605786, "grad_norm": 0.20359058678150177, "learning_rate": 4.9048307755031544e-05, "loss": 0.184, "step": 10052 }, { "epoch": 0.17930653158777154, "grad_norm": 0.2612164318561554, "learning_rate": 4.904788233386235e-05, "loss": 0.229, "step": 10053 }, { "epoch": 0.17932436770948526, "grad_norm": 0.4159994423389435, "learning_rate": 4.904745681947526e-05, "loss": 0.2232, "step": 10054 }, { "epoch": 0.17934220383119895, "grad_norm": 0.26582232117652893, "learning_rate": 4.904703121187192e-05, "loss": 0.1779, "step": 10055 }, { "epoch": 0.17936003995291264, "grad_norm": 0.2905386686325073, "learning_rate": 4.904660551105398e-05, "loss": 0.1675, "step": 10056 }, { "epoch": 0.17937787607462632, "grad_norm": 0.2359733134508133, "learning_rate": 4.904617971702309e-05, "loss": 0.192, "step": 10057 }, { "epoch": 0.17939571219634004, "grad_norm": 0.3787361681461334, "learning_rate": 4.904575382978091e-05, "loss": 0.2654, "step": 10058 }, { "epoch": 0.17941354831805373, "grad_norm": 0.2540598511695862, "learning_rate": 4.904532784932907e-05, "loss": 0.2457, "step": 10059 }, { "epoch": 0.17943138443976742, "grad_norm": 0.24077114462852478, "learning_rate": 4.904490177566925e-05, "loss": 0.1961, "step": 10060 }, { "epoch": 0.1794492205614811, "grad_norm": 0.3214060664176941, "learning_rate": 4.9044475608803074e-05, "loss": 0.1926, "step": 10061 }, { "epoch": 0.17946705668319482, "grad_norm": 0.2988150417804718, "learning_rate": 4.904404934873221e-05, "loss": 0.2524, "step": 10062 }, { "epoch": 0.1794848928049085, "grad_norm": 0.2745426297187805, "learning_rate": 4.9043622995458306e-05, "loss": 0.1605, "step": 10063 }, { "epoch": 0.1795027289266222, "grad_norm": 0.20333455502986908, "learning_rate": 4.904319654898302e-05, "loss": 0.1994, "step": 10064 }, { "epoch": 0.17952056504833588, "grad_norm": 0.28882116079330444, "learning_rate": 4.904277000930799e-05, "loss": 0.2462, "step": 10065 }, { "epoch": 0.17953840117004957, "grad_norm": 0.2296663224697113, "learning_rate": 4.9042343376434887e-05, "loss": 0.2299, "step": 10066 }, { "epoch": 0.1795562372917633, "grad_norm": 0.22529172897338867, "learning_rate": 4.904191665036535e-05, "loss": 0.1718, "step": 10067 }, { "epoch": 0.17957407341347698, "grad_norm": 0.3339504897594452, "learning_rate": 4.904148983110105e-05, "loss": 0.2199, "step": 10068 }, { "epoch": 0.17959190953519066, "grad_norm": 0.22459593415260315, "learning_rate": 4.904106291864362e-05, "loss": 0.207, "step": 10069 }, { "epoch": 0.17960974565690435, "grad_norm": 0.3362443745136261, "learning_rate": 4.904063591299474e-05, "loss": 0.2692, "step": 10070 }, { "epoch": 0.17962758177861807, "grad_norm": 0.28358936309814453, "learning_rate": 4.904020881415604e-05, "loss": 0.2137, "step": 10071 }, { "epoch": 0.17964541790033176, "grad_norm": 0.28809306025505066, "learning_rate": 4.9039781622129185e-05, "loss": 0.2255, "step": 10072 }, { "epoch": 0.17966325402204544, "grad_norm": 0.3335660398006439, "learning_rate": 4.903935433691584e-05, "loss": 0.2067, "step": 10073 }, { "epoch": 0.17968109014375913, "grad_norm": 0.9616582989692688, "learning_rate": 4.903892695851766e-05, "loss": 0.2188, "step": 10074 }, { "epoch": 0.17969892626547285, "grad_norm": 0.32171082496643066, "learning_rate": 4.9038499486936296e-05, "loss": 0.2343, "step": 10075 }, { "epoch": 0.17971676238718653, "grad_norm": 0.3304702341556549, "learning_rate": 4.90380719221734e-05, "loss": 0.2034, "step": 10076 }, { "epoch": 0.17973459850890022, "grad_norm": 0.2655666768550873, "learning_rate": 4.9037644264230634e-05, "loss": 0.2003, "step": 10077 }, { "epoch": 0.1797524346306139, "grad_norm": 0.3717540204524994, "learning_rate": 4.903721651310966e-05, "loss": 0.2121, "step": 10078 }, { "epoch": 0.17977027075232763, "grad_norm": 0.2529218792915344, "learning_rate": 4.903678866881213e-05, "loss": 0.2095, "step": 10079 }, { "epoch": 0.17978810687404131, "grad_norm": 0.2826620936393738, "learning_rate": 4.9036360731339706e-05, "loss": 0.1921, "step": 10080 }, { "epoch": 0.179805942995755, "grad_norm": 0.25628387928009033, "learning_rate": 4.903593270069404e-05, "loss": 0.2037, "step": 10081 }, { "epoch": 0.1798237791174687, "grad_norm": 0.22990234196186066, "learning_rate": 4.90355045768768e-05, "loss": 0.1803, "step": 10082 }, { "epoch": 0.17984161523918238, "grad_norm": 0.24044960737228394, "learning_rate": 4.903507635988965e-05, "loss": 0.1575, "step": 10083 }, { "epoch": 0.1798594513608961, "grad_norm": 0.24302862584590912, "learning_rate": 4.903464804973424e-05, "loss": 0.2132, "step": 10084 }, { "epoch": 0.17987728748260978, "grad_norm": 0.5112969279289246, "learning_rate": 4.903421964641223e-05, "loss": 0.1884, "step": 10085 }, { "epoch": 0.17989512360432347, "grad_norm": 0.28698021173477173, "learning_rate": 4.903379114992528e-05, "loss": 0.194, "step": 10086 }, { "epoch": 0.17991295972603716, "grad_norm": 0.28727683424949646, "learning_rate": 4.9033362560275066e-05, "loss": 0.2345, "step": 10087 }, { "epoch": 0.17993079584775087, "grad_norm": 0.2771809995174408, "learning_rate": 4.903293387746323e-05, "loss": 0.2315, "step": 10088 }, { "epoch": 0.17994863196946456, "grad_norm": 0.30761608481407166, "learning_rate": 4.9032505101491436e-05, "loss": 0.2544, "step": 10089 }, { "epoch": 0.17996646809117825, "grad_norm": 0.3372572958469391, "learning_rate": 4.903207623236136e-05, "loss": 0.1898, "step": 10090 }, { "epoch": 0.17998430421289194, "grad_norm": 0.29283350706100464, "learning_rate": 4.9031647270074655e-05, "loss": 0.1629, "step": 10091 }, { "epoch": 0.18000214033460565, "grad_norm": 0.2555445730686188, "learning_rate": 4.903121821463299e-05, "loss": 0.1916, "step": 10092 }, { "epoch": 0.18001997645631934, "grad_norm": 0.25533509254455566, "learning_rate": 4.903078906603801e-05, "loss": 0.2242, "step": 10093 }, { "epoch": 0.18003781257803303, "grad_norm": 0.33725500106811523, "learning_rate": 4.90303598242914e-05, "loss": 0.2191, "step": 10094 }, { "epoch": 0.18005564869974672, "grad_norm": 0.28250038623809814, "learning_rate": 4.902993048939482e-05, "loss": 0.2508, "step": 10095 }, { "epoch": 0.18007348482146043, "grad_norm": 0.2518704831600189, "learning_rate": 4.902950106134992e-05, "loss": 0.1816, "step": 10096 }, { "epoch": 0.18009132094317412, "grad_norm": 0.32761144638061523, "learning_rate": 4.902907154015838e-05, "loss": 0.2432, "step": 10097 }, { "epoch": 0.1801091570648878, "grad_norm": 0.27961263060569763, "learning_rate": 4.902864192582185e-05, "loss": 0.1908, "step": 10098 }, { "epoch": 0.1801269931866015, "grad_norm": 0.22646716237068176, "learning_rate": 4.902821221834202e-05, "loss": 0.2251, "step": 10099 }, { "epoch": 0.1801448293083152, "grad_norm": 0.30442455410957336, "learning_rate": 4.902778241772053e-05, "loss": 0.1881, "step": 10100 }, { "epoch": 0.1801626654300289, "grad_norm": 0.34533753991127014, "learning_rate": 4.9027352523959056e-05, "loss": 0.2673, "step": 10101 }, { "epoch": 0.1801805015517426, "grad_norm": 0.23440693318843842, "learning_rate": 4.902692253705927e-05, "loss": 0.2069, "step": 10102 }, { "epoch": 0.18019833767345628, "grad_norm": 0.31848660111427307, "learning_rate": 4.9026492457022834e-05, "loss": 0.2152, "step": 10103 }, { "epoch": 0.18021617379516996, "grad_norm": 0.17989078164100647, "learning_rate": 4.9026062283851404e-05, "loss": 0.1923, "step": 10104 }, { "epoch": 0.18023400991688368, "grad_norm": 0.24270272254943848, "learning_rate": 4.9025632017546675e-05, "loss": 0.1616, "step": 10105 }, { "epoch": 0.18025184603859737, "grad_norm": 0.3410046100616455, "learning_rate": 4.902520165811029e-05, "loss": 0.2518, "step": 10106 }, { "epoch": 0.18026968216031106, "grad_norm": 0.20635966956615448, "learning_rate": 4.902477120554392e-05, "loss": 0.1945, "step": 10107 }, { "epoch": 0.18028751828202474, "grad_norm": 0.24014919996261597, "learning_rate": 4.9024340659849244e-05, "loss": 0.2093, "step": 10108 }, { "epoch": 0.18030535440373846, "grad_norm": 0.2237083911895752, "learning_rate": 4.902391002102792e-05, "loss": 0.1716, "step": 10109 }, { "epoch": 0.18032319052545215, "grad_norm": 0.24098485708236694, "learning_rate": 4.902347928908163e-05, "loss": 0.1786, "step": 10110 }, { "epoch": 0.18034102664716584, "grad_norm": 0.2782251238822937, "learning_rate": 4.902304846401204e-05, "loss": 0.1624, "step": 10111 }, { "epoch": 0.18035886276887952, "grad_norm": 0.2588271498680115, "learning_rate": 4.9022617545820815e-05, "loss": 0.2122, "step": 10112 }, { "epoch": 0.18037669889059324, "grad_norm": 0.3134334683418274, "learning_rate": 4.9022186534509626e-05, "loss": 0.2113, "step": 10113 }, { "epoch": 0.18039453501230693, "grad_norm": 0.24441859126091003, "learning_rate": 4.902175543008014e-05, "loss": 0.2083, "step": 10114 }, { "epoch": 0.18041237113402062, "grad_norm": 0.25083234906196594, "learning_rate": 4.902132423253404e-05, "loss": 0.1869, "step": 10115 }, { "epoch": 0.1804302072557343, "grad_norm": 0.23445671796798706, "learning_rate": 4.9020892941872985e-05, "loss": 0.1789, "step": 10116 }, { "epoch": 0.18044804337744802, "grad_norm": 0.2497745305299759, "learning_rate": 4.9020461558098655e-05, "loss": 0.1765, "step": 10117 }, { "epoch": 0.1804658794991617, "grad_norm": 0.31935933232307434, "learning_rate": 4.902003008121272e-05, "loss": 0.2159, "step": 10118 }, { "epoch": 0.1804837156208754, "grad_norm": 0.2808605134487152, "learning_rate": 4.9019598511216844e-05, "loss": 0.2251, "step": 10119 }, { "epoch": 0.18050155174258908, "grad_norm": 0.28707271814346313, "learning_rate": 4.901916684811272e-05, "loss": 0.1909, "step": 10120 }, { "epoch": 0.1805193878643028, "grad_norm": 0.2506902515888214, "learning_rate": 4.9018735091902005e-05, "loss": 0.1728, "step": 10121 }, { "epoch": 0.1805372239860165, "grad_norm": 0.3061169385910034, "learning_rate": 4.901830324258638e-05, "loss": 0.2428, "step": 10122 }, { "epoch": 0.18055506010773018, "grad_norm": 0.18775995075702667, "learning_rate": 4.901787130016751e-05, "loss": 0.213, "step": 10123 }, { "epoch": 0.18057289622944386, "grad_norm": 0.20270133018493652, "learning_rate": 4.901743926464708e-05, "loss": 0.2191, "step": 10124 }, { "epoch": 0.18059073235115755, "grad_norm": 0.307338148355484, "learning_rate": 4.9017007136026763e-05, "loss": 0.1448, "step": 10125 }, { "epoch": 0.18060856847287127, "grad_norm": 0.26945802569389343, "learning_rate": 4.9016574914308224e-05, "loss": 0.1872, "step": 10126 }, { "epoch": 0.18062640459458496, "grad_norm": 0.46143609285354614, "learning_rate": 4.901614259949315e-05, "loss": 0.2883, "step": 10127 }, { "epoch": 0.18064424071629864, "grad_norm": 0.2881545126438141, "learning_rate": 4.9015710191583206e-05, "loss": 0.2017, "step": 10128 }, { "epoch": 0.18066207683801233, "grad_norm": 0.23882745206356049, "learning_rate": 4.901527769058008e-05, "loss": 0.1722, "step": 10129 }, { "epoch": 0.18067991295972605, "grad_norm": 0.29898542165756226, "learning_rate": 4.901484509648544e-05, "loss": 0.2052, "step": 10130 }, { "epoch": 0.18069774908143973, "grad_norm": 0.26189887523651123, "learning_rate": 4.9014412409300966e-05, "loss": 0.1923, "step": 10131 }, { "epoch": 0.18071558520315342, "grad_norm": 0.29160553216934204, "learning_rate": 4.901397962902834e-05, "loss": 0.2391, "step": 10132 }, { "epoch": 0.1807334213248671, "grad_norm": 0.39594241976737976, "learning_rate": 4.9013546755669236e-05, "loss": 0.2531, "step": 10133 }, { "epoch": 0.18075125744658083, "grad_norm": 0.2338830679655075, "learning_rate": 4.901311378922532e-05, "loss": 0.2227, "step": 10134 }, { "epoch": 0.18076909356829451, "grad_norm": 0.32091307640075684, "learning_rate": 4.901268072969829e-05, "loss": 0.2462, "step": 10135 }, { "epoch": 0.1807869296900082, "grad_norm": 0.27749884128570557, "learning_rate": 4.9012247577089815e-05, "loss": 0.205, "step": 10136 }, { "epoch": 0.1808047658117219, "grad_norm": 0.2812494933605194, "learning_rate": 4.9011814331401575e-05, "loss": 0.2368, "step": 10137 }, { "epoch": 0.1808226019334356, "grad_norm": 0.24878400564193726, "learning_rate": 4.901138099263525e-05, "loss": 0.2003, "step": 10138 }, { "epoch": 0.1808404380551493, "grad_norm": 0.33797627687454224, "learning_rate": 4.901094756079251e-05, "loss": 0.257, "step": 10139 }, { "epoch": 0.18085827417686298, "grad_norm": 0.32351264357566833, "learning_rate": 4.901051403587506e-05, "loss": 0.2164, "step": 10140 }, { "epoch": 0.18087611029857667, "grad_norm": 0.2124016135931015, "learning_rate": 4.901008041788455e-05, "loss": 0.1737, "step": 10141 }, { "epoch": 0.18089394642029039, "grad_norm": 0.22003863751888275, "learning_rate": 4.900964670682268e-05, "loss": 0.185, "step": 10142 }, { "epoch": 0.18091178254200407, "grad_norm": 0.269803524017334, "learning_rate": 4.900921290269113e-05, "loss": 0.2116, "step": 10143 }, { "epoch": 0.18092961866371776, "grad_norm": 0.3971666097640991, "learning_rate": 4.900877900549158e-05, "loss": 0.2402, "step": 10144 }, { "epoch": 0.18094745478543145, "grad_norm": 0.2891790568828583, "learning_rate": 4.90083450152257e-05, "loss": 0.1826, "step": 10145 }, { "epoch": 0.18096529090714514, "grad_norm": 0.44094592332839966, "learning_rate": 4.900791093189519e-05, "loss": 0.2428, "step": 10146 }, { "epoch": 0.18098312702885885, "grad_norm": 0.3213162422180176, "learning_rate": 4.900747675550172e-05, "loss": 0.2035, "step": 10147 }, { "epoch": 0.18100096315057254, "grad_norm": 0.3137926459312439, "learning_rate": 4.900704248604698e-05, "loss": 0.1753, "step": 10148 }, { "epoch": 0.18101879927228623, "grad_norm": 0.3039524555206299, "learning_rate": 4.900660812353266e-05, "loss": 0.2506, "step": 10149 }, { "epoch": 0.18103663539399992, "grad_norm": 0.32710862159729004, "learning_rate": 4.900617366796043e-05, "loss": 0.1822, "step": 10150 }, { "epoch": 0.18105447151571363, "grad_norm": 0.3087460398674011, "learning_rate": 4.900573911933197e-05, "loss": 0.1597, "step": 10151 }, { "epoch": 0.18107230763742732, "grad_norm": 0.2529021203517914, "learning_rate": 4.900530447764899e-05, "loss": 0.2498, "step": 10152 }, { "epoch": 0.181090143759141, "grad_norm": 0.3058854937553406, "learning_rate": 4.900486974291315e-05, "loss": 0.2046, "step": 10153 }, { "epoch": 0.1811079798808547, "grad_norm": 0.4110319912433624, "learning_rate": 4.9004434915126144e-05, "loss": 0.1871, "step": 10154 }, { "epoch": 0.1811258160025684, "grad_norm": 0.24077773094177246, "learning_rate": 4.900399999428966e-05, "loss": 0.2313, "step": 10155 }, { "epoch": 0.1811436521242821, "grad_norm": 0.37185460329055786, "learning_rate": 4.900356498040538e-05, "loss": 0.2642, "step": 10156 }, { "epoch": 0.1811614882459958, "grad_norm": 0.252057820558548, "learning_rate": 4.900312987347498e-05, "loss": 0.1816, "step": 10157 }, { "epoch": 0.18117932436770948, "grad_norm": 0.3014383912086487, "learning_rate": 4.900269467350018e-05, "loss": 0.2613, "step": 10158 }, { "epoch": 0.1811971604894232, "grad_norm": 0.3230541944503784, "learning_rate": 4.900225938048263e-05, "loss": 0.2529, "step": 10159 }, { "epoch": 0.18121499661113688, "grad_norm": 0.4753343462944031, "learning_rate": 4.900182399442404e-05, "loss": 0.1648, "step": 10160 }, { "epoch": 0.18123283273285057, "grad_norm": 0.3304778039455414, "learning_rate": 4.9001388515326085e-05, "loss": 0.1849, "step": 10161 }, { "epoch": 0.18125066885456426, "grad_norm": 0.22614115476608276, "learning_rate": 4.900095294319046e-05, "loss": 0.2063, "step": 10162 }, { "epoch": 0.18126850497627794, "grad_norm": 0.23606637120246887, "learning_rate": 4.900051727801885e-05, "loss": 0.214, "step": 10163 }, { "epoch": 0.18128634109799166, "grad_norm": 0.28194326162338257, "learning_rate": 4.900008151981295e-05, "loss": 0.2027, "step": 10164 }, { "epoch": 0.18130417721970535, "grad_norm": 0.337079793214798, "learning_rate": 4.899964566857444e-05, "loss": 0.204, "step": 10165 }, { "epoch": 0.18132201334141904, "grad_norm": 0.31034016609191895, "learning_rate": 4.899920972430502e-05, "loss": 0.2246, "step": 10166 }, { "epoch": 0.18133984946313272, "grad_norm": 0.32246097922325134, "learning_rate": 4.899877368700637e-05, "loss": 0.1863, "step": 10167 }, { "epoch": 0.18135768558484644, "grad_norm": 0.3250961899757385, "learning_rate": 4.8998337556680186e-05, "loss": 0.1845, "step": 10168 }, { "epoch": 0.18137552170656013, "grad_norm": 0.3406941592693329, "learning_rate": 4.8997901333328156e-05, "loss": 0.2148, "step": 10169 }, { "epoch": 0.18139335782827382, "grad_norm": 0.27561306953430176, "learning_rate": 4.899746501695197e-05, "loss": 0.2462, "step": 10170 }, { "epoch": 0.1814111939499875, "grad_norm": 0.2547914385795593, "learning_rate": 4.8997028607553316e-05, "loss": 0.2442, "step": 10171 }, { "epoch": 0.18142903007170122, "grad_norm": 0.24809367954730988, "learning_rate": 4.89965921051339e-05, "loss": 0.258, "step": 10172 }, { "epoch": 0.1814468661934149, "grad_norm": 0.39914289116859436, "learning_rate": 4.899615550969541e-05, "loss": 0.2125, "step": 10173 }, { "epoch": 0.1814647023151286, "grad_norm": 0.23397384583950043, "learning_rate": 4.8995718821239525e-05, "loss": 0.2111, "step": 10174 }, { "epoch": 0.18148253843684228, "grad_norm": 0.28204038739204407, "learning_rate": 4.8995282039767945e-05, "loss": 0.2132, "step": 10175 }, { "epoch": 0.181500374558556, "grad_norm": 0.26229846477508545, "learning_rate": 4.899484516528236e-05, "loss": 0.1764, "step": 10176 }, { "epoch": 0.1815182106802697, "grad_norm": 0.182540163397789, "learning_rate": 4.899440819778448e-05, "loss": 0.1721, "step": 10177 }, { "epoch": 0.18153604680198338, "grad_norm": 0.3202981948852539, "learning_rate": 4.899397113727597e-05, "loss": 0.2177, "step": 10178 }, { "epoch": 0.18155388292369706, "grad_norm": 0.219949871301651, "learning_rate": 4.8993533983758554e-05, "loss": 0.1655, "step": 10179 }, { "epoch": 0.18157171904541078, "grad_norm": 0.275773286819458, "learning_rate": 4.8993096737233915e-05, "loss": 0.2187, "step": 10180 }, { "epoch": 0.18158955516712447, "grad_norm": 0.2234012335538864, "learning_rate": 4.899265939770374e-05, "loss": 0.1435, "step": 10181 }, { "epoch": 0.18160739128883815, "grad_norm": 0.2315882295370102, "learning_rate": 4.899222196516973e-05, "loss": 0.198, "step": 10182 }, { "epoch": 0.18162522741055184, "grad_norm": 0.2787816524505615, "learning_rate": 4.899178443963358e-05, "loss": 0.1807, "step": 10183 }, { "epoch": 0.18164306353226553, "grad_norm": 0.2763764560222626, "learning_rate": 4.899134682109699e-05, "loss": 0.1814, "step": 10184 }, { "epoch": 0.18166089965397925, "grad_norm": 0.31553885340690613, "learning_rate": 4.8990909109561655e-05, "loss": 0.2394, "step": 10185 }, { "epoch": 0.18167873577569293, "grad_norm": 0.27082380652427673, "learning_rate": 4.899047130502926e-05, "loss": 0.2341, "step": 10186 }, { "epoch": 0.18169657189740662, "grad_norm": 0.48682701587677, "learning_rate": 4.899003340750152e-05, "loss": 0.2737, "step": 10187 }, { "epoch": 0.1817144080191203, "grad_norm": 0.30625849962234497, "learning_rate": 4.8989595416980126e-05, "loss": 0.2232, "step": 10188 }, { "epoch": 0.18173224414083403, "grad_norm": 0.30230265855789185, "learning_rate": 4.898915733346677e-05, "loss": 0.2432, "step": 10189 }, { "epoch": 0.18175008026254771, "grad_norm": 0.34137627482414246, "learning_rate": 4.898871915696316e-05, "loss": 0.2677, "step": 10190 }, { "epoch": 0.1817679163842614, "grad_norm": 0.20676808059215546, "learning_rate": 4.898828088747099e-05, "loss": 0.2142, "step": 10191 }, { "epoch": 0.1817857525059751, "grad_norm": 0.26453015208244324, "learning_rate": 4.8987842524991956e-05, "loss": 0.2176, "step": 10192 }, { "epoch": 0.1818035886276888, "grad_norm": 0.35668259859085083, "learning_rate": 4.898740406952775e-05, "loss": 0.2253, "step": 10193 }, { "epoch": 0.1818214247494025, "grad_norm": 0.275824636220932, "learning_rate": 4.8986965521080095e-05, "loss": 0.2202, "step": 10194 }, { "epoch": 0.18183926087111618, "grad_norm": 0.2264062613248825, "learning_rate": 4.898652687965067e-05, "loss": 0.1946, "step": 10195 }, { "epoch": 0.18185709699282987, "grad_norm": 0.3434748649597168, "learning_rate": 4.898608814524118e-05, "loss": 0.2257, "step": 10196 }, { "epoch": 0.18187493311454359, "grad_norm": 0.49679169058799744, "learning_rate": 4.898564931785333e-05, "loss": 0.2124, "step": 10197 }, { "epoch": 0.18189276923625727, "grad_norm": 0.20148178935050964, "learning_rate": 4.8985210397488825e-05, "loss": 0.1772, "step": 10198 }, { "epoch": 0.18191060535797096, "grad_norm": 0.2926502227783203, "learning_rate": 4.898477138414935e-05, "loss": 0.2321, "step": 10199 }, { "epoch": 0.18192844147968465, "grad_norm": 0.29967305064201355, "learning_rate": 4.898433227783662e-05, "loss": 0.188, "step": 10200 }, { "epoch": 0.18194627760139837, "grad_norm": 0.2732852101325989, "learning_rate": 4.8983893078552336e-05, "loss": 0.1824, "step": 10201 }, { "epoch": 0.18196411372311205, "grad_norm": 0.2793472111225128, "learning_rate": 4.898345378629819e-05, "loss": 0.1622, "step": 10202 }, { "epoch": 0.18198194984482574, "grad_norm": 0.29475104808807373, "learning_rate": 4.898301440107591e-05, "loss": 0.2124, "step": 10203 }, { "epoch": 0.18199978596653943, "grad_norm": 0.28337574005126953, "learning_rate": 4.898257492288718e-05, "loss": 0.2164, "step": 10204 }, { "epoch": 0.18201762208825312, "grad_norm": 0.34891387820243835, "learning_rate": 4.898213535173369e-05, "loss": 0.2182, "step": 10205 }, { "epoch": 0.18203545820996683, "grad_norm": 0.326509028673172, "learning_rate": 4.898169568761718e-05, "loss": 0.2226, "step": 10206 }, { "epoch": 0.18205329433168052, "grad_norm": 0.2673135995864868, "learning_rate": 4.898125593053932e-05, "loss": 0.1923, "step": 10207 }, { "epoch": 0.1820711304533942, "grad_norm": 0.3589719831943512, "learning_rate": 4.8980816080501836e-05, "loss": 0.3074, "step": 10208 }, { "epoch": 0.1820889665751079, "grad_norm": 0.24968448281288147, "learning_rate": 4.8980376137506425e-05, "loss": 0.172, "step": 10209 }, { "epoch": 0.1821068026968216, "grad_norm": 0.29482802748680115, "learning_rate": 4.897993610155479e-05, "loss": 0.2223, "step": 10210 }, { "epoch": 0.1821246388185353, "grad_norm": 0.3561578392982483, "learning_rate": 4.8979495972648645e-05, "loss": 0.2261, "step": 10211 }, { "epoch": 0.182142474940249, "grad_norm": 0.27590808272361755, "learning_rate": 4.897905575078969e-05, "loss": 0.2053, "step": 10212 }, { "epoch": 0.18216031106196268, "grad_norm": 0.2933117747306824, "learning_rate": 4.8978615435979635e-05, "loss": 0.2206, "step": 10213 }, { "epoch": 0.1821781471836764, "grad_norm": 0.2966252565383911, "learning_rate": 4.897817502822018e-05, "loss": 0.199, "step": 10214 }, { "epoch": 0.18219598330539008, "grad_norm": 0.29027706384658813, "learning_rate": 4.897773452751304e-05, "loss": 0.1623, "step": 10215 }, { "epoch": 0.18221381942710377, "grad_norm": 0.271456241607666, "learning_rate": 4.897729393385992e-05, "loss": 0.1994, "step": 10216 }, { "epoch": 0.18223165554881746, "grad_norm": 0.33194246888160706, "learning_rate": 4.8976853247262524e-05, "loss": 0.2635, "step": 10217 }, { "epoch": 0.18224949167053117, "grad_norm": 0.3467560112476349, "learning_rate": 4.897641246772257e-05, "loss": 0.2193, "step": 10218 }, { "epoch": 0.18226732779224486, "grad_norm": 0.2434745728969574, "learning_rate": 4.897597159524175e-05, "loss": 0.2056, "step": 10219 }, { "epoch": 0.18228516391395855, "grad_norm": 0.3007213771343231, "learning_rate": 4.8975530629821784e-05, "loss": 0.2594, "step": 10220 }, { "epoch": 0.18230300003567224, "grad_norm": 0.265828013420105, "learning_rate": 4.8975089571464386e-05, "loss": 0.2334, "step": 10221 }, { "epoch": 0.18232083615738595, "grad_norm": 0.2535021901130676, "learning_rate": 4.8974648420171264e-05, "loss": 0.2272, "step": 10222 }, { "epoch": 0.18233867227909964, "grad_norm": 0.3300265371799469, "learning_rate": 4.897420717594412e-05, "loss": 0.212, "step": 10223 }, { "epoch": 0.18235650840081333, "grad_norm": 0.40497615933418274, "learning_rate": 4.897376583878467e-05, "loss": 0.2154, "step": 10224 }, { "epoch": 0.18237434452252702, "grad_norm": 0.28284552693367004, "learning_rate": 4.8973324408694617e-05, "loss": 0.2036, "step": 10225 }, { "epoch": 0.1823921806442407, "grad_norm": 0.2925427556037903, "learning_rate": 4.897288288567568e-05, "loss": 0.2122, "step": 10226 }, { "epoch": 0.18241001676595442, "grad_norm": 0.4144842028617859, "learning_rate": 4.8972441269729576e-05, "loss": 0.2253, "step": 10227 }, { "epoch": 0.1824278528876681, "grad_norm": 0.267392098903656, "learning_rate": 4.8971999560858e-05, "loss": 0.2377, "step": 10228 }, { "epoch": 0.1824456890093818, "grad_norm": 0.33622896671295166, "learning_rate": 4.897155775906268e-05, "loss": 0.2393, "step": 10229 }, { "epoch": 0.18246352513109548, "grad_norm": 0.2719358205795288, "learning_rate": 4.897111586434532e-05, "loss": 0.1879, "step": 10230 }, { "epoch": 0.1824813612528092, "grad_norm": 0.22177031636238098, "learning_rate": 4.8970673876707643e-05, "loss": 0.1907, "step": 10231 }, { "epoch": 0.1824991973745229, "grad_norm": 0.2801766097545624, "learning_rate": 4.8970231796151345e-05, "loss": 0.1349, "step": 10232 }, { "epoch": 0.18251703349623657, "grad_norm": 0.32964321970939636, "learning_rate": 4.8969789622678155e-05, "loss": 0.2377, "step": 10233 }, { "epoch": 0.18253486961795026, "grad_norm": 0.26035168766975403, "learning_rate": 4.896934735628978e-05, "loss": 0.233, "step": 10234 }, { "epoch": 0.18255270573966398, "grad_norm": 0.30898937582969666, "learning_rate": 4.8968904996987936e-05, "loss": 0.2615, "step": 10235 }, { "epoch": 0.18257054186137767, "grad_norm": 0.3576295077800751, "learning_rate": 4.896846254477434e-05, "loss": 0.1934, "step": 10236 }, { "epoch": 0.18258837798309135, "grad_norm": 0.2590436339378357, "learning_rate": 4.89680199996507e-05, "loss": 0.2266, "step": 10237 }, { "epoch": 0.18260621410480504, "grad_norm": 0.25483256578445435, "learning_rate": 4.896757736161874e-05, "loss": 0.1919, "step": 10238 }, { "epoch": 0.18262405022651876, "grad_norm": 0.3020104467868805, "learning_rate": 4.896713463068017e-05, "loss": 0.2111, "step": 10239 }, { "epoch": 0.18264188634823245, "grad_norm": 0.23808705806732178, "learning_rate": 4.896669180683671e-05, "loss": 0.2008, "step": 10240 }, { "epoch": 0.18265972246994613, "grad_norm": 0.37742194533348083, "learning_rate": 4.8966248890090075e-05, "loss": 0.2515, "step": 10241 }, { "epoch": 0.18267755859165982, "grad_norm": 0.3267439901828766, "learning_rate": 4.896580588044198e-05, "loss": 0.2342, "step": 10242 }, { "epoch": 0.18269539471337354, "grad_norm": 0.25319239497184753, "learning_rate": 4.896536277789414e-05, "loss": 0.206, "step": 10243 }, { "epoch": 0.18271323083508723, "grad_norm": 0.4356033504009247, "learning_rate": 4.896491958244828e-05, "loss": 0.2916, "step": 10244 }, { "epoch": 0.1827310669568009, "grad_norm": 0.4053899347782135, "learning_rate": 4.896447629410612e-05, "loss": 0.2414, "step": 10245 }, { "epoch": 0.1827489030785146, "grad_norm": 0.28319886326789856, "learning_rate": 4.8964032912869364e-05, "loss": 0.1892, "step": 10246 }, { "epoch": 0.1827667392002283, "grad_norm": 0.23461323976516724, "learning_rate": 4.8963589438739746e-05, "loss": 0.1816, "step": 10247 }, { "epoch": 0.182784575321942, "grad_norm": 0.528769850730896, "learning_rate": 4.896314587171897e-05, "loss": 0.2609, "step": 10248 }, { "epoch": 0.1828024114436557, "grad_norm": 0.300067663192749, "learning_rate": 4.896270221180878e-05, "loss": 0.2166, "step": 10249 }, { "epoch": 0.18282024756536938, "grad_norm": 0.28619349002838135, "learning_rate": 4.896225845901087e-05, "loss": 0.2043, "step": 10250 }, { "epoch": 0.18283808368708307, "grad_norm": 0.2763802409172058, "learning_rate": 4.896181461332696e-05, "loss": 0.1843, "step": 10251 }, { "epoch": 0.18285591980879679, "grad_norm": 0.30844414234161377, "learning_rate": 4.896137067475879e-05, "loss": 0.2434, "step": 10252 }, { "epoch": 0.18287375593051047, "grad_norm": 0.32825011014938354, "learning_rate": 4.896092664330808e-05, "loss": 0.2587, "step": 10253 }, { "epoch": 0.18289159205222416, "grad_norm": 0.2965489327907562, "learning_rate": 4.896048251897652e-05, "loss": 0.2666, "step": 10254 }, { "epoch": 0.18290942817393785, "grad_norm": 0.2501685619354248, "learning_rate": 4.896003830176588e-05, "loss": 0.2091, "step": 10255 }, { "epoch": 0.18292726429565156, "grad_norm": 0.26090285181999207, "learning_rate": 4.895959399167784e-05, "loss": 0.2426, "step": 10256 }, { "epoch": 0.18294510041736525, "grad_norm": 0.2957301139831543, "learning_rate": 4.895914958871414e-05, "loss": 0.2544, "step": 10257 }, { "epoch": 0.18296293653907894, "grad_norm": 0.21864773333072662, "learning_rate": 4.895870509287651e-05, "loss": 0.2117, "step": 10258 }, { "epoch": 0.18298077266079263, "grad_norm": 0.26415616273880005, "learning_rate": 4.8958260504166654e-05, "loss": 0.2288, "step": 10259 }, { "epoch": 0.18299860878250634, "grad_norm": 0.27247998118400574, "learning_rate": 4.8957815822586304e-05, "loss": 0.2024, "step": 10260 }, { "epoch": 0.18301644490422003, "grad_norm": 0.2134028524160385, "learning_rate": 4.895737104813719e-05, "loss": 0.1938, "step": 10261 }, { "epoch": 0.18303428102593372, "grad_norm": 0.22990845143795013, "learning_rate": 4.895692618082103e-05, "loss": 0.2045, "step": 10262 }, { "epoch": 0.1830521171476474, "grad_norm": 0.3156064748764038, "learning_rate": 4.895648122063955e-05, "loss": 0.2366, "step": 10263 }, { "epoch": 0.1830699532693611, "grad_norm": 0.2991919219493866, "learning_rate": 4.8956036167594476e-05, "loss": 0.249, "step": 10264 }, { "epoch": 0.1830877893910748, "grad_norm": 0.2450971007347107, "learning_rate": 4.895559102168754e-05, "loss": 0.2049, "step": 10265 }, { "epoch": 0.1831056255127885, "grad_norm": 0.302469402551651, "learning_rate": 4.895514578292044e-05, "loss": 0.237, "step": 10266 }, { "epoch": 0.1831234616345022, "grad_norm": 0.3612455129623413, "learning_rate": 4.8954700451294933e-05, "loss": 0.2851, "step": 10267 }, { "epoch": 0.18314129775621588, "grad_norm": 0.4785887897014618, "learning_rate": 4.8954255026812737e-05, "loss": 0.3032, "step": 10268 }, { "epoch": 0.1831591338779296, "grad_norm": 0.31391844153404236, "learning_rate": 4.895380950947557e-05, "loss": 0.2405, "step": 10269 }, { "epoch": 0.18317696999964328, "grad_norm": 0.27950945496559143, "learning_rate": 4.895336389928516e-05, "loss": 0.28, "step": 10270 }, { "epoch": 0.18319480612135697, "grad_norm": 0.36554351449012756, "learning_rate": 4.895291819624324e-05, "loss": 0.2614, "step": 10271 }, { "epoch": 0.18321264224307066, "grad_norm": 0.2411484569311142, "learning_rate": 4.895247240035154e-05, "loss": 0.1321, "step": 10272 }, { "epoch": 0.18323047836478437, "grad_norm": 0.31390002369880676, "learning_rate": 4.895202651161178e-05, "loss": 0.2091, "step": 10273 }, { "epoch": 0.18324831448649806, "grad_norm": 0.3174966275691986, "learning_rate": 4.8951580530025696e-05, "loss": 0.218, "step": 10274 }, { "epoch": 0.18326615060821175, "grad_norm": 0.25548991560935974, "learning_rate": 4.895113445559501e-05, "loss": 0.2092, "step": 10275 }, { "epoch": 0.18328398672992544, "grad_norm": 0.20418310165405273, "learning_rate": 4.8950688288321456e-05, "loss": 0.1875, "step": 10276 }, { "epoch": 0.18330182285163915, "grad_norm": 0.2449674755334854, "learning_rate": 4.895024202820676e-05, "loss": 0.2224, "step": 10277 }, { "epoch": 0.18331965897335284, "grad_norm": 0.28462448716163635, "learning_rate": 4.8949795675252656e-05, "loss": 0.2413, "step": 10278 }, { "epoch": 0.18333749509506653, "grad_norm": 0.29260769486427307, "learning_rate": 4.894934922946087e-05, "loss": 0.2203, "step": 10279 }, { "epoch": 0.18335533121678022, "grad_norm": 0.2456568330526352, "learning_rate": 4.894890269083314e-05, "loss": 0.2272, "step": 10280 }, { "epoch": 0.18337316733849393, "grad_norm": 0.3558341860771179, "learning_rate": 4.894845605937118e-05, "loss": 0.1961, "step": 10281 }, { "epoch": 0.18339100346020762, "grad_norm": 0.23935003578662872, "learning_rate": 4.894800933507675e-05, "loss": 0.2079, "step": 10282 }, { "epoch": 0.1834088395819213, "grad_norm": 0.30422544479370117, "learning_rate": 4.894756251795155e-05, "loss": 0.195, "step": 10283 }, { "epoch": 0.183426675703635, "grad_norm": 0.24865223467350006, "learning_rate": 4.894711560799733e-05, "loss": 0.1899, "step": 10284 }, { "epoch": 0.18344451182534868, "grad_norm": 0.20240098237991333, "learning_rate": 4.8946668605215824e-05, "loss": 0.1628, "step": 10285 }, { "epoch": 0.1834623479470624, "grad_norm": 0.25108715891838074, "learning_rate": 4.894622150960875e-05, "loss": 0.1887, "step": 10286 }, { "epoch": 0.1834801840687761, "grad_norm": 0.29093337059020996, "learning_rate": 4.894577432117786e-05, "loss": 0.1975, "step": 10287 }, { "epoch": 0.18349802019048977, "grad_norm": 0.23927940428256989, "learning_rate": 4.894532703992487e-05, "loss": 0.1879, "step": 10288 }, { "epoch": 0.18351585631220346, "grad_norm": 0.23843155801296234, "learning_rate": 4.894487966585153e-05, "loss": 0.2221, "step": 10289 }, { "epoch": 0.18353369243391718, "grad_norm": 0.2642580568790436, "learning_rate": 4.894443219895957e-05, "loss": 0.2257, "step": 10290 }, { "epoch": 0.18355152855563087, "grad_norm": 0.2536928951740265, "learning_rate": 4.8943984639250704e-05, "loss": 0.2286, "step": 10291 }, { "epoch": 0.18356936467734455, "grad_norm": 0.2608407437801361, "learning_rate": 4.894353698672669e-05, "loss": 0.1545, "step": 10292 }, { "epoch": 0.18358720079905824, "grad_norm": 0.3679421842098236, "learning_rate": 4.8943089241389264e-05, "loss": 0.2385, "step": 10293 }, { "epoch": 0.18360503692077196, "grad_norm": 0.22421351075172424, "learning_rate": 4.894264140324015e-05, "loss": 0.1818, "step": 10294 }, { "epoch": 0.18362287304248565, "grad_norm": 0.2700023949146271, "learning_rate": 4.894219347228109e-05, "loss": 0.1828, "step": 10295 }, { "epoch": 0.18364070916419933, "grad_norm": 0.2802733778953552, "learning_rate": 4.8941745448513814e-05, "loss": 0.204, "step": 10296 }, { "epoch": 0.18365854528591302, "grad_norm": 0.29686352610588074, "learning_rate": 4.8941297331940066e-05, "loss": 0.1993, "step": 10297 }, { "epoch": 0.18367638140762674, "grad_norm": 0.3029332160949707, "learning_rate": 4.894084912256158e-05, "loss": 0.2077, "step": 10298 }, { "epoch": 0.18369421752934043, "grad_norm": 0.2746410667896271, "learning_rate": 4.8940400820380097e-05, "loss": 0.1405, "step": 10299 }, { "epoch": 0.1837120536510541, "grad_norm": 0.2549181878566742, "learning_rate": 4.893995242539735e-05, "loss": 0.2129, "step": 10300 }, { "epoch": 0.1837298897727678, "grad_norm": 0.23572704195976257, "learning_rate": 4.893950393761508e-05, "loss": 0.152, "step": 10301 }, { "epoch": 0.18374772589448152, "grad_norm": 0.2893717288970947, "learning_rate": 4.893905535703502e-05, "loss": 0.2152, "step": 10302 }, { "epoch": 0.1837655620161952, "grad_norm": 0.4165283143520355, "learning_rate": 4.8938606683658915e-05, "loss": 0.3003, "step": 10303 }, { "epoch": 0.1837833981379089, "grad_norm": 0.22356657683849335, "learning_rate": 4.8938157917488505e-05, "loss": 0.1867, "step": 10304 }, { "epoch": 0.18380123425962258, "grad_norm": 0.26489782333374023, "learning_rate": 4.893770905852553e-05, "loss": 0.1968, "step": 10305 }, { "epoch": 0.18381907038133627, "grad_norm": 0.20398347079753876, "learning_rate": 4.893726010677172e-05, "loss": 0.1736, "step": 10306 }, { "epoch": 0.18383690650304998, "grad_norm": 0.22316183149814606, "learning_rate": 4.893681106222882e-05, "loss": 0.1809, "step": 10307 }, { "epoch": 0.18385474262476367, "grad_norm": 0.34353235363960266, "learning_rate": 4.893636192489858e-05, "loss": 0.2134, "step": 10308 }, { "epoch": 0.18387257874647736, "grad_norm": 0.2776363492012024, "learning_rate": 4.8935912694782725e-05, "loss": 0.1773, "step": 10309 }, { "epoch": 0.18389041486819105, "grad_norm": 0.348707377910614, "learning_rate": 4.893546337188302e-05, "loss": 0.2195, "step": 10310 }, { "epoch": 0.18390825098990476, "grad_norm": 0.2620590925216675, "learning_rate": 4.8935013956201176e-05, "loss": 0.1788, "step": 10311 }, { "epoch": 0.18392608711161845, "grad_norm": 0.21259760856628418, "learning_rate": 4.8934564447738965e-05, "loss": 0.1605, "step": 10312 }, { "epoch": 0.18394392323333214, "grad_norm": 0.3143465518951416, "learning_rate": 4.8934114846498105e-05, "loss": 0.1483, "step": 10313 }, { "epoch": 0.18396175935504583, "grad_norm": 0.3004714846611023, "learning_rate": 4.893366515248034e-05, "loss": 0.2323, "step": 10314 }, { "epoch": 0.18397959547675954, "grad_norm": 0.2775789499282837, "learning_rate": 4.893321536568744e-05, "loss": 0.1946, "step": 10315 }, { "epoch": 0.18399743159847323, "grad_norm": 0.23492038249969482, "learning_rate": 4.893276548612114e-05, "loss": 0.2067, "step": 10316 }, { "epoch": 0.18401526772018692, "grad_norm": 0.3655635416507721, "learning_rate": 4.8932315513783155e-05, "loss": 0.2117, "step": 10317 }, { "epoch": 0.1840331038419006, "grad_norm": 0.31022271513938904, "learning_rate": 4.893186544867525e-05, "loss": 0.2344, "step": 10318 }, { "epoch": 0.18405093996361432, "grad_norm": 0.3456069827079773, "learning_rate": 4.8931415290799175e-05, "loss": 0.201, "step": 10319 }, { "epoch": 0.184068776085328, "grad_norm": 0.3017507493495941, "learning_rate": 4.893096504015667e-05, "loss": 0.2253, "step": 10320 }, { "epoch": 0.1840866122070417, "grad_norm": 0.21177324652671814, "learning_rate": 4.8930514696749475e-05, "loss": 0.1705, "step": 10321 }, { "epoch": 0.1841044483287554, "grad_norm": 0.27520138025283813, "learning_rate": 4.893006426057934e-05, "loss": 0.2337, "step": 10322 }, { "epoch": 0.1841222844504691, "grad_norm": 0.2814891040325165, "learning_rate": 4.8929613731648014e-05, "loss": 0.2062, "step": 10323 }, { "epoch": 0.1841401205721828, "grad_norm": 0.24566584825515747, "learning_rate": 4.8929163109957234e-05, "loss": 0.2111, "step": 10324 }, { "epoch": 0.18415795669389648, "grad_norm": 0.30118003487586975, "learning_rate": 4.892871239550876e-05, "loss": 0.2353, "step": 10325 }, { "epoch": 0.18417579281561017, "grad_norm": 0.2540930211544037, "learning_rate": 4.8928261588304325e-05, "loss": 0.2879, "step": 10326 }, { "epoch": 0.18419362893732386, "grad_norm": 0.34676486253738403, "learning_rate": 4.8927810688345685e-05, "loss": 0.2383, "step": 10327 }, { "epoch": 0.18421146505903757, "grad_norm": 0.25336676836013794, "learning_rate": 4.892735969563459e-05, "loss": 0.2008, "step": 10328 }, { "epoch": 0.18422930118075126, "grad_norm": 0.32810625433921814, "learning_rate": 4.892690861017278e-05, "loss": 0.1595, "step": 10329 }, { "epoch": 0.18424713730246495, "grad_norm": 0.2761939764022827, "learning_rate": 4.892645743196202e-05, "loss": 0.1878, "step": 10330 }, { "epoch": 0.18426497342417864, "grad_norm": 0.3160751163959503, "learning_rate": 4.892600616100403e-05, "loss": 0.238, "step": 10331 }, { "epoch": 0.18428280954589235, "grad_norm": 0.2797072231769562, "learning_rate": 4.892555479730059e-05, "loss": 0.1414, "step": 10332 }, { "epoch": 0.18430064566760604, "grad_norm": 0.22800606489181519, "learning_rate": 4.8925103340853436e-05, "loss": 0.1621, "step": 10333 }, { "epoch": 0.18431848178931973, "grad_norm": 0.2722718417644501, "learning_rate": 4.892465179166431e-05, "loss": 0.2118, "step": 10334 }, { "epoch": 0.18433631791103341, "grad_norm": 0.40476706624031067, "learning_rate": 4.8924200149734976e-05, "loss": 0.1782, "step": 10335 }, { "epoch": 0.18435415403274713, "grad_norm": 0.20445409417152405, "learning_rate": 4.892374841506717e-05, "loss": 0.1783, "step": 10336 }, { "epoch": 0.18437199015446082, "grad_norm": 0.25002533197402954, "learning_rate": 4.892329658766266e-05, "loss": 0.2073, "step": 10337 }, { "epoch": 0.1843898262761745, "grad_norm": 0.256468266248703, "learning_rate": 4.892284466752319e-05, "loss": 0.1964, "step": 10338 }, { "epoch": 0.1844076623978882, "grad_norm": 0.3192954659461975, "learning_rate": 4.892239265465051e-05, "loss": 0.1902, "step": 10339 }, { "epoch": 0.1844254985196019, "grad_norm": 0.3462444245815277, "learning_rate": 4.8921940549046376e-05, "loss": 0.2478, "step": 10340 }, { "epoch": 0.1844433346413156, "grad_norm": 0.314211905002594, "learning_rate": 4.892148835071253e-05, "loss": 0.2892, "step": 10341 }, { "epoch": 0.1844611707630293, "grad_norm": 0.2857251763343811, "learning_rate": 4.8921036059650737e-05, "loss": 0.1764, "step": 10342 }, { "epoch": 0.18447900688474297, "grad_norm": 0.2825760245323181, "learning_rate": 4.8920583675862755e-05, "loss": 0.1645, "step": 10343 }, { "epoch": 0.18449684300645666, "grad_norm": 0.3393782377243042, "learning_rate": 4.892013119935032e-05, "loss": 0.2655, "step": 10344 }, { "epoch": 0.18451467912817038, "grad_norm": 0.24785509705543518, "learning_rate": 4.8919678630115194e-05, "loss": 0.2039, "step": 10345 }, { "epoch": 0.18453251524988407, "grad_norm": 0.21270188689231873, "learning_rate": 4.891922596815913e-05, "loss": 0.1796, "step": 10346 }, { "epoch": 0.18455035137159775, "grad_norm": 0.25588127970695496, "learning_rate": 4.891877321348389e-05, "loss": 0.1828, "step": 10347 }, { "epoch": 0.18456818749331144, "grad_norm": 0.2840774357318878, "learning_rate": 4.891832036609122e-05, "loss": 0.197, "step": 10348 }, { "epoch": 0.18458602361502516, "grad_norm": 0.343094140291214, "learning_rate": 4.891786742598289e-05, "loss": 0.2261, "step": 10349 }, { "epoch": 0.18460385973673885, "grad_norm": 0.22164809703826904, "learning_rate": 4.8917414393160634e-05, "loss": 0.186, "step": 10350 }, { "epoch": 0.18462169585845253, "grad_norm": 0.34998172521591187, "learning_rate": 4.891696126762622e-05, "loss": 0.1992, "step": 10351 }, { "epoch": 0.18463953198016622, "grad_norm": 0.25199243426322937, "learning_rate": 4.8916508049381404e-05, "loss": 0.1758, "step": 10352 }, { "epoch": 0.18465736810187994, "grad_norm": 0.23775465786457062, "learning_rate": 4.891605473842794e-05, "loss": 0.2022, "step": 10353 }, { "epoch": 0.18467520422359363, "grad_norm": 0.27597668766975403, "learning_rate": 4.891560133476759e-05, "loss": 0.2727, "step": 10354 }, { "epoch": 0.1846930403453073, "grad_norm": 0.3825770318508148, "learning_rate": 4.8915147838402106e-05, "loss": 0.185, "step": 10355 }, { "epoch": 0.184710876467021, "grad_norm": 0.5336138606071472, "learning_rate": 4.891469424933326e-05, "loss": 0.2208, "step": 10356 }, { "epoch": 0.18472871258873472, "grad_norm": 0.396625280380249, "learning_rate": 4.891424056756279e-05, "loss": 0.2225, "step": 10357 }, { "epoch": 0.1847465487104484, "grad_norm": 0.22673653066158295, "learning_rate": 4.891378679309247e-05, "loss": 0.1938, "step": 10358 }, { "epoch": 0.1847643848321621, "grad_norm": 0.24844437837600708, "learning_rate": 4.891333292592404e-05, "loss": 0.2051, "step": 10359 }, { "epoch": 0.18478222095387578, "grad_norm": 0.549529492855072, "learning_rate": 4.891287896605928e-05, "loss": 0.229, "step": 10360 }, { "epoch": 0.1848000570755895, "grad_norm": 0.3561497926712036, "learning_rate": 4.891242491349994e-05, "loss": 0.1734, "step": 10361 }, { "epoch": 0.18481789319730318, "grad_norm": 0.36185210943222046, "learning_rate": 4.89119707682478e-05, "loss": 0.2414, "step": 10362 }, { "epoch": 0.18483572931901687, "grad_norm": 0.3784153163433075, "learning_rate": 4.891151653030458e-05, "loss": 0.2199, "step": 10363 }, { "epoch": 0.18485356544073056, "grad_norm": 0.2495015263557434, "learning_rate": 4.891106219967206e-05, "loss": 0.1791, "step": 10364 }, { "epoch": 0.18487140156244425, "grad_norm": 0.2821410298347473, "learning_rate": 4.8910607776352024e-05, "loss": 0.1975, "step": 10365 }, { "epoch": 0.18488923768415796, "grad_norm": 0.2477578967809677, "learning_rate": 4.8910153260346204e-05, "loss": 0.2251, "step": 10366 }, { "epoch": 0.18490707380587165, "grad_norm": 0.2333185076713562, "learning_rate": 4.8909698651656366e-05, "loss": 0.1984, "step": 10367 }, { "epoch": 0.18492490992758534, "grad_norm": 0.2450585812330246, "learning_rate": 4.890924395028429e-05, "loss": 0.2067, "step": 10368 }, { "epoch": 0.18494274604929903, "grad_norm": 0.2756058871746063, "learning_rate": 4.8908789156231715e-05, "loss": 0.2216, "step": 10369 }, { "epoch": 0.18496058217101274, "grad_norm": 0.2147826999425888, "learning_rate": 4.890833426950042e-05, "loss": 0.1968, "step": 10370 }, { "epoch": 0.18497841829272643, "grad_norm": 0.23656854033470154, "learning_rate": 4.8907879290092165e-05, "loss": 0.2101, "step": 10371 }, { "epoch": 0.18499625441444012, "grad_norm": 0.21040819585323334, "learning_rate": 4.8907424218008714e-05, "loss": 0.2268, "step": 10372 }, { "epoch": 0.1850140905361538, "grad_norm": 0.2578413188457489, "learning_rate": 4.890696905325183e-05, "loss": 0.1864, "step": 10373 }, { "epoch": 0.18503192665786752, "grad_norm": 0.25228288769721985, "learning_rate": 4.890651379582327e-05, "loss": 0.1558, "step": 10374 }, { "epoch": 0.1850497627795812, "grad_norm": 0.2664131224155426, "learning_rate": 4.890605844572481e-05, "loss": 0.2014, "step": 10375 }, { "epoch": 0.1850675989012949, "grad_norm": 0.28589069843292236, "learning_rate": 4.890560300295821e-05, "loss": 0.1808, "step": 10376 }, { "epoch": 0.1850854350230086, "grad_norm": 0.31567105650901794, "learning_rate": 4.890514746752524e-05, "loss": 0.2233, "step": 10377 }, { "epoch": 0.1851032711447223, "grad_norm": 0.36673760414123535, "learning_rate": 4.890469183942765e-05, "loss": 0.2316, "step": 10378 }, { "epoch": 0.185121107266436, "grad_norm": 0.31213730573654175, "learning_rate": 4.8904236118667226e-05, "loss": 0.2195, "step": 10379 }, { "epoch": 0.18513894338814968, "grad_norm": 0.3055059611797333, "learning_rate": 4.890378030524573e-05, "loss": 0.2288, "step": 10380 }, { "epoch": 0.18515677950986337, "grad_norm": 0.256708025932312, "learning_rate": 4.8903324399164916e-05, "loss": 0.2355, "step": 10381 }, { "epoch": 0.18517461563157708, "grad_norm": 0.2967715859413147, "learning_rate": 4.8902868400426574e-05, "loss": 0.2373, "step": 10382 }, { "epoch": 0.18519245175329077, "grad_norm": 0.3068571984767914, "learning_rate": 4.890241230903245e-05, "loss": 0.2203, "step": 10383 }, { "epoch": 0.18521028787500446, "grad_norm": 0.27461376786231995, "learning_rate": 4.890195612498432e-05, "loss": 0.1891, "step": 10384 }, { "epoch": 0.18522812399671815, "grad_norm": 0.26489153504371643, "learning_rate": 4.890149984828395e-05, "loss": 0.1708, "step": 10385 }, { "epoch": 0.18524596011843183, "grad_norm": 0.33622679114341736, "learning_rate": 4.890104347893312e-05, "loss": 0.2821, "step": 10386 }, { "epoch": 0.18526379624014555, "grad_norm": 0.25586479902267456, "learning_rate": 4.890058701693358e-05, "loss": 0.1965, "step": 10387 }, { "epoch": 0.18528163236185924, "grad_norm": 0.27159854769706726, "learning_rate": 4.8900130462287115e-05, "loss": 0.2123, "step": 10388 }, { "epoch": 0.18529946848357293, "grad_norm": 0.3056102991104126, "learning_rate": 4.8899673814995486e-05, "loss": 0.1632, "step": 10389 }, { "epoch": 0.18531730460528661, "grad_norm": 0.33112654089927673, "learning_rate": 4.889921707506047e-05, "loss": 0.2297, "step": 10390 }, { "epoch": 0.18533514072700033, "grad_norm": 0.3449118137359619, "learning_rate": 4.889876024248384e-05, "loss": 0.223, "step": 10391 }, { "epoch": 0.18535297684871402, "grad_norm": 0.2861473560333252, "learning_rate": 4.889830331726735e-05, "loss": 0.2286, "step": 10392 }, { "epoch": 0.1853708129704277, "grad_norm": 0.3719724118709564, "learning_rate": 4.889784629941279e-05, "loss": 0.206, "step": 10393 }, { "epoch": 0.1853886490921414, "grad_norm": 0.4821029305458069, "learning_rate": 4.889738918892192e-05, "loss": 0.1915, "step": 10394 }, { "epoch": 0.1854064852138551, "grad_norm": 0.2707119286060333, "learning_rate": 4.8896931985796515e-05, "loss": 0.2132, "step": 10395 }, { "epoch": 0.1854243213355688, "grad_norm": 0.2872961759567261, "learning_rate": 4.889647469003835e-05, "loss": 0.1871, "step": 10396 }, { "epoch": 0.18544215745728249, "grad_norm": 0.21720540523529053, "learning_rate": 4.8896017301649196e-05, "loss": 0.1443, "step": 10397 }, { "epoch": 0.18545999357899617, "grad_norm": 0.30704933404922485, "learning_rate": 4.889555982063082e-05, "loss": 0.2134, "step": 10398 }, { "epoch": 0.1854778297007099, "grad_norm": 0.3383081555366516, "learning_rate": 4.8895102246985007e-05, "loss": 0.2082, "step": 10399 }, { "epoch": 0.18549566582242358, "grad_norm": 0.2674878239631653, "learning_rate": 4.889464458071352e-05, "loss": 0.1919, "step": 10400 }, { "epoch": 0.18551350194413727, "grad_norm": 0.3940347135066986, "learning_rate": 4.8894186821818144e-05, "loss": 0.2138, "step": 10401 }, { "epoch": 0.18553133806585095, "grad_norm": 0.2133244276046753, "learning_rate": 4.889372897030065e-05, "loss": 0.2427, "step": 10402 }, { "epoch": 0.18554917418756467, "grad_norm": 0.2724769115447998, "learning_rate": 4.88932710261628e-05, "loss": 0.1914, "step": 10403 }, { "epoch": 0.18556701030927836, "grad_norm": 0.35610491037368774, "learning_rate": 4.889281298940638e-05, "loss": 0.211, "step": 10404 }, { "epoch": 0.18558484643099205, "grad_norm": 0.3148233890533447, "learning_rate": 4.889235486003316e-05, "loss": 0.2114, "step": 10405 }, { "epoch": 0.18560268255270573, "grad_norm": 0.24598360061645508, "learning_rate": 4.889189663804493e-05, "loss": 0.2118, "step": 10406 }, { "epoch": 0.18562051867441942, "grad_norm": 0.2542036175727844, "learning_rate": 4.8891438323443456e-05, "loss": 0.209, "step": 10407 }, { "epoch": 0.18563835479613314, "grad_norm": 0.343126118183136, "learning_rate": 4.889097991623052e-05, "loss": 0.2053, "step": 10408 }, { "epoch": 0.18565619091784683, "grad_norm": 0.2791293263435364, "learning_rate": 4.889052141640788e-05, "loss": 0.1894, "step": 10409 }, { "epoch": 0.1856740270395605, "grad_norm": 0.3229711949825287, "learning_rate": 4.889006282397733e-05, "loss": 0.2333, "step": 10410 }, { "epoch": 0.1856918631612742, "grad_norm": 0.22067569196224213, "learning_rate": 4.888960413894066e-05, "loss": 0.2048, "step": 10411 }, { "epoch": 0.18570969928298792, "grad_norm": 0.27574804425239563, "learning_rate": 4.8889145361299616e-05, "loss": 0.1934, "step": 10412 }, { "epoch": 0.1857275354047016, "grad_norm": 0.21312962472438812, "learning_rate": 4.8888686491056e-05, "loss": 0.1792, "step": 10413 }, { "epoch": 0.1857453715264153, "grad_norm": 0.2817594110965729, "learning_rate": 4.888822752821159e-05, "loss": 0.1818, "step": 10414 }, { "epoch": 0.18576320764812898, "grad_norm": 0.2855500280857086, "learning_rate": 4.8887768472768155e-05, "loss": 0.178, "step": 10415 }, { "epoch": 0.1857810437698427, "grad_norm": 0.23362240195274353, "learning_rate": 4.8887309324727475e-05, "loss": 0.2257, "step": 10416 }, { "epoch": 0.18579887989155638, "grad_norm": 0.22333790361881256, "learning_rate": 4.888685008409134e-05, "loss": 0.1452, "step": 10417 }, { "epoch": 0.18581671601327007, "grad_norm": 0.33676812052726746, "learning_rate": 4.8886390750861524e-05, "loss": 0.1794, "step": 10418 }, { "epoch": 0.18583455213498376, "grad_norm": 0.25902023911476135, "learning_rate": 4.88859313250398e-05, "loss": 0.2096, "step": 10419 }, { "epoch": 0.18585238825669748, "grad_norm": 0.23255212604999542, "learning_rate": 4.8885471806627957e-05, "loss": 0.2005, "step": 10420 }, { "epoch": 0.18587022437841116, "grad_norm": 0.26637881994247437, "learning_rate": 4.888501219562778e-05, "loss": 0.1722, "step": 10421 }, { "epoch": 0.18588806050012485, "grad_norm": 0.30216068029403687, "learning_rate": 4.8884552492041044e-05, "loss": 0.1606, "step": 10422 }, { "epoch": 0.18590589662183854, "grad_norm": 0.28722715377807617, "learning_rate": 4.888409269586953e-05, "loss": 0.2134, "step": 10423 }, { "epoch": 0.18592373274355226, "grad_norm": 0.41661202907562256, "learning_rate": 4.888363280711503e-05, "loss": 0.2097, "step": 10424 }, { "epoch": 0.18594156886526594, "grad_norm": 0.26423731446266174, "learning_rate": 4.888317282577932e-05, "loss": 0.1949, "step": 10425 }, { "epoch": 0.18595940498697963, "grad_norm": 0.35436776280403137, "learning_rate": 4.888271275186418e-05, "loss": 0.2011, "step": 10426 }, { "epoch": 0.18597724110869332, "grad_norm": 0.2574150562286377, "learning_rate": 4.88822525853714e-05, "loss": 0.2063, "step": 10427 }, { "epoch": 0.185995077230407, "grad_norm": 0.26777106523513794, "learning_rate": 4.8881792326302755e-05, "loss": 0.1655, "step": 10428 }, { "epoch": 0.18601291335212072, "grad_norm": 0.2430422157049179, "learning_rate": 4.888133197466004e-05, "loss": 0.1554, "step": 10429 }, { "epoch": 0.1860307494738344, "grad_norm": 0.2989281117916107, "learning_rate": 4.888087153044503e-05, "loss": 0.2006, "step": 10430 }, { "epoch": 0.1860485855955481, "grad_norm": 0.2649911344051361, "learning_rate": 4.888041099365951e-05, "loss": 0.2234, "step": 10431 }, { "epoch": 0.1860664217172618, "grad_norm": 0.31233587861061096, "learning_rate": 4.887995036430527e-05, "loss": 0.2529, "step": 10432 }, { "epoch": 0.1860842578389755, "grad_norm": 0.28314629197120667, "learning_rate": 4.8879489642384104e-05, "loss": 0.1864, "step": 10433 }, { "epoch": 0.1861020939606892, "grad_norm": 0.3037908375263214, "learning_rate": 4.8879028827897776e-05, "loss": 0.24, "step": 10434 }, { "epoch": 0.18611993008240288, "grad_norm": 0.2743125259876251, "learning_rate": 4.8878567920848094e-05, "loss": 0.1829, "step": 10435 }, { "epoch": 0.18613776620411657, "grad_norm": 0.2598416209220886, "learning_rate": 4.8878106921236833e-05, "loss": 0.2357, "step": 10436 }, { "epoch": 0.18615560232583028, "grad_norm": 0.3465643525123596, "learning_rate": 4.8877645829065783e-05, "loss": 0.2195, "step": 10437 }, { "epoch": 0.18617343844754397, "grad_norm": 0.2014244794845581, "learning_rate": 4.8877184644336726e-05, "loss": 0.194, "step": 10438 }, { "epoch": 0.18619127456925766, "grad_norm": 0.25579357147216797, "learning_rate": 4.887672336705146e-05, "loss": 0.1877, "step": 10439 }, { "epoch": 0.18620911069097135, "grad_norm": 0.28658202290534973, "learning_rate": 4.887626199721177e-05, "loss": 0.2202, "step": 10440 }, { "epoch": 0.18622694681268506, "grad_norm": 0.31616172194480896, "learning_rate": 4.887580053481943e-05, "loss": 0.2048, "step": 10441 }, { "epoch": 0.18624478293439875, "grad_norm": 0.2259247601032257, "learning_rate": 4.887533897987625e-05, "loss": 0.1727, "step": 10442 }, { "epoch": 0.18626261905611244, "grad_norm": 0.3351108431816101, "learning_rate": 4.887487733238401e-05, "loss": 0.2405, "step": 10443 }, { "epoch": 0.18628045517782613, "grad_norm": 0.26033225655555725, "learning_rate": 4.8874415592344494e-05, "loss": 0.1762, "step": 10444 }, { "epoch": 0.18629829129953981, "grad_norm": 0.2744556665420532, "learning_rate": 4.887395375975951e-05, "loss": 0.2253, "step": 10445 }, { "epoch": 0.18631612742125353, "grad_norm": 0.32611531019210815, "learning_rate": 4.887349183463082e-05, "loss": 0.2745, "step": 10446 }, { "epoch": 0.18633396354296722, "grad_norm": 0.2906774878501892, "learning_rate": 4.887302981696024e-05, "loss": 0.2273, "step": 10447 }, { "epoch": 0.1863517996646809, "grad_norm": 0.2538030445575714, "learning_rate": 4.887256770674954e-05, "loss": 0.1846, "step": 10448 }, { "epoch": 0.1863696357863946, "grad_norm": 0.35917553305625916, "learning_rate": 4.887210550400053e-05, "loss": 0.2289, "step": 10449 }, { "epoch": 0.1863874719081083, "grad_norm": 0.23671753704547882, "learning_rate": 4.887164320871499e-05, "loss": 0.1749, "step": 10450 }, { "epoch": 0.186405308029822, "grad_norm": 0.27708324790000916, "learning_rate": 4.887118082089472e-05, "loss": 0.2244, "step": 10451 }, { "epoch": 0.18642314415153569, "grad_norm": 0.36812689900398254, "learning_rate": 4.887071834054151e-05, "loss": 0.1962, "step": 10452 }, { "epoch": 0.18644098027324937, "grad_norm": 0.28726956248283386, "learning_rate": 4.8870255767657146e-05, "loss": 0.2299, "step": 10453 }, { "epoch": 0.1864588163949631, "grad_norm": 0.23517903685569763, "learning_rate": 4.886979310224343e-05, "loss": 0.1669, "step": 10454 }, { "epoch": 0.18647665251667678, "grad_norm": 0.2091180831193924, "learning_rate": 4.886933034430215e-05, "loss": 0.1511, "step": 10455 }, { "epoch": 0.18649448863839047, "grad_norm": 0.3165168762207031, "learning_rate": 4.88688674938351e-05, "loss": 0.1985, "step": 10456 }, { "epoch": 0.18651232476010415, "grad_norm": 0.341937392950058, "learning_rate": 4.886840455084408e-05, "loss": 0.1943, "step": 10457 }, { "epoch": 0.18653016088181787, "grad_norm": 0.46690675616264343, "learning_rate": 4.886794151533087e-05, "loss": 0.2097, "step": 10458 }, { "epoch": 0.18654799700353156, "grad_norm": 0.39278659224510193, "learning_rate": 4.886747838729728e-05, "loss": 0.2673, "step": 10459 }, { "epoch": 0.18656583312524525, "grad_norm": 0.2433737814426422, "learning_rate": 4.88670151667451e-05, "loss": 0.1925, "step": 10460 }, { "epoch": 0.18658366924695893, "grad_norm": 0.2127729207277298, "learning_rate": 4.8866551853676135e-05, "loss": 0.1718, "step": 10461 }, { "epoch": 0.18660150536867265, "grad_norm": 0.23966027796268463, "learning_rate": 4.886608844809216e-05, "loss": 0.1774, "step": 10462 }, { "epoch": 0.18661934149038634, "grad_norm": 0.37410762906074524, "learning_rate": 4.8865624949994993e-05, "loss": 0.1917, "step": 10463 }, { "epoch": 0.18663717761210002, "grad_norm": 0.26284417510032654, "learning_rate": 4.886516135938641e-05, "loss": 0.2002, "step": 10464 }, { "epoch": 0.1866550137338137, "grad_norm": 0.3009563088417053, "learning_rate": 4.886469767626823e-05, "loss": 0.2417, "step": 10465 }, { "epoch": 0.1866728498555274, "grad_norm": 0.43802765011787415, "learning_rate": 4.8864233900642234e-05, "loss": 0.2612, "step": 10466 }, { "epoch": 0.18669068597724112, "grad_norm": 0.29394182562828064, "learning_rate": 4.8863770032510225e-05, "loss": 0.2576, "step": 10467 }, { "epoch": 0.1867085220989548, "grad_norm": 0.364239364862442, "learning_rate": 4.8863306071874e-05, "loss": 0.2408, "step": 10468 }, { "epoch": 0.1867263582206685, "grad_norm": 0.24073238670825958, "learning_rate": 4.8862842018735356e-05, "loss": 0.261, "step": 10469 }, { "epoch": 0.18674419434238218, "grad_norm": 0.2975509464740753, "learning_rate": 4.88623778730961e-05, "loss": 0.211, "step": 10470 }, { "epoch": 0.1867620304640959, "grad_norm": 0.25481876730918884, "learning_rate": 4.8861913634958025e-05, "loss": 0.2289, "step": 10471 }, { "epoch": 0.18677986658580958, "grad_norm": 0.2389741837978363, "learning_rate": 4.8861449304322926e-05, "loss": 0.1965, "step": 10472 }, { "epoch": 0.18679770270752327, "grad_norm": 0.39479899406433105, "learning_rate": 4.8860984881192615e-05, "loss": 0.216, "step": 10473 }, { "epoch": 0.18681553882923696, "grad_norm": 0.2663244605064392, "learning_rate": 4.8860520365568875e-05, "loss": 0.2175, "step": 10474 }, { "epoch": 0.18683337495095068, "grad_norm": 0.39912599325180054, "learning_rate": 4.886005575745353e-05, "loss": 0.2101, "step": 10475 }, { "epoch": 0.18685121107266436, "grad_norm": 0.29135116934776306, "learning_rate": 4.885959105684835e-05, "loss": 0.193, "step": 10476 }, { "epoch": 0.18686904719437805, "grad_norm": 0.26923084259033203, "learning_rate": 4.8859126263755176e-05, "loss": 0.2592, "step": 10477 }, { "epoch": 0.18688688331609174, "grad_norm": 0.27490487694740295, "learning_rate": 4.8858661378175776e-05, "loss": 0.2032, "step": 10478 }, { "epoch": 0.18690471943780546, "grad_norm": 0.29819801449775696, "learning_rate": 4.8858196400111966e-05, "loss": 0.1763, "step": 10479 }, { "epoch": 0.18692255555951914, "grad_norm": 0.2618018090724945, "learning_rate": 4.885773132956554e-05, "loss": 0.2239, "step": 10480 }, { "epoch": 0.18694039168123283, "grad_norm": 0.2373030036687851, "learning_rate": 4.8857266166538317e-05, "loss": 0.2069, "step": 10481 }, { "epoch": 0.18695822780294652, "grad_norm": 0.24133995175361633, "learning_rate": 4.885680091103208e-05, "loss": 0.1897, "step": 10482 }, { "epoch": 0.18697606392466024, "grad_norm": 0.26540258526802063, "learning_rate": 4.885633556304865e-05, "loss": 0.2141, "step": 10483 }, { "epoch": 0.18699390004637392, "grad_norm": 0.2913873493671417, "learning_rate": 4.885587012258983e-05, "loss": 0.1931, "step": 10484 }, { "epoch": 0.1870117361680876, "grad_norm": 0.2755669355392456, "learning_rate": 4.88554045896574e-05, "loss": 0.2272, "step": 10485 }, { "epoch": 0.1870295722898013, "grad_norm": 0.26733720302581787, "learning_rate": 4.88549389642532e-05, "loss": 0.2255, "step": 10486 }, { "epoch": 0.187047408411515, "grad_norm": 0.24001014232635498, "learning_rate": 4.8854473246379005e-05, "loss": 0.1721, "step": 10487 }, { "epoch": 0.1870652445332287, "grad_norm": 0.24600258469581604, "learning_rate": 4.885400743603664e-05, "loss": 0.2012, "step": 10488 }, { "epoch": 0.1870830806549424, "grad_norm": 0.32753440737724304, "learning_rate": 4.88535415332279e-05, "loss": 0.1993, "step": 10489 }, { "epoch": 0.18710091677665608, "grad_norm": 0.2985244393348694, "learning_rate": 4.8853075537954596e-05, "loss": 0.2165, "step": 10490 }, { "epoch": 0.18711875289836977, "grad_norm": 0.27764397859573364, "learning_rate": 4.8852609450218535e-05, "loss": 0.2443, "step": 10491 }, { "epoch": 0.18713658902008348, "grad_norm": 0.24802055954933167, "learning_rate": 4.8852143270021524e-05, "loss": 0.1829, "step": 10492 }, { "epoch": 0.18715442514179717, "grad_norm": 0.3216690421104431, "learning_rate": 4.885167699736536e-05, "loss": 0.1525, "step": 10493 }, { "epoch": 0.18717226126351086, "grad_norm": 0.3482573628425598, "learning_rate": 4.885121063225185e-05, "loss": 0.1983, "step": 10494 }, { "epoch": 0.18719009738522455, "grad_norm": 0.3186511993408203, "learning_rate": 4.885074417468283e-05, "loss": 0.2242, "step": 10495 }, { "epoch": 0.18720793350693826, "grad_norm": 0.22009779512882233, "learning_rate": 4.885027762466007e-05, "loss": 0.1687, "step": 10496 }, { "epoch": 0.18722576962865195, "grad_norm": 0.25250473618507385, "learning_rate": 4.8849810982185404e-05, "loss": 0.1785, "step": 10497 }, { "epoch": 0.18724360575036564, "grad_norm": 0.2682039141654968, "learning_rate": 4.8849344247260634e-05, "loss": 0.178, "step": 10498 }, { "epoch": 0.18726144187207933, "grad_norm": 0.35088911652565, "learning_rate": 4.884887741988757e-05, "loss": 0.1735, "step": 10499 }, { "epoch": 0.18727927799379304, "grad_norm": 0.28017309308052063, "learning_rate": 4.884841050006802e-05, "loss": 0.1913, "step": 10500 }, { "epoch": 0.18729711411550673, "grad_norm": 0.2931995093822479, "learning_rate": 4.884794348780378e-05, "loss": 0.2281, "step": 10501 }, { "epoch": 0.18731495023722042, "grad_norm": 0.25336283445358276, "learning_rate": 4.884747638309669e-05, "loss": 0.1918, "step": 10502 }, { "epoch": 0.1873327863589341, "grad_norm": 0.40355825424194336, "learning_rate": 4.8847009185948546e-05, "loss": 0.1647, "step": 10503 }, { "epoch": 0.18735062248064782, "grad_norm": 0.31773319840431213, "learning_rate": 4.884654189636115e-05, "loss": 0.2862, "step": 10504 }, { "epoch": 0.1873684586023615, "grad_norm": 0.25512444972991943, "learning_rate": 4.8846074514336324e-05, "loss": 0.2549, "step": 10505 }, { "epoch": 0.1873862947240752, "grad_norm": 0.30692002177238464, "learning_rate": 4.8845607039875876e-05, "loss": 0.2474, "step": 10506 }, { "epoch": 0.18740413084578889, "grad_norm": 0.25550419092178345, "learning_rate": 4.8845139472981616e-05, "loss": 0.2519, "step": 10507 }, { "epoch": 0.18742196696750257, "grad_norm": 0.23434126377105713, "learning_rate": 4.884467181365536e-05, "loss": 0.1703, "step": 10508 }, { "epoch": 0.1874398030892163, "grad_norm": 0.3422868549823761, "learning_rate": 4.8844204061898925e-05, "loss": 0.2269, "step": 10509 }, { "epoch": 0.18745763921092998, "grad_norm": 0.24813294410705566, "learning_rate": 4.884373621771412e-05, "loss": 0.1789, "step": 10510 }, { "epoch": 0.18747547533264367, "grad_norm": 0.19203995168209076, "learning_rate": 4.884326828110276e-05, "loss": 0.2068, "step": 10511 }, { "epoch": 0.18749331145435735, "grad_norm": 0.27889353036880493, "learning_rate": 4.8842800252066644e-05, "loss": 0.2654, "step": 10512 }, { "epoch": 0.18751114757607107, "grad_norm": 0.22642937302589417, "learning_rate": 4.884233213060761e-05, "loss": 0.1717, "step": 10513 }, { "epoch": 0.18752898369778476, "grad_norm": 0.303641140460968, "learning_rate": 4.884186391672746e-05, "loss": 0.1824, "step": 10514 }, { "epoch": 0.18754681981949844, "grad_norm": 0.2523878514766693, "learning_rate": 4.884139561042801e-05, "loss": 0.2431, "step": 10515 }, { "epoch": 0.18756465594121213, "grad_norm": 0.3550451397895813, "learning_rate": 4.8840927211711076e-05, "loss": 0.2396, "step": 10516 }, { "epoch": 0.18758249206292585, "grad_norm": 0.19239379465579987, "learning_rate": 4.8840458720578476e-05, "loss": 0.2078, "step": 10517 }, { "epoch": 0.18760032818463954, "grad_norm": 0.38046181201934814, "learning_rate": 4.883999013703202e-05, "loss": 0.2256, "step": 10518 }, { "epoch": 0.18761816430635322, "grad_norm": 0.2400396317243576, "learning_rate": 4.883952146107353e-05, "loss": 0.1923, "step": 10519 }, { "epoch": 0.1876360004280669, "grad_norm": 0.2525434195995331, "learning_rate": 4.8839052692704825e-05, "loss": 0.2219, "step": 10520 }, { "epoch": 0.18765383654978063, "grad_norm": 0.24584171175956726, "learning_rate": 4.883858383192771e-05, "loss": 0.1575, "step": 10521 }, { "epoch": 0.18767167267149432, "grad_norm": 0.39342108368873596, "learning_rate": 4.883811487874402e-05, "loss": 0.265, "step": 10522 }, { "epoch": 0.187689508793208, "grad_norm": 0.2990253269672394, "learning_rate": 4.883764583315556e-05, "loss": 0.2416, "step": 10523 }, { "epoch": 0.1877073449149217, "grad_norm": 0.30369022488594055, "learning_rate": 4.883717669516414e-05, "loss": 0.2062, "step": 10524 }, { "epoch": 0.18772518103663538, "grad_norm": 0.29360026121139526, "learning_rate": 4.8836707464771605e-05, "loss": 0.1785, "step": 10525 }, { "epoch": 0.1877430171583491, "grad_norm": 0.2826521098613739, "learning_rate": 4.883623814197975e-05, "loss": 0.208, "step": 10526 }, { "epoch": 0.18776085328006278, "grad_norm": 0.23486946523189545, "learning_rate": 4.883576872679041e-05, "loss": 0.17, "step": 10527 }, { "epoch": 0.18777868940177647, "grad_norm": 0.3598060607910156, "learning_rate": 4.883529921920539e-05, "loss": 0.2452, "step": 10528 }, { "epoch": 0.18779652552349016, "grad_norm": 0.2967919707298279, "learning_rate": 4.883482961922653e-05, "loss": 0.1489, "step": 10529 }, { "epoch": 0.18781436164520388, "grad_norm": 0.30657777190208435, "learning_rate": 4.883435992685562e-05, "loss": 0.1991, "step": 10530 }, { "epoch": 0.18783219776691756, "grad_norm": 0.23573394119739532, "learning_rate": 4.883389014209452e-05, "loss": 0.1742, "step": 10531 }, { "epoch": 0.18785003388863125, "grad_norm": 0.40680450201034546, "learning_rate": 4.8833420264945015e-05, "loss": 0.1742, "step": 10532 }, { "epoch": 0.18786787001034494, "grad_norm": 0.39763349294662476, "learning_rate": 4.883295029540894e-05, "loss": 0.1765, "step": 10533 }, { "epoch": 0.18788570613205866, "grad_norm": 0.30162012577056885, "learning_rate": 4.8832480233488124e-05, "loss": 0.1913, "step": 10534 }, { "epoch": 0.18790354225377234, "grad_norm": 0.3297789394855499, "learning_rate": 4.8832010079184384e-05, "loss": 0.224, "step": 10535 }, { "epoch": 0.18792137837548603, "grad_norm": 0.3167688846588135, "learning_rate": 4.883153983249954e-05, "loss": 0.2162, "step": 10536 }, { "epoch": 0.18793921449719972, "grad_norm": 0.23432107269763947, "learning_rate": 4.8831069493435414e-05, "loss": 0.169, "step": 10537 }, { "epoch": 0.18795705061891343, "grad_norm": 0.27008378505706787, "learning_rate": 4.883059906199384e-05, "loss": 0.2298, "step": 10538 }, { "epoch": 0.18797488674062712, "grad_norm": 0.34587347507476807, "learning_rate": 4.883012853817662e-05, "loss": 0.2241, "step": 10539 }, { "epoch": 0.1879927228623408, "grad_norm": 0.3272530436515808, "learning_rate": 4.8829657921985605e-05, "loss": 0.2656, "step": 10540 }, { "epoch": 0.1880105589840545, "grad_norm": 0.30216947197914124, "learning_rate": 4.8829187213422603e-05, "loss": 0.187, "step": 10541 }, { "epoch": 0.18802839510576821, "grad_norm": 0.2211897075176239, "learning_rate": 4.8828716412489425e-05, "loss": 0.1697, "step": 10542 }, { "epoch": 0.1880462312274819, "grad_norm": 0.224198117852211, "learning_rate": 4.8828245519187935e-05, "loss": 0.2007, "step": 10543 }, { "epoch": 0.1880640673491956, "grad_norm": 0.395285964012146, "learning_rate": 4.882777453351992e-05, "loss": 0.2465, "step": 10544 }, { "epoch": 0.18808190347090928, "grad_norm": 0.20634904503822327, "learning_rate": 4.8827303455487225e-05, "loss": 0.1731, "step": 10545 }, { "epoch": 0.18809973959262297, "grad_norm": 0.3251771628856659, "learning_rate": 4.882683228509167e-05, "loss": 0.2284, "step": 10546 }, { "epoch": 0.18811757571433668, "grad_norm": 0.258035808801651, "learning_rate": 4.882636102233509e-05, "loss": 0.1689, "step": 10547 }, { "epoch": 0.18813541183605037, "grad_norm": 0.3538181781768799, "learning_rate": 4.88258896672193e-05, "loss": 0.2385, "step": 10548 }, { "epoch": 0.18815324795776406, "grad_norm": 0.2677677869796753, "learning_rate": 4.8825418219746135e-05, "loss": 0.1968, "step": 10549 }, { "epoch": 0.18817108407947775, "grad_norm": 0.21104206144809723, "learning_rate": 4.882494667991742e-05, "loss": 0.1717, "step": 10550 }, { "epoch": 0.18818892020119146, "grad_norm": 0.2170165479183197, "learning_rate": 4.8824475047734974e-05, "loss": 0.1801, "step": 10551 }, { "epoch": 0.18820675632290515, "grad_norm": 0.2783610224723816, "learning_rate": 4.882400332320065e-05, "loss": 0.19, "step": 10552 }, { "epoch": 0.18822459244461884, "grad_norm": 0.2591277062892914, "learning_rate": 4.8823531506316244e-05, "loss": 0.2198, "step": 10553 }, { "epoch": 0.18824242856633253, "grad_norm": 0.21704983711242676, "learning_rate": 4.882305959708361e-05, "loss": 0.2005, "step": 10554 }, { "epoch": 0.18826026468804624, "grad_norm": 0.2296086698770523, "learning_rate": 4.882258759550457e-05, "loss": 0.1808, "step": 10555 }, { "epoch": 0.18827810080975993, "grad_norm": 0.26934829354286194, "learning_rate": 4.882211550158095e-05, "loss": 0.2394, "step": 10556 }, { "epoch": 0.18829593693147362, "grad_norm": 0.38297170400619507, "learning_rate": 4.8821643315314585e-05, "loss": 0.1776, "step": 10557 }, { "epoch": 0.1883137730531873, "grad_norm": 0.2702253460884094, "learning_rate": 4.8821171036707304e-05, "loss": 0.2196, "step": 10558 }, { "epoch": 0.18833160917490102, "grad_norm": 0.2383365035057068, "learning_rate": 4.882069866576093e-05, "loss": 0.2077, "step": 10559 }, { "epoch": 0.1883494452966147, "grad_norm": 0.20856791734695435, "learning_rate": 4.8820226202477305e-05, "loss": 0.1629, "step": 10560 }, { "epoch": 0.1883672814183284, "grad_norm": 0.3456088602542877, "learning_rate": 4.881975364685826e-05, "loss": 0.2566, "step": 10561 }, { "epoch": 0.18838511754004209, "grad_norm": 0.2895427346229553, "learning_rate": 4.8819280998905616e-05, "loss": 0.2019, "step": 10562 }, { "epoch": 0.1884029536617558, "grad_norm": 0.2795896530151367, "learning_rate": 4.8818808258621205e-05, "loss": 0.208, "step": 10563 }, { "epoch": 0.1884207897834695, "grad_norm": 0.3548523783683777, "learning_rate": 4.881833542600688e-05, "loss": 0.2041, "step": 10564 }, { "epoch": 0.18843862590518318, "grad_norm": 0.27151885628700256, "learning_rate": 4.8817862501064456e-05, "loss": 0.2094, "step": 10565 }, { "epoch": 0.18845646202689686, "grad_norm": 0.25382769107818604, "learning_rate": 4.881738948379577e-05, "loss": 0.2424, "step": 10566 }, { "epoch": 0.18847429814861055, "grad_norm": 0.21698448061943054, "learning_rate": 4.8816916374202656e-05, "loss": 0.2004, "step": 10567 }, { "epoch": 0.18849213427032427, "grad_norm": 0.32958438992500305, "learning_rate": 4.881644317228695e-05, "loss": 0.2452, "step": 10568 }, { "epoch": 0.18850997039203796, "grad_norm": 0.24666844308376312, "learning_rate": 4.8815969878050484e-05, "loss": 0.18, "step": 10569 }, { "epoch": 0.18852780651375164, "grad_norm": 0.2679874300956726, "learning_rate": 4.8815496491495085e-05, "loss": 0.1445, "step": 10570 }, { "epoch": 0.18854564263546533, "grad_norm": 0.4095442593097687, "learning_rate": 4.881502301262261e-05, "loss": 0.3187, "step": 10571 }, { "epoch": 0.18856347875717905, "grad_norm": 0.2798469662666321, "learning_rate": 4.8814549441434865e-05, "loss": 0.2063, "step": 10572 }, { "epoch": 0.18858131487889274, "grad_norm": 0.2988153398036957, "learning_rate": 4.881407577793371e-05, "loss": 0.269, "step": 10573 }, { "epoch": 0.18859915100060642, "grad_norm": 0.22357898950576782, "learning_rate": 4.881360202212097e-05, "loss": 0.1896, "step": 10574 }, { "epoch": 0.1886169871223201, "grad_norm": 0.32179194688796997, "learning_rate": 4.881312817399848e-05, "loss": 0.2574, "step": 10575 }, { "epoch": 0.18863482324403383, "grad_norm": 0.24870289862155914, "learning_rate": 4.881265423356809e-05, "loss": 0.2105, "step": 10576 }, { "epoch": 0.18865265936574752, "grad_norm": 0.3383040130138397, "learning_rate": 4.8812180200831626e-05, "loss": 0.2095, "step": 10577 }, { "epoch": 0.1886704954874612, "grad_norm": 0.22676241397857666, "learning_rate": 4.881170607579092e-05, "loss": 0.1704, "step": 10578 }, { "epoch": 0.1886883316091749, "grad_norm": 0.2661273777484894, "learning_rate": 4.8811231858447823e-05, "loss": 0.1785, "step": 10579 }, { "epoch": 0.1887061677308886, "grad_norm": 0.27105018496513367, "learning_rate": 4.8810757548804165e-05, "loss": 0.2322, "step": 10580 }, { "epoch": 0.1887240038526023, "grad_norm": 0.3106338679790497, "learning_rate": 4.881028314686179e-05, "loss": 0.1691, "step": 10581 }, { "epoch": 0.18874183997431598, "grad_norm": 0.2899042069911957, "learning_rate": 4.8809808652622525e-05, "loss": 0.2155, "step": 10582 }, { "epoch": 0.18875967609602967, "grad_norm": 0.2647222876548767, "learning_rate": 4.880933406608823e-05, "loss": 0.2058, "step": 10583 }, { "epoch": 0.1887775122177434, "grad_norm": 0.2770458161830902, "learning_rate": 4.880885938726072e-05, "loss": 0.1951, "step": 10584 }, { "epoch": 0.18879534833945708, "grad_norm": 0.3557983934879303, "learning_rate": 4.880838461614186e-05, "loss": 0.289, "step": 10585 }, { "epoch": 0.18881318446117076, "grad_norm": 0.2803858518600464, "learning_rate": 4.880790975273347e-05, "loss": 0.2157, "step": 10586 }, { "epoch": 0.18883102058288445, "grad_norm": 0.22735588252544403, "learning_rate": 4.88074347970374e-05, "loss": 0.1677, "step": 10587 }, { "epoch": 0.18884885670459814, "grad_norm": 0.3329298198223114, "learning_rate": 4.8806959749055484e-05, "loss": 0.2207, "step": 10588 }, { "epoch": 0.18886669282631185, "grad_norm": 0.22475937008857727, "learning_rate": 4.880648460878958e-05, "loss": 0.2078, "step": 10589 }, { "epoch": 0.18888452894802554, "grad_norm": 0.2685500979423523, "learning_rate": 4.880600937624151e-05, "loss": 0.1901, "step": 10590 }, { "epoch": 0.18890236506973923, "grad_norm": 0.25866085290908813, "learning_rate": 4.880553405141313e-05, "loss": 0.1974, "step": 10591 }, { "epoch": 0.18892020119145292, "grad_norm": 0.2405107468366623, "learning_rate": 4.880505863430628e-05, "loss": 0.1733, "step": 10592 }, { "epoch": 0.18893803731316663, "grad_norm": 0.29471492767333984, "learning_rate": 4.88045831249228e-05, "loss": 0.1963, "step": 10593 }, { "epoch": 0.18895587343488032, "grad_norm": 0.3194786608219147, "learning_rate": 4.880410752326453e-05, "loss": 0.1858, "step": 10594 }, { "epoch": 0.188973709556594, "grad_norm": 0.21729514002799988, "learning_rate": 4.8803631829333326e-05, "loss": 0.1892, "step": 10595 }, { "epoch": 0.1889915456783077, "grad_norm": 0.4171539545059204, "learning_rate": 4.880315604313101e-05, "loss": 0.2813, "step": 10596 }, { "epoch": 0.18900938180002141, "grad_norm": 0.3281741440296173, "learning_rate": 4.8802680164659456e-05, "loss": 0.238, "step": 10597 }, { "epoch": 0.1890272179217351, "grad_norm": 0.29790347814559937, "learning_rate": 4.880220419392048e-05, "loss": 0.1899, "step": 10598 }, { "epoch": 0.1890450540434488, "grad_norm": 0.28304579854011536, "learning_rate": 4.880172813091595e-05, "loss": 0.2499, "step": 10599 }, { "epoch": 0.18906289016516248, "grad_norm": 0.39394810795783997, "learning_rate": 4.8801251975647686e-05, "loss": 0.2015, "step": 10600 }, { "epoch": 0.1890807262868762, "grad_norm": 0.2597050368785858, "learning_rate": 4.8800775728117565e-05, "loss": 0.2577, "step": 10601 }, { "epoch": 0.18909856240858988, "grad_norm": 0.22351546585559845, "learning_rate": 4.880029938832741e-05, "loss": 0.1908, "step": 10602 }, { "epoch": 0.18911639853030357, "grad_norm": 0.2598824203014374, "learning_rate": 4.879982295627907e-05, "loss": 0.1949, "step": 10603 }, { "epoch": 0.18913423465201726, "grad_norm": 0.20953436195850372, "learning_rate": 4.87993464319744e-05, "loss": 0.1562, "step": 10604 }, { "epoch": 0.18915207077373097, "grad_norm": 0.24557650089263916, "learning_rate": 4.879886981541524e-05, "loss": 0.2513, "step": 10605 }, { "epoch": 0.18916990689544466, "grad_norm": 0.2735897898674011, "learning_rate": 4.8798393106603444e-05, "loss": 0.2241, "step": 10606 }, { "epoch": 0.18918774301715835, "grad_norm": 0.3022579848766327, "learning_rate": 4.879791630554086e-05, "loss": 0.221, "step": 10607 }, { "epoch": 0.18920557913887204, "grad_norm": 0.3795880973339081, "learning_rate": 4.879743941222932e-05, "loss": 0.2504, "step": 10608 }, { "epoch": 0.18922341526058573, "grad_norm": 0.2794035077095032, "learning_rate": 4.87969624266707e-05, "loss": 0.2317, "step": 10609 }, { "epoch": 0.18924125138229944, "grad_norm": 0.23743732273578644, "learning_rate": 4.879648534886683e-05, "loss": 0.2037, "step": 10610 }, { "epoch": 0.18925908750401313, "grad_norm": 0.3326530158519745, "learning_rate": 4.8796008178819565e-05, "loss": 0.2405, "step": 10611 }, { "epoch": 0.18927692362572682, "grad_norm": 0.24557062983512878, "learning_rate": 4.8795530916530746e-05, "loss": 0.1831, "step": 10612 }, { "epoch": 0.1892947597474405, "grad_norm": 0.26500362157821655, "learning_rate": 4.8795053562002235e-05, "loss": 0.2482, "step": 10613 }, { "epoch": 0.18931259586915422, "grad_norm": 0.2623896598815918, "learning_rate": 4.879457611523588e-05, "loss": 0.2006, "step": 10614 }, { "epoch": 0.1893304319908679, "grad_norm": 0.25139978528022766, "learning_rate": 4.879409857623353e-05, "loss": 0.1826, "step": 10615 }, { "epoch": 0.1893482681125816, "grad_norm": 0.28829190135002136, "learning_rate": 4.879362094499703e-05, "loss": 0.1951, "step": 10616 }, { "epoch": 0.18936610423429528, "grad_norm": 0.2570495009422302, "learning_rate": 4.8793143221528236e-05, "loss": 0.2105, "step": 10617 }, { "epoch": 0.189383940356009, "grad_norm": 0.2040308564901352, "learning_rate": 4.8792665405829005e-05, "loss": 0.123, "step": 10618 }, { "epoch": 0.1894017764777227, "grad_norm": 0.3173140585422516, "learning_rate": 4.879218749790119e-05, "loss": 0.2562, "step": 10619 }, { "epoch": 0.18941961259943638, "grad_norm": 0.23079805076122284, "learning_rate": 4.879170949774663e-05, "loss": 0.1872, "step": 10620 }, { "epoch": 0.18943744872115006, "grad_norm": 0.2190384864807129, "learning_rate": 4.87912314053672e-05, "loss": 0.1654, "step": 10621 }, { "epoch": 0.18945528484286378, "grad_norm": 0.3155633807182312, "learning_rate": 4.879075322076473e-05, "loss": 0.1621, "step": 10622 }, { "epoch": 0.18947312096457747, "grad_norm": 0.24207884073257446, "learning_rate": 4.879027494394108e-05, "loss": 0.219, "step": 10623 }, { "epoch": 0.18949095708629116, "grad_norm": 0.2664487063884735, "learning_rate": 4.878979657489811e-05, "loss": 0.2105, "step": 10624 }, { "epoch": 0.18950879320800484, "grad_norm": 0.2868267297744751, "learning_rate": 4.8789318113637676e-05, "loss": 0.2023, "step": 10625 }, { "epoch": 0.18952662932971853, "grad_norm": 0.24286139011383057, "learning_rate": 4.878883956016163e-05, "loss": 0.1904, "step": 10626 }, { "epoch": 0.18954446545143225, "grad_norm": 0.27517902851104736, "learning_rate": 4.878836091447182e-05, "loss": 0.2367, "step": 10627 }, { "epoch": 0.18956230157314594, "grad_norm": 0.22094179689884186, "learning_rate": 4.878788217657011e-05, "loss": 0.2, "step": 10628 }, { "epoch": 0.18958013769485962, "grad_norm": 0.3139047622680664, "learning_rate": 4.878740334645835e-05, "loss": 0.1842, "step": 10629 }, { "epoch": 0.1895979738165733, "grad_norm": 0.3774126172065735, "learning_rate": 4.87869244241384e-05, "loss": 0.2002, "step": 10630 }, { "epoch": 0.18961580993828703, "grad_norm": 0.2085423320531845, "learning_rate": 4.878644540961212e-05, "loss": 0.1746, "step": 10631 }, { "epoch": 0.18963364606000072, "grad_norm": 0.26533958315849304, "learning_rate": 4.878596630288135e-05, "loss": 0.1814, "step": 10632 }, { "epoch": 0.1896514821817144, "grad_norm": 0.23908816277980804, "learning_rate": 4.8785487103947965e-05, "loss": 0.1822, "step": 10633 }, { "epoch": 0.1896693183034281, "grad_norm": 0.22133202850818634, "learning_rate": 4.878500781281381e-05, "loss": 0.2197, "step": 10634 }, { "epoch": 0.1896871544251418, "grad_norm": 0.2445531040430069, "learning_rate": 4.878452842948076e-05, "loss": 0.2065, "step": 10635 }, { "epoch": 0.1897049905468555, "grad_norm": 0.31081560254096985, "learning_rate": 4.878404895395067e-05, "loss": 0.2119, "step": 10636 }, { "epoch": 0.18972282666856918, "grad_norm": 0.32412323355674744, "learning_rate": 4.8783569386225374e-05, "loss": 0.2276, "step": 10637 }, { "epoch": 0.18974066279028287, "grad_norm": 0.3143625259399414, "learning_rate": 4.878308972630676e-05, "loss": 0.2369, "step": 10638 }, { "epoch": 0.1897584989119966, "grad_norm": 0.3914739787578583, "learning_rate": 4.878260997419667e-05, "loss": 0.2123, "step": 10639 }, { "epoch": 0.18977633503371028, "grad_norm": 0.3520326018333435, "learning_rate": 4.878213012989697e-05, "loss": 0.2435, "step": 10640 }, { "epoch": 0.18979417115542396, "grad_norm": 0.38156238198280334, "learning_rate": 4.878165019340952e-05, "loss": 0.2352, "step": 10641 }, { "epoch": 0.18981200727713765, "grad_norm": 0.30638787150382996, "learning_rate": 4.878117016473618e-05, "loss": 0.2642, "step": 10642 }, { "epoch": 0.18982984339885137, "grad_norm": 0.3258053660392761, "learning_rate": 4.878069004387882e-05, "loss": 0.2064, "step": 10643 }, { "epoch": 0.18984767952056505, "grad_norm": 0.23376846313476562, "learning_rate": 4.878020983083928e-05, "loss": 0.2156, "step": 10644 }, { "epoch": 0.18986551564227874, "grad_norm": 0.185083270072937, "learning_rate": 4.8779729525619434e-05, "loss": 0.1586, "step": 10645 }, { "epoch": 0.18988335176399243, "grad_norm": 0.26105907559394836, "learning_rate": 4.877924912822115e-05, "loss": 0.2215, "step": 10646 }, { "epoch": 0.18990118788570612, "grad_norm": 0.33682021498680115, "learning_rate": 4.8778768638646275e-05, "loss": 0.2209, "step": 10647 }, { "epoch": 0.18991902400741983, "grad_norm": 0.3240136504173279, "learning_rate": 4.877828805689669e-05, "loss": 0.1778, "step": 10648 }, { "epoch": 0.18993686012913352, "grad_norm": 0.24435627460479736, "learning_rate": 4.8777807382974236e-05, "loss": 0.2121, "step": 10649 }, { "epoch": 0.1899546962508472, "grad_norm": 0.21817460656166077, "learning_rate": 4.877732661688079e-05, "loss": 0.2128, "step": 10650 }, { "epoch": 0.1899725323725609, "grad_norm": 0.4411410391330719, "learning_rate": 4.8776845758618225e-05, "loss": 0.1674, "step": 10651 }, { "epoch": 0.18999036849427461, "grad_norm": 0.28367194533348083, "learning_rate": 4.8776364808188387e-05, "loss": 0.1967, "step": 10652 }, { "epoch": 0.1900082046159883, "grad_norm": 0.212859645485878, "learning_rate": 4.8775883765593144e-05, "loss": 0.1815, "step": 10653 }, { "epoch": 0.190026040737702, "grad_norm": 0.22487853467464447, "learning_rate": 4.8775402630834367e-05, "loss": 0.201, "step": 10654 }, { "epoch": 0.19004387685941568, "grad_norm": 0.40863654017448425, "learning_rate": 4.877492140391391e-05, "loss": 0.2182, "step": 10655 }, { "epoch": 0.1900617129811294, "grad_norm": 0.25813040137290955, "learning_rate": 4.877444008483366e-05, "loss": 0.2106, "step": 10656 }, { "epoch": 0.19007954910284308, "grad_norm": 0.2469097226858139, "learning_rate": 4.877395867359545e-05, "loss": 0.1879, "step": 10657 }, { "epoch": 0.19009738522455677, "grad_norm": 0.29186534881591797, "learning_rate": 4.877347717020118e-05, "loss": 0.2304, "step": 10658 }, { "epoch": 0.19011522134627046, "grad_norm": 0.30523616075515747, "learning_rate": 4.87729955746527e-05, "loss": 0.2028, "step": 10659 }, { "epoch": 0.19013305746798417, "grad_norm": 0.22320972383022308, "learning_rate": 4.877251388695188e-05, "loss": 0.179, "step": 10660 }, { "epoch": 0.19015089358969786, "grad_norm": 0.23349139094352722, "learning_rate": 4.8772032107100575e-05, "loss": 0.1955, "step": 10661 }, { "epoch": 0.19016872971141155, "grad_norm": 0.30190032720565796, "learning_rate": 4.877155023510067e-05, "loss": 0.192, "step": 10662 }, { "epoch": 0.19018656583312524, "grad_norm": 0.2792718708515167, "learning_rate": 4.8771068270954026e-05, "loss": 0.2449, "step": 10663 }, { "epoch": 0.19020440195483895, "grad_norm": 0.291419118642807, "learning_rate": 4.877058621466251e-05, "loss": 0.1789, "step": 10664 }, { "epoch": 0.19022223807655264, "grad_norm": 0.3131239116191864, "learning_rate": 4.877010406622799e-05, "loss": 0.2704, "step": 10665 }, { "epoch": 0.19024007419826633, "grad_norm": 0.28706690669059753, "learning_rate": 4.876962182565234e-05, "loss": 0.2338, "step": 10666 }, { "epoch": 0.19025791031998002, "grad_norm": 0.36330926418304443, "learning_rate": 4.8769139492937424e-05, "loss": 0.1698, "step": 10667 }, { "epoch": 0.1902757464416937, "grad_norm": 0.24396871030330658, "learning_rate": 4.876865706808511e-05, "loss": 0.2072, "step": 10668 }, { "epoch": 0.19029358256340742, "grad_norm": 0.2736376225948334, "learning_rate": 4.876817455109728e-05, "loss": 0.2225, "step": 10669 }, { "epoch": 0.1903114186851211, "grad_norm": 0.25717228651046753, "learning_rate": 4.876769194197579e-05, "loss": 0.1949, "step": 10670 }, { "epoch": 0.1903292548068348, "grad_norm": 0.31902459263801575, "learning_rate": 4.876720924072252e-05, "loss": 0.2415, "step": 10671 }, { "epoch": 0.19034709092854848, "grad_norm": 0.2533029317855835, "learning_rate": 4.876672644733934e-05, "loss": 0.1688, "step": 10672 }, { "epoch": 0.1903649270502622, "grad_norm": 0.6672841906547546, "learning_rate": 4.876624356182811e-05, "loss": 0.1843, "step": 10673 }, { "epoch": 0.1903827631719759, "grad_norm": 0.30146417021751404, "learning_rate": 4.876576058419072e-05, "loss": 0.2155, "step": 10674 }, { "epoch": 0.19040059929368958, "grad_norm": 0.5877314805984497, "learning_rate": 4.876527751442903e-05, "loss": 0.3144, "step": 10675 }, { "epoch": 0.19041843541540326, "grad_norm": 0.4620359539985657, "learning_rate": 4.876479435254492e-05, "loss": 0.2325, "step": 10676 }, { "epoch": 0.19043627153711698, "grad_norm": 0.2622694969177246, "learning_rate": 4.8764311098540256e-05, "loss": 0.1864, "step": 10677 }, { "epoch": 0.19045410765883067, "grad_norm": 0.3003308176994324, "learning_rate": 4.876382775241691e-05, "loss": 0.2028, "step": 10678 }, { "epoch": 0.19047194378054436, "grad_norm": 0.33290034532546997, "learning_rate": 4.876334431417677e-05, "loss": 0.2158, "step": 10679 }, { "epoch": 0.19048977990225804, "grad_norm": 0.3635729253292084, "learning_rate": 4.8762860783821695e-05, "loss": 0.2039, "step": 10680 }, { "epoch": 0.19050761602397176, "grad_norm": 0.23168057203292847, "learning_rate": 4.876237716135356e-05, "loss": 0.1456, "step": 10681 }, { "epoch": 0.19052545214568545, "grad_norm": 0.37110477685928345, "learning_rate": 4.8761893446774256e-05, "loss": 0.2385, "step": 10682 }, { "epoch": 0.19054328826739914, "grad_norm": 0.2669340670108795, "learning_rate": 4.876140964008563e-05, "loss": 0.1782, "step": 10683 }, { "epoch": 0.19056112438911282, "grad_norm": 0.3143084645271301, "learning_rate": 4.876092574128958e-05, "loss": 0.1606, "step": 10684 }, { "epoch": 0.19057896051082654, "grad_norm": 0.2638698220252991, "learning_rate": 4.876044175038797e-05, "loss": 0.2126, "step": 10685 }, { "epoch": 0.19059679663254023, "grad_norm": 0.3711576461791992, "learning_rate": 4.87599576673827e-05, "loss": 0.1979, "step": 10686 }, { "epoch": 0.19061463275425392, "grad_norm": 0.397236704826355, "learning_rate": 4.875947349227561e-05, "loss": 0.2516, "step": 10687 }, { "epoch": 0.1906324688759676, "grad_norm": 0.2776789367198944, "learning_rate": 4.8758989225068596e-05, "loss": 0.2108, "step": 10688 }, { "epoch": 0.1906503049976813, "grad_norm": 0.36340075731277466, "learning_rate": 4.8758504865763544e-05, "loss": 0.2122, "step": 10689 }, { "epoch": 0.190668141119395, "grad_norm": 0.34707874059677124, "learning_rate": 4.875802041436231e-05, "loss": 0.2305, "step": 10690 }, { "epoch": 0.1906859772411087, "grad_norm": 0.28226712346076965, "learning_rate": 4.8757535870866785e-05, "loss": 0.1888, "step": 10691 }, { "epoch": 0.19070381336282238, "grad_norm": 0.3021794855594635, "learning_rate": 4.875705123527885e-05, "loss": 0.2266, "step": 10692 }, { "epoch": 0.19072164948453607, "grad_norm": 0.2521364390850067, "learning_rate": 4.875656650760038e-05, "loss": 0.191, "step": 10693 }, { "epoch": 0.1907394856062498, "grad_norm": 0.18075856566429138, "learning_rate": 4.875608168783324e-05, "loss": 0.1905, "step": 10694 }, { "epoch": 0.19075732172796347, "grad_norm": 0.26127690076828003, "learning_rate": 4.875559677597934e-05, "loss": 0.2108, "step": 10695 }, { "epoch": 0.19077515784967716, "grad_norm": 0.27619415521621704, "learning_rate": 4.8755111772040526e-05, "loss": 0.2049, "step": 10696 }, { "epoch": 0.19079299397139085, "grad_norm": 0.37192365527153015, "learning_rate": 4.87546266760187e-05, "loss": 0.1842, "step": 10697 }, { "epoch": 0.19081083009310457, "grad_norm": 0.285893976688385, "learning_rate": 4.8754141487915745e-05, "loss": 0.1877, "step": 10698 }, { "epoch": 0.19082866621481825, "grad_norm": 0.37313127517700195, "learning_rate": 4.875365620773352e-05, "loss": 0.201, "step": 10699 }, { "epoch": 0.19084650233653194, "grad_norm": 0.3476355969905853, "learning_rate": 4.8753170835473926e-05, "loss": 0.2817, "step": 10700 }, { "epoch": 0.19086433845824563, "grad_norm": 0.2675214409828186, "learning_rate": 4.875268537113884e-05, "loss": 0.194, "step": 10701 }, { "epoch": 0.19088217457995935, "grad_norm": 0.31153154373168945, "learning_rate": 4.8752199814730134e-05, "loss": 0.2034, "step": 10702 }, { "epoch": 0.19090001070167303, "grad_norm": 0.23315644264221191, "learning_rate": 4.8751714166249706e-05, "loss": 0.1858, "step": 10703 }, { "epoch": 0.19091784682338672, "grad_norm": 0.36079779267311096, "learning_rate": 4.875122842569943e-05, "loss": 0.264, "step": 10704 }, { "epoch": 0.1909356829451004, "grad_norm": 0.2607985734939575, "learning_rate": 4.875074259308119e-05, "loss": 0.1662, "step": 10705 }, { "epoch": 0.19095351906681413, "grad_norm": 0.4972494840621948, "learning_rate": 4.875025666839686e-05, "loss": 0.2623, "step": 10706 }, { "epoch": 0.1909713551885278, "grad_norm": 0.3176359236240387, "learning_rate": 4.874977065164834e-05, "loss": 0.199, "step": 10707 }, { "epoch": 0.1909891913102415, "grad_norm": 0.22909832000732422, "learning_rate": 4.8749284542837504e-05, "loss": 0.1805, "step": 10708 }, { "epoch": 0.1910070274319552, "grad_norm": 0.23171298205852509, "learning_rate": 4.874879834196623e-05, "loss": 0.2299, "step": 10709 }, { "epoch": 0.19102486355366888, "grad_norm": 0.318135142326355, "learning_rate": 4.8748312049036426e-05, "loss": 0.1971, "step": 10710 }, { "epoch": 0.1910426996753826, "grad_norm": 0.2622092366218567, "learning_rate": 4.8747825664049954e-05, "loss": 0.2292, "step": 10711 }, { "epoch": 0.19106053579709628, "grad_norm": 0.23834381997585297, "learning_rate": 4.8747339187008707e-05, "loss": 0.2478, "step": 10712 }, { "epoch": 0.19107837191880997, "grad_norm": 0.276817262172699, "learning_rate": 4.8746852617914575e-05, "loss": 0.1794, "step": 10713 }, { "epoch": 0.19109620804052366, "grad_norm": 0.30297133326530457, "learning_rate": 4.8746365956769436e-05, "loss": 0.2156, "step": 10714 }, { "epoch": 0.19111404416223737, "grad_norm": 0.25306639075279236, "learning_rate": 4.874587920357518e-05, "loss": 0.2202, "step": 10715 }, { "epoch": 0.19113188028395106, "grad_norm": 0.27077245712280273, "learning_rate": 4.87453923583337e-05, "loss": 0.1987, "step": 10716 }, { "epoch": 0.19114971640566475, "grad_norm": 0.32376712560653687, "learning_rate": 4.874490542104687e-05, "loss": 0.2671, "step": 10717 }, { "epoch": 0.19116755252737844, "grad_norm": 0.2347438484430313, "learning_rate": 4.8744418391716597e-05, "loss": 0.2045, "step": 10718 }, { "epoch": 0.19118538864909215, "grad_norm": 0.23465541005134583, "learning_rate": 4.8743931270344745e-05, "loss": 0.1664, "step": 10719 }, { "epoch": 0.19120322477080584, "grad_norm": 0.26362845301628113, "learning_rate": 4.8743444056933216e-05, "loss": 0.1581, "step": 10720 }, { "epoch": 0.19122106089251953, "grad_norm": 0.30119380354881287, "learning_rate": 4.87429567514839e-05, "loss": 0.1942, "step": 10721 }, { "epoch": 0.19123889701423322, "grad_norm": 0.3214512765407562, "learning_rate": 4.874246935399869e-05, "loss": 0.1876, "step": 10722 }, { "epoch": 0.19125673313594693, "grad_norm": 0.3232775330543518, "learning_rate": 4.874198186447946e-05, "loss": 0.1692, "step": 10723 }, { "epoch": 0.19127456925766062, "grad_norm": 0.2662023603916168, "learning_rate": 4.874149428292811e-05, "loss": 0.2053, "step": 10724 }, { "epoch": 0.1912924053793743, "grad_norm": 0.323077529668808, "learning_rate": 4.8741006609346527e-05, "loss": 0.2443, "step": 10725 }, { "epoch": 0.191310241501088, "grad_norm": 0.34338510036468506, "learning_rate": 4.87405188437366e-05, "loss": 0.1505, "step": 10726 }, { "epoch": 0.19132807762280168, "grad_norm": 0.22434459626674652, "learning_rate": 4.874003098610023e-05, "loss": 0.1771, "step": 10727 }, { "epoch": 0.1913459137445154, "grad_norm": 0.29782113432884216, "learning_rate": 4.87395430364393e-05, "loss": 0.2559, "step": 10728 }, { "epoch": 0.1913637498662291, "grad_norm": 0.2826301157474518, "learning_rate": 4.873905499475569e-05, "loss": 0.2342, "step": 10729 }, { "epoch": 0.19138158598794278, "grad_norm": 0.3040046989917755, "learning_rate": 4.8738566861051324e-05, "loss": 0.1915, "step": 10730 }, { "epoch": 0.19139942210965646, "grad_norm": 0.23923376202583313, "learning_rate": 4.873807863532806e-05, "loss": 0.186, "step": 10731 }, { "epoch": 0.19141725823137018, "grad_norm": 0.4032547175884247, "learning_rate": 4.8737590317587806e-05, "loss": 0.1615, "step": 10732 }, { "epoch": 0.19143509435308387, "grad_norm": 0.33723071217536926, "learning_rate": 4.873710190783245e-05, "loss": 0.2841, "step": 10733 }, { "epoch": 0.19145293047479756, "grad_norm": 0.339765727519989, "learning_rate": 4.8736613406063894e-05, "loss": 0.215, "step": 10734 }, { "epoch": 0.19147076659651124, "grad_norm": 0.3320823907852173, "learning_rate": 4.8736124812284025e-05, "loss": 0.2184, "step": 10735 }, { "epoch": 0.19148860271822496, "grad_norm": 0.4221637547016144, "learning_rate": 4.8735636126494735e-05, "loss": 0.2394, "step": 10736 }, { "epoch": 0.19150643883993865, "grad_norm": 0.2771061956882477, "learning_rate": 4.873514734869793e-05, "loss": 0.2106, "step": 10737 }, { "epoch": 0.19152427496165234, "grad_norm": 0.2547479569911957, "learning_rate": 4.873465847889549e-05, "loss": 0.2227, "step": 10738 }, { "epoch": 0.19154211108336602, "grad_norm": 0.5282972455024719, "learning_rate": 4.8734169517089315e-05, "loss": 0.1985, "step": 10739 }, { "epoch": 0.19155994720507974, "grad_norm": 0.2373201847076416, "learning_rate": 4.873368046328129e-05, "loss": 0.2075, "step": 10740 }, { "epoch": 0.19157778332679343, "grad_norm": 0.24604111909866333, "learning_rate": 4.873319131747334e-05, "loss": 0.186, "step": 10741 }, { "epoch": 0.19159561944850712, "grad_norm": 0.25536486506462097, "learning_rate": 4.873270207966734e-05, "loss": 0.197, "step": 10742 }, { "epoch": 0.1916134555702208, "grad_norm": 0.28811123967170715, "learning_rate": 4.8732212749865183e-05, "loss": 0.1741, "step": 10743 }, { "epoch": 0.19163129169193452, "grad_norm": 0.21727687120437622, "learning_rate": 4.873172332806878e-05, "loss": 0.194, "step": 10744 }, { "epoch": 0.1916491278136482, "grad_norm": 0.3271524906158447, "learning_rate": 4.873123381428002e-05, "loss": 0.2366, "step": 10745 }, { "epoch": 0.1916669639353619, "grad_norm": 0.22577716410160065, "learning_rate": 4.8730744208500803e-05, "loss": 0.2071, "step": 10746 }, { "epoch": 0.19168480005707558, "grad_norm": 0.3227449357509613, "learning_rate": 4.8730254510733014e-05, "loss": 0.164, "step": 10747 }, { "epoch": 0.19170263617878927, "grad_norm": 0.24243931472301483, "learning_rate": 4.8729764720978565e-05, "loss": 0.1931, "step": 10748 }, { "epoch": 0.191720472300503, "grad_norm": 0.3302162289619446, "learning_rate": 4.872927483923936e-05, "loss": 0.22, "step": 10749 }, { "epoch": 0.19173830842221667, "grad_norm": 0.2613740563392639, "learning_rate": 4.872878486551728e-05, "loss": 0.1605, "step": 10750 }, { "epoch": 0.19175614454393036, "grad_norm": 0.22148790955543518, "learning_rate": 4.8728294799814244e-05, "loss": 0.1747, "step": 10751 }, { "epoch": 0.19177398066564405, "grad_norm": 0.23619091510772705, "learning_rate": 4.872780464213214e-05, "loss": 0.1598, "step": 10752 }, { "epoch": 0.19179181678735777, "grad_norm": 0.24416673183441162, "learning_rate": 4.872731439247287e-05, "loss": 0.1775, "step": 10753 }, { "epoch": 0.19180965290907145, "grad_norm": 0.24513179063796997, "learning_rate": 4.872682405083833e-05, "loss": 0.1532, "step": 10754 }, { "epoch": 0.19182748903078514, "grad_norm": 0.216408833861351, "learning_rate": 4.872633361723043e-05, "loss": 0.2088, "step": 10755 }, { "epoch": 0.19184532515249883, "grad_norm": 0.347248911857605, "learning_rate": 4.8725843091651057e-05, "loss": 0.2144, "step": 10756 }, { "epoch": 0.19186316127421255, "grad_norm": 0.3666917085647583, "learning_rate": 4.872535247410213e-05, "loss": 0.2423, "step": 10757 }, { "epoch": 0.19188099739592623, "grad_norm": 0.22781166434288025, "learning_rate": 4.872486176458554e-05, "loss": 0.2025, "step": 10758 }, { "epoch": 0.19189883351763992, "grad_norm": 0.19777579605579376, "learning_rate": 4.8724370963103195e-05, "loss": 0.1969, "step": 10759 }, { "epoch": 0.1919166696393536, "grad_norm": 0.3431404232978821, "learning_rate": 4.872388006965699e-05, "loss": 0.241, "step": 10760 }, { "epoch": 0.19193450576106733, "grad_norm": 0.3505854904651642, "learning_rate": 4.8723389084248836e-05, "loss": 0.2034, "step": 10761 }, { "epoch": 0.191952341882781, "grad_norm": 0.3567541241645813, "learning_rate": 4.872289800688063e-05, "loss": 0.1626, "step": 10762 }, { "epoch": 0.1919701780044947, "grad_norm": 0.34400448203086853, "learning_rate": 4.872240683755427e-05, "loss": 0.1878, "step": 10763 }, { "epoch": 0.1919880141262084, "grad_norm": 0.2739449441432953, "learning_rate": 4.8721915576271676e-05, "loss": 0.1989, "step": 10764 }, { "epoch": 0.1920058502479221, "grad_norm": 0.3268420398235321, "learning_rate": 4.872142422303474e-05, "loss": 0.227, "step": 10765 }, { "epoch": 0.1920236863696358, "grad_norm": 0.2755216062068939, "learning_rate": 4.8720932777845376e-05, "loss": 0.2013, "step": 10766 }, { "epoch": 0.19204152249134948, "grad_norm": 0.42162027955055237, "learning_rate": 4.872044124070548e-05, "loss": 0.1884, "step": 10767 }, { "epoch": 0.19205935861306317, "grad_norm": 0.267325222492218, "learning_rate": 4.871994961161695e-05, "loss": 0.1785, "step": 10768 }, { "epoch": 0.19207719473477686, "grad_norm": 0.22796767950057983, "learning_rate": 4.871945789058172e-05, "loss": 0.1904, "step": 10769 }, { "epoch": 0.19209503085649057, "grad_norm": 0.32255902886390686, "learning_rate": 4.871896607760168e-05, "loss": 0.1818, "step": 10770 }, { "epoch": 0.19211286697820426, "grad_norm": 0.3439323306083679, "learning_rate": 4.8718474172678725e-05, "loss": 0.1981, "step": 10771 }, { "epoch": 0.19213070309991795, "grad_norm": 0.23410923779010773, "learning_rate": 4.8717982175814774e-05, "loss": 0.2099, "step": 10772 }, { "epoch": 0.19214853922163164, "grad_norm": 0.2560957670211792, "learning_rate": 4.871749008701173e-05, "loss": 0.1981, "step": 10773 }, { "epoch": 0.19216637534334535, "grad_norm": 0.23217159509658813, "learning_rate": 4.871699790627151e-05, "loss": 0.2204, "step": 10774 }, { "epoch": 0.19218421146505904, "grad_norm": 0.2590517997741699, "learning_rate": 4.871650563359601e-05, "loss": 0.1566, "step": 10775 }, { "epoch": 0.19220204758677273, "grad_norm": 0.3200806677341461, "learning_rate": 4.871601326898714e-05, "loss": 0.2173, "step": 10776 }, { "epoch": 0.19221988370848642, "grad_norm": 0.2960502505302429, "learning_rate": 4.8715520812446816e-05, "loss": 0.1738, "step": 10777 }, { "epoch": 0.19223771983020013, "grad_norm": 0.26086127758026123, "learning_rate": 4.871502826397694e-05, "loss": 0.2041, "step": 10778 }, { "epoch": 0.19225555595191382, "grad_norm": 0.2736360430717468, "learning_rate": 4.871453562357943e-05, "loss": 0.1971, "step": 10779 }, { "epoch": 0.1922733920736275, "grad_norm": 0.2267794907093048, "learning_rate": 4.8714042891256175e-05, "loss": 0.1632, "step": 10780 }, { "epoch": 0.1922912281953412, "grad_norm": 0.21866673231124878, "learning_rate": 4.8713550067009106e-05, "loss": 0.1901, "step": 10781 }, { "epoch": 0.1923090643170549, "grad_norm": 0.2071189284324646, "learning_rate": 4.871305715084013e-05, "loss": 0.1645, "step": 10782 }, { "epoch": 0.1923269004387686, "grad_norm": 0.2618585228919983, "learning_rate": 4.8712564142751154e-05, "loss": 0.1955, "step": 10783 }, { "epoch": 0.1923447365604823, "grad_norm": 0.2709810137748718, "learning_rate": 4.871207104274409e-05, "loss": 0.2486, "step": 10784 }, { "epoch": 0.19236257268219598, "grad_norm": 0.3615318834781647, "learning_rate": 4.8711577850820845e-05, "loss": 0.2187, "step": 10785 }, { "epoch": 0.1923804088039097, "grad_norm": 0.3363041579723358, "learning_rate": 4.8711084566983334e-05, "loss": 0.2602, "step": 10786 }, { "epoch": 0.19239824492562338, "grad_norm": 0.3892727792263031, "learning_rate": 4.8710591191233466e-05, "loss": 0.1825, "step": 10787 }, { "epoch": 0.19241608104733707, "grad_norm": 0.2107529491186142, "learning_rate": 4.871009772357317e-05, "loss": 0.2102, "step": 10788 }, { "epoch": 0.19243391716905076, "grad_norm": 0.3997096121311188, "learning_rate": 4.870960416400434e-05, "loss": 0.1961, "step": 10789 }, { "epoch": 0.19245175329076444, "grad_norm": 0.25473466515541077, "learning_rate": 4.870911051252889e-05, "loss": 0.2156, "step": 10790 }, { "epoch": 0.19246958941247816, "grad_norm": 0.3052992522716522, "learning_rate": 4.870861676914874e-05, "loss": 0.1973, "step": 10791 }, { "epoch": 0.19248742553419185, "grad_norm": 0.28740665316581726, "learning_rate": 4.870812293386581e-05, "loss": 0.2368, "step": 10792 }, { "epoch": 0.19250526165590554, "grad_norm": 0.26118016242980957, "learning_rate": 4.8707629006682e-05, "loss": 0.16, "step": 10793 }, { "epoch": 0.19252309777761922, "grad_norm": 0.3979540169239044, "learning_rate": 4.870713498759924e-05, "loss": 0.197, "step": 10794 }, { "epoch": 0.19254093389933294, "grad_norm": 0.24741658568382263, "learning_rate": 4.8706640876619423e-05, "loss": 0.2398, "step": 10795 }, { "epoch": 0.19255877002104663, "grad_norm": 0.2383471131324768, "learning_rate": 4.870614667374449e-05, "loss": 0.2067, "step": 10796 }, { "epoch": 0.19257660614276031, "grad_norm": 0.20983032882213593, "learning_rate": 4.870565237897634e-05, "loss": 0.1858, "step": 10797 }, { "epoch": 0.192594442264474, "grad_norm": 0.33932432532310486, "learning_rate": 4.87051579923169e-05, "loss": 0.2122, "step": 10798 }, { "epoch": 0.19261227838618772, "grad_norm": 0.3419553339481354, "learning_rate": 4.8704663513768065e-05, "loss": 0.2347, "step": 10799 }, { "epoch": 0.1926301145079014, "grad_norm": 0.226662278175354, "learning_rate": 4.870416894333178e-05, "loss": 0.162, "step": 10800 }, { "epoch": 0.1926479506296151, "grad_norm": 0.2669966220855713, "learning_rate": 4.8703674281009944e-05, "loss": 0.1794, "step": 10801 }, { "epoch": 0.19266578675132878, "grad_norm": 0.2578684687614441, "learning_rate": 4.870317952680448e-05, "loss": 0.2104, "step": 10802 }, { "epoch": 0.1926836228730425, "grad_norm": 0.36353614926338196, "learning_rate": 4.87026846807173e-05, "loss": 0.2095, "step": 10803 }, { "epoch": 0.1927014589947562, "grad_norm": 0.26796650886535645, "learning_rate": 4.8702189742750336e-05, "loss": 0.1554, "step": 10804 }, { "epoch": 0.19271929511646987, "grad_norm": 0.2691427767276764, "learning_rate": 4.87016947129055e-05, "loss": 0.2155, "step": 10805 }, { "epoch": 0.19273713123818356, "grad_norm": 0.43507060408592224, "learning_rate": 4.87011995911847e-05, "loss": 0.2378, "step": 10806 }, { "epoch": 0.19275496735989725, "grad_norm": 0.2600294351577759, "learning_rate": 4.870070437758987e-05, "loss": 0.2423, "step": 10807 }, { "epoch": 0.19277280348161097, "grad_norm": 0.2781378924846649, "learning_rate": 4.870020907212293e-05, "loss": 0.146, "step": 10808 }, { "epoch": 0.19279063960332465, "grad_norm": 0.28758978843688965, "learning_rate": 4.869971367478578e-05, "loss": 0.2175, "step": 10809 }, { "epoch": 0.19280847572503834, "grad_norm": 0.23192574083805084, "learning_rate": 4.8699218185580364e-05, "loss": 0.1984, "step": 10810 }, { "epoch": 0.19282631184675203, "grad_norm": 0.3394330143928528, "learning_rate": 4.8698722604508585e-05, "loss": 0.2212, "step": 10811 }, { "epoch": 0.19284414796846575, "grad_norm": 0.2767375409603119, "learning_rate": 4.869822693157238e-05, "loss": 0.1984, "step": 10812 }, { "epoch": 0.19286198409017943, "grad_norm": 0.36856672167778015, "learning_rate": 4.869773116677365e-05, "loss": 0.2307, "step": 10813 }, { "epoch": 0.19287982021189312, "grad_norm": 0.2712098956108093, "learning_rate": 4.869723531011434e-05, "loss": 0.1482, "step": 10814 }, { "epoch": 0.1928976563336068, "grad_norm": 0.29187947511672974, "learning_rate": 4.8696739361596364e-05, "loss": 0.2319, "step": 10815 }, { "epoch": 0.19291549245532053, "grad_norm": 0.3189451992511749, "learning_rate": 4.8696243321221633e-05, "loss": 0.2065, "step": 10816 }, { "epoch": 0.1929333285770342, "grad_norm": 0.31867697834968567, "learning_rate": 4.869574718899208e-05, "loss": 0.1791, "step": 10817 }, { "epoch": 0.1929511646987479, "grad_norm": 0.36782655119895935, "learning_rate": 4.8695250964909634e-05, "loss": 0.2715, "step": 10818 }, { "epoch": 0.1929690008204616, "grad_norm": 0.44313058257102966, "learning_rate": 4.86947546489762e-05, "loss": 0.183, "step": 10819 }, { "epoch": 0.1929868369421753, "grad_norm": 0.29162415862083435, "learning_rate": 4.869425824119373e-05, "loss": 0.2108, "step": 10820 }, { "epoch": 0.193004673063889, "grad_norm": 0.27659785747528076, "learning_rate": 4.8693761741564116e-05, "loss": 0.1941, "step": 10821 }, { "epoch": 0.19302250918560268, "grad_norm": 0.5724294185638428, "learning_rate": 4.86932651500893e-05, "loss": 0.2551, "step": 10822 }, { "epoch": 0.19304034530731637, "grad_norm": 0.22013120353221893, "learning_rate": 4.869276846677121e-05, "loss": 0.1602, "step": 10823 }, { "epoch": 0.19305818142903008, "grad_norm": 0.34596380591392517, "learning_rate": 4.8692271691611755e-05, "loss": 0.2071, "step": 10824 }, { "epoch": 0.19307601755074377, "grad_norm": 0.274842232465744, "learning_rate": 4.869177482461288e-05, "loss": 0.1678, "step": 10825 }, { "epoch": 0.19309385367245746, "grad_norm": 0.26954323053359985, "learning_rate": 4.869127786577651e-05, "loss": 0.2212, "step": 10826 }, { "epoch": 0.19311168979417115, "grad_norm": 0.36622151732444763, "learning_rate": 4.869078081510455e-05, "loss": 0.2136, "step": 10827 }, { "epoch": 0.19312952591588484, "grad_norm": 0.1847706139087677, "learning_rate": 4.869028367259896e-05, "loss": 0.1542, "step": 10828 }, { "epoch": 0.19314736203759855, "grad_norm": 0.4318757653236389, "learning_rate": 4.868978643826163e-05, "loss": 0.1855, "step": 10829 }, { "epoch": 0.19316519815931224, "grad_norm": 0.30360129475593567, "learning_rate": 4.8689289112094515e-05, "loss": 0.222, "step": 10830 }, { "epoch": 0.19318303428102593, "grad_norm": 0.3232423663139343, "learning_rate": 4.8688791694099525e-05, "loss": 0.1796, "step": 10831 }, { "epoch": 0.19320087040273962, "grad_norm": 0.2598733901977539, "learning_rate": 4.868829418427861e-05, "loss": 0.2433, "step": 10832 }, { "epoch": 0.19321870652445333, "grad_norm": 0.23770968616008759, "learning_rate": 4.8687796582633673e-05, "loss": 0.2249, "step": 10833 }, { "epoch": 0.19323654264616702, "grad_norm": 0.21962356567382812, "learning_rate": 4.8687298889166655e-05, "loss": 0.1789, "step": 10834 }, { "epoch": 0.1932543787678807, "grad_norm": 0.26733145117759705, "learning_rate": 4.868680110387949e-05, "loss": 0.16, "step": 10835 }, { "epoch": 0.1932722148895944, "grad_norm": 0.3937084674835205, "learning_rate": 4.868630322677411e-05, "loss": 0.323, "step": 10836 }, { "epoch": 0.1932900510113081, "grad_norm": 0.26207637786865234, "learning_rate": 4.868580525785242e-05, "loss": 0.2092, "step": 10837 }, { "epoch": 0.1933078871330218, "grad_norm": 0.20126092433929443, "learning_rate": 4.868530719711638e-05, "loss": 0.1836, "step": 10838 }, { "epoch": 0.1933257232547355, "grad_norm": 0.2982984185218811, "learning_rate": 4.868480904456791e-05, "loss": 0.2397, "step": 10839 }, { "epoch": 0.19334355937644918, "grad_norm": 0.27005136013031006, "learning_rate": 4.868431080020893e-05, "loss": 0.233, "step": 10840 }, { "epoch": 0.1933613954981629, "grad_norm": 0.24865023791790009, "learning_rate": 4.868381246404139e-05, "loss": 0.1481, "step": 10841 }, { "epoch": 0.19337923161987658, "grad_norm": 0.3362480401992798, "learning_rate": 4.8683314036067205e-05, "loss": 0.268, "step": 10842 }, { "epoch": 0.19339706774159027, "grad_norm": 0.40264660120010376, "learning_rate": 4.868281551628833e-05, "loss": 0.2347, "step": 10843 }, { "epoch": 0.19341490386330396, "grad_norm": 0.21482892334461212, "learning_rate": 4.868231690470667e-05, "loss": 0.19, "step": 10844 }, { "epoch": 0.19343273998501767, "grad_norm": 0.2526087760925293, "learning_rate": 4.868181820132417e-05, "loss": 0.2067, "step": 10845 }, { "epoch": 0.19345057610673136, "grad_norm": 0.18698081374168396, "learning_rate": 4.868131940614277e-05, "loss": 0.1784, "step": 10846 }, { "epoch": 0.19346841222844505, "grad_norm": 0.23443618416786194, "learning_rate": 4.868082051916438e-05, "loss": 0.2057, "step": 10847 }, { "epoch": 0.19348624835015873, "grad_norm": 0.28047892451286316, "learning_rate": 4.8680321540390974e-05, "loss": 0.2106, "step": 10848 }, { "epoch": 0.19350408447187242, "grad_norm": 0.3395068645477295, "learning_rate": 4.8679822469824444e-05, "loss": 0.2003, "step": 10849 }, { "epoch": 0.19352192059358614, "grad_norm": 0.2478107362985611, "learning_rate": 4.867932330746675e-05, "loss": 0.2144, "step": 10850 }, { "epoch": 0.19353975671529983, "grad_norm": 0.2190067023038864, "learning_rate": 4.867882405331983e-05, "loss": 0.1752, "step": 10851 }, { "epoch": 0.19355759283701351, "grad_norm": 0.2671342194080353, "learning_rate": 4.867832470738559e-05, "loss": 0.2254, "step": 10852 }, { "epoch": 0.1935754289587272, "grad_norm": 0.30748844146728516, "learning_rate": 4.8677825269666e-05, "loss": 0.2, "step": 10853 }, { "epoch": 0.19359326508044092, "grad_norm": 0.20674552023410797, "learning_rate": 4.8677325740162974e-05, "loss": 0.1793, "step": 10854 }, { "epoch": 0.1936111012021546, "grad_norm": 0.32860302925109863, "learning_rate": 4.867682611887846e-05, "loss": 0.1456, "step": 10855 }, { "epoch": 0.1936289373238683, "grad_norm": 0.3131978213787079, "learning_rate": 4.8676326405814384e-05, "loss": 0.2181, "step": 10856 }, { "epoch": 0.19364677344558198, "grad_norm": 0.25286146998405457, "learning_rate": 4.86758266009727e-05, "loss": 0.2157, "step": 10857 }, { "epoch": 0.1936646095672957, "grad_norm": 0.36881959438323975, "learning_rate": 4.8675326704355325e-05, "loss": 0.3224, "step": 10858 }, { "epoch": 0.19368244568900939, "grad_norm": 0.23446522653102875, "learning_rate": 4.8674826715964216e-05, "loss": 0.1333, "step": 10859 }, { "epoch": 0.19370028181072307, "grad_norm": 0.24140134453773499, "learning_rate": 4.8674326635801294e-05, "loss": 0.225, "step": 10860 }, { "epoch": 0.19371811793243676, "grad_norm": 0.22906368970870972, "learning_rate": 4.867382646386851e-05, "loss": 0.165, "step": 10861 }, { "epoch": 0.19373595405415048, "grad_norm": 0.2945690155029297, "learning_rate": 4.867332620016779e-05, "loss": 0.2849, "step": 10862 }, { "epoch": 0.19375379017586417, "grad_norm": 0.23159514367580414, "learning_rate": 4.867282584470109e-05, "loss": 0.2187, "step": 10863 }, { "epoch": 0.19377162629757785, "grad_norm": 0.210427388548851, "learning_rate": 4.867232539747033e-05, "loss": 0.1433, "step": 10864 }, { "epoch": 0.19378946241929154, "grad_norm": 0.3701137602329254, "learning_rate": 4.867182485847747e-05, "loss": 0.1759, "step": 10865 }, { "epoch": 0.19380729854100526, "grad_norm": 0.3354681432247162, "learning_rate": 4.8671324227724444e-05, "loss": 0.2063, "step": 10866 }, { "epoch": 0.19382513466271895, "grad_norm": 0.2675134837627411, "learning_rate": 4.867082350521318e-05, "loss": 0.2005, "step": 10867 }, { "epoch": 0.19384297078443263, "grad_norm": 0.29437896609306335, "learning_rate": 4.867032269094563e-05, "loss": 0.2271, "step": 10868 }, { "epoch": 0.19386080690614632, "grad_norm": 0.304161936044693, "learning_rate": 4.866982178492374e-05, "loss": 0.1886, "step": 10869 }, { "epoch": 0.19387864302786, "grad_norm": 0.28409087657928467, "learning_rate": 4.866932078714944e-05, "loss": 0.1967, "step": 10870 }, { "epoch": 0.19389647914957372, "grad_norm": 0.2600594460964203, "learning_rate": 4.866881969762468e-05, "loss": 0.1916, "step": 10871 }, { "epoch": 0.1939143152712874, "grad_norm": 0.5094261169433594, "learning_rate": 4.86683185163514e-05, "loss": 0.2283, "step": 10872 }, { "epoch": 0.1939321513930011, "grad_norm": 0.24648918211460114, "learning_rate": 4.8667817243331534e-05, "loss": 0.1862, "step": 10873 }, { "epoch": 0.1939499875147148, "grad_norm": 0.2584965229034424, "learning_rate": 4.8667315878567044e-05, "loss": 0.1896, "step": 10874 }, { "epoch": 0.1939678236364285, "grad_norm": 0.2909288704395294, "learning_rate": 4.866681442205986e-05, "loss": 0.2344, "step": 10875 }, { "epoch": 0.1939856597581422, "grad_norm": 0.24164675176143646, "learning_rate": 4.866631287381193e-05, "loss": 0.1786, "step": 10876 }, { "epoch": 0.19400349587985588, "grad_norm": 0.28780633211135864, "learning_rate": 4.86658112338252e-05, "loss": 0.1794, "step": 10877 }, { "epoch": 0.19402133200156957, "grad_norm": 0.285815954208374, "learning_rate": 4.866530950210161e-05, "loss": 0.1922, "step": 10878 }, { "epoch": 0.19403916812328328, "grad_norm": 0.26834774017333984, "learning_rate": 4.86648076786431e-05, "loss": 0.2192, "step": 10879 }, { "epoch": 0.19405700424499697, "grad_norm": 0.2725721597671509, "learning_rate": 4.866430576345163e-05, "loss": 0.1714, "step": 10880 }, { "epoch": 0.19407484036671066, "grad_norm": 0.33946189284324646, "learning_rate": 4.866380375652914e-05, "loss": 0.24, "step": 10881 }, { "epoch": 0.19409267648842435, "grad_norm": 0.241267591714859, "learning_rate": 4.8663301657877556e-05, "loss": 0.2099, "step": 10882 }, { "epoch": 0.19411051261013806, "grad_norm": 0.3695162236690521, "learning_rate": 4.866279946749886e-05, "loss": 0.2413, "step": 10883 }, { "epoch": 0.19412834873185175, "grad_norm": 0.29236093163490295, "learning_rate": 4.8662297185394975e-05, "loss": 0.1886, "step": 10884 }, { "epoch": 0.19414618485356544, "grad_norm": 0.36731311678886414, "learning_rate": 4.866179481156785e-05, "loss": 0.1874, "step": 10885 }, { "epoch": 0.19416402097527913, "grad_norm": 0.22661186754703522, "learning_rate": 4.8661292346019436e-05, "loss": 0.1745, "step": 10886 }, { "epoch": 0.19418185709699284, "grad_norm": 0.29941847920417786, "learning_rate": 4.866078978875168e-05, "loss": 0.1862, "step": 10887 }, { "epoch": 0.19419969321870653, "grad_norm": 0.22813382744789124, "learning_rate": 4.866028713976654e-05, "loss": 0.2094, "step": 10888 }, { "epoch": 0.19421752934042022, "grad_norm": 0.34261658787727356, "learning_rate": 4.865978439906595e-05, "loss": 0.2389, "step": 10889 }, { "epoch": 0.1942353654621339, "grad_norm": 0.2346302717924118, "learning_rate": 4.865928156665186e-05, "loss": 0.1827, "step": 10890 }, { "epoch": 0.1942532015838476, "grad_norm": 0.23762480914592743, "learning_rate": 4.865877864252622e-05, "loss": 0.1724, "step": 10891 }, { "epoch": 0.1942710377055613, "grad_norm": 0.2964513301849365, "learning_rate": 4.865827562669099e-05, "loss": 0.1865, "step": 10892 }, { "epoch": 0.194288873827275, "grad_norm": 0.3174099624156952, "learning_rate": 4.8657772519148117e-05, "loss": 0.2023, "step": 10893 }, { "epoch": 0.1943067099489887, "grad_norm": 0.36556562781333923, "learning_rate": 4.865726931989954e-05, "loss": 0.2465, "step": 10894 }, { "epoch": 0.19432454607070238, "grad_norm": 0.255138099193573, "learning_rate": 4.865676602894721e-05, "loss": 0.2122, "step": 10895 }, { "epoch": 0.1943423821924161, "grad_norm": 0.3078806400299072, "learning_rate": 4.865626264629309e-05, "loss": 0.2798, "step": 10896 }, { "epoch": 0.19436021831412978, "grad_norm": 0.223884716629982, "learning_rate": 4.865575917193913e-05, "loss": 0.1669, "step": 10897 }, { "epoch": 0.19437805443584347, "grad_norm": 0.3409411907196045, "learning_rate": 4.865525560588727e-05, "loss": 0.2308, "step": 10898 }, { "epoch": 0.19439589055755715, "grad_norm": 0.30786609649658203, "learning_rate": 4.865475194813947e-05, "loss": 0.2074, "step": 10899 }, { "epoch": 0.19441372667927087, "grad_norm": 0.22870858013629913, "learning_rate": 4.8654248198697684e-05, "loss": 0.2159, "step": 10900 }, { "epoch": 0.19443156280098456, "grad_norm": 0.27009400725364685, "learning_rate": 4.865374435756386e-05, "loss": 0.1898, "step": 10901 }, { "epoch": 0.19444939892269825, "grad_norm": 0.22941553592681885, "learning_rate": 4.8653240424739955e-05, "loss": 0.183, "step": 10902 }, { "epoch": 0.19446723504441193, "grad_norm": 0.28045961260795593, "learning_rate": 4.8652736400227914e-05, "loss": 0.1481, "step": 10903 }, { "epoch": 0.19448507116612565, "grad_norm": 0.2851707637310028, "learning_rate": 4.8652232284029706e-05, "loss": 0.2099, "step": 10904 }, { "epoch": 0.19450290728783934, "grad_norm": 0.3628099262714386, "learning_rate": 4.8651728076147276e-05, "loss": 0.225, "step": 10905 }, { "epoch": 0.19452074340955303, "grad_norm": 0.24372237920761108, "learning_rate": 4.865122377658257e-05, "loss": 0.1887, "step": 10906 }, { "epoch": 0.19453857953126671, "grad_norm": 0.30829527974128723, "learning_rate": 4.8650719385337565e-05, "loss": 0.2443, "step": 10907 }, { "epoch": 0.1945564156529804, "grad_norm": 0.3270097076892853, "learning_rate": 4.865021490241419e-05, "loss": 0.2493, "step": 10908 }, { "epoch": 0.19457425177469412, "grad_norm": 0.2885185182094574, "learning_rate": 4.8649710327814426e-05, "loss": 0.2135, "step": 10909 }, { "epoch": 0.1945920878964078, "grad_norm": 0.31050482392311096, "learning_rate": 4.86492056615402e-05, "loss": 0.2236, "step": 10910 }, { "epoch": 0.1946099240181215, "grad_norm": 0.30919358134269714, "learning_rate": 4.8648700903593505e-05, "loss": 0.2012, "step": 10911 }, { "epoch": 0.19462776013983518, "grad_norm": 0.25236862897872925, "learning_rate": 4.8648196053976255e-05, "loss": 0.2225, "step": 10912 }, { "epoch": 0.1946455962615489, "grad_norm": 0.23284150660037994, "learning_rate": 4.864769111269045e-05, "loss": 0.1403, "step": 10913 }, { "epoch": 0.19466343238326259, "grad_norm": 0.3003937304019928, "learning_rate": 4.8647186079738014e-05, "loss": 0.2178, "step": 10914 }, { "epoch": 0.19468126850497627, "grad_norm": 0.363349586725235, "learning_rate": 4.864668095512092e-05, "loss": 0.1869, "step": 10915 }, { "epoch": 0.19469910462668996, "grad_norm": 0.28343427181243896, "learning_rate": 4.8646175738841124e-05, "loss": 0.1852, "step": 10916 }, { "epoch": 0.19471694074840368, "grad_norm": 0.24704958498477936, "learning_rate": 4.864567043090059e-05, "loss": 0.2422, "step": 10917 }, { "epoch": 0.19473477687011737, "grad_norm": 0.2634768486022949, "learning_rate": 4.864516503130126e-05, "loss": 0.2002, "step": 10918 }, { "epoch": 0.19475261299183105, "grad_norm": 0.33541154861450195, "learning_rate": 4.8644659540045113e-05, "loss": 0.2285, "step": 10919 }, { "epoch": 0.19477044911354474, "grad_norm": 0.21996474266052246, "learning_rate": 4.864415395713409e-05, "loss": 0.1815, "step": 10920 }, { "epoch": 0.19478828523525846, "grad_norm": 0.5477680563926697, "learning_rate": 4.8643648282570165e-05, "loss": 0.2558, "step": 10921 }, { "epoch": 0.19480612135697215, "grad_norm": 0.33574825525283813, "learning_rate": 4.8643142516355286e-05, "loss": 0.1647, "step": 10922 }, { "epoch": 0.19482395747868583, "grad_norm": 0.28537821769714355, "learning_rate": 4.864263665849143e-05, "loss": 0.1943, "step": 10923 }, { "epoch": 0.19484179360039952, "grad_norm": 0.3535176217556, "learning_rate": 4.864213070898055e-05, "loss": 0.1775, "step": 10924 }, { "epoch": 0.19485962972211324, "grad_norm": 0.28527507185935974, "learning_rate": 4.864162466782459e-05, "loss": 0.2236, "step": 10925 }, { "epoch": 0.19487746584382692, "grad_norm": 0.3154849410057068, "learning_rate": 4.864111853502554e-05, "loss": 0.2291, "step": 10926 }, { "epoch": 0.1948953019655406, "grad_norm": 0.3148837089538574, "learning_rate": 4.8640612310585355e-05, "loss": 0.1943, "step": 10927 }, { "epoch": 0.1949131380872543, "grad_norm": 0.2947532534599304, "learning_rate": 4.864010599450598e-05, "loss": 0.2268, "step": 10928 }, { "epoch": 0.194930974208968, "grad_norm": 0.2506810128688812, "learning_rate": 4.863959958678939e-05, "loss": 0.2585, "step": 10929 }, { "epoch": 0.1949488103306817, "grad_norm": 0.31015411019325256, "learning_rate": 4.863909308743755e-05, "loss": 0.1536, "step": 10930 }, { "epoch": 0.1949666464523954, "grad_norm": 0.19003814458847046, "learning_rate": 4.863858649645242e-05, "loss": 0.1376, "step": 10931 }, { "epoch": 0.19498448257410908, "grad_norm": 0.23797856271266937, "learning_rate": 4.863807981383597e-05, "loss": 0.2068, "step": 10932 }, { "epoch": 0.19500231869582277, "grad_norm": 0.27664363384246826, "learning_rate": 4.863757303959015e-05, "loss": 0.1614, "step": 10933 }, { "epoch": 0.19502015481753648, "grad_norm": 0.2264014333486557, "learning_rate": 4.863706617371693e-05, "loss": 0.1998, "step": 10934 }, { "epoch": 0.19503799093925017, "grad_norm": 0.31272202730178833, "learning_rate": 4.863655921621829e-05, "loss": 0.196, "step": 10935 }, { "epoch": 0.19505582706096386, "grad_norm": 0.21604971587657928, "learning_rate": 4.863605216709617e-05, "loss": 0.1825, "step": 10936 }, { "epoch": 0.19507366318267755, "grad_norm": 0.27598392963409424, "learning_rate": 4.863554502635256e-05, "loss": 0.2422, "step": 10937 }, { "epoch": 0.19509149930439126, "grad_norm": 0.30639636516571045, "learning_rate": 4.8635037793989405e-05, "loss": 0.2215, "step": 10938 }, { "epoch": 0.19510933542610495, "grad_norm": 0.27988162636756897, "learning_rate": 4.8634530470008674e-05, "loss": 0.2001, "step": 10939 }, { "epoch": 0.19512717154781864, "grad_norm": 0.35650813579559326, "learning_rate": 4.863402305441236e-05, "loss": 0.2067, "step": 10940 }, { "epoch": 0.19514500766953233, "grad_norm": 0.2776621878147125, "learning_rate": 4.863351554720239e-05, "loss": 0.2096, "step": 10941 }, { "epoch": 0.19516284379124604, "grad_norm": 0.2894197106361389, "learning_rate": 4.863300794838076e-05, "loss": 0.234, "step": 10942 }, { "epoch": 0.19518067991295973, "grad_norm": 0.27866825461387634, "learning_rate": 4.863250025794943e-05, "loss": 0.1422, "step": 10943 }, { "epoch": 0.19519851603467342, "grad_norm": 0.24755118787288666, "learning_rate": 4.8631992475910363e-05, "loss": 0.1817, "step": 10944 }, { "epoch": 0.1952163521563871, "grad_norm": 0.28582048416137695, "learning_rate": 4.863148460226554e-05, "loss": 0.1802, "step": 10945 }, { "epoch": 0.19523418827810082, "grad_norm": 0.3617633581161499, "learning_rate": 4.863097663701691e-05, "loss": 0.2156, "step": 10946 }, { "epoch": 0.1952520243998145, "grad_norm": 0.35809874534606934, "learning_rate": 4.8630468580166456e-05, "loss": 0.2128, "step": 10947 }, { "epoch": 0.1952698605215282, "grad_norm": 0.3512871265411377, "learning_rate": 4.862996043171614e-05, "loss": 0.276, "step": 10948 }, { "epoch": 0.1952876966432419, "grad_norm": 0.26752448081970215, "learning_rate": 4.8629452191667945e-05, "loss": 0.1536, "step": 10949 }, { "epoch": 0.19530553276495558, "grad_norm": 0.27539655566215515, "learning_rate": 4.8628943860023826e-05, "loss": 0.2174, "step": 10950 }, { "epoch": 0.1953233688866693, "grad_norm": 0.2687312364578247, "learning_rate": 4.862843543678576e-05, "loss": 0.1843, "step": 10951 }, { "epoch": 0.19534120500838298, "grad_norm": 0.3138580918312073, "learning_rate": 4.8627926921955715e-05, "loss": 0.2082, "step": 10952 }, { "epoch": 0.19535904113009667, "grad_norm": 0.3139511048793793, "learning_rate": 4.862741831553567e-05, "loss": 0.2334, "step": 10953 }, { "epoch": 0.19537687725181035, "grad_norm": 0.28194954991340637, "learning_rate": 4.8626909617527586e-05, "loss": 0.234, "step": 10954 }, { "epoch": 0.19539471337352407, "grad_norm": 0.2429482489824295, "learning_rate": 4.862640082793344e-05, "loss": 0.2133, "step": 10955 }, { "epoch": 0.19541254949523776, "grad_norm": 0.29499930143356323, "learning_rate": 4.862589194675521e-05, "loss": 0.2284, "step": 10956 }, { "epoch": 0.19543038561695145, "grad_norm": 0.2928573787212372, "learning_rate": 4.862538297399486e-05, "loss": 0.2361, "step": 10957 }, { "epoch": 0.19544822173866513, "grad_norm": 0.31073400378227234, "learning_rate": 4.862487390965436e-05, "loss": 0.1667, "step": 10958 }, { "epoch": 0.19546605786037885, "grad_norm": 0.3647032082080841, "learning_rate": 4.862436475373569e-05, "loss": 0.2649, "step": 10959 }, { "epoch": 0.19548389398209254, "grad_norm": 0.26545149087905884, "learning_rate": 4.8623855506240826e-05, "loss": 0.2153, "step": 10960 }, { "epoch": 0.19550173010380623, "grad_norm": 0.2558302581310272, "learning_rate": 4.862334616717175e-05, "loss": 0.1868, "step": 10961 }, { "epoch": 0.19551956622551991, "grad_norm": 0.23132599890232086, "learning_rate": 4.862283673653041e-05, "loss": 0.1707, "step": 10962 }, { "epoch": 0.19553740234723363, "grad_norm": 0.3207215368747711, "learning_rate": 4.8622327214318795e-05, "loss": 0.2362, "step": 10963 }, { "epoch": 0.19555523846894732, "grad_norm": 0.29617559909820557, "learning_rate": 4.862181760053889e-05, "loss": 0.189, "step": 10964 }, { "epoch": 0.195573074590661, "grad_norm": 0.22181539237499237, "learning_rate": 4.8621307895192646e-05, "loss": 0.1409, "step": 10965 }, { "epoch": 0.1955909107123747, "grad_norm": 0.3290722370147705, "learning_rate": 4.8620798098282075e-05, "loss": 0.2132, "step": 10966 }, { "epoch": 0.1956087468340884, "grad_norm": 0.23870375752449036, "learning_rate": 4.862028820980912e-05, "loss": 0.2184, "step": 10967 }, { "epoch": 0.1956265829558021, "grad_norm": 0.2592199146747589, "learning_rate": 4.861977822977577e-05, "loss": 0.2276, "step": 10968 }, { "epoch": 0.19564441907751579, "grad_norm": 0.2996153235435486, "learning_rate": 4.8619268158184e-05, "loss": 0.244, "step": 10969 }, { "epoch": 0.19566225519922947, "grad_norm": 0.2985076308250427, "learning_rate": 4.861875799503579e-05, "loss": 0.156, "step": 10970 }, { "epoch": 0.19568009132094316, "grad_norm": 0.28612715005874634, "learning_rate": 4.8618247740333125e-05, "loss": 0.1653, "step": 10971 }, { "epoch": 0.19569792744265688, "grad_norm": 0.23777885735034943, "learning_rate": 4.861773739407796e-05, "loss": 0.1621, "step": 10972 }, { "epoch": 0.19571576356437057, "grad_norm": 0.2995002269744873, "learning_rate": 4.86172269562723e-05, "loss": 0.2022, "step": 10973 }, { "epoch": 0.19573359968608425, "grad_norm": 0.4907090663909912, "learning_rate": 4.86167164269181e-05, "loss": 0.2436, "step": 10974 }, { "epoch": 0.19575143580779794, "grad_norm": 0.2928963601589203, "learning_rate": 4.861620580601736e-05, "loss": 0.2402, "step": 10975 }, { "epoch": 0.19576927192951166, "grad_norm": 0.30040040612220764, "learning_rate": 4.8615695093572044e-05, "loss": 0.2323, "step": 10976 }, { "epoch": 0.19578710805122534, "grad_norm": 0.3200577199459076, "learning_rate": 4.861518428958414e-05, "loss": 0.1874, "step": 10977 }, { "epoch": 0.19580494417293903, "grad_norm": 0.3254393935203552, "learning_rate": 4.8614673394055624e-05, "loss": 0.2132, "step": 10978 }, { "epoch": 0.19582278029465272, "grad_norm": 0.3281918466091156, "learning_rate": 4.861416240698848e-05, "loss": 0.231, "step": 10979 }, { "epoch": 0.19584061641636644, "grad_norm": 0.2855898439884186, "learning_rate": 4.8613651328384676e-05, "loss": 0.2164, "step": 10980 }, { "epoch": 0.19585845253808012, "grad_norm": 0.3017638325691223, "learning_rate": 4.861314015824622e-05, "loss": 0.2394, "step": 10981 }, { "epoch": 0.1958762886597938, "grad_norm": 0.24240683019161224, "learning_rate": 4.8612628896575063e-05, "loss": 0.2354, "step": 10982 }, { "epoch": 0.1958941247815075, "grad_norm": 0.25195175409317017, "learning_rate": 4.861211754337321e-05, "loss": 0.2011, "step": 10983 }, { "epoch": 0.19591196090322122, "grad_norm": 0.2800824046134949, "learning_rate": 4.861160609864263e-05, "loss": 0.2427, "step": 10984 }, { "epoch": 0.1959297970249349, "grad_norm": 0.24269692599773407, "learning_rate": 4.8611094562385306e-05, "loss": 0.1372, "step": 10985 }, { "epoch": 0.1959476331466486, "grad_norm": 0.23989631235599518, "learning_rate": 4.8610582934603234e-05, "loss": 0.2211, "step": 10986 }, { "epoch": 0.19596546926836228, "grad_norm": 0.30193716287612915, "learning_rate": 4.861007121529838e-05, "loss": 0.216, "step": 10987 }, { "epoch": 0.19598330539007597, "grad_norm": 0.45588362216949463, "learning_rate": 4.860955940447274e-05, "loss": 0.163, "step": 10988 }, { "epoch": 0.19600114151178968, "grad_norm": 0.2539214789867401, "learning_rate": 4.860904750212829e-05, "loss": 0.1891, "step": 10989 }, { "epoch": 0.19601897763350337, "grad_norm": 0.2454519271850586, "learning_rate": 4.8608535508267015e-05, "loss": 0.1247, "step": 10990 }, { "epoch": 0.19603681375521706, "grad_norm": 0.3297535479068756, "learning_rate": 4.860802342289091e-05, "loss": 0.2363, "step": 10991 }, { "epoch": 0.19605464987693075, "grad_norm": 0.2104528844356537, "learning_rate": 4.8607511246001944e-05, "loss": 0.1695, "step": 10992 }, { "epoch": 0.19607248599864446, "grad_norm": 0.21681469678878784, "learning_rate": 4.860699897760212e-05, "loss": 0.2314, "step": 10993 }, { "epoch": 0.19609032212035815, "grad_norm": 0.29959338903427124, "learning_rate": 4.860648661769341e-05, "loss": 0.1895, "step": 10994 }, { "epoch": 0.19610815824207184, "grad_norm": 0.28645989298820496, "learning_rate": 4.86059741662778e-05, "loss": 0.2517, "step": 10995 }, { "epoch": 0.19612599436378553, "grad_norm": 0.3185204267501831, "learning_rate": 4.860546162335728e-05, "loss": 0.1633, "step": 10996 }, { "epoch": 0.19614383048549924, "grad_norm": 0.24247121810913086, "learning_rate": 4.8604948988933854e-05, "loss": 0.1895, "step": 10997 }, { "epoch": 0.19616166660721293, "grad_norm": 0.34270310401916504, "learning_rate": 4.860443626300948e-05, "loss": 0.2418, "step": 10998 }, { "epoch": 0.19617950272892662, "grad_norm": 0.2494347244501114, "learning_rate": 4.8603923445586154e-05, "loss": 0.1486, "step": 10999 }, { "epoch": 0.1961973388506403, "grad_norm": 0.4187646806240082, "learning_rate": 4.860341053666587e-05, "loss": 0.1794, "step": 11000 }, { "epoch": 0.1961973388506403, "eval_loss": 0.19454918801784515, "eval_runtime": 107.5552, "eval_samples_per_second": 9.521, "eval_steps_per_second": 1.59, "step": 11000 }, { "epoch": 0.19621517497235402, "grad_norm": 0.25480276346206665, "learning_rate": 4.860289753625062e-05, "loss": 0.2163, "step": 11001 }, { "epoch": 0.1962330110940677, "grad_norm": 0.2538530230522156, "learning_rate": 4.860238444434239e-05, "loss": 0.2152, "step": 11002 }, { "epoch": 0.1962508472157814, "grad_norm": 0.32229483127593994, "learning_rate": 4.8601871260943164e-05, "loss": 0.1846, "step": 11003 }, { "epoch": 0.1962686833374951, "grad_norm": 0.2602200210094452, "learning_rate": 4.860135798605492e-05, "loss": 0.2004, "step": 11004 }, { "epoch": 0.1962865194592088, "grad_norm": 0.2437405288219452, "learning_rate": 4.860084461967967e-05, "loss": 0.1985, "step": 11005 }, { "epoch": 0.1963043555809225, "grad_norm": 0.23054543137550354, "learning_rate": 4.8600331161819405e-05, "loss": 0.2062, "step": 11006 }, { "epoch": 0.19632219170263618, "grad_norm": 0.36326172947883606, "learning_rate": 4.8599817612476095e-05, "loss": 0.2494, "step": 11007 }, { "epoch": 0.19634002782434987, "grad_norm": 0.34366416931152344, "learning_rate": 4.859930397165174e-05, "loss": 0.2261, "step": 11008 }, { "epoch": 0.19635786394606355, "grad_norm": 0.2851044535636902, "learning_rate": 4.8598790239348335e-05, "loss": 0.1639, "step": 11009 }, { "epoch": 0.19637570006777727, "grad_norm": 0.2594095766544342, "learning_rate": 4.859827641556787e-05, "loss": 0.1978, "step": 11010 }, { "epoch": 0.19639353618949096, "grad_norm": 0.23761868476867676, "learning_rate": 4.859776250031233e-05, "loss": 0.2054, "step": 11011 }, { "epoch": 0.19641137231120465, "grad_norm": 0.26732778549194336, "learning_rate": 4.859724849358371e-05, "loss": 0.2, "step": 11012 }, { "epoch": 0.19642920843291833, "grad_norm": 0.29227781295776367, "learning_rate": 4.859673439538401e-05, "loss": 0.1499, "step": 11013 }, { "epoch": 0.19644704455463205, "grad_norm": 0.21788150072097778, "learning_rate": 4.8596220205715214e-05, "loss": 0.1688, "step": 11014 }, { "epoch": 0.19646488067634574, "grad_norm": 0.3312310576438904, "learning_rate": 4.859570592457932e-05, "loss": 0.2355, "step": 11015 }, { "epoch": 0.19648271679805943, "grad_norm": 0.27505844831466675, "learning_rate": 4.859519155197832e-05, "loss": 0.2236, "step": 11016 }, { "epoch": 0.1965005529197731, "grad_norm": 0.4549323320388794, "learning_rate": 4.859467708791421e-05, "loss": 0.1763, "step": 11017 }, { "epoch": 0.19651838904148683, "grad_norm": 0.33471062779426575, "learning_rate": 4.859416253238898e-05, "loss": 0.1952, "step": 11018 }, { "epoch": 0.19653622516320052, "grad_norm": 0.2178431898355484, "learning_rate": 4.859364788540463e-05, "loss": 0.2121, "step": 11019 }, { "epoch": 0.1965540612849142, "grad_norm": 0.2079540640115738, "learning_rate": 4.859313314696315e-05, "loss": 0.1889, "step": 11020 }, { "epoch": 0.1965718974066279, "grad_norm": 0.2850393056869507, "learning_rate": 4.859261831706653e-05, "loss": 0.1715, "step": 11021 }, { "epoch": 0.1965897335283416, "grad_norm": 0.3005395531654358, "learning_rate": 4.8592103395716785e-05, "loss": 0.2188, "step": 11022 }, { "epoch": 0.1966075696500553, "grad_norm": 0.2339594066143036, "learning_rate": 4.859158838291589e-05, "loss": 0.1895, "step": 11023 }, { "epoch": 0.19662540577176899, "grad_norm": 0.22571700811386108, "learning_rate": 4.859107327866585e-05, "loss": 0.1794, "step": 11024 }, { "epoch": 0.19664324189348267, "grad_norm": 0.23732657730579376, "learning_rate": 4.859055808296867e-05, "loss": 0.2179, "step": 11025 }, { "epoch": 0.1966610780151964, "grad_norm": 0.23061764240264893, "learning_rate": 4.859004279582633e-05, "loss": 0.184, "step": 11026 }, { "epoch": 0.19667891413691008, "grad_norm": 0.34119805693626404, "learning_rate": 4.858952741724084e-05, "loss": 0.1919, "step": 11027 }, { "epoch": 0.19669675025862376, "grad_norm": 0.270794540643692, "learning_rate": 4.8589011947214206e-05, "loss": 0.2249, "step": 11028 }, { "epoch": 0.19671458638033745, "grad_norm": 0.3803497552871704, "learning_rate": 4.858849638574839e-05, "loss": 0.3063, "step": 11029 }, { "epoch": 0.19673242250205114, "grad_norm": 0.24121667444705963, "learning_rate": 4.858798073284544e-05, "loss": 0.2079, "step": 11030 }, { "epoch": 0.19675025862376486, "grad_norm": 0.2138267159461975, "learning_rate": 4.8587464988507314e-05, "loss": 0.1569, "step": 11031 }, { "epoch": 0.19676809474547854, "grad_norm": 0.24966634809970856, "learning_rate": 4.858694915273603e-05, "loss": 0.2027, "step": 11032 }, { "epoch": 0.19678593086719223, "grad_norm": 0.2865935266017914, "learning_rate": 4.8586433225533584e-05, "loss": 0.2174, "step": 11033 }, { "epoch": 0.19680376698890592, "grad_norm": 0.24785466492176056, "learning_rate": 4.858591720690198e-05, "loss": 0.2075, "step": 11034 }, { "epoch": 0.19682160311061964, "grad_norm": 0.36719441413879395, "learning_rate": 4.858540109684321e-05, "loss": 0.2419, "step": 11035 }, { "epoch": 0.19683943923233332, "grad_norm": 0.25197044014930725, "learning_rate": 4.8584884895359286e-05, "loss": 0.1649, "step": 11036 }, { "epoch": 0.196857275354047, "grad_norm": 0.24637635052204132, "learning_rate": 4.858436860245219e-05, "loss": 0.2739, "step": 11037 }, { "epoch": 0.1968751114757607, "grad_norm": 0.26137208938598633, "learning_rate": 4.858385221812395e-05, "loss": 0.2079, "step": 11038 }, { "epoch": 0.19689294759747442, "grad_norm": 0.3005337715148926, "learning_rate": 4.858333574237654e-05, "loss": 0.2485, "step": 11039 }, { "epoch": 0.1969107837191881, "grad_norm": 0.2986501157283783, "learning_rate": 4.858281917521198e-05, "loss": 0.2404, "step": 11040 }, { "epoch": 0.1969286198409018, "grad_norm": 0.2938983142375946, "learning_rate": 4.858230251663227e-05, "loss": 0.1532, "step": 11041 }, { "epoch": 0.19694645596261548, "grad_norm": 0.3248229920864105, "learning_rate": 4.858178576663941e-05, "loss": 0.2509, "step": 11042 }, { "epoch": 0.1969642920843292, "grad_norm": 0.22464273869991302, "learning_rate": 4.85812689252354e-05, "loss": 0.1826, "step": 11043 }, { "epoch": 0.19698212820604288, "grad_norm": 0.3778996467590332, "learning_rate": 4.858075199242225e-05, "loss": 0.1933, "step": 11044 }, { "epoch": 0.19699996432775657, "grad_norm": 0.2337600588798523, "learning_rate": 4.8580234968201965e-05, "loss": 0.1796, "step": 11045 }, { "epoch": 0.19701780044947026, "grad_norm": 0.35485145449638367, "learning_rate": 4.857971785257654e-05, "loss": 0.2053, "step": 11046 }, { "epoch": 0.19703563657118398, "grad_norm": 0.27883970737457275, "learning_rate": 4.8579200645547984e-05, "loss": 0.2144, "step": 11047 }, { "epoch": 0.19705347269289766, "grad_norm": 0.25961363315582275, "learning_rate": 4.8578683347118306e-05, "loss": 0.2338, "step": 11048 }, { "epoch": 0.19707130881461135, "grad_norm": 0.28233227133750916, "learning_rate": 4.8578165957289504e-05, "loss": 0.2079, "step": 11049 }, { "epoch": 0.19708914493632504, "grad_norm": 0.3279772400856018, "learning_rate": 4.8577648476063584e-05, "loss": 0.2071, "step": 11050 }, { "epoch": 0.19710698105803873, "grad_norm": 0.3648107647895813, "learning_rate": 4.857713090344256e-05, "loss": 0.1887, "step": 11051 }, { "epoch": 0.19712481717975244, "grad_norm": 0.3489072322845459, "learning_rate": 4.857661323942843e-05, "loss": 0.2542, "step": 11052 }, { "epoch": 0.19714265330146613, "grad_norm": 0.3205341398715973, "learning_rate": 4.857609548402321e-05, "loss": 0.199, "step": 11053 }, { "epoch": 0.19716048942317982, "grad_norm": 0.22784355282783508, "learning_rate": 4.85755776372289e-05, "loss": 0.2137, "step": 11054 }, { "epoch": 0.1971783255448935, "grad_norm": 0.2208159863948822, "learning_rate": 4.85750596990475e-05, "loss": 0.1949, "step": 11055 }, { "epoch": 0.19719616166660722, "grad_norm": 0.2629421353340149, "learning_rate": 4.857454166948103e-05, "loss": 0.2038, "step": 11056 }, { "epoch": 0.1972139977883209, "grad_norm": 0.4527952969074249, "learning_rate": 4.85740235485315e-05, "loss": 0.2401, "step": 11057 }, { "epoch": 0.1972318339100346, "grad_norm": 0.27207663655281067, "learning_rate": 4.85735053362009e-05, "loss": 0.1897, "step": 11058 }, { "epoch": 0.1972496700317483, "grad_norm": 0.2801261842250824, "learning_rate": 4.8572987032491264e-05, "loss": 0.1671, "step": 11059 }, { "epoch": 0.197267506153462, "grad_norm": 0.2612663507461548, "learning_rate": 4.857246863740458e-05, "loss": 0.1913, "step": 11060 }, { "epoch": 0.1972853422751757, "grad_norm": 0.32024240493774414, "learning_rate": 4.857195015094287e-05, "loss": 0.2919, "step": 11061 }, { "epoch": 0.19730317839688938, "grad_norm": 0.24168513715267181, "learning_rate": 4.857143157310814e-05, "loss": 0.1842, "step": 11062 }, { "epoch": 0.19732101451860307, "grad_norm": 0.551415205001831, "learning_rate": 4.85709129039024e-05, "loss": 0.2532, "step": 11063 }, { "epoch": 0.19733885064031678, "grad_norm": 0.26950013637542725, "learning_rate": 4.857039414332766e-05, "loss": 0.1991, "step": 11064 }, { "epoch": 0.19735668676203047, "grad_norm": 0.31064918637275696, "learning_rate": 4.8569875291385936e-05, "loss": 0.19, "step": 11065 }, { "epoch": 0.19737452288374416, "grad_norm": 0.27538514137268066, "learning_rate": 4.856935634807923e-05, "loss": 0.1863, "step": 11066 }, { "epoch": 0.19739235900545785, "grad_norm": 0.27919042110443115, "learning_rate": 4.856883731340955e-05, "loss": 0.2486, "step": 11067 }, { "epoch": 0.19741019512717156, "grad_norm": 0.27432864904403687, "learning_rate": 4.8568318187378924e-05, "loss": 0.211, "step": 11068 }, { "epoch": 0.19742803124888525, "grad_norm": 0.2728572189807892, "learning_rate": 4.856779896998936e-05, "loss": 0.1357, "step": 11069 }, { "epoch": 0.19744586737059894, "grad_norm": 0.2934436500072479, "learning_rate": 4.856727966124286e-05, "loss": 0.2306, "step": 11070 }, { "epoch": 0.19746370349231263, "grad_norm": 0.2735760509967804, "learning_rate": 4.856676026114145e-05, "loss": 0.2001, "step": 11071 }, { "epoch": 0.1974815396140263, "grad_norm": 0.28950780630111694, "learning_rate": 4.8566240769687135e-05, "loss": 0.2104, "step": 11072 }, { "epoch": 0.19749937573574003, "grad_norm": 0.2395309954881668, "learning_rate": 4.856572118688193e-05, "loss": 0.2276, "step": 11073 }, { "epoch": 0.19751721185745372, "grad_norm": 0.18597102165222168, "learning_rate": 4.856520151272785e-05, "loss": 0.1351, "step": 11074 }, { "epoch": 0.1975350479791674, "grad_norm": 0.3088156580924988, "learning_rate": 4.8564681747226914e-05, "loss": 0.174, "step": 11075 }, { "epoch": 0.1975528841008811, "grad_norm": 0.23534858226776123, "learning_rate": 4.856416189038113e-05, "loss": 0.1774, "step": 11076 }, { "epoch": 0.1975707202225948, "grad_norm": 0.2968669831752777, "learning_rate": 4.8563641942192514e-05, "loss": 0.2016, "step": 11077 }, { "epoch": 0.1975885563443085, "grad_norm": 0.2954857051372528, "learning_rate": 4.856312190266309e-05, "loss": 0.2246, "step": 11078 }, { "epoch": 0.19760639246602218, "grad_norm": 0.2747255265712738, "learning_rate": 4.856260177179486e-05, "loss": 0.1979, "step": 11079 }, { "epoch": 0.19762422858773587, "grad_norm": 0.218937948346138, "learning_rate": 4.8562081549589855e-05, "loss": 0.189, "step": 11080 }, { "epoch": 0.1976420647094496, "grad_norm": 0.2704882323741913, "learning_rate": 4.856156123605007e-05, "loss": 0.1866, "step": 11081 }, { "epoch": 0.19765990083116328, "grad_norm": 0.23084577918052673, "learning_rate": 4.856104083117755e-05, "loss": 0.209, "step": 11082 }, { "epoch": 0.19767773695287696, "grad_norm": 0.3036177158355713, "learning_rate": 4.856052033497429e-05, "loss": 0.2205, "step": 11083 }, { "epoch": 0.19769557307459065, "grad_norm": 0.2827765941619873, "learning_rate": 4.8559999747442316e-05, "loss": 0.1875, "step": 11084 }, { "epoch": 0.19771340919630437, "grad_norm": 0.2353384643793106, "learning_rate": 4.8559479068583645e-05, "loss": 0.1756, "step": 11085 }, { "epoch": 0.19773124531801806, "grad_norm": 0.34830811619758606, "learning_rate": 4.85589582984003e-05, "loss": 0.1886, "step": 11086 }, { "epoch": 0.19774908143973174, "grad_norm": 0.253828763961792, "learning_rate": 4.85584374368943e-05, "loss": 0.2047, "step": 11087 }, { "epoch": 0.19776691756144543, "grad_norm": 0.2931230664253235, "learning_rate": 4.855791648406765e-05, "loss": 0.2165, "step": 11088 }, { "epoch": 0.19778475368315912, "grad_norm": 0.30035024881362915, "learning_rate": 4.855739543992238e-05, "loss": 0.2276, "step": 11089 }, { "epoch": 0.19780258980487284, "grad_norm": 0.27374139428138733, "learning_rate": 4.8556874304460516e-05, "loss": 0.1931, "step": 11090 }, { "epoch": 0.19782042592658652, "grad_norm": 0.2854543924331665, "learning_rate": 4.855635307768406e-05, "loss": 0.2248, "step": 11091 }, { "epoch": 0.1978382620483002, "grad_norm": 0.27571243047714233, "learning_rate": 4.8555831759595056e-05, "loss": 0.2106, "step": 11092 }, { "epoch": 0.1978560981700139, "grad_norm": 0.2587672770023346, "learning_rate": 4.8555310350195506e-05, "loss": 0.2361, "step": 11093 }, { "epoch": 0.19787393429172762, "grad_norm": 0.3703326880931854, "learning_rate": 4.855478884948744e-05, "loss": 0.141, "step": 11094 }, { "epoch": 0.1978917704134413, "grad_norm": 0.32847192883491516, "learning_rate": 4.8554267257472876e-05, "loss": 0.2245, "step": 11095 }, { "epoch": 0.197909606535155, "grad_norm": 0.2797450125217438, "learning_rate": 4.855374557415383e-05, "loss": 0.2239, "step": 11096 }, { "epoch": 0.19792744265686868, "grad_norm": 0.2755650579929352, "learning_rate": 4.855322379953233e-05, "loss": 0.2082, "step": 11097 }, { "epoch": 0.1979452787785824, "grad_norm": 0.2720598876476288, "learning_rate": 4.855270193361041e-05, "loss": 0.206, "step": 11098 }, { "epoch": 0.19796311490029608, "grad_norm": 0.2276594340801239, "learning_rate": 4.855217997639008e-05, "loss": 0.1982, "step": 11099 }, { "epoch": 0.19798095102200977, "grad_norm": 0.31687310338020325, "learning_rate": 4.855165792787336e-05, "loss": 0.1331, "step": 11100 }, { "epoch": 0.19799878714372346, "grad_norm": 0.285958468914032, "learning_rate": 4.855113578806228e-05, "loss": 0.1516, "step": 11101 }, { "epoch": 0.19801662326543717, "grad_norm": 0.2214995175600052, "learning_rate": 4.855061355695887e-05, "loss": 0.1714, "step": 11102 }, { "epoch": 0.19803445938715086, "grad_norm": 0.3054547905921936, "learning_rate": 4.8550091234565144e-05, "loss": 0.1881, "step": 11103 }, { "epoch": 0.19805229550886455, "grad_norm": 0.24299995601177216, "learning_rate": 4.854956882088313e-05, "loss": 0.2094, "step": 11104 }, { "epoch": 0.19807013163057824, "grad_norm": 0.35838520526885986, "learning_rate": 4.854904631591486e-05, "loss": 0.2222, "step": 11105 }, { "epoch": 0.19808796775229195, "grad_norm": 0.29429590702056885, "learning_rate": 4.854852371966234e-05, "loss": 0.2435, "step": 11106 }, { "epoch": 0.19810580387400564, "grad_norm": 0.28344273567199707, "learning_rate": 4.854800103212762e-05, "loss": 0.2233, "step": 11107 }, { "epoch": 0.19812363999571933, "grad_norm": 0.35314199328422546, "learning_rate": 4.8547478253312706e-05, "loss": 0.1924, "step": 11108 }, { "epoch": 0.19814147611743302, "grad_norm": 0.31719276309013367, "learning_rate": 4.854695538321964e-05, "loss": 0.2063, "step": 11109 }, { "epoch": 0.1981593122391467, "grad_norm": 0.327848345041275, "learning_rate": 4.854643242185044e-05, "loss": 0.2478, "step": 11110 }, { "epoch": 0.19817714836086042, "grad_norm": 0.26491832733154297, "learning_rate": 4.854590936920713e-05, "loss": 0.2171, "step": 11111 }, { "epoch": 0.1981949844825741, "grad_norm": 0.27557340264320374, "learning_rate": 4.8545386225291756e-05, "loss": 0.2171, "step": 11112 }, { "epoch": 0.1982128206042878, "grad_norm": 0.37003302574157715, "learning_rate": 4.854486299010632e-05, "loss": 0.226, "step": 11113 }, { "epoch": 0.1982306567260015, "grad_norm": 0.23664388060569763, "learning_rate": 4.854433966365287e-05, "loss": 0.1684, "step": 11114 }, { "epoch": 0.1982484928477152, "grad_norm": 0.23957613110542297, "learning_rate": 4.854381624593342e-05, "loss": 0.1899, "step": 11115 }, { "epoch": 0.1982663289694289, "grad_norm": 0.26655709743499756, "learning_rate": 4.8543292736950016e-05, "loss": 0.2046, "step": 11116 }, { "epoch": 0.19828416509114258, "grad_norm": 0.29274481534957886, "learning_rate": 4.854276913670467e-05, "loss": 0.1896, "step": 11117 }, { "epoch": 0.19830200121285627, "grad_norm": 0.3711560070514679, "learning_rate": 4.854224544519942e-05, "loss": 0.2031, "step": 11118 }, { "epoch": 0.19831983733456998, "grad_norm": 0.2939615249633789, "learning_rate": 4.85417216624363e-05, "loss": 0.229, "step": 11119 }, { "epoch": 0.19833767345628367, "grad_norm": 0.2645339369773865, "learning_rate": 4.854119778841734e-05, "loss": 0.2124, "step": 11120 }, { "epoch": 0.19835550957799736, "grad_norm": 0.2945692837238312, "learning_rate": 4.854067382314456e-05, "loss": 0.2609, "step": 11121 }, { "epoch": 0.19837334569971105, "grad_norm": 0.3351791203022003, "learning_rate": 4.854014976661999e-05, "loss": 0.2441, "step": 11122 }, { "epoch": 0.19839118182142476, "grad_norm": 0.24094338715076447, "learning_rate": 4.853962561884568e-05, "loss": 0.2036, "step": 11123 }, { "epoch": 0.19840901794313845, "grad_norm": 0.24937154352664948, "learning_rate": 4.853910137982365e-05, "loss": 0.1894, "step": 11124 }, { "epoch": 0.19842685406485214, "grad_norm": 0.2517138719558716, "learning_rate": 4.853857704955593e-05, "loss": 0.2204, "step": 11125 }, { "epoch": 0.19844469018656583, "grad_norm": 0.2192293107509613, "learning_rate": 4.853805262804455e-05, "loss": 0.1849, "step": 11126 }, { "epoch": 0.19846252630827954, "grad_norm": 0.2045575976371765, "learning_rate": 4.8537528115291556e-05, "loss": 0.1599, "step": 11127 }, { "epoch": 0.19848036242999323, "grad_norm": 0.352490097284317, "learning_rate": 4.853700351129897e-05, "loss": 0.2454, "step": 11128 }, { "epoch": 0.19849819855170692, "grad_norm": 0.21434324979782104, "learning_rate": 4.853647881606883e-05, "loss": 0.1742, "step": 11129 }, { "epoch": 0.1985160346734206, "grad_norm": 0.306272953748703, "learning_rate": 4.853595402960317e-05, "loss": 0.213, "step": 11130 }, { "epoch": 0.1985338707951343, "grad_norm": 0.2694195806980133, "learning_rate": 4.8535429151904025e-05, "loss": 0.1842, "step": 11131 }, { "epoch": 0.198551706916848, "grad_norm": 0.2298295497894287, "learning_rate": 4.853490418297343e-05, "loss": 0.1591, "step": 11132 }, { "epoch": 0.1985695430385617, "grad_norm": 0.32406559586524963, "learning_rate": 4.853437912281341e-05, "loss": 0.1695, "step": 11133 }, { "epoch": 0.19858737916027538, "grad_norm": 0.2561782896518707, "learning_rate": 4.853385397142601e-05, "loss": 0.22, "step": 11134 }, { "epoch": 0.19860521528198907, "grad_norm": 0.30626562237739563, "learning_rate": 4.8533328728813265e-05, "loss": 0.2258, "step": 11135 }, { "epoch": 0.1986230514037028, "grad_norm": 0.2491305023431778, "learning_rate": 4.853280339497721e-05, "loss": 0.2172, "step": 11136 }, { "epoch": 0.19864088752541648, "grad_norm": 0.21116437017917633, "learning_rate": 4.853227796991988e-05, "loss": 0.1982, "step": 11137 }, { "epoch": 0.19865872364713016, "grad_norm": 0.310923308134079, "learning_rate": 4.853175245364331e-05, "loss": 0.2385, "step": 11138 }, { "epoch": 0.19867655976884385, "grad_norm": 0.29891157150268555, "learning_rate": 4.8531226846149544e-05, "loss": 0.2239, "step": 11139 }, { "epoch": 0.19869439589055757, "grad_norm": 0.33879512548446655, "learning_rate": 4.8530701147440615e-05, "loss": 0.2059, "step": 11140 }, { "epoch": 0.19871223201227126, "grad_norm": 0.23478348553180695, "learning_rate": 4.8530175357518556e-05, "loss": 0.1603, "step": 11141 }, { "epoch": 0.19873006813398494, "grad_norm": 0.31598207354545593, "learning_rate": 4.852964947638542e-05, "loss": 0.2302, "step": 11142 }, { "epoch": 0.19874790425569863, "grad_norm": 0.3208726942539215, "learning_rate": 4.852912350404323e-05, "loss": 0.2221, "step": 11143 }, { "epoch": 0.19876574037741235, "grad_norm": 0.2035573273897171, "learning_rate": 4.852859744049403e-05, "loss": 0.1876, "step": 11144 }, { "epoch": 0.19878357649912604, "grad_norm": 0.29464900493621826, "learning_rate": 4.8528071285739864e-05, "loss": 0.2111, "step": 11145 }, { "epoch": 0.19880141262083972, "grad_norm": 0.17972739040851593, "learning_rate": 4.852754503978276e-05, "loss": 0.176, "step": 11146 }, { "epoch": 0.1988192487425534, "grad_norm": 0.25738292932510376, "learning_rate": 4.8527018702624774e-05, "loss": 0.2385, "step": 11147 }, { "epoch": 0.19883708486426713, "grad_norm": 0.2520672380924225, "learning_rate": 4.852649227426793e-05, "loss": 0.2049, "step": 11148 }, { "epoch": 0.19885492098598082, "grad_norm": 0.17404726147651672, "learning_rate": 4.852596575471428e-05, "loss": 0.1774, "step": 11149 }, { "epoch": 0.1988727571076945, "grad_norm": 0.2706785798072815, "learning_rate": 4.8525439143965855e-05, "loss": 0.213, "step": 11150 }, { "epoch": 0.1988905932294082, "grad_norm": 0.7308741211891174, "learning_rate": 4.852491244202471e-05, "loss": 0.2105, "step": 11151 }, { "epoch": 0.19890842935112188, "grad_norm": 0.2657563090324402, "learning_rate": 4.852438564889288e-05, "loss": 0.1833, "step": 11152 }, { "epoch": 0.1989262654728356, "grad_norm": 0.21944956481456757, "learning_rate": 4.85238587645724e-05, "loss": 0.188, "step": 11153 }, { "epoch": 0.19894410159454928, "grad_norm": 0.21879857778549194, "learning_rate": 4.852333178906532e-05, "loss": 0.219, "step": 11154 }, { "epoch": 0.19896193771626297, "grad_norm": 0.2715228796005249, "learning_rate": 4.8522804722373685e-05, "loss": 0.1822, "step": 11155 }, { "epoch": 0.19897977383797666, "grad_norm": 0.23303835093975067, "learning_rate": 4.852227756449953e-05, "loss": 0.2435, "step": 11156 }, { "epoch": 0.19899760995969037, "grad_norm": 0.3210408091545105, "learning_rate": 4.8521750315444905e-05, "loss": 0.1564, "step": 11157 }, { "epoch": 0.19901544608140406, "grad_norm": 0.3169405162334442, "learning_rate": 4.8521222975211854e-05, "loss": 0.2404, "step": 11158 }, { "epoch": 0.19903328220311775, "grad_norm": 0.20615223050117493, "learning_rate": 4.852069554380242e-05, "loss": 0.2015, "step": 11159 }, { "epoch": 0.19905111832483144, "grad_norm": 0.3027874231338501, "learning_rate": 4.852016802121864e-05, "loss": 0.2397, "step": 11160 }, { "epoch": 0.19906895444654515, "grad_norm": 0.2798895239830017, "learning_rate": 4.851964040746256e-05, "loss": 0.1947, "step": 11161 }, { "epoch": 0.19908679056825884, "grad_norm": 0.31407061219215393, "learning_rate": 4.851911270253625e-05, "loss": 0.2067, "step": 11162 }, { "epoch": 0.19910462668997253, "grad_norm": 0.2413611114025116, "learning_rate": 4.851858490644172e-05, "loss": 0.1678, "step": 11163 }, { "epoch": 0.19912246281168622, "grad_norm": 0.28156372904777527, "learning_rate": 4.8518057019181035e-05, "loss": 0.1594, "step": 11164 }, { "epoch": 0.19914029893339993, "grad_norm": 0.4610389471054077, "learning_rate": 4.851752904075624e-05, "loss": 0.2219, "step": 11165 }, { "epoch": 0.19915813505511362, "grad_norm": 0.2356463074684143, "learning_rate": 4.851700097116938e-05, "loss": 0.1838, "step": 11166 }, { "epoch": 0.1991759711768273, "grad_norm": 0.34358465671539307, "learning_rate": 4.8516472810422495e-05, "loss": 0.3043, "step": 11167 }, { "epoch": 0.199193807298541, "grad_norm": 0.17977052927017212, "learning_rate": 4.851594455851764e-05, "loss": 0.1664, "step": 11168 }, { "epoch": 0.19921164342025469, "grad_norm": 0.3159478008747101, "learning_rate": 4.8515416215456874e-05, "loss": 0.2261, "step": 11169 }, { "epoch": 0.1992294795419684, "grad_norm": 0.20334972441196442, "learning_rate": 4.851488778124222e-05, "loss": 0.1678, "step": 11170 }, { "epoch": 0.1992473156636821, "grad_norm": 0.40189042687416077, "learning_rate": 4.851435925587575e-05, "loss": 0.2146, "step": 11171 }, { "epoch": 0.19926515178539578, "grad_norm": 0.47486233711242676, "learning_rate": 4.8513830639359495e-05, "loss": 0.2367, "step": 11172 }, { "epoch": 0.19928298790710947, "grad_norm": 0.2458191215991974, "learning_rate": 4.8513301931695515e-05, "loss": 0.2109, "step": 11173 }, { "epoch": 0.19930082402882318, "grad_norm": 0.2704322934150696, "learning_rate": 4.851277313288585e-05, "loss": 0.1861, "step": 11174 }, { "epoch": 0.19931866015053687, "grad_norm": 0.26017236709594727, "learning_rate": 4.851224424293256e-05, "loss": 0.1901, "step": 11175 }, { "epoch": 0.19933649627225056, "grad_norm": 0.25227510929107666, "learning_rate": 4.8511715261837684e-05, "loss": 0.2146, "step": 11176 }, { "epoch": 0.19935433239396425, "grad_norm": 0.3166466951370239, "learning_rate": 4.851118618960328e-05, "loss": 0.1695, "step": 11177 }, { "epoch": 0.19937216851567796, "grad_norm": 0.42788925766944885, "learning_rate": 4.851065702623141e-05, "loss": 0.1623, "step": 11178 }, { "epoch": 0.19939000463739165, "grad_norm": 0.25692322850227356, "learning_rate": 4.8510127771724104e-05, "loss": 0.2137, "step": 11179 }, { "epoch": 0.19940784075910534, "grad_norm": 0.2887136936187744, "learning_rate": 4.8509598426083426e-05, "loss": 0.2074, "step": 11180 }, { "epoch": 0.19942567688081902, "grad_norm": 0.23590153455734253, "learning_rate": 4.850906898931142e-05, "loss": 0.1661, "step": 11181 }, { "epoch": 0.19944351300253274, "grad_norm": 0.2689376175403595, "learning_rate": 4.8508539461410144e-05, "loss": 0.1732, "step": 11182 }, { "epoch": 0.19946134912424643, "grad_norm": 0.2713359296321869, "learning_rate": 4.8508009842381654e-05, "loss": 0.1991, "step": 11183 }, { "epoch": 0.19947918524596012, "grad_norm": 0.4445231258869171, "learning_rate": 4.850748013222799e-05, "loss": 0.2334, "step": 11184 }, { "epoch": 0.1994970213676738, "grad_norm": 0.35040053725242615, "learning_rate": 4.850695033095122e-05, "loss": 0.2546, "step": 11185 }, { "epoch": 0.19951485748938752, "grad_norm": 0.28394970297813416, "learning_rate": 4.850642043855339e-05, "loss": 0.2435, "step": 11186 }, { "epoch": 0.1995326936111012, "grad_norm": 0.24912366271018982, "learning_rate": 4.8505890455036554e-05, "loss": 0.1939, "step": 11187 }, { "epoch": 0.1995505297328149, "grad_norm": 0.3353257179260254, "learning_rate": 4.850536038040276e-05, "loss": 0.1978, "step": 11188 }, { "epoch": 0.19956836585452858, "grad_norm": 0.22216598689556122, "learning_rate": 4.8504830214654085e-05, "loss": 0.1926, "step": 11189 }, { "epoch": 0.19958620197624227, "grad_norm": 0.3079867660999298, "learning_rate": 4.850429995779257e-05, "loss": 0.1991, "step": 11190 }, { "epoch": 0.199604038097956, "grad_norm": 0.35655754804611206, "learning_rate": 4.850376960982026e-05, "loss": 0.2763, "step": 11191 }, { "epoch": 0.19962187421966968, "grad_norm": 0.38088274002075195, "learning_rate": 4.850323917073922e-05, "loss": 0.2186, "step": 11192 }, { "epoch": 0.19963971034138336, "grad_norm": 0.26722249388694763, "learning_rate": 4.8502708640551517e-05, "loss": 0.2097, "step": 11193 }, { "epoch": 0.19965754646309705, "grad_norm": 0.3426783084869385, "learning_rate": 4.850217801925919e-05, "loss": 0.2566, "step": 11194 }, { "epoch": 0.19967538258481077, "grad_norm": 0.41486701369285583, "learning_rate": 4.8501647306864314e-05, "loss": 0.2343, "step": 11195 }, { "epoch": 0.19969321870652446, "grad_norm": 0.30593550205230713, "learning_rate": 4.8501116503368925e-05, "loss": 0.2194, "step": 11196 }, { "epoch": 0.19971105482823814, "grad_norm": 0.27238717675209045, "learning_rate": 4.8500585608775095e-05, "loss": 0.2066, "step": 11197 }, { "epoch": 0.19972889094995183, "grad_norm": 0.3307051956653595, "learning_rate": 4.8500054623084884e-05, "loss": 0.2675, "step": 11198 }, { "epoch": 0.19974672707166555, "grad_norm": 0.2783980965614319, "learning_rate": 4.849952354630034e-05, "loss": 0.2019, "step": 11199 }, { "epoch": 0.19976456319337924, "grad_norm": 0.3086075186729431, "learning_rate": 4.8498992378423525e-05, "loss": 0.21, "step": 11200 }, { "epoch": 0.19978239931509292, "grad_norm": 0.3717106878757477, "learning_rate": 4.8498461119456504e-05, "loss": 0.1922, "step": 11201 }, { "epoch": 0.1998002354368066, "grad_norm": 0.2317422479391098, "learning_rate": 4.849792976940132e-05, "loss": 0.232, "step": 11202 }, { "epoch": 0.19981807155852033, "grad_norm": 0.3480771780014038, "learning_rate": 4.849739832826006e-05, "loss": 0.1527, "step": 11203 }, { "epoch": 0.19983590768023402, "grad_norm": 0.2628558576107025, "learning_rate": 4.8496866796034754e-05, "loss": 0.195, "step": 11204 }, { "epoch": 0.1998537438019477, "grad_norm": 0.2192603349685669, "learning_rate": 4.849633517272748e-05, "loss": 0.1893, "step": 11205 }, { "epoch": 0.1998715799236614, "grad_norm": 0.26822423934936523, "learning_rate": 4.849580345834031e-05, "loss": 0.1787, "step": 11206 }, { "epoch": 0.1998894160453751, "grad_norm": 0.2718605697154999, "learning_rate": 4.849527165287527e-05, "loss": 0.1806, "step": 11207 }, { "epoch": 0.1999072521670888, "grad_norm": 0.28898414969444275, "learning_rate": 4.849473975633445e-05, "loss": 0.1951, "step": 11208 }, { "epoch": 0.19992508828880248, "grad_norm": 0.3359219431877136, "learning_rate": 4.8494207768719906e-05, "loss": 0.1756, "step": 11209 }, { "epoch": 0.19994292441051617, "grad_norm": 0.3178466856479645, "learning_rate": 4.84936756900337e-05, "loss": 0.2605, "step": 11210 }, { "epoch": 0.19996076053222986, "grad_norm": 0.4385218918323517, "learning_rate": 4.849314352027789e-05, "loss": 0.1936, "step": 11211 }, { "epoch": 0.19997859665394357, "grad_norm": 0.2442820966243744, "learning_rate": 4.849261125945454e-05, "loss": 0.2093, "step": 11212 }, { "epoch": 0.19999643277565726, "grad_norm": 0.32601553201675415, "learning_rate": 4.849207890756572e-05, "loss": 0.1856, "step": 11213 }, { "epoch": 0.20001426889737095, "grad_norm": 0.2076176404953003, "learning_rate": 4.849154646461348e-05, "loss": 0.2047, "step": 11214 }, { "epoch": 0.20003210501908464, "grad_norm": 0.2987322509288788, "learning_rate": 4.849101393059989e-05, "loss": 0.2296, "step": 11215 }, { "epoch": 0.20004994114079835, "grad_norm": 0.24282674491405487, "learning_rate": 4.849048130552703e-05, "loss": 0.1968, "step": 11216 }, { "epoch": 0.20006777726251204, "grad_norm": 0.27987241744995117, "learning_rate": 4.8489948589396935e-05, "loss": 0.2309, "step": 11217 }, { "epoch": 0.20008561338422573, "grad_norm": 0.28088998794555664, "learning_rate": 4.84894157822117e-05, "loss": 0.219, "step": 11218 }, { "epoch": 0.20010344950593942, "grad_norm": 0.321716845035553, "learning_rate": 4.8488882883973375e-05, "loss": 0.251, "step": 11219 }, { "epoch": 0.20012128562765313, "grad_norm": 0.2137146294116974, "learning_rate": 4.848834989468402e-05, "loss": 0.1197, "step": 11220 }, { "epoch": 0.20013912174936682, "grad_norm": 0.27422183752059937, "learning_rate": 4.848781681434571e-05, "loss": 0.184, "step": 11221 }, { "epoch": 0.2001569578710805, "grad_norm": 0.3200380504131317, "learning_rate": 4.848728364296051e-05, "loss": 0.1888, "step": 11222 }, { "epoch": 0.2001747939927942, "grad_norm": 0.2924257218837738, "learning_rate": 4.8486750380530495e-05, "loss": 0.2313, "step": 11223 }, { "epoch": 0.2001926301145079, "grad_norm": 0.2781852185726166, "learning_rate": 4.848621702705771e-05, "loss": 0.2546, "step": 11224 }, { "epoch": 0.2002104662362216, "grad_norm": 0.20512135326862335, "learning_rate": 4.848568358254424e-05, "loss": 0.1845, "step": 11225 }, { "epoch": 0.2002283023579353, "grad_norm": 0.21732193231582642, "learning_rate": 4.848515004699216e-05, "loss": 0.1698, "step": 11226 }, { "epoch": 0.20024613847964898, "grad_norm": 0.4090214669704437, "learning_rate": 4.8484616420403516e-05, "loss": 0.1995, "step": 11227 }, { "epoch": 0.2002639746013627, "grad_norm": 0.5717278718948364, "learning_rate": 4.8484082702780387e-05, "loss": 0.2029, "step": 11228 }, { "epoch": 0.20028181072307638, "grad_norm": 0.43718910217285156, "learning_rate": 4.8483548894124844e-05, "loss": 0.247, "step": 11229 }, { "epoch": 0.20029964684479007, "grad_norm": 0.27847278118133545, "learning_rate": 4.8483014994438955e-05, "loss": 0.2494, "step": 11230 }, { "epoch": 0.20031748296650376, "grad_norm": 0.2626401484012604, "learning_rate": 4.848248100372479e-05, "loss": 0.1916, "step": 11231 }, { "epoch": 0.20033531908821745, "grad_norm": 0.23114930093288422, "learning_rate": 4.848194692198442e-05, "loss": 0.222, "step": 11232 }, { "epoch": 0.20035315520993116, "grad_norm": 0.3235883116722107, "learning_rate": 4.8481412749219906e-05, "loss": 0.1538, "step": 11233 }, { "epoch": 0.20037099133164485, "grad_norm": 0.24516287446022034, "learning_rate": 4.8480878485433334e-05, "loss": 0.2268, "step": 11234 }, { "epoch": 0.20038882745335854, "grad_norm": 0.2867223620414734, "learning_rate": 4.848034413062676e-05, "loss": 0.219, "step": 11235 }, { "epoch": 0.20040666357507222, "grad_norm": 0.3025898337364197, "learning_rate": 4.8479809684802266e-05, "loss": 0.1612, "step": 11236 }, { "epoch": 0.20042449969678594, "grad_norm": 0.2771890461444855, "learning_rate": 4.8479275147961924e-05, "loss": 0.2332, "step": 11237 }, { "epoch": 0.20044233581849963, "grad_norm": 0.2824143171310425, "learning_rate": 4.847874052010779e-05, "loss": 0.2284, "step": 11238 }, { "epoch": 0.20046017194021332, "grad_norm": 0.2517559230327606, "learning_rate": 4.847820580124196e-05, "loss": 0.1842, "step": 11239 }, { "epoch": 0.200478008061927, "grad_norm": 0.24260075390338898, "learning_rate": 4.8477670991366484e-05, "loss": 0.225, "step": 11240 }, { "epoch": 0.20049584418364072, "grad_norm": 0.46930018067359924, "learning_rate": 4.847713609048346e-05, "loss": 0.2576, "step": 11241 }, { "epoch": 0.2005136803053544, "grad_norm": 0.23439662158489227, "learning_rate": 4.8476601098594945e-05, "loss": 0.1545, "step": 11242 }, { "epoch": 0.2005315164270681, "grad_norm": 0.3082413971424103, "learning_rate": 4.847606601570301e-05, "loss": 0.2586, "step": 11243 }, { "epoch": 0.20054935254878178, "grad_norm": 0.3749242424964905, "learning_rate": 4.847553084180974e-05, "loss": 0.231, "step": 11244 }, { "epoch": 0.2005671886704955, "grad_norm": 0.35473912954330444, "learning_rate": 4.8474995576917195e-05, "loss": 0.2241, "step": 11245 }, { "epoch": 0.2005850247922092, "grad_norm": 0.22156821191310883, "learning_rate": 4.847446022102746e-05, "loss": 0.2013, "step": 11246 }, { "epoch": 0.20060286091392288, "grad_norm": 0.32568418979644775, "learning_rate": 4.847392477414262e-05, "loss": 0.1538, "step": 11247 }, { "epoch": 0.20062069703563656, "grad_norm": 0.3184446096420288, "learning_rate": 4.8473389236264735e-05, "loss": 0.2534, "step": 11248 }, { "epoch": 0.20063853315735028, "grad_norm": 0.2477748543024063, "learning_rate": 4.847285360739589e-05, "loss": 0.1544, "step": 11249 }, { "epoch": 0.20065636927906397, "grad_norm": 0.24207846820354462, "learning_rate": 4.847231788753815e-05, "loss": 0.219, "step": 11250 }, { "epoch": 0.20067420540077766, "grad_norm": 0.30859509110450745, "learning_rate": 4.84717820766936e-05, "loss": 0.1844, "step": 11251 }, { "epoch": 0.20069204152249134, "grad_norm": 0.2695724070072174, "learning_rate": 4.847124617486432e-05, "loss": 0.1656, "step": 11252 }, { "epoch": 0.20070987764420503, "grad_norm": 0.2993283271789551, "learning_rate": 4.8470710182052375e-05, "loss": 0.1897, "step": 11253 }, { "epoch": 0.20072771376591875, "grad_norm": 0.354107141494751, "learning_rate": 4.847017409825986e-05, "loss": 0.1842, "step": 11254 }, { "epoch": 0.20074554988763244, "grad_norm": 0.31370314955711365, "learning_rate": 4.8469637923488833e-05, "loss": 0.1657, "step": 11255 }, { "epoch": 0.20076338600934612, "grad_norm": 0.30259957909584045, "learning_rate": 4.8469101657741395e-05, "loss": 0.2054, "step": 11256 }, { "epoch": 0.2007812221310598, "grad_norm": 0.22491726279258728, "learning_rate": 4.846856530101961e-05, "loss": 0.1949, "step": 11257 }, { "epoch": 0.20079905825277353, "grad_norm": 0.32310980558395386, "learning_rate": 4.8468028853325556e-05, "loss": 0.1853, "step": 11258 }, { "epoch": 0.20081689437448721, "grad_norm": 0.25922152400016785, "learning_rate": 4.8467492314661316e-05, "loss": 0.1868, "step": 11259 }, { "epoch": 0.2008347304962009, "grad_norm": 0.25198090076446533, "learning_rate": 4.846695568502898e-05, "loss": 0.1529, "step": 11260 }, { "epoch": 0.2008525666179146, "grad_norm": 0.2648249864578247, "learning_rate": 4.8466418964430606e-05, "loss": 0.2072, "step": 11261 }, { "epoch": 0.2008704027396283, "grad_norm": 0.2596457302570343, "learning_rate": 4.846588215286829e-05, "loss": 0.1783, "step": 11262 }, { "epoch": 0.200888238861342, "grad_norm": 0.47238320112228394, "learning_rate": 4.846534525034412e-05, "loss": 0.1971, "step": 11263 }, { "epoch": 0.20090607498305568, "grad_norm": 0.37923213839530945, "learning_rate": 4.846480825686016e-05, "loss": 0.2222, "step": 11264 }, { "epoch": 0.20092391110476937, "grad_norm": 0.23220795392990112, "learning_rate": 4.846427117241849e-05, "loss": 0.2088, "step": 11265 }, { "epoch": 0.2009417472264831, "grad_norm": 0.33572810888290405, "learning_rate": 4.846373399702121e-05, "loss": 0.2196, "step": 11266 }, { "epoch": 0.20095958334819677, "grad_norm": 0.27861925959587097, "learning_rate": 4.8463196730670396e-05, "loss": 0.188, "step": 11267 }, { "epoch": 0.20097741946991046, "grad_norm": 0.3227293789386749, "learning_rate": 4.8462659373368126e-05, "loss": 0.1758, "step": 11268 }, { "epoch": 0.20099525559162415, "grad_norm": 0.3112263083457947, "learning_rate": 4.846212192511648e-05, "loss": 0.2377, "step": 11269 }, { "epoch": 0.20101309171333784, "grad_norm": 0.2563783526420593, "learning_rate": 4.846158438591755e-05, "loss": 0.1988, "step": 11270 }, { "epoch": 0.20103092783505155, "grad_norm": 0.49856215715408325, "learning_rate": 4.846104675577341e-05, "loss": 0.2516, "step": 11271 }, { "epoch": 0.20104876395676524, "grad_norm": 0.36224600672721863, "learning_rate": 4.8460509034686154e-05, "loss": 0.2016, "step": 11272 }, { "epoch": 0.20106660007847893, "grad_norm": 0.29951876401901245, "learning_rate": 4.8459971222657864e-05, "loss": 0.2289, "step": 11273 }, { "epoch": 0.20108443620019262, "grad_norm": 0.29126256704330444, "learning_rate": 4.845943331969062e-05, "loss": 0.1926, "step": 11274 }, { "epoch": 0.20110227232190633, "grad_norm": 0.2900295853614807, "learning_rate": 4.845889532578651e-05, "loss": 0.1862, "step": 11275 }, { "epoch": 0.20112010844362002, "grad_norm": 0.22529807686805725, "learning_rate": 4.8458357240947615e-05, "loss": 0.1833, "step": 11276 }, { "epoch": 0.2011379445653337, "grad_norm": 0.30294477939605713, "learning_rate": 4.845781906517603e-05, "loss": 0.209, "step": 11277 }, { "epoch": 0.2011557806870474, "grad_norm": 0.23169225454330444, "learning_rate": 4.8457280798473836e-05, "loss": 0.159, "step": 11278 }, { "epoch": 0.2011736168087611, "grad_norm": 0.17531831562519073, "learning_rate": 4.845674244084312e-05, "loss": 0.1469, "step": 11279 }, { "epoch": 0.2011914529304748, "grad_norm": 0.39195239543914795, "learning_rate": 4.8456203992285966e-05, "loss": 0.2589, "step": 11280 }, { "epoch": 0.2012092890521885, "grad_norm": 0.40377548336982727, "learning_rate": 4.845566545280447e-05, "loss": 0.2756, "step": 11281 }, { "epoch": 0.20122712517390218, "grad_norm": 0.22159047424793243, "learning_rate": 4.84551268224007e-05, "loss": 0.1906, "step": 11282 }, { "epoch": 0.2012449612956159, "grad_norm": 0.2438051849603653, "learning_rate": 4.845458810107677e-05, "loss": 0.2253, "step": 11283 }, { "epoch": 0.20126279741732958, "grad_norm": 0.36674851179122925, "learning_rate": 4.845404928883475e-05, "loss": 0.2272, "step": 11284 }, { "epoch": 0.20128063353904327, "grad_norm": 0.35977867245674133, "learning_rate": 4.8453510385676734e-05, "loss": 0.1991, "step": 11285 }, { "epoch": 0.20129846966075696, "grad_norm": 0.2580593526363373, "learning_rate": 4.845297139160482e-05, "loss": 0.1738, "step": 11286 }, { "epoch": 0.20131630578247067, "grad_norm": 0.21098247170448303, "learning_rate": 4.845243230662107e-05, "loss": 0.1881, "step": 11287 }, { "epoch": 0.20133414190418436, "grad_norm": 0.23772303760051727, "learning_rate": 4.845189313072761e-05, "loss": 0.1936, "step": 11288 }, { "epoch": 0.20135197802589805, "grad_norm": 0.24394717812538147, "learning_rate": 4.8451353863926504e-05, "loss": 0.2178, "step": 11289 }, { "epoch": 0.20136981414761174, "grad_norm": 0.36160221695899963, "learning_rate": 4.8450814506219854e-05, "loss": 0.1865, "step": 11290 }, { "epoch": 0.20138765026932542, "grad_norm": 0.3158969581127167, "learning_rate": 4.845027505760975e-05, "loss": 0.2043, "step": 11291 }, { "epoch": 0.20140548639103914, "grad_norm": 0.3311038315296173, "learning_rate": 4.8449735518098274e-05, "loss": 0.2591, "step": 11292 }, { "epoch": 0.20142332251275283, "grad_norm": 0.2837072014808655, "learning_rate": 4.844919588768752e-05, "loss": 0.2724, "step": 11293 }, { "epoch": 0.20144115863446652, "grad_norm": 0.32290181517601013, "learning_rate": 4.8448656166379594e-05, "loss": 0.2593, "step": 11294 }, { "epoch": 0.2014589947561802, "grad_norm": 0.335918128490448, "learning_rate": 4.844811635417657e-05, "loss": 0.2, "step": 11295 }, { "epoch": 0.20147683087789392, "grad_norm": 0.25452184677124023, "learning_rate": 4.844757645108055e-05, "loss": 0.2229, "step": 11296 }, { "epoch": 0.2014946669996076, "grad_norm": 0.2768148183822632, "learning_rate": 4.844703645709363e-05, "loss": 0.2139, "step": 11297 }, { "epoch": 0.2015125031213213, "grad_norm": 0.30912166833877563, "learning_rate": 4.8446496372217895e-05, "loss": 0.2014, "step": 11298 }, { "epoch": 0.20153033924303498, "grad_norm": 0.25039976835250854, "learning_rate": 4.8445956196455444e-05, "loss": 0.2202, "step": 11299 }, { "epoch": 0.2015481753647487, "grad_norm": 0.21830569207668304, "learning_rate": 4.844541592980837e-05, "loss": 0.171, "step": 11300 }, { "epoch": 0.2015660114864624, "grad_norm": 0.213765487074852, "learning_rate": 4.8444875572278755e-05, "loss": 0.1579, "step": 11301 }, { "epoch": 0.20158384760817608, "grad_norm": 0.2506726086139679, "learning_rate": 4.8444335123868725e-05, "loss": 0.1681, "step": 11302 }, { "epoch": 0.20160168372988976, "grad_norm": 0.2370816171169281, "learning_rate": 4.844379458458034e-05, "loss": 0.1813, "step": 11303 }, { "epoch": 0.20161951985160348, "grad_norm": 0.23096415400505066, "learning_rate": 4.8443253954415714e-05, "loss": 0.1626, "step": 11304 }, { "epoch": 0.20163735597331717, "grad_norm": 0.2860069274902344, "learning_rate": 4.8442713233376935e-05, "loss": 0.2174, "step": 11305 }, { "epoch": 0.20165519209503086, "grad_norm": 0.3346981406211853, "learning_rate": 4.8442172421466104e-05, "loss": 0.2084, "step": 11306 }, { "epoch": 0.20167302821674454, "grad_norm": 0.1893150955438614, "learning_rate": 4.844163151868531e-05, "loss": 0.1844, "step": 11307 }, { "epoch": 0.20169086433845826, "grad_norm": 0.24594847857952118, "learning_rate": 4.844109052503667e-05, "loss": 0.1797, "step": 11308 }, { "epoch": 0.20170870046017195, "grad_norm": 0.2367420196533203, "learning_rate": 4.844054944052225e-05, "loss": 0.1805, "step": 11309 }, { "epoch": 0.20172653658188563, "grad_norm": 0.3526380658149719, "learning_rate": 4.8440008265144175e-05, "loss": 0.1925, "step": 11310 }, { "epoch": 0.20174437270359932, "grad_norm": 0.3324848711490631, "learning_rate": 4.8439466998904535e-05, "loss": 0.1427, "step": 11311 }, { "epoch": 0.201762208825313, "grad_norm": 0.23890207707881927, "learning_rate": 4.843892564180542e-05, "loss": 0.1623, "step": 11312 }, { "epoch": 0.20178004494702673, "grad_norm": 0.35688701272010803, "learning_rate": 4.8438384193848935e-05, "loss": 0.1902, "step": 11313 }, { "epoch": 0.20179788106874041, "grad_norm": 0.4197233319282532, "learning_rate": 4.843784265503718e-05, "loss": 0.1918, "step": 11314 }, { "epoch": 0.2018157171904541, "grad_norm": 0.23810192942619324, "learning_rate": 4.843730102537224e-05, "loss": 0.155, "step": 11315 }, { "epoch": 0.2018335533121678, "grad_norm": 0.23382043838500977, "learning_rate": 4.8436759304856236e-05, "loss": 0.217, "step": 11316 }, { "epoch": 0.2018513894338815, "grad_norm": 0.27680182456970215, "learning_rate": 4.843621749349126e-05, "loss": 0.2037, "step": 11317 }, { "epoch": 0.2018692255555952, "grad_norm": 0.2820006310939789, "learning_rate": 4.8435675591279405e-05, "loss": 0.2018, "step": 11318 }, { "epoch": 0.20188706167730888, "grad_norm": 0.22262385487556458, "learning_rate": 4.843513359822278e-05, "loss": 0.1734, "step": 11319 }, { "epoch": 0.20190489779902257, "grad_norm": 0.29639485478401184, "learning_rate": 4.843459151432349e-05, "loss": 0.185, "step": 11320 }, { "epoch": 0.20192273392073629, "grad_norm": 0.30568423867225647, "learning_rate": 4.843404933958362e-05, "loss": 0.1997, "step": 11321 }, { "epoch": 0.20194057004244997, "grad_norm": 0.27424970269203186, "learning_rate": 4.843350707400528e-05, "loss": 0.1931, "step": 11322 }, { "epoch": 0.20195840616416366, "grad_norm": 0.2373412847518921, "learning_rate": 4.843296471759058e-05, "loss": 0.1727, "step": 11323 }, { "epoch": 0.20197624228587735, "grad_norm": 0.23920440673828125, "learning_rate": 4.8432422270341605e-05, "loss": 0.1505, "step": 11324 }, { "epoch": 0.20199407840759107, "grad_norm": 0.3613201081752777, "learning_rate": 4.843187973226048e-05, "loss": 0.1801, "step": 11325 }, { "epoch": 0.20201191452930475, "grad_norm": 0.3097452223300934, "learning_rate": 4.843133710334928e-05, "loss": 0.2277, "step": 11326 }, { "epoch": 0.20202975065101844, "grad_norm": 0.29457807540893555, "learning_rate": 4.843079438361014e-05, "loss": 0.1892, "step": 11327 }, { "epoch": 0.20204758677273213, "grad_norm": 0.25454843044281006, "learning_rate": 4.843025157304514e-05, "loss": 0.1835, "step": 11328 }, { "epoch": 0.20206542289444585, "grad_norm": 0.2775270640850067, "learning_rate": 4.842970867165639e-05, "loss": 0.2404, "step": 11329 }, { "epoch": 0.20208325901615953, "grad_norm": 0.37864527106285095, "learning_rate": 4.8429165679446006e-05, "loss": 0.1556, "step": 11330 }, { "epoch": 0.20210109513787322, "grad_norm": 0.19824384152889252, "learning_rate": 4.842862259641608e-05, "loss": 0.1706, "step": 11331 }, { "epoch": 0.2021189312595869, "grad_norm": 0.20984798669815063, "learning_rate": 4.842807942256872e-05, "loss": 0.1834, "step": 11332 }, { "epoch": 0.2021367673813006, "grad_norm": 0.2893177270889282, "learning_rate": 4.8427536157906025e-05, "loss": 0.225, "step": 11333 }, { "epoch": 0.2021546035030143, "grad_norm": 0.27121877670288086, "learning_rate": 4.8426992802430124e-05, "loss": 0.2244, "step": 11334 }, { "epoch": 0.202172439624728, "grad_norm": 0.35227641463279724, "learning_rate": 4.84264493561431e-05, "loss": 0.1842, "step": 11335 }, { "epoch": 0.2021902757464417, "grad_norm": 0.3382189869880676, "learning_rate": 4.842590581904706e-05, "loss": 0.1874, "step": 11336 }, { "epoch": 0.20220811186815538, "grad_norm": 0.2463979572057724, "learning_rate": 4.842536219114413e-05, "loss": 0.2219, "step": 11337 }, { "epoch": 0.2022259479898691, "grad_norm": 0.2901211678981781, "learning_rate": 4.8424818472436394e-05, "loss": 0.2135, "step": 11338 }, { "epoch": 0.20224378411158278, "grad_norm": 0.312142550945282, "learning_rate": 4.842427466292598e-05, "loss": 0.2433, "step": 11339 }, { "epoch": 0.20226162023329647, "grad_norm": 0.2258642017841339, "learning_rate": 4.8423730762614985e-05, "loss": 0.2135, "step": 11340 }, { "epoch": 0.20227945635501016, "grad_norm": 0.267499715089798, "learning_rate": 4.8423186771505516e-05, "loss": 0.1737, "step": 11341 }, { "epoch": 0.20229729247672387, "grad_norm": 0.26048150658607483, "learning_rate": 4.8422642689599685e-05, "loss": 0.1797, "step": 11342 }, { "epoch": 0.20231512859843756, "grad_norm": 0.22089146077632904, "learning_rate": 4.8422098516899606e-05, "loss": 0.2012, "step": 11343 }, { "epoch": 0.20233296472015125, "grad_norm": 0.21420103311538696, "learning_rate": 4.8421554253407374e-05, "loss": 0.1441, "step": 11344 }, { "epoch": 0.20235080084186494, "grad_norm": 0.2936931550502777, "learning_rate": 4.8421009899125115e-05, "loss": 0.1853, "step": 11345 }, { "epoch": 0.20236863696357865, "grad_norm": 0.27731451392173767, "learning_rate": 4.842046545405493e-05, "loss": 0.2182, "step": 11346 }, { "epoch": 0.20238647308529234, "grad_norm": 0.2916032373905182, "learning_rate": 4.8419920918198936e-05, "loss": 0.1762, "step": 11347 }, { "epoch": 0.20240430920700603, "grad_norm": 0.28585314750671387, "learning_rate": 4.841937629155924e-05, "loss": 0.2415, "step": 11348 }, { "epoch": 0.20242214532871972, "grad_norm": 0.28881019353866577, "learning_rate": 4.841883157413795e-05, "loss": 0.1883, "step": 11349 }, { "epoch": 0.20243998145043343, "grad_norm": 0.27883437275886536, "learning_rate": 4.841828676593718e-05, "loss": 0.2013, "step": 11350 }, { "epoch": 0.20245781757214712, "grad_norm": 0.24770846962928772, "learning_rate": 4.841774186695904e-05, "loss": 0.1986, "step": 11351 }, { "epoch": 0.2024756536938608, "grad_norm": 0.40508878231048584, "learning_rate": 4.841719687720565e-05, "loss": 0.206, "step": 11352 }, { "epoch": 0.2024934898155745, "grad_norm": 0.15239794552326202, "learning_rate": 4.841665179667911e-05, "loss": 0.1421, "step": 11353 }, { "epoch": 0.20251132593728818, "grad_norm": 0.3014697730541229, "learning_rate": 4.8416106625381544e-05, "loss": 0.2713, "step": 11354 }, { "epoch": 0.2025291620590019, "grad_norm": 0.3859409987926483, "learning_rate": 4.8415561363315055e-05, "loss": 0.2337, "step": 11355 }, { "epoch": 0.2025469981807156, "grad_norm": 0.2989046275615692, "learning_rate": 4.841501601048177e-05, "loss": 0.1813, "step": 11356 }, { "epoch": 0.20256483430242928, "grad_norm": 0.27538689970970154, "learning_rate": 4.841447056688379e-05, "loss": 0.1773, "step": 11357 }, { "epoch": 0.20258267042414296, "grad_norm": 0.25325432419776917, "learning_rate": 4.8413925032523235e-05, "loss": 0.1901, "step": 11358 }, { "epoch": 0.20260050654585668, "grad_norm": 0.21702085435390472, "learning_rate": 4.841337940740222e-05, "loss": 0.2, "step": 11359 }, { "epoch": 0.20261834266757037, "grad_norm": 0.3129924237728119, "learning_rate": 4.841283369152287e-05, "loss": 0.2324, "step": 11360 }, { "epoch": 0.20263617878928405, "grad_norm": 0.24327322840690613, "learning_rate": 4.841228788488728e-05, "loss": 0.2297, "step": 11361 }, { "epoch": 0.20265401491099774, "grad_norm": 0.29003897309303284, "learning_rate": 4.841174198749758e-05, "loss": 0.2204, "step": 11362 }, { "epoch": 0.20267185103271146, "grad_norm": 0.2230042666196823, "learning_rate": 4.841119599935588e-05, "loss": 0.192, "step": 11363 }, { "epoch": 0.20268968715442515, "grad_norm": 0.2523375451564789, "learning_rate": 4.8410649920464294e-05, "loss": 0.2427, "step": 11364 }, { "epoch": 0.20270752327613883, "grad_norm": 0.3380890488624573, "learning_rate": 4.8410103750824954e-05, "loss": 0.2331, "step": 11365 }, { "epoch": 0.20272535939785252, "grad_norm": 0.2913780212402344, "learning_rate": 4.840955749043996e-05, "loss": 0.1787, "step": 11366 }, { "epoch": 0.20274319551956624, "grad_norm": 0.21512164175510406, "learning_rate": 4.8409011139311435e-05, "loss": 0.1946, "step": 11367 }, { "epoch": 0.20276103164127993, "grad_norm": 0.2979514002799988, "learning_rate": 4.84084646974415e-05, "loss": 0.1874, "step": 11368 }, { "epoch": 0.20277886776299361, "grad_norm": 0.21054485440254211, "learning_rate": 4.840791816483227e-05, "loss": 0.1914, "step": 11369 }, { "epoch": 0.2027967038847073, "grad_norm": 0.2549343705177307, "learning_rate": 4.8407371541485856e-05, "loss": 0.2033, "step": 11370 }, { "epoch": 0.202814540006421, "grad_norm": 0.3070774972438812, "learning_rate": 4.840682482740439e-05, "loss": 0.2618, "step": 11371 }, { "epoch": 0.2028323761281347, "grad_norm": 0.2668624222278595, "learning_rate": 4.8406278022589993e-05, "loss": 0.1912, "step": 11372 }, { "epoch": 0.2028502122498484, "grad_norm": 0.2944768965244293, "learning_rate": 4.840573112704477e-05, "loss": 0.216, "step": 11373 }, { "epoch": 0.20286804837156208, "grad_norm": 0.3573305606842041, "learning_rate": 4.840518414077086e-05, "loss": 0.1576, "step": 11374 }, { "epoch": 0.20288588449327577, "grad_norm": 0.29191821813583374, "learning_rate": 4.840463706377036e-05, "loss": 0.1903, "step": 11375 }, { "epoch": 0.20290372061498949, "grad_norm": 0.3394958972930908, "learning_rate": 4.8404089896045414e-05, "loss": 0.193, "step": 11376 }, { "epoch": 0.20292155673670317, "grad_norm": 0.3412723243236542, "learning_rate": 4.840354263759813e-05, "loss": 0.2274, "step": 11377 }, { "epoch": 0.20293939285841686, "grad_norm": 0.42917343974113464, "learning_rate": 4.8402995288430626e-05, "loss": 0.2389, "step": 11378 }, { "epoch": 0.20295722898013055, "grad_norm": 0.30133911967277527, "learning_rate": 4.840244784854503e-05, "loss": 0.2173, "step": 11379 }, { "epoch": 0.20297506510184427, "grad_norm": 0.2544538378715515, "learning_rate": 4.840190031794346e-05, "loss": 0.1813, "step": 11380 }, { "epoch": 0.20299290122355795, "grad_norm": 0.3277254104614258, "learning_rate": 4.840135269662805e-05, "loss": 0.2397, "step": 11381 }, { "epoch": 0.20301073734527164, "grad_norm": 0.26831668615341187, "learning_rate": 4.8400804984600913e-05, "loss": 0.2141, "step": 11382 }, { "epoch": 0.20302857346698533, "grad_norm": 0.30280405282974243, "learning_rate": 4.8400257181864175e-05, "loss": 0.2337, "step": 11383 }, { "epoch": 0.20304640958869904, "grad_norm": 0.24623818695545197, "learning_rate": 4.8399709288419944e-05, "loss": 0.1875, "step": 11384 }, { "epoch": 0.20306424571041273, "grad_norm": 0.34140992164611816, "learning_rate": 4.839916130427037e-05, "loss": 0.1983, "step": 11385 }, { "epoch": 0.20308208183212642, "grad_norm": 0.26018744707107544, "learning_rate": 4.839861322941757e-05, "loss": 0.1615, "step": 11386 }, { "epoch": 0.2030999179538401, "grad_norm": 0.22534151375293732, "learning_rate": 4.839806506386365e-05, "loss": 0.1914, "step": 11387 }, { "epoch": 0.20311775407555382, "grad_norm": 0.2761533558368683, "learning_rate": 4.8397516807610756e-05, "loss": 0.2203, "step": 11388 }, { "epoch": 0.2031355901972675, "grad_norm": 0.2777502238750458, "learning_rate": 4.8396968460661006e-05, "loss": 0.2011, "step": 11389 }, { "epoch": 0.2031534263189812, "grad_norm": 0.28777870535850525, "learning_rate": 4.839642002301652e-05, "loss": 0.204, "step": 11390 }, { "epoch": 0.2031712624406949, "grad_norm": 0.38688167929649353, "learning_rate": 4.8395871494679434e-05, "loss": 0.3018, "step": 11391 }, { "epoch": 0.20318909856240858, "grad_norm": 0.22930848598480225, "learning_rate": 4.8395322875651874e-05, "loss": 0.2044, "step": 11392 }, { "epoch": 0.2032069346841223, "grad_norm": 0.3254350423812866, "learning_rate": 4.839477416593595e-05, "loss": 0.216, "step": 11393 }, { "epoch": 0.20322477080583598, "grad_norm": 0.2998526096343994, "learning_rate": 4.839422536553381e-05, "loss": 0.1831, "step": 11394 }, { "epoch": 0.20324260692754967, "grad_norm": 0.511393129825592, "learning_rate": 4.839367647444757e-05, "loss": 0.2312, "step": 11395 }, { "epoch": 0.20326044304926336, "grad_norm": 0.23622164130210876, "learning_rate": 4.839312749267936e-05, "loss": 0.2112, "step": 11396 }, { "epoch": 0.20327827917097707, "grad_norm": 0.4600674510002136, "learning_rate": 4.83925784202313e-05, "loss": 0.1679, "step": 11397 }, { "epoch": 0.20329611529269076, "grad_norm": 0.257719486951828, "learning_rate": 4.8392029257105534e-05, "loss": 0.238, "step": 11398 }, { "epoch": 0.20331395141440445, "grad_norm": 0.2990468740463257, "learning_rate": 4.839148000330419e-05, "loss": 0.2321, "step": 11399 }, { "epoch": 0.20333178753611814, "grad_norm": 0.2650798261165619, "learning_rate": 4.839093065882938e-05, "loss": 0.2051, "step": 11400 }, { "epoch": 0.20334962365783185, "grad_norm": 0.2789836823940277, "learning_rate": 4.8390381223683246e-05, "loss": 0.2093, "step": 11401 }, { "epoch": 0.20336745977954554, "grad_norm": 0.21605423092842102, "learning_rate": 4.838983169786792e-05, "loss": 0.1904, "step": 11402 }, { "epoch": 0.20338529590125923, "grad_norm": 0.2877443730831146, "learning_rate": 4.8389282081385526e-05, "loss": 0.2026, "step": 11403 }, { "epoch": 0.20340313202297292, "grad_norm": 0.24442289769649506, "learning_rate": 4.838873237423819e-05, "loss": 0.1872, "step": 11404 }, { "epoch": 0.20342096814468663, "grad_norm": 0.3187635838985443, "learning_rate": 4.838818257642806e-05, "loss": 0.2663, "step": 11405 }, { "epoch": 0.20343880426640032, "grad_norm": 0.32398679852485657, "learning_rate": 4.838763268795725e-05, "loss": 0.2051, "step": 11406 }, { "epoch": 0.203456640388114, "grad_norm": 0.264431893825531, "learning_rate": 4.8387082708827894e-05, "loss": 0.2238, "step": 11407 }, { "epoch": 0.2034744765098277, "grad_norm": 0.261491984128952, "learning_rate": 4.838653263904214e-05, "loss": 0.1814, "step": 11408 }, { "epoch": 0.2034923126315414, "grad_norm": 0.2942681908607483, "learning_rate": 4.83859824786021e-05, "loss": 0.2062, "step": 11409 }, { "epoch": 0.2035101487532551, "grad_norm": 0.5667554140090942, "learning_rate": 4.8385432227509906e-05, "loss": 0.2474, "step": 11410 }, { "epoch": 0.2035279848749688, "grad_norm": 0.27233177423477173, "learning_rate": 4.8384881885767716e-05, "loss": 0.1866, "step": 11411 }, { "epoch": 0.20354582099668247, "grad_norm": 0.24171605706214905, "learning_rate": 4.838433145337764e-05, "loss": 0.2231, "step": 11412 }, { "epoch": 0.20356365711839616, "grad_norm": 0.19546818733215332, "learning_rate": 4.838378093034182e-05, "loss": 0.1653, "step": 11413 }, { "epoch": 0.20358149324010988, "grad_norm": 0.21581801772117615, "learning_rate": 4.838323031666238e-05, "loss": 0.1527, "step": 11414 }, { "epoch": 0.20359932936182357, "grad_norm": 0.26809418201446533, "learning_rate": 4.838267961234147e-05, "loss": 0.1789, "step": 11415 }, { "epoch": 0.20361716548353725, "grad_norm": 0.4005754292011261, "learning_rate": 4.838212881738122e-05, "loss": 0.2106, "step": 11416 }, { "epoch": 0.20363500160525094, "grad_norm": 0.2776334583759308, "learning_rate": 4.838157793178376e-05, "loss": 0.1961, "step": 11417 }, { "epoch": 0.20365283772696466, "grad_norm": 0.24265480041503906, "learning_rate": 4.838102695555123e-05, "loss": 0.1628, "step": 11418 }, { "epoch": 0.20367067384867835, "grad_norm": 0.2787797152996063, "learning_rate": 4.838047588868576e-05, "loss": 0.2372, "step": 11419 }, { "epoch": 0.20368850997039203, "grad_norm": 0.3233875632286072, "learning_rate": 4.8379924731189496e-05, "loss": 0.2295, "step": 11420 }, { "epoch": 0.20370634609210572, "grad_norm": 0.2317197322845459, "learning_rate": 4.837937348306456e-05, "loss": 0.1612, "step": 11421 }, { "epoch": 0.20372418221381944, "grad_norm": 0.21986952424049377, "learning_rate": 4.837882214431311e-05, "loss": 0.1661, "step": 11422 }, { "epoch": 0.20374201833553313, "grad_norm": 0.33218953013420105, "learning_rate": 4.837827071493726e-05, "loss": 0.2106, "step": 11423 }, { "epoch": 0.20375985445724681, "grad_norm": 0.2304145246744156, "learning_rate": 4.837771919493916e-05, "loss": 0.1509, "step": 11424 }, { "epoch": 0.2037776905789605, "grad_norm": 0.3058910667896271, "learning_rate": 4.837716758432095e-05, "loss": 0.1587, "step": 11425 }, { "epoch": 0.20379552670067422, "grad_norm": 0.20500940084457397, "learning_rate": 4.837661588308476e-05, "loss": 0.1558, "step": 11426 }, { "epoch": 0.2038133628223879, "grad_norm": 0.3538636565208435, "learning_rate": 4.8376064091232734e-05, "loss": 0.1576, "step": 11427 }, { "epoch": 0.2038311989441016, "grad_norm": 0.26476800441741943, "learning_rate": 4.8375512208767e-05, "loss": 0.1957, "step": 11428 }, { "epoch": 0.20384903506581528, "grad_norm": 0.36764729022979736, "learning_rate": 4.8374960235689724e-05, "loss": 0.1987, "step": 11429 }, { "epoch": 0.203866871187529, "grad_norm": 0.3004645109176636, "learning_rate": 4.837440817200302e-05, "loss": 0.2302, "step": 11430 }, { "epoch": 0.20388470730924269, "grad_norm": 0.21339671313762665, "learning_rate": 4.837385601770904e-05, "loss": 0.212, "step": 11431 }, { "epoch": 0.20390254343095637, "grad_norm": 0.26658669114112854, "learning_rate": 4.837330377280992e-05, "loss": 0.1547, "step": 11432 }, { "epoch": 0.20392037955267006, "grad_norm": 0.2382674217224121, "learning_rate": 4.83727514373078e-05, "loss": 0.1728, "step": 11433 }, { "epoch": 0.20393821567438375, "grad_norm": 0.2619294822216034, "learning_rate": 4.8372199011204824e-05, "loss": 0.2034, "step": 11434 }, { "epoch": 0.20395605179609747, "grad_norm": 0.2552444040775299, "learning_rate": 4.837164649450313e-05, "loss": 0.2182, "step": 11435 }, { "epoch": 0.20397388791781115, "grad_norm": 0.38829824328422546, "learning_rate": 4.837109388720486e-05, "loss": 0.2658, "step": 11436 }, { "epoch": 0.20399172403952484, "grad_norm": 0.2558879256248474, "learning_rate": 4.837054118931217e-05, "loss": 0.2176, "step": 11437 }, { "epoch": 0.20400956016123853, "grad_norm": 0.262234091758728, "learning_rate": 4.8369988400827185e-05, "loss": 0.1894, "step": 11438 }, { "epoch": 0.20402739628295224, "grad_norm": 0.294992059469223, "learning_rate": 4.836943552175204e-05, "loss": 0.212, "step": 11439 }, { "epoch": 0.20404523240466593, "grad_norm": 0.339077889919281, "learning_rate": 4.836888255208891e-05, "loss": 0.1808, "step": 11440 }, { "epoch": 0.20406306852637962, "grad_norm": 0.28222477436065674, "learning_rate": 4.836832949183991e-05, "loss": 0.2125, "step": 11441 }, { "epoch": 0.2040809046480933, "grad_norm": 0.32367995381355286, "learning_rate": 4.836777634100719e-05, "loss": 0.2416, "step": 11442 }, { "epoch": 0.20409874076980702, "grad_norm": 0.2875728905200958, "learning_rate": 4.8367223099592904e-05, "loss": 0.1786, "step": 11443 }, { "epoch": 0.2041165768915207, "grad_norm": 0.24228376150131226, "learning_rate": 4.8366669767599194e-05, "loss": 0.1962, "step": 11444 }, { "epoch": 0.2041344130132344, "grad_norm": 0.2372943013906479, "learning_rate": 4.836611634502819e-05, "loss": 0.1839, "step": 11445 }, { "epoch": 0.2041522491349481, "grad_norm": 0.2930713891983032, "learning_rate": 4.836556283188206e-05, "loss": 0.1448, "step": 11446 }, { "epoch": 0.2041700852566618, "grad_norm": 0.3082902729511261, "learning_rate": 4.836500922816294e-05, "loss": 0.1671, "step": 11447 }, { "epoch": 0.2041879213783755, "grad_norm": 0.29287946224212646, "learning_rate": 4.8364455533872965e-05, "loss": 0.213, "step": 11448 }, { "epoch": 0.20420575750008918, "grad_norm": 0.3938713073730469, "learning_rate": 4.83639017490143e-05, "loss": 0.2069, "step": 11449 }, { "epoch": 0.20422359362180287, "grad_norm": 0.25268763303756714, "learning_rate": 4.836334787358907e-05, "loss": 0.1887, "step": 11450 }, { "epoch": 0.20424142974351656, "grad_norm": 0.3099231719970703, "learning_rate": 4.836279390759944e-05, "loss": 0.1988, "step": 11451 }, { "epoch": 0.20425926586523027, "grad_norm": 0.2578817903995514, "learning_rate": 4.8362239851047554e-05, "loss": 0.1446, "step": 11452 }, { "epoch": 0.20427710198694396, "grad_norm": 0.33942461013793945, "learning_rate": 4.8361685703935554e-05, "loss": 0.2132, "step": 11453 }, { "epoch": 0.20429493810865765, "grad_norm": 0.3227539658546448, "learning_rate": 4.8361131466265595e-05, "loss": 0.1603, "step": 11454 }, { "epoch": 0.20431277423037134, "grad_norm": 0.2488083392381668, "learning_rate": 4.836057713803982e-05, "loss": 0.2075, "step": 11455 }, { "epoch": 0.20433061035208505, "grad_norm": 0.2665003836154938, "learning_rate": 4.836002271926037e-05, "loss": 0.2125, "step": 11456 }, { "epoch": 0.20434844647379874, "grad_norm": 0.2565172016620636, "learning_rate": 4.8359468209929414e-05, "loss": 0.1748, "step": 11457 }, { "epoch": 0.20436628259551243, "grad_norm": 0.2896265983581543, "learning_rate": 4.83589136100491e-05, "loss": 0.2044, "step": 11458 }, { "epoch": 0.20438411871722612, "grad_norm": 0.3849414587020874, "learning_rate": 4.835835891962155e-05, "loss": 0.2037, "step": 11459 }, { "epoch": 0.20440195483893983, "grad_norm": 0.30482566356658936, "learning_rate": 4.8357804138648943e-05, "loss": 0.272, "step": 11460 }, { "epoch": 0.20441979096065352, "grad_norm": 0.4008617699146271, "learning_rate": 4.835724926713342e-05, "loss": 0.3139, "step": 11461 }, { "epoch": 0.2044376270823672, "grad_norm": 0.3116397559642792, "learning_rate": 4.8356694305077125e-05, "loss": 0.2907, "step": 11462 }, { "epoch": 0.2044554632040809, "grad_norm": 0.23187680542469025, "learning_rate": 4.835613925248222e-05, "loss": 0.1781, "step": 11463 }, { "epoch": 0.2044732993257946, "grad_norm": 0.2739063501358032, "learning_rate": 4.8355584109350854e-05, "loss": 0.2504, "step": 11464 }, { "epoch": 0.2044911354475083, "grad_norm": 0.2777095437049866, "learning_rate": 4.8355028875685175e-05, "loss": 0.2163, "step": 11465 }, { "epoch": 0.204508971569222, "grad_norm": 0.20674772560596466, "learning_rate": 4.835447355148734e-05, "loss": 0.1669, "step": 11466 }, { "epoch": 0.20452680769093567, "grad_norm": 0.2952086329460144, "learning_rate": 4.8353918136759494e-05, "loss": 0.2259, "step": 11467 }, { "epoch": 0.2045446438126494, "grad_norm": 0.34441298246383667, "learning_rate": 4.83533626315038e-05, "loss": 0.1494, "step": 11468 }, { "epoch": 0.20456247993436308, "grad_norm": 0.2142394334077835, "learning_rate": 4.83528070357224e-05, "loss": 0.1515, "step": 11469 }, { "epoch": 0.20458031605607677, "grad_norm": 0.3051239848136902, "learning_rate": 4.835225134941746e-05, "loss": 0.2263, "step": 11470 }, { "epoch": 0.20459815217779045, "grad_norm": 0.3721875846385956, "learning_rate": 4.8351695572591134e-05, "loss": 0.2559, "step": 11471 }, { "epoch": 0.20461598829950414, "grad_norm": 0.28584057092666626, "learning_rate": 4.835113970524556e-05, "loss": 0.2043, "step": 11472 }, { "epoch": 0.20463382442121786, "grad_norm": 0.2517910599708557, "learning_rate": 4.8350583747382914e-05, "loss": 0.1781, "step": 11473 }, { "epoch": 0.20465166054293155, "grad_norm": 0.32056596875190735, "learning_rate": 4.835002769900533e-05, "loss": 0.2394, "step": 11474 }, { "epoch": 0.20466949666464523, "grad_norm": 0.3380945920944214, "learning_rate": 4.834947156011498e-05, "loss": 0.2438, "step": 11475 }, { "epoch": 0.20468733278635892, "grad_norm": 0.25351786613464355, "learning_rate": 4.834891533071401e-05, "loss": 0.2389, "step": 11476 }, { "epoch": 0.20470516890807264, "grad_norm": 0.2545580565929413, "learning_rate": 4.834835901080458e-05, "loss": 0.2039, "step": 11477 }, { "epoch": 0.20472300502978633, "grad_norm": 0.2410781979560852, "learning_rate": 4.834780260038885e-05, "loss": 0.2047, "step": 11478 }, { "epoch": 0.2047408411515, "grad_norm": 0.25942933559417725, "learning_rate": 4.8347246099468966e-05, "loss": 0.2017, "step": 11479 }, { "epoch": 0.2047586772732137, "grad_norm": 0.24382233619689941, "learning_rate": 4.8346689508047095e-05, "loss": 0.1566, "step": 11480 }, { "epoch": 0.20477651339492742, "grad_norm": 0.2222810685634613, "learning_rate": 4.83461328261254e-05, "loss": 0.1633, "step": 11481 }, { "epoch": 0.2047943495166411, "grad_norm": 0.30445748567581177, "learning_rate": 4.8345576053706023e-05, "loss": 0.2015, "step": 11482 }, { "epoch": 0.2048121856383548, "grad_norm": 0.24044805765151978, "learning_rate": 4.834501919079113e-05, "loss": 0.2317, "step": 11483 }, { "epoch": 0.20483002176006848, "grad_norm": 0.409867525100708, "learning_rate": 4.8344462237382874e-05, "loss": 0.2042, "step": 11484 }, { "epoch": 0.2048478578817822, "grad_norm": 0.242264062166214, "learning_rate": 4.834390519348343e-05, "loss": 0.1738, "step": 11485 }, { "epoch": 0.20486569400349589, "grad_norm": 0.35216033458709717, "learning_rate": 4.834334805909494e-05, "loss": 0.2183, "step": 11486 }, { "epoch": 0.20488353012520957, "grad_norm": 0.2909255027770996, "learning_rate": 4.8342790834219575e-05, "loss": 0.196, "step": 11487 }, { "epoch": 0.20490136624692326, "grad_norm": 0.2488517016172409, "learning_rate": 4.834223351885949e-05, "loss": 0.1587, "step": 11488 }, { "epoch": 0.20491920236863698, "grad_norm": 0.36704373359680176, "learning_rate": 4.834167611301684e-05, "loss": 0.2038, "step": 11489 }, { "epoch": 0.20493703849035066, "grad_norm": 0.32630297541618347, "learning_rate": 4.8341118616693796e-05, "loss": 0.1769, "step": 11490 }, { "epoch": 0.20495487461206435, "grad_norm": 0.31875577569007874, "learning_rate": 4.8340561029892514e-05, "loss": 0.2206, "step": 11491 }, { "epoch": 0.20497271073377804, "grad_norm": 0.26939448714256287, "learning_rate": 4.834000335261516e-05, "loss": 0.1627, "step": 11492 }, { "epoch": 0.20499054685549173, "grad_norm": 0.22917385399341583, "learning_rate": 4.8339445584863887e-05, "loss": 0.1589, "step": 11493 }, { "epoch": 0.20500838297720544, "grad_norm": 0.5053426623344421, "learning_rate": 4.833888772664086e-05, "loss": 0.2124, "step": 11494 }, { "epoch": 0.20502621909891913, "grad_norm": 0.29969319701194763, "learning_rate": 4.8338329777948246e-05, "loss": 0.2134, "step": 11495 }, { "epoch": 0.20504405522063282, "grad_norm": 0.27430886030197144, "learning_rate": 4.83377717387882e-05, "loss": 0.2095, "step": 11496 }, { "epoch": 0.2050618913423465, "grad_norm": 0.2871789336204529, "learning_rate": 4.833721360916289e-05, "loss": 0.1714, "step": 11497 }, { "epoch": 0.20507972746406022, "grad_norm": 0.2735663056373596, "learning_rate": 4.8336655389074485e-05, "loss": 0.2492, "step": 11498 }, { "epoch": 0.2050975635857739, "grad_norm": 0.3597601056098938, "learning_rate": 4.833609707852514e-05, "loss": 0.1995, "step": 11499 }, { "epoch": 0.2051153997074876, "grad_norm": 0.31295526027679443, "learning_rate": 4.833553867751703e-05, "loss": 0.16, "step": 11500 }, { "epoch": 0.2051332358292013, "grad_norm": 0.2865414619445801, "learning_rate": 4.83349801860523e-05, "loss": 0.1707, "step": 11501 }, { "epoch": 0.205151071950915, "grad_norm": 0.3860272467136383, "learning_rate": 4.833442160413314e-05, "loss": 0.3146, "step": 11502 }, { "epoch": 0.2051689080726287, "grad_norm": 0.2656128406524658, "learning_rate": 4.83338629317617e-05, "loss": 0.2305, "step": 11503 }, { "epoch": 0.20518674419434238, "grad_norm": 0.33289384841918945, "learning_rate": 4.833330416894014e-05, "loss": 0.2213, "step": 11504 }, { "epoch": 0.20520458031605607, "grad_norm": 0.26756981015205383, "learning_rate": 4.833274531567064e-05, "loss": 0.2241, "step": 11505 }, { "epoch": 0.20522241643776978, "grad_norm": 0.25366178154945374, "learning_rate": 4.833218637195536e-05, "loss": 0.2162, "step": 11506 }, { "epoch": 0.20524025255948347, "grad_norm": 0.2395322471857071, "learning_rate": 4.833162733779647e-05, "loss": 0.2169, "step": 11507 }, { "epoch": 0.20525808868119716, "grad_norm": 0.27153098583221436, "learning_rate": 4.833106821319613e-05, "loss": 0.1839, "step": 11508 }, { "epoch": 0.20527592480291085, "grad_norm": 0.2954888343811035, "learning_rate": 4.833050899815651e-05, "loss": 0.212, "step": 11509 }, { "epoch": 0.20529376092462456, "grad_norm": 0.18858271837234497, "learning_rate": 4.832994969267978e-05, "loss": 0.1443, "step": 11510 }, { "epoch": 0.20531159704633825, "grad_norm": 0.37491849064826965, "learning_rate": 4.832939029676811e-05, "loss": 0.2055, "step": 11511 }, { "epoch": 0.20532943316805194, "grad_norm": 0.22919854521751404, "learning_rate": 4.832883081042366e-05, "loss": 0.196, "step": 11512 }, { "epoch": 0.20534726928976563, "grad_norm": 0.2858999967575073, "learning_rate": 4.83282712336486e-05, "loss": 0.1831, "step": 11513 }, { "epoch": 0.20536510541147932, "grad_norm": 0.24436570703983307, "learning_rate": 4.8327711566445116e-05, "loss": 0.2245, "step": 11514 }, { "epoch": 0.20538294153319303, "grad_norm": 0.32226887345314026, "learning_rate": 4.832715180881536e-05, "loss": 0.1983, "step": 11515 }, { "epoch": 0.20540077765490672, "grad_norm": 0.2781701982021332, "learning_rate": 4.832659196076151e-05, "loss": 0.2024, "step": 11516 }, { "epoch": 0.2054186137766204, "grad_norm": 0.27700164914131165, "learning_rate": 4.832603202228573e-05, "loss": 0.197, "step": 11517 }, { "epoch": 0.2054364498983341, "grad_norm": 0.21660085022449493, "learning_rate": 4.8325471993390195e-05, "loss": 0.184, "step": 11518 }, { "epoch": 0.2054542860200478, "grad_norm": 0.2504339814186096, "learning_rate": 4.832491187407706e-05, "loss": 0.1552, "step": 11519 }, { "epoch": 0.2054721221417615, "grad_norm": 0.31692907214164734, "learning_rate": 4.832435166434853e-05, "loss": 0.1871, "step": 11520 }, { "epoch": 0.2054899582634752, "grad_norm": 0.24089834094047546, "learning_rate": 4.832379136420675e-05, "loss": 0.1874, "step": 11521 }, { "epoch": 0.20550779438518887, "grad_norm": 0.2773512303829193, "learning_rate": 4.832323097365389e-05, "loss": 0.185, "step": 11522 }, { "epoch": 0.2055256305069026, "grad_norm": 0.3746817409992218, "learning_rate": 4.8322670492692145e-05, "loss": 0.2556, "step": 11523 }, { "epoch": 0.20554346662861628, "grad_norm": 0.25191113352775574, "learning_rate": 4.832210992132367e-05, "loss": 0.2098, "step": 11524 }, { "epoch": 0.20556130275032997, "grad_norm": 0.3493598997592926, "learning_rate": 4.8321549259550636e-05, "loss": 0.1926, "step": 11525 }, { "epoch": 0.20557913887204365, "grad_norm": 0.24469542503356934, "learning_rate": 4.832098850737522e-05, "loss": 0.1969, "step": 11526 }, { "epoch": 0.20559697499375737, "grad_norm": 0.2523654103279114, "learning_rate": 4.832042766479961e-05, "loss": 0.1761, "step": 11527 }, { "epoch": 0.20561481111547106, "grad_norm": 0.2907570004463196, "learning_rate": 4.8319866731825955e-05, "loss": 0.2217, "step": 11528 }, { "epoch": 0.20563264723718475, "grad_norm": 0.23885464668273926, "learning_rate": 4.831930570845645e-05, "loss": 0.1395, "step": 11529 }, { "epoch": 0.20565048335889843, "grad_norm": 0.2950996160507202, "learning_rate": 4.831874459469326e-05, "loss": 0.1898, "step": 11530 }, { "epoch": 0.20566831948061215, "grad_norm": 0.24162188172340393, "learning_rate": 4.831818339053856e-05, "loss": 0.1902, "step": 11531 }, { "epoch": 0.20568615560232584, "grad_norm": 0.2928565442562103, "learning_rate": 4.831762209599453e-05, "loss": 0.2269, "step": 11532 }, { "epoch": 0.20570399172403953, "grad_norm": 0.23402349650859833, "learning_rate": 4.8317060711063345e-05, "loss": 0.1853, "step": 11533 }, { "epoch": 0.2057218278457532, "grad_norm": 0.28118249773979187, "learning_rate": 4.831649923574717e-05, "loss": 0.2051, "step": 11534 }, { "epoch": 0.2057396639674669, "grad_norm": 0.35088911652565, "learning_rate": 4.831593767004821e-05, "loss": 0.1741, "step": 11535 }, { "epoch": 0.20575750008918062, "grad_norm": 0.4024946689605713, "learning_rate": 4.8315376013968606e-05, "loss": 0.2107, "step": 11536 }, { "epoch": 0.2057753362108943, "grad_norm": 0.3792724311351776, "learning_rate": 4.8314814267510554e-05, "loss": 0.2424, "step": 11537 }, { "epoch": 0.205793172332608, "grad_norm": 0.3693736493587494, "learning_rate": 4.8314252430676234e-05, "loss": 0.2483, "step": 11538 }, { "epoch": 0.20581100845432168, "grad_norm": 0.240234836935997, "learning_rate": 4.831369050346781e-05, "loss": 0.1542, "step": 11539 }, { "epoch": 0.2058288445760354, "grad_norm": 0.24847690761089325, "learning_rate": 4.8313128485887474e-05, "loss": 0.2104, "step": 11540 }, { "epoch": 0.20584668069774908, "grad_norm": 0.2729666531085968, "learning_rate": 4.83125663779374e-05, "loss": 0.2241, "step": 11541 }, { "epoch": 0.20586451681946277, "grad_norm": 0.4091082811355591, "learning_rate": 4.8312004179619766e-05, "loss": 0.224, "step": 11542 }, { "epoch": 0.20588235294117646, "grad_norm": 0.3012774586677551, "learning_rate": 4.831144189093676e-05, "loss": 0.2253, "step": 11543 }, { "epoch": 0.20590018906289018, "grad_norm": 0.25534123182296753, "learning_rate": 4.8310879511890546e-05, "loss": 0.2236, "step": 11544 }, { "epoch": 0.20591802518460386, "grad_norm": 0.21891318261623383, "learning_rate": 4.8310317042483314e-05, "loss": 0.1361, "step": 11545 }, { "epoch": 0.20593586130631755, "grad_norm": 0.34830811619758606, "learning_rate": 4.830975448271724e-05, "loss": 0.2053, "step": 11546 }, { "epoch": 0.20595369742803124, "grad_norm": 0.3653646409511566, "learning_rate": 4.830919183259451e-05, "loss": 0.1926, "step": 11547 }, { "epoch": 0.20597153354974496, "grad_norm": 0.24574635922908783, "learning_rate": 4.83086290921173e-05, "loss": 0.1726, "step": 11548 }, { "epoch": 0.20598936967145864, "grad_norm": 0.5150383710861206, "learning_rate": 4.830806626128779e-05, "loss": 0.1787, "step": 11549 }, { "epoch": 0.20600720579317233, "grad_norm": 0.2258683145046234, "learning_rate": 4.830750334010817e-05, "loss": 0.1716, "step": 11550 }, { "epoch": 0.20602504191488602, "grad_norm": 0.3205817639827728, "learning_rate": 4.8306940328580614e-05, "loss": 0.2262, "step": 11551 }, { "epoch": 0.2060428780365997, "grad_norm": 0.30511584877967834, "learning_rate": 4.8306377226707304e-05, "loss": 0.1959, "step": 11552 }, { "epoch": 0.20606071415831342, "grad_norm": 0.4284152388572693, "learning_rate": 4.830581403449043e-05, "loss": 0.239, "step": 11553 }, { "epoch": 0.2060785502800271, "grad_norm": 0.40363168716430664, "learning_rate": 4.830525075193218e-05, "loss": 0.1761, "step": 11554 }, { "epoch": 0.2060963864017408, "grad_norm": 0.3364522159099579, "learning_rate": 4.830468737903471e-05, "loss": 0.2448, "step": 11555 }, { "epoch": 0.2061142225234545, "grad_norm": 0.3168528974056244, "learning_rate": 4.830412391580024e-05, "loss": 0.1427, "step": 11556 }, { "epoch": 0.2061320586451682, "grad_norm": 0.2818145453929901, "learning_rate": 4.830356036223093e-05, "loss": 0.2116, "step": 11557 }, { "epoch": 0.2061498947668819, "grad_norm": 0.2617489695549011, "learning_rate": 4.8302996718328965e-05, "loss": 0.2497, "step": 11558 }, { "epoch": 0.20616773088859558, "grad_norm": 0.2629631459712982, "learning_rate": 4.830243298409655e-05, "loss": 0.2034, "step": 11559 }, { "epoch": 0.20618556701030927, "grad_norm": 0.24528156220912933, "learning_rate": 4.830186915953584e-05, "loss": 0.2211, "step": 11560 }, { "epoch": 0.20620340313202298, "grad_norm": 0.2015800029039383, "learning_rate": 4.830130524464904e-05, "loss": 0.1736, "step": 11561 }, { "epoch": 0.20622123925373667, "grad_norm": 0.3551153242588043, "learning_rate": 4.830074123943834e-05, "loss": 0.2888, "step": 11562 }, { "epoch": 0.20623907537545036, "grad_norm": 0.3136424124240875, "learning_rate": 4.830017714390592e-05, "loss": 0.1885, "step": 11563 }, { "epoch": 0.20625691149716405, "grad_norm": 0.32659777998924255, "learning_rate": 4.829961295805396e-05, "loss": 0.2399, "step": 11564 }, { "epoch": 0.20627474761887776, "grad_norm": 0.2697278559207916, "learning_rate": 4.8299048681884654e-05, "loss": 0.2096, "step": 11565 }, { "epoch": 0.20629258374059145, "grad_norm": 0.3694291114807129, "learning_rate": 4.8298484315400194e-05, "loss": 0.2328, "step": 11566 }, { "epoch": 0.20631041986230514, "grad_norm": 0.23069000244140625, "learning_rate": 4.8297919858602756e-05, "loss": 0.1868, "step": 11567 }, { "epoch": 0.20632825598401883, "grad_norm": 0.28382018208503723, "learning_rate": 4.8297355311494536e-05, "loss": 0.2109, "step": 11568 }, { "epoch": 0.20634609210573254, "grad_norm": 0.28953275084495544, "learning_rate": 4.829679067407772e-05, "loss": 0.1856, "step": 11569 }, { "epoch": 0.20636392822744623, "grad_norm": 0.26258930563926697, "learning_rate": 4.8296225946354494e-05, "loss": 0.2311, "step": 11570 }, { "epoch": 0.20638176434915992, "grad_norm": 0.34757038950920105, "learning_rate": 4.829566112832705e-05, "loss": 0.1777, "step": 11571 }, { "epoch": 0.2063996004708736, "grad_norm": 0.220563605427742, "learning_rate": 4.8295096219997584e-05, "loss": 0.1982, "step": 11572 }, { "epoch": 0.2064174365925873, "grad_norm": 0.33619746565818787, "learning_rate": 4.8294531221368274e-05, "loss": 0.173, "step": 11573 }, { "epoch": 0.206435272714301, "grad_norm": 0.3179114758968353, "learning_rate": 4.8293966132441315e-05, "loss": 0.1851, "step": 11574 }, { "epoch": 0.2064531088360147, "grad_norm": 0.2530944347381592, "learning_rate": 4.8293400953218896e-05, "loss": 0.2173, "step": 11575 }, { "epoch": 0.2064709449577284, "grad_norm": 0.260496586561203, "learning_rate": 4.829283568370321e-05, "loss": 0.1996, "step": 11576 }, { "epoch": 0.20648878107944207, "grad_norm": 0.2651001214981079, "learning_rate": 4.829227032389646e-05, "loss": 0.2357, "step": 11577 }, { "epoch": 0.2065066172011558, "grad_norm": 0.2797831594944, "learning_rate": 4.8291704873800816e-05, "loss": 0.2425, "step": 11578 }, { "epoch": 0.20652445332286948, "grad_norm": 0.2605598568916321, "learning_rate": 4.829113933341847e-05, "loss": 0.2215, "step": 11579 }, { "epoch": 0.20654228944458317, "grad_norm": 0.18506459891796112, "learning_rate": 4.829057370275163e-05, "loss": 0.1737, "step": 11580 }, { "epoch": 0.20656012556629685, "grad_norm": 0.33147522807121277, "learning_rate": 4.829000798180249e-05, "loss": 0.231, "step": 11581 }, { "epoch": 0.20657796168801057, "grad_norm": 0.23073697090148926, "learning_rate": 4.828944217057323e-05, "loss": 0.1762, "step": 11582 }, { "epoch": 0.20659579780972426, "grad_norm": 0.2841963768005371, "learning_rate": 4.828887626906604e-05, "loss": 0.1965, "step": 11583 }, { "epoch": 0.20661363393143795, "grad_norm": 0.23192636668682098, "learning_rate": 4.828831027728313e-05, "loss": 0.2083, "step": 11584 }, { "epoch": 0.20663147005315163, "grad_norm": 0.3724508285522461, "learning_rate": 4.828774419522669e-05, "loss": 0.2254, "step": 11585 }, { "epoch": 0.20664930617486535, "grad_norm": 0.3201790153980255, "learning_rate": 4.8287178022898906e-05, "loss": 0.2069, "step": 11586 }, { "epoch": 0.20666714229657904, "grad_norm": 0.20933395624160767, "learning_rate": 4.8286611760301974e-05, "loss": 0.1612, "step": 11587 }, { "epoch": 0.20668497841829273, "grad_norm": 0.30476272106170654, "learning_rate": 4.8286045407438096e-05, "loss": 0.2198, "step": 11588 }, { "epoch": 0.2067028145400064, "grad_norm": 0.2761203646659851, "learning_rate": 4.828547896430946e-05, "loss": 0.1914, "step": 11589 }, { "epoch": 0.20672065066172013, "grad_norm": 0.3802258372306824, "learning_rate": 4.828491243091827e-05, "loss": 0.2512, "step": 11590 }, { "epoch": 0.20673848678343382, "grad_norm": 0.22877644002437592, "learning_rate": 4.8284345807266706e-05, "loss": 0.1768, "step": 11591 }, { "epoch": 0.2067563229051475, "grad_norm": 0.2869945168495178, "learning_rate": 4.8283779093356986e-05, "loss": 0.215, "step": 11592 }, { "epoch": 0.2067741590268612, "grad_norm": 0.3268187642097473, "learning_rate": 4.828321228919129e-05, "loss": 0.2584, "step": 11593 }, { "epoch": 0.20679199514857488, "grad_norm": 0.3540118634700775, "learning_rate": 4.828264539477183e-05, "loss": 0.1475, "step": 11594 }, { "epoch": 0.2068098312702886, "grad_norm": 0.3449714481830597, "learning_rate": 4.828207841010078e-05, "loss": 0.2457, "step": 11595 }, { "epoch": 0.20682766739200228, "grad_norm": 0.2848774194717407, "learning_rate": 4.828151133518037e-05, "loss": 0.1825, "step": 11596 }, { "epoch": 0.20684550351371597, "grad_norm": 0.3935084640979767, "learning_rate": 4.828094417001277e-05, "loss": 0.1661, "step": 11597 }, { "epoch": 0.20686333963542966, "grad_norm": 0.2558848261833191, "learning_rate": 4.82803769146002e-05, "loss": 0.1652, "step": 11598 }, { "epoch": 0.20688117575714338, "grad_norm": 0.38729530572891235, "learning_rate": 4.827980956894484e-05, "loss": 0.2168, "step": 11599 }, { "epoch": 0.20689901187885706, "grad_norm": 0.22212867438793182, "learning_rate": 4.82792421330489e-05, "loss": 0.1866, "step": 11600 }, { "epoch": 0.20691684800057075, "grad_norm": 0.3084488809108734, "learning_rate": 4.827867460691456e-05, "loss": 0.2304, "step": 11601 }, { "epoch": 0.20693468412228444, "grad_norm": 0.2829030454158783, "learning_rate": 4.8278106990544056e-05, "loss": 0.1894, "step": 11602 }, { "epoch": 0.20695252024399816, "grad_norm": 0.32071101665496826, "learning_rate": 4.8277539283939566e-05, "loss": 0.1733, "step": 11603 }, { "epoch": 0.20697035636571184, "grad_norm": 0.2623653709888458, "learning_rate": 4.82769714871033e-05, "loss": 0.1994, "step": 11604 }, { "epoch": 0.20698819248742553, "grad_norm": 0.2787366509437561, "learning_rate": 4.827640360003745e-05, "loss": 0.2468, "step": 11605 }, { "epoch": 0.20700602860913922, "grad_norm": 0.20443102717399597, "learning_rate": 4.827583562274421e-05, "loss": 0.2222, "step": 11606 }, { "epoch": 0.20702386473085294, "grad_norm": 0.2780798673629761, "learning_rate": 4.82752675552258e-05, "loss": 0.2002, "step": 11607 }, { "epoch": 0.20704170085256662, "grad_norm": 0.2777286469936371, "learning_rate": 4.827469939748441e-05, "loss": 0.2184, "step": 11608 }, { "epoch": 0.2070595369742803, "grad_norm": 0.31433841586112976, "learning_rate": 4.827413114952225e-05, "loss": 0.1627, "step": 11609 }, { "epoch": 0.207077373095994, "grad_norm": 0.4145423471927643, "learning_rate": 4.827356281134152e-05, "loss": 0.1787, "step": 11610 }, { "epoch": 0.20709520921770772, "grad_norm": 0.36890047788619995, "learning_rate": 4.8272994382944416e-05, "loss": 0.2169, "step": 11611 }, { "epoch": 0.2071130453394214, "grad_norm": 0.23022206127643585, "learning_rate": 4.827242586433315e-05, "loss": 0.1788, "step": 11612 }, { "epoch": 0.2071308814611351, "grad_norm": 0.28333592414855957, "learning_rate": 4.8271857255509925e-05, "loss": 0.1839, "step": 11613 }, { "epoch": 0.20714871758284878, "grad_norm": 0.3183026611804962, "learning_rate": 4.827128855647694e-05, "loss": 0.226, "step": 11614 }, { "epoch": 0.20716655370456247, "grad_norm": 0.29933854937553406, "learning_rate": 4.827071976723641e-05, "loss": 0.226, "step": 11615 }, { "epoch": 0.20718438982627618, "grad_norm": 0.3120318651199341, "learning_rate": 4.8270150887790533e-05, "loss": 0.2724, "step": 11616 }, { "epoch": 0.20720222594798987, "grad_norm": 0.24469727277755737, "learning_rate": 4.82695819181415e-05, "loss": 0.1706, "step": 11617 }, { "epoch": 0.20722006206970356, "grad_norm": 0.2674146294593811, "learning_rate": 4.8269012858291554e-05, "loss": 0.193, "step": 11618 }, { "epoch": 0.20723789819141725, "grad_norm": 0.3089778423309326, "learning_rate": 4.826844370824286e-05, "loss": 0.1956, "step": 11619 }, { "epoch": 0.20725573431313096, "grad_norm": 0.2859371602535248, "learning_rate": 4.8267874467997644e-05, "loss": 0.2225, "step": 11620 }, { "epoch": 0.20727357043484465, "grad_norm": 0.28218528628349304, "learning_rate": 4.826730513755811e-05, "loss": 0.1991, "step": 11621 }, { "epoch": 0.20729140655655834, "grad_norm": 0.2642873525619507, "learning_rate": 4.826673571692646e-05, "loss": 0.1814, "step": 11622 }, { "epoch": 0.20730924267827203, "grad_norm": 0.3275371491909027, "learning_rate": 4.826616620610492e-05, "loss": 0.2512, "step": 11623 }, { "epoch": 0.20732707879998574, "grad_norm": 0.3450893759727478, "learning_rate": 4.8265596605095675e-05, "loss": 0.2632, "step": 11624 }, { "epoch": 0.20734491492169943, "grad_norm": 0.2679916024208069, "learning_rate": 4.8265026913900944e-05, "loss": 0.221, "step": 11625 }, { "epoch": 0.20736275104341312, "grad_norm": 0.24468261003494263, "learning_rate": 4.8264457132522934e-05, "loss": 0.1941, "step": 11626 }, { "epoch": 0.2073805871651268, "grad_norm": 0.328224241733551, "learning_rate": 4.8263887260963846e-05, "loss": 0.2185, "step": 11627 }, { "epoch": 0.20739842328684052, "grad_norm": 0.23001453280448914, "learning_rate": 4.826331729922591e-05, "loss": 0.1848, "step": 11628 }, { "epoch": 0.2074162594085542, "grad_norm": 0.3273157775402069, "learning_rate": 4.826274724731131e-05, "loss": 0.2393, "step": 11629 }, { "epoch": 0.2074340955302679, "grad_norm": 0.3130827248096466, "learning_rate": 4.8262177105222265e-05, "loss": 0.2294, "step": 11630 }, { "epoch": 0.20745193165198159, "grad_norm": 0.21297885477542877, "learning_rate": 4.8261606872960994e-05, "loss": 0.1678, "step": 11631 }, { "epoch": 0.20746976777369527, "grad_norm": 0.28783902525901794, "learning_rate": 4.826103655052969e-05, "loss": 0.2266, "step": 11632 }, { "epoch": 0.207487603895409, "grad_norm": 0.2523465156555176, "learning_rate": 4.826046613793059e-05, "loss": 0.2163, "step": 11633 }, { "epoch": 0.20750544001712268, "grad_norm": 0.2851579189300537, "learning_rate": 4.825989563516588e-05, "loss": 0.2245, "step": 11634 }, { "epoch": 0.20752327613883637, "grad_norm": 0.19099538028240204, "learning_rate": 4.825932504223778e-05, "loss": 0.1646, "step": 11635 }, { "epoch": 0.20754111226055005, "grad_norm": 0.32935628294944763, "learning_rate": 4.825875435914851e-05, "loss": 0.2004, "step": 11636 }, { "epoch": 0.20755894838226377, "grad_norm": 0.3129284083843231, "learning_rate": 4.8258183585900264e-05, "loss": 0.2332, "step": 11637 }, { "epoch": 0.20757678450397746, "grad_norm": 0.3103378117084503, "learning_rate": 4.825761272249527e-05, "loss": 0.265, "step": 11638 }, { "epoch": 0.20759462062569115, "grad_norm": 0.2517412602901459, "learning_rate": 4.825704176893574e-05, "loss": 0.2157, "step": 11639 }, { "epoch": 0.20761245674740483, "grad_norm": 0.3071826696395874, "learning_rate": 4.8256470725223876e-05, "loss": 0.174, "step": 11640 }, { "epoch": 0.20763029286911855, "grad_norm": 0.29822486639022827, "learning_rate": 4.825589959136191e-05, "loss": 0.2022, "step": 11641 }, { "epoch": 0.20764812899083224, "grad_norm": 0.37174782156944275, "learning_rate": 4.825532836735204e-05, "loss": 0.226, "step": 11642 }, { "epoch": 0.20766596511254592, "grad_norm": 0.26324722170829773, "learning_rate": 4.825475705319648e-05, "loss": 0.2499, "step": 11643 }, { "epoch": 0.2076838012342596, "grad_norm": 0.2306855171918869, "learning_rate": 4.8254185648897455e-05, "loss": 0.1702, "step": 11644 }, { "epoch": 0.20770163735597333, "grad_norm": 0.2013261318206787, "learning_rate": 4.825361415445717e-05, "loss": 0.1269, "step": 11645 }, { "epoch": 0.20771947347768702, "grad_norm": 0.31223180890083313, "learning_rate": 4.8253042569877846e-05, "loss": 0.1959, "step": 11646 }, { "epoch": 0.2077373095994007, "grad_norm": 0.22666801512241364, "learning_rate": 4.825247089516171e-05, "loss": 0.1778, "step": 11647 }, { "epoch": 0.2077551457211144, "grad_norm": 0.386925607919693, "learning_rate": 4.825189913031095e-05, "loss": 0.2875, "step": 11648 }, { "epoch": 0.2077729818428281, "grad_norm": 0.3336915075778961, "learning_rate": 4.82513272753278e-05, "loss": 0.248, "step": 11649 }, { "epoch": 0.2077908179645418, "grad_norm": 0.3365747332572937, "learning_rate": 4.825075533021448e-05, "loss": 0.1855, "step": 11650 }, { "epoch": 0.20780865408625548, "grad_norm": 0.3874429762363434, "learning_rate": 4.82501832949732e-05, "loss": 0.1836, "step": 11651 }, { "epoch": 0.20782649020796917, "grad_norm": 0.2855224311351776, "learning_rate": 4.824961116960618e-05, "loss": 0.2393, "step": 11652 }, { "epoch": 0.20784432632968286, "grad_norm": 0.2667388319969177, "learning_rate": 4.8249038954115634e-05, "loss": 0.1979, "step": 11653 }, { "epoch": 0.20786216245139658, "grad_norm": 0.35553085803985596, "learning_rate": 4.824846664850379e-05, "loss": 0.209, "step": 11654 }, { "epoch": 0.20787999857311026, "grad_norm": 0.25695550441741943, "learning_rate": 4.824789425277285e-05, "loss": 0.2067, "step": 11655 }, { "epoch": 0.20789783469482395, "grad_norm": 0.2398696094751358, "learning_rate": 4.824732176692505e-05, "loss": 0.2035, "step": 11656 }, { "epoch": 0.20791567081653764, "grad_norm": 0.2993643581867218, "learning_rate": 4.824674919096259e-05, "loss": 0.2223, "step": 11657 }, { "epoch": 0.20793350693825136, "grad_norm": 0.3369992673397064, "learning_rate": 4.8246176524887717e-05, "loss": 0.1716, "step": 11658 }, { "epoch": 0.20795134305996504, "grad_norm": 0.4276783764362335, "learning_rate": 4.8245603768702626e-05, "loss": 0.2464, "step": 11659 }, { "epoch": 0.20796917918167873, "grad_norm": 0.21072588860988617, "learning_rate": 4.824503092240955e-05, "loss": 0.2085, "step": 11660 }, { "epoch": 0.20798701530339242, "grad_norm": 0.379810631275177, "learning_rate": 4.82444579860107e-05, "loss": 0.2069, "step": 11661 }, { "epoch": 0.20800485142510614, "grad_norm": 0.19832254946231842, "learning_rate": 4.82438849595083e-05, "loss": 0.1803, "step": 11662 }, { "epoch": 0.20802268754681982, "grad_norm": 0.250905305147171, "learning_rate": 4.824331184290458e-05, "loss": 0.2263, "step": 11663 }, { "epoch": 0.2080405236685335, "grad_norm": 0.2300119251012802, "learning_rate": 4.824273863620176e-05, "loss": 0.1935, "step": 11664 }, { "epoch": 0.2080583597902472, "grad_norm": 0.2527783513069153, "learning_rate": 4.8242165339402044e-05, "loss": 0.2127, "step": 11665 }, { "epoch": 0.20807619591196092, "grad_norm": 0.21021036803722382, "learning_rate": 4.824159195250768e-05, "loss": 0.18, "step": 11666 }, { "epoch": 0.2080940320336746, "grad_norm": 0.3651825487613678, "learning_rate": 4.824101847552087e-05, "loss": 0.227, "step": 11667 }, { "epoch": 0.2081118681553883, "grad_norm": 0.2813571095466614, "learning_rate": 4.824044490844385e-05, "loss": 0.2167, "step": 11668 }, { "epoch": 0.20812970427710198, "grad_norm": 0.24911226332187653, "learning_rate": 4.8239871251278835e-05, "loss": 0.2084, "step": 11669 }, { "epoch": 0.2081475403988157, "grad_norm": 0.2716328799724579, "learning_rate": 4.8239297504028056e-05, "loss": 0.2106, "step": 11670 }, { "epoch": 0.20816537652052938, "grad_norm": 0.2395828366279602, "learning_rate": 4.823872366669373e-05, "loss": 0.1997, "step": 11671 }, { "epoch": 0.20818321264224307, "grad_norm": 0.23245181143283844, "learning_rate": 4.823814973927809e-05, "loss": 0.2127, "step": 11672 }, { "epoch": 0.20820104876395676, "grad_norm": 0.33587050437927246, "learning_rate": 4.823757572178334e-05, "loss": 0.2432, "step": 11673 }, { "epoch": 0.20821888488567045, "grad_norm": 0.2422908991575241, "learning_rate": 4.823700161421174e-05, "loss": 0.196, "step": 11674 }, { "epoch": 0.20823672100738416, "grad_norm": 0.2858142554759979, "learning_rate": 4.823642741656549e-05, "loss": 0.2166, "step": 11675 }, { "epoch": 0.20825455712909785, "grad_norm": 0.30445921421051025, "learning_rate": 4.823585312884682e-05, "loss": 0.1284, "step": 11676 }, { "epoch": 0.20827239325081154, "grad_norm": 0.2217496782541275, "learning_rate": 4.823527875105796e-05, "loss": 0.2001, "step": 11677 }, { "epoch": 0.20829022937252523, "grad_norm": 0.6687961220741272, "learning_rate": 4.823470428320113e-05, "loss": 0.2046, "step": 11678 }, { "epoch": 0.20830806549423894, "grad_norm": 0.2101033627986908, "learning_rate": 4.823412972527856e-05, "loss": 0.1793, "step": 11679 }, { "epoch": 0.20832590161595263, "grad_norm": 0.24649551510810852, "learning_rate": 4.8233555077292484e-05, "loss": 0.1993, "step": 11680 }, { "epoch": 0.20834373773766632, "grad_norm": 0.29888081550598145, "learning_rate": 4.823298033924512e-05, "loss": 0.1844, "step": 11681 }, { "epoch": 0.20836157385938, "grad_norm": 0.25010573863983154, "learning_rate": 4.82324055111387e-05, "loss": 0.2259, "step": 11682 }, { "epoch": 0.20837940998109372, "grad_norm": 0.3335828185081482, "learning_rate": 4.823183059297546e-05, "loss": 0.2108, "step": 11683 }, { "epoch": 0.2083972461028074, "grad_norm": 0.2499372661113739, "learning_rate": 4.823125558475761e-05, "loss": 0.1706, "step": 11684 }, { "epoch": 0.2084150822245211, "grad_norm": 0.2664951682090759, "learning_rate": 4.823068048648739e-05, "loss": 0.1762, "step": 11685 }, { "epoch": 0.20843291834623479, "grad_norm": 0.34128519892692566, "learning_rate": 4.8230105298167034e-05, "loss": 0.2474, "step": 11686 }, { "epoch": 0.2084507544679485, "grad_norm": 0.2763022184371948, "learning_rate": 4.822953001979876e-05, "loss": 0.1702, "step": 11687 }, { "epoch": 0.2084685905896622, "grad_norm": 0.23775845766067505, "learning_rate": 4.822895465138481e-05, "loss": 0.2059, "step": 11688 }, { "epoch": 0.20848642671137588, "grad_norm": 0.2731362581253052, "learning_rate": 4.822837919292741e-05, "loss": 0.2283, "step": 11689 }, { "epoch": 0.20850426283308957, "grad_norm": 0.22162795066833496, "learning_rate": 4.8227803644428786e-05, "loss": 0.1441, "step": 11690 }, { "epoch": 0.20852209895480328, "grad_norm": 0.24297797679901123, "learning_rate": 4.8227228005891176e-05, "loss": 0.2219, "step": 11691 }, { "epoch": 0.20853993507651697, "grad_norm": 0.26579803228378296, "learning_rate": 4.822665227731681e-05, "loss": 0.1966, "step": 11692 }, { "epoch": 0.20855777119823066, "grad_norm": 0.26142609119415283, "learning_rate": 4.822607645870791e-05, "loss": 0.2179, "step": 11693 }, { "epoch": 0.20857560731994434, "grad_norm": 0.2581977844238281, "learning_rate": 4.8225500550066714e-05, "loss": 0.2093, "step": 11694 }, { "epoch": 0.20859344344165803, "grad_norm": 0.4347952902317047, "learning_rate": 4.822492455139546e-05, "loss": 0.1717, "step": 11695 }, { "epoch": 0.20861127956337175, "grad_norm": 0.21105089783668518, "learning_rate": 4.822434846269638e-05, "loss": 0.1807, "step": 11696 }, { "epoch": 0.20862911568508544, "grad_norm": 0.2665728032588959, "learning_rate": 4.82237722839717e-05, "loss": 0.2004, "step": 11697 }, { "epoch": 0.20864695180679912, "grad_norm": 0.3084055483341217, "learning_rate": 4.822319601522366e-05, "loss": 0.1922, "step": 11698 }, { "epoch": 0.2086647879285128, "grad_norm": 0.2810305953025818, "learning_rate": 4.822261965645449e-05, "loss": 0.1687, "step": 11699 }, { "epoch": 0.20868262405022653, "grad_norm": 0.37187448143959045, "learning_rate": 4.822204320766642e-05, "loss": 0.1858, "step": 11700 }, { "epoch": 0.20870046017194022, "grad_norm": 0.39211082458496094, "learning_rate": 4.82214666688617e-05, "loss": 0.2323, "step": 11701 }, { "epoch": 0.2087182962936539, "grad_norm": 0.25368061661720276, "learning_rate": 4.822089004004255e-05, "loss": 0.1602, "step": 11702 }, { "epoch": 0.2087361324153676, "grad_norm": 0.40686118602752686, "learning_rate": 4.822031332121121e-05, "loss": 0.2694, "step": 11703 }, { "epoch": 0.2087539685370813, "grad_norm": 0.2769964933395386, "learning_rate": 4.821973651236992e-05, "loss": 0.1704, "step": 11704 }, { "epoch": 0.208771804658795, "grad_norm": 0.32989227771759033, "learning_rate": 4.82191596135209e-05, "loss": 0.1781, "step": 11705 }, { "epoch": 0.20878964078050868, "grad_norm": 0.32637158036231995, "learning_rate": 4.8218582624666406e-05, "loss": 0.1668, "step": 11706 }, { "epoch": 0.20880747690222237, "grad_norm": 0.2789934575557709, "learning_rate": 4.821800554580866e-05, "loss": 0.2033, "step": 11707 }, { "epoch": 0.2088253130239361, "grad_norm": 0.28855159878730774, "learning_rate": 4.8217428376949916e-05, "loss": 0.2071, "step": 11708 }, { "epoch": 0.20884314914564978, "grad_norm": 0.21203669905662537, "learning_rate": 4.82168511180924e-05, "loss": 0.1961, "step": 11709 }, { "epoch": 0.20886098526736346, "grad_norm": 0.25486427545547485, "learning_rate": 4.821627376923834e-05, "loss": 0.189, "step": 11710 }, { "epoch": 0.20887882138907715, "grad_norm": 0.24313603341579437, "learning_rate": 4.8215696330389994e-05, "loss": 0.2032, "step": 11711 }, { "epoch": 0.20889665751079087, "grad_norm": 0.2916683852672577, "learning_rate": 4.821511880154958e-05, "loss": 0.1774, "step": 11712 }, { "epoch": 0.20891449363250456, "grad_norm": 0.29036620259284973, "learning_rate": 4.8214541182719355e-05, "loss": 0.2242, "step": 11713 }, { "epoch": 0.20893232975421824, "grad_norm": 0.3922606110572815, "learning_rate": 4.8213963473901546e-05, "loss": 0.2757, "step": 11714 }, { "epoch": 0.20895016587593193, "grad_norm": 0.2874530553817749, "learning_rate": 4.82133856750984e-05, "loss": 0.2539, "step": 11715 }, { "epoch": 0.20896800199764562, "grad_norm": 0.21845576167106628, "learning_rate": 4.821280778631215e-05, "loss": 0.1825, "step": 11716 }, { "epoch": 0.20898583811935934, "grad_norm": 0.30289462208747864, "learning_rate": 4.821222980754504e-05, "loss": 0.1981, "step": 11717 }, { "epoch": 0.20900367424107302, "grad_norm": 0.29000192880630493, "learning_rate": 4.821165173879932e-05, "loss": 0.2164, "step": 11718 }, { "epoch": 0.2090215103627867, "grad_norm": 0.2553132176399231, "learning_rate": 4.8211073580077215e-05, "loss": 0.2171, "step": 11719 }, { "epoch": 0.2090393464845004, "grad_norm": 0.3072320520877838, "learning_rate": 4.821049533138097e-05, "loss": 0.2263, "step": 11720 }, { "epoch": 0.20905718260621411, "grad_norm": 0.27789634466171265, "learning_rate": 4.820991699271282e-05, "loss": 0.188, "step": 11721 }, { "epoch": 0.2090750187279278, "grad_norm": 0.21587303280830383, "learning_rate": 4.820933856407503e-05, "loss": 0.1947, "step": 11722 }, { "epoch": 0.2090928548496415, "grad_norm": 0.3030090630054474, "learning_rate": 4.820876004546982e-05, "loss": 0.2017, "step": 11723 }, { "epoch": 0.20911069097135518, "grad_norm": 0.19152145087718964, "learning_rate": 4.8208181436899443e-05, "loss": 0.1621, "step": 11724 }, { "epoch": 0.2091285270930689, "grad_norm": 0.2322104126214981, "learning_rate": 4.8207602738366136e-05, "loss": 0.1784, "step": 11725 }, { "epoch": 0.20914636321478258, "grad_norm": 0.24238301813602448, "learning_rate": 4.820702394987214e-05, "loss": 0.192, "step": 11726 }, { "epoch": 0.20916419933649627, "grad_norm": 0.25751087069511414, "learning_rate": 4.8206445071419714e-05, "loss": 0.1923, "step": 11727 }, { "epoch": 0.20918203545820996, "grad_norm": 0.3497230112552643, "learning_rate": 4.820586610301109e-05, "loss": 0.1868, "step": 11728 }, { "epoch": 0.20919987157992367, "grad_norm": 0.26342979073524475, "learning_rate": 4.820528704464851e-05, "loss": 0.2047, "step": 11729 }, { "epoch": 0.20921770770163736, "grad_norm": 0.3436564803123474, "learning_rate": 4.8204707896334224e-05, "loss": 0.1599, "step": 11730 }, { "epoch": 0.20923554382335105, "grad_norm": 0.1855187714099884, "learning_rate": 4.820412865807048e-05, "loss": 0.1529, "step": 11731 }, { "epoch": 0.20925337994506474, "grad_norm": 0.29702046513557434, "learning_rate": 4.820354932985951e-05, "loss": 0.2061, "step": 11732 }, { "epoch": 0.20927121606677843, "grad_norm": 0.242684468626976, "learning_rate": 4.8202969911703575e-05, "loss": 0.182, "step": 11733 }, { "epoch": 0.20928905218849214, "grad_norm": 0.3582392930984497, "learning_rate": 4.8202390403604915e-05, "loss": 0.2258, "step": 11734 }, { "epoch": 0.20930688831020583, "grad_norm": 0.18719659745693207, "learning_rate": 4.8201810805565776e-05, "loss": 0.1584, "step": 11735 }, { "epoch": 0.20932472443191952, "grad_norm": 0.24418845772743225, "learning_rate": 4.8201231117588393e-05, "loss": 0.1744, "step": 11736 }, { "epoch": 0.2093425605536332, "grad_norm": 0.41438546776771545, "learning_rate": 4.820065133967504e-05, "loss": 0.2297, "step": 11737 }, { "epoch": 0.20936039667534692, "grad_norm": 0.2501727044582367, "learning_rate": 4.820007147182794e-05, "loss": 0.2315, "step": 11738 }, { "epoch": 0.2093782327970606, "grad_norm": 0.27792489528656006, "learning_rate": 4.8199491514049354e-05, "loss": 0.2057, "step": 11739 }, { "epoch": 0.2093960689187743, "grad_norm": 0.45859941840171814, "learning_rate": 4.8198911466341525e-05, "loss": 0.1545, "step": 11740 }, { "epoch": 0.20941390504048799, "grad_norm": 0.4077008068561554, "learning_rate": 4.81983313287067e-05, "loss": 0.2182, "step": 11741 }, { "epoch": 0.2094317411622017, "grad_norm": 0.24512937664985657, "learning_rate": 4.819775110114714e-05, "loss": 0.1992, "step": 11742 }, { "epoch": 0.2094495772839154, "grad_norm": 0.3170695900917053, "learning_rate": 4.8197170783665075e-05, "loss": 0.1599, "step": 11743 }, { "epoch": 0.20946741340562908, "grad_norm": 0.28509825468063354, "learning_rate": 4.819659037626276e-05, "loss": 0.2171, "step": 11744 }, { "epoch": 0.20948524952734277, "grad_norm": 0.2481437474489212, "learning_rate": 4.819600987894246e-05, "loss": 0.2093, "step": 11745 }, { "epoch": 0.20950308564905648, "grad_norm": 0.23982587456703186, "learning_rate": 4.8195429291706406e-05, "loss": 0.2145, "step": 11746 }, { "epoch": 0.20952092177077017, "grad_norm": 0.33134645223617554, "learning_rate": 4.819484861455687e-05, "loss": 0.2098, "step": 11747 }, { "epoch": 0.20953875789248386, "grad_norm": 0.20616430044174194, "learning_rate": 4.8194267847496074e-05, "loss": 0.2048, "step": 11748 }, { "epoch": 0.20955659401419754, "grad_norm": 0.17882724106311798, "learning_rate": 4.8193686990526286e-05, "loss": 0.1799, "step": 11749 }, { "epoch": 0.20957443013591126, "grad_norm": 0.2518196702003479, "learning_rate": 4.8193106043649763e-05, "loss": 0.2014, "step": 11750 }, { "epoch": 0.20959226625762495, "grad_norm": 0.23048117756843567, "learning_rate": 4.819252500686875e-05, "loss": 0.2102, "step": 11751 }, { "epoch": 0.20961010237933864, "grad_norm": 0.2745598256587982, "learning_rate": 4.81919438801855e-05, "loss": 0.1998, "step": 11752 }, { "epoch": 0.20962793850105232, "grad_norm": 0.26343855261802673, "learning_rate": 4.819136266360226e-05, "loss": 0.2124, "step": 11753 }, { "epoch": 0.209645774622766, "grad_norm": 0.31484368443489075, "learning_rate": 4.8190781357121295e-05, "loss": 0.2256, "step": 11754 }, { "epoch": 0.20966361074447973, "grad_norm": 0.29990607500076294, "learning_rate": 4.819019996074484e-05, "loss": 0.2108, "step": 11755 }, { "epoch": 0.20968144686619342, "grad_norm": 0.19660401344299316, "learning_rate": 4.8189618474475175e-05, "loss": 0.1694, "step": 11756 }, { "epoch": 0.2096992829879071, "grad_norm": 0.31754928827285767, "learning_rate": 4.818903689831453e-05, "loss": 0.2387, "step": 11757 }, { "epoch": 0.2097171191096208, "grad_norm": 0.19113314151763916, "learning_rate": 4.8188455232265174e-05, "loss": 0.1914, "step": 11758 }, { "epoch": 0.2097349552313345, "grad_norm": 0.26601749658584595, "learning_rate": 4.818787347632935e-05, "loss": 0.2073, "step": 11759 }, { "epoch": 0.2097527913530482, "grad_norm": 0.31796202063560486, "learning_rate": 4.8187291630509324e-05, "loss": 0.2152, "step": 11760 }, { "epoch": 0.20977062747476188, "grad_norm": 0.2584232687950134, "learning_rate": 4.8186709694807346e-05, "loss": 0.1779, "step": 11761 }, { "epoch": 0.20978846359647557, "grad_norm": 0.32587021589279175, "learning_rate": 4.818612766922567e-05, "loss": 0.1862, "step": 11762 }, { "epoch": 0.2098062997181893, "grad_norm": 0.277302622795105, "learning_rate": 4.8185545553766564e-05, "loss": 0.2069, "step": 11763 }, { "epoch": 0.20982413583990298, "grad_norm": 0.38512516021728516, "learning_rate": 4.818496334843227e-05, "loss": 0.2824, "step": 11764 }, { "epoch": 0.20984197196161666, "grad_norm": 0.24719849228858948, "learning_rate": 4.818438105322505e-05, "loss": 0.1492, "step": 11765 }, { "epoch": 0.20985980808333035, "grad_norm": 0.30014151334762573, "learning_rate": 4.818379866814716e-05, "loss": 0.1974, "step": 11766 }, { "epoch": 0.20987764420504407, "grad_norm": 0.23975475132465363, "learning_rate": 4.8183216193200856e-05, "loss": 0.2285, "step": 11767 }, { "epoch": 0.20989548032675776, "grad_norm": 0.26746538281440735, "learning_rate": 4.8182633628388406e-05, "loss": 0.1758, "step": 11768 }, { "epoch": 0.20991331644847144, "grad_norm": 0.20747677981853485, "learning_rate": 4.8182050973712055e-05, "loss": 0.1621, "step": 11769 }, { "epoch": 0.20993115257018513, "grad_norm": 0.2552240490913391, "learning_rate": 4.818146822917407e-05, "loss": 0.1566, "step": 11770 }, { "epoch": 0.20994898869189885, "grad_norm": 0.21534845232963562, "learning_rate": 4.818088539477671e-05, "loss": 0.1422, "step": 11771 }, { "epoch": 0.20996682481361253, "grad_norm": 0.2540239095687866, "learning_rate": 4.818030247052223e-05, "loss": 0.2022, "step": 11772 }, { "epoch": 0.20998466093532622, "grad_norm": 0.24961107969284058, "learning_rate": 4.817971945641289e-05, "loss": 0.2003, "step": 11773 }, { "epoch": 0.2100024970570399, "grad_norm": 0.3151911795139313, "learning_rate": 4.817913635245096e-05, "loss": 0.2056, "step": 11774 }, { "epoch": 0.2100203331787536, "grad_norm": 0.34245213866233826, "learning_rate": 4.817855315863868e-05, "loss": 0.1899, "step": 11775 }, { "epoch": 0.21003816930046731, "grad_norm": 0.2559554874897003, "learning_rate": 4.8177969874978326e-05, "loss": 0.1803, "step": 11776 }, { "epoch": 0.210056005422181, "grad_norm": 0.25878554582595825, "learning_rate": 4.8177386501472154e-05, "loss": 0.186, "step": 11777 }, { "epoch": 0.2100738415438947, "grad_norm": 0.22685648500919342, "learning_rate": 4.817680303812243e-05, "loss": 0.2176, "step": 11778 }, { "epoch": 0.21009167766560838, "grad_norm": 0.22430676221847534, "learning_rate": 4.8176219484931404e-05, "loss": 0.1763, "step": 11779 }, { "epoch": 0.2101095137873221, "grad_norm": 0.3075858950614929, "learning_rate": 4.817563584190136e-05, "loss": 0.2276, "step": 11780 }, { "epoch": 0.21012734990903578, "grad_norm": 0.3472638428211212, "learning_rate": 4.817505210903454e-05, "loss": 0.1572, "step": 11781 }, { "epoch": 0.21014518603074947, "grad_norm": 0.3664585053920746, "learning_rate": 4.8174468286333216e-05, "loss": 0.1972, "step": 11782 }, { "epoch": 0.21016302215246316, "grad_norm": 0.2868519723415375, "learning_rate": 4.8173884373799644e-05, "loss": 0.2241, "step": 11783 }, { "epoch": 0.21018085827417687, "grad_norm": 0.2659103572368622, "learning_rate": 4.81733003714361e-05, "loss": 0.2196, "step": 11784 }, { "epoch": 0.21019869439589056, "grad_norm": 0.4240790605545044, "learning_rate": 4.817271627924483e-05, "loss": 0.2387, "step": 11785 }, { "epoch": 0.21021653051760425, "grad_norm": 0.28208059072494507, "learning_rate": 4.817213209722812e-05, "loss": 0.2099, "step": 11786 }, { "epoch": 0.21023436663931794, "grad_norm": 0.34443867206573486, "learning_rate": 4.8171547825388206e-05, "loss": 0.211, "step": 11787 }, { "epoch": 0.21025220276103165, "grad_norm": 0.27742916345596313, "learning_rate": 4.817096346372738e-05, "loss": 0.2465, "step": 11788 }, { "epoch": 0.21027003888274534, "grad_norm": 0.2281203269958496, "learning_rate": 4.8170379012247905e-05, "loss": 0.2177, "step": 11789 }, { "epoch": 0.21028787500445903, "grad_norm": 0.235040083527565, "learning_rate": 4.8169794470952024e-05, "loss": 0.1531, "step": 11790 }, { "epoch": 0.21030571112617272, "grad_norm": 0.21724410355091095, "learning_rate": 4.816920983984202e-05, "loss": 0.1489, "step": 11791 }, { "epoch": 0.21032354724788643, "grad_norm": 0.5910566449165344, "learning_rate": 4.816862511892016e-05, "loss": 0.2799, "step": 11792 }, { "epoch": 0.21034138336960012, "grad_norm": 0.3476826846599579, "learning_rate": 4.81680403081887e-05, "loss": 0.2637, "step": 11793 }, { "epoch": 0.2103592194913138, "grad_norm": 0.24049030244350433, "learning_rate": 4.816745540764992e-05, "loss": 0.2091, "step": 11794 }, { "epoch": 0.2103770556130275, "grad_norm": 0.23804958164691925, "learning_rate": 4.816687041730608e-05, "loss": 0.1957, "step": 11795 }, { "epoch": 0.21039489173474119, "grad_norm": 0.2845692038536072, "learning_rate": 4.816628533715945e-05, "loss": 0.2095, "step": 11796 }, { "epoch": 0.2104127278564549, "grad_norm": 0.32254090905189514, "learning_rate": 4.816570016721229e-05, "loss": 0.2591, "step": 11797 }, { "epoch": 0.2104305639781686, "grad_norm": 0.2692525088787079, "learning_rate": 4.816511490746689e-05, "loss": 0.2275, "step": 11798 }, { "epoch": 0.21044840009988228, "grad_norm": 0.2805209457874298, "learning_rate": 4.8164529557925487e-05, "loss": 0.202, "step": 11799 }, { "epoch": 0.21046623622159596, "grad_norm": 0.2221936285495758, "learning_rate": 4.8163944118590374e-05, "loss": 0.1844, "step": 11800 }, { "epoch": 0.21048407234330968, "grad_norm": 0.276731014251709, "learning_rate": 4.816335858946381e-05, "loss": 0.2538, "step": 11801 }, { "epoch": 0.21050190846502337, "grad_norm": 0.33310019969940186, "learning_rate": 4.8162772970548073e-05, "loss": 0.2104, "step": 11802 }, { "epoch": 0.21051974458673706, "grad_norm": 0.395674467086792, "learning_rate": 4.8162187261845425e-05, "loss": 0.2155, "step": 11803 }, { "epoch": 0.21053758070845074, "grad_norm": 0.2592020332813263, "learning_rate": 4.8161601463358145e-05, "loss": 0.2121, "step": 11804 }, { "epoch": 0.21055541683016446, "grad_norm": 0.33151042461395264, "learning_rate": 4.816101557508849e-05, "loss": 0.2532, "step": 11805 }, { "epoch": 0.21057325295187815, "grad_norm": 0.2417377531528473, "learning_rate": 4.8160429597038735e-05, "loss": 0.2056, "step": 11806 }, { "epoch": 0.21059108907359184, "grad_norm": 0.3523246943950653, "learning_rate": 4.8159843529211166e-05, "loss": 0.2051, "step": 11807 }, { "epoch": 0.21060892519530552, "grad_norm": 0.300409734249115, "learning_rate": 4.815925737160804e-05, "loss": 0.1907, "step": 11808 }, { "epoch": 0.21062676131701924, "grad_norm": 0.2804553806781769, "learning_rate": 4.8158671124231636e-05, "loss": 0.2201, "step": 11809 }, { "epoch": 0.21064459743873293, "grad_norm": 0.297488272190094, "learning_rate": 4.8158084787084216e-05, "loss": 0.1939, "step": 11810 }, { "epoch": 0.21066243356044662, "grad_norm": 0.26098382472991943, "learning_rate": 4.815749836016807e-05, "loss": 0.2015, "step": 11811 }, { "epoch": 0.2106802696821603, "grad_norm": 0.3780629634857178, "learning_rate": 4.8156911843485454e-05, "loss": 0.1788, "step": 11812 }, { "epoch": 0.210698105803874, "grad_norm": 0.26268333196640015, "learning_rate": 4.815632523703866e-05, "loss": 0.2065, "step": 11813 }, { "epoch": 0.2107159419255877, "grad_norm": 0.3022797107696533, "learning_rate": 4.815573854082994e-05, "loss": 0.2288, "step": 11814 }, { "epoch": 0.2107337780473014, "grad_norm": 0.2933153808116913, "learning_rate": 4.815515175486159e-05, "loss": 0.2226, "step": 11815 }, { "epoch": 0.21075161416901508, "grad_norm": 0.22890408337116241, "learning_rate": 4.8154564879135865e-05, "loss": 0.179, "step": 11816 }, { "epoch": 0.21076945029072877, "grad_norm": 0.3245255649089813, "learning_rate": 4.815397791365506e-05, "loss": 0.1807, "step": 11817 }, { "epoch": 0.2107872864124425, "grad_norm": 0.27721354365348816, "learning_rate": 4.815339085842143e-05, "loss": 0.1805, "step": 11818 }, { "epoch": 0.21080512253415618, "grad_norm": 0.2621777057647705, "learning_rate": 4.815280371343726e-05, "loss": 0.2, "step": 11819 }, { "epoch": 0.21082295865586986, "grad_norm": 0.3462750017642975, "learning_rate": 4.815221647870483e-05, "loss": 0.2016, "step": 11820 }, { "epoch": 0.21084079477758355, "grad_norm": 0.24098993837833405, "learning_rate": 4.815162915422641e-05, "loss": 0.1859, "step": 11821 }, { "epoch": 0.21085863089929727, "grad_norm": 0.34345999360084534, "learning_rate": 4.815104174000428e-05, "loss": 0.2116, "step": 11822 }, { "epoch": 0.21087646702101095, "grad_norm": 0.27372217178344727, "learning_rate": 4.815045423604072e-05, "loss": 0.2217, "step": 11823 }, { "epoch": 0.21089430314272464, "grad_norm": 0.3180995583534241, "learning_rate": 4.8149866642338e-05, "loss": 0.162, "step": 11824 }, { "epoch": 0.21091213926443833, "grad_norm": 0.27679339051246643, "learning_rate": 4.81492789588984e-05, "loss": 0.1638, "step": 11825 }, { "epoch": 0.21092997538615205, "grad_norm": 0.31787386536598206, "learning_rate": 4.81486911857242e-05, "loss": 0.2216, "step": 11826 }, { "epoch": 0.21094781150786573, "grad_norm": 0.35636964440345764, "learning_rate": 4.814810332281768e-05, "loss": 0.1713, "step": 11827 }, { "epoch": 0.21096564762957942, "grad_norm": 0.29187142848968506, "learning_rate": 4.814751537018112e-05, "loss": 0.1877, "step": 11828 }, { "epoch": 0.2109834837512931, "grad_norm": 0.22472573816776276, "learning_rate": 4.814692732781678e-05, "loss": 0.141, "step": 11829 }, { "epoch": 0.21100131987300683, "grad_norm": 0.31143370270729065, "learning_rate": 4.814633919572697e-05, "loss": 0.1929, "step": 11830 }, { "epoch": 0.21101915599472051, "grad_norm": 0.39636850357055664, "learning_rate": 4.814575097391395e-05, "loss": 0.2463, "step": 11831 }, { "epoch": 0.2110369921164342, "grad_norm": 0.3290325999259949, "learning_rate": 4.8145162662380006e-05, "loss": 0.2384, "step": 11832 }, { "epoch": 0.2110548282381479, "grad_norm": 0.3592544496059418, "learning_rate": 4.814457426112742e-05, "loss": 0.2279, "step": 11833 }, { "epoch": 0.21107266435986158, "grad_norm": 0.24955043196678162, "learning_rate": 4.8143985770158464e-05, "loss": 0.2159, "step": 11834 }, { "epoch": 0.2110905004815753, "grad_norm": 0.28856950998306274, "learning_rate": 4.8143397189475425e-05, "loss": 0.2151, "step": 11835 }, { "epoch": 0.21110833660328898, "grad_norm": 0.3253841698169708, "learning_rate": 4.814280851908059e-05, "loss": 0.2263, "step": 11836 }, { "epoch": 0.21112617272500267, "grad_norm": 0.2808425724506378, "learning_rate": 4.8142219758976235e-05, "loss": 0.2373, "step": 11837 }, { "epoch": 0.21114400884671636, "grad_norm": 0.25354379415512085, "learning_rate": 4.8141630909164646e-05, "loss": 0.1603, "step": 11838 }, { "epoch": 0.21116184496843007, "grad_norm": 0.27182918787002563, "learning_rate": 4.814104196964809e-05, "loss": 0.2015, "step": 11839 }, { "epoch": 0.21117968109014376, "grad_norm": 0.23462463915348053, "learning_rate": 4.814045294042887e-05, "loss": 0.1674, "step": 11840 }, { "epoch": 0.21119751721185745, "grad_norm": 0.24965649843215942, "learning_rate": 4.8139863821509265e-05, "loss": 0.2268, "step": 11841 }, { "epoch": 0.21121535333357114, "grad_norm": 0.3184555470943451, "learning_rate": 4.8139274612891564e-05, "loss": 0.237, "step": 11842 }, { "epoch": 0.21123318945528485, "grad_norm": 0.3308340907096863, "learning_rate": 4.813868531457803e-05, "loss": 0.2417, "step": 11843 }, { "epoch": 0.21125102557699854, "grad_norm": 0.22181598842144012, "learning_rate": 4.813809592657096e-05, "loss": 0.1977, "step": 11844 }, { "epoch": 0.21126886169871223, "grad_norm": 0.278463751077652, "learning_rate": 4.8137506448872636e-05, "loss": 0.2269, "step": 11845 }, { "epoch": 0.21128669782042592, "grad_norm": 0.27575892210006714, "learning_rate": 4.8136916881485355e-05, "loss": 0.2041, "step": 11846 }, { "epoch": 0.21130453394213963, "grad_norm": 0.3143797814846039, "learning_rate": 4.8136327224411394e-05, "loss": 0.1607, "step": 11847 }, { "epoch": 0.21132237006385332, "grad_norm": 0.2297838181257248, "learning_rate": 4.8135737477653035e-05, "loss": 0.1843, "step": 11848 }, { "epoch": 0.211340206185567, "grad_norm": 0.25091153383255005, "learning_rate": 4.8135147641212555e-05, "loss": 0.1848, "step": 11849 }, { "epoch": 0.2113580423072807, "grad_norm": 0.2786402702331543, "learning_rate": 4.8134557715092266e-05, "loss": 0.149, "step": 11850 }, { "epoch": 0.2113758784289944, "grad_norm": 0.294948548078537, "learning_rate": 4.8133967699294436e-05, "loss": 0.2547, "step": 11851 }, { "epoch": 0.2113937145507081, "grad_norm": 0.2742416262626648, "learning_rate": 4.813337759382136e-05, "loss": 0.193, "step": 11852 }, { "epoch": 0.2114115506724218, "grad_norm": 0.2357669323682785, "learning_rate": 4.8132787398675314e-05, "loss": 0.2058, "step": 11853 }, { "epoch": 0.21142938679413548, "grad_norm": 0.40839940309524536, "learning_rate": 4.81321971138586e-05, "loss": 0.213, "step": 11854 }, { "epoch": 0.21144722291584916, "grad_norm": 0.24076344072818756, "learning_rate": 4.81316067393735e-05, "loss": 0.1451, "step": 11855 }, { "epoch": 0.21146505903756288, "grad_norm": 0.2290666699409485, "learning_rate": 4.813101627522231e-05, "loss": 0.1757, "step": 11856 }, { "epoch": 0.21148289515927657, "grad_norm": 0.32733190059661865, "learning_rate": 4.81304257214073e-05, "loss": 0.1944, "step": 11857 }, { "epoch": 0.21150073128099026, "grad_norm": 0.2146921306848526, "learning_rate": 4.8129835077930775e-05, "loss": 0.1594, "step": 11858 }, { "epoch": 0.21151856740270394, "grad_norm": 0.2122744619846344, "learning_rate": 4.812924434479502e-05, "loss": 0.1781, "step": 11859 }, { "epoch": 0.21153640352441766, "grad_norm": 0.3274732530117035, "learning_rate": 4.8128653522002326e-05, "loss": 0.218, "step": 11860 }, { "epoch": 0.21155423964613135, "grad_norm": 0.26031941175460815, "learning_rate": 4.812806260955498e-05, "loss": 0.1755, "step": 11861 }, { "epoch": 0.21157207576784504, "grad_norm": 0.48987406492233276, "learning_rate": 4.812747160745528e-05, "loss": 0.2388, "step": 11862 }, { "epoch": 0.21158991188955872, "grad_norm": 0.26355957984924316, "learning_rate": 4.812688051570551e-05, "loss": 0.2151, "step": 11863 }, { "epoch": 0.21160774801127244, "grad_norm": 0.2524564564228058, "learning_rate": 4.812628933430797e-05, "loss": 0.1913, "step": 11864 }, { "epoch": 0.21162558413298613, "grad_norm": 0.28413692116737366, "learning_rate": 4.812569806326493e-05, "loss": 0.1949, "step": 11865 }, { "epoch": 0.21164342025469982, "grad_norm": 0.3151012659072876, "learning_rate": 4.812510670257871e-05, "loss": 0.1919, "step": 11866 }, { "epoch": 0.2116612563764135, "grad_norm": 0.23169133067131042, "learning_rate": 4.8124515252251586e-05, "loss": 0.2148, "step": 11867 }, { "epoch": 0.21167909249812722, "grad_norm": 0.32055649161338806, "learning_rate": 4.812392371228585e-05, "loss": 0.2344, "step": 11868 }, { "epoch": 0.2116969286198409, "grad_norm": 0.2602478265762329, "learning_rate": 4.81233320826838e-05, "loss": 0.1945, "step": 11869 }, { "epoch": 0.2117147647415546, "grad_norm": 0.2460232824087143, "learning_rate": 4.812274036344773e-05, "loss": 0.1679, "step": 11870 }, { "epoch": 0.21173260086326828, "grad_norm": 0.26596733927726746, "learning_rate": 4.812214855457994e-05, "loss": 0.2208, "step": 11871 }, { "epoch": 0.211750436984982, "grad_norm": 0.3579787611961365, "learning_rate": 4.81215566560827e-05, "loss": 0.1963, "step": 11872 }, { "epoch": 0.2117682731066957, "grad_norm": 0.24507632851600647, "learning_rate": 4.8120964667958334e-05, "loss": 0.1884, "step": 11873 }, { "epoch": 0.21178610922840937, "grad_norm": 0.410803884267807, "learning_rate": 4.812037259020912e-05, "loss": 0.2362, "step": 11874 }, { "epoch": 0.21180394535012306, "grad_norm": 0.30791109800338745, "learning_rate": 4.811978042283735e-05, "loss": 0.1933, "step": 11875 }, { "epoch": 0.21182178147183675, "grad_norm": 0.33453458547592163, "learning_rate": 4.811918816584533e-05, "loss": 0.217, "step": 11876 }, { "epoch": 0.21183961759355047, "grad_norm": 0.2710801064968109, "learning_rate": 4.811859581923535e-05, "loss": 0.1559, "step": 11877 }, { "epoch": 0.21185745371526415, "grad_norm": 0.2979060709476471, "learning_rate": 4.811800338300971e-05, "loss": 0.2213, "step": 11878 }, { "epoch": 0.21187528983697784, "grad_norm": 0.42303067445755005, "learning_rate": 4.81174108571707e-05, "loss": 0.2194, "step": 11879 }, { "epoch": 0.21189312595869153, "grad_norm": 0.19875451922416687, "learning_rate": 4.811681824172063e-05, "loss": 0.188, "step": 11880 }, { "epoch": 0.21191096208040525, "grad_norm": 0.2997405529022217, "learning_rate": 4.811622553666178e-05, "loss": 0.2203, "step": 11881 }, { "epoch": 0.21192879820211893, "grad_norm": 0.2618153393268585, "learning_rate": 4.8115632741996455e-05, "loss": 0.1761, "step": 11882 }, { "epoch": 0.21194663432383262, "grad_norm": 0.2891547977924347, "learning_rate": 4.8115039857726965e-05, "loss": 0.2172, "step": 11883 }, { "epoch": 0.2119644704455463, "grad_norm": 0.28733545541763306, "learning_rate": 4.811444688385558e-05, "loss": 0.2071, "step": 11884 }, { "epoch": 0.21198230656726003, "grad_norm": 0.46259891986846924, "learning_rate": 4.8113853820384625e-05, "loss": 0.2229, "step": 11885 }, { "epoch": 0.21200014268897371, "grad_norm": 0.43129998445510864, "learning_rate": 4.811326066731639e-05, "loss": 0.1536, "step": 11886 }, { "epoch": 0.2120179788106874, "grad_norm": 0.2170029580593109, "learning_rate": 4.8112667424653165e-05, "loss": 0.1908, "step": 11887 }, { "epoch": 0.2120358149324011, "grad_norm": 0.3331025242805481, "learning_rate": 4.811207409239727e-05, "loss": 0.1963, "step": 11888 }, { "epoch": 0.2120536510541148, "grad_norm": 0.42987164855003357, "learning_rate": 4.811148067055099e-05, "loss": 0.232, "step": 11889 }, { "epoch": 0.2120714871758285, "grad_norm": 0.45533299446105957, "learning_rate": 4.811088715911662e-05, "loss": 0.1773, "step": 11890 }, { "epoch": 0.21208932329754218, "grad_norm": 0.23405250906944275, "learning_rate": 4.8110293558096484e-05, "loss": 0.1527, "step": 11891 }, { "epoch": 0.21210715941925587, "grad_norm": 0.29416489601135254, "learning_rate": 4.810969986749286e-05, "loss": 0.2106, "step": 11892 }, { "epoch": 0.21212499554096959, "grad_norm": 0.2741703391075134, "learning_rate": 4.8109106087308056e-05, "loss": 0.2402, "step": 11893 }, { "epoch": 0.21214283166268327, "grad_norm": 0.286952406167984, "learning_rate": 4.8108512217544375e-05, "loss": 0.2211, "step": 11894 }, { "epoch": 0.21216066778439696, "grad_norm": 0.28501641750335693, "learning_rate": 4.810791825820412e-05, "loss": 0.1733, "step": 11895 }, { "epoch": 0.21217850390611065, "grad_norm": 0.2270091474056244, "learning_rate": 4.8107324209289595e-05, "loss": 0.1887, "step": 11896 }, { "epoch": 0.21219634002782434, "grad_norm": 0.30119457840919495, "learning_rate": 4.81067300708031e-05, "loss": 0.1958, "step": 11897 }, { "epoch": 0.21221417614953805, "grad_norm": 0.2292444109916687, "learning_rate": 4.810613584274693e-05, "loss": 0.1988, "step": 11898 }, { "epoch": 0.21223201227125174, "grad_norm": 0.2498779594898224, "learning_rate": 4.810554152512341e-05, "loss": 0.2417, "step": 11899 }, { "epoch": 0.21224984839296543, "grad_norm": 0.2860521376132965, "learning_rate": 4.8104947117934826e-05, "loss": 0.2432, "step": 11900 }, { "epoch": 0.21226768451467912, "grad_norm": 0.25130772590637207, "learning_rate": 4.8104352621183486e-05, "loss": 0.1743, "step": 11901 }, { "epoch": 0.21228552063639283, "grad_norm": 0.3735920488834381, "learning_rate": 4.810375803487169e-05, "loss": 0.164, "step": 11902 }, { "epoch": 0.21230335675810652, "grad_norm": 0.24691012501716614, "learning_rate": 4.810316335900176e-05, "loss": 0.1604, "step": 11903 }, { "epoch": 0.2123211928798202, "grad_norm": 0.32348644733428955, "learning_rate": 4.810256859357598e-05, "loss": 0.2053, "step": 11904 }, { "epoch": 0.2123390290015339, "grad_norm": 0.32618266344070435, "learning_rate": 4.8101973738596664e-05, "loss": 0.1986, "step": 11905 }, { "epoch": 0.2123568651232476, "grad_norm": 0.2829591631889343, "learning_rate": 4.810137879406612e-05, "loss": 0.1656, "step": 11906 }, { "epoch": 0.2123747012449613, "grad_norm": 0.243828684091568, "learning_rate": 4.8100783759986657e-05, "loss": 0.1787, "step": 11907 }, { "epoch": 0.212392537366675, "grad_norm": 0.2694132924079895, "learning_rate": 4.810018863636058e-05, "loss": 0.188, "step": 11908 }, { "epoch": 0.21241037348838868, "grad_norm": 0.3744795024394989, "learning_rate": 4.809959342319018e-05, "loss": 0.2002, "step": 11909 }, { "epoch": 0.2124282096101024, "grad_norm": 0.28664690256118774, "learning_rate": 4.809899812047779e-05, "loss": 0.1884, "step": 11910 }, { "epoch": 0.21244604573181608, "grad_norm": 0.2437635362148285, "learning_rate": 4.80984027282257e-05, "loss": 0.1947, "step": 11911 }, { "epoch": 0.21246388185352977, "grad_norm": 0.3605238199234009, "learning_rate": 4.8097807246436224e-05, "loss": 0.2133, "step": 11912 }, { "epoch": 0.21248171797524346, "grad_norm": 0.26123175024986267, "learning_rate": 4.809721167511167e-05, "loss": 0.1601, "step": 11913 }, { "epoch": 0.21249955409695714, "grad_norm": 0.3012862801551819, "learning_rate": 4.8096616014254345e-05, "loss": 0.1922, "step": 11914 }, { "epoch": 0.21251739021867086, "grad_norm": 0.2996641993522644, "learning_rate": 4.809602026386656e-05, "loss": 0.1709, "step": 11915 }, { "epoch": 0.21253522634038455, "grad_norm": 0.29815566539764404, "learning_rate": 4.809542442395062e-05, "loss": 0.2033, "step": 11916 }, { "epoch": 0.21255306246209824, "grad_norm": 0.27722781896591187, "learning_rate": 4.809482849450885e-05, "loss": 0.2296, "step": 11917 }, { "epoch": 0.21257089858381192, "grad_norm": 0.29742431640625, "learning_rate": 4.809423247554353e-05, "loss": 0.2146, "step": 11918 }, { "epoch": 0.21258873470552564, "grad_norm": 0.25360652804374695, "learning_rate": 4.8093636367057e-05, "loss": 0.193, "step": 11919 }, { "epoch": 0.21260657082723933, "grad_norm": 0.33742067217826843, "learning_rate": 4.809304016905156e-05, "loss": 0.19, "step": 11920 }, { "epoch": 0.21262440694895302, "grad_norm": 0.2800961136817932, "learning_rate": 4.8092443881529514e-05, "loss": 0.2151, "step": 11921 }, { "epoch": 0.2126422430706667, "grad_norm": 0.38956162333488464, "learning_rate": 4.8091847504493184e-05, "loss": 0.2057, "step": 11922 }, { "epoch": 0.21266007919238042, "grad_norm": 0.24416503310203552, "learning_rate": 4.809125103794487e-05, "loss": 0.1762, "step": 11923 }, { "epoch": 0.2126779153140941, "grad_norm": 0.27841487526893616, "learning_rate": 4.80906544818869e-05, "loss": 0.2577, "step": 11924 }, { "epoch": 0.2126957514358078, "grad_norm": 0.26839733123779297, "learning_rate": 4.8090057836321577e-05, "loss": 0.1725, "step": 11925 }, { "epoch": 0.21271358755752148, "grad_norm": 0.27964547276496887, "learning_rate": 4.808946110125121e-05, "loss": 0.1925, "step": 11926 }, { "epoch": 0.2127314236792352, "grad_norm": 0.28022778034210205, "learning_rate": 4.808886427667812e-05, "loss": 0.2106, "step": 11927 }, { "epoch": 0.2127492598009489, "grad_norm": 0.36025023460388184, "learning_rate": 4.8088267362604615e-05, "loss": 0.2445, "step": 11928 }, { "epoch": 0.21276709592266257, "grad_norm": 0.16520865261554718, "learning_rate": 4.8087670359033016e-05, "loss": 0.1755, "step": 11929 }, { "epoch": 0.21278493204437626, "grad_norm": 0.22348365187644958, "learning_rate": 4.808707326596563e-05, "loss": 0.1452, "step": 11930 }, { "epoch": 0.21280276816608998, "grad_norm": 0.2352830022573471, "learning_rate": 4.8086476083404775e-05, "loss": 0.2214, "step": 11931 }, { "epoch": 0.21282060428780367, "grad_norm": 0.2535237967967987, "learning_rate": 4.808587881135276e-05, "loss": 0.2172, "step": 11932 }, { "epoch": 0.21283844040951735, "grad_norm": 0.22616231441497803, "learning_rate": 4.8085281449811913e-05, "loss": 0.1837, "step": 11933 }, { "epoch": 0.21285627653123104, "grad_norm": 0.2927800416946411, "learning_rate": 4.8084683998784535e-05, "loss": 0.1422, "step": 11934 }, { "epoch": 0.21287411265294473, "grad_norm": 0.30131295323371887, "learning_rate": 4.808408645827295e-05, "loss": 0.2458, "step": 11935 }, { "epoch": 0.21289194877465845, "grad_norm": 0.2846871018409729, "learning_rate": 4.808348882827948e-05, "loss": 0.2268, "step": 11936 }, { "epoch": 0.21290978489637213, "grad_norm": 0.22972151637077332, "learning_rate": 4.8082891108806426e-05, "loss": 0.1559, "step": 11937 }, { "epoch": 0.21292762101808582, "grad_norm": 0.28498220443725586, "learning_rate": 4.808229329985612e-05, "loss": 0.222, "step": 11938 }, { "epoch": 0.2129454571397995, "grad_norm": 0.2661803960800171, "learning_rate": 4.8081695401430866e-05, "loss": 0.2112, "step": 11939 }, { "epoch": 0.21296329326151323, "grad_norm": 0.24602225422859192, "learning_rate": 4.808109741353299e-05, "loss": 0.2333, "step": 11940 }, { "epoch": 0.2129811293832269, "grad_norm": 0.2584832012653351, "learning_rate": 4.808049933616481e-05, "loss": 0.1862, "step": 11941 }, { "epoch": 0.2129989655049406, "grad_norm": 0.23810431361198425, "learning_rate": 4.8079901169328645e-05, "loss": 0.1852, "step": 11942 }, { "epoch": 0.2130168016266543, "grad_norm": 0.4730599522590637, "learning_rate": 4.8079302913026807e-05, "loss": 0.2304, "step": 11943 }, { "epoch": 0.213034637748368, "grad_norm": 0.20296625792980194, "learning_rate": 4.8078704567261626e-05, "loss": 0.1609, "step": 11944 }, { "epoch": 0.2130524738700817, "grad_norm": 0.20573608577251434, "learning_rate": 4.807810613203541e-05, "loss": 0.1551, "step": 11945 }, { "epoch": 0.21307030999179538, "grad_norm": 0.2590511441230774, "learning_rate": 4.807750760735048e-05, "loss": 0.1581, "step": 11946 }, { "epoch": 0.21308814611350907, "grad_norm": 0.2788696885108948, "learning_rate": 4.8076908993209166e-05, "loss": 0.19, "step": 11947 }, { "epoch": 0.21310598223522279, "grad_norm": 0.2824763357639313, "learning_rate": 4.807631028961378e-05, "loss": 0.2314, "step": 11948 }, { "epoch": 0.21312381835693647, "grad_norm": 0.30209213495254517, "learning_rate": 4.807571149656664e-05, "loss": 0.1945, "step": 11949 }, { "epoch": 0.21314165447865016, "grad_norm": 0.3376280665397644, "learning_rate": 4.8075112614070084e-05, "loss": 0.2085, "step": 11950 }, { "epoch": 0.21315949060036385, "grad_norm": 0.26649901270866394, "learning_rate": 4.807451364212642e-05, "loss": 0.2408, "step": 11951 }, { "epoch": 0.21317732672207756, "grad_norm": 0.2890677750110626, "learning_rate": 4.807391458073796e-05, "loss": 0.1671, "step": 11952 }, { "epoch": 0.21319516284379125, "grad_norm": 0.1995590478181839, "learning_rate": 4.8073315429907046e-05, "loss": 0.1647, "step": 11953 }, { "epoch": 0.21321299896550494, "grad_norm": 0.35758885741233826, "learning_rate": 4.8072716189635995e-05, "loss": 0.1911, "step": 11954 }, { "epoch": 0.21323083508721863, "grad_norm": 0.28260499238967896, "learning_rate": 4.807211685992712e-05, "loss": 0.1685, "step": 11955 }, { "epoch": 0.21324867120893232, "grad_norm": 0.257367879152298, "learning_rate": 4.8071517440782754e-05, "loss": 0.1681, "step": 11956 }, { "epoch": 0.21326650733064603, "grad_norm": 0.24316635727882385, "learning_rate": 4.8070917932205214e-05, "loss": 0.1854, "step": 11957 }, { "epoch": 0.21328434345235972, "grad_norm": 0.4340924918651581, "learning_rate": 4.807031833419683e-05, "loss": 0.2457, "step": 11958 }, { "epoch": 0.2133021795740734, "grad_norm": 0.27530351281166077, "learning_rate": 4.806971864675993e-05, "loss": 0.2573, "step": 11959 }, { "epoch": 0.2133200156957871, "grad_norm": 0.2605418562889099, "learning_rate": 4.8069118869896826e-05, "loss": 0.2114, "step": 11960 }, { "epoch": 0.2133378518175008, "grad_norm": 0.2141258865594864, "learning_rate": 4.806851900360985e-05, "loss": 0.1921, "step": 11961 }, { "epoch": 0.2133556879392145, "grad_norm": 0.19329896569252014, "learning_rate": 4.8067919047901326e-05, "loss": 0.1408, "step": 11962 }, { "epoch": 0.2133735240609282, "grad_norm": 0.3235015869140625, "learning_rate": 4.8067319002773584e-05, "loss": 0.1239, "step": 11963 }, { "epoch": 0.21339136018264188, "grad_norm": 0.35358837246894836, "learning_rate": 4.8066718868228945e-05, "loss": 0.2181, "step": 11964 }, { "epoch": 0.2134091963043556, "grad_norm": 0.20358708500862122, "learning_rate": 4.806611864426974e-05, "loss": 0.1683, "step": 11965 }, { "epoch": 0.21342703242606928, "grad_norm": 0.275892049074173, "learning_rate": 4.8065518330898285e-05, "loss": 0.2148, "step": 11966 }, { "epoch": 0.21344486854778297, "grad_norm": 0.3201720118522644, "learning_rate": 4.806491792811691e-05, "loss": 0.2338, "step": 11967 }, { "epoch": 0.21346270466949666, "grad_norm": 0.1990060806274414, "learning_rate": 4.8064317435927966e-05, "loss": 0.1785, "step": 11968 }, { "epoch": 0.21348054079121037, "grad_norm": 0.39237022399902344, "learning_rate": 4.8063716854333743e-05, "loss": 0.2709, "step": 11969 }, { "epoch": 0.21349837691292406, "grad_norm": 0.2247430980205536, "learning_rate": 4.8063116183336596e-05, "loss": 0.1909, "step": 11970 }, { "epoch": 0.21351621303463775, "grad_norm": 0.3619181215763092, "learning_rate": 4.806251542293885e-05, "loss": 0.1805, "step": 11971 }, { "epoch": 0.21353404915635144, "grad_norm": 0.29609400033950806, "learning_rate": 4.806191457314282e-05, "loss": 0.2129, "step": 11972 }, { "epoch": 0.21355188527806515, "grad_norm": 0.2318839281797409, "learning_rate": 4.806131363395084e-05, "loss": 0.1717, "step": 11973 }, { "epoch": 0.21356972139977884, "grad_norm": 0.2564683258533478, "learning_rate": 4.806071260536526e-05, "loss": 0.1953, "step": 11974 }, { "epoch": 0.21358755752149253, "grad_norm": 0.2644842863082886, "learning_rate": 4.8060111487388376e-05, "loss": 0.1874, "step": 11975 }, { "epoch": 0.21360539364320622, "grad_norm": 0.3433472812175751, "learning_rate": 4.805951028002254e-05, "loss": 0.2004, "step": 11976 }, { "epoch": 0.2136232297649199, "grad_norm": 0.2824678421020508, "learning_rate": 4.8058908983270076e-05, "loss": 0.2133, "step": 11977 }, { "epoch": 0.21364106588663362, "grad_norm": 0.3142847716808319, "learning_rate": 4.8058307597133324e-05, "loss": 0.2046, "step": 11978 }, { "epoch": 0.2136589020083473, "grad_norm": 0.24438883364200592, "learning_rate": 4.80577061216146e-05, "loss": 0.2191, "step": 11979 }, { "epoch": 0.213676738130061, "grad_norm": 0.3002220690250397, "learning_rate": 4.805710455671625e-05, "loss": 0.1785, "step": 11980 }, { "epoch": 0.21369457425177468, "grad_norm": 0.35666581988334656, "learning_rate": 4.805650290244059e-05, "loss": 0.1978, "step": 11981 }, { "epoch": 0.2137124103734884, "grad_norm": 0.2793177664279938, "learning_rate": 4.805590115878997e-05, "loss": 0.1631, "step": 11982 }, { "epoch": 0.2137302464952021, "grad_norm": 0.2895793318748474, "learning_rate": 4.8055299325766714e-05, "loss": 0.1832, "step": 11983 }, { "epoch": 0.21374808261691577, "grad_norm": 0.19846618175506592, "learning_rate": 4.805469740337315e-05, "loss": 0.1969, "step": 11984 }, { "epoch": 0.21376591873862946, "grad_norm": 0.286077618598938, "learning_rate": 4.805409539161162e-05, "loss": 0.2073, "step": 11985 }, { "epoch": 0.21378375486034318, "grad_norm": 0.2080845832824707, "learning_rate": 4.8053493290484444e-05, "loss": 0.2, "step": 11986 }, { "epoch": 0.21380159098205687, "grad_norm": 0.23820963501930237, "learning_rate": 4.805289109999398e-05, "loss": 0.1443, "step": 11987 }, { "epoch": 0.21381942710377055, "grad_norm": 0.3149418234825134, "learning_rate": 4.805228882014254e-05, "loss": 0.2416, "step": 11988 }, { "epoch": 0.21383726322548424, "grad_norm": 0.3047579526901245, "learning_rate": 4.8051686450932465e-05, "loss": 0.1769, "step": 11989 }, { "epoch": 0.21385509934719796, "grad_norm": 0.4249333143234253, "learning_rate": 4.805108399236609e-05, "loss": 0.2322, "step": 11990 }, { "epoch": 0.21387293546891165, "grad_norm": 0.2610306739807129, "learning_rate": 4.805048144444576e-05, "loss": 0.2014, "step": 11991 }, { "epoch": 0.21389077159062533, "grad_norm": 0.2611127197742462, "learning_rate": 4.80498788071738e-05, "loss": 0.204, "step": 11992 }, { "epoch": 0.21390860771233902, "grad_norm": 0.22319549322128296, "learning_rate": 4.8049276080552544e-05, "loss": 0.2071, "step": 11993 }, { "epoch": 0.21392644383405274, "grad_norm": 0.3226379156112671, "learning_rate": 4.804867326458433e-05, "loss": 0.2498, "step": 11994 }, { "epoch": 0.21394427995576643, "grad_norm": 0.31403273344039917, "learning_rate": 4.804807035927151e-05, "loss": 0.201, "step": 11995 }, { "epoch": 0.2139621160774801, "grad_norm": 0.26563963294029236, "learning_rate": 4.80474673646164e-05, "loss": 0.2283, "step": 11996 }, { "epoch": 0.2139799521991938, "grad_norm": 0.32253947854042053, "learning_rate": 4.804686428062135e-05, "loss": 0.2401, "step": 11997 }, { "epoch": 0.2139977883209075, "grad_norm": 0.2164137363433838, "learning_rate": 4.804626110728869e-05, "loss": 0.15, "step": 11998 }, { "epoch": 0.2140156244426212, "grad_norm": 0.27956709265708923, "learning_rate": 4.804565784462076e-05, "loss": 0.1889, "step": 11999 }, { "epoch": 0.2140334605643349, "grad_norm": 0.20151473581790924, "learning_rate": 4.8045054492619914e-05, "loss": 0.196, "step": 12000 }, { "epoch": 0.2140334605643349, "eval_loss": 0.19065994024276733, "eval_runtime": 106.7888, "eval_samples_per_second": 9.589, "eval_steps_per_second": 1.601, "step": 12000 }, { "epoch": 0.21405129668604858, "grad_norm": 0.1964709311723709, "learning_rate": 4.8044451051288465e-05, "loss": 0.1239, "step": 12001 }, { "epoch": 0.21406913280776227, "grad_norm": 0.3363712430000305, "learning_rate": 4.804384752062876e-05, "loss": 0.2575, "step": 12002 }, { "epoch": 0.21408696892947598, "grad_norm": 0.26239413022994995, "learning_rate": 4.804324390064315e-05, "loss": 0.2051, "step": 12003 }, { "epoch": 0.21410480505118967, "grad_norm": 0.20325836539268494, "learning_rate": 4.804264019133397e-05, "loss": 0.1709, "step": 12004 }, { "epoch": 0.21412264117290336, "grad_norm": 0.3335111141204834, "learning_rate": 4.804203639270356e-05, "loss": 0.2097, "step": 12005 }, { "epoch": 0.21414047729461705, "grad_norm": 0.3034976124763489, "learning_rate": 4.804143250475425e-05, "loss": 0.1847, "step": 12006 }, { "epoch": 0.21415831341633076, "grad_norm": 0.26121431589126587, "learning_rate": 4.80408285274884e-05, "loss": 0.1882, "step": 12007 }, { "epoch": 0.21417614953804445, "grad_norm": 0.32125476002693176, "learning_rate": 4.804022446090834e-05, "loss": 0.1768, "step": 12008 }, { "epoch": 0.21419398565975814, "grad_norm": 0.2741241455078125, "learning_rate": 4.8039620305016396e-05, "loss": 0.1964, "step": 12009 }, { "epoch": 0.21421182178147183, "grad_norm": 0.1951020509004593, "learning_rate": 4.8039016059814935e-05, "loss": 0.159, "step": 12010 }, { "epoch": 0.21422965790318554, "grad_norm": 0.25098711252212524, "learning_rate": 4.803841172530629e-05, "loss": 0.2214, "step": 12011 }, { "epoch": 0.21424749402489923, "grad_norm": 0.3764820992946625, "learning_rate": 4.803780730149281e-05, "loss": 0.2713, "step": 12012 }, { "epoch": 0.21426533014661292, "grad_norm": 0.2963665723800659, "learning_rate": 4.803720278837683e-05, "loss": 0.1738, "step": 12013 }, { "epoch": 0.2142831662683266, "grad_norm": 0.19593137502670288, "learning_rate": 4.803659818596069e-05, "loss": 0.1776, "step": 12014 }, { "epoch": 0.2143010023900403, "grad_norm": 0.3092212378978729, "learning_rate": 4.803599349424675e-05, "loss": 0.1467, "step": 12015 }, { "epoch": 0.214318838511754, "grad_norm": 0.24250073730945587, "learning_rate": 4.803538871323733e-05, "loss": 0.2136, "step": 12016 }, { "epoch": 0.2143366746334677, "grad_norm": 0.2425922006368637, "learning_rate": 4.803478384293479e-05, "loss": 0.2097, "step": 12017 }, { "epoch": 0.2143545107551814, "grad_norm": 0.24515973031520844, "learning_rate": 4.803417888334148e-05, "loss": 0.1966, "step": 12018 }, { "epoch": 0.21437234687689508, "grad_norm": 0.24052035808563232, "learning_rate": 4.803357383445972e-05, "loss": 0.1853, "step": 12019 }, { "epoch": 0.2143901829986088, "grad_norm": 0.24403268098831177, "learning_rate": 4.803296869629188e-05, "loss": 0.1928, "step": 12020 }, { "epoch": 0.21440801912032248, "grad_norm": 0.33883607387542725, "learning_rate": 4.80323634688403e-05, "loss": 0.1631, "step": 12021 }, { "epoch": 0.21442585524203617, "grad_norm": 0.259956032037735, "learning_rate": 4.803175815210733e-05, "loss": 0.1825, "step": 12022 }, { "epoch": 0.21444369136374986, "grad_norm": 0.36655721068382263, "learning_rate": 4.803115274609531e-05, "loss": 0.2128, "step": 12023 }, { "epoch": 0.21446152748546357, "grad_norm": 0.2695331573486328, "learning_rate": 4.803054725080658e-05, "loss": 0.2105, "step": 12024 }, { "epoch": 0.21447936360717726, "grad_norm": 0.24818134307861328, "learning_rate": 4.802994166624349e-05, "loss": 0.2043, "step": 12025 }, { "epoch": 0.21449719972889095, "grad_norm": 0.2615300118923187, "learning_rate": 4.8029335992408406e-05, "loss": 0.2032, "step": 12026 }, { "epoch": 0.21451503585060464, "grad_norm": 0.20985738933086395, "learning_rate": 4.8028730229303655e-05, "loss": 0.1569, "step": 12027 }, { "epoch": 0.21453287197231835, "grad_norm": 0.30844759941101074, "learning_rate": 4.802812437693159e-05, "loss": 0.2262, "step": 12028 }, { "epoch": 0.21455070809403204, "grad_norm": 0.2554391920566559, "learning_rate": 4.802751843529456e-05, "loss": 0.2213, "step": 12029 }, { "epoch": 0.21456854421574573, "grad_norm": 0.2754734456539154, "learning_rate": 4.802691240439492e-05, "loss": 0.193, "step": 12030 }, { "epoch": 0.21458638033745941, "grad_norm": 0.38308730721473694, "learning_rate": 4.802630628423501e-05, "loss": 0.1922, "step": 12031 }, { "epoch": 0.21460421645917313, "grad_norm": 0.3109648525714874, "learning_rate": 4.8025700074817184e-05, "loss": 0.1987, "step": 12032 }, { "epoch": 0.21462205258088682, "grad_norm": 0.2678164541721344, "learning_rate": 4.8025093776143794e-05, "loss": 0.2009, "step": 12033 }, { "epoch": 0.2146398887026005, "grad_norm": 0.32864266633987427, "learning_rate": 4.802448738821719e-05, "loss": 0.2062, "step": 12034 }, { "epoch": 0.2146577248243142, "grad_norm": 0.30044132471084595, "learning_rate": 4.8023880911039715e-05, "loss": 0.2207, "step": 12035 }, { "epoch": 0.21467556094602788, "grad_norm": 0.27375471591949463, "learning_rate": 4.802327434461373e-05, "loss": 0.1924, "step": 12036 }, { "epoch": 0.2146933970677416, "grad_norm": 0.3943275809288025, "learning_rate": 4.802266768894158e-05, "loss": 0.2027, "step": 12037 }, { "epoch": 0.2147112331894553, "grad_norm": 0.3643236756324768, "learning_rate": 4.802206094402561e-05, "loss": 0.2858, "step": 12038 }, { "epoch": 0.21472906931116897, "grad_norm": 0.27129387855529785, "learning_rate": 4.802145410986819e-05, "loss": 0.2144, "step": 12039 }, { "epoch": 0.21474690543288266, "grad_norm": 0.2571626603603363, "learning_rate": 4.802084718647166e-05, "loss": 0.201, "step": 12040 }, { "epoch": 0.21476474155459638, "grad_norm": 0.2622687816619873, "learning_rate": 4.802024017383838e-05, "loss": 0.2093, "step": 12041 }, { "epoch": 0.21478257767631007, "grad_norm": 0.2342766672372818, "learning_rate": 4.8019633071970696e-05, "loss": 0.1853, "step": 12042 }, { "epoch": 0.21480041379802375, "grad_norm": 0.338860422372818, "learning_rate": 4.801902588087096e-05, "loss": 0.1861, "step": 12043 }, { "epoch": 0.21481824991973744, "grad_norm": 0.3265077471733093, "learning_rate": 4.801841860054153e-05, "loss": 0.1804, "step": 12044 }, { "epoch": 0.21483608604145116, "grad_norm": 0.23601371049880981, "learning_rate": 4.801781123098476e-05, "loss": 0.2038, "step": 12045 }, { "epoch": 0.21485392216316485, "grad_norm": 0.2651790380477905, "learning_rate": 4.8017203772203e-05, "loss": 0.2151, "step": 12046 }, { "epoch": 0.21487175828487853, "grad_norm": 0.26052558422088623, "learning_rate": 4.8016596224198616e-05, "loss": 0.2408, "step": 12047 }, { "epoch": 0.21488959440659222, "grad_norm": 0.3609578013420105, "learning_rate": 4.801598858697395e-05, "loss": 0.2196, "step": 12048 }, { "epoch": 0.21490743052830594, "grad_norm": 0.24095414578914642, "learning_rate": 4.8015380860531366e-05, "loss": 0.2276, "step": 12049 }, { "epoch": 0.21492526665001963, "grad_norm": 0.24156628549098969, "learning_rate": 4.801477304487321e-05, "loss": 0.1881, "step": 12050 }, { "epoch": 0.2149431027717333, "grad_norm": 0.2123507857322693, "learning_rate": 4.801416514000186e-05, "loss": 0.1919, "step": 12051 }, { "epoch": 0.214960938893447, "grad_norm": 0.21883635222911835, "learning_rate": 4.801355714591964e-05, "loss": 0.2207, "step": 12052 }, { "epoch": 0.21497877501516072, "grad_norm": 0.2538836896419525, "learning_rate": 4.801294906262893e-05, "loss": 0.2068, "step": 12053 }, { "epoch": 0.2149966111368744, "grad_norm": 0.22579586505889893, "learning_rate": 4.801234089013208e-05, "loss": 0.1817, "step": 12054 }, { "epoch": 0.2150144472585881, "grad_norm": 0.20638296008110046, "learning_rate": 4.801173262843145e-05, "loss": 0.1581, "step": 12055 }, { "epoch": 0.21503228338030178, "grad_norm": 0.34238219261169434, "learning_rate": 4.8011124277529394e-05, "loss": 0.2141, "step": 12056 }, { "epoch": 0.21505011950201547, "grad_norm": 0.2788119614124298, "learning_rate": 4.801051583742827e-05, "loss": 0.2361, "step": 12057 }, { "epoch": 0.21506795562372918, "grad_norm": 0.36585521697998047, "learning_rate": 4.800990730813045e-05, "loss": 0.1913, "step": 12058 }, { "epoch": 0.21508579174544287, "grad_norm": 0.37219300866127014, "learning_rate": 4.800929868963827e-05, "loss": 0.2789, "step": 12059 }, { "epoch": 0.21510362786715656, "grad_norm": 0.27912479639053345, "learning_rate": 4.800868998195411e-05, "loss": 0.2172, "step": 12060 }, { "epoch": 0.21512146398887025, "grad_norm": 0.3401962220668793, "learning_rate": 4.800808118508032e-05, "loss": 0.2169, "step": 12061 }, { "epoch": 0.21513930011058396, "grad_norm": 0.29472339153289795, "learning_rate": 4.800747229901925e-05, "loss": 0.2179, "step": 12062 }, { "epoch": 0.21515713623229765, "grad_norm": 0.24006521701812744, "learning_rate": 4.800686332377329e-05, "loss": 0.1951, "step": 12063 }, { "epoch": 0.21517497235401134, "grad_norm": 0.2039320319890976, "learning_rate": 4.800625425934476e-05, "loss": 0.1813, "step": 12064 }, { "epoch": 0.21519280847572503, "grad_norm": 0.26818224787712097, "learning_rate": 4.8005645105736054e-05, "loss": 0.2396, "step": 12065 }, { "epoch": 0.21521064459743874, "grad_norm": 0.3269694149494171, "learning_rate": 4.8005035862949524e-05, "loss": 0.1883, "step": 12066 }, { "epoch": 0.21522848071915243, "grad_norm": 0.3098810911178589, "learning_rate": 4.800442653098752e-05, "loss": 0.2599, "step": 12067 }, { "epoch": 0.21524631684086612, "grad_norm": 0.25813785195350647, "learning_rate": 4.8003817109852424e-05, "loss": 0.1805, "step": 12068 }, { "epoch": 0.2152641529625798, "grad_norm": 0.27266553044319153, "learning_rate": 4.800320759954658e-05, "loss": 0.1648, "step": 12069 }, { "epoch": 0.21528198908429352, "grad_norm": 0.23762328922748566, "learning_rate": 4.8002598000072365e-05, "loss": 0.2227, "step": 12070 }, { "epoch": 0.2152998252060072, "grad_norm": 0.23860234022140503, "learning_rate": 4.800198831143212e-05, "loss": 0.1918, "step": 12071 }, { "epoch": 0.2153176613277209, "grad_norm": 0.19405187666416168, "learning_rate": 4.800137853362824e-05, "loss": 0.1624, "step": 12072 }, { "epoch": 0.2153354974494346, "grad_norm": 0.29371708631515503, "learning_rate": 4.800076866666306e-05, "loss": 0.2006, "step": 12073 }, { "epoch": 0.2153533335711483, "grad_norm": 0.20635518431663513, "learning_rate": 4.8000158710538975e-05, "loss": 0.1952, "step": 12074 }, { "epoch": 0.215371169692862, "grad_norm": 0.19975915551185608, "learning_rate": 4.799954866525831e-05, "loss": 0.1702, "step": 12075 }, { "epoch": 0.21538900581457568, "grad_norm": 0.23310358822345734, "learning_rate": 4.799893853082347e-05, "loss": 0.1834, "step": 12076 }, { "epoch": 0.21540684193628937, "grad_norm": 0.2859938442707062, "learning_rate": 4.799832830723678e-05, "loss": 0.2155, "step": 12077 }, { "epoch": 0.21542467805800306, "grad_norm": 0.2594512104988098, "learning_rate": 4.799771799450065e-05, "loss": 0.1832, "step": 12078 }, { "epoch": 0.21544251417971677, "grad_norm": 0.2855367362499237, "learning_rate": 4.79971075926174e-05, "loss": 0.249, "step": 12079 }, { "epoch": 0.21546035030143046, "grad_norm": 0.31071969866752625, "learning_rate": 4.7996497101589424e-05, "loss": 0.1791, "step": 12080 }, { "epoch": 0.21547818642314415, "grad_norm": 0.36608681082725525, "learning_rate": 4.7995886521419084e-05, "loss": 0.2383, "step": 12081 }, { "epoch": 0.21549602254485783, "grad_norm": 0.2994576096534729, "learning_rate": 4.799527585210875e-05, "loss": 0.1858, "step": 12082 }, { "epoch": 0.21551385866657155, "grad_norm": 0.22747214138507843, "learning_rate": 4.7994665093660784e-05, "loss": 0.1565, "step": 12083 }, { "epoch": 0.21553169478828524, "grad_norm": 0.32047033309936523, "learning_rate": 4.799405424607755e-05, "loss": 0.2137, "step": 12084 }, { "epoch": 0.21554953090999893, "grad_norm": 0.2919504940509796, "learning_rate": 4.799344330936142e-05, "loss": 0.2513, "step": 12085 }, { "epoch": 0.21556736703171261, "grad_norm": 0.2635859549045563, "learning_rate": 4.799283228351476e-05, "loss": 0.172, "step": 12086 }, { "epoch": 0.21558520315342633, "grad_norm": 0.27573275566101074, "learning_rate": 4.799222116853994e-05, "loss": 0.2189, "step": 12087 }, { "epoch": 0.21560303927514002, "grad_norm": 0.4691026508808136, "learning_rate": 4.799160996443934e-05, "loss": 0.2742, "step": 12088 }, { "epoch": 0.2156208753968537, "grad_norm": 0.26830360293388367, "learning_rate": 4.799099867121531e-05, "loss": 0.2154, "step": 12089 }, { "epoch": 0.2156387115185674, "grad_norm": 0.15114668011665344, "learning_rate": 4.799038728887023e-05, "loss": 0.1332, "step": 12090 }, { "epoch": 0.2156565476402811, "grad_norm": 0.2538500726222992, "learning_rate": 4.798977581740647e-05, "loss": 0.2175, "step": 12091 }, { "epoch": 0.2156743837619948, "grad_norm": 0.23798954486846924, "learning_rate": 4.798916425682639e-05, "loss": 0.2165, "step": 12092 }, { "epoch": 0.21569221988370849, "grad_norm": 0.3038810193538666, "learning_rate": 4.798855260713238e-05, "loss": 0.2489, "step": 12093 }, { "epoch": 0.21571005600542217, "grad_norm": 0.21753878891468048, "learning_rate": 4.79879408683268e-05, "loss": 0.1744, "step": 12094 }, { "epoch": 0.21572789212713586, "grad_norm": 0.6992783546447754, "learning_rate": 4.798732904041201e-05, "loss": 0.1578, "step": 12095 }, { "epoch": 0.21574572824884958, "grad_norm": 0.23858462274074554, "learning_rate": 4.79867171233904e-05, "loss": 0.1804, "step": 12096 }, { "epoch": 0.21576356437056327, "grad_norm": 0.2669230103492737, "learning_rate": 4.798610511726434e-05, "loss": 0.1789, "step": 12097 }, { "epoch": 0.21578140049227695, "grad_norm": 0.2066022753715515, "learning_rate": 4.798549302203619e-05, "loss": 0.1746, "step": 12098 }, { "epoch": 0.21579923661399064, "grad_norm": 0.20645281672477722, "learning_rate": 4.7984880837708335e-05, "loss": 0.1541, "step": 12099 }, { "epoch": 0.21581707273570436, "grad_norm": 0.28157174587249756, "learning_rate": 4.7984268564283144e-05, "loss": 0.2116, "step": 12100 }, { "epoch": 0.21583490885741805, "grad_norm": 0.4844302535057068, "learning_rate": 4.798365620176298e-05, "loss": 0.2843, "step": 12101 }, { "epoch": 0.21585274497913173, "grad_norm": 0.26500019431114197, "learning_rate": 4.7983043750150236e-05, "loss": 0.1995, "step": 12102 }, { "epoch": 0.21587058110084542, "grad_norm": 0.2732619643211365, "learning_rate": 4.7982431209447275e-05, "loss": 0.1719, "step": 12103 }, { "epoch": 0.21588841722255914, "grad_norm": 0.2765786051750183, "learning_rate": 4.7981818579656465e-05, "loss": 0.1721, "step": 12104 }, { "epoch": 0.21590625334427282, "grad_norm": 0.3647291660308838, "learning_rate": 4.79812058607802e-05, "loss": 0.2315, "step": 12105 }, { "epoch": 0.2159240894659865, "grad_norm": 0.2344547063112259, "learning_rate": 4.798059305282083e-05, "loss": 0.2106, "step": 12106 }, { "epoch": 0.2159419255877002, "grad_norm": 0.3204667568206787, "learning_rate": 4.797998015578076e-05, "loss": 0.233, "step": 12107 }, { "epoch": 0.21595976170941392, "grad_norm": 0.28090086579322815, "learning_rate": 4.797936716966234e-05, "loss": 0.2314, "step": 12108 }, { "epoch": 0.2159775978311276, "grad_norm": 0.2808286249637604, "learning_rate": 4.797875409446796e-05, "loss": 0.1293, "step": 12109 }, { "epoch": 0.2159954339528413, "grad_norm": 0.35246843099594116, "learning_rate": 4.7978140930199985e-05, "loss": 0.2188, "step": 12110 }, { "epoch": 0.21601327007455498, "grad_norm": 0.35542526841163635, "learning_rate": 4.797752767686081e-05, "loss": 0.1862, "step": 12111 }, { "epoch": 0.2160311061962687, "grad_norm": 0.40284082293510437, "learning_rate": 4.797691433445279e-05, "loss": 0.2124, "step": 12112 }, { "epoch": 0.21604894231798238, "grad_norm": 0.22678276896476746, "learning_rate": 4.7976300902978325e-05, "loss": 0.175, "step": 12113 }, { "epoch": 0.21606677843969607, "grad_norm": 0.2312001883983612, "learning_rate": 4.797568738243978e-05, "loss": 0.2215, "step": 12114 }, { "epoch": 0.21608461456140976, "grad_norm": 0.17601428925991058, "learning_rate": 4.797507377283953e-05, "loss": 0.1576, "step": 12115 }, { "epoch": 0.21610245068312345, "grad_norm": 0.3084196150302887, "learning_rate": 4.7974460074179964e-05, "loss": 0.2023, "step": 12116 }, { "epoch": 0.21612028680483716, "grad_norm": 0.3049376904964447, "learning_rate": 4.797384628646345e-05, "loss": 0.2035, "step": 12117 }, { "epoch": 0.21613812292655085, "grad_norm": 0.2835913896560669, "learning_rate": 4.797323240969238e-05, "loss": 0.2013, "step": 12118 }, { "epoch": 0.21615595904826454, "grad_norm": 0.21759581565856934, "learning_rate": 4.797261844386912e-05, "loss": 0.1578, "step": 12119 }, { "epoch": 0.21617379516997823, "grad_norm": 0.31380537152290344, "learning_rate": 4.7972004388996064e-05, "loss": 0.1689, "step": 12120 }, { "epoch": 0.21619163129169194, "grad_norm": 0.5979520082473755, "learning_rate": 4.797139024507558e-05, "loss": 0.1988, "step": 12121 }, { "epoch": 0.21620946741340563, "grad_norm": 0.29887494444847107, "learning_rate": 4.797077601211005e-05, "loss": 0.1988, "step": 12122 }, { "epoch": 0.21622730353511932, "grad_norm": 0.24623669683933258, "learning_rate": 4.7970161690101856e-05, "loss": 0.1765, "step": 12123 }, { "epoch": 0.216245139656833, "grad_norm": 0.27386224269866943, "learning_rate": 4.796954727905339e-05, "loss": 0.2408, "step": 12124 }, { "epoch": 0.21626297577854672, "grad_norm": 0.256830096244812, "learning_rate": 4.796893277896702e-05, "loss": 0.1673, "step": 12125 }, { "epoch": 0.2162808119002604, "grad_norm": 0.2863677144050598, "learning_rate": 4.796831818984514e-05, "loss": 0.2012, "step": 12126 }, { "epoch": 0.2162986480219741, "grad_norm": 0.24352280795574188, "learning_rate": 4.796770351169012e-05, "loss": 0.2075, "step": 12127 }, { "epoch": 0.2163164841436878, "grad_norm": 0.2724657356739044, "learning_rate": 4.796708874450435e-05, "loss": 0.1693, "step": 12128 }, { "epoch": 0.2163343202654015, "grad_norm": 0.21111701428890228, "learning_rate": 4.796647388829021e-05, "loss": 0.1456, "step": 12129 }, { "epoch": 0.2163521563871152, "grad_norm": 0.29900088906288147, "learning_rate": 4.796585894305009e-05, "loss": 0.1979, "step": 12130 }, { "epoch": 0.21636999250882888, "grad_norm": 0.37574273347854614, "learning_rate": 4.796524390878636e-05, "loss": 0.2705, "step": 12131 }, { "epoch": 0.21638782863054257, "grad_norm": 0.2196299433708191, "learning_rate": 4.796462878550142e-05, "loss": 0.2057, "step": 12132 }, { "epoch": 0.21640566475225628, "grad_norm": 0.4287925660610199, "learning_rate": 4.7964013573197643e-05, "loss": 0.1545, "step": 12133 }, { "epoch": 0.21642350087396997, "grad_norm": 0.3357856869697571, "learning_rate": 4.796339827187742e-05, "loss": 0.1947, "step": 12134 }, { "epoch": 0.21644133699568366, "grad_norm": 0.323546439409256, "learning_rate": 4.7962782881543135e-05, "loss": 0.2179, "step": 12135 }, { "epoch": 0.21645917311739735, "grad_norm": 0.3660182058811188, "learning_rate": 4.7962167402197165e-05, "loss": 0.1956, "step": 12136 }, { "epoch": 0.21647700923911103, "grad_norm": 0.5602064728736877, "learning_rate": 4.796155183384191e-05, "loss": 0.2135, "step": 12137 }, { "epoch": 0.21649484536082475, "grad_norm": 0.23747332394123077, "learning_rate": 4.796093617647975e-05, "loss": 0.1801, "step": 12138 }, { "epoch": 0.21651268148253844, "grad_norm": 0.30389052629470825, "learning_rate": 4.796032043011307e-05, "loss": 0.2015, "step": 12139 }, { "epoch": 0.21653051760425213, "grad_norm": 0.29486486315727234, "learning_rate": 4.795970459474426e-05, "loss": 0.2369, "step": 12140 }, { "epoch": 0.21654835372596581, "grad_norm": 0.24094629287719727, "learning_rate": 4.7959088670375695e-05, "loss": 0.1953, "step": 12141 }, { "epoch": 0.21656618984767953, "grad_norm": 0.331759512424469, "learning_rate": 4.795847265700978e-05, "loss": 0.1681, "step": 12142 }, { "epoch": 0.21658402596939322, "grad_norm": 0.22682291269302368, "learning_rate": 4.7957856554648893e-05, "loss": 0.1985, "step": 12143 }, { "epoch": 0.2166018620911069, "grad_norm": 0.2786453068256378, "learning_rate": 4.795724036329543e-05, "loss": 0.2236, "step": 12144 }, { "epoch": 0.2166196982128206, "grad_norm": 0.19285887479782104, "learning_rate": 4.795662408295177e-05, "loss": 0.174, "step": 12145 }, { "epoch": 0.2166375343345343, "grad_norm": 0.27381888031959534, "learning_rate": 4.795600771362031e-05, "loss": 0.1749, "step": 12146 }, { "epoch": 0.216655370456248, "grad_norm": 0.26288434863090515, "learning_rate": 4.795539125530343e-05, "loss": 0.196, "step": 12147 }, { "epoch": 0.21667320657796169, "grad_norm": 0.2533511817455292, "learning_rate": 4.795477470800353e-05, "loss": 0.1775, "step": 12148 }, { "epoch": 0.21669104269967537, "grad_norm": 0.3898746073246002, "learning_rate": 4.795415807172299e-05, "loss": 0.2122, "step": 12149 }, { "epoch": 0.2167088788213891, "grad_norm": 0.3281749188899994, "learning_rate": 4.7953541346464204e-05, "loss": 0.2089, "step": 12150 }, { "epoch": 0.21672671494310278, "grad_norm": 0.3043719232082367, "learning_rate": 4.795292453222957e-05, "loss": 0.1722, "step": 12151 }, { "epoch": 0.21674455106481647, "grad_norm": 0.32433322072029114, "learning_rate": 4.7952307629021463e-05, "loss": 0.197, "step": 12152 }, { "epoch": 0.21676238718653015, "grad_norm": 0.2759004533290863, "learning_rate": 4.7951690636842294e-05, "loss": 0.2065, "step": 12153 }, { "epoch": 0.21678022330824387, "grad_norm": 0.48696210980415344, "learning_rate": 4.795107355569445e-05, "loss": 0.1324, "step": 12154 }, { "epoch": 0.21679805942995756, "grad_norm": 0.3025630712509155, "learning_rate": 4.79504563855803e-05, "loss": 0.2084, "step": 12155 }, { "epoch": 0.21681589555167124, "grad_norm": 0.236316978931427, "learning_rate": 4.794983912650227e-05, "loss": 0.1819, "step": 12156 }, { "epoch": 0.21683373167338493, "grad_norm": 0.2562558948993683, "learning_rate": 4.7949221778462726e-05, "loss": 0.1969, "step": 12157 }, { "epoch": 0.21685156779509862, "grad_norm": 0.20902837812900543, "learning_rate": 4.7948604341464075e-05, "loss": 0.1526, "step": 12158 }, { "epoch": 0.21686940391681234, "grad_norm": 0.2542819678783417, "learning_rate": 4.7947986815508716e-05, "loss": 0.1609, "step": 12159 }, { "epoch": 0.21688724003852602, "grad_norm": 0.2710743248462677, "learning_rate": 4.794736920059902e-05, "loss": 0.1999, "step": 12160 }, { "epoch": 0.2169050761602397, "grad_norm": 0.4025109112262726, "learning_rate": 4.794675149673741e-05, "loss": 0.1791, "step": 12161 }, { "epoch": 0.2169229122819534, "grad_norm": 0.26331812143325806, "learning_rate": 4.794613370392625e-05, "loss": 0.1838, "step": 12162 }, { "epoch": 0.21694074840366712, "grad_norm": 0.4019258916378021, "learning_rate": 4.794551582216796e-05, "loss": 0.2056, "step": 12163 }, { "epoch": 0.2169585845253808, "grad_norm": 0.31794071197509766, "learning_rate": 4.794489785146493e-05, "loss": 0.2645, "step": 12164 }, { "epoch": 0.2169764206470945, "grad_norm": 0.29514479637145996, "learning_rate": 4.794427979181955e-05, "loss": 0.1921, "step": 12165 }, { "epoch": 0.21699425676880818, "grad_norm": 0.26694512367248535, "learning_rate": 4.7943661643234204e-05, "loss": 0.2074, "step": 12166 }, { "epoch": 0.2170120928905219, "grad_norm": 0.28734442591667175, "learning_rate": 4.794304340571131e-05, "loss": 0.2038, "step": 12167 }, { "epoch": 0.21702992901223558, "grad_norm": 0.38088735938072205, "learning_rate": 4.794242507925325e-05, "loss": 0.1931, "step": 12168 }, { "epoch": 0.21704776513394927, "grad_norm": 0.24369138479232788, "learning_rate": 4.794180666386243e-05, "loss": 0.1778, "step": 12169 }, { "epoch": 0.21706560125566296, "grad_norm": 0.28930190205574036, "learning_rate": 4.794118815954125e-05, "loss": 0.2593, "step": 12170 }, { "epoch": 0.21708343737737668, "grad_norm": 0.28787702322006226, "learning_rate": 4.794056956629209e-05, "loss": 0.1811, "step": 12171 }, { "epoch": 0.21710127349909036, "grad_norm": 0.39515286684036255, "learning_rate": 4.7939950884117366e-05, "loss": 0.1736, "step": 12172 }, { "epoch": 0.21711910962080405, "grad_norm": 0.3337680399417877, "learning_rate": 4.793933211301947e-05, "loss": 0.2176, "step": 12173 }, { "epoch": 0.21713694574251774, "grad_norm": 0.24080854654312134, "learning_rate": 4.79387132530008e-05, "loss": 0.2172, "step": 12174 }, { "epoch": 0.21715478186423146, "grad_norm": 0.28199151158332825, "learning_rate": 4.793809430406375e-05, "loss": 0.1989, "step": 12175 }, { "epoch": 0.21717261798594514, "grad_norm": 0.37244999408721924, "learning_rate": 4.7937475266210724e-05, "loss": 0.2353, "step": 12176 }, { "epoch": 0.21719045410765883, "grad_norm": 0.2573454976081848, "learning_rate": 4.7936856139444125e-05, "loss": 0.1933, "step": 12177 }, { "epoch": 0.21720829022937252, "grad_norm": 0.2606930136680603, "learning_rate": 4.793623692376635e-05, "loss": 0.2353, "step": 12178 }, { "epoch": 0.2172261263510862, "grad_norm": 0.2101358026266098, "learning_rate": 4.793561761917979e-05, "loss": 0.1799, "step": 12179 }, { "epoch": 0.21724396247279992, "grad_norm": 0.26700419187545776, "learning_rate": 4.793499822568687e-05, "loss": 0.1509, "step": 12180 }, { "epoch": 0.2172617985945136, "grad_norm": 0.3781948387622833, "learning_rate": 4.7934378743289964e-05, "loss": 0.1807, "step": 12181 }, { "epoch": 0.2172796347162273, "grad_norm": 0.2628592252731323, "learning_rate": 4.793375917199149e-05, "loss": 0.1672, "step": 12182 }, { "epoch": 0.217297470837941, "grad_norm": 0.3336634635925293, "learning_rate": 4.793313951179383e-05, "loss": 0.2129, "step": 12183 }, { "epoch": 0.2173153069596547, "grad_norm": 0.4094424545764923, "learning_rate": 4.793251976269942e-05, "loss": 0.1769, "step": 12184 }, { "epoch": 0.2173331430813684, "grad_norm": 0.3108433187007904, "learning_rate": 4.793189992471063e-05, "loss": 0.1407, "step": 12185 }, { "epoch": 0.21735097920308208, "grad_norm": 0.29962775111198425, "learning_rate": 4.793127999782988e-05, "loss": 0.2472, "step": 12186 }, { "epoch": 0.21736881532479577, "grad_norm": 0.2748205065727234, "learning_rate": 4.793065998205957e-05, "loss": 0.1938, "step": 12187 }, { "epoch": 0.21738665144650948, "grad_norm": 0.3648146986961365, "learning_rate": 4.79300398774021e-05, "loss": 0.2381, "step": 12188 }, { "epoch": 0.21740448756822317, "grad_norm": 0.29264548420906067, "learning_rate": 4.792941968385988e-05, "loss": 0.1776, "step": 12189 }, { "epoch": 0.21742232368993686, "grad_norm": 0.30888015031814575, "learning_rate": 4.7928799401435305e-05, "loss": 0.2233, "step": 12190 }, { "epoch": 0.21744015981165055, "grad_norm": 0.23494990170001984, "learning_rate": 4.792817903013078e-05, "loss": 0.2076, "step": 12191 }, { "epoch": 0.21745799593336426, "grad_norm": 0.3458961844444275, "learning_rate": 4.792755856994872e-05, "loss": 0.2022, "step": 12192 }, { "epoch": 0.21747583205507795, "grad_norm": 0.1945781409740448, "learning_rate": 4.7926938020891526e-05, "loss": 0.1732, "step": 12193 }, { "epoch": 0.21749366817679164, "grad_norm": 0.3091394603252411, "learning_rate": 4.7926317382961595e-05, "loss": 0.2258, "step": 12194 }, { "epoch": 0.21751150429850533, "grad_norm": 0.456118106842041, "learning_rate": 4.792569665616135e-05, "loss": 0.1607, "step": 12195 }, { "epoch": 0.21752934042021901, "grad_norm": 0.3005441129207611, "learning_rate": 4.792507584049317e-05, "loss": 0.2279, "step": 12196 }, { "epoch": 0.21754717654193273, "grad_norm": 0.262269526720047, "learning_rate": 4.792445493595949e-05, "loss": 0.1413, "step": 12197 }, { "epoch": 0.21756501266364642, "grad_norm": 0.30201366543769836, "learning_rate": 4.79238339425627e-05, "loss": 0.2252, "step": 12198 }, { "epoch": 0.2175828487853601, "grad_norm": 0.23367714881896973, "learning_rate": 4.7923212860305223e-05, "loss": 0.2042, "step": 12199 }, { "epoch": 0.2176006849070738, "grad_norm": 0.2498628795146942, "learning_rate": 4.7922591689189445e-05, "loss": 0.1889, "step": 12200 }, { "epoch": 0.2176185210287875, "grad_norm": 0.3375679552555084, "learning_rate": 4.792197042921778e-05, "loss": 0.1977, "step": 12201 }, { "epoch": 0.2176363571505012, "grad_norm": 0.2772034704685211, "learning_rate": 4.7921349080392655e-05, "loss": 0.1729, "step": 12202 }, { "epoch": 0.21765419327221489, "grad_norm": 0.2688341438770294, "learning_rate": 4.792072764271645e-05, "loss": 0.2039, "step": 12203 }, { "epoch": 0.21767202939392857, "grad_norm": 0.2971316874027252, "learning_rate": 4.79201061161916e-05, "loss": 0.2044, "step": 12204 }, { "epoch": 0.2176898655156423, "grad_norm": 0.28549161553382874, "learning_rate": 4.7919484500820485e-05, "loss": 0.1834, "step": 12205 }, { "epoch": 0.21770770163735598, "grad_norm": 0.23461858928203583, "learning_rate": 4.791886279660555e-05, "loss": 0.1859, "step": 12206 }, { "epoch": 0.21772553775906967, "grad_norm": 0.26371443271636963, "learning_rate": 4.791824100354918e-05, "loss": 0.1898, "step": 12207 }, { "epoch": 0.21774337388078335, "grad_norm": 0.3226472735404968, "learning_rate": 4.791761912165379e-05, "loss": 0.1528, "step": 12208 }, { "epoch": 0.21776121000249707, "grad_norm": 0.3185974359512329, "learning_rate": 4.79169971509218e-05, "loss": 0.218, "step": 12209 }, { "epoch": 0.21777904612421076, "grad_norm": 0.24560217559337616, "learning_rate": 4.7916375091355606e-05, "loss": 0.166, "step": 12210 }, { "epoch": 0.21779688224592444, "grad_norm": 0.32225093245506287, "learning_rate": 4.791575294295763e-05, "loss": 0.2163, "step": 12211 }, { "epoch": 0.21781471836763813, "grad_norm": 0.28764331340789795, "learning_rate": 4.791513070573028e-05, "loss": 0.2047, "step": 12212 }, { "epoch": 0.21783255448935185, "grad_norm": 0.33743715286254883, "learning_rate": 4.791450837967597e-05, "loss": 0.2239, "step": 12213 }, { "epoch": 0.21785039061106554, "grad_norm": 0.24375684559345245, "learning_rate": 4.791388596479711e-05, "loss": 0.1919, "step": 12214 }, { "epoch": 0.21786822673277922, "grad_norm": 0.28009992837905884, "learning_rate": 4.791326346109611e-05, "loss": 0.1917, "step": 12215 }, { "epoch": 0.2178860628544929, "grad_norm": 0.23669345676898956, "learning_rate": 4.7912640868575396e-05, "loss": 0.1318, "step": 12216 }, { "epoch": 0.2179038989762066, "grad_norm": 0.23772671818733215, "learning_rate": 4.7912018187237365e-05, "loss": 0.1475, "step": 12217 }, { "epoch": 0.21792173509792032, "grad_norm": 0.2695966958999634, "learning_rate": 4.791139541708444e-05, "loss": 0.2337, "step": 12218 }, { "epoch": 0.217939571219634, "grad_norm": 0.3321371376514435, "learning_rate": 4.791077255811904e-05, "loss": 0.2339, "step": 12219 }, { "epoch": 0.2179574073413477, "grad_norm": 0.334119975566864, "learning_rate": 4.7910149610343564e-05, "loss": 0.2019, "step": 12220 }, { "epoch": 0.21797524346306138, "grad_norm": 0.8641327619552612, "learning_rate": 4.790952657376043e-05, "loss": 0.2221, "step": 12221 }, { "epoch": 0.2179930795847751, "grad_norm": 0.2770143747329712, "learning_rate": 4.7908903448372065e-05, "loss": 0.1945, "step": 12222 }, { "epoch": 0.21801091570648878, "grad_norm": 0.2931188642978668, "learning_rate": 4.790828023418088e-05, "loss": 0.1901, "step": 12223 }, { "epoch": 0.21802875182820247, "grad_norm": 0.2954394817352295, "learning_rate": 4.790765693118929e-05, "loss": 0.2452, "step": 12224 }, { "epoch": 0.21804658794991616, "grad_norm": 0.3034812808036804, "learning_rate": 4.7907033539399706e-05, "loss": 0.1653, "step": 12225 }, { "epoch": 0.21806442407162988, "grad_norm": 0.2628895938396454, "learning_rate": 4.790641005881455e-05, "loss": 0.1726, "step": 12226 }, { "epoch": 0.21808226019334356, "grad_norm": 0.21256977319717407, "learning_rate": 4.790578648943623e-05, "loss": 0.1834, "step": 12227 }, { "epoch": 0.21810009631505725, "grad_norm": 0.2179006040096283, "learning_rate": 4.7905162831267183e-05, "loss": 0.1966, "step": 12228 }, { "epoch": 0.21811793243677094, "grad_norm": 0.2746255397796631, "learning_rate": 4.79045390843098e-05, "loss": 0.1546, "step": 12229 }, { "epoch": 0.21813576855848466, "grad_norm": 0.29825106263160706, "learning_rate": 4.790391524856652e-05, "loss": 0.1767, "step": 12230 }, { "epoch": 0.21815360468019834, "grad_norm": 0.31748199462890625, "learning_rate": 4.790329132403975e-05, "loss": 0.231, "step": 12231 }, { "epoch": 0.21817144080191203, "grad_norm": 0.35434678196907043, "learning_rate": 4.790266731073192e-05, "loss": 0.2346, "step": 12232 }, { "epoch": 0.21818927692362572, "grad_norm": 0.23999528586864471, "learning_rate": 4.790204320864544e-05, "loss": 0.1707, "step": 12233 }, { "epoch": 0.21820711304533943, "grad_norm": 0.22765415906906128, "learning_rate": 4.7901419017782725e-05, "loss": 0.1612, "step": 12234 }, { "epoch": 0.21822494916705312, "grad_norm": 0.3081587851047516, "learning_rate": 4.7900794738146195e-05, "loss": 0.1684, "step": 12235 }, { "epoch": 0.2182427852887668, "grad_norm": 0.2984144389629364, "learning_rate": 4.790017036973828e-05, "loss": 0.2263, "step": 12236 }, { "epoch": 0.2182606214104805, "grad_norm": 0.32591012120246887, "learning_rate": 4.78995459125614e-05, "loss": 0.2128, "step": 12237 }, { "epoch": 0.2182784575321942, "grad_norm": 0.29073888063430786, "learning_rate": 4.7898921366617964e-05, "loss": 0.197, "step": 12238 }, { "epoch": 0.2182962936539079, "grad_norm": 0.42854738235473633, "learning_rate": 4.7898296731910407e-05, "loss": 0.2, "step": 12239 }, { "epoch": 0.2183141297756216, "grad_norm": 0.287572979927063, "learning_rate": 4.789767200844114e-05, "loss": 0.17, "step": 12240 }, { "epoch": 0.21833196589733528, "grad_norm": 0.3488253355026245, "learning_rate": 4.789704719621259e-05, "loss": 0.2905, "step": 12241 }, { "epoch": 0.21834980201904897, "grad_norm": 0.3265188932418823, "learning_rate": 4.789642229522717e-05, "loss": 0.1688, "step": 12242 }, { "epoch": 0.21836763814076268, "grad_norm": 0.33907297253608704, "learning_rate": 4.789579730548731e-05, "loss": 0.2235, "step": 12243 }, { "epoch": 0.21838547426247637, "grad_norm": 0.2541789710521698, "learning_rate": 4.7895172226995436e-05, "loss": 0.2259, "step": 12244 }, { "epoch": 0.21840331038419006, "grad_norm": 0.4170028269290924, "learning_rate": 4.789454705975397e-05, "loss": 0.2484, "step": 12245 }, { "epoch": 0.21842114650590375, "grad_norm": 0.3981008529663086, "learning_rate": 4.789392180376532e-05, "loss": 0.1641, "step": 12246 }, { "epoch": 0.21843898262761746, "grad_norm": 0.33615782856941223, "learning_rate": 4.7893296459031935e-05, "loss": 0.2295, "step": 12247 }, { "epoch": 0.21845681874933115, "grad_norm": 0.27407175302505493, "learning_rate": 4.7892671025556214e-05, "loss": 0.2229, "step": 12248 }, { "epoch": 0.21847465487104484, "grad_norm": 0.2900344431400299, "learning_rate": 4.78920455033406e-05, "loss": 0.2087, "step": 12249 }, { "epoch": 0.21849249099275853, "grad_norm": 0.4448204040527344, "learning_rate": 4.7891419892387515e-05, "loss": 0.225, "step": 12250 }, { "epoch": 0.21851032711447224, "grad_norm": 0.27972525358200073, "learning_rate": 4.7890794192699375e-05, "loss": 0.2287, "step": 12251 }, { "epoch": 0.21852816323618593, "grad_norm": 0.28016993403434753, "learning_rate": 4.7890168404278604e-05, "loss": 0.2018, "step": 12252 }, { "epoch": 0.21854599935789962, "grad_norm": 0.2946826219558716, "learning_rate": 4.788954252712764e-05, "loss": 0.1597, "step": 12253 }, { "epoch": 0.2185638354796133, "grad_norm": 0.2103988528251648, "learning_rate": 4.788891656124891e-05, "loss": 0.1728, "step": 12254 }, { "epoch": 0.21858167160132702, "grad_norm": 0.2620214521884918, "learning_rate": 4.788829050664483e-05, "loss": 0.1533, "step": 12255 }, { "epoch": 0.2185995077230407, "grad_norm": 0.29502132534980774, "learning_rate": 4.788766436331782e-05, "loss": 0.2134, "step": 12256 }, { "epoch": 0.2186173438447544, "grad_norm": 0.3599487841129303, "learning_rate": 4.7887038131270335e-05, "loss": 0.1665, "step": 12257 }, { "epoch": 0.21863517996646809, "grad_norm": 0.23317238688468933, "learning_rate": 4.788641181050477e-05, "loss": 0.1911, "step": 12258 }, { "epoch": 0.21865301608818177, "grad_norm": 0.1978743076324463, "learning_rate": 4.7885785401023575e-05, "loss": 0.1875, "step": 12259 }, { "epoch": 0.2186708522098955, "grad_norm": 0.3456242084503174, "learning_rate": 4.7885158902829175e-05, "loss": 0.2045, "step": 12260 }, { "epoch": 0.21868868833160918, "grad_norm": 0.3013564646244049, "learning_rate": 4.788453231592399e-05, "loss": 0.1876, "step": 12261 }, { "epoch": 0.21870652445332286, "grad_norm": 0.3238040506839752, "learning_rate": 4.7883905640310455e-05, "loss": 0.2449, "step": 12262 }, { "epoch": 0.21872436057503655, "grad_norm": 0.2792685925960541, "learning_rate": 4.7883278875990994e-05, "loss": 0.197, "step": 12263 }, { "epoch": 0.21874219669675027, "grad_norm": 0.24670548737049103, "learning_rate": 4.788265202296805e-05, "loss": 0.2046, "step": 12264 }, { "epoch": 0.21876003281846396, "grad_norm": 0.2170332372188568, "learning_rate": 4.788202508124403e-05, "loss": 0.1863, "step": 12265 }, { "epoch": 0.21877786894017764, "grad_norm": 0.2853853106498718, "learning_rate": 4.788139805082139e-05, "loss": 0.1468, "step": 12266 }, { "epoch": 0.21879570506189133, "grad_norm": 0.28167521953582764, "learning_rate": 4.788077093170253e-05, "loss": 0.1929, "step": 12267 }, { "epoch": 0.21881354118360505, "grad_norm": 0.26518744230270386, "learning_rate": 4.788014372388992e-05, "loss": 0.1677, "step": 12268 }, { "epoch": 0.21883137730531874, "grad_norm": 0.2601465582847595, "learning_rate": 4.787951642738596e-05, "loss": 0.2303, "step": 12269 }, { "epoch": 0.21884921342703242, "grad_norm": 0.2647935152053833, "learning_rate": 4.78788890421931e-05, "loss": 0.1886, "step": 12270 }, { "epoch": 0.2188670495487461, "grad_norm": 0.3722890317440033, "learning_rate": 4.787826156831375e-05, "loss": 0.2433, "step": 12271 }, { "epoch": 0.21888488567045983, "grad_norm": 0.41759246587753296, "learning_rate": 4.7877634005750375e-05, "loss": 0.193, "step": 12272 }, { "epoch": 0.21890272179217352, "grad_norm": 0.23161475360393524, "learning_rate": 4.787700635450537e-05, "loss": 0.1773, "step": 12273 }, { "epoch": 0.2189205579138872, "grad_norm": 0.3327459990978241, "learning_rate": 4.7876378614581195e-05, "loss": 0.231, "step": 12274 }, { "epoch": 0.2189383940356009, "grad_norm": 0.2902162969112396, "learning_rate": 4.787575078598028e-05, "loss": 0.1908, "step": 12275 }, { "epoch": 0.21895623015731458, "grad_norm": 0.2756287157535553, "learning_rate": 4.787512286870505e-05, "loss": 0.2331, "step": 12276 }, { "epoch": 0.2189740662790283, "grad_norm": 0.3340472877025604, "learning_rate": 4.787449486275794e-05, "loss": 0.2683, "step": 12277 }, { "epoch": 0.21899190240074198, "grad_norm": 0.2721349596977234, "learning_rate": 4.787386676814139e-05, "loss": 0.1819, "step": 12278 }, { "epoch": 0.21900973852245567, "grad_norm": 0.3155156373977661, "learning_rate": 4.787323858485783e-05, "loss": 0.199, "step": 12279 }, { "epoch": 0.21902757464416936, "grad_norm": 0.3061648905277252, "learning_rate": 4.7872610312909706e-05, "loss": 0.2095, "step": 12280 }, { "epoch": 0.21904541076588308, "grad_norm": 0.35271456837654114, "learning_rate": 4.787198195229943e-05, "loss": 0.2197, "step": 12281 }, { "epoch": 0.21906324688759676, "grad_norm": 0.28183358907699585, "learning_rate": 4.787135350302946e-05, "loss": 0.2137, "step": 12282 }, { "epoch": 0.21908108300931045, "grad_norm": 0.3016718924045563, "learning_rate": 4.787072496510223e-05, "loss": 0.2259, "step": 12283 }, { "epoch": 0.21909891913102414, "grad_norm": 0.19393084943294525, "learning_rate": 4.787009633852016e-05, "loss": 0.1584, "step": 12284 }, { "epoch": 0.21911675525273785, "grad_norm": 0.2776353061199188, "learning_rate": 4.78694676232857e-05, "loss": 0.2061, "step": 12285 }, { "epoch": 0.21913459137445154, "grad_norm": 0.26373741030693054, "learning_rate": 4.786883881940129e-05, "loss": 0.2597, "step": 12286 }, { "epoch": 0.21915242749616523, "grad_norm": 0.24344190955162048, "learning_rate": 4.7868209926869355e-05, "loss": 0.1913, "step": 12287 }, { "epoch": 0.21917026361787892, "grad_norm": 0.22540917992591858, "learning_rate": 4.786758094569235e-05, "loss": 0.1767, "step": 12288 }, { "epoch": 0.21918809973959263, "grad_norm": 0.1850290447473526, "learning_rate": 4.78669518758727e-05, "loss": 0.1381, "step": 12289 }, { "epoch": 0.21920593586130632, "grad_norm": 0.30858656764030457, "learning_rate": 4.786632271741284e-05, "loss": 0.1993, "step": 12290 }, { "epoch": 0.21922377198302, "grad_norm": 0.253150075674057, "learning_rate": 4.786569347031522e-05, "loss": 0.1938, "step": 12291 }, { "epoch": 0.2192416081047337, "grad_norm": 0.23767095804214478, "learning_rate": 4.786506413458227e-05, "loss": 0.1879, "step": 12292 }, { "epoch": 0.21925944422644741, "grad_norm": 0.3637741208076477, "learning_rate": 4.786443471021644e-05, "loss": 0.1946, "step": 12293 }, { "epoch": 0.2192772803481611, "grad_norm": 0.27433422207832336, "learning_rate": 4.786380519722016e-05, "loss": 0.2105, "step": 12294 }, { "epoch": 0.2192951164698748, "grad_norm": 0.22182419896125793, "learning_rate": 4.7863175595595877e-05, "loss": 0.136, "step": 12295 }, { "epoch": 0.21931295259158848, "grad_norm": 0.25109902024269104, "learning_rate": 4.7862545905346024e-05, "loss": 0.1871, "step": 12296 }, { "epoch": 0.21933078871330217, "grad_norm": 0.2733488380908966, "learning_rate": 4.786191612647305e-05, "loss": 0.1789, "step": 12297 }, { "epoch": 0.21934862483501588, "grad_norm": 0.2835102677345276, "learning_rate": 4.786128625897939e-05, "loss": 0.2043, "step": 12298 }, { "epoch": 0.21936646095672957, "grad_norm": 0.31644755601882935, "learning_rate": 4.786065630286749e-05, "loss": 0.2068, "step": 12299 }, { "epoch": 0.21938429707844326, "grad_norm": 0.2616254985332489, "learning_rate": 4.786002625813979e-05, "loss": 0.2211, "step": 12300 }, { "epoch": 0.21940213320015695, "grad_norm": 0.3720972239971161, "learning_rate": 4.785939612479874e-05, "loss": 0.1697, "step": 12301 }, { "epoch": 0.21941996932187066, "grad_norm": 0.28673508763313293, "learning_rate": 4.785876590284677e-05, "loss": 0.2045, "step": 12302 }, { "epoch": 0.21943780544358435, "grad_norm": 0.31360381841659546, "learning_rate": 4.785813559228632e-05, "loss": 0.1938, "step": 12303 }, { "epoch": 0.21945564156529804, "grad_norm": 0.21911442279815674, "learning_rate": 4.7857505193119854e-05, "loss": 0.1566, "step": 12304 }, { "epoch": 0.21947347768701173, "grad_norm": 0.21743826568126678, "learning_rate": 4.7856874705349795e-05, "loss": 0.1601, "step": 12305 }, { "epoch": 0.21949131380872544, "grad_norm": 0.2897314131259918, "learning_rate": 4.78562441289786e-05, "loss": 0.2092, "step": 12306 }, { "epoch": 0.21950914993043913, "grad_norm": 0.22341065108776093, "learning_rate": 4.7855613464008706e-05, "loss": 0.1722, "step": 12307 }, { "epoch": 0.21952698605215282, "grad_norm": 0.2659430503845215, "learning_rate": 4.785498271044256e-05, "loss": 0.1837, "step": 12308 }, { "epoch": 0.2195448221738665, "grad_norm": 0.22382672131061554, "learning_rate": 4.785435186828261e-05, "loss": 0.1463, "step": 12309 }, { "epoch": 0.21956265829558022, "grad_norm": 0.39409008622169495, "learning_rate": 4.7853720937531296e-05, "loss": 0.2372, "step": 12310 }, { "epoch": 0.2195804944172939, "grad_norm": 0.2905671000480652, "learning_rate": 4.7853089918191075e-05, "loss": 0.2056, "step": 12311 }, { "epoch": 0.2195983305390076, "grad_norm": 0.2422112375497818, "learning_rate": 4.785245881026437e-05, "loss": 0.1488, "step": 12312 }, { "epoch": 0.21961616666072128, "grad_norm": 0.23518626391887665, "learning_rate": 4.785182761375365e-05, "loss": 0.1402, "step": 12313 }, { "epoch": 0.219634002782435, "grad_norm": 0.2504526674747467, "learning_rate": 4.785119632866135e-05, "loss": 0.1965, "step": 12314 }, { "epoch": 0.2196518389041487, "grad_norm": 0.31703639030456543, "learning_rate": 4.785056495498992e-05, "loss": 0.2529, "step": 12315 }, { "epoch": 0.21966967502586238, "grad_norm": 0.2896729111671448, "learning_rate": 4.784993349274181e-05, "loss": 0.1814, "step": 12316 }, { "epoch": 0.21968751114757606, "grad_norm": 0.2588949203491211, "learning_rate": 4.784930194191947e-05, "loss": 0.2313, "step": 12317 }, { "epoch": 0.21970534726928975, "grad_norm": 0.27120110392570496, "learning_rate": 4.784867030252533e-05, "loss": 0.212, "step": 12318 }, { "epoch": 0.21972318339100347, "grad_norm": 0.3410847783088684, "learning_rate": 4.7848038574561866e-05, "loss": 0.2028, "step": 12319 }, { "epoch": 0.21974101951271716, "grad_norm": 0.2402489185333252, "learning_rate": 4.784740675803151e-05, "loss": 0.1879, "step": 12320 }, { "epoch": 0.21975885563443084, "grad_norm": 0.2496604025363922, "learning_rate": 4.784677485293671e-05, "loss": 0.185, "step": 12321 }, { "epoch": 0.21977669175614453, "grad_norm": 0.3286452889442444, "learning_rate": 4.7846142859279916e-05, "loss": 0.2043, "step": 12322 }, { "epoch": 0.21979452787785825, "grad_norm": 0.3150593638420105, "learning_rate": 4.7845510777063596e-05, "loss": 0.188, "step": 12323 }, { "epoch": 0.21981236399957194, "grad_norm": 0.3060225248336792, "learning_rate": 4.784487860629018e-05, "loss": 0.1816, "step": 12324 }, { "epoch": 0.21983020012128562, "grad_norm": 0.20153634250164032, "learning_rate": 4.784424634696212e-05, "loss": 0.166, "step": 12325 }, { "epoch": 0.2198480362429993, "grad_norm": 0.2629064619541168, "learning_rate": 4.784361399908187e-05, "loss": 0.2131, "step": 12326 }, { "epoch": 0.21986587236471303, "grad_norm": 0.35216501355171204, "learning_rate": 4.7842981562651885e-05, "loss": 0.1888, "step": 12327 }, { "epoch": 0.21988370848642672, "grad_norm": 0.3123012185096741, "learning_rate": 4.784234903767461e-05, "loss": 0.1586, "step": 12328 }, { "epoch": 0.2199015446081404, "grad_norm": 0.23378488421440125, "learning_rate": 4.7841716424152504e-05, "loss": 0.179, "step": 12329 }, { "epoch": 0.2199193807298541, "grad_norm": 0.27993661165237427, "learning_rate": 4.784108372208802e-05, "loss": 0.2027, "step": 12330 }, { "epoch": 0.2199372168515678, "grad_norm": 0.2942934036254883, "learning_rate": 4.7840450931483596e-05, "loss": 0.237, "step": 12331 }, { "epoch": 0.2199550529732815, "grad_norm": 0.24330751597881317, "learning_rate": 4.783981805234171e-05, "loss": 0.1687, "step": 12332 }, { "epoch": 0.21997288909499518, "grad_norm": 0.2552853524684906, "learning_rate": 4.7839185084664785e-05, "loss": 0.1772, "step": 12333 }, { "epoch": 0.21999072521670887, "grad_norm": 0.2882727086544037, "learning_rate": 4.7838552028455294e-05, "loss": 0.1679, "step": 12334 }, { "epoch": 0.2200085613384226, "grad_norm": 0.37794631719589233, "learning_rate": 4.7837918883715695e-05, "loss": 0.1799, "step": 12335 }, { "epoch": 0.22002639746013627, "grad_norm": 0.3529682457447052, "learning_rate": 4.7837285650448434e-05, "loss": 0.1801, "step": 12336 }, { "epoch": 0.22004423358184996, "grad_norm": 0.34412533044815063, "learning_rate": 4.7836652328655964e-05, "loss": 0.1926, "step": 12337 }, { "epoch": 0.22006206970356365, "grad_norm": 0.3385235667228699, "learning_rate": 4.783601891834074e-05, "loss": 0.1939, "step": 12338 }, { "epoch": 0.22007990582527734, "grad_norm": 0.28697073459625244, "learning_rate": 4.7835385419505215e-05, "loss": 0.1902, "step": 12339 }, { "epoch": 0.22009774194699105, "grad_norm": 0.2434438318014145, "learning_rate": 4.783475183215185e-05, "loss": 0.234, "step": 12340 }, { "epoch": 0.22011557806870474, "grad_norm": 0.20635941624641418, "learning_rate": 4.78341181562831e-05, "loss": 0.17, "step": 12341 }, { "epoch": 0.22013341419041843, "grad_norm": 0.2844592332839966, "learning_rate": 4.783348439190143e-05, "loss": 0.2128, "step": 12342 }, { "epoch": 0.22015125031213212, "grad_norm": 0.3016209900379181, "learning_rate": 4.7832850539009284e-05, "loss": 0.177, "step": 12343 }, { "epoch": 0.22016908643384583, "grad_norm": 0.3935510814189911, "learning_rate": 4.7832216597609123e-05, "loss": 0.2482, "step": 12344 }, { "epoch": 0.22018692255555952, "grad_norm": 0.29393836855888367, "learning_rate": 4.7831582567703405e-05, "loss": 0.213, "step": 12345 }, { "epoch": 0.2202047586772732, "grad_norm": 0.21075181663036346, "learning_rate": 4.783094844929458e-05, "loss": 0.1891, "step": 12346 }, { "epoch": 0.2202225947989869, "grad_norm": 0.2374795824289322, "learning_rate": 4.783031424238512e-05, "loss": 0.2152, "step": 12347 }, { "epoch": 0.22024043092070061, "grad_norm": 0.3203499913215637, "learning_rate": 4.782967994697748e-05, "loss": 0.2363, "step": 12348 }, { "epoch": 0.2202582670424143, "grad_norm": 0.29125335812568665, "learning_rate": 4.782904556307411e-05, "loss": 0.1946, "step": 12349 }, { "epoch": 0.220276103164128, "grad_norm": 0.29094862937927246, "learning_rate": 4.7828411090677473e-05, "loss": 0.1917, "step": 12350 }, { "epoch": 0.22029393928584168, "grad_norm": 0.22975733876228333, "learning_rate": 4.7827776529790036e-05, "loss": 0.157, "step": 12351 }, { "epoch": 0.2203117754075554, "grad_norm": 0.29881027340888977, "learning_rate": 4.782714188041425e-05, "loss": 0.2171, "step": 12352 }, { "epoch": 0.22032961152926908, "grad_norm": 0.32889387011528015, "learning_rate": 4.7826507142552575e-05, "loss": 0.2454, "step": 12353 }, { "epoch": 0.22034744765098277, "grad_norm": 0.34230297803878784, "learning_rate": 4.782587231620748e-05, "loss": 0.218, "step": 12354 }, { "epoch": 0.22036528377269646, "grad_norm": 0.3229703903198242, "learning_rate": 4.7825237401381414e-05, "loss": 0.195, "step": 12355 }, { "epoch": 0.22038311989441017, "grad_norm": 0.2649451196193695, "learning_rate": 4.7824602398076844e-05, "loss": 0.2257, "step": 12356 }, { "epoch": 0.22040095601612386, "grad_norm": 0.26519548892974854, "learning_rate": 4.782396730629623e-05, "loss": 0.1385, "step": 12357 }, { "epoch": 0.22041879213783755, "grad_norm": 0.26670214533805847, "learning_rate": 4.782333212604204e-05, "loss": 0.2327, "step": 12358 }, { "epoch": 0.22043662825955124, "grad_norm": 0.3590201437473297, "learning_rate": 4.782269685731674e-05, "loss": 0.2465, "step": 12359 }, { "epoch": 0.22045446438126493, "grad_norm": 0.20097680389881134, "learning_rate": 4.7822061500122774e-05, "loss": 0.1764, "step": 12360 }, { "epoch": 0.22047230050297864, "grad_norm": 0.8733730912208557, "learning_rate": 4.782142605446261e-05, "loss": 0.2678, "step": 12361 }, { "epoch": 0.22049013662469233, "grad_norm": 0.30836859345436096, "learning_rate": 4.782079052033873e-05, "loss": 0.1088, "step": 12362 }, { "epoch": 0.22050797274640602, "grad_norm": 0.3191524147987366, "learning_rate": 4.782015489775358e-05, "loss": 0.1871, "step": 12363 }, { "epoch": 0.2205258088681197, "grad_norm": 0.34677889943122864, "learning_rate": 4.781951918670962e-05, "loss": 0.2062, "step": 12364 }, { "epoch": 0.22054364498983342, "grad_norm": 0.28938624262809753, "learning_rate": 4.781888338720933e-05, "loss": 0.2099, "step": 12365 }, { "epoch": 0.2205614811115471, "grad_norm": 0.5357033610343933, "learning_rate": 4.781824749925516e-05, "loss": 0.2763, "step": 12366 }, { "epoch": 0.2205793172332608, "grad_norm": 0.2821754217147827, "learning_rate": 4.7817611522849584e-05, "loss": 0.2164, "step": 12367 }, { "epoch": 0.22059715335497448, "grad_norm": 0.25177887082099915, "learning_rate": 4.781697545799507e-05, "loss": 0.2464, "step": 12368 }, { "epoch": 0.2206149894766882, "grad_norm": 0.24871040880680084, "learning_rate": 4.781633930469407e-05, "loss": 0.1728, "step": 12369 }, { "epoch": 0.2206328255984019, "grad_norm": 0.26730161905288696, "learning_rate": 4.781570306294907e-05, "loss": 0.2026, "step": 12370 }, { "epoch": 0.22065066172011558, "grad_norm": 0.23566238582134247, "learning_rate": 4.781506673276251e-05, "loss": 0.1298, "step": 12371 }, { "epoch": 0.22066849784182926, "grad_norm": 0.3828012943267822, "learning_rate": 4.781443031413688e-05, "loss": 0.1918, "step": 12372 }, { "epoch": 0.22068633396354298, "grad_norm": 0.2641248404979706, "learning_rate": 4.7813793807074636e-05, "loss": 0.1923, "step": 12373 }, { "epoch": 0.22070417008525667, "grad_norm": 0.2882792055606842, "learning_rate": 4.781315721157825e-05, "loss": 0.2229, "step": 12374 }, { "epoch": 0.22072200620697036, "grad_norm": 0.2525346875190735, "learning_rate": 4.781252052765019e-05, "loss": 0.1523, "step": 12375 }, { "epoch": 0.22073984232868404, "grad_norm": 0.4984354078769684, "learning_rate": 4.781188375529292e-05, "loss": 0.1894, "step": 12376 }, { "epoch": 0.22075767845039773, "grad_norm": 0.29797837138175964, "learning_rate": 4.78112468945089e-05, "loss": 0.2333, "step": 12377 }, { "epoch": 0.22077551457211145, "grad_norm": 0.34396496415138245, "learning_rate": 4.781060994530062e-05, "loss": 0.2196, "step": 12378 }, { "epoch": 0.22079335069382514, "grad_norm": 0.3002025783061981, "learning_rate": 4.780997290767053e-05, "loss": 0.2182, "step": 12379 }, { "epoch": 0.22081118681553882, "grad_norm": 0.32589346170425415, "learning_rate": 4.780933578162111e-05, "loss": 0.2119, "step": 12380 }, { "epoch": 0.2208290229372525, "grad_norm": 0.3294805586338043, "learning_rate": 4.780869856715482e-05, "loss": 0.2759, "step": 12381 }, { "epoch": 0.22084685905896623, "grad_norm": 0.20914940536022186, "learning_rate": 4.7808061264274145e-05, "loss": 0.1827, "step": 12382 }, { "epoch": 0.22086469518067992, "grad_norm": 0.1981377750635147, "learning_rate": 4.7807423872981546e-05, "loss": 0.1795, "step": 12383 }, { "epoch": 0.2208825313023936, "grad_norm": 0.26864659786224365, "learning_rate": 4.780678639327949e-05, "loss": 0.1919, "step": 12384 }, { "epoch": 0.2209003674241073, "grad_norm": 0.26839518547058105, "learning_rate": 4.780614882517045e-05, "loss": 0.195, "step": 12385 }, { "epoch": 0.220918203545821, "grad_norm": 0.27852749824523926, "learning_rate": 4.7805511168656916e-05, "loss": 0.1043, "step": 12386 }, { "epoch": 0.2209360396675347, "grad_norm": 0.33031165599823, "learning_rate": 4.7804873423741323e-05, "loss": 0.2583, "step": 12387 }, { "epoch": 0.22095387578924838, "grad_norm": 0.23721639811992645, "learning_rate": 4.780423559042618e-05, "loss": 0.1855, "step": 12388 }, { "epoch": 0.22097171191096207, "grad_norm": 0.2521204650402069, "learning_rate": 4.780359766871394e-05, "loss": 0.1738, "step": 12389 }, { "epoch": 0.2209895480326758, "grad_norm": 0.26155155897140503, "learning_rate": 4.780295965860707e-05, "loss": 0.2106, "step": 12390 }, { "epoch": 0.22100738415438947, "grad_norm": 0.29395127296447754, "learning_rate": 4.7802321560108064e-05, "loss": 0.1983, "step": 12391 }, { "epoch": 0.22102522027610316, "grad_norm": 0.35063931345939636, "learning_rate": 4.780168337321938e-05, "loss": 0.2406, "step": 12392 }, { "epoch": 0.22104305639781685, "grad_norm": 0.3468126654624939, "learning_rate": 4.78010450979435e-05, "loss": 0.1558, "step": 12393 }, { "epoch": 0.22106089251953057, "grad_norm": 0.3247314989566803, "learning_rate": 4.780040673428289e-05, "loss": 0.1939, "step": 12394 }, { "epoch": 0.22107872864124425, "grad_norm": 0.29034194350242615, "learning_rate": 4.779976828224002e-05, "loss": 0.1906, "step": 12395 }, { "epoch": 0.22109656476295794, "grad_norm": 0.2510344386100769, "learning_rate": 4.779912974181738e-05, "loss": 0.1675, "step": 12396 }, { "epoch": 0.22111440088467163, "grad_norm": 0.2595363259315491, "learning_rate": 4.779849111301744e-05, "loss": 0.1586, "step": 12397 }, { "epoch": 0.22113223700638532, "grad_norm": 0.34143128991127014, "learning_rate": 4.779785239584268e-05, "loss": 0.1739, "step": 12398 }, { "epoch": 0.22115007312809903, "grad_norm": 0.2366911768913269, "learning_rate": 4.779721359029556e-05, "loss": 0.1999, "step": 12399 }, { "epoch": 0.22116790924981272, "grad_norm": 0.37339404225349426, "learning_rate": 4.7796574696378574e-05, "loss": 0.1865, "step": 12400 }, { "epoch": 0.2211857453715264, "grad_norm": 0.3215400278568268, "learning_rate": 4.7795935714094186e-05, "loss": 0.2001, "step": 12401 }, { "epoch": 0.2212035814932401, "grad_norm": 0.21011440455913544, "learning_rate": 4.7795296643444874e-05, "loss": 0.1924, "step": 12402 }, { "epoch": 0.2212214176149538, "grad_norm": 0.33960291743278503, "learning_rate": 4.7794657484433126e-05, "loss": 0.2013, "step": 12403 }, { "epoch": 0.2212392537366675, "grad_norm": 0.2527913749217987, "learning_rate": 4.7794018237061414e-05, "loss": 0.1877, "step": 12404 }, { "epoch": 0.2212570898583812, "grad_norm": 0.22020575404167175, "learning_rate": 4.7793378901332206e-05, "loss": 0.177, "step": 12405 }, { "epoch": 0.22127492598009488, "grad_norm": 0.2744147777557373, "learning_rate": 4.7792739477247996e-05, "loss": 0.1796, "step": 12406 }, { "epoch": 0.2212927621018086, "grad_norm": 0.29843688011169434, "learning_rate": 4.779209996481125e-05, "loss": 0.1583, "step": 12407 }, { "epoch": 0.22131059822352228, "grad_norm": 0.34808576107025146, "learning_rate": 4.779146036402445e-05, "loss": 0.1641, "step": 12408 }, { "epoch": 0.22132843434523597, "grad_norm": 0.3092830777168274, "learning_rate": 4.7790820674890094e-05, "loss": 0.2148, "step": 12409 }, { "epoch": 0.22134627046694966, "grad_norm": 0.3606700301170349, "learning_rate": 4.779018089741063e-05, "loss": 0.2408, "step": 12410 }, { "epoch": 0.22136410658866337, "grad_norm": 0.28641277551651, "learning_rate": 4.778954103158856e-05, "loss": 0.2272, "step": 12411 }, { "epoch": 0.22138194271037706, "grad_norm": 0.2776421904563904, "learning_rate": 4.778890107742635e-05, "loss": 0.2094, "step": 12412 }, { "epoch": 0.22139977883209075, "grad_norm": 0.2324228435754776, "learning_rate": 4.778826103492649e-05, "loss": 0.182, "step": 12413 }, { "epoch": 0.22141761495380444, "grad_norm": 0.27369239926338196, "learning_rate": 4.778762090409147e-05, "loss": 0.19, "step": 12414 }, { "epoch": 0.22143545107551815, "grad_norm": 0.23946613073349, "learning_rate": 4.778698068492376e-05, "loss": 0.1773, "step": 12415 }, { "epoch": 0.22145328719723184, "grad_norm": 0.30583590269088745, "learning_rate": 4.778634037742583e-05, "loss": 0.194, "step": 12416 }, { "epoch": 0.22147112331894553, "grad_norm": 0.2545165419578552, "learning_rate": 4.778569998160018e-05, "loss": 0.1727, "step": 12417 }, { "epoch": 0.22148895944065922, "grad_norm": 0.37317144870758057, "learning_rate": 4.778505949744929e-05, "loss": 0.2441, "step": 12418 }, { "epoch": 0.2215067955623729, "grad_norm": 0.30775630474090576, "learning_rate": 4.778441892497564e-05, "loss": 0.2023, "step": 12419 }, { "epoch": 0.22152463168408662, "grad_norm": 0.24987727403640747, "learning_rate": 4.7783778264181704e-05, "loss": 0.2301, "step": 12420 }, { "epoch": 0.2215424678058003, "grad_norm": 0.2718750834465027, "learning_rate": 4.778313751506998e-05, "loss": 0.2274, "step": 12421 }, { "epoch": 0.221560303927514, "grad_norm": 0.28519824147224426, "learning_rate": 4.7782496677642954e-05, "loss": 0.1384, "step": 12422 }, { "epoch": 0.22157814004922768, "grad_norm": 0.3110770285129547, "learning_rate": 4.7781855751903084e-05, "loss": 0.2168, "step": 12423 }, { "epoch": 0.2215959761709414, "grad_norm": 0.26959535479545593, "learning_rate": 4.778121473785289e-05, "loss": 0.181, "step": 12424 }, { "epoch": 0.2216138122926551, "grad_norm": 0.2882334887981415, "learning_rate": 4.778057363549483e-05, "loss": 0.2345, "step": 12425 }, { "epoch": 0.22163164841436878, "grad_norm": 0.36539745330810547, "learning_rate": 4.77799324448314e-05, "loss": 0.2083, "step": 12426 }, { "epoch": 0.22164948453608246, "grad_norm": 0.3765331506729126, "learning_rate": 4.7779291165865084e-05, "loss": 0.2543, "step": 12427 }, { "epoch": 0.22166732065779618, "grad_norm": 0.34074121713638306, "learning_rate": 4.7778649798598374e-05, "loss": 0.2185, "step": 12428 }, { "epoch": 0.22168515677950987, "grad_norm": 0.28381723165512085, "learning_rate": 4.7778008343033745e-05, "loss": 0.1947, "step": 12429 }, { "epoch": 0.22170299290122356, "grad_norm": 0.2325785756111145, "learning_rate": 4.777736679917368e-05, "loss": 0.1974, "step": 12430 }, { "epoch": 0.22172082902293724, "grad_norm": 0.2748464345932007, "learning_rate": 4.777672516702069e-05, "loss": 0.1967, "step": 12431 }, { "epoch": 0.22173866514465096, "grad_norm": 0.34121984243392944, "learning_rate": 4.7776083446577235e-05, "loss": 0.2096, "step": 12432 }, { "epoch": 0.22175650126636465, "grad_norm": 0.2746466398239136, "learning_rate": 4.777544163784582e-05, "loss": 0.2211, "step": 12433 }, { "epoch": 0.22177433738807834, "grad_norm": 0.3871321380138397, "learning_rate": 4.777479974082893e-05, "loss": 0.1872, "step": 12434 }, { "epoch": 0.22179217350979202, "grad_norm": 0.38841402530670166, "learning_rate": 4.777415775552904e-05, "loss": 0.2271, "step": 12435 }, { "epoch": 0.22181000963150574, "grad_norm": 0.21632587909698486, "learning_rate": 4.7773515681948645e-05, "loss": 0.1863, "step": 12436 }, { "epoch": 0.22182784575321943, "grad_norm": 0.25671523809432983, "learning_rate": 4.777287352009024e-05, "loss": 0.1826, "step": 12437 }, { "epoch": 0.22184568187493311, "grad_norm": 0.31478574872016907, "learning_rate": 4.777223126995633e-05, "loss": 0.2155, "step": 12438 }, { "epoch": 0.2218635179966468, "grad_norm": 0.2308403104543686, "learning_rate": 4.777158893154937e-05, "loss": 0.1729, "step": 12439 }, { "epoch": 0.2218813541183605, "grad_norm": 0.31062284111976624, "learning_rate": 4.777094650487187e-05, "loss": 0.1945, "step": 12440 }, { "epoch": 0.2218991902400742, "grad_norm": 0.2726714015007019, "learning_rate": 4.7770303989926315e-05, "loss": 0.2605, "step": 12441 }, { "epoch": 0.2219170263617879, "grad_norm": 0.3029727339744568, "learning_rate": 4.77696613867152e-05, "loss": 0.1779, "step": 12442 }, { "epoch": 0.22193486248350158, "grad_norm": 0.1870802640914917, "learning_rate": 4.776901869524101e-05, "loss": 0.1698, "step": 12443 }, { "epoch": 0.22195269860521527, "grad_norm": 0.3114982843399048, "learning_rate": 4.776837591550624e-05, "loss": 0.1462, "step": 12444 }, { "epoch": 0.221970534726929, "grad_norm": 0.2691832482814789, "learning_rate": 4.776773304751338e-05, "loss": 0.1868, "step": 12445 }, { "epoch": 0.22198837084864267, "grad_norm": 0.24110494554042816, "learning_rate": 4.776709009126492e-05, "loss": 0.1902, "step": 12446 }, { "epoch": 0.22200620697035636, "grad_norm": 0.38330644369125366, "learning_rate": 4.776644704676336e-05, "loss": 0.2426, "step": 12447 }, { "epoch": 0.22202404309207005, "grad_norm": 0.336843341588974, "learning_rate": 4.7765803914011194e-05, "loss": 0.2182, "step": 12448 }, { "epoch": 0.22204187921378377, "grad_norm": 0.31483957171440125, "learning_rate": 4.77651606930109e-05, "loss": 0.2141, "step": 12449 }, { "epoch": 0.22205971533549745, "grad_norm": 0.30925506353378296, "learning_rate": 4.776451738376499e-05, "loss": 0.1692, "step": 12450 }, { "epoch": 0.22207755145721114, "grad_norm": 0.3509606122970581, "learning_rate": 4.776387398627594e-05, "loss": 0.1632, "step": 12451 }, { "epoch": 0.22209538757892483, "grad_norm": 0.22312790155410767, "learning_rate": 4.776323050054625e-05, "loss": 0.1781, "step": 12452 }, { "epoch": 0.22211322370063855, "grad_norm": 0.2647301256656647, "learning_rate": 4.776258692657842e-05, "loss": 0.2245, "step": 12453 }, { "epoch": 0.22213105982235223, "grad_norm": 0.2997078001499176, "learning_rate": 4.776194326437494e-05, "loss": 0.2456, "step": 12454 }, { "epoch": 0.22214889594406592, "grad_norm": 0.25826555490493774, "learning_rate": 4.776129951393831e-05, "loss": 0.1465, "step": 12455 }, { "epoch": 0.2221667320657796, "grad_norm": 0.24937480688095093, "learning_rate": 4.776065567527102e-05, "loss": 0.1995, "step": 12456 }, { "epoch": 0.2221845681874933, "grad_norm": 0.2960071563720703, "learning_rate": 4.7760011748375564e-05, "loss": 0.1851, "step": 12457 }, { "epoch": 0.222202404309207, "grad_norm": 0.2337069809436798, "learning_rate": 4.775936773325444e-05, "loss": 0.1983, "step": 12458 }, { "epoch": 0.2222202404309207, "grad_norm": 0.23904629051685333, "learning_rate": 4.775872362991015e-05, "loss": 0.2313, "step": 12459 }, { "epoch": 0.2222380765526344, "grad_norm": 0.3666958808898926, "learning_rate": 4.775807943834518e-05, "loss": 0.2248, "step": 12460 }, { "epoch": 0.22225591267434808, "grad_norm": 0.2366989105939865, "learning_rate": 4.775743515856205e-05, "loss": 0.2115, "step": 12461 }, { "epoch": 0.2222737487960618, "grad_norm": 0.24880069494247437, "learning_rate": 4.775679079056323e-05, "loss": 0.2274, "step": 12462 }, { "epoch": 0.22229158491777548, "grad_norm": 0.20895929634571075, "learning_rate": 4.775614633435123e-05, "loss": 0.1992, "step": 12463 }, { "epoch": 0.22230942103948917, "grad_norm": 0.23697924613952637, "learning_rate": 4.7755501789928544e-05, "loss": 0.2289, "step": 12464 }, { "epoch": 0.22232725716120286, "grad_norm": 0.292427122592926, "learning_rate": 4.775485715729767e-05, "loss": 0.2371, "step": 12465 }, { "epoch": 0.22234509328291657, "grad_norm": 0.28915947675704956, "learning_rate": 4.775421243646112e-05, "loss": 0.2467, "step": 12466 }, { "epoch": 0.22236292940463026, "grad_norm": 0.4016871750354767, "learning_rate": 4.775356762742138e-05, "loss": 0.2503, "step": 12467 }, { "epoch": 0.22238076552634395, "grad_norm": 0.301969438791275, "learning_rate": 4.775292273018095e-05, "loss": 0.1695, "step": 12468 }, { "epoch": 0.22239860164805764, "grad_norm": 0.2205217480659485, "learning_rate": 4.775227774474234e-05, "loss": 0.1614, "step": 12469 }, { "epoch": 0.22241643776977135, "grad_norm": 0.30096668004989624, "learning_rate": 4.775163267110804e-05, "loss": 0.2206, "step": 12470 }, { "epoch": 0.22243427389148504, "grad_norm": 0.29696354269981384, "learning_rate": 4.7750987509280554e-05, "loss": 0.2207, "step": 12471 }, { "epoch": 0.22245211001319873, "grad_norm": 0.22098317742347717, "learning_rate": 4.775034225926238e-05, "loss": 0.19, "step": 12472 }, { "epoch": 0.22246994613491242, "grad_norm": 0.26950424909591675, "learning_rate": 4.774969692105602e-05, "loss": 0.1691, "step": 12473 }, { "epoch": 0.22248778225662613, "grad_norm": 0.33573946356773376, "learning_rate": 4.774905149466398e-05, "loss": 0.2512, "step": 12474 }, { "epoch": 0.22250561837833982, "grad_norm": 0.22495293617248535, "learning_rate": 4.7748405980088764e-05, "loss": 0.182, "step": 12475 }, { "epoch": 0.2225234545000535, "grad_norm": 0.4752029776573181, "learning_rate": 4.7747760377332864e-05, "loss": 0.1851, "step": 12476 }, { "epoch": 0.2225412906217672, "grad_norm": 0.2621522843837738, "learning_rate": 4.774711468639879e-05, "loss": 0.2193, "step": 12477 }, { "epoch": 0.22255912674348088, "grad_norm": 0.19988669455051422, "learning_rate": 4.7746468907289035e-05, "loss": 0.1945, "step": 12478 }, { "epoch": 0.2225769628651946, "grad_norm": 0.2635309398174286, "learning_rate": 4.774582304000612e-05, "loss": 0.2111, "step": 12479 }, { "epoch": 0.2225947989869083, "grad_norm": 0.2650481164455414, "learning_rate": 4.774517708455254e-05, "loss": 0.162, "step": 12480 }, { "epoch": 0.22261263510862198, "grad_norm": 0.24110287427902222, "learning_rate": 4.774453104093079e-05, "loss": 0.2034, "step": 12481 }, { "epoch": 0.22263047123033566, "grad_norm": 0.22477370500564575, "learning_rate": 4.774388490914339e-05, "loss": 0.1907, "step": 12482 }, { "epoch": 0.22264830735204938, "grad_norm": 0.26015955209732056, "learning_rate": 4.774323868919283e-05, "loss": 0.1822, "step": 12483 }, { "epoch": 0.22266614347376307, "grad_norm": 0.26121658086776733, "learning_rate": 4.774259238108162e-05, "loss": 0.1732, "step": 12484 }, { "epoch": 0.22268397959547676, "grad_norm": 0.23913203179836273, "learning_rate": 4.774194598481227e-05, "loss": 0.2201, "step": 12485 }, { "epoch": 0.22270181571719044, "grad_norm": 0.3138361871242523, "learning_rate": 4.774129950038728e-05, "loss": 0.1932, "step": 12486 }, { "epoch": 0.22271965183890416, "grad_norm": 0.30828264355659485, "learning_rate": 4.774065292780916e-05, "loss": 0.2261, "step": 12487 }, { "epoch": 0.22273748796061785, "grad_norm": 0.30259039998054504, "learning_rate": 4.7740006267080415e-05, "loss": 0.1323, "step": 12488 }, { "epoch": 0.22275532408233154, "grad_norm": 0.2695466876029968, "learning_rate": 4.7739359518203556e-05, "loss": 0.2086, "step": 12489 }, { "epoch": 0.22277316020404522, "grad_norm": 0.2501279413700104, "learning_rate": 4.773871268118109e-05, "loss": 0.2003, "step": 12490 }, { "epoch": 0.22279099632575894, "grad_norm": 0.27336621284484863, "learning_rate": 4.7738065756015504e-05, "loss": 0.2233, "step": 12491 }, { "epoch": 0.22280883244747263, "grad_norm": 0.2427319586277008, "learning_rate": 4.773741874270933e-05, "loss": 0.1901, "step": 12492 }, { "epoch": 0.22282666856918631, "grad_norm": 0.2651156187057495, "learning_rate": 4.773677164126507e-05, "loss": 0.1881, "step": 12493 }, { "epoch": 0.2228445046909, "grad_norm": 0.26140034198760986, "learning_rate": 4.773612445168523e-05, "loss": 0.1733, "step": 12494 }, { "epoch": 0.22286234081261372, "grad_norm": 0.30591756105422974, "learning_rate": 4.7735477173972315e-05, "loss": 0.1733, "step": 12495 }, { "epoch": 0.2228801769343274, "grad_norm": 0.2474837601184845, "learning_rate": 4.773482980812884e-05, "loss": 0.1775, "step": 12496 }, { "epoch": 0.2228980130560411, "grad_norm": 0.30363044142723083, "learning_rate": 4.773418235415731e-05, "loss": 0.1746, "step": 12497 }, { "epoch": 0.22291584917775478, "grad_norm": 0.2405654489994049, "learning_rate": 4.773353481206024e-05, "loss": 0.1915, "step": 12498 }, { "epoch": 0.22293368529946847, "grad_norm": 0.2663644850254059, "learning_rate": 4.7732887181840135e-05, "loss": 0.2052, "step": 12499 }, { "epoch": 0.22295152142118219, "grad_norm": 0.22968687117099762, "learning_rate": 4.773223946349951e-05, "loss": 0.1872, "step": 12500 }, { "epoch": 0.22296935754289587, "grad_norm": 0.2624761462211609, "learning_rate": 4.7731591657040866e-05, "loss": 0.1883, "step": 12501 }, { "epoch": 0.22298719366460956, "grad_norm": 0.23904605209827423, "learning_rate": 4.773094376246673e-05, "loss": 0.1906, "step": 12502 }, { "epoch": 0.22300502978632325, "grad_norm": 0.2636966407299042, "learning_rate": 4.773029577977961e-05, "loss": 0.193, "step": 12503 }, { "epoch": 0.22302286590803697, "grad_norm": 0.33867472410202026, "learning_rate": 4.772964770898199e-05, "loss": 0.1561, "step": 12504 }, { "epoch": 0.22304070202975065, "grad_norm": 0.3870651423931122, "learning_rate": 4.7728999550076424e-05, "loss": 0.1675, "step": 12505 }, { "epoch": 0.22305853815146434, "grad_norm": 0.34564855694770813, "learning_rate": 4.772835130306541e-05, "loss": 0.1897, "step": 12506 }, { "epoch": 0.22307637427317803, "grad_norm": 0.19082261621952057, "learning_rate": 4.772770296795144e-05, "loss": 0.1563, "step": 12507 }, { "epoch": 0.22309421039489175, "grad_norm": 0.32542216777801514, "learning_rate": 4.772705454473705e-05, "loss": 0.2012, "step": 12508 }, { "epoch": 0.22311204651660543, "grad_norm": 0.19786059856414795, "learning_rate": 4.7726406033424754e-05, "loss": 0.1493, "step": 12509 }, { "epoch": 0.22312988263831912, "grad_norm": 0.30620458722114563, "learning_rate": 4.772575743401705e-05, "loss": 0.2117, "step": 12510 }, { "epoch": 0.2231477187600328, "grad_norm": 0.22794589400291443, "learning_rate": 4.7725108746516466e-05, "loss": 0.1536, "step": 12511 }, { "epoch": 0.22316555488174653, "grad_norm": 0.27520862221717834, "learning_rate": 4.7724459970925503e-05, "loss": 0.1601, "step": 12512 }, { "epoch": 0.2231833910034602, "grad_norm": 0.27101796865463257, "learning_rate": 4.77238111072467e-05, "loss": 0.1833, "step": 12513 }, { "epoch": 0.2232012271251739, "grad_norm": 0.24299657344818115, "learning_rate": 4.772316215548254e-05, "loss": 0.1955, "step": 12514 }, { "epoch": 0.2232190632468876, "grad_norm": 0.28873303532600403, "learning_rate": 4.772251311563557e-05, "loss": 0.2151, "step": 12515 }, { "epoch": 0.2232368993686013, "grad_norm": 0.2779642343521118, "learning_rate": 4.772186398770828e-05, "loss": 0.2124, "step": 12516 }, { "epoch": 0.223254735490315, "grad_norm": 0.32345345616340637, "learning_rate": 4.77212147717032e-05, "loss": 0.2303, "step": 12517 }, { "epoch": 0.22327257161202868, "grad_norm": 0.2563976049423218, "learning_rate": 4.7720565467622844e-05, "loss": 0.1996, "step": 12518 }, { "epoch": 0.22329040773374237, "grad_norm": 0.4193630814552307, "learning_rate": 4.771991607546973e-05, "loss": 0.1548, "step": 12519 }, { "epoch": 0.22330824385545606, "grad_norm": 0.2679010033607483, "learning_rate": 4.771926659524637e-05, "loss": 0.2244, "step": 12520 }, { "epoch": 0.22332607997716977, "grad_norm": 0.2944014072418213, "learning_rate": 4.771861702695529e-05, "loss": 0.1156, "step": 12521 }, { "epoch": 0.22334391609888346, "grad_norm": 0.2515939176082611, "learning_rate": 4.7717967370599e-05, "loss": 0.1958, "step": 12522 }, { "epoch": 0.22336175222059715, "grad_norm": 0.25248903036117554, "learning_rate": 4.771731762618003e-05, "loss": 0.1916, "step": 12523 }, { "epoch": 0.22337958834231084, "grad_norm": 0.2968897521495819, "learning_rate": 4.7716667793700886e-05, "loss": 0.2102, "step": 12524 }, { "epoch": 0.22339742446402455, "grad_norm": 0.31653228402137756, "learning_rate": 4.7716017873164085e-05, "loss": 0.2381, "step": 12525 }, { "epoch": 0.22341526058573824, "grad_norm": 0.28874629735946655, "learning_rate": 4.7715367864572164e-05, "loss": 0.1976, "step": 12526 }, { "epoch": 0.22343309670745193, "grad_norm": 0.2989388108253479, "learning_rate": 4.7714717767927624e-05, "loss": 0.1361, "step": 12527 }, { "epoch": 0.22345093282916562, "grad_norm": 0.2818071246147156, "learning_rate": 4.7714067583232993e-05, "loss": 0.1458, "step": 12528 }, { "epoch": 0.22346876895087933, "grad_norm": 0.3405362665653229, "learning_rate": 4.771341731049079e-05, "loss": 0.2437, "step": 12529 }, { "epoch": 0.22348660507259302, "grad_norm": 0.227031409740448, "learning_rate": 4.771276694970355e-05, "loss": 0.1537, "step": 12530 }, { "epoch": 0.2235044411943067, "grad_norm": 0.21507854759693146, "learning_rate": 4.771211650087377e-05, "loss": 0.2011, "step": 12531 }, { "epoch": 0.2235222773160204, "grad_norm": 0.28749793767929077, "learning_rate": 4.771146596400398e-05, "loss": 0.2227, "step": 12532 }, { "epoch": 0.2235401134377341, "grad_norm": 0.20286786556243896, "learning_rate": 4.77108153390967e-05, "loss": 0.1821, "step": 12533 }, { "epoch": 0.2235579495594478, "grad_norm": 0.2581403851509094, "learning_rate": 4.771016462615446e-05, "loss": 0.191, "step": 12534 }, { "epoch": 0.2235757856811615, "grad_norm": 0.2919760048389435, "learning_rate": 4.7709513825179785e-05, "loss": 0.2362, "step": 12535 }, { "epoch": 0.22359362180287518, "grad_norm": 0.25975555181503296, "learning_rate": 4.770886293617518e-05, "loss": 0.2548, "step": 12536 }, { "epoch": 0.2236114579245889, "grad_norm": 0.3117729723453522, "learning_rate": 4.770821195914319e-05, "loss": 0.2579, "step": 12537 }, { "epoch": 0.22362929404630258, "grad_norm": 0.20171460509300232, "learning_rate": 4.770756089408632e-05, "loss": 0.1327, "step": 12538 }, { "epoch": 0.22364713016801627, "grad_norm": 0.2525087893009186, "learning_rate": 4.77069097410071e-05, "loss": 0.1552, "step": 12539 }, { "epoch": 0.22366496628972996, "grad_norm": 0.3625781238079071, "learning_rate": 4.770625849990806e-05, "loss": 0.2671, "step": 12540 }, { "epoch": 0.22368280241144364, "grad_norm": 0.3005658686161041, "learning_rate": 4.770560717079171e-05, "loss": 0.1834, "step": 12541 }, { "epoch": 0.22370063853315736, "grad_norm": 0.23191936314105988, "learning_rate": 4.77049557536606e-05, "loss": 0.1706, "step": 12542 }, { "epoch": 0.22371847465487105, "grad_norm": 0.316008597612381, "learning_rate": 4.7704304248517225e-05, "loss": 0.183, "step": 12543 }, { "epoch": 0.22373631077658473, "grad_norm": 0.27352115511894226, "learning_rate": 4.770365265536413e-05, "loss": 0.1931, "step": 12544 }, { "epoch": 0.22375414689829842, "grad_norm": 0.28434449434280396, "learning_rate": 4.770300097420384e-05, "loss": 0.2155, "step": 12545 }, { "epoch": 0.22377198302001214, "grad_norm": 0.2685566246509552, "learning_rate": 4.770234920503887e-05, "loss": 0.1824, "step": 12546 }, { "epoch": 0.22378981914172583, "grad_norm": 0.25530388951301575, "learning_rate": 4.770169734787175e-05, "loss": 0.1808, "step": 12547 }, { "epoch": 0.22380765526343951, "grad_norm": 0.21423391997814178, "learning_rate": 4.770104540270502e-05, "loss": 0.1425, "step": 12548 }, { "epoch": 0.2238254913851532, "grad_norm": 0.2734600305557251, "learning_rate": 4.77003933695412e-05, "loss": 0.191, "step": 12549 }, { "epoch": 0.22384332750686692, "grad_norm": 0.2774091958999634, "learning_rate": 4.769974124838281e-05, "loss": 0.1139, "step": 12550 }, { "epoch": 0.2238611636285806, "grad_norm": 0.2846418619155884, "learning_rate": 4.769908903923238e-05, "loss": 0.1406, "step": 12551 }, { "epoch": 0.2238789997502943, "grad_norm": 0.25708311796188354, "learning_rate": 4.769843674209244e-05, "loss": 0.1881, "step": 12552 }, { "epoch": 0.22389683587200798, "grad_norm": 0.2455696016550064, "learning_rate": 4.769778435696552e-05, "loss": 0.1705, "step": 12553 }, { "epoch": 0.2239146719937217, "grad_norm": 0.17797338962554932, "learning_rate": 4.769713188385415e-05, "loss": 0.1583, "step": 12554 }, { "epoch": 0.22393250811543539, "grad_norm": 0.23763418197631836, "learning_rate": 4.769647932276086e-05, "loss": 0.1589, "step": 12555 }, { "epoch": 0.22395034423714907, "grad_norm": 0.30871322751045227, "learning_rate": 4.769582667368817e-05, "loss": 0.2244, "step": 12556 }, { "epoch": 0.22396818035886276, "grad_norm": 0.25054746866226196, "learning_rate": 4.769517393663863e-05, "loss": 0.2029, "step": 12557 }, { "epoch": 0.22398601648057645, "grad_norm": 0.24173471331596375, "learning_rate": 4.7694521111614744e-05, "loss": 0.1945, "step": 12558 }, { "epoch": 0.22400385260229017, "grad_norm": 0.29892677068710327, "learning_rate": 4.769386819861906e-05, "loss": 0.2288, "step": 12559 }, { "epoch": 0.22402168872400385, "grad_norm": 0.22668752074241638, "learning_rate": 4.769321519765411e-05, "loss": 0.2058, "step": 12560 }, { "epoch": 0.22403952484571754, "grad_norm": 0.22964948415756226, "learning_rate": 4.7692562108722406e-05, "loss": 0.1918, "step": 12561 }, { "epoch": 0.22405736096743123, "grad_norm": 0.2790120840072632, "learning_rate": 4.76919089318265e-05, "loss": 0.1506, "step": 12562 }, { "epoch": 0.22407519708914495, "grad_norm": 0.25331994891166687, "learning_rate": 4.769125566696893e-05, "loss": 0.1688, "step": 12563 }, { "epoch": 0.22409303321085863, "grad_norm": 0.3228544592857361, "learning_rate": 4.76906023141522e-05, "loss": 0.2381, "step": 12564 }, { "epoch": 0.22411086933257232, "grad_norm": 0.3493567109107971, "learning_rate": 4.768994887337887e-05, "loss": 0.191, "step": 12565 }, { "epoch": 0.224128705454286, "grad_norm": 0.2573733925819397, "learning_rate": 4.768929534465145e-05, "loss": 0.2453, "step": 12566 }, { "epoch": 0.22414654157599972, "grad_norm": 0.26742616295814514, "learning_rate": 4.768864172797249e-05, "loss": 0.1768, "step": 12567 }, { "epoch": 0.2241643776977134, "grad_norm": 0.2692878246307373, "learning_rate": 4.768798802334452e-05, "loss": 0.1718, "step": 12568 }, { "epoch": 0.2241822138194271, "grad_norm": 0.31958574056625366, "learning_rate": 4.768733423077007e-05, "loss": 0.2664, "step": 12569 }, { "epoch": 0.2242000499411408, "grad_norm": 0.20087982714176178, "learning_rate": 4.768668035025168e-05, "loss": 0.1579, "step": 12570 }, { "epoch": 0.2242178860628545, "grad_norm": 0.2572091817855835, "learning_rate": 4.768602638179187e-05, "loss": 0.1833, "step": 12571 }, { "epoch": 0.2242357221845682, "grad_norm": 0.23120689392089844, "learning_rate": 4.7685372325393195e-05, "loss": 0.1457, "step": 12572 }, { "epoch": 0.22425355830628188, "grad_norm": 0.2992466390132904, "learning_rate": 4.768471818105819e-05, "loss": 0.229, "step": 12573 }, { "epoch": 0.22427139442799557, "grad_norm": 0.3062695860862732, "learning_rate": 4.7684063948789365e-05, "loss": 0.2456, "step": 12574 }, { "epoch": 0.22428923054970928, "grad_norm": 0.35636234283447266, "learning_rate": 4.768340962858928e-05, "loss": 0.1896, "step": 12575 }, { "epoch": 0.22430706667142297, "grad_norm": 0.25400036573410034, "learning_rate": 4.768275522046047e-05, "loss": 0.1699, "step": 12576 }, { "epoch": 0.22432490279313666, "grad_norm": 0.47366049885749817, "learning_rate": 4.768210072440546e-05, "loss": 0.189, "step": 12577 }, { "epoch": 0.22434273891485035, "grad_norm": 0.21881255507469177, "learning_rate": 4.76814461404268e-05, "loss": 0.1683, "step": 12578 }, { "epoch": 0.22436057503656404, "grad_norm": 0.29892686009407043, "learning_rate": 4.768079146852701e-05, "loss": 0.1935, "step": 12579 }, { "epoch": 0.22437841115827775, "grad_norm": 0.3141668140888214, "learning_rate": 4.768013670870865e-05, "loss": 0.2034, "step": 12580 }, { "epoch": 0.22439624727999144, "grad_norm": 0.36283034086227417, "learning_rate": 4.767948186097424e-05, "loss": 0.241, "step": 12581 }, { "epoch": 0.22441408340170513, "grad_norm": 0.2098165899515152, "learning_rate": 4.767882692532632e-05, "loss": 0.168, "step": 12582 }, { "epoch": 0.22443191952341882, "grad_norm": 0.2089463323354721, "learning_rate": 4.767817190176745e-05, "loss": 0.1973, "step": 12583 }, { "epoch": 0.22444975564513253, "grad_norm": 0.2082255780696869, "learning_rate": 4.767751679030015e-05, "loss": 0.1568, "step": 12584 }, { "epoch": 0.22446759176684622, "grad_norm": 0.18804092705249786, "learning_rate": 4.767686159092695e-05, "loss": 0.1833, "step": 12585 }, { "epoch": 0.2244854278885599, "grad_norm": 0.2836175560951233, "learning_rate": 4.767620630365041e-05, "loss": 0.1972, "step": 12586 }, { "epoch": 0.2245032640102736, "grad_norm": 0.28468000888824463, "learning_rate": 4.7675550928473056e-05, "loss": 0.2064, "step": 12587 }, { "epoch": 0.2245211001319873, "grad_norm": 0.25813114643096924, "learning_rate": 4.767489546539744e-05, "loss": 0.1701, "step": 12588 }, { "epoch": 0.224538936253701, "grad_norm": 0.24302178621292114, "learning_rate": 4.76742399144261e-05, "loss": 0.1777, "step": 12589 }, { "epoch": 0.2245567723754147, "grad_norm": 0.4876336455345154, "learning_rate": 4.767358427556157e-05, "loss": 0.1935, "step": 12590 }, { "epoch": 0.22457460849712838, "grad_norm": 0.2273038625717163, "learning_rate": 4.7672928548806394e-05, "loss": 0.1488, "step": 12591 }, { "epoch": 0.2245924446188421, "grad_norm": 0.26203829050064087, "learning_rate": 4.767227273416313e-05, "loss": 0.204, "step": 12592 }, { "epoch": 0.22461028074055578, "grad_norm": 0.2577301561832428, "learning_rate": 4.767161683163429e-05, "loss": 0.1955, "step": 12593 }, { "epoch": 0.22462811686226947, "grad_norm": 0.3304455280303955, "learning_rate": 4.767096084122244e-05, "loss": 0.2291, "step": 12594 }, { "epoch": 0.22464595298398315, "grad_norm": 0.20761314034461975, "learning_rate": 4.767030476293011e-05, "loss": 0.1697, "step": 12595 }, { "epoch": 0.22466378910569687, "grad_norm": 0.32202431559562683, "learning_rate": 4.766964859675985e-05, "loss": 0.2, "step": 12596 }, { "epoch": 0.22468162522741056, "grad_norm": 0.32054996490478516, "learning_rate": 4.766899234271421e-05, "loss": 0.1984, "step": 12597 }, { "epoch": 0.22469946134912425, "grad_norm": 0.22482392191886902, "learning_rate": 4.766833600079572e-05, "loss": 0.1116, "step": 12598 }, { "epoch": 0.22471729747083793, "grad_norm": 0.2975764572620392, "learning_rate": 4.7667679571006926e-05, "loss": 0.1901, "step": 12599 }, { "epoch": 0.22473513359255162, "grad_norm": 0.2900422513484955, "learning_rate": 4.766702305335039e-05, "loss": 0.2051, "step": 12600 }, { "epoch": 0.22475296971426534, "grad_norm": 0.23242852091789246, "learning_rate": 4.766636644782864e-05, "loss": 0.2115, "step": 12601 }, { "epoch": 0.22477080583597903, "grad_norm": 0.3072696626186371, "learning_rate": 4.7665709754444214e-05, "loss": 0.1842, "step": 12602 }, { "epoch": 0.22478864195769271, "grad_norm": 0.20726057887077332, "learning_rate": 4.766505297319968e-05, "loss": 0.2156, "step": 12603 }, { "epoch": 0.2248064780794064, "grad_norm": 0.2729635536670685, "learning_rate": 4.7664396104097564e-05, "loss": 0.163, "step": 12604 }, { "epoch": 0.22482431420112012, "grad_norm": 0.3674893081188202, "learning_rate": 4.766373914714043e-05, "loss": 0.2338, "step": 12605 }, { "epoch": 0.2248421503228338, "grad_norm": 0.3503851592540741, "learning_rate": 4.76630821023308e-05, "loss": 0.1877, "step": 12606 }, { "epoch": 0.2248599864445475, "grad_norm": 0.3252289891242981, "learning_rate": 4.766242496967125e-05, "loss": 0.1552, "step": 12607 }, { "epoch": 0.22487782256626118, "grad_norm": 0.2753390669822693, "learning_rate": 4.766176774916431e-05, "loss": 0.2123, "step": 12608 }, { "epoch": 0.2248956586879749, "grad_norm": 0.20343218743801117, "learning_rate": 4.766111044081253e-05, "loss": 0.1371, "step": 12609 }, { "epoch": 0.22491349480968859, "grad_norm": 0.2973015606403351, "learning_rate": 4.766045304461846e-05, "loss": 0.1767, "step": 12610 }, { "epoch": 0.22493133093140227, "grad_norm": 0.24229112267494202, "learning_rate": 4.765979556058464e-05, "loss": 0.2021, "step": 12611 }, { "epoch": 0.22494916705311596, "grad_norm": 0.22636985778808594, "learning_rate": 4.7659137988713635e-05, "loss": 0.1984, "step": 12612 }, { "epoch": 0.22496700317482968, "grad_norm": 0.3754446506500244, "learning_rate": 4.765848032900798e-05, "loss": 0.2447, "step": 12613 }, { "epoch": 0.22498483929654337, "grad_norm": 0.30979442596435547, "learning_rate": 4.765782258147023e-05, "loss": 0.2429, "step": 12614 }, { "epoch": 0.22500267541825705, "grad_norm": 0.35259950160980225, "learning_rate": 4.7657164746102944e-05, "loss": 0.1906, "step": 12615 }, { "epoch": 0.22502051153997074, "grad_norm": 0.3277779221534729, "learning_rate": 4.765650682290865e-05, "loss": 0.1577, "step": 12616 }, { "epoch": 0.22503834766168446, "grad_norm": 0.25805234909057617, "learning_rate": 4.765584881188991e-05, "loss": 0.2183, "step": 12617 }, { "epoch": 0.22505618378339814, "grad_norm": 0.3252462148666382, "learning_rate": 4.765519071304928e-05, "loss": 0.1116, "step": 12618 }, { "epoch": 0.22507401990511183, "grad_norm": 0.39523595571517944, "learning_rate": 4.7654532526389306e-05, "loss": 0.2342, "step": 12619 }, { "epoch": 0.22509185602682552, "grad_norm": 0.25782206654548645, "learning_rate": 4.765387425191254e-05, "loss": 0.1651, "step": 12620 }, { "epoch": 0.2251096921485392, "grad_norm": 0.22414354979991913, "learning_rate": 4.765321588962153e-05, "loss": 0.1737, "step": 12621 }, { "epoch": 0.22512752827025292, "grad_norm": 0.29870933294296265, "learning_rate": 4.765255743951883e-05, "loss": 0.2362, "step": 12622 }, { "epoch": 0.2251453643919666, "grad_norm": 0.23767724633216858, "learning_rate": 4.7651898901606994e-05, "loss": 0.2038, "step": 12623 }, { "epoch": 0.2251632005136803, "grad_norm": 0.23248374462127686, "learning_rate": 4.765124027588858e-05, "loss": 0.1979, "step": 12624 }, { "epoch": 0.225181036635394, "grad_norm": 0.2406260371208191, "learning_rate": 4.765058156236613e-05, "loss": 0.1775, "step": 12625 }, { "epoch": 0.2251988727571077, "grad_norm": 0.23524776101112366, "learning_rate": 4.76499227610422e-05, "loss": 0.1654, "step": 12626 }, { "epoch": 0.2252167088788214, "grad_norm": 0.25309523940086365, "learning_rate": 4.7649263871919355e-05, "loss": 0.183, "step": 12627 }, { "epoch": 0.22523454500053508, "grad_norm": 0.22781570255756378, "learning_rate": 4.764860489500014e-05, "loss": 0.2338, "step": 12628 }, { "epoch": 0.22525238112224877, "grad_norm": 0.3151392936706543, "learning_rate": 4.76479458302871e-05, "loss": 0.2314, "step": 12629 }, { "epoch": 0.22527021724396248, "grad_norm": 0.1762322336435318, "learning_rate": 4.7647286677782803e-05, "loss": 0.1768, "step": 12630 }, { "epoch": 0.22528805336567617, "grad_norm": 0.24711892008781433, "learning_rate": 4.76466274374898e-05, "loss": 0.1755, "step": 12631 }, { "epoch": 0.22530588948738986, "grad_norm": 0.27915292978286743, "learning_rate": 4.764596810941065e-05, "loss": 0.2159, "step": 12632 }, { "epoch": 0.22532372560910355, "grad_norm": 0.2858349680900574, "learning_rate": 4.7645308693547905e-05, "loss": 0.1921, "step": 12633 }, { "epoch": 0.22534156173081726, "grad_norm": 0.251296728849411, "learning_rate": 4.7644649189904125e-05, "loss": 0.242, "step": 12634 }, { "epoch": 0.22535939785253095, "grad_norm": 0.2733573019504547, "learning_rate": 4.7643989598481866e-05, "loss": 0.1767, "step": 12635 }, { "epoch": 0.22537723397424464, "grad_norm": 0.3312844932079315, "learning_rate": 4.7643329919283676e-05, "loss": 0.2093, "step": 12636 }, { "epoch": 0.22539507009595833, "grad_norm": 0.3516193926334381, "learning_rate": 4.764267015231212e-05, "loss": 0.2128, "step": 12637 }, { "epoch": 0.22541290621767202, "grad_norm": 0.24420495331287384, "learning_rate": 4.764201029756975e-05, "loss": 0.1676, "step": 12638 }, { "epoch": 0.22543074233938573, "grad_norm": 0.22111408412456512, "learning_rate": 4.764135035505913e-05, "loss": 0.1143, "step": 12639 }, { "epoch": 0.22544857846109942, "grad_norm": 0.38935935497283936, "learning_rate": 4.764069032478282e-05, "loss": 0.3068, "step": 12640 }, { "epoch": 0.2254664145828131, "grad_norm": 0.3000342547893524, "learning_rate": 4.764003020674337e-05, "loss": 0.1927, "step": 12641 }, { "epoch": 0.2254842507045268, "grad_norm": 0.32595810294151306, "learning_rate": 4.7639370000943345e-05, "loss": 0.1914, "step": 12642 }, { "epoch": 0.2255020868262405, "grad_norm": 0.344596266746521, "learning_rate": 4.76387097073853e-05, "loss": 0.1982, "step": 12643 }, { "epoch": 0.2255199229479542, "grad_norm": 0.22488991916179657, "learning_rate": 4.7638049326071805e-05, "loss": 0.202, "step": 12644 }, { "epoch": 0.2255377590696679, "grad_norm": 0.23531319200992584, "learning_rate": 4.7637388857005404e-05, "loss": 0.182, "step": 12645 }, { "epoch": 0.22555559519138157, "grad_norm": 0.32740113139152527, "learning_rate": 4.7636728300188674e-05, "loss": 0.174, "step": 12646 }, { "epoch": 0.2255734313130953, "grad_norm": 0.219620943069458, "learning_rate": 4.7636067655624154e-05, "loss": 0.1723, "step": 12647 }, { "epoch": 0.22559126743480898, "grad_norm": 0.217381089925766, "learning_rate": 4.763540692331443e-05, "loss": 0.1572, "step": 12648 }, { "epoch": 0.22560910355652267, "grad_norm": 0.271856427192688, "learning_rate": 4.763474610326204e-05, "loss": 0.2022, "step": 12649 }, { "epoch": 0.22562693967823635, "grad_norm": 0.29291120171546936, "learning_rate": 4.763408519546956e-05, "loss": 0.2298, "step": 12650 }, { "epoch": 0.22564477579995007, "grad_norm": 0.2949700355529785, "learning_rate": 4.7633424199939555e-05, "loss": 0.2076, "step": 12651 }, { "epoch": 0.22566261192166376, "grad_norm": 0.26310575008392334, "learning_rate": 4.7632763116674575e-05, "loss": 0.1599, "step": 12652 }, { "epoch": 0.22568044804337745, "grad_norm": 0.2513583302497864, "learning_rate": 4.763210194567719e-05, "loss": 0.1971, "step": 12653 }, { "epoch": 0.22569828416509113, "grad_norm": 0.2546949088573456, "learning_rate": 4.763144068694995e-05, "loss": 0.2049, "step": 12654 }, { "epoch": 0.22571612028680485, "grad_norm": 0.27113422751426697, "learning_rate": 4.763077934049544e-05, "loss": 0.1984, "step": 12655 }, { "epoch": 0.22573395640851854, "grad_norm": 0.2423454374074936, "learning_rate": 4.763011790631621e-05, "loss": 0.2333, "step": 12656 }, { "epoch": 0.22575179253023223, "grad_norm": 0.31101346015930176, "learning_rate": 4.7629456384414826e-05, "loss": 0.2029, "step": 12657 }, { "epoch": 0.22576962865194591, "grad_norm": 0.3214392066001892, "learning_rate": 4.7628794774793855e-05, "loss": 0.2725, "step": 12658 }, { "epoch": 0.2257874647736596, "grad_norm": 0.2528858482837677, "learning_rate": 4.762813307745586e-05, "loss": 0.1879, "step": 12659 }, { "epoch": 0.22580530089537332, "grad_norm": 0.2589768171310425, "learning_rate": 4.76274712924034e-05, "loss": 0.18, "step": 12660 }, { "epoch": 0.225823137017087, "grad_norm": 0.3322522044181824, "learning_rate": 4.7626809419639056e-05, "loss": 0.2285, "step": 12661 }, { "epoch": 0.2258409731388007, "grad_norm": 0.3407336175441742, "learning_rate": 4.762614745916538e-05, "loss": 0.2062, "step": 12662 }, { "epoch": 0.22585880926051438, "grad_norm": 0.25700899958610535, "learning_rate": 4.762548541098494e-05, "loss": 0.1855, "step": 12663 }, { "epoch": 0.2258766453822281, "grad_norm": 0.2184104025363922, "learning_rate": 4.7624823275100304e-05, "loss": 0.201, "step": 12664 }, { "epoch": 0.22589448150394179, "grad_norm": 0.22487549483776093, "learning_rate": 4.762416105151404e-05, "loss": 0.2114, "step": 12665 }, { "epoch": 0.22591231762565547, "grad_norm": 0.28850653767585754, "learning_rate": 4.762349874022871e-05, "loss": 0.2195, "step": 12666 }, { "epoch": 0.22593015374736916, "grad_norm": 0.22642260789871216, "learning_rate": 4.7622836341246894e-05, "loss": 0.1728, "step": 12667 }, { "epoch": 0.22594798986908288, "grad_norm": 0.2599693834781647, "learning_rate": 4.762217385457114e-05, "loss": 0.2182, "step": 12668 }, { "epoch": 0.22596582599079656, "grad_norm": 0.2706993818283081, "learning_rate": 4.762151128020404e-05, "loss": 0.2161, "step": 12669 }, { "epoch": 0.22598366211251025, "grad_norm": 0.16822826862335205, "learning_rate": 4.7620848618148126e-05, "loss": 0.1426, "step": 12670 }, { "epoch": 0.22600149823422394, "grad_norm": 0.19447705149650574, "learning_rate": 4.762018586840601e-05, "loss": 0.1589, "step": 12671 }, { "epoch": 0.22601933435593766, "grad_norm": 0.2862134575843811, "learning_rate": 4.761952303098023e-05, "loss": 0.1942, "step": 12672 }, { "epoch": 0.22603717047765134, "grad_norm": 0.2801419198513031, "learning_rate": 4.7618860105873375e-05, "loss": 0.181, "step": 12673 }, { "epoch": 0.22605500659936503, "grad_norm": 0.2613162100315094, "learning_rate": 4.7618197093088e-05, "loss": 0.1955, "step": 12674 }, { "epoch": 0.22607284272107872, "grad_norm": 0.24245457351207733, "learning_rate": 4.761753399262668e-05, "loss": 0.2491, "step": 12675 }, { "epoch": 0.22609067884279244, "grad_norm": 0.3227483630180359, "learning_rate": 4.761687080449199e-05, "loss": 0.2287, "step": 12676 }, { "epoch": 0.22610851496450612, "grad_norm": 0.41901516914367676, "learning_rate": 4.7616207528686496e-05, "loss": 0.2599, "step": 12677 }, { "epoch": 0.2261263510862198, "grad_norm": 0.2581169605255127, "learning_rate": 4.761554416521278e-05, "loss": 0.2277, "step": 12678 }, { "epoch": 0.2261441872079335, "grad_norm": 0.24728922545909882, "learning_rate": 4.761488071407339e-05, "loss": 0.1854, "step": 12679 }, { "epoch": 0.2261620233296472, "grad_norm": 0.25558969378471375, "learning_rate": 4.761421717527091e-05, "loss": 0.1577, "step": 12680 }, { "epoch": 0.2261798594513609, "grad_norm": 0.28904178738594055, "learning_rate": 4.761355354880792e-05, "loss": 0.1776, "step": 12681 }, { "epoch": 0.2261976955730746, "grad_norm": 0.27921637892723083, "learning_rate": 4.761288983468699e-05, "loss": 0.165, "step": 12682 }, { "epoch": 0.22621553169478828, "grad_norm": 0.24143558740615845, "learning_rate": 4.761222603291068e-05, "loss": 0.1766, "step": 12683 }, { "epoch": 0.22623336781650197, "grad_norm": 0.3705345392227173, "learning_rate": 4.761156214348158e-05, "loss": 0.2192, "step": 12684 }, { "epoch": 0.22625120393821568, "grad_norm": 0.26557087898254395, "learning_rate": 4.761089816640225e-05, "loss": 0.1727, "step": 12685 }, { "epoch": 0.22626904005992937, "grad_norm": 0.28113749623298645, "learning_rate": 4.761023410167527e-05, "loss": 0.1716, "step": 12686 }, { "epoch": 0.22628687618164306, "grad_norm": 0.32993751764297485, "learning_rate": 4.760956994930321e-05, "loss": 0.2177, "step": 12687 }, { "epoch": 0.22630471230335675, "grad_norm": 0.24255356192588806, "learning_rate": 4.760890570928865e-05, "loss": 0.1975, "step": 12688 }, { "epoch": 0.22632254842507046, "grad_norm": 0.24917371571063995, "learning_rate": 4.7608241381634165e-05, "loss": 0.2161, "step": 12689 }, { "epoch": 0.22634038454678415, "grad_norm": 0.27715256810188293, "learning_rate": 4.7607576966342324e-05, "loss": 0.207, "step": 12690 }, { "epoch": 0.22635822066849784, "grad_norm": 0.3086145222187042, "learning_rate": 4.7606912463415716e-05, "loss": 0.2003, "step": 12691 }, { "epoch": 0.22637605679021153, "grad_norm": 0.29022637009620667, "learning_rate": 4.76062478728569e-05, "loss": 0.1795, "step": 12692 }, { "epoch": 0.22639389291192524, "grad_norm": 0.28601041436195374, "learning_rate": 4.7605583194668457e-05, "loss": 0.2078, "step": 12693 }, { "epoch": 0.22641172903363893, "grad_norm": 0.28157880902290344, "learning_rate": 4.760491842885297e-05, "loss": 0.2023, "step": 12694 }, { "epoch": 0.22642956515535262, "grad_norm": 0.32047608494758606, "learning_rate": 4.760425357541301e-05, "loss": 0.2534, "step": 12695 }, { "epoch": 0.2264474012770663, "grad_norm": 0.2760776877403259, "learning_rate": 4.760358863435115e-05, "loss": 0.1793, "step": 12696 }, { "epoch": 0.22646523739878002, "grad_norm": 0.22464804351329803, "learning_rate": 4.760292360566998e-05, "loss": 0.2103, "step": 12697 }, { "epoch": 0.2264830735204937, "grad_norm": 0.30228474736213684, "learning_rate": 4.7602258489372074e-05, "loss": 0.2179, "step": 12698 }, { "epoch": 0.2265009096422074, "grad_norm": 0.3229995369911194, "learning_rate": 4.760159328546e-05, "loss": 0.2361, "step": 12699 }, { "epoch": 0.2265187457639211, "grad_norm": 0.3854117691516876, "learning_rate": 4.760092799393635e-05, "loss": 0.2036, "step": 12700 }, { "epoch": 0.22653658188563477, "grad_norm": 0.22347381711006165, "learning_rate": 4.76002626148037e-05, "loss": 0.1794, "step": 12701 }, { "epoch": 0.2265544180073485, "grad_norm": 0.27365759015083313, "learning_rate": 4.7599597148064614e-05, "loss": 0.1925, "step": 12702 }, { "epoch": 0.22657225412906218, "grad_norm": 0.28982412815093994, "learning_rate": 4.7598931593721694e-05, "loss": 0.182, "step": 12703 }, { "epoch": 0.22659009025077587, "grad_norm": 0.3106169104576111, "learning_rate": 4.759826595177751e-05, "loss": 0.1153, "step": 12704 }, { "epoch": 0.22660792637248955, "grad_norm": 0.33322378993034363, "learning_rate": 4.759760022223464e-05, "loss": 0.2377, "step": 12705 }, { "epoch": 0.22662576249420327, "grad_norm": 0.30550891160964966, "learning_rate": 4.759693440509566e-05, "loss": 0.2241, "step": 12706 }, { "epoch": 0.22664359861591696, "grad_norm": 0.2674667239189148, "learning_rate": 4.759626850036317e-05, "loss": 0.1504, "step": 12707 }, { "epoch": 0.22666143473763065, "grad_norm": 0.2966086268424988, "learning_rate": 4.7595602508039724e-05, "loss": 0.1877, "step": 12708 }, { "epoch": 0.22667927085934433, "grad_norm": 0.30703243613243103, "learning_rate": 4.759493642812793e-05, "loss": 0.1762, "step": 12709 }, { "epoch": 0.22669710698105805, "grad_norm": 0.3626992404460907, "learning_rate": 4.7594270260630355e-05, "loss": 0.2374, "step": 12710 }, { "epoch": 0.22671494310277174, "grad_norm": 0.19699066877365112, "learning_rate": 4.7593604005549586e-05, "loss": 0.2244, "step": 12711 }, { "epoch": 0.22673277922448543, "grad_norm": 0.2944333553314209, "learning_rate": 4.75929376628882e-05, "loss": 0.154, "step": 12712 }, { "epoch": 0.2267506153461991, "grad_norm": 0.3691287338733673, "learning_rate": 4.759227123264879e-05, "loss": 0.1914, "step": 12713 }, { "epoch": 0.22676845146791283, "grad_norm": 0.2353912591934204, "learning_rate": 4.7591604714833924e-05, "loss": 0.1987, "step": 12714 }, { "epoch": 0.22678628758962652, "grad_norm": 0.24651099741458893, "learning_rate": 4.75909381094462e-05, "loss": 0.2126, "step": 12715 }, { "epoch": 0.2268041237113402, "grad_norm": 0.422842800617218, "learning_rate": 4.7590271416488206e-05, "loss": 0.1782, "step": 12716 }, { "epoch": 0.2268219598330539, "grad_norm": 0.3569892942905426, "learning_rate": 4.75896046359625e-05, "loss": 0.1954, "step": 12717 }, { "epoch": 0.2268397959547676, "grad_norm": 0.19210238754749298, "learning_rate": 4.7588937767871697e-05, "loss": 0.1507, "step": 12718 }, { "epoch": 0.2268576320764813, "grad_norm": 0.26525914669036865, "learning_rate": 4.758827081221837e-05, "loss": 0.1736, "step": 12719 }, { "epoch": 0.22687546819819499, "grad_norm": 0.2761824429035187, "learning_rate": 4.75876037690051e-05, "loss": 0.1757, "step": 12720 }, { "epoch": 0.22689330431990867, "grad_norm": 0.26994580030441284, "learning_rate": 4.758693663823448e-05, "loss": 0.1805, "step": 12721 }, { "epoch": 0.22691114044162236, "grad_norm": 0.2924158275127411, "learning_rate": 4.758626941990909e-05, "loss": 0.1973, "step": 12722 }, { "epoch": 0.22692897656333608, "grad_norm": 0.2680577337741852, "learning_rate": 4.758560211403151e-05, "loss": 0.1803, "step": 12723 }, { "epoch": 0.22694681268504976, "grad_norm": 0.30167117714881897, "learning_rate": 4.758493472060435e-05, "loss": 0.2549, "step": 12724 }, { "epoch": 0.22696464880676345, "grad_norm": 0.21846014261245728, "learning_rate": 4.7584267239630175e-05, "loss": 0.181, "step": 12725 }, { "epoch": 0.22698248492847714, "grad_norm": 0.2149186134338379, "learning_rate": 4.758359967111158e-05, "loss": 0.1836, "step": 12726 }, { "epoch": 0.22700032105019086, "grad_norm": 0.4402591288089752, "learning_rate": 4.7582932015051154e-05, "loss": 0.2634, "step": 12727 }, { "epoch": 0.22701815717190454, "grad_norm": 0.19699132442474365, "learning_rate": 4.758226427145148e-05, "loss": 0.1991, "step": 12728 }, { "epoch": 0.22703599329361823, "grad_norm": 0.25451552867889404, "learning_rate": 4.758159644031515e-05, "loss": 0.2104, "step": 12729 }, { "epoch": 0.22705382941533192, "grad_norm": 0.2836599349975586, "learning_rate": 4.758092852164476e-05, "loss": 0.1335, "step": 12730 }, { "epoch": 0.22707166553704564, "grad_norm": 0.2872105836868286, "learning_rate": 4.7580260515442886e-05, "loss": 0.1806, "step": 12731 }, { "epoch": 0.22708950165875932, "grad_norm": 0.2629055678844452, "learning_rate": 4.757959242171213e-05, "loss": 0.1859, "step": 12732 }, { "epoch": 0.227107337780473, "grad_norm": 0.2944047152996063, "learning_rate": 4.757892424045506e-05, "loss": 0.2199, "step": 12733 }, { "epoch": 0.2271251739021867, "grad_norm": 0.43053120374679565, "learning_rate": 4.757825597167429e-05, "loss": 0.1515, "step": 12734 }, { "epoch": 0.22714301002390042, "grad_norm": 0.24039088189601898, "learning_rate": 4.7577587615372405e-05, "loss": 0.1928, "step": 12735 }, { "epoch": 0.2271608461456141, "grad_norm": 0.2977534234523773, "learning_rate": 4.7576919171551996e-05, "loss": 0.201, "step": 12736 }, { "epoch": 0.2271786822673278, "grad_norm": 0.2886347472667694, "learning_rate": 4.7576250640215634e-05, "loss": 0.219, "step": 12737 }, { "epoch": 0.22719651838904148, "grad_norm": 0.25993335247039795, "learning_rate": 4.757558202136594e-05, "loss": 0.1613, "step": 12738 }, { "epoch": 0.22721435451075517, "grad_norm": 0.22002577781677246, "learning_rate": 4.757491331500549e-05, "loss": 0.1665, "step": 12739 }, { "epoch": 0.22723219063246888, "grad_norm": 0.2938578724861145, "learning_rate": 4.757424452113688e-05, "loss": 0.1938, "step": 12740 }, { "epoch": 0.22725002675418257, "grad_norm": 0.33019351959228516, "learning_rate": 4.75735756397627e-05, "loss": 0.1867, "step": 12741 }, { "epoch": 0.22726786287589626, "grad_norm": 0.2709706425666809, "learning_rate": 4.7572906670885544e-05, "loss": 0.2353, "step": 12742 }, { "epoch": 0.22728569899760995, "grad_norm": 0.23868712782859802, "learning_rate": 4.7572237614508e-05, "loss": 0.207, "step": 12743 }, { "epoch": 0.22730353511932366, "grad_norm": 0.19620831310749054, "learning_rate": 4.757156847063268e-05, "loss": 0.1959, "step": 12744 }, { "epoch": 0.22732137124103735, "grad_norm": 0.2820335030555725, "learning_rate": 4.7570899239262155e-05, "loss": 0.1799, "step": 12745 }, { "epoch": 0.22733920736275104, "grad_norm": 0.2542852759361267, "learning_rate": 4.757022992039903e-05, "loss": 0.1962, "step": 12746 }, { "epoch": 0.22735704348446473, "grad_norm": 0.2287750542163849, "learning_rate": 4.7569560514045895e-05, "loss": 0.1549, "step": 12747 }, { "epoch": 0.22737487960617844, "grad_norm": 0.2620689868927002, "learning_rate": 4.7568891020205354e-05, "loss": 0.1945, "step": 12748 }, { "epoch": 0.22739271572789213, "grad_norm": 0.30139219760894775, "learning_rate": 4.756822143887999e-05, "loss": 0.2162, "step": 12749 }, { "epoch": 0.22741055184960582, "grad_norm": 0.23995572328567505, "learning_rate": 4.7567551770072416e-05, "loss": 0.1731, "step": 12750 }, { "epoch": 0.2274283879713195, "grad_norm": 0.23730701208114624, "learning_rate": 4.756688201378521e-05, "loss": 0.1883, "step": 12751 }, { "epoch": 0.22744622409303322, "grad_norm": 0.27638038992881775, "learning_rate": 4.756621217002097e-05, "loss": 0.2125, "step": 12752 }, { "epoch": 0.2274640602147469, "grad_norm": 0.3324611485004425, "learning_rate": 4.75655422387823e-05, "loss": 0.1992, "step": 12753 }, { "epoch": 0.2274818963364606, "grad_norm": 0.2876867353916168, "learning_rate": 4.75648722200718e-05, "loss": 0.2105, "step": 12754 }, { "epoch": 0.2274997324581743, "grad_norm": 0.2931671738624573, "learning_rate": 4.756420211389206e-05, "loss": 0.1459, "step": 12755 }, { "epoch": 0.227517568579888, "grad_norm": 0.3282272517681122, "learning_rate": 4.7563531920245675e-05, "loss": 0.1789, "step": 12756 }, { "epoch": 0.2275354047016017, "grad_norm": 0.24702872335910797, "learning_rate": 4.7562861639135254e-05, "loss": 0.2027, "step": 12757 }, { "epoch": 0.22755324082331538, "grad_norm": 0.32239800691604614, "learning_rate": 4.756219127056338e-05, "loss": 0.1549, "step": 12758 }, { "epoch": 0.22757107694502907, "grad_norm": 0.3866209387779236, "learning_rate": 4.756152081453267e-05, "loss": 0.2548, "step": 12759 }, { "epoch": 0.22758891306674275, "grad_norm": 0.27214887738227844, "learning_rate": 4.75608502710457e-05, "loss": 0.2139, "step": 12760 }, { "epoch": 0.22760674918845647, "grad_norm": 0.3311125338077545, "learning_rate": 4.75601796401051e-05, "loss": 0.2604, "step": 12761 }, { "epoch": 0.22762458531017016, "grad_norm": 0.23066891729831696, "learning_rate": 4.7559508921713436e-05, "loss": 0.1856, "step": 12762 }, { "epoch": 0.22764242143188385, "grad_norm": 0.2128305584192276, "learning_rate": 4.755883811587333e-05, "loss": 0.1963, "step": 12763 }, { "epoch": 0.22766025755359753, "grad_norm": 0.31742042303085327, "learning_rate": 4.755816722258737e-05, "loss": 0.1727, "step": 12764 }, { "epoch": 0.22767809367531125, "grad_norm": 0.27673065662384033, "learning_rate": 4.7557496241858165e-05, "loss": 0.2091, "step": 12765 }, { "epoch": 0.22769592979702494, "grad_norm": 0.459807425737381, "learning_rate": 4.7556825173688314e-05, "loss": 0.2079, "step": 12766 }, { "epoch": 0.22771376591873863, "grad_norm": 0.4824574887752533, "learning_rate": 4.7556154018080424e-05, "loss": 0.1712, "step": 12767 }, { "epoch": 0.2277316020404523, "grad_norm": 0.3288426995277405, "learning_rate": 4.7555482775037084e-05, "loss": 0.1877, "step": 12768 }, { "epoch": 0.22774943816216603, "grad_norm": 0.23207063972949982, "learning_rate": 4.7554811444560896e-05, "loss": 0.1687, "step": 12769 }, { "epoch": 0.22776727428387972, "grad_norm": 0.22554905712604523, "learning_rate": 4.755414002665448e-05, "loss": 0.2006, "step": 12770 }, { "epoch": 0.2277851104055934, "grad_norm": 0.33833837509155273, "learning_rate": 4.7553468521320424e-05, "loss": 0.2134, "step": 12771 }, { "epoch": 0.2278029465273071, "grad_norm": 0.28130850195884705, "learning_rate": 4.755279692856134e-05, "loss": 0.1603, "step": 12772 }, { "epoch": 0.2278207826490208, "grad_norm": 0.2707313299179077, "learning_rate": 4.755212524837981e-05, "loss": 0.218, "step": 12773 }, { "epoch": 0.2278386187707345, "grad_norm": 0.2821885049343109, "learning_rate": 4.755145348077847e-05, "loss": 0.1581, "step": 12774 }, { "epoch": 0.22785645489244818, "grad_norm": 0.2544398903846741, "learning_rate": 4.75507816257599e-05, "loss": 0.1915, "step": 12775 }, { "epoch": 0.22787429101416187, "grad_norm": 0.2084919959306717, "learning_rate": 4.755010968332671e-05, "loss": 0.1642, "step": 12776 }, { "epoch": 0.2278921271358756, "grad_norm": 0.25685441493988037, "learning_rate": 4.754943765348151e-05, "loss": 0.1984, "step": 12777 }, { "epoch": 0.22790996325758928, "grad_norm": 0.2325342744588852, "learning_rate": 4.75487655362269e-05, "loss": 0.2185, "step": 12778 }, { "epoch": 0.22792779937930296, "grad_norm": 0.21224354207515717, "learning_rate": 4.754809333156548e-05, "loss": 0.1796, "step": 12779 }, { "epoch": 0.22794563550101665, "grad_norm": 0.32012009620666504, "learning_rate": 4.754742103949987e-05, "loss": 0.2174, "step": 12780 }, { "epoch": 0.22796347162273034, "grad_norm": 0.26159098744392395, "learning_rate": 4.754674866003267e-05, "loss": 0.126, "step": 12781 }, { "epoch": 0.22798130774444406, "grad_norm": 0.22669540345668793, "learning_rate": 4.7546076193166477e-05, "loss": 0.2208, "step": 12782 }, { "epoch": 0.22799914386615774, "grad_norm": 0.28815677762031555, "learning_rate": 4.754540363890391e-05, "loss": 0.2018, "step": 12783 }, { "epoch": 0.22801697998787143, "grad_norm": 0.40440696477890015, "learning_rate": 4.754473099724758e-05, "loss": 0.2128, "step": 12784 }, { "epoch": 0.22803481610958512, "grad_norm": 0.26720649003982544, "learning_rate": 4.754405826820008e-05, "loss": 0.2146, "step": 12785 }, { "epoch": 0.22805265223129884, "grad_norm": 0.2865886092185974, "learning_rate": 4.754338545176401e-05, "loss": 0.1743, "step": 12786 }, { "epoch": 0.22807048835301252, "grad_norm": 0.2756912410259247, "learning_rate": 4.7542712547942e-05, "loss": 0.2153, "step": 12787 }, { "epoch": 0.2280883244747262, "grad_norm": 0.2839483320713043, "learning_rate": 4.7542039556736663e-05, "loss": 0.2104, "step": 12788 }, { "epoch": 0.2281061605964399, "grad_norm": 0.30994048714637756, "learning_rate": 4.7541366478150585e-05, "loss": 0.2056, "step": 12789 }, { "epoch": 0.22812399671815362, "grad_norm": 0.2848181128501892, "learning_rate": 4.754069331218638e-05, "loss": 0.1902, "step": 12790 }, { "epoch": 0.2281418328398673, "grad_norm": 0.3385939598083496, "learning_rate": 4.754002005884667e-05, "loss": 0.2294, "step": 12791 }, { "epoch": 0.228159668961581, "grad_norm": 0.2165912538766861, "learning_rate": 4.7539346718134055e-05, "loss": 0.1937, "step": 12792 }, { "epoch": 0.22817750508329468, "grad_norm": 0.29350340366363525, "learning_rate": 4.7538673290051144e-05, "loss": 0.2353, "step": 12793 }, { "epoch": 0.2281953412050084, "grad_norm": 0.310447096824646, "learning_rate": 4.753799977460055e-05, "loss": 0.2461, "step": 12794 }, { "epoch": 0.22821317732672208, "grad_norm": 0.24659083783626556, "learning_rate": 4.753732617178489e-05, "loss": 0.2246, "step": 12795 }, { "epoch": 0.22823101344843577, "grad_norm": 0.35219600796699524, "learning_rate": 4.753665248160677e-05, "loss": 0.1628, "step": 12796 }, { "epoch": 0.22824884957014946, "grad_norm": 0.25911155343055725, "learning_rate": 4.75359787040688e-05, "loss": 0.1808, "step": 12797 }, { "epoch": 0.22826668569186317, "grad_norm": 0.23166437447071075, "learning_rate": 4.753530483917359e-05, "loss": 0.1914, "step": 12798 }, { "epoch": 0.22828452181357686, "grad_norm": 0.3343127369880676, "learning_rate": 4.753463088692376e-05, "loss": 0.1493, "step": 12799 }, { "epoch": 0.22830235793529055, "grad_norm": 0.3121708333492279, "learning_rate": 4.7533956847321916e-05, "loss": 0.1674, "step": 12800 }, { "epoch": 0.22832019405700424, "grad_norm": 0.30146902799606323, "learning_rate": 4.753328272037066e-05, "loss": 0.1684, "step": 12801 }, { "epoch": 0.22833803017871793, "grad_norm": 0.2249869406223297, "learning_rate": 4.7532608506072636e-05, "loss": 0.1633, "step": 12802 }, { "epoch": 0.22835586630043164, "grad_norm": 0.3040216565132141, "learning_rate": 4.753193420443043e-05, "loss": 0.258, "step": 12803 }, { "epoch": 0.22837370242214533, "grad_norm": 0.24092234671115875, "learning_rate": 4.7531259815446666e-05, "loss": 0.1817, "step": 12804 }, { "epoch": 0.22839153854385902, "grad_norm": 0.27227193117141724, "learning_rate": 4.753058533912396e-05, "loss": 0.1663, "step": 12805 }, { "epoch": 0.2284093746655727, "grad_norm": 0.37233835458755493, "learning_rate": 4.752991077546491e-05, "loss": 0.1546, "step": 12806 }, { "epoch": 0.22842721078728642, "grad_norm": 0.3068927526473999, "learning_rate": 4.752923612447216e-05, "loss": 0.1746, "step": 12807 }, { "epoch": 0.2284450469090001, "grad_norm": 0.28168389201164246, "learning_rate": 4.7528561386148305e-05, "loss": 0.2291, "step": 12808 }, { "epoch": 0.2284628830307138, "grad_norm": 0.29312944412231445, "learning_rate": 4.752788656049596e-05, "loss": 0.2322, "step": 12809 }, { "epoch": 0.22848071915242749, "grad_norm": 0.4435538053512573, "learning_rate": 4.7527211647517757e-05, "loss": 0.2544, "step": 12810 }, { "epoch": 0.2284985552741412, "grad_norm": 0.41616931557655334, "learning_rate": 4.7526536647216294e-05, "loss": 0.1661, "step": 12811 }, { "epoch": 0.2285163913958549, "grad_norm": 0.23465916514396667, "learning_rate": 4.7525861559594185e-05, "loss": 0.1807, "step": 12812 }, { "epoch": 0.22853422751756858, "grad_norm": 0.2502564787864685, "learning_rate": 4.752518638465407e-05, "loss": 0.1787, "step": 12813 }, { "epoch": 0.22855206363928227, "grad_norm": 0.33122512698173523, "learning_rate": 4.752451112239854e-05, "loss": 0.16, "step": 12814 }, { "epoch": 0.22856989976099598, "grad_norm": 0.24872533977031708, "learning_rate": 4.752383577283024e-05, "loss": 0.1655, "step": 12815 }, { "epoch": 0.22858773588270967, "grad_norm": 0.3210082948207855, "learning_rate": 4.752316033595177e-05, "loss": 0.187, "step": 12816 }, { "epoch": 0.22860557200442336, "grad_norm": 0.3571023941040039, "learning_rate": 4.752248481176574e-05, "loss": 0.2349, "step": 12817 }, { "epoch": 0.22862340812613705, "grad_norm": 0.3391280770301819, "learning_rate": 4.752180920027479e-05, "loss": 0.1791, "step": 12818 }, { "epoch": 0.22864124424785076, "grad_norm": 0.25080451369285583, "learning_rate": 4.752113350148153e-05, "loss": 0.1973, "step": 12819 }, { "epoch": 0.22865908036956445, "grad_norm": 0.25983667373657227, "learning_rate": 4.7520457715388566e-05, "loss": 0.1727, "step": 12820 }, { "epoch": 0.22867691649127814, "grad_norm": 0.4141266644001007, "learning_rate": 4.751978184199854e-05, "loss": 0.2127, "step": 12821 }, { "epoch": 0.22869475261299183, "grad_norm": 0.2053796350955963, "learning_rate": 4.751910588131406e-05, "loss": 0.1704, "step": 12822 }, { "epoch": 0.2287125887347055, "grad_norm": 0.20918521285057068, "learning_rate": 4.7518429833337754e-05, "loss": 0.1617, "step": 12823 }, { "epoch": 0.22873042485641923, "grad_norm": 0.2291272133588791, "learning_rate": 4.751775369807222e-05, "loss": 0.18, "step": 12824 }, { "epoch": 0.22874826097813292, "grad_norm": 0.31480973958969116, "learning_rate": 4.751707747552011e-05, "loss": 0.1797, "step": 12825 }, { "epoch": 0.2287660970998466, "grad_norm": 0.30694177746772766, "learning_rate": 4.751640116568402e-05, "loss": 0.2233, "step": 12826 }, { "epoch": 0.2287839332215603, "grad_norm": 0.2968466281890869, "learning_rate": 4.7515724768566595e-05, "loss": 0.2103, "step": 12827 }, { "epoch": 0.228801769343274, "grad_norm": 0.2747965455055237, "learning_rate": 4.751504828417043e-05, "loss": 0.1908, "step": 12828 }, { "epoch": 0.2288196054649877, "grad_norm": 0.28568482398986816, "learning_rate": 4.751437171249817e-05, "loss": 0.2047, "step": 12829 }, { "epoch": 0.22883744158670138, "grad_norm": 0.3147212564945221, "learning_rate": 4.751369505355242e-05, "loss": 0.1898, "step": 12830 }, { "epoch": 0.22885527770841507, "grad_norm": 0.28301647305488586, "learning_rate": 4.751301830733582e-05, "loss": 0.2023, "step": 12831 }, { "epoch": 0.2288731138301288, "grad_norm": 0.2519884705543518, "learning_rate": 4.751234147385099e-05, "loss": 0.1737, "step": 12832 }, { "epoch": 0.22889094995184248, "grad_norm": 0.40235304832458496, "learning_rate": 4.7511664553100544e-05, "loss": 0.2205, "step": 12833 }, { "epoch": 0.22890878607355616, "grad_norm": 0.21696004271507263, "learning_rate": 4.7510987545087105e-05, "loss": 0.1797, "step": 12834 }, { "epoch": 0.22892662219526985, "grad_norm": 0.29813551902770996, "learning_rate": 4.751031044981331e-05, "loss": 0.2039, "step": 12835 }, { "epoch": 0.22894445831698357, "grad_norm": 0.22463171184062958, "learning_rate": 4.7509633267281775e-05, "loss": 0.203, "step": 12836 }, { "epoch": 0.22896229443869726, "grad_norm": 0.29856470227241516, "learning_rate": 4.750895599749513e-05, "loss": 0.1771, "step": 12837 }, { "epoch": 0.22898013056041094, "grad_norm": 0.32924017310142517, "learning_rate": 4.750827864045599e-05, "loss": 0.1918, "step": 12838 }, { "epoch": 0.22899796668212463, "grad_norm": 0.3483649790287018, "learning_rate": 4.7507601196167e-05, "loss": 0.2634, "step": 12839 }, { "epoch": 0.22901580280383832, "grad_norm": 0.37428799271583557, "learning_rate": 4.7506923664630765e-05, "loss": 0.2369, "step": 12840 }, { "epoch": 0.22903363892555204, "grad_norm": 0.3149733543395996, "learning_rate": 4.7506246045849916e-05, "loss": 0.2341, "step": 12841 }, { "epoch": 0.22905147504726572, "grad_norm": 0.23539508879184723, "learning_rate": 4.750556833982709e-05, "loss": 0.1631, "step": 12842 }, { "epoch": 0.2290693111689794, "grad_norm": 0.3394657373428345, "learning_rate": 4.750489054656491e-05, "loss": 0.2337, "step": 12843 }, { "epoch": 0.2290871472906931, "grad_norm": 0.26157131791114807, "learning_rate": 4.7504212666065996e-05, "loss": 0.2209, "step": 12844 }, { "epoch": 0.22910498341240682, "grad_norm": 0.3097561299800873, "learning_rate": 4.750353469833298e-05, "loss": 0.1824, "step": 12845 }, { "epoch": 0.2291228195341205, "grad_norm": 0.2788170874118805, "learning_rate": 4.75028566433685e-05, "loss": 0.1674, "step": 12846 }, { "epoch": 0.2291406556558342, "grad_norm": 0.23734356462955475, "learning_rate": 4.7502178501175165e-05, "loss": 0.1911, "step": 12847 }, { "epoch": 0.22915849177754788, "grad_norm": 0.33548101782798767, "learning_rate": 4.750150027175562e-05, "loss": 0.2315, "step": 12848 }, { "epoch": 0.2291763278992616, "grad_norm": 0.22799284756183624, "learning_rate": 4.750082195511248e-05, "loss": 0.1867, "step": 12849 }, { "epoch": 0.22919416402097528, "grad_norm": 0.329924076795578, "learning_rate": 4.75001435512484e-05, "loss": 0.2362, "step": 12850 }, { "epoch": 0.22921200014268897, "grad_norm": 0.26934555172920227, "learning_rate": 4.749946506016598e-05, "loss": 0.2227, "step": 12851 }, { "epoch": 0.22922983626440266, "grad_norm": 0.28445518016815186, "learning_rate": 4.7498786481867864e-05, "loss": 0.1908, "step": 12852 }, { "epoch": 0.22924767238611637, "grad_norm": 0.23385927081108093, "learning_rate": 4.749810781635668e-05, "loss": 0.1717, "step": 12853 }, { "epoch": 0.22926550850783006, "grad_norm": 0.3572027087211609, "learning_rate": 4.749742906363506e-05, "loss": 0.2461, "step": 12854 }, { "epoch": 0.22928334462954375, "grad_norm": 0.26154258847236633, "learning_rate": 4.7496750223705635e-05, "loss": 0.1837, "step": 12855 }, { "epoch": 0.22930118075125744, "grad_norm": 0.27525046467781067, "learning_rate": 4.749607129657104e-05, "loss": 0.1844, "step": 12856 }, { "epoch": 0.22931901687297115, "grad_norm": 0.3137851357460022, "learning_rate": 4.7495392282233896e-05, "loss": 0.1366, "step": 12857 }, { "epoch": 0.22933685299468484, "grad_norm": 0.32831886410713196, "learning_rate": 4.749471318069685e-05, "loss": 0.207, "step": 12858 }, { "epoch": 0.22935468911639853, "grad_norm": 0.3689045011997223, "learning_rate": 4.7494033991962514e-05, "loss": 0.2653, "step": 12859 }, { "epoch": 0.22937252523811222, "grad_norm": 0.36577335000038147, "learning_rate": 4.7493354716033545e-05, "loss": 0.2443, "step": 12860 }, { "epoch": 0.2293903613598259, "grad_norm": 0.4057885706424713, "learning_rate": 4.7492675352912556e-05, "loss": 0.1787, "step": 12861 }, { "epoch": 0.22940819748153962, "grad_norm": 0.24981991946697235, "learning_rate": 4.7491995902602196e-05, "loss": 0.1903, "step": 12862 }, { "epoch": 0.2294260336032533, "grad_norm": 0.23707671463489532, "learning_rate": 4.749131636510509e-05, "loss": 0.184, "step": 12863 }, { "epoch": 0.229443869724967, "grad_norm": 0.242751806974411, "learning_rate": 4.7490636740423863e-05, "loss": 0.1633, "step": 12864 }, { "epoch": 0.22946170584668069, "grad_norm": 0.34951525926589966, "learning_rate": 4.748995702856117e-05, "loss": 0.24, "step": 12865 }, { "epoch": 0.2294795419683944, "grad_norm": 0.2674331068992615, "learning_rate": 4.748927722951963e-05, "loss": 0.1761, "step": 12866 }, { "epoch": 0.2294973780901081, "grad_norm": 0.34835100173950195, "learning_rate": 4.748859734330189e-05, "loss": 0.244, "step": 12867 }, { "epoch": 0.22951521421182178, "grad_norm": 0.2509762942790985, "learning_rate": 4.748791736991058e-05, "loss": 0.1699, "step": 12868 }, { "epoch": 0.22953305033353547, "grad_norm": 0.2583219110965729, "learning_rate": 4.7487237309348334e-05, "loss": 0.1958, "step": 12869 }, { "epoch": 0.22955088645524918, "grad_norm": 0.24803723394870758, "learning_rate": 4.7486557161617785e-05, "loss": 0.1898, "step": 12870 }, { "epoch": 0.22956872257696287, "grad_norm": 0.23758569359779358, "learning_rate": 4.7485876926721576e-05, "loss": 0.2047, "step": 12871 }, { "epoch": 0.22958655869867656, "grad_norm": 0.30725789070129395, "learning_rate": 4.748519660466234e-05, "loss": 0.217, "step": 12872 }, { "epoch": 0.22960439482039025, "grad_norm": 0.29835864901542664, "learning_rate": 4.748451619544272e-05, "loss": 0.1959, "step": 12873 }, { "epoch": 0.22962223094210396, "grad_norm": 0.26595067977905273, "learning_rate": 4.748383569906535e-05, "loss": 0.1975, "step": 12874 }, { "epoch": 0.22964006706381765, "grad_norm": 0.3447571098804474, "learning_rate": 4.7483155115532865e-05, "loss": 0.237, "step": 12875 }, { "epoch": 0.22965790318553134, "grad_norm": 0.2722747325897217, "learning_rate": 4.74824744448479e-05, "loss": 0.212, "step": 12876 }, { "epoch": 0.22967573930724502, "grad_norm": 0.207061767578125, "learning_rate": 4.748179368701311e-05, "loss": 0.1841, "step": 12877 }, { "epoch": 0.22969357542895874, "grad_norm": 0.31938791275024414, "learning_rate": 4.7481112842031104e-05, "loss": 0.1613, "step": 12878 }, { "epoch": 0.22971141155067243, "grad_norm": 0.2799973785877228, "learning_rate": 4.7480431909904556e-05, "loss": 0.1937, "step": 12879 }, { "epoch": 0.22972924767238612, "grad_norm": 0.22087237238883972, "learning_rate": 4.7479750890636085e-05, "loss": 0.1444, "step": 12880 }, { "epoch": 0.2297470837940998, "grad_norm": 0.35536837577819824, "learning_rate": 4.7479069784228333e-05, "loss": 0.1714, "step": 12881 }, { "epoch": 0.2297649199158135, "grad_norm": 0.3145037889480591, "learning_rate": 4.747838859068395e-05, "loss": 0.2284, "step": 12882 }, { "epoch": 0.2297827560375272, "grad_norm": 0.2892024517059326, "learning_rate": 4.747770731000556e-05, "loss": 0.2337, "step": 12883 }, { "epoch": 0.2298005921592409, "grad_norm": 0.29131457209587097, "learning_rate": 4.747702594219582e-05, "loss": 0.1857, "step": 12884 }, { "epoch": 0.22981842828095458, "grad_norm": 0.32974138855934143, "learning_rate": 4.747634448725736e-05, "loss": 0.2029, "step": 12885 }, { "epoch": 0.22983626440266827, "grad_norm": 0.27079445123672485, "learning_rate": 4.747566294519283e-05, "loss": 0.2205, "step": 12886 }, { "epoch": 0.229854100524382, "grad_norm": 0.25136810541152954, "learning_rate": 4.747498131600486e-05, "loss": 0.2119, "step": 12887 }, { "epoch": 0.22987193664609568, "grad_norm": 0.3759794533252716, "learning_rate": 4.747429959969611e-05, "loss": 0.2434, "step": 12888 }, { "epoch": 0.22988977276780936, "grad_norm": 0.34786808490753174, "learning_rate": 4.7473617796269204e-05, "loss": 0.2014, "step": 12889 }, { "epoch": 0.22990760888952305, "grad_norm": 0.23923760652542114, "learning_rate": 4.74729359057268e-05, "loss": 0.1901, "step": 12890 }, { "epoch": 0.22992544501123677, "grad_norm": 0.336771696805954, "learning_rate": 4.747225392807153e-05, "loss": 0.2576, "step": 12891 }, { "epoch": 0.22994328113295046, "grad_norm": 0.28458261489868164, "learning_rate": 4.7471571863306045e-05, "loss": 0.2122, "step": 12892 }, { "epoch": 0.22996111725466414, "grad_norm": 0.28829190135002136, "learning_rate": 4.747088971143298e-05, "loss": 0.1759, "step": 12893 }, { "epoch": 0.22997895337637783, "grad_norm": 0.1950722336769104, "learning_rate": 4.7470207472454985e-05, "loss": 0.1724, "step": 12894 }, { "epoch": 0.22999678949809155, "grad_norm": 0.30134278535842896, "learning_rate": 4.746952514637471e-05, "loss": 0.2178, "step": 12895 }, { "epoch": 0.23001462561980524, "grad_norm": 0.2403934895992279, "learning_rate": 4.74688427331948e-05, "loss": 0.1623, "step": 12896 }, { "epoch": 0.23003246174151892, "grad_norm": 0.3011443316936493, "learning_rate": 4.746816023291788e-05, "loss": 0.1716, "step": 12897 }, { "epoch": 0.2300502978632326, "grad_norm": 0.2563803195953369, "learning_rate": 4.746747764554662e-05, "loss": 0.1566, "step": 12898 }, { "epoch": 0.23006813398494633, "grad_norm": 0.5843698978424072, "learning_rate": 4.746679497108366e-05, "loss": 0.209, "step": 12899 }, { "epoch": 0.23008597010666001, "grad_norm": 0.3100547790527344, "learning_rate": 4.746611220953164e-05, "loss": 0.2086, "step": 12900 }, { "epoch": 0.2301038062283737, "grad_norm": 0.4501771926879883, "learning_rate": 4.746542936089321e-05, "loss": 0.1677, "step": 12901 }, { "epoch": 0.2301216423500874, "grad_norm": 0.24397575855255127, "learning_rate": 4.746474642517101e-05, "loss": 0.1961, "step": 12902 }, { "epoch": 0.23013947847180108, "grad_norm": 0.25141119956970215, "learning_rate": 4.74640634023677e-05, "loss": 0.1715, "step": 12903 }, { "epoch": 0.2301573145935148, "grad_norm": 0.2494877725839615, "learning_rate": 4.746338029248592e-05, "loss": 0.1348, "step": 12904 }, { "epoch": 0.23017515071522848, "grad_norm": 0.3292723000049591, "learning_rate": 4.746269709552832e-05, "loss": 0.1579, "step": 12905 }, { "epoch": 0.23019298683694217, "grad_norm": 0.2648920714855194, "learning_rate": 4.746201381149755e-05, "loss": 0.1587, "step": 12906 }, { "epoch": 0.23021082295865586, "grad_norm": 0.1770116090774536, "learning_rate": 4.746133044039625e-05, "loss": 0.1682, "step": 12907 }, { "epoch": 0.23022865908036957, "grad_norm": 0.28378725051879883, "learning_rate": 4.746064698222708e-05, "loss": 0.115, "step": 12908 }, { "epoch": 0.23024649520208326, "grad_norm": 0.23963265120983124, "learning_rate": 4.745996343699268e-05, "loss": 0.174, "step": 12909 }, { "epoch": 0.23026433132379695, "grad_norm": 0.3272024989128113, "learning_rate": 4.745927980469571e-05, "loss": 0.1827, "step": 12910 }, { "epoch": 0.23028216744551064, "grad_norm": 0.26129600405693054, "learning_rate": 4.7458596085338816e-05, "loss": 0.1964, "step": 12911 }, { "epoch": 0.23030000356722435, "grad_norm": 0.2599860429763794, "learning_rate": 4.745791227892464e-05, "loss": 0.1631, "step": 12912 }, { "epoch": 0.23031783968893804, "grad_norm": 0.2960149943828583, "learning_rate": 4.745722838545584e-05, "loss": 0.1618, "step": 12913 }, { "epoch": 0.23033567581065173, "grad_norm": 0.29296499490737915, "learning_rate": 4.7456544404935074e-05, "loss": 0.205, "step": 12914 }, { "epoch": 0.23035351193236542, "grad_norm": 0.23233060538768768, "learning_rate": 4.7455860337364974e-05, "loss": 0.1898, "step": 12915 }, { "epoch": 0.23037134805407913, "grad_norm": 0.28250375390052795, "learning_rate": 4.745517618274821e-05, "loss": 0.1923, "step": 12916 }, { "epoch": 0.23038918417579282, "grad_norm": 0.3355405926704407, "learning_rate": 4.745449194108743e-05, "loss": 0.2139, "step": 12917 }, { "epoch": 0.2304070202975065, "grad_norm": 0.3375043272972107, "learning_rate": 4.745380761238528e-05, "loss": 0.222, "step": 12918 }, { "epoch": 0.2304248564192202, "grad_norm": 0.24035699665546417, "learning_rate": 4.7453123196644415e-05, "loss": 0.1886, "step": 12919 }, { "epoch": 0.23044269254093389, "grad_norm": 0.2645739018917084, "learning_rate": 4.7452438693867493e-05, "loss": 0.2325, "step": 12920 }, { "epoch": 0.2304605286626476, "grad_norm": 0.31312644481658936, "learning_rate": 4.7451754104057164e-05, "loss": 0.2007, "step": 12921 }, { "epoch": 0.2304783647843613, "grad_norm": 0.21971257030963898, "learning_rate": 4.7451069427216075e-05, "loss": 0.1921, "step": 12922 }, { "epoch": 0.23049620090607498, "grad_norm": 0.2702762484550476, "learning_rate": 4.745038466334689e-05, "loss": 0.2327, "step": 12923 }, { "epoch": 0.23051403702778867, "grad_norm": 0.33635812997817993, "learning_rate": 4.744969981245226e-05, "loss": 0.258, "step": 12924 }, { "epoch": 0.23053187314950238, "grad_norm": 0.3847140073776245, "learning_rate": 4.7449014874534844e-05, "loss": 0.1502, "step": 12925 }, { "epoch": 0.23054970927121607, "grad_norm": 0.2540401816368103, "learning_rate": 4.744832984959729e-05, "loss": 0.1938, "step": 12926 }, { "epoch": 0.23056754539292976, "grad_norm": 0.3007860779762268, "learning_rate": 4.7447644737642264e-05, "loss": 0.1571, "step": 12927 }, { "epoch": 0.23058538151464344, "grad_norm": 0.235521137714386, "learning_rate": 4.7446959538672395e-05, "loss": 0.1882, "step": 12928 }, { "epoch": 0.23060321763635716, "grad_norm": 0.2145262509584427, "learning_rate": 4.744627425269037e-05, "loss": 0.1477, "step": 12929 }, { "epoch": 0.23062105375807085, "grad_norm": 0.22935424745082855, "learning_rate": 4.744558887969883e-05, "loss": 0.1882, "step": 12930 }, { "epoch": 0.23063888987978454, "grad_norm": 0.3163914680480957, "learning_rate": 4.744490341970044e-05, "loss": 0.1726, "step": 12931 }, { "epoch": 0.23065672600149822, "grad_norm": 0.3141555190086365, "learning_rate": 4.744421787269785e-05, "loss": 0.23, "step": 12932 }, { "epoch": 0.23067456212321194, "grad_norm": 0.335625559091568, "learning_rate": 4.744353223869372e-05, "loss": 0.1428, "step": 12933 }, { "epoch": 0.23069239824492563, "grad_norm": 0.2633238136768341, "learning_rate": 4.74428465176907e-05, "loss": 0.1786, "step": 12934 }, { "epoch": 0.23071023436663932, "grad_norm": 0.31745925545692444, "learning_rate": 4.744216070969146e-05, "loss": 0.2171, "step": 12935 }, { "epoch": 0.230728070488353, "grad_norm": 0.20374995470046997, "learning_rate": 4.744147481469866e-05, "loss": 0.1751, "step": 12936 }, { "epoch": 0.23074590661006672, "grad_norm": 0.2697664797306061, "learning_rate": 4.744078883271494e-05, "loss": 0.164, "step": 12937 }, { "epoch": 0.2307637427317804, "grad_norm": 0.3518921732902527, "learning_rate": 4.7440102763742983e-05, "loss": 0.1947, "step": 12938 }, { "epoch": 0.2307815788534941, "grad_norm": 0.3237800598144531, "learning_rate": 4.743941660778544e-05, "loss": 0.2331, "step": 12939 }, { "epoch": 0.23079941497520778, "grad_norm": 0.2582206130027771, "learning_rate": 4.7438730364844953e-05, "loss": 0.2031, "step": 12940 }, { "epoch": 0.23081725109692147, "grad_norm": 0.29988721013069153, "learning_rate": 4.743804403492421e-05, "loss": 0.1564, "step": 12941 }, { "epoch": 0.2308350872186352, "grad_norm": 0.25897279381752014, "learning_rate": 4.743735761802585e-05, "loss": 0.182, "step": 12942 }, { "epoch": 0.23085292334034888, "grad_norm": 0.23908762633800507, "learning_rate": 4.743667111415255e-05, "loss": 0.2118, "step": 12943 }, { "epoch": 0.23087075946206256, "grad_norm": 0.274428129196167, "learning_rate": 4.743598452330695e-05, "loss": 0.2126, "step": 12944 }, { "epoch": 0.23088859558377625, "grad_norm": 0.27773234248161316, "learning_rate": 4.743529784549174e-05, "loss": 0.1647, "step": 12945 }, { "epoch": 0.23090643170548997, "grad_norm": 0.29228320717811584, "learning_rate": 4.743461108070956e-05, "loss": 0.1776, "step": 12946 }, { "epoch": 0.23092426782720366, "grad_norm": 0.29485487937927246, "learning_rate": 4.743392422896308e-05, "loss": 0.1855, "step": 12947 }, { "epoch": 0.23094210394891734, "grad_norm": 0.2691425681114197, "learning_rate": 4.743323729025496e-05, "loss": 0.203, "step": 12948 }, { "epoch": 0.23095994007063103, "grad_norm": 0.24132020771503448, "learning_rate": 4.743255026458786e-05, "loss": 0.2317, "step": 12949 }, { "epoch": 0.23097777619234475, "grad_norm": 0.24747806787490845, "learning_rate": 4.7431863151964454e-05, "loss": 0.1768, "step": 12950 }, { "epoch": 0.23099561231405843, "grad_norm": 0.24529355764389038, "learning_rate": 4.74311759523874e-05, "loss": 0.1878, "step": 12951 }, { "epoch": 0.23101344843577212, "grad_norm": 0.2427300214767456, "learning_rate": 4.7430488665859356e-05, "loss": 0.1797, "step": 12952 }, { "epoch": 0.2310312845574858, "grad_norm": 0.24497056007385254, "learning_rate": 4.7429801292382994e-05, "loss": 0.1618, "step": 12953 }, { "epoch": 0.23104912067919953, "grad_norm": 0.24052974581718445, "learning_rate": 4.742911383196097e-05, "loss": 0.2116, "step": 12954 }, { "epoch": 0.23106695680091321, "grad_norm": 0.3020787537097931, "learning_rate": 4.742842628459596e-05, "loss": 0.22, "step": 12955 }, { "epoch": 0.2310847929226269, "grad_norm": 0.228055939078331, "learning_rate": 4.742773865029062e-05, "loss": 0.2189, "step": 12956 }, { "epoch": 0.2311026290443406, "grad_norm": 0.3163982331752777, "learning_rate": 4.742705092904762e-05, "loss": 0.1655, "step": 12957 }, { "epoch": 0.2311204651660543, "grad_norm": 0.27987125515937805, "learning_rate": 4.742636312086962e-05, "loss": 0.1843, "step": 12958 }, { "epoch": 0.231138301287768, "grad_norm": 0.28908318281173706, "learning_rate": 4.742567522575929e-05, "loss": 0.2074, "step": 12959 }, { "epoch": 0.23115613740948168, "grad_norm": 0.3305481970310211, "learning_rate": 4.742498724371931e-05, "loss": 0.2145, "step": 12960 }, { "epoch": 0.23117397353119537, "grad_norm": 0.22384819388389587, "learning_rate": 4.7424299174752326e-05, "loss": 0.1745, "step": 12961 }, { "epoch": 0.23119180965290906, "grad_norm": 0.3761615455150604, "learning_rate": 4.742361101886101e-05, "loss": 0.2431, "step": 12962 }, { "epoch": 0.23120964577462277, "grad_norm": 0.27289512753486633, "learning_rate": 4.742292277604803e-05, "loss": 0.1987, "step": 12963 }, { "epoch": 0.23122748189633646, "grad_norm": 0.24646975100040436, "learning_rate": 4.7422234446316074e-05, "loss": 0.1656, "step": 12964 }, { "epoch": 0.23124531801805015, "grad_norm": 0.4282931685447693, "learning_rate": 4.7421546029667775e-05, "loss": 0.1718, "step": 12965 }, { "epoch": 0.23126315413976384, "grad_norm": 0.269220232963562, "learning_rate": 4.7420857526105825e-05, "loss": 0.1874, "step": 12966 }, { "epoch": 0.23128099026147755, "grad_norm": 0.31382322311401367, "learning_rate": 4.7420168935632895e-05, "loss": 0.2035, "step": 12967 }, { "epoch": 0.23129882638319124, "grad_norm": 0.29719898104667664, "learning_rate": 4.741948025825164e-05, "loss": 0.2168, "step": 12968 }, { "epoch": 0.23131666250490493, "grad_norm": 0.22965769469738007, "learning_rate": 4.741879149396473e-05, "loss": 0.1878, "step": 12969 }, { "epoch": 0.23133449862661862, "grad_norm": 0.32541218400001526, "learning_rate": 4.7418102642774846e-05, "loss": 0.238, "step": 12970 }, { "epoch": 0.23135233474833233, "grad_norm": 0.37302398681640625, "learning_rate": 4.7417413704684656e-05, "loss": 0.1912, "step": 12971 }, { "epoch": 0.23137017087004602, "grad_norm": 0.3561212122440338, "learning_rate": 4.741672467969682e-05, "loss": 0.2219, "step": 12972 }, { "epoch": 0.2313880069917597, "grad_norm": 0.23414330184459686, "learning_rate": 4.741603556781403e-05, "loss": 0.2264, "step": 12973 }, { "epoch": 0.2314058431134734, "grad_norm": 0.27014073729515076, "learning_rate": 4.741534636903893e-05, "loss": 0.2182, "step": 12974 }, { "epoch": 0.2314236792351871, "grad_norm": 0.6651800274848938, "learning_rate": 4.741465708337421e-05, "loss": 0.2513, "step": 12975 }, { "epoch": 0.2314415153569008, "grad_norm": 0.21229223906993866, "learning_rate": 4.741396771082254e-05, "loss": 0.1925, "step": 12976 }, { "epoch": 0.2314593514786145, "grad_norm": 0.35066893696784973, "learning_rate": 4.7413278251386594e-05, "loss": 0.2169, "step": 12977 }, { "epoch": 0.23147718760032818, "grad_norm": 0.26448994874954224, "learning_rate": 4.7412588705069034e-05, "loss": 0.1784, "step": 12978 }, { "epoch": 0.2314950237220419, "grad_norm": 0.2352786809206009, "learning_rate": 4.741189907187254e-05, "loss": 0.2064, "step": 12979 }, { "epoch": 0.23151285984375558, "grad_norm": 0.2799092233181, "learning_rate": 4.741120935179978e-05, "loss": 0.1786, "step": 12980 }, { "epoch": 0.23153069596546927, "grad_norm": 0.21805337071418762, "learning_rate": 4.7410519544853437e-05, "loss": 0.1719, "step": 12981 }, { "epoch": 0.23154853208718296, "grad_norm": 0.21304574608802795, "learning_rate": 4.740982965103618e-05, "loss": 0.1599, "step": 12982 }, { "epoch": 0.23156636820889664, "grad_norm": 0.2549423277378082, "learning_rate": 4.7409139670350683e-05, "loss": 0.1628, "step": 12983 }, { "epoch": 0.23158420433061036, "grad_norm": 0.24077706038951874, "learning_rate": 4.740844960279962e-05, "loss": 0.2012, "step": 12984 }, { "epoch": 0.23160204045232405, "grad_norm": 0.25259336829185486, "learning_rate": 4.740775944838567e-05, "loss": 0.1884, "step": 12985 }, { "epoch": 0.23161987657403774, "grad_norm": 0.32633882761001587, "learning_rate": 4.74070692071115e-05, "loss": 0.2058, "step": 12986 }, { "epoch": 0.23163771269575142, "grad_norm": 0.25864505767822266, "learning_rate": 4.740637887897979e-05, "loss": 0.1927, "step": 12987 }, { "epoch": 0.23165554881746514, "grad_norm": 0.3024556338787079, "learning_rate": 4.7405688463993217e-05, "loss": 0.2116, "step": 12988 }, { "epoch": 0.23167338493917883, "grad_norm": 0.24341925978660583, "learning_rate": 4.7404997962154465e-05, "loss": 0.1732, "step": 12989 }, { "epoch": 0.23169122106089252, "grad_norm": 0.32996392250061035, "learning_rate": 4.740430737346619e-05, "loss": 0.1795, "step": 12990 }, { "epoch": 0.2317090571826062, "grad_norm": 0.25818613171577454, "learning_rate": 4.740361669793109e-05, "loss": 0.2149, "step": 12991 }, { "epoch": 0.23172689330431992, "grad_norm": 0.29291197657585144, "learning_rate": 4.7402925935551836e-05, "loss": 0.1878, "step": 12992 }, { "epoch": 0.2317447294260336, "grad_norm": 0.2562229335308075, "learning_rate": 4.740223508633109e-05, "loss": 0.1389, "step": 12993 }, { "epoch": 0.2317625655477473, "grad_norm": 0.26252058148384094, "learning_rate": 4.7401544150271557e-05, "loss": 0.1662, "step": 12994 }, { "epoch": 0.23178040166946098, "grad_norm": 0.3297211527824402, "learning_rate": 4.74008531273759e-05, "loss": 0.1865, "step": 12995 }, { "epoch": 0.2317982377911747, "grad_norm": 0.27339687943458557, "learning_rate": 4.7400162017646796e-05, "loss": 0.2139, "step": 12996 }, { "epoch": 0.2318160739128884, "grad_norm": 0.3054243326187134, "learning_rate": 4.739947082108692e-05, "loss": 0.2196, "step": 12997 }, { "epoch": 0.23183391003460208, "grad_norm": 0.4263780415058136, "learning_rate": 4.739877953769897e-05, "loss": 0.1996, "step": 12998 }, { "epoch": 0.23185174615631576, "grad_norm": 0.20971044898033142, "learning_rate": 4.739808816748561e-05, "loss": 0.1824, "step": 12999 }, { "epoch": 0.23186958227802948, "grad_norm": 0.4023171067237854, "learning_rate": 4.7397396710449525e-05, "loss": 0.23, "step": 13000 }, { "epoch": 0.23186958227802948, "eval_loss": 0.18585029244422913, "eval_runtime": 106.8316, "eval_samples_per_second": 9.585, "eval_steps_per_second": 1.601, "step": 13000 }, { "epoch": 0.23188741839974317, "grad_norm": 0.3185226321220398, "learning_rate": 4.739670516659339e-05, "loss": 0.2028, "step": 13001 }, { "epoch": 0.23190525452145686, "grad_norm": 0.26754093170166016, "learning_rate": 4.7396013535919894e-05, "loss": 0.1318, "step": 13002 }, { "epoch": 0.23192309064317054, "grad_norm": 0.3592711389064789, "learning_rate": 4.7395321818431715e-05, "loss": 0.2066, "step": 13003 }, { "epoch": 0.23194092676488423, "grad_norm": 0.22774259746074677, "learning_rate": 4.7394630014131536e-05, "loss": 0.1387, "step": 13004 }, { "epoch": 0.23195876288659795, "grad_norm": 0.2407887727022171, "learning_rate": 4.739393812302203e-05, "loss": 0.1774, "step": 13005 }, { "epoch": 0.23197659900831163, "grad_norm": 0.24861784279346466, "learning_rate": 4.739324614510589e-05, "loss": 0.1211, "step": 13006 }, { "epoch": 0.23199443513002532, "grad_norm": 0.31071850657463074, "learning_rate": 4.739255408038579e-05, "loss": 0.1912, "step": 13007 }, { "epoch": 0.232012271251739, "grad_norm": 0.272596538066864, "learning_rate": 4.7391861928864424e-05, "loss": 0.1657, "step": 13008 }, { "epoch": 0.23203010737345273, "grad_norm": 0.37417080998420715, "learning_rate": 4.7391169690544454e-05, "loss": 0.253, "step": 13009 }, { "epoch": 0.23204794349516641, "grad_norm": 0.3753811717033386, "learning_rate": 4.7390477365428584e-05, "loss": 0.2186, "step": 13010 }, { "epoch": 0.2320657796168801, "grad_norm": 0.4284873902797699, "learning_rate": 4.738978495351949e-05, "loss": 0.2663, "step": 13011 }, { "epoch": 0.2320836157385938, "grad_norm": 0.33204787969589233, "learning_rate": 4.738909245481986e-05, "loss": 0.1966, "step": 13012 }, { "epoch": 0.2321014518603075, "grad_norm": 0.28605467081069946, "learning_rate": 4.738839986933237e-05, "loss": 0.2157, "step": 13013 }, { "epoch": 0.2321192879820212, "grad_norm": 0.3401779234409332, "learning_rate": 4.7387707197059714e-05, "loss": 0.17, "step": 13014 }, { "epoch": 0.23213712410373488, "grad_norm": 0.24983623623847961, "learning_rate": 4.738701443800456e-05, "loss": 0.1968, "step": 13015 }, { "epoch": 0.23215496022544857, "grad_norm": 0.2762739956378937, "learning_rate": 4.738632159216962e-05, "loss": 0.188, "step": 13016 }, { "epoch": 0.23217279634716229, "grad_norm": 0.41999778151512146, "learning_rate": 4.738562865955756e-05, "loss": 0.1928, "step": 13017 }, { "epoch": 0.23219063246887597, "grad_norm": 0.34503859281539917, "learning_rate": 4.738493564017107e-05, "loss": 0.2027, "step": 13018 }, { "epoch": 0.23220846859058966, "grad_norm": 0.24494142830371857, "learning_rate": 4.7384242534012844e-05, "loss": 0.1644, "step": 13019 }, { "epoch": 0.23222630471230335, "grad_norm": 0.27364447712898254, "learning_rate": 4.738354934108556e-05, "loss": 0.2003, "step": 13020 }, { "epoch": 0.23224414083401704, "grad_norm": 0.3707612454891205, "learning_rate": 4.73828560613919e-05, "loss": 0.2234, "step": 13021 }, { "epoch": 0.23226197695573075, "grad_norm": 0.27612432837486267, "learning_rate": 4.738216269493457e-05, "loss": 0.213, "step": 13022 }, { "epoch": 0.23227981307744444, "grad_norm": 0.30849191546440125, "learning_rate": 4.738146924171624e-05, "loss": 0.1851, "step": 13023 }, { "epoch": 0.23229764919915813, "grad_norm": 0.21203866600990295, "learning_rate": 4.738077570173961e-05, "loss": 0.1864, "step": 13024 }, { "epoch": 0.23231548532087182, "grad_norm": 0.3886302709579468, "learning_rate": 4.738008207500736e-05, "loss": 0.2098, "step": 13025 }, { "epoch": 0.23233332144258553, "grad_norm": 0.22119519114494324, "learning_rate": 4.737938836152218e-05, "loss": 0.1629, "step": 13026 }, { "epoch": 0.23235115756429922, "grad_norm": 0.2599148750305176, "learning_rate": 4.737869456128676e-05, "loss": 0.1937, "step": 13027 }, { "epoch": 0.2323689936860129, "grad_norm": 0.5678700804710388, "learning_rate": 4.73780006743038e-05, "loss": 0.2066, "step": 13028 }, { "epoch": 0.2323868298077266, "grad_norm": 0.29183149337768555, "learning_rate": 4.737730670057597e-05, "loss": 0.2033, "step": 13029 }, { "epoch": 0.2324046659294403, "grad_norm": 0.31361421942710876, "learning_rate": 4.737661264010598e-05, "loss": 0.2253, "step": 13030 }, { "epoch": 0.232422502051154, "grad_norm": 0.42441412806510925, "learning_rate": 4.7375918492896506e-05, "loss": 0.1599, "step": 13031 }, { "epoch": 0.2324403381728677, "grad_norm": 0.29404038190841675, "learning_rate": 4.737522425895024e-05, "loss": 0.2201, "step": 13032 }, { "epoch": 0.23245817429458138, "grad_norm": 0.2951338291168213, "learning_rate": 4.7374529938269886e-05, "loss": 0.2216, "step": 13033 }, { "epoch": 0.2324760104162951, "grad_norm": 0.34812551736831665, "learning_rate": 4.737383553085811e-05, "loss": 0.1922, "step": 13034 }, { "epoch": 0.23249384653800878, "grad_norm": 0.26205068826675415, "learning_rate": 4.737314103671763e-05, "loss": 0.1671, "step": 13035 }, { "epoch": 0.23251168265972247, "grad_norm": 0.2434355914592743, "learning_rate": 4.737244645585113e-05, "loss": 0.2038, "step": 13036 }, { "epoch": 0.23252951878143616, "grad_norm": 0.2926849126815796, "learning_rate": 4.73717517882613e-05, "loss": 0.2421, "step": 13037 }, { "epoch": 0.23254735490314987, "grad_norm": 0.2663050889968872, "learning_rate": 4.737105703395083e-05, "loss": 0.1713, "step": 13038 }, { "epoch": 0.23256519102486356, "grad_norm": 0.25067776441574097, "learning_rate": 4.737036219292241e-05, "loss": 0.1919, "step": 13039 }, { "epoch": 0.23258302714657725, "grad_norm": 0.2642282545566559, "learning_rate": 4.736966726517875e-05, "loss": 0.1491, "step": 13040 }, { "epoch": 0.23260086326829094, "grad_norm": 0.2530624568462372, "learning_rate": 4.7368972250722535e-05, "loss": 0.1728, "step": 13041 }, { "epoch": 0.23261869939000462, "grad_norm": 0.29371771216392517, "learning_rate": 4.736827714955645e-05, "loss": 0.2156, "step": 13042 }, { "epoch": 0.23263653551171834, "grad_norm": 0.23769794404506683, "learning_rate": 4.73675819616832e-05, "loss": 0.1477, "step": 13043 }, { "epoch": 0.23265437163343203, "grad_norm": 0.4349018633365631, "learning_rate": 4.736688668710548e-05, "loss": 0.2286, "step": 13044 }, { "epoch": 0.23267220775514572, "grad_norm": 0.3502461314201355, "learning_rate": 4.736619132582598e-05, "loss": 0.194, "step": 13045 }, { "epoch": 0.2326900438768594, "grad_norm": 0.34425291419029236, "learning_rate": 4.7365495877847395e-05, "loss": 0.1814, "step": 13046 }, { "epoch": 0.23270787999857312, "grad_norm": 0.26752737164497375, "learning_rate": 4.736480034317242e-05, "loss": 0.1916, "step": 13047 }, { "epoch": 0.2327257161202868, "grad_norm": 0.22934582829475403, "learning_rate": 4.736410472180376e-05, "loss": 0.18, "step": 13048 }, { "epoch": 0.2327435522420005, "grad_norm": 0.2358386367559433, "learning_rate": 4.7363409013744105e-05, "loss": 0.1748, "step": 13049 }, { "epoch": 0.23276138836371418, "grad_norm": 0.33683323860168457, "learning_rate": 4.736271321899615e-05, "loss": 0.2094, "step": 13050 }, { "epoch": 0.2327792244854279, "grad_norm": 0.36347466707229614, "learning_rate": 4.73620173375626e-05, "loss": 0.2289, "step": 13051 }, { "epoch": 0.2327970606071416, "grad_norm": 0.3697187602519989, "learning_rate": 4.7361321369446147e-05, "loss": 0.2265, "step": 13052 }, { "epoch": 0.23281489672885528, "grad_norm": 0.45988330245018005, "learning_rate": 4.7360625314649474e-05, "loss": 0.219, "step": 13053 }, { "epoch": 0.23283273285056896, "grad_norm": 0.25618958473205566, "learning_rate": 4.735992917317531e-05, "loss": 0.142, "step": 13054 }, { "epoch": 0.23285056897228268, "grad_norm": 0.23562154173851013, "learning_rate": 4.735923294502633e-05, "loss": 0.1655, "step": 13055 }, { "epoch": 0.23286840509399637, "grad_norm": 0.25488919019699097, "learning_rate": 4.7358536630205255e-05, "loss": 0.1917, "step": 13056 }, { "epoch": 0.23288624121571005, "grad_norm": 0.2664492726325989, "learning_rate": 4.735784022871476e-05, "loss": 0.2198, "step": 13057 }, { "epoch": 0.23290407733742374, "grad_norm": 0.4016435444355011, "learning_rate": 4.735714374055755e-05, "loss": 0.1594, "step": 13058 }, { "epoch": 0.23292191345913746, "grad_norm": 0.19750067591667175, "learning_rate": 4.735644716573633e-05, "loss": 0.1749, "step": 13059 }, { "epoch": 0.23293974958085115, "grad_norm": 0.24430808424949646, "learning_rate": 4.73557505042538e-05, "loss": 0.231, "step": 13060 }, { "epoch": 0.23295758570256483, "grad_norm": 0.319717139005661, "learning_rate": 4.735505375611266e-05, "loss": 0.2144, "step": 13061 }, { "epoch": 0.23297542182427852, "grad_norm": 0.30825549364089966, "learning_rate": 4.735435692131561e-05, "loss": 0.1976, "step": 13062 }, { "epoch": 0.2329932579459922, "grad_norm": 0.31341513991355896, "learning_rate": 4.735365999986535e-05, "loss": 0.1904, "step": 13063 }, { "epoch": 0.23301109406770593, "grad_norm": 0.32323968410491943, "learning_rate": 4.735296299176459e-05, "loss": 0.2164, "step": 13064 }, { "epoch": 0.23302893018941961, "grad_norm": 0.2604954242706299, "learning_rate": 4.735226589701602e-05, "loss": 0.2051, "step": 13065 }, { "epoch": 0.2330467663111333, "grad_norm": 0.23131580650806427, "learning_rate": 4.7351568715622347e-05, "loss": 0.2007, "step": 13066 }, { "epoch": 0.233064602432847, "grad_norm": 0.23712332546710968, "learning_rate": 4.735087144758628e-05, "loss": 0.1675, "step": 13067 }, { "epoch": 0.2330824385545607, "grad_norm": 0.3033466935157776, "learning_rate": 4.7350174092910504e-05, "loss": 0.2001, "step": 13068 }, { "epoch": 0.2331002746762744, "grad_norm": 0.3434779644012451, "learning_rate": 4.734947665159774e-05, "loss": 0.2133, "step": 13069 }, { "epoch": 0.23311811079798808, "grad_norm": 0.2736477255821228, "learning_rate": 4.734877912365069e-05, "loss": 0.198, "step": 13070 }, { "epoch": 0.23313594691970177, "grad_norm": 0.3114743232727051, "learning_rate": 4.734808150907204e-05, "loss": 0.2444, "step": 13071 }, { "epoch": 0.23315378304141549, "grad_norm": 0.24049529433250427, "learning_rate": 4.734738380786452e-05, "loss": 0.1613, "step": 13072 }, { "epoch": 0.23317161916312917, "grad_norm": 0.20932382345199585, "learning_rate": 4.734668602003082e-05, "loss": 0.1679, "step": 13073 }, { "epoch": 0.23318945528484286, "grad_norm": 0.22552795708179474, "learning_rate": 4.734598814557364e-05, "loss": 0.1074, "step": 13074 }, { "epoch": 0.23320729140655655, "grad_norm": 0.3333311676979065, "learning_rate": 4.73452901844957e-05, "loss": 0.2534, "step": 13075 }, { "epoch": 0.23322512752827027, "grad_norm": 0.18414105474948883, "learning_rate": 4.7344592136799696e-05, "loss": 0.1579, "step": 13076 }, { "epoch": 0.23324296364998395, "grad_norm": 0.2633228302001953, "learning_rate": 4.734389400248833e-05, "loss": 0.1584, "step": 13077 }, { "epoch": 0.23326079977169764, "grad_norm": 0.2603215277194977, "learning_rate": 4.734319578156431e-05, "loss": 0.203, "step": 13078 }, { "epoch": 0.23327863589341133, "grad_norm": 0.23938825726509094, "learning_rate": 4.7342497474030355e-05, "loss": 0.202, "step": 13079 }, { "epoch": 0.23329647201512504, "grad_norm": 0.3025229275226593, "learning_rate": 4.734179907988916e-05, "loss": 0.2084, "step": 13080 }, { "epoch": 0.23331430813683873, "grad_norm": 0.2092043161392212, "learning_rate": 4.7341100599143436e-05, "loss": 0.1863, "step": 13081 }, { "epoch": 0.23333214425855242, "grad_norm": 0.25694260001182556, "learning_rate": 4.7340402031795886e-05, "loss": 0.1711, "step": 13082 }, { "epoch": 0.2333499803802661, "grad_norm": 0.23915836215019226, "learning_rate": 4.733970337784922e-05, "loss": 0.1939, "step": 13083 }, { "epoch": 0.2333678165019798, "grad_norm": 0.4468158185482025, "learning_rate": 4.733900463730616e-05, "loss": 0.2067, "step": 13084 }, { "epoch": 0.2333856526236935, "grad_norm": 0.7035412192344666, "learning_rate": 4.733830581016939e-05, "loss": 0.1528, "step": 13085 }, { "epoch": 0.2334034887454072, "grad_norm": 0.2828312814235687, "learning_rate": 4.733760689644164e-05, "loss": 0.1913, "step": 13086 }, { "epoch": 0.2334213248671209, "grad_norm": 0.3584742546081543, "learning_rate": 4.7336907896125605e-05, "loss": 0.1756, "step": 13087 }, { "epoch": 0.23343916098883458, "grad_norm": 0.3478483557701111, "learning_rate": 4.7336208809224e-05, "loss": 0.1625, "step": 13088 }, { "epoch": 0.2334569971105483, "grad_norm": 0.2671332359313965, "learning_rate": 4.733550963573954e-05, "loss": 0.1842, "step": 13089 }, { "epoch": 0.23347483323226198, "grad_norm": 0.2902505695819855, "learning_rate": 4.733481037567492e-05, "loss": 0.2057, "step": 13090 }, { "epoch": 0.23349266935397567, "grad_norm": 0.4179877042770386, "learning_rate": 4.733411102903287e-05, "loss": 0.1954, "step": 13091 }, { "epoch": 0.23351050547568936, "grad_norm": 0.3280573785305023, "learning_rate": 4.7333411595816094e-05, "loss": 0.2002, "step": 13092 }, { "epoch": 0.23352834159740307, "grad_norm": 0.33432939648628235, "learning_rate": 4.733271207602729e-05, "loss": 0.2547, "step": 13093 }, { "epoch": 0.23354617771911676, "grad_norm": 0.3041699230670929, "learning_rate": 4.733201246966919e-05, "loss": 0.1653, "step": 13094 }, { "epoch": 0.23356401384083045, "grad_norm": 0.3179178535938263, "learning_rate": 4.73313127767445e-05, "loss": 0.2096, "step": 13095 }, { "epoch": 0.23358184996254414, "grad_norm": 0.27879855036735535, "learning_rate": 4.733061299725591e-05, "loss": 0.2239, "step": 13096 }, { "epoch": 0.23359968608425785, "grad_norm": 0.6498161554336548, "learning_rate": 4.7329913131206174e-05, "loss": 0.2571, "step": 13097 }, { "epoch": 0.23361752220597154, "grad_norm": 0.32810789346694946, "learning_rate": 4.7329213178597964e-05, "loss": 0.2375, "step": 13098 }, { "epoch": 0.23363535832768523, "grad_norm": 0.28718990087509155, "learning_rate": 4.732851313943402e-05, "loss": 0.2159, "step": 13099 }, { "epoch": 0.23365319444939892, "grad_norm": 0.3003918528556824, "learning_rate": 4.732781301371705e-05, "loss": 0.1594, "step": 13100 }, { "epoch": 0.2336710305711126, "grad_norm": 0.22157765924930573, "learning_rate": 4.7327112801449756e-05, "loss": 0.1874, "step": 13101 }, { "epoch": 0.23368886669282632, "grad_norm": 0.27124059200286865, "learning_rate": 4.732641250263487e-05, "loss": 0.1905, "step": 13102 }, { "epoch": 0.23370670281454, "grad_norm": 0.3923420011997223, "learning_rate": 4.732571211727509e-05, "loss": 0.1689, "step": 13103 }, { "epoch": 0.2337245389362537, "grad_norm": 0.22294719517230988, "learning_rate": 4.732501164537314e-05, "loss": 0.2173, "step": 13104 }, { "epoch": 0.23374237505796738, "grad_norm": 0.25595393776893616, "learning_rate": 4.732431108693174e-05, "loss": 0.1879, "step": 13105 }, { "epoch": 0.2337602111796811, "grad_norm": 0.3274551033973694, "learning_rate": 4.73236104419536e-05, "loss": 0.2778, "step": 13106 }, { "epoch": 0.2337780473013948, "grad_norm": 0.3030029535293579, "learning_rate": 4.732290971044143e-05, "loss": 0.1698, "step": 13107 }, { "epoch": 0.23379588342310847, "grad_norm": 0.21548080444335938, "learning_rate": 4.732220889239795e-05, "loss": 0.1683, "step": 13108 }, { "epoch": 0.23381371954482216, "grad_norm": 0.2954399883747101, "learning_rate": 4.7321507987825886e-05, "loss": 0.1847, "step": 13109 }, { "epoch": 0.23383155566653588, "grad_norm": 0.25482800602912903, "learning_rate": 4.732080699672794e-05, "loss": 0.2194, "step": 13110 }, { "epoch": 0.23384939178824957, "grad_norm": 0.23453563451766968, "learning_rate": 4.732010591910685e-05, "loss": 0.1323, "step": 13111 }, { "epoch": 0.23386722790996325, "grad_norm": 0.28474459052085876, "learning_rate": 4.73194047549653e-05, "loss": 0.2208, "step": 13112 }, { "epoch": 0.23388506403167694, "grad_norm": 0.39361241459846497, "learning_rate": 4.731870350430604e-05, "loss": 0.1788, "step": 13113 }, { "epoch": 0.23390290015339066, "grad_norm": 0.25086504220962524, "learning_rate": 4.731800216713178e-05, "loss": 0.1546, "step": 13114 }, { "epoch": 0.23392073627510435, "grad_norm": 0.20546072721481323, "learning_rate": 4.7317300743445224e-05, "loss": 0.1503, "step": 13115 }, { "epoch": 0.23393857239681803, "grad_norm": 0.20666436851024628, "learning_rate": 4.7316599233249114e-05, "loss": 0.1756, "step": 13116 }, { "epoch": 0.23395640851853172, "grad_norm": 0.27438873052597046, "learning_rate": 4.731589763654615e-05, "loss": 0.2358, "step": 13117 }, { "epoch": 0.23397424464024544, "grad_norm": 0.44955042004585266, "learning_rate": 4.731519595333906e-05, "loss": 0.1772, "step": 13118 }, { "epoch": 0.23399208076195913, "grad_norm": 0.25977659225463867, "learning_rate": 4.731449418363057e-05, "loss": 0.2078, "step": 13119 }, { "epoch": 0.2340099168836728, "grad_norm": 0.2971154749393463, "learning_rate": 4.731379232742339e-05, "loss": 0.1657, "step": 13120 }, { "epoch": 0.2340277530053865, "grad_norm": 0.2584383189678192, "learning_rate": 4.7313090384720236e-05, "loss": 0.1963, "step": 13121 }, { "epoch": 0.2340455891271002, "grad_norm": 0.25320371985435486, "learning_rate": 4.731238835552384e-05, "loss": 0.1343, "step": 13122 }, { "epoch": 0.2340634252488139, "grad_norm": 0.41943204402923584, "learning_rate": 4.731168623983693e-05, "loss": 0.2122, "step": 13123 }, { "epoch": 0.2340812613705276, "grad_norm": 0.24102890491485596, "learning_rate": 4.7310984037662206e-05, "loss": 0.2281, "step": 13124 }, { "epoch": 0.23409909749224128, "grad_norm": 0.34076613187789917, "learning_rate": 4.731028174900242e-05, "loss": 0.216, "step": 13125 }, { "epoch": 0.23411693361395497, "grad_norm": 0.2129395753145218, "learning_rate": 4.730957937386026e-05, "loss": 0.1598, "step": 13126 }, { "epoch": 0.23413476973566869, "grad_norm": 0.226332888007164, "learning_rate": 4.730887691223846e-05, "loss": 0.199, "step": 13127 }, { "epoch": 0.23415260585738237, "grad_norm": 0.26704511046409607, "learning_rate": 4.730817436413976e-05, "loss": 0.2175, "step": 13128 }, { "epoch": 0.23417044197909606, "grad_norm": 0.26819998025894165, "learning_rate": 4.730747172956687e-05, "loss": 0.1581, "step": 13129 }, { "epoch": 0.23418827810080975, "grad_norm": 0.42599111795425415, "learning_rate": 4.7306769008522514e-05, "loss": 0.1799, "step": 13130 }, { "epoch": 0.23420611422252346, "grad_norm": 0.3520090878009796, "learning_rate": 4.7306066201009414e-05, "loss": 0.2521, "step": 13131 }, { "epoch": 0.23422395034423715, "grad_norm": 0.22327473759651184, "learning_rate": 4.7305363307030295e-05, "loss": 0.1989, "step": 13132 }, { "epoch": 0.23424178646595084, "grad_norm": 0.27530860900878906, "learning_rate": 4.730466032658788e-05, "loss": 0.1714, "step": 13133 }, { "epoch": 0.23425962258766453, "grad_norm": 0.22513854503631592, "learning_rate": 4.730395725968491e-05, "loss": 0.1487, "step": 13134 }, { "epoch": 0.23427745870937824, "grad_norm": 0.2687302827835083, "learning_rate": 4.730325410632409e-05, "loss": 0.1795, "step": 13135 }, { "epoch": 0.23429529483109193, "grad_norm": 0.5469068884849548, "learning_rate": 4.730255086650816e-05, "loss": 0.1836, "step": 13136 }, { "epoch": 0.23431313095280562, "grad_norm": 0.23095735907554626, "learning_rate": 4.730184754023984e-05, "loss": 0.1678, "step": 13137 }, { "epoch": 0.2343309670745193, "grad_norm": 0.31276312470436096, "learning_rate": 4.730114412752185e-05, "loss": 0.2264, "step": 13138 }, { "epoch": 0.23434880319623302, "grad_norm": 0.32502320408821106, "learning_rate": 4.7300440628356926e-05, "loss": 0.2478, "step": 13139 }, { "epoch": 0.2343666393179467, "grad_norm": 0.34323054552078247, "learning_rate": 4.729973704274779e-05, "loss": 0.1917, "step": 13140 }, { "epoch": 0.2343844754396604, "grad_norm": 0.30752578377723694, "learning_rate": 4.729903337069717e-05, "loss": 0.1974, "step": 13141 }, { "epoch": 0.2344023115613741, "grad_norm": 0.24402940273284912, "learning_rate": 4.72983296122078e-05, "loss": 0.1883, "step": 13142 }, { "epoch": 0.23442014768308778, "grad_norm": 0.2565370500087738, "learning_rate": 4.72976257672824e-05, "loss": 0.1759, "step": 13143 }, { "epoch": 0.2344379838048015, "grad_norm": 0.2792462110519409, "learning_rate": 4.72969218359237e-05, "loss": 0.1586, "step": 13144 }, { "epoch": 0.23445581992651518, "grad_norm": 0.23141470551490784, "learning_rate": 4.729621781813443e-05, "loss": 0.1625, "step": 13145 }, { "epoch": 0.23447365604822887, "grad_norm": 0.3027353882789612, "learning_rate": 4.729551371391732e-05, "loss": 0.1638, "step": 13146 }, { "epoch": 0.23449149216994256, "grad_norm": 0.3027116358280182, "learning_rate": 4.72948095232751e-05, "loss": 0.2218, "step": 13147 }, { "epoch": 0.23450932829165627, "grad_norm": 0.331464946269989, "learning_rate": 4.7294105246210494e-05, "loss": 0.2074, "step": 13148 }, { "epoch": 0.23452716441336996, "grad_norm": 0.28187480568885803, "learning_rate": 4.7293400882726235e-05, "loss": 0.2212, "step": 13149 }, { "epoch": 0.23454500053508365, "grad_norm": 0.20477347075939178, "learning_rate": 4.729269643282506e-05, "loss": 0.1635, "step": 13150 }, { "epoch": 0.23456283665679734, "grad_norm": 0.21685895323753357, "learning_rate": 4.729199189650969e-05, "loss": 0.1618, "step": 13151 }, { "epoch": 0.23458067277851105, "grad_norm": 0.30709367990493774, "learning_rate": 4.7291287273782865e-05, "loss": 0.19, "step": 13152 }, { "epoch": 0.23459850890022474, "grad_norm": 0.3493216037750244, "learning_rate": 4.729058256464731e-05, "loss": 0.1984, "step": 13153 }, { "epoch": 0.23461634502193843, "grad_norm": 0.27642378211021423, "learning_rate": 4.728987776910575e-05, "loss": 0.1888, "step": 13154 }, { "epoch": 0.23463418114365212, "grad_norm": 0.3807377219200134, "learning_rate": 4.7289172887160934e-05, "loss": 0.1897, "step": 13155 }, { "epoch": 0.23465201726536583, "grad_norm": 0.327101469039917, "learning_rate": 4.7288467918815584e-05, "loss": 0.2586, "step": 13156 }, { "epoch": 0.23466985338707952, "grad_norm": 0.3536888062953949, "learning_rate": 4.7287762864072425e-05, "loss": 0.1901, "step": 13157 }, { "epoch": 0.2346876895087932, "grad_norm": 0.32119861245155334, "learning_rate": 4.728705772293421e-05, "loss": 0.2284, "step": 13158 }, { "epoch": 0.2347055256305069, "grad_norm": 0.24259814620018005, "learning_rate": 4.7286352495403656e-05, "loss": 0.2237, "step": 13159 }, { "epoch": 0.2347233617522206, "grad_norm": 0.24325969815254211, "learning_rate": 4.7285647181483506e-05, "loss": 0.1606, "step": 13160 }, { "epoch": 0.2347411978739343, "grad_norm": 0.2513333559036255, "learning_rate": 4.7284941781176485e-05, "loss": 0.2097, "step": 13161 }, { "epoch": 0.234759033995648, "grad_norm": 0.5247678756713867, "learning_rate": 4.728423629448534e-05, "loss": 0.229, "step": 13162 }, { "epoch": 0.23477687011736167, "grad_norm": 0.975025475025177, "learning_rate": 4.7283530721412795e-05, "loss": 0.1968, "step": 13163 }, { "epoch": 0.23479470623907536, "grad_norm": 0.22964558005332947, "learning_rate": 4.728282506196159e-05, "loss": 0.204, "step": 13164 }, { "epoch": 0.23481254236078908, "grad_norm": 0.29146355390548706, "learning_rate": 4.728211931613445e-05, "loss": 0.1612, "step": 13165 }, { "epoch": 0.23483037848250277, "grad_norm": 0.3188380300998688, "learning_rate": 4.7281413483934134e-05, "loss": 0.1968, "step": 13166 }, { "epoch": 0.23484821460421645, "grad_norm": 0.38089612126350403, "learning_rate": 4.728070756536335e-05, "loss": 0.173, "step": 13167 }, { "epoch": 0.23486605072593014, "grad_norm": 0.3769915699958801, "learning_rate": 4.728000156042486e-05, "loss": 0.2067, "step": 13168 }, { "epoch": 0.23488388684764386, "grad_norm": 0.2790173292160034, "learning_rate": 4.727929546912138e-05, "loss": 0.2152, "step": 13169 }, { "epoch": 0.23490172296935755, "grad_norm": 0.2992670238018036, "learning_rate": 4.7278589291455656e-05, "loss": 0.1843, "step": 13170 }, { "epoch": 0.23491955909107123, "grad_norm": 0.2138238102197647, "learning_rate": 4.727788302743043e-05, "loss": 0.1673, "step": 13171 }, { "epoch": 0.23493739521278492, "grad_norm": 0.3130911588668823, "learning_rate": 4.727717667704843e-05, "loss": 0.2143, "step": 13172 }, { "epoch": 0.23495523133449864, "grad_norm": 0.2582393288612366, "learning_rate": 4.72764702403124e-05, "loss": 0.1538, "step": 13173 }, { "epoch": 0.23497306745621233, "grad_norm": 0.27510881423950195, "learning_rate": 4.727576371722508e-05, "loss": 0.1735, "step": 13174 }, { "epoch": 0.234990903577926, "grad_norm": 0.40130189061164856, "learning_rate": 4.7275057107789205e-05, "loss": 0.3251, "step": 13175 }, { "epoch": 0.2350087396996397, "grad_norm": 0.4659244120121002, "learning_rate": 4.727435041200752e-05, "loss": 0.2313, "step": 13176 }, { "epoch": 0.23502657582135342, "grad_norm": 0.31697192788124084, "learning_rate": 4.727364362988275e-05, "loss": 0.2322, "step": 13177 }, { "epoch": 0.2350444119430671, "grad_norm": 0.24357974529266357, "learning_rate": 4.7272936761417647e-05, "loss": 0.1556, "step": 13178 }, { "epoch": 0.2350622480647808, "grad_norm": 0.31785544753074646, "learning_rate": 4.727222980661495e-05, "loss": 0.2079, "step": 13179 }, { "epoch": 0.23508008418649448, "grad_norm": 0.3436828851699829, "learning_rate": 4.727152276547739e-05, "loss": 0.2003, "step": 13180 }, { "epoch": 0.2350979203082082, "grad_norm": 0.24924665689468384, "learning_rate": 4.727081563800773e-05, "loss": 0.2049, "step": 13181 }, { "epoch": 0.23511575642992188, "grad_norm": 0.27374500036239624, "learning_rate": 4.727010842420869e-05, "loss": 0.144, "step": 13182 }, { "epoch": 0.23513359255163557, "grad_norm": 0.2758798897266388, "learning_rate": 4.726940112408301e-05, "loss": 0.187, "step": 13183 }, { "epoch": 0.23515142867334926, "grad_norm": 0.34047555923461914, "learning_rate": 4.726869373763345e-05, "loss": 0.189, "step": 13184 }, { "epoch": 0.23516926479506295, "grad_norm": 0.2357666790485382, "learning_rate": 4.726798626486274e-05, "loss": 0.1756, "step": 13185 }, { "epoch": 0.23518710091677666, "grad_norm": 0.3062521517276764, "learning_rate": 4.726727870577362e-05, "loss": 0.2419, "step": 13186 }, { "epoch": 0.23520493703849035, "grad_norm": 0.3334413766860962, "learning_rate": 4.7266571060368844e-05, "loss": 0.2351, "step": 13187 }, { "epoch": 0.23522277316020404, "grad_norm": 0.20553076267242432, "learning_rate": 4.7265863328651145e-05, "loss": 0.1504, "step": 13188 }, { "epoch": 0.23524060928191773, "grad_norm": 0.2524462640285492, "learning_rate": 4.726515551062327e-05, "loss": 0.1784, "step": 13189 }, { "epoch": 0.23525844540363144, "grad_norm": 0.2631435692310333, "learning_rate": 4.726444760628795e-05, "loss": 0.1875, "step": 13190 }, { "epoch": 0.23527628152534513, "grad_norm": 0.27673405408859253, "learning_rate": 4.726373961564796e-05, "loss": 0.184, "step": 13191 }, { "epoch": 0.23529411764705882, "grad_norm": 0.24376653134822845, "learning_rate": 4.726303153870602e-05, "loss": 0.1721, "step": 13192 }, { "epoch": 0.2353119537687725, "grad_norm": 0.340243935585022, "learning_rate": 4.726232337546487e-05, "loss": 0.198, "step": 13193 }, { "epoch": 0.23532978989048622, "grad_norm": 0.3555149435997009, "learning_rate": 4.726161512592727e-05, "loss": 0.2535, "step": 13194 }, { "epoch": 0.2353476260121999, "grad_norm": 0.22092889249324799, "learning_rate": 4.726090679009597e-05, "loss": 0.1779, "step": 13195 }, { "epoch": 0.2353654621339136, "grad_norm": 0.4123993217945099, "learning_rate": 4.72601983679737e-05, "loss": 0.2064, "step": 13196 }, { "epoch": 0.2353832982556273, "grad_norm": 0.25728389620780945, "learning_rate": 4.725948985956321e-05, "loss": 0.1713, "step": 13197 }, { "epoch": 0.235401134377341, "grad_norm": 0.2735288739204407, "learning_rate": 4.7258781264867254e-05, "loss": 0.2049, "step": 13198 }, { "epoch": 0.2354189704990547, "grad_norm": 0.22504852712154388, "learning_rate": 4.7258072583888566e-05, "loss": 0.1938, "step": 13199 }, { "epoch": 0.23543680662076838, "grad_norm": 0.25486525893211365, "learning_rate": 4.7257363816629904e-05, "loss": 0.1789, "step": 13200 }, { "epoch": 0.23545464274248207, "grad_norm": 0.25982269644737244, "learning_rate": 4.7256654963094024e-05, "loss": 0.1896, "step": 13201 }, { "epoch": 0.23547247886419576, "grad_norm": 0.2815377116203308, "learning_rate": 4.725594602328365e-05, "loss": 0.1669, "step": 13202 }, { "epoch": 0.23549031498590947, "grad_norm": 0.31810420751571655, "learning_rate": 4.725523699720155e-05, "loss": 0.2062, "step": 13203 }, { "epoch": 0.23550815110762316, "grad_norm": 0.24188999831676483, "learning_rate": 4.725452788485046e-05, "loss": 0.1543, "step": 13204 }, { "epoch": 0.23552598722933685, "grad_norm": 0.321250319480896, "learning_rate": 4.725381868623313e-05, "loss": 0.1962, "step": 13205 }, { "epoch": 0.23554382335105054, "grad_norm": 0.31936588883399963, "learning_rate": 4.725310940135231e-05, "loss": 0.2321, "step": 13206 }, { "epoch": 0.23556165947276425, "grad_norm": 0.2989104688167572, "learning_rate": 4.725240003021077e-05, "loss": 0.1666, "step": 13207 }, { "epoch": 0.23557949559447794, "grad_norm": 0.27593496441841125, "learning_rate": 4.725169057281123e-05, "loss": 0.1981, "step": 13208 }, { "epoch": 0.23559733171619163, "grad_norm": 0.22347703576087952, "learning_rate": 4.7250981029156446e-05, "loss": 0.243, "step": 13209 }, { "epoch": 0.23561516783790531, "grad_norm": 0.2823682129383087, "learning_rate": 4.725027139924918e-05, "loss": 0.2232, "step": 13210 }, { "epoch": 0.23563300395961903, "grad_norm": 0.3040268123149872, "learning_rate": 4.724956168309218e-05, "loss": 0.217, "step": 13211 }, { "epoch": 0.23565084008133272, "grad_norm": 0.2807352840900421, "learning_rate": 4.724885188068819e-05, "loss": 0.2244, "step": 13212 }, { "epoch": 0.2356686762030464, "grad_norm": 0.28299254179000854, "learning_rate": 4.7248141992039965e-05, "loss": 0.2017, "step": 13213 }, { "epoch": 0.2356865123247601, "grad_norm": 0.2473675012588501, "learning_rate": 4.724743201715026e-05, "loss": 0.1866, "step": 13214 }, { "epoch": 0.2357043484464738, "grad_norm": 0.34206148982048035, "learning_rate": 4.724672195602182e-05, "loss": 0.2068, "step": 13215 }, { "epoch": 0.2357221845681875, "grad_norm": 0.2382190227508545, "learning_rate": 4.7246011808657406e-05, "loss": 0.1943, "step": 13216 }, { "epoch": 0.2357400206899012, "grad_norm": 0.3492027819156647, "learning_rate": 4.724530157505978e-05, "loss": 0.2146, "step": 13217 }, { "epoch": 0.23575785681161487, "grad_norm": 0.23248231410980225, "learning_rate": 4.724459125523166e-05, "loss": 0.1774, "step": 13218 }, { "epoch": 0.2357756929333286, "grad_norm": 0.3013003468513489, "learning_rate": 4.724388084917583e-05, "loss": 0.1592, "step": 13219 }, { "epoch": 0.23579352905504228, "grad_norm": 0.36341676115989685, "learning_rate": 4.7243170356895035e-05, "loss": 0.1935, "step": 13220 }, { "epoch": 0.23581136517675597, "grad_norm": 0.218472421169281, "learning_rate": 4.724245977839202e-05, "loss": 0.1904, "step": 13221 }, { "epoch": 0.23582920129846965, "grad_norm": 0.4290112555027008, "learning_rate": 4.7241749113669564e-05, "loss": 0.1722, "step": 13222 }, { "epoch": 0.23584703742018334, "grad_norm": 0.23001818358898163, "learning_rate": 4.72410383627304e-05, "loss": 0.1774, "step": 13223 }, { "epoch": 0.23586487354189706, "grad_norm": 0.31045591831207275, "learning_rate": 4.7240327525577286e-05, "loss": 0.1402, "step": 13224 }, { "epoch": 0.23588270966361075, "grad_norm": 0.23984810709953308, "learning_rate": 4.7239616602212986e-05, "loss": 0.1876, "step": 13225 }, { "epoch": 0.23590054578532443, "grad_norm": 0.2721925973892212, "learning_rate": 4.723890559264025e-05, "loss": 0.2085, "step": 13226 }, { "epoch": 0.23591838190703812, "grad_norm": 0.25344839692115784, "learning_rate": 4.723819449686183e-05, "loss": 0.1613, "step": 13227 }, { "epoch": 0.23593621802875184, "grad_norm": 0.42632994055747986, "learning_rate": 4.723748331488049e-05, "loss": 0.2236, "step": 13228 }, { "epoch": 0.23595405415046553, "grad_norm": 0.24490170180797577, "learning_rate": 4.723677204669899e-05, "loss": 0.1702, "step": 13229 }, { "epoch": 0.2359718902721792, "grad_norm": 0.36458730697631836, "learning_rate": 4.723606069232007e-05, "loss": 0.1747, "step": 13230 }, { "epoch": 0.2359897263938929, "grad_norm": 0.25357580184936523, "learning_rate": 4.7235349251746505e-05, "loss": 0.2037, "step": 13231 }, { "epoch": 0.23600756251560662, "grad_norm": 0.23994719982147217, "learning_rate": 4.7234637724981054e-05, "loss": 0.1721, "step": 13232 }, { "epoch": 0.2360253986373203, "grad_norm": 0.26088428497314453, "learning_rate": 4.723392611202646e-05, "loss": 0.1838, "step": 13233 }, { "epoch": 0.236043234759034, "grad_norm": 0.3496204912662506, "learning_rate": 4.7233214412885484e-05, "loss": 0.2484, "step": 13234 }, { "epoch": 0.23606107088074768, "grad_norm": 0.33100613951683044, "learning_rate": 4.72325026275609e-05, "loss": 0.1766, "step": 13235 }, { "epoch": 0.2360789070024614, "grad_norm": 0.20524318516254425, "learning_rate": 4.723179075605545e-05, "loss": 0.1552, "step": 13236 }, { "epoch": 0.23609674312417508, "grad_norm": 0.24219731986522675, "learning_rate": 4.7231078798371896e-05, "loss": 0.1978, "step": 13237 }, { "epoch": 0.23611457924588877, "grad_norm": 0.4554736614227295, "learning_rate": 4.723036675451301e-05, "loss": 0.2012, "step": 13238 }, { "epoch": 0.23613241536760246, "grad_norm": 0.30054935812950134, "learning_rate": 4.7229654624481546e-05, "loss": 0.1947, "step": 13239 }, { "epoch": 0.23615025148931618, "grad_norm": 0.3623890280723572, "learning_rate": 4.722894240828026e-05, "loss": 0.1769, "step": 13240 }, { "epoch": 0.23616808761102986, "grad_norm": 0.39175453782081604, "learning_rate": 4.722823010591192e-05, "loss": 0.1848, "step": 13241 }, { "epoch": 0.23618592373274355, "grad_norm": 0.27913540601730347, "learning_rate": 4.7227517717379275e-05, "loss": 0.1751, "step": 13242 }, { "epoch": 0.23620375985445724, "grad_norm": 0.2484302967786789, "learning_rate": 4.72268052426851e-05, "loss": 0.1674, "step": 13243 }, { "epoch": 0.23622159597617093, "grad_norm": 0.2830994129180908, "learning_rate": 4.7226092681832144e-05, "loss": 0.2658, "step": 13244 }, { "epoch": 0.23623943209788464, "grad_norm": 0.310022234916687, "learning_rate": 4.722538003482318e-05, "loss": 0.1573, "step": 13245 }, { "epoch": 0.23625726821959833, "grad_norm": 0.28544631600379944, "learning_rate": 4.7224667301660964e-05, "loss": 0.1661, "step": 13246 }, { "epoch": 0.23627510434131202, "grad_norm": 0.3461221158504486, "learning_rate": 4.7223954482348266e-05, "loss": 0.182, "step": 13247 }, { "epoch": 0.2362929404630257, "grad_norm": 0.3091566860675812, "learning_rate": 4.7223241576887846e-05, "loss": 0.2046, "step": 13248 }, { "epoch": 0.23631077658473942, "grad_norm": 0.283374160528183, "learning_rate": 4.722252858528246e-05, "loss": 0.1829, "step": 13249 }, { "epoch": 0.2363286127064531, "grad_norm": 0.2626781165599823, "learning_rate": 4.722181550753488e-05, "loss": 0.2059, "step": 13250 }, { "epoch": 0.2363464488281668, "grad_norm": 0.2459656298160553, "learning_rate": 4.722110234364787e-05, "loss": 0.193, "step": 13251 }, { "epoch": 0.2363642849498805, "grad_norm": 0.3419763445854187, "learning_rate": 4.722038909362419e-05, "loss": 0.1987, "step": 13252 }, { "epoch": 0.2363821210715942, "grad_norm": 0.2317509651184082, "learning_rate": 4.721967575746661e-05, "loss": 0.1803, "step": 13253 }, { "epoch": 0.2363999571933079, "grad_norm": 0.2360445111989975, "learning_rate": 4.721896233517788e-05, "loss": 0.1838, "step": 13254 }, { "epoch": 0.23641779331502158, "grad_norm": 0.22321978211402893, "learning_rate": 4.72182488267608e-05, "loss": 0.1199, "step": 13255 }, { "epoch": 0.23643562943673527, "grad_norm": 0.2774013578891754, "learning_rate": 4.72175352322181e-05, "loss": 0.1518, "step": 13256 }, { "epoch": 0.23645346555844898, "grad_norm": 0.25381648540496826, "learning_rate": 4.721682155155256e-05, "loss": 0.1644, "step": 13257 }, { "epoch": 0.23647130168016267, "grad_norm": 0.3024599850177765, "learning_rate": 4.721610778476695e-05, "loss": 0.1874, "step": 13258 }, { "epoch": 0.23648913780187636, "grad_norm": 0.3168708384037018, "learning_rate": 4.7215393931864025e-05, "loss": 0.1743, "step": 13259 }, { "epoch": 0.23650697392359005, "grad_norm": 0.21824829280376434, "learning_rate": 4.721467999284657e-05, "loss": 0.1178, "step": 13260 }, { "epoch": 0.23652481004530376, "grad_norm": 0.3472624719142914, "learning_rate": 4.721396596771734e-05, "loss": 0.2007, "step": 13261 }, { "epoch": 0.23654264616701745, "grad_norm": 0.2955172657966614, "learning_rate": 4.72132518564791e-05, "loss": 0.1753, "step": 13262 }, { "epoch": 0.23656048228873114, "grad_norm": 0.2570713758468628, "learning_rate": 4.721253765913462e-05, "loss": 0.1423, "step": 13263 }, { "epoch": 0.23657831841044483, "grad_norm": 0.24804842472076416, "learning_rate": 4.7211823375686695e-05, "loss": 0.1539, "step": 13264 }, { "epoch": 0.23659615453215851, "grad_norm": 0.2627914547920227, "learning_rate": 4.721110900613805e-05, "loss": 0.1908, "step": 13265 }, { "epoch": 0.23661399065387223, "grad_norm": 0.25864657759666443, "learning_rate": 4.721039455049148e-05, "loss": 0.2274, "step": 13266 }, { "epoch": 0.23663182677558592, "grad_norm": 0.24527081847190857, "learning_rate": 4.7209680008749744e-05, "loss": 0.174, "step": 13267 }, { "epoch": 0.2366496628972996, "grad_norm": 0.2619577646255493, "learning_rate": 4.720896538091563e-05, "loss": 0.1532, "step": 13268 }, { "epoch": 0.2366674990190133, "grad_norm": 0.2521253526210785, "learning_rate": 4.720825066699189e-05, "loss": 0.219, "step": 13269 }, { "epoch": 0.236685335140727, "grad_norm": 0.2410418689250946, "learning_rate": 4.72075358669813e-05, "loss": 0.167, "step": 13270 }, { "epoch": 0.2367031712624407, "grad_norm": 0.39443933963775635, "learning_rate": 4.720682098088662e-05, "loss": 0.2411, "step": 13271 }, { "epoch": 0.23672100738415439, "grad_norm": 0.2510274648666382, "learning_rate": 4.720610600871065e-05, "loss": 0.1984, "step": 13272 }, { "epoch": 0.23673884350586807, "grad_norm": 0.23060636222362518, "learning_rate": 4.720539095045613e-05, "loss": 0.1753, "step": 13273 }, { "epoch": 0.2367566796275818, "grad_norm": 0.2736614942550659, "learning_rate": 4.7204675806125854e-05, "loss": 0.2107, "step": 13274 }, { "epoch": 0.23677451574929548, "grad_norm": 0.2579177916049957, "learning_rate": 4.720396057572258e-05, "loss": 0.2351, "step": 13275 }, { "epoch": 0.23679235187100917, "grad_norm": 0.21524077653884888, "learning_rate": 4.720324525924908e-05, "loss": 0.1999, "step": 13276 }, { "epoch": 0.23681018799272285, "grad_norm": 0.23854389786720276, "learning_rate": 4.7202529856708144e-05, "loss": 0.1631, "step": 13277 }, { "epoch": 0.23682802411443657, "grad_norm": 0.2408314198255539, "learning_rate": 4.720181436810253e-05, "loss": 0.1863, "step": 13278 }, { "epoch": 0.23684586023615026, "grad_norm": 0.2868008315563202, "learning_rate": 4.720109879343502e-05, "loss": 0.1997, "step": 13279 }, { "epoch": 0.23686369635786395, "grad_norm": 0.2984357476234436, "learning_rate": 4.7200383132708375e-05, "loss": 0.1433, "step": 13280 }, { "epoch": 0.23688153247957763, "grad_norm": 0.3249823749065399, "learning_rate": 4.7199667385925386e-05, "loss": 0.2213, "step": 13281 }, { "epoch": 0.23689936860129132, "grad_norm": 0.23707419633865356, "learning_rate": 4.719895155308881e-05, "loss": 0.1882, "step": 13282 }, { "epoch": 0.23691720472300504, "grad_norm": 0.35891905426979065, "learning_rate": 4.7198235634201425e-05, "loss": 0.2058, "step": 13283 }, { "epoch": 0.23693504084471873, "grad_norm": 0.4096406102180481, "learning_rate": 4.719751962926602e-05, "loss": 0.1501, "step": 13284 }, { "epoch": 0.2369528769664324, "grad_norm": 0.2608923614025116, "learning_rate": 4.719680353828537e-05, "loss": 0.2068, "step": 13285 }, { "epoch": 0.2369707130881461, "grad_norm": 0.35470229387283325, "learning_rate": 4.7196087361262233e-05, "loss": 0.1599, "step": 13286 }, { "epoch": 0.23698854920985982, "grad_norm": 0.3483262062072754, "learning_rate": 4.7195371098199395e-05, "loss": 0.2137, "step": 13287 }, { "epoch": 0.2370063853315735, "grad_norm": 0.3126085102558136, "learning_rate": 4.719465474909963e-05, "loss": 0.2526, "step": 13288 }, { "epoch": 0.2370242214532872, "grad_norm": 0.22941049933433533, "learning_rate": 4.7193938313965724e-05, "loss": 0.2025, "step": 13289 }, { "epoch": 0.23704205757500088, "grad_norm": 0.32364124059677124, "learning_rate": 4.719322179280045e-05, "loss": 0.2427, "step": 13290 }, { "epoch": 0.2370598936967146, "grad_norm": 0.3139931261539459, "learning_rate": 4.7192505185606575e-05, "loss": 0.1877, "step": 13291 }, { "epoch": 0.23707772981842828, "grad_norm": 0.20971724390983582, "learning_rate": 4.719178849238689e-05, "loss": 0.1841, "step": 13292 }, { "epoch": 0.23709556594014197, "grad_norm": 0.2499406784772873, "learning_rate": 4.719107171314416e-05, "loss": 0.2355, "step": 13293 }, { "epoch": 0.23711340206185566, "grad_norm": 0.259162038564682, "learning_rate": 4.719035484788119e-05, "loss": 0.1946, "step": 13294 }, { "epoch": 0.23713123818356938, "grad_norm": 0.2665821611881256, "learning_rate": 4.718963789660073e-05, "loss": 0.2125, "step": 13295 }, { "epoch": 0.23714907430528306, "grad_norm": 0.20773674547672272, "learning_rate": 4.7188920859305566e-05, "loss": 0.1398, "step": 13296 }, { "epoch": 0.23716691042699675, "grad_norm": 0.3031942546367645, "learning_rate": 4.718820373599848e-05, "loss": 0.1848, "step": 13297 }, { "epoch": 0.23718474654871044, "grad_norm": 0.4091244637966156, "learning_rate": 4.718748652668226e-05, "loss": 0.2595, "step": 13298 }, { "epoch": 0.23720258267042416, "grad_norm": 0.32161083817481995, "learning_rate": 4.7186769231359666e-05, "loss": 0.2169, "step": 13299 }, { "epoch": 0.23722041879213784, "grad_norm": 0.2657436728477478, "learning_rate": 4.71860518500335e-05, "loss": 0.2408, "step": 13300 }, { "epoch": 0.23723825491385153, "grad_norm": 0.2363685667514801, "learning_rate": 4.718533438270654e-05, "loss": 0.1973, "step": 13301 }, { "epoch": 0.23725609103556522, "grad_norm": 0.20735211670398712, "learning_rate": 4.718461682938155e-05, "loss": 0.1518, "step": 13302 }, { "epoch": 0.2372739271572789, "grad_norm": 0.45115360617637634, "learning_rate": 4.718389919006133e-05, "loss": 0.1951, "step": 13303 }, { "epoch": 0.23729176327899262, "grad_norm": 0.534774899482727, "learning_rate": 4.718318146474865e-05, "loss": 0.2071, "step": 13304 }, { "epoch": 0.2373095994007063, "grad_norm": 0.34899401664733887, "learning_rate": 4.71824636534463e-05, "loss": 0.171, "step": 13305 }, { "epoch": 0.23732743552242, "grad_norm": 0.2766760587692261, "learning_rate": 4.718174575615706e-05, "loss": 0.2194, "step": 13306 }, { "epoch": 0.2373452716441337, "grad_norm": 0.3018389344215393, "learning_rate": 4.718102777288371e-05, "loss": 0.171, "step": 13307 }, { "epoch": 0.2373631077658474, "grad_norm": 0.38258108496665955, "learning_rate": 4.718030970362904e-05, "loss": 0.1739, "step": 13308 }, { "epoch": 0.2373809438875611, "grad_norm": 0.2464757263660431, "learning_rate": 4.717959154839582e-05, "loss": 0.2393, "step": 13309 }, { "epoch": 0.23739878000927478, "grad_norm": 0.36514636874198914, "learning_rate": 4.7178873307186855e-05, "loss": 0.1899, "step": 13310 }, { "epoch": 0.23741661613098847, "grad_norm": 0.2671298086643219, "learning_rate": 4.7178154980004905e-05, "loss": 0.1968, "step": 13311 }, { "epoch": 0.23743445225270218, "grad_norm": 0.2568267583847046, "learning_rate": 4.717743656685277e-05, "loss": 0.1628, "step": 13312 }, { "epoch": 0.23745228837441587, "grad_norm": 0.2959321439266205, "learning_rate": 4.7176718067733235e-05, "loss": 0.1864, "step": 13313 }, { "epoch": 0.23747012449612956, "grad_norm": 0.27243509888648987, "learning_rate": 4.717599948264908e-05, "loss": 0.2192, "step": 13314 }, { "epoch": 0.23748796061784325, "grad_norm": 0.2892591655254364, "learning_rate": 4.7175280811603084e-05, "loss": 0.2184, "step": 13315 }, { "epoch": 0.23750579673955696, "grad_norm": 0.29562073945999146, "learning_rate": 4.7174562054598046e-05, "loss": 0.1802, "step": 13316 }, { "epoch": 0.23752363286127065, "grad_norm": 0.28665128350257874, "learning_rate": 4.717384321163675e-05, "loss": 0.1569, "step": 13317 }, { "epoch": 0.23754146898298434, "grad_norm": 0.32626378536224365, "learning_rate": 4.717312428272197e-05, "loss": 0.2696, "step": 13318 }, { "epoch": 0.23755930510469803, "grad_norm": 0.2862628400325775, "learning_rate": 4.7172405267856514e-05, "loss": 0.1877, "step": 13319 }, { "epoch": 0.23757714122641174, "grad_norm": 0.31922516226768494, "learning_rate": 4.717168616704315e-05, "loss": 0.2933, "step": 13320 }, { "epoch": 0.23759497734812543, "grad_norm": 0.26153549551963806, "learning_rate": 4.717096698028467e-05, "loss": 0.1627, "step": 13321 }, { "epoch": 0.23761281346983912, "grad_norm": 0.2663293778896332, "learning_rate": 4.717024770758387e-05, "loss": 0.2204, "step": 13322 }, { "epoch": 0.2376306495915528, "grad_norm": 0.24172881245613098, "learning_rate": 4.716952834894353e-05, "loss": 0.1635, "step": 13323 }, { "epoch": 0.2376484857132665, "grad_norm": 0.34882476925849915, "learning_rate": 4.716880890436644e-05, "loss": 0.1801, "step": 13324 }, { "epoch": 0.2376663218349802, "grad_norm": 0.22524629533290863, "learning_rate": 4.7168089373855396e-05, "loss": 0.2157, "step": 13325 }, { "epoch": 0.2376841579566939, "grad_norm": 0.3022221624851227, "learning_rate": 4.716736975741317e-05, "loss": 0.2246, "step": 13326 }, { "epoch": 0.23770199407840759, "grad_norm": 0.2862691283226013, "learning_rate": 4.716665005504257e-05, "loss": 0.1842, "step": 13327 }, { "epoch": 0.23771983020012127, "grad_norm": 0.25495675206184387, "learning_rate": 4.716593026674638e-05, "loss": 0.2096, "step": 13328 }, { "epoch": 0.237737666321835, "grad_norm": 0.26478201150894165, "learning_rate": 4.716521039252738e-05, "loss": 0.1761, "step": 13329 }, { "epoch": 0.23775550244354868, "grad_norm": 0.23873406648635864, "learning_rate": 4.7164490432388376e-05, "loss": 0.1483, "step": 13330 }, { "epoch": 0.23777333856526237, "grad_norm": 0.29290691018104553, "learning_rate": 4.716377038633215e-05, "loss": 0.1502, "step": 13331 }, { "epoch": 0.23779117468697605, "grad_norm": 0.39081674814224243, "learning_rate": 4.71630502543615e-05, "loss": 0.1972, "step": 13332 }, { "epoch": 0.23780901080868977, "grad_norm": 0.34899893403053284, "learning_rate": 4.7162330036479205e-05, "loss": 0.1831, "step": 13333 }, { "epoch": 0.23782684693040346, "grad_norm": 0.30515649914741516, "learning_rate": 4.7161609732688064e-05, "loss": 0.1556, "step": 13334 }, { "epoch": 0.23784468305211715, "grad_norm": 0.2563779056072235, "learning_rate": 4.716088934299087e-05, "loss": 0.1454, "step": 13335 }, { "epoch": 0.23786251917383083, "grad_norm": 0.34990549087524414, "learning_rate": 4.716016886739042e-05, "loss": 0.2211, "step": 13336 }, { "epoch": 0.23788035529554455, "grad_norm": 0.3291740417480469, "learning_rate": 4.7159448305889495e-05, "loss": 0.1685, "step": 13337 }, { "epoch": 0.23789819141725824, "grad_norm": 0.3897515535354614, "learning_rate": 4.7158727658490894e-05, "loss": 0.1689, "step": 13338 }, { "epoch": 0.23791602753897192, "grad_norm": 0.2098303884267807, "learning_rate": 4.715800692519742e-05, "loss": 0.1567, "step": 13339 }, { "epoch": 0.2379338636606856, "grad_norm": 0.36193516850471497, "learning_rate": 4.715728610601185e-05, "loss": 0.223, "step": 13340 }, { "epoch": 0.23795169978239933, "grad_norm": 0.2321547120809555, "learning_rate": 4.7156565200936984e-05, "loss": 0.1562, "step": 13341 }, { "epoch": 0.23796953590411302, "grad_norm": 0.33449116349220276, "learning_rate": 4.715584420997563e-05, "loss": 0.2231, "step": 13342 }, { "epoch": 0.2379873720258267, "grad_norm": 0.7138961553573608, "learning_rate": 4.715512313313056e-05, "loss": 0.1844, "step": 13343 }, { "epoch": 0.2380052081475404, "grad_norm": 0.29571333527565, "learning_rate": 4.71544019704046e-05, "loss": 0.2249, "step": 13344 }, { "epoch": 0.23802304426925408, "grad_norm": 0.2784079909324646, "learning_rate": 4.7153680721800496e-05, "loss": 0.216, "step": 13345 }, { "epoch": 0.2380408803909678, "grad_norm": 0.37449702620506287, "learning_rate": 4.715295938732109e-05, "loss": 0.2098, "step": 13346 }, { "epoch": 0.23805871651268148, "grad_norm": 0.27389535307884216, "learning_rate": 4.715223796696917e-05, "loss": 0.2029, "step": 13347 }, { "epoch": 0.23807655263439517, "grad_norm": 0.38538825511932373, "learning_rate": 4.715151646074752e-05, "loss": 0.1832, "step": 13348 }, { "epoch": 0.23809438875610886, "grad_norm": 0.2619556784629822, "learning_rate": 4.715079486865893e-05, "loss": 0.193, "step": 13349 }, { "epoch": 0.23811222487782258, "grad_norm": 0.21136973798274994, "learning_rate": 4.7150073190706216e-05, "loss": 0.1423, "step": 13350 }, { "epoch": 0.23813006099953626, "grad_norm": 0.4439290761947632, "learning_rate": 4.714935142689217e-05, "loss": 0.2909, "step": 13351 }, { "epoch": 0.23814789712124995, "grad_norm": 0.3308385908603668, "learning_rate": 4.7148629577219584e-05, "loss": 0.201, "step": 13352 }, { "epoch": 0.23816573324296364, "grad_norm": 0.2681209146976471, "learning_rate": 4.714790764169126e-05, "loss": 0.1539, "step": 13353 }, { "epoch": 0.23818356936467736, "grad_norm": 0.20436906814575195, "learning_rate": 4.714718562031e-05, "loss": 0.1623, "step": 13354 }, { "epoch": 0.23820140548639104, "grad_norm": 0.39103105664253235, "learning_rate": 4.71464635130786e-05, "loss": 0.1803, "step": 13355 }, { "epoch": 0.23821924160810473, "grad_norm": 0.2353384643793106, "learning_rate": 4.714574131999985e-05, "loss": 0.2236, "step": 13356 }, { "epoch": 0.23823707772981842, "grad_norm": 0.3549315631389618, "learning_rate": 4.714501904107657e-05, "loss": 0.1785, "step": 13357 }, { "epoch": 0.23825491385153214, "grad_norm": 0.4552212059497833, "learning_rate": 4.714429667631154e-05, "loss": 0.1652, "step": 13358 }, { "epoch": 0.23827274997324582, "grad_norm": 0.2647263705730438, "learning_rate": 4.7143574225707564e-05, "loss": 0.1553, "step": 13359 }, { "epoch": 0.2382905860949595, "grad_norm": 0.27400949597358704, "learning_rate": 4.7142851689267455e-05, "loss": 0.1914, "step": 13360 }, { "epoch": 0.2383084222166732, "grad_norm": 0.2867042124271393, "learning_rate": 4.7142129066993994e-05, "loss": 0.219, "step": 13361 }, { "epoch": 0.23832625833838691, "grad_norm": 0.25492849946022034, "learning_rate": 4.714140635889e-05, "loss": 0.1751, "step": 13362 }, { "epoch": 0.2383440944601006, "grad_norm": 0.24152772128582, "learning_rate": 4.7140683564958265e-05, "loss": 0.2044, "step": 13363 }, { "epoch": 0.2383619305818143, "grad_norm": 0.2686931788921356, "learning_rate": 4.71399606852016e-05, "loss": 0.1816, "step": 13364 }, { "epoch": 0.23837976670352798, "grad_norm": 0.32738491892814636, "learning_rate": 4.71392377196228e-05, "loss": 0.1516, "step": 13365 }, { "epoch": 0.23839760282524167, "grad_norm": 0.3190734088420868, "learning_rate": 4.713851466822465e-05, "loss": 0.1692, "step": 13366 }, { "epoch": 0.23841543894695538, "grad_norm": 0.231252059340477, "learning_rate": 4.713779153100999e-05, "loss": 0.2009, "step": 13367 }, { "epoch": 0.23843327506866907, "grad_norm": 0.2271817922592163, "learning_rate": 4.7137068307981605e-05, "loss": 0.1553, "step": 13368 }, { "epoch": 0.23845111119038276, "grad_norm": 0.48560476303100586, "learning_rate": 4.713634499914229e-05, "loss": 0.2394, "step": 13369 }, { "epoch": 0.23846894731209645, "grad_norm": 0.21157796680927277, "learning_rate": 4.713562160449485e-05, "loss": 0.1988, "step": 13370 }, { "epoch": 0.23848678343381016, "grad_norm": 0.2157321274280548, "learning_rate": 4.71348981240421e-05, "loss": 0.2204, "step": 13371 }, { "epoch": 0.23850461955552385, "grad_norm": 0.23527267575263977, "learning_rate": 4.7134174557786845e-05, "loss": 0.1991, "step": 13372 }, { "epoch": 0.23852245567723754, "grad_norm": 0.23589536547660828, "learning_rate": 4.7133450905731885e-05, "loss": 0.1622, "step": 13373 }, { "epoch": 0.23854029179895123, "grad_norm": 0.33560240268707275, "learning_rate": 4.7132727167880017e-05, "loss": 0.2223, "step": 13374 }, { "epoch": 0.23855812792066494, "grad_norm": 0.2951493263244629, "learning_rate": 4.713200334423406e-05, "loss": 0.1923, "step": 13375 }, { "epoch": 0.23857596404237863, "grad_norm": 0.2860085964202881, "learning_rate": 4.7131279434796814e-05, "loss": 0.2174, "step": 13376 }, { "epoch": 0.23859380016409232, "grad_norm": 0.19068557024002075, "learning_rate": 4.713055543957108e-05, "loss": 0.1551, "step": 13377 }, { "epoch": 0.238611636285806, "grad_norm": 0.3185647428035736, "learning_rate": 4.7129831358559674e-05, "loss": 0.1422, "step": 13378 }, { "epoch": 0.23862947240751972, "grad_norm": 0.2435835599899292, "learning_rate": 4.71291071917654e-05, "loss": 0.1594, "step": 13379 }, { "epoch": 0.2386473085292334, "grad_norm": 0.2497451901435852, "learning_rate": 4.712838293919106e-05, "loss": 0.1956, "step": 13380 }, { "epoch": 0.2386651446509471, "grad_norm": 0.3476564288139343, "learning_rate": 4.7127658600839465e-05, "loss": 0.1992, "step": 13381 }, { "epoch": 0.23868298077266079, "grad_norm": 0.2773374617099762, "learning_rate": 4.7126934176713425e-05, "loss": 0.1769, "step": 13382 }, { "epoch": 0.23870081689437447, "grad_norm": 0.26434776186943054, "learning_rate": 4.712620966681574e-05, "loss": 0.1946, "step": 13383 }, { "epoch": 0.2387186530160882, "grad_norm": 0.2497277408838272, "learning_rate": 4.712548507114922e-05, "loss": 0.2064, "step": 13384 }, { "epoch": 0.23873648913780188, "grad_norm": 0.3211526572704315, "learning_rate": 4.712476038971669e-05, "loss": 0.2179, "step": 13385 }, { "epoch": 0.23875432525951557, "grad_norm": 0.35197022557258606, "learning_rate": 4.712403562252094e-05, "loss": 0.1532, "step": 13386 }, { "epoch": 0.23877216138122925, "grad_norm": 0.31447118520736694, "learning_rate": 4.7123310769564795e-05, "loss": 0.2028, "step": 13387 }, { "epoch": 0.23878999750294297, "grad_norm": 0.4230057895183563, "learning_rate": 4.7122585830851054e-05, "loss": 0.2099, "step": 13388 }, { "epoch": 0.23880783362465666, "grad_norm": 0.2894974946975708, "learning_rate": 4.7121860806382526e-05, "loss": 0.162, "step": 13389 }, { "epoch": 0.23882566974637034, "grad_norm": 0.1895712912082672, "learning_rate": 4.712113569616202e-05, "loss": 0.1744, "step": 13390 }, { "epoch": 0.23884350586808403, "grad_norm": 0.3055501878261566, "learning_rate": 4.712041050019236e-05, "loss": 0.2189, "step": 13391 }, { "epoch": 0.23886134198979775, "grad_norm": 0.22077152132987976, "learning_rate": 4.711968521847634e-05, "loss": 0.1747, "step": 13392 }, { "epoch": 0.23887917811151144, "grad_norm": 0.2606711983680725, "learning_rate": 4.711895985101679e-05, "loss": 0.1814, "step": 13393 }, { "epoch": 0.23889701423322512, "grad_norm": 0.2561998665332794, "learning_rate": 4.71182343978165e-05, "loss": 0.1778, "step": 13394 }, { "epoch": 0.2389148503549388, "grad_norm": 0.3184613287448883, "learning_rate": 4.71175088588783e-05, "loss": 0.2359, "step": 13395 }, { "epoch": 0.23893268647665253, "grad_norm": 0.25398892164230347, "learning_rate": 4.7116783234205006e-05, "loss": 0.178, "step": 13396 }, { "epoch": 0.23895052259836622, "grad_norm": 0.24392369389533997, "learning_rate": 4.7116057523799405e-05, "loss": 0.1541, "step": 13397 }, { "epoch": 0.2389683587200799, "grad_norm": 0.23380355536937714, "learning_rate": 4.711533172766434e-05, "loss": 0.1415, "step": 13398 }, { "epoch": 0.2389861948417936, "grad_norm": 0.25240951776504517, "learning_rate": 4.7114605845802606e-05, "loss": 0.1495, "step": 13399 }, { "epoch": 0.2390040309635073, "grad_norm": 0.4615921676158905, "learning_rate": 4.711387987821701e-05, "loss": 0.245, "step": 13400 }, { "epoch": 0.239021867085221, "grad_norm": 0.3106546401977539, "learning_rate": 4.711315382491039e-05, "loss": 0.1752, "step": 13401 }, { "epoch": 0.23903970320693468, "grad_norm": 0.27275028824806213, "learning_rate": 4.711242768588555e-05, "loss": 0.1914, "step": 13402 }, { "epoch": 0.23905753932864837, "grad_norm": 0.2753840684890747, "learning_rate": 4.711170146114531e-05, "loss": 0.1445, "step": 13403 }, { "epoch": 0.23907537545036206, "grad_norm": 0.2281634360551834, "learning_rate": 4.711097515069246e-05, "loss": 0.2016, "step": 13404 }, { "epoch": 0.23909321157207578, "grad_norm": 0.24798743426799774, "learning_rate": 4.711024875452984e-05, "loss": 0.1813, "step": 13405 }, { "epoch": 0.23911104769378946, "grad_norm": 0.32124871015548706, "learning_rate": 4.7109522272660265e-05, "loss": 0.164, "step": 13406 }, { "epoch": 0.23912888381550315, "grad_norm": 0.29859793186187744, "learning_rate": 4.710879570508654e-05, "loss": 0.2564, "step": 13407 }, { "epoch": 0.23914671993721684, "grad_norm": 0.33276113867759705, "learning_rate": 4.7108069051811486e-05, "loss": 0.1657, "step": 13408 }, { "epoch": 0.23916455605893056, "grad_norm": 0.28450921177864075, "learning_rate": 4.710734231283792e-05, "loss": 0.1949, "step": 13409 }, { "epoch": 0.23918239218064424, "grad_norm": 0.5976627469062805, "learning_rate": 4.7106615488168664e-05, "loss": 0.2967, "step": 13410 }, { "epoch": 0.23920022830235793, "grad_norm": 0.22599709033966064, "learning_rate": 4.7105888577806526e-05, "loss": 0.1392, "step": 13411 }, { "epoch": 0.23921806442407162, "grad_norm": 0.33586129546165466, "learning_rate": 4.710516158175433e-05, "loss": 0.1978, "step": 13412 }, { "epoch": 0.23923590054578533, "grad_norm": 0.2430362105369568, "learning_rate": 4.71044345000149e-05, "loss": 0.1672, "step": 13413 }, { "epoch": 0.23925373666749902, "grad_norm": 0.3307092785835266, "learning_rate": 4.710370733259104e-05, "loss": 0.2427, "step": 13414 }, { "epoch": 0.2392715727892127, "grad_norm": 0.3205855190753937, "learning_rate": 4.710298007948558e-05, "loss": 0.1744, "step": 13415 }, { "epoch": 0.2392894089109264, "grad_norm": 0.2526535987854004, "learning_rate": 4.7102252740701324e-05, "loss": 0.1953, "step": 13416 }, { "epoch": 0.23930724503264011, "grad_norm": 0.3021654486656189, "learning_rate": 4.710152531624111e-05, "loss": 0.1945, "step": 13417 }, { "epoch": 0.2393250811543538, "grad_norm": 0.3173452615737915, "learning_rate": 4.710079780610776e-05, "loss": 0.1755, "step": 13418 }, { "epoch": 0.2393429172760675, "grad_norm": 0.293372243642807, "learning_rate": 4.710007021030407e-05, "loss": 0.2034, "step": 13419 }, { "epoch": 0.23936075339778118, "grad_norm": 0.34677833318710327, "learning_rate": 4.709934252883288e-05, "loss": 0.2264, "step": 13420 }, { "epoch": 0.2393785895194949, "grad_norm": 0.3412090539932251, "learning_rate": 4.709861476169701e-05, "loss": 0.1467, "step": 13421 }, { "epoch": 0.23939642564120858, "grad_norm": 0.3284223973751068, "learning_rate": 4.709788690889927e-05, "loss": 0.1861, "step": 13422 }, { "epoch": 0.23941426176292227, "grad_norm": 0.2922373414039612, "learning_rate": 4.709715897044249e-05, "loss": 0.16, "step": 13423 }, { "epoch": 0.23943209788463596, "grad_norm": 0.250119149684906, "learning_rate": 4.709643094632949e-05, "loss": 0.1501, "step": 13424 }, { "epoch": 0.23944993400634965, "grad_norm": 0.32594916224479675, "learning_rate": 4.7095702836563094e-05, "loss": 0.1938, "step": 13425 }, { "epoch": 0.23946777012806336, "grad_norm": 0.365202397108078, "learning_rate": 4.709497464114612e-05, "loss": 0.1598, "step": 13426 }, { "epoch": 0.23948560624977705, "grad_norm": 0.22981515526771545, "learning_rate": 4.709424636008139e-05, "loss": 0.1764, "step": 13427 }, { "epoch": 0.23950344237149074, "grad_norm": 0.23167452216148376, "learning_rate": 4.709351799337173e-05, "loss": 0.1622, "step": 13428 }, { "epoch": 0.23952127849320443, "grad_norm": 0.526567816734314, "learning_rate": 4.709278954101997e-05, "loss": 0.2555, "step": 13429 }, { "epoch": 0.23953911461491814, "grad_norm": 0.25232312083244324, "learning_rate": 4.709206100302892e-05, "loss": 0.1451, "step": 13430 }, { "epoch": 0.23955695073663183, "grad_norm": 0.3796832263469696, "learning_rate": 4.709133237940142e-05, "loss": 0.2461, "step": 13431 }, { "epoch": 0.23957478685834552, "grad_norm": 0.20113413035869598, "learning_rate": 4.7090603670140275e-05, "loss": 0.2, "step": 13432 }, { "epoch": 0.2395926229800592, "grad_norm": 0.27989864349365234, "learning_rate": 4.708987487524833e-05, "loss": 0.1512, "step": 13433 }, { "epoch": 0.23961045910177292, "grad_norm": 0.2920389175415039, "learning_rate": 4.708914599472839e-05, "loss": 0.2306, "step": 13434 }, { "epoch": 0.2396282952234866, "grad_norm": 0.2332906424999237, "learning_rate": 4.70884170285833e-05, "loss": 0.1965, "step": 13435 }, { "epoch": 0.2396461313452003, "grad_norm": 0.27118706703186035, "learning_rate": 4.7087687976815875e-05, "loss": 0.1589, "step": 13436 }, { "epoch": 0.23966396746691399, "grad_norm": 0.36195898056030273, "learning_rate": 4.708695883942894e-05, "loss": 0.2286, "step": 13437 }, { "epoch": 0.2396818035886277, "grad_norm": 0.3250676393508911, "learning_rate": 4.708622961642532e-05, "loss": 0.1648, "step": 13438 }, { "epoch": 0.2396996397103414, "grad_norm": 0.27408838272094727, "learning_rate": 4.708550030780786e-05, "loss": 0.1946, "step": 13439 }, { "epoch": 0.23971747583205508, "grad_norm": 0.2470027208328247, "learning_rate": 4.708477091357936e-05, "loss": 0.1832, "step": 13440 }, { "epoch": 0.23973531195376876, "grad_norm": 0.24596144258975983, "learning_rate": 4.708404143374266e-05, "loss": 0.1635, "step": 13441 }, { "epoch": 0.23975314807548248, "grad_norm": 0.2868826389312744, "learning_rate": 4.7083311868300596e-05, "loss": 0.1957, "step": 13442 }, { "epoch": 0.23977098419719617, "grad_norm": 0.21057447791099548, "learning_rate": 4.7082582217255975e-05, "loss": 0.1766, "step": 13443 }, { "epoch": 0.23978882031890986, "grad_norm": 0.2822345495223999, "learning_rate": 4.708185248061165e-05, "loss": 0.208, "step": 13444 }, { "epoch": 0.23980665644062354, "grad_norm": 0.3563338816165924, "learning_rate": 4.708112265837044e-05, "loss": 0.2117, "step": 13445 }, { "epoch": 0.23982449256233723, "grad_norm": 0.3101353943347931, "learning_rate": 4.708039275053516e-05, "loss": 0.2296, "step": 13446 }, { "epoch": 0.23984232868405095, "grad_norm": 0.22491110861301422, "learning_rate": 4.7079662757108655e-05, "loss": 0.1631, "step": 13447 }, { "epoch": 0.23986016480576464, "grad_norm": 0.2542402148246765, "learning_rate": 4.707893267809376e-05, "loss": 0.1587, "step": 13448 }, { "epoch": 0.23987800092747832, "grad_norm": 0.3075088560581207, "learning_rate": 4.7078202513493285e-05, "loss": 0.1872, "step": 13449 }, { "epoch": 0.239895837049192, "grad_norm": 0.3424496352672577, "learning_rate": 4.707747226331007e-05, "loss": 0.1851, "step": 13450 }, { "epoch": 0.23991367317090573, "grad_norm": 0.26897549629211426, "learning_rate": 4.707674192754696e-05, "loss": 0.1397, "step": 13451 }, { "epoch": 0.23993150929261942, "grad_norm": 0.29516106843948364, "learning_rate": 4.707601150620676e-05, "loss": 0.1486, "step": 13452 }, { "epoch": 0.2399493454143331, "grad_norm": 0.24099019169807434, "learning_rate": 4.707528099929233e-05, "loss": 0.152, "step": 13453 }, { "epoch": 0.2399671815360468, "grad_norm": 0.23713742196559906, "learning_rate": 4.707455040680647e-05, "loss": 0.1645, "step": 13454 }, { "epoch": 0.2399850176577605, "grad_norm": 0.2817937135696411, "learning_rate": 4.707381972875204e-05, "loss": 0.1982, "step": 13455 }, { "epoch": 0.2400028537794742, "grad_norm": 0.23757711052894592, "learning_rate": 4.707308896513185e-05, "loss": 0.1767, "step": 13456 }, { "epoch": 0.24002068990118788, "grad_norm": 0.329540491104126, "learning_rate": 4.707235811594875e-05, "loss": 0.2195, "step": 13457 }, { "epoch": 0.24003852602290157, "grad_norm": 0.27588650584220886, "learning_rate": 4.707162718120557e-05, "loss": 0.2181, "step": 13458 }, { "epoch": 0.2400563621446153, "grad_norm": 0.3009127676486969, "learning_rate": 4.7070896160905136e-05, "loss": 0.1929, "step": 13459 }, { "epoch": 0.24007419826632898, "grad_norm": 0.2650068700313568, "learning_rate": 4.7070165055050284e-05, "loss": 0.192, "step": 13460 }, { "epoch": 0.24009203438804266, "grad_norm": 0.41197818517684937, "learning_rate": 4.706943386364385e-05, "loss": 0.1791, "step": 13461 }, { "epoch": 0.24010987050975635, "grad_norm": 0.23184789717197418, "learning_rate": 4.7068702586688675e-05, "loss": 0.1753, "step": 13462 }, { "epoch": 0.24012770663147007, "grad_norm": 0.43426835536956787, "learning_rate": 4.7067971224187576e-05, "loss": 0.1491, "step": 13463 }, { "epoch": 0.24014554275318375, "grad_norm": 0.2904857397079468, "learning_rate": 4.70672397761434e-05, "loss": 0.215, "step": 13464 }, { "epoch": 0.24016337887489744, "grad_norm": 0.322457879781723, "learning_rate": 4.7066508242558993e-05, "loss": 0.2209, "step": 13465 }, { "epoch": 0.24018121499661113, "grad_norm": 0.28330883383750916, "learning_rate": 4.706577662343716e-05, "loss": 0.1696, "step": 13466 }, { "epoch": 0.24019905111832482, "grad_norm": 0.18052725493907928, "learning_rate": 4.706504491878077e-05, "loss": 0.1727, "step": 13467 }, { "epoch": 0.24021688724003853, "grad_norm": 0.24228636920452118, "learning_rate": 4.7064313128592644e-05, "loss": 0.1801, "step": 13468 }, { "epoch": 0.24023472336175222, "grad_norm": 0.24460303783416748, "learning_rate": 4.706358125287561e-05, "loss": 0.1913, "step": 13469 }, { "epoch": 0.2402525594834659, "grad_norm": 0.296675443649292, "learning_rate": 4.7062849291632516e-05, "loss": 0.2138, "step": 13470 }, { "epoch": 0.2402703956051796, "grad_norm": 0.23877549171447754, "learning_rate": 4.7062117244866205e-05, "loss": 0.188, "step": 13471 }, { "epoch": 0.24028823172689331, "grad_norm": 0.2380308359861374, "learning_rate": 4.7061385112579503e-05, "loss": 0.2084, "step": 13472 }, { "epoch": 0.240306067848607, "grad_norm": 0.25362640619277954, "learning_rate": 4.706065289477525e-05, "loss": 0.1483, "step": 13473 }, { "epoch": 0.2403239039703207, "grad_norm": 0.23467028141021729, "learning_rate": 4.7059920591456295e-05, "loss": 0.1305, "step": 13474 }, { "epoch": 0.24034174009203438, "grad_norm": 0.2669031023979187, "learning_rate": 4.705918820262546e-05, "loss": 0.1764, "step": 13475 }, { "epoch": 0.2403595762137481, "grad_norm": 0.3469369113445282, "learning_rate": 4.70584557282856e-05, "loss": 0.2136, "step": 13476 }, { "epoch": 0.24037741233546178, "grad_norm": 0.3109643757343292, "learning_rate": 4.705772316843955e-05, "loss": 0.2137, "step": 13477 }, { "epoch": 0.24039524845717547, "grad_norm": 0.3049291670322418, "learning_rate": 4.7056990523090136e-05, "loss": 0.1494, "step": 13478 }, { "epoch": 0.24041308457888916, "grad_norm": 0.3413618206977844, "learning_rate": 4.705625779224021e-05, "loss": 0.1783, "step": 13479 }, { "epoch": 0.24043092070060287, "grad_norm": 0.3412415683269501, "learning_rate": 4.7055524975892614e-05, "loss": 0.1698, "step": 13480 }, { "epoch": 0.24044875682231656, "grad_norm": 0.28880491852760315, "learning_rate": 4.705479207405018e-05, "loss": 0.1929, "step": 13481 }, { "epoch": 0.24046659294403025, "grad_norm": 0.3133130669593811, "learning_rate": 4.7054059086715766e-05, "loss": 0.1947, "step": 13482 }, { "epoch": 0.24048442906574394, "grad_norm": 0.3986121118068695, "learning_rate": 4.705332601389219e-05, "loss": 0.1756, "step": 13483 }, { "epoch": 0.24050226518745763, "grad_norm": 0.19810420274734497, "learning_rate": 4.705259285558231e-05, "loss": 0.1548, "step": 13484 }, { "epoch": 0.24052010130917134, "grad_norm": 0.3069475591182709, "learning_rate": 4.7051859611788964e-05, "loss": 0.1922, "step": 13485 }, { "epoch": 0.24053793743088503, "grad_norm": 0.2690931260585785, "learning_rate": 4.705112628251499e-05, "loss": 0.2428, "step": 13486 }, { "epoch": 0.24055577355259872, "grad_norm": 0.3050071597099304, "learning_rate": 4.7050392867763236e-05, "loss": 0.1767, "step": 13487 }, { "epoch": 0.2405736096743124, "grad_norm": 0.34742656350135803, "learning_rate": 4.7049659367536546e-05, "loss": 0.2042, "step": 13488 }, { "epoch": 0.24059144579602612, "grad_norm": 0.40350329875946045, "learning_rate": 4.704892578183776e-05, "loss": 0.2179, "step": 13489 }, { "epoch": 0.2406092819177398, "grad_norm": 0.2532975673675537, "learning_rate": 4.7048192110669726e-05, "loss": 0.2364, "step": 13490 }, { "epoch": 0.2406271180394535, "grad_norm": 0.2537136673927307, "learning_rate": 4.704745835403528e-05, "loss": 0.1617, "step": 13491 }, { "epoch": 0.24064495416116718, "grad_norm": 0.21060170233249664, "learning_rate": 4.704672451193727e-05, "loss": 0.1526, "step": 13492 }, { "epoch": 0.2406627902828809, "grad_norm": 0.27835801243782043, "learning_rate": 4.704599058437854e-05, "loss": 0.1425, "step": 13493 }, { "epoch": 0.2406806264045946, "grad_norm": 0.29193437099456787, "learning_rate": 4.704525657136194e-05, "loss": 0.163, "step": 13494 }, { "epoch": 0.24069846252630828, "grad_norm": 0.38879725337028503, "learning_rate": 4.704452247289031e-05, "loss": 0.2023, "step": 13495 }, { "epoch": 0.24071629864802196, "grad_norm": 0.26055172085762024, "learning_rate": 4.7043788288966495e-05, "loss": 0.2143, "step": 13496 }, { "epoch": 0.24073413476973568, "grad_norm": 0.278653085231781, "learning_rate": 4.704305401959334e-05, "loss": 0.1887, "step": 13497 }, { "epoch": 0.24075197089144937, "grad_norm": 0.2415715754032135, "learning_rate": 4.70423196647737e-05, "loss": 0.1534, "step": 13498 }, { "epoch": 0.24076980701316306, "grad_norm": 0.27306032180786133, "learning_rate": 4.704158522451041e-05, "loss": 0.1953, "step": 13499 }, { "epoch": 0.24078764313487674, "grad_norm": 0.33374711871147156, "learning_rate": 4.7040850698806324e-05, "loss": 0.2288, "step": 13500 }, { "epoch": 0.24080547925659046, "grad_norm": 0.28850099444389343, "learning_rate": 4.704011608766429e-05, "loss": 0.1811, "step": 13501 }, { "epoch": 0.24082331537830415, "grad_norm": 0.2672603726387024, "learning_rate": 4.703938139108716e-05, "loss": 0.24, "step": 13502 }, { "epoch": 0.24084115150001784, "grad_norm": 0.4115087687969208, "learning_rate": 4.703864660907776e-05, "loss": 0.2354, "step": 13503 }, { "epoch": 0.24085898762173152, "grad_norm": 0.24012894928455353, "learning_rate": 4.703791174163897e-05, "loss": 0.1711, "step": 13504 }, { "epoch": 0.2408768237434452, "grad_norm": 0.3480866253376007, "learning_rate": 4.703717678877362e-05, "loss": 0.2557, "step": 13505 }, { "epoch": 0.24089465986515893, "grad_norm": 0.21841463446617126, "learning_rate": 4.7036441750484555e-05, "loss": 0.2063, "step": 13506 }, { "epoch": 0.24091249598687262, "grad_norm": 0.192271426320076, "learning_rate": 4.703570662677463e-05, "loss": 0.1887, "step": 13507 }, { "epoch": 0.2409303321085863, "grad_norm": 0.2641043961048126, "learning_rate": 4.70349714176467e-05, "loss": 0.2066, "step": 13508 }, { "epoch": 0.2409481682303, "grad_norm": 0.25226837396621704, "learning_rate": 4.703423612310361e-05, "loss": 0.1988, "step": 13509 }, { "epoch": 0.2409660043520137, "grad_norm": 0.22991840541362762, "learning_rate": 4.703350074314821e-05, "loss": 0.1692, "step": 13510 }, { "epoch": 0.2409838404737274, "grad_norm": 0.24048392474651337, "learning_rate": 4.703276527778335e-05, "loss": 0.157, "step": 13511 }, { "epoch": 0.24100167659544108, "grad_norm": 0.3038395941257477, "learning_rate": 4.703202972701188e-05, "loss": 0.1583, "step": 13512 }, { "epoch": 0.24101951271715477, "grad_norm": 0.5107954144477844, "learning_rate": 4.7031294090836655e-05, "loss": 0.1829, "step": 13513 }, { "epoch": 0.2410373488388685, "grad_norm": 0.2879059314727783, "learning_rate": 4.7030558369260525e-05, "loss": 0.1498, "step": 13514 }, { "epoch": 0.24105518496058218, "grad_norm": 0.2327626496553421, "learning_rate": 4.7029822562286344e-05, "loss": 0.1626, "step": 13515 }, { "epoch": 0.24107302108229586, "grad_norm": 0.31451553106307983, "learning_rate": 4.702908666991696e-05, "loss": 0.1328, "step": 13516 }, { "epoch": 0.24109085720400955, "grad_norm": 0.2627813518047333, "learning_rate": 4.702835069215522e-05, "loss": 0.1384, "step": 13517 }, { "epoch": 0.24110869332572327, "grad_norm": 0.4106541574001312, "learning_rate": 4.702761462900399e-05, "loss": 0.1711, "step": 13518 }, { "epoch": 0.24112652944743695, "grad_norm": 0.30055779218673706, "learning_rate": 4.702687848046612e-05, "loss": 0.1599, "step": 13519 }, { "epoch": 0.24114436556915064, "grad_norm": 0.40765902400016785, "learning_rate": 4.702614224654446e-05, "loss": 0.2343, "step": 13520 }, { "epoch": 0.24116220169086433, "grad_norm": 0.3206610381603241, "learning_rate": 4.7025405927241864e-05, "loss": 0.2456, "step": 13521 }, { "epoch": 0.24118003781257805, "grad_norm": 0.35464996099472046, "learning_rate": 4.702466952256119e-05, "loss": 0.2533, "step": 13522 }, { "epoch": 0.24119787393429173, "grad_norm": 0.36289629340171814, "learning_rate": 4.702393303250529e-05, "loss": 0.1751, "step": 13523 }, { "epoch": 0.24121571005600542, "grad_norm": 0.3485252857208252, "learning_rate": 4.702319645707701e-05, "loss": 0.2052, "step": 13524 }, { "epoch": 0.2412335461777191, "grad_norm": 0.19893619418144226, "learning_rate": 4.702245979627922e-05, "loss": 0.1604, "step": 13525 }, { "epoch": 0.2412513822994328, "grad_norm": 0.5408480167388916, "learning_rate": 4.702172305011477e-05, "loss": 0.1686, "step": 13526 }, { "epoch": 0.24126921842114651, "grad_norm": 0.40812939405441284, "learning_rate": 4.702098621858651e-05, "loss": 0.2073, "step": 13527 }, { "epoch": 0.2412870545428602, "grad_norm": 0.36756691336631775, "learning_rate": 4.7020249301697315e-05, "loss": 0.2467, "step": 13528 }, { "epoch": 0.2413048906645739, "grad_norm": 0.35905441641807556, "learning_rate": 4.701951229945002e-05, "loss": 0.1918, "step": 13529 }, { "epoch": 0.24132272678628758, "grad_norm": 0.3223443031311035, "learning_rate": 4.701877521184749e-05, "loss": 0.1879, "step": 13530 }, { "epoch": 0.2413405629080013, "grad_norm": 0.31790000200271606, "learning_rate": 4.701803803889259e-05, "loss": 0.2048, "step": 13531 }, { "epoch": 0.24135839902971498, "grad_norm": 0.227671816945076, "learning_rate": 4.701730078058816e-05, "loss": 0.169, "step": 13532 }, { "epoch": 0.24137623515142867, "grad_norm": 0.20597079396247864, "learning_rate": 4.7016563436937065e-05, "loss": 0.1484, "step": 13533 }, { "epoch": 0.24139407127314236, "grad_norm": 0.30770665407180786, "learning_rate": 4.701582600794217e-05, "loss": 0.2302, "step": 13534 }, { "epoch": 0.24141190739485607, "grad_norm": 0.3436049222946167, "learning_rate": 4.701508849360633e-05, "loss": 0.2524, "step": 13535 }, { "epoch": 0.24142974351656976, "grad_norm": 0.23202307522296906, "learning_rate": 4.70143508939324e-05, "loss": 0.2159, "step": 13536 }, { "epoch": 0.24144757963828345, "grad_norm": 0.323688268661499, "learning_rate": 4.701361320892325e-05, "loss": 0.157, "step": 13537 }, { "epoch": 0.24146541575999714, "grad_norm": 0.294297993183136, "learning_rate": 4.701287543858173e-05, "loss": 0.1976, "step": 13538 }, { "epoch": 0.24148325188171085, "grad_norm": 0.28315097093582153, "learning_rate": 4.70121375829107e-05, "loss": 0.1812, "step": 13539 }, { "epoch": 0.24150108800342454, "grad_norm": 0.27391791343688965, "learning_rate": 4.701139964191302e-05, "loss": 0.1991, "step": 13540 }, { "epoch": 0.24151892412513823, "grad_norm": 0.3426937162876129, "learning_rate": 4.7010661615591556e-05, "loss": 0.1885, "step": 13541 }, { "epoch": 0.24153676024685192, "grad_norm": 0.20778916776180267, "learning_rate": 4.700992350394916e-05, "loss": 0.2054, "step": 13542 }, { "epoch": 0.24155459636856563, "grad_norm": 0.3177711069583893, "learning_rate": 4.7009185306988704e-05, "loss": 0.184, "step": 13543 }, { "epoch": 0.24157243249027932, "grad_norm": 0.2847771942615509, "learning_rate": 4.7008447024713044e-05, "loss": 0.2483, "step": 13544 }, { "epoch": 0.241590268611993, "grad_norm": 0.2583253085613251, "learning_rate": 4.700770865712504e-05, "loss": 0.1702, "step": 13545 }, { "epoch": 0.2416081047337067, "grad_norm": 0.2858251631259918, "learning_rate": 4.700697020422755e-05, "loss": 0.1952, "step": 13546 }, { "epoch": 0.24162594085542038, "grad_norm": 0.29717332124710083, "learning_rate": 4.7006231666023445e-05, "loss": 0.2201, "step": 13547 }, { "epoch": 0.2416437769771341, "grad_norm": 0.28044217824935913, "learning_rate": 4.700549304251559e-05, "loss": 0.1922, "step": 13548 }, { "epoch": 0.2416616130988478, "grad_norm": 0.2241005003452301, "learning_rate": 4.7004754333706846e-05, "loss": 0.1351, "step": 13549 }, { "epoch": 0.24167944922056148, "grad_norm": 0.288309246301651, "learning_rate": 4.700401553960007e-05, "loss": 0.2256, "step": 13550 }, { "epoch": 0.24169728534227516, "grad_norm": 0.3168530762195587, "learning_rate": 4.7003276660198125e-05, "loss": 0.1539, "step": 13551 }, { "epoch": 0.24171512146398888, "grad_norm": 0.37794947624206543, "learning_rate": 4.7002537695503887e-05, "loss": 0.2087, "step": 13552 }, { "epoch": 0.24173295758570257, "grad_norm": 0.27659884095191956, "learning_rate": 4.700179864552021e-05, "loss": 0.1749, "step": 13553 }, { "epoch": 0.24175079370741626, "grad_norm": 0.3827894628047943, "learning_rate": 4.7001059510249965e-05, "loss": 0.2402, "step": 13554 }, { "epoch": 0.24176862982912994, "grad_norm": 0.221628800034523, "learning_rate": 4.7000320289696014e-05, "loss": 0.1403, "step": 13555 }, { "epoch": 0.24178646595084366, "grad_norm": 0.28615614771842957, "learning_rate": 4.699958098386122e-05, "loss": 0.1782, "step": 13556 }, { "epoch": 0.24180430207255735, "grad_norm": 0.2050405740737915, "learning_rate": 4.699884159274845e-05, "loss": 0.1849, "step": 13557 }, { "epoch": 0.24182213819427104, "grad_norm": 0.21709440648555756, "learning_rate": 4.699810211636059e-05, "loss": 0.1412, "step": 13558 }, { "epoch": 0.24183997431598472, "grad_norm": 0.2735121250152588, "learning_rate": 4.6997362554700465e-05, "loss": 0.1911, "step": 13559 }, { "epoch": 0.24185781043769844, "grad_norm": 0.29296016693115234, "learning_rate": 4.699662290777098e-05, "loss": 0.1577, "step": 13560 }, { "epoch": 0.24187564655941213, "grad_norm": 0.3813689053058624, "learning_rate": 4.699588317557498e-05, "loss": 0.1932, "step": 13561 }, { "epoch": 0.24189348268112582, "grad_norm": 0.24916289746761322, "learning_rate": 4.6995143358115336e-05, "loss": 0.184, "step": 13562 }, { "epoch": 0.2419113188028395, "grad_norm": 0.3230383098125458, "learning_rate": 4.6994403455394925e-05, "loss": 0.1603, "step": 13563 }, { "epoch": 0.2419291549245532, "grad_norm": 0.1946956068277359, "learning_rate": 4.699366346741661e-05, "loss": 0.1439, "step": 13564 }, { "epoch": 0.2419469910462669, "grad_norm": 0.2937822937965393, "learning_rate": 4.699292339418326e-05, "loss": 0.1985, "step": 13565 }, { "epoch": 0.2419648271679806, "grad_norm": 0.31975552439689636, "learning_rate": 4.699218323569774e-05, "loss": 0.2309, "step": 13566 }, { "epoch": 0.24198266328969428, "grad_norm": 0.26577362418174744, "learning_rate": 4.699144299196292e-05, "loss": 0.1946, "step": 13567 }, { "epoch": 0.24200049941140797, "grad_norm": 0.2582256495952606, "learning_rate": 4.6990702662981676e-05, "loss": 0.1907, "step": 13568 }, { "epoch": 0.2420183355331217, "grad_norm": 0.34865236282348633, "learning_rate": 4.698996224875687e-05, "loss": 0.1794, "step": 13569 }, { "epoch": 0.24203617165483537, "grad_norm": 0.3029707074165344, "learning_rate": 4.698922174929138e-05, "loss": 0.1558, "step": 13570 }, { "epoch": 0.24205400777654906, "grad_norm": 0.2675391733646393, "learning_rate": 4.6988481164588063e-05, "loss": 0.1616, "step": 13571 }, { "epoch": 0.24207184389826275, "grad_norm": 0.30130940675735474, "learning_rate": 4.6987740494649806e-05, "loss": 0.1947, "step": 13572 }, { "epoch": 0.24208968001997647, "grad_norm": 0.22134047746658325, "learning_rate": 4.698699973947947e-05, "loss": 0.1947, "step": 13573 }, { "epoch": 0.24210751614169015, "grad_norm": 0.24363790452480316, "learning_rate": 4.698625889907993e-05, "loss": 0.151, "step": 13574 }, { "epoch": 0.24212535226340384, "grad_norm": 0.26265788078308105, "learning_rate": 4.698551797345405e-05, "loss": 0.1948, "step": 13575 }, { "epoch": 0.24214318838511753, "grad_norm": 0.21863479912281036, "learning_rate": 4.698477696260472e-05, "loss": 0.1597, "step": 13576 }, { "epoch": 0.24216102450683125, "grad_norm": 0.31729671359062195, "learning_rate": 4.6984035866534795e-05, "loss": 0.2143, "step": 13577 }, { "epoch": 0.24217886062854493, "grad_norm": 0.27609825134277344, "learning_rate": 4.698329468524715e-05, "loss": 0.1699, "step": 13578 }, { "epoch": 0.24219669675025862, "grad_norm": 0.24096006155014038, "learning_rate": 4.698255341874467e-05, "loss": 0.1863, "step": 13579 }, { "epoch": 0.2422145328719723, "grad_norm": 0.3234200179576874, "learning_rate": 4.698181206703022e-05, "loss": 0.1626, "step": 13580 }, { "epoch": 0.24223236899368603, "grad_norm": 0.2748022973537445, "learning_rate": 4.698107063010667e-05, "loss": 0.198, "step": 13581 }, { "epoch": 0.2422502051153997, "grad_norm": 0.2667080760002136, "learning_rate": 4.6980329107976895e-05, "loss": 0.1745, "step": 13582 }, { "epoch": 0.2422680412371134, "grad_norm": 0.29853636026382446, "learning_rate": 4.697958750064378e-05, "loss": 0.2101, "step": 13583 }, { "epoch": 0.2422858773588271, "grad_norm": 0.3241555392742157, "learning_rate": 4.697884580811019e-05, "loss": 0.1543, "step": 13584 }, { "epoch": 0.24230371348054078, "grad_norm": 0.2748297452926636, "learning_rate": 4.6978104030379e-05, "loss": 0.2242, "step": 13585 }, { "epoch": 0.2423215496022545, "grad_norm": 0.26102542877197266, "learning_rate": 4.6977362167453085e-05, "loss": 0.1627, "step": 13586 }, { "epoch": 0.24233938572396818, "grad_norm": 0.32901981472969055, "learning_rate": 4.6976620219335334e-05, "loss": 0.151, "step": 13587 }, { "epoch": 0.24235722184568187, "grad_norm": 0.31791335344314575, "learning_rate": 4.6975878186028607e-05, "loss": 0.2245, "step": 13588 }, { "epoch": 0.24237505796739556, "grad_norm": 0.35270991921424866, "learning_rate": 4.697513606753578e-05, "loss": 0.178, "step": 13589 }, { "epoch": 0.24239289408910927, "grad_norm": 0.2793395221233368, "learning_rate": 4.697439386385975e-05, "loss": 0.2045, "step": 13590 }, { "epoch": 0.24241073021082296, "grad_norm": 0.21825000643730164, "learning_rate": 4.697365157500336e-05, "loss": 0.1787, "step": 13591 }, { "epoch": 0.24242856633253665, "grad_norm": 0.33723846077919006, "learning_rate": 4.697290920096952e-05, "loss": 0.22, "step": 13592 }, { "epoch": 0.24244640245425034, "grad_norm": 0.24851396679878235, "learning_rate": 4.6972166741761095e-05, "loss": 0.199, "step": 13593 }, { "epoch": 0.24246423857596405, "grad_norm": 0.2603556215763092, "learning_rate": 4.6971424197380965e-05, "loss": 0.166, "step": 13594 }, { "epoch": 0.24248207469767774, "grad_norm": 0.25877392292022705, "learning_rate": 4.6970681567832e-05, "loss": 0.2086, "step": 13595 }, { "epoch": 0.24249991081939143, "grad_norm": 0.26149800419807434, "learning_rate": 4.6969938853117086e-05, "loss": 0.2017, "step": 13596 }, { "epoch": 0.24251774694110512, "grad_norm": 0.3259674608707428, "learning_rate": 4.6969196053239104e-05, "loss": 0.1815, "step": 13597 }, { "epoch": 0.24253558306281883, "grad_norm": 0.2884747087955475, "learning_rate": 4.6968453168200924e-05, "loss": 0.201, "step": 13598 }, { "epoch": 0.24255341918453252, "grad_norm": 0.29727765917778015, "learning_rate": 4.696771019800543e-05, "loss": 0.1526, "step": 13599 }, { "epoch": 0.2425712553062462, "grad_norm": 0.3936905860900879, "learning_rate": 4.6966967142655516e-05, "loss": 0.2052, "step": 13600 }, { "epoch": 0.2425890914279599, "grad_norm": 0.3130809962749481, "learning_rate": 4.696622400215404e-05, "loss": 0.1494, "step": 13601 }, { "epoch": 0.2426069275496736, "grad_norm": 0.3812466263771057, "learning_rate": 4.6965480776503897e-05, "loss": 0.1655, "step": 13602 }, { "epoch": 0.2426247636713873, "grad_norm": 0.21273046731948853, "learning_rate": 4.6964737465707966e-05, "loss": 0.1838, "step": 13603 }, { "epoch": 0.242642599793101, "grad_norm": 0.2797996997833252, "learning_rate": 4.696399406976912e-05, "loss": 0.1663, "step": 13604 }, { "epoch": 0.24266043591481468, "grad_norm": 0.35725197196006775, "learning_rate": 4.696325058869025e-05, "loss": 0.2026, "step": 13605 }, { "epoch": 0.24267827203652836, "grad_norm": 0.28045418858528137, "learning_rate": 4.696250702247423e-05, "loss": 0.2055, "step": 13606 }, { "epoch": 0.24269610815824208, "grad_norm": 0.2977827191352844, "learning_rate": 4.6961763371123956e-05, "loss": 0.1798, "step": 13607 }, { "epoch": 0.24271394427995577, "grad_norm": 0.3112426996231079, "learning_rate": 4.69610196346423e-05, "loss": 0.2341, "step": 13608 }, { "epoch": 0.24273178040166946, "grad_norm": 0.3510107696056366, "learning_rate": 4.696027581303215e-05, "loss": 0.2423, "step": 13609 }, { "epoch": 0.24274961652338314, "grad_norm": 0.3482983112335205, "learning_rate": 4.6959531906296375e-05, "loss": 0.2287, "step": 13610 }, { "epoch": 0.24276745264509686, "grad_norm": 0.43641147017478943, "learning_rate": 4.695878791443788e-05, "loss": 0.2492, "step": 13611 }, { "epoch": 0.24278528876681055, "grad_norm": 0.24481116235256195, "learning_rate": 4.695804383745953e-05, "loss": 0.1702, "step": 13612 }, { "epoch": 0.24280312488852424, "grad_norm": 0.3286797106266022, "learning_rate": 4.695729967536422e-05, "loss": 0.1842, "step": 13613 }, { "epoch": 0.24282096101023792, "grad_norm": 0.5271211862564087, "learning_rate": 4.6956555428154833e-05, "loss": 0.1755, "step": 13614 }, { "epoch": 0.24283879713195164, "grad_norm": 0.39401528239250183, "learning_rate": 4.6955811095834255e-05, "loss": 0.2669, "step": 13615 }, { "epoch": 0.24285663325366533, "grad_norm": 0.3487893044948578, "learning_rate": 4.695506667840537e-05, "loss": 0.2894, "step": 13616 }, { "epoch": 0.24287446937537902, "grad_norm": 0.2682149410247803, "learning_rate": 4.695432217587107e-05, "loss": 0.2305, "step": 13617 }, { "epoch": 0.2428923054970927, "grad_norm": 0.23941993713378906, "learning_rate": 4.695357758823423e-05, "loss": 0.2008, "step": 13618 }, { "epoch": 0.24291014161880642, "grad_norm": 0.3209134638309479, "learning_rate": 4.6952832915497736e-05, "loss": 0.1904, "step": 13619 }, { "epoch": 0.2429279777405201, "grad_norm": 0.36643916368484497, "learning_rate": 4.695208815766448e-05, "loss": 0.2104, "step": 13620 }, { "epoch": 0.2429458138622338, "grad_norm": 0.24296219646930695, "learning_rate": 4.695134331473735e-05, "loss": 0.192, "step": 13621 }, { "epoch": 0.24296364998394748, "grad_norm": 0.24099262058734894, "learning_rate": 4.695059838671923e-05, "loss": 0.1731, "step": 13622 }, { "epoch": 0.2429814861056612, "grad_norm": 0.30778101086616516, "learning_rate": 4.694985337361302e-05, "loss": 0.1963, "step": 13623 }, { "epoch": 0.2429993222273749, "grad_norm": 0.21597592532634735, "learning_rate": 4.694910827542158e-05, "loss": 0.1422, "step": 13624 }, { "epoch": 0.24301715834908857, "grad_norm": 0.27674752473831177, "learning_rate": 4.694836309214783e-05, "loss": 0.2272, "step": 13625 }, { "epoch": 0.24303499447080226, "grad_norm": 0.29003921151161194, "learning_rate": 4.6947617823794636e-05, "loss": 0.1958, "step": 13626 }, { "epoch": 0.24305283059251595, "grad_norm": 0.2573041021823883, "learning_rate": 4.69468724703649e-05, "loss": 0.159, "step": 13627 }, { "epoch": 0.24307066671422967, "grad_norm": 0.3151061236858368, "learning_rate": 4.69461270318615e-05, "loss": 0.1654, "step": 13628 }, { "epoch": 0.24308850283594335, "grad_norm": 0.3935214877128601, "learning_rate": 4.6945381508287335e-05, "loss": 0.1914, "step": 13629 }, { "epoch": 0.24310633895765704, "grad_norm": 0.26115089654922485, "learning_rate": 4.6944635899645294e-05, "loss": 0.1855, "step": 13630 }, { "epoch": 0.24312417507937073, "grad_norm": 0.33393970131874084, "learning_rate": 4.6943890205938255e-05, "loss": 0.214, "step": 13631 }, { "epoch": 0.24314201120108445, "grad_norm": 0.21798068284988403, "learning_rate": 4.6943144427169125e-05, "loss": 0.1698, "step": 13632 }, { "epoch": 0.24315984732279813, "grad_norm": 0.33182114362716675, "learning_rate": 4.69423985633408e-05, "loss": 0.236, "step": 13633 }, { "epoch": 0.24317768344451182, "grad_norm": 0.22838178277015686, "learning_rate": 4.6941652614456145e-05, "loss": 0.1958, "step": 13634 }, { "epoch": 0.2431955195662255, "grad_norm": 0.3527720868587494, "learning_rate": 4.694090658051806e-05, "loss": 0.1748, "step": 13635 }, { "epoch": 0.24321335568793923, "grad_norm": 0.22157545387744904, "learning_rate": 4.694016046152946e-05, "loss": 0.124, "step": 13636 }, { "epoch": 0.2432311918096529, "grad_norm": 0.29315274953842163, "learning_rate": 4.693941425749321e-05, "loss": 0.2178, "step": 13637 }, { "epoch": 0.2432490279313666, "grad_norm": 0.26105692982673645, "learning_rate": 4.693866796841222e-05, "loss": 0.2202, "step": 13638 }, { "epoch": 0.2432668640530803, "grad_norm": 0.2692136764526367, "learning_rate": 4.693792159428937e-05, "loss": 0.202, "step": 13639 }, { "epoch": 0.243284700174794, "grad_norm": 0.24614101648330688, "learning_rate": 4.693717513512755e-05, "loss": 0.1453, "step": 13640 }, { "epoch": 0.2433025362965077, "grad_norm": 0.22633449733257294, "learning_rate": 4.693642859092968e-05, "loss": 0.1601, "step": 13641 }, { "epoch": 0.24332037241822138, "grad_norm": 0.2577258050441742, "learning_rate": 4.693568196169862e-05, "loss": 0.2034, "step": 13642 }, { "epoch": 0.24333820853993507, "grad_norm": 0.41488316655158997, "learning_rate": 4.69349352474373e-05, "loss": 0.1547, "step": 13643 }, { "epoch": 0.24335604466164878, "grad_norm": 0.22476181387901306, "learning_rate": 4.6934188448148574e-05, "loss": 0.1864, "step": 13644 }, { "epoch": 0.24337388078336247, "grad_norm": 0.38418251276016235, "learning_rate": 4.693344156383537e-05, "loss": 0.2008, "step": 13645 }, { "epoch": 0.24339171690507616, "grad_norm": 0.32155823707580566, "learning_rate": 4.693269459450057e-05, "loss": 0.1858, "step": 13646 }, { "epoch": 0.24340955302678985, "grad_norm": 0.29565367102622986, "learning_rate": 4.693194754014707e-05, "loss": 0.2004, "step": 13647 }, { "epoch": 0.24342738914850354, "grad_norm": 0.341865211725235, "learning_rate": 4.693120040077776e-05, "loss": 0.182, "step": 13648 }, { "epoch": 0.24344522527021725, "grad_norm": 0.28684622049331665, "learning_rate": 4.693045317639555e-05, "loss": 0.1776, "step": 13649 }, { "epoch": 0.24346306139193094, "grad_norm": 0.36310359835624695, "learning_rate": 4.692970586700333e-05, "loss": 0.1937, "step": 13650 }, { "epoch": 0.24348089751364463, "grad_norm": 0.3165016770362854, "learning_rate": 4.692895847260399e-05, "loss": 0.2052, "step": 13651 }, { "epoch": 0.24349873363535832, "grad_norm": 0.31170836091041565, "learning_rate": 4.6928210993200425e-05, "loss": 0.2146, "step": 13652 }, { "epoch": 0.24351656975707203, "grad_norm": 0.2886374592781067, "learning_rate": 4.692746342879556e-05, "loss": 0.2198, "step": 13653 }, { "epoch": 0.24353440587878572, "grad_norm": 0.2144119143486023, "learning_rate": 4.6926715779392264e-05, "loss": 0.1618, "step": 13654 }, { "epoch": 0.2435522420004994, "grad_norm": 0.29546231031417847, "learning_rate": 4.692596804499344e-05, "loss": 0.2019, "step": 13655 }, { "epoch": 0.2435700781222131, "grad_norm": 0.27046895027160645, "learning_rate": 4.6925220225602e-05, "loss": 0.1928, "step": 13656 }, { "epoch": 0.2435879142439268, "grad_norm": 0.2773810923099518, "learning_rate": 4.6924472321220824e-05, "loss": 0.1639, "step": 13657 }, { "epoch": 0.2436057503656405, "grad_norm": 0.17827634513378143, "learning_rate": 4.692372433185282e-05, "loss": 0.1166, "step": 13658 }, { "epoch": 0.2436235864873542, "grad_norm": 0.30844777822494507, "learning_rate": 4.6922976257500895e-05, "loss": 0.1979, "step": 13659 }, { "epoch": 0.24364142260906788, "grad_norm": 0.29913416504859924, "learning_rate": 4.692222809816794e-05, "loss": 0.1864, "step": 13660 }, { "epoch": 0.2436592587307816, "grad_norm": 0.32545220851898193, "learning_rate": 4.692147985385686e-05, "loss": 0.2756, "step": 13661 }, { "epoch": 0.24367709485249528, "grad_norm": 0.3084665834903717, "learning_rate": 4.692073152457055e-05, "loss": 0.2001, "step": 13662 }, { "epoch": 0.24369493097420897, "grad_norm": 0.2230866402387619, "learning_rate": 4.691998311031192e-05, "loss": 0.1805, "step": 13663 }, { "epoch": 0.24371276709592266, "grad_norm": 0.29065218567848206, "learning_rate": 4.691923461108385e-05, "loss": 0.1487, "step": 13664 }, { "epoch": 0.24373060321763634, "grad_norm": 0.3638111352920532, "learning_rate": 4.691848602688926e-05, "loss": 0.2004, "step": 13665 }, { "epoch": 0.24374843933935006, "grad_norm": 0.48927006125450134, "learning_rate": 4.6917737357731064e-05, "loss": 0.1687, "step": 13666 }, { "epoch": 0.24376627546106375, "grad_norm": 0.3269551396369934, "learning_rate": 4.6916988603612136e-05, "loss": 0.1619, "step": 13667 }, { "epoch": 0.24378411158277744, "grad_norm": 0.2890358865261078, "learning_rate": 4.6916239764535384e-05, "loss": 0.1635, "step": 13668 }, { "epoch": 0.24380194770449112, "grad_norm": 0.3172226846218109, "learning_rate": 4.691549084050372e-05, "loss": 0.1855, "step": 13669 }, { "epoch": 0.24381978382620484, "grad_norm": 0.32188624143600464, "learning_rate": 4.6914741831520046e-05, "loss": 0.2119, "step": 13670 }, { "epoch": 0.24383761994791853, "grad_norm": 0.4289701282978058, "learning_rate": 4.691399273758727e-05, "loss": 0.2465, "step": 13671 }, { "epoch": 0.24385545606963221, "grad_norm": 0.3131117522716522, "learning_rate": 4.6913243558708286e-05, "loss": 0.1885, "step": 13672 }, { "epoch": 0.2438732921913459, "grad_norm": 0.20149429142475128, "learning_rate": 4.6912494294886e-05, "loss": 0.1781, "step": 13673 }, { "epoch": 0.24389112831305962, "grad_norm": 0.2202911376953125, "learning_rate": 4.6911744946123314e-05, "loss": 0.1956, "step": 13674 }, { "epoch": 0.2439089644347733, "grad_norm": 0.21662744879722595, "learning_rate": 4.691099551242314e-05, "loss": 0.1519, "step": 13675 }, { "epoch": 0.243926800556487, "grad_norm": 0.3102678954601288, "learning_rate": 4.6910245993788385e-05, "loss": 0.2248, "step": 13676 }, { "epoch": 0.24394463667820068, "grad_norm": 0.2462979555130005, "learning_rate": 4.6909496390221944e-05, "loss": 0.1847, "step": 13677 }, { "epoch": 0.2439624727999144, "grad_norm": 0.2958288788795471, "learning_rate": 4.690874670172672e-05, "loss": 0.1552, "step": 13678 }, { "epoch": 0.2439803089216281, "grad_norm": 0.30526313185691833, "learning_rate": 4.690799692830564e-05, "loss": 0.1745, "step": 13679 }, { "epoch": 0.24399814504334177, "grad_norm": 0.32857024669647217, "learning_rate": 4.690724706996159e-05, "loss": 0.2149, "step": 13680 }, { "epoch": 0.24401598116505546, "grad_norm": 0.2644297778606415, "learning_rate": 4.690649712669748e-05, "loss": 0.1757, "step": 13681 }, { "epoch": 0.24403381728676918, "grad_norm": 0.2937260568141937, "learning_rate": 4.690574709851623e-05, "loss": 0.1948, "step": 13682 }, { "epoch": 0.24405165340848287, "grad_norm": 0.22789674997329712, "learning_rate": 4.690499698542074e-05, "loss": 0.2055, "step": 13683 }, { "epoch": 0.24406948953019655, "grad_norm": 0.31945011019706726, "learning_rate": 4.69042467874139e-05, "loss": 0.182, "step": 13684 }, { "epoch": 0.24408732565191024, "grad_norm": 0.19396725296974182, "learning_rate": 4.690349650449864e-05, "loss": 0.15, "step": 13685 }, { "epoch": 0.24410516177362393, "grad_norm": 0.429905503988266, "learning_rate": 4.690274613667787e-05, "loss": 0.2581, "step": 13686 }, { "epoch": 0.24412299789533765, "grad_norm": 0.21720905601978302, "learning_rate": 4.690199568395449e-05, "loss": 0.1727, "step": 13687 }, { "epoch": 0.24414083401705133, "grad_norm": 0.2965554893016815, "learning_rate": 4.690124514633141e-05, "loss": 0.2113, "step": 13688 }, { "epoch": 0.24415867013876502, "grad_norm": 0.24341322481632233, "learning_rate": 4.690049452381153e-05, "loss": 0.1892, "step": 13689 }, { "epoch": 0.2441765062604787, "grad_norm": 0.2477460503578186, "learning_rate": 4.689974381639778e-05, "loss": 0.1516, "step": 13690 }, { "epoch": 0.24419434238219243, "grad_norm": 0.39323747158050537, "learning_rate": 4.689899302409305e-05, "loss": 0.1356, "step": 13691 }, { "epoch": 0.2442121785039061, "grad_norm": 0.31786781549453735, "learning_rate": 4.6898242146900266e-05, "loss": 0.2181, "step": 13692 }, { "epoch": 0.2442300146256198, "grad_norm": 0.29771488904953003, "learning_rate": 4.689749118482233e-05, "loss": 0.2113, "step": 13693 }, { "epoch": 0.2442478507473335, "grad_norm": 0.21941302716732025, "learning_rate": 4.689674013786216e-05, "loss": 0.1809, "step": 13694 }, { "epoch": 0.2442656868690472, "grad_norm": 0.2876521944999695, "learning_rate": 4.689598900602266e-05, "loss": 0.1833, "step": 13695 }, { "epoch": 0.2442835229907609, "grad_norm": 0.21469078958034515, "learning_rate": 4.6895237789306736e-05, "loss": 0.156, "step": 13696 }, { "epoch": 0.24430135911247458, "grad_norm": 0.26704058051109314, "learning_rate": 4.6894486487717314e-05, "loss": 0.2246, "step": 13697 }, { "epoch": 0.24431919523418827, "grad_norm": 0.2957056164741516, "learning_rate": 4.68937351012573e-05, "loss": 0.1857, "step": 13698 }, { "epoch": 0.24433703135590198, "grad_norm": 0.31056469678878784, "learning_rate": 4.689298362992961e-05, "loss": 0.1978, "step": 13699 }, { "epoch": 0.24435486747761567, "grad_norm": 0.2240130454301834, "learning_rate": 4.6892232073737144e-05, "loss": 0.1579, "step": 13700 }, { "epoch": 0.24437270359932936, "grad_norm": 0.25795117020606995, "learning_rate": 4.6891480432682836e-05, "loss": 0.2125, "step": 13701 }, { "epoch": 0.24439053972104305, "grad_norm": 0.22394831478595734, "learning_rate": 4.689072870676958e-05, "loss": 0.1604, "step": 13702 }, { "epoch": 0.24440837584275676, "grad_norm": 0.2760259211063385, "learning_rate": 4.688997689600031e-05, "loss": 0.1932, "step": 13703 }, { "epoch": 0.24442621196447045, "grad_norm": 0.21913674473762512, "learning_rate": 4.688922500037792e-05, "loss": 0.1583, "step": 13704 }, { "epoch": 0.24444404808618414, "grad_norm": 0.2677614986896515, "learning_rate": 4.688847301990533e-05, "loss": 0.1855, "step": 13705 }, { "epoch": 0.24446188420789783, "grad_norm": 0.29969218373298645, "learning_rate": 4.688772095458547e-05, "loss": 0.2287, "step": 13706 }, { "epoch": 0.24447972032961152, "grad_norm": 0.3517477512359619, "learning_rate": 4.688696880442124e-05, "loss": 0.1335, "step": 13707 }, { "epoch": 0.24449755645132523, "grad_norm": 0.3408452272415161, "learning_rate": 4.688621656941555e-05, "loss": 0.1944, "step": 13708 }, { "epoch": 0.24451539257303892, "grad_norm": 0.2209720015525818, "learning_rate": 4.688546424957133e-05, "loss": 0.1603, "step": 13709 }, { "epoch": 0.2445332286947526, "grad_norm": 0.45468583703041077, "learning_rate": 4.68847118448915e-05, "loss": 0.2845, "step": 13710 }, { "epoch": 0.2445510648164663, "grad_norm": 0.2596586048603058, "learning_rate": 4.6883959355378956e-05, "loss": 0.2129, "step": 13711 }, { "epoch": 0.24456890093818, "grad_norm": 0.21649125218391418, "learning_rate": 4.6883206781036634e-05, "loss": 0.1755, "step": 13712 }, { "epoch": 0.2445867370598937, "grad_norm": 0.28443682193756104, "learning_rate": 4.6882454121867446e-05, "loss": 0.2265, "step": 13713 }, { "epoch": 0.2446045731816074, "grad_norm": 0.25232720375061035, "learning_rate": 4.688170137787431e-05, "loss": 0.1328, "step": 13714 }, { "epoch": 0.24462240930332108, "grad_norm": 0.31689730286598206, "learning_rate": 4.688094854906013e-05, "loss": 0.1763, "step": 13715 }, { "epoch": 0.2446402454250348, "grad_norm": 0.3855779767036438, "learning_rate": 4.6880195635427846e-05, "loss": 0.1935, "step": 13716 }, { "epoch": 0.24465808154674848, "grad_norm": 0.3011952042579651, "learning_rate": 4.687944263698037e-05, "loss": 0.2933, "step": 13717 }, { "epoch": 0.24467591766846217, "grad_norm": 0.24400511384010315, "learning_rate": 4.687868955372061e-05, "loss": 0.1732, "step": 13718 }, { "epoch": 0.24469375379017586, "grad_norm": 0.25264862179756165, "learning_rate": 4.687793638565149e-05, "loss": 0.2059, "step": 13719 }, { "epoch": 0.24471158991188957, "grad_norm": 0.2522881031036377, "learning_rate": 4.687718313277594e-05, "loss": 0.1687, "step": 13720 }, { "epoch": 0.24472942603360326, "grad_norm": 0.32159167528152466, "learning_rate": 4.687642979509687e-05, "loss": 0.1804, "step": 13721 }, { "epoch": 0.24474726215531695, "grad_norm": 0.24564985930919647, "learning_rate": 4.68756763726172e-05, "loss": 0.1819, "step": 13722 }, { "epoch": 0.24476509827703063, "grad_norm": 0.25257232785224915, "learning_rate": 4.687492286533985e-05, "loss": 0.1674, "step": 13723 }, { "epoch": 0.24478293439874435, "grad_norm": 0.27753975987434387, "learning_rate": 4.687416927326775e-05, "loss": 0.1861, "step": 13724 }, { "epoch": 0.24480077052045804, "grad_norm": 0.4845978915691376, "learning_rate": 4.687341559640381e-05, "loss": 0.1874, "step": 13725 }, { "epoch": 0.24481860664217173, "grad_norm": 0.24164879322052002, "learning_rate": 4.687266183475096e-05, "loss": 0.2065, "step": 13726 }, { "epoch": 0.24483644276388541, "grad_norm": 0.2179027944803238, "learning_rate": 4.6871907988312114e-05, "loss": 0.1812, "step": 13727 }, { "epoch": 0.2448542788855991, "grad_norm": 0.31235405802726746, "learning_rate": 4.6871154057090204e-05, "loss": 0.2161, "step": 13728 }, { "epoch": 0.24487211500731282, "grad_norm": 0.31012001633644104, "learning_rate": 4.6870400041088136e-05, "loss": 0.2249, "step": 13729 }, { "epoch": 0.2448899511290265, "grad_norm": 0.2521165609359741, "learning_rate": 4.686964594030885e-05, "loss": 0.1872, "step": 13730 }, { "epoch": 0.2449077872507402, "grad_norm": 0.2586023509502411, "learning_rate": 4.686889175475527e-05, "loss": 0.1946, "step": 13731 }, { "epoch": 0.24492562337245388, "grad_norm": 0.2127128690481186, "learning_rate": 4.6868137484430307e-05, "loss": 0.1601, "step": 13732 }, { "epoch": 0.2449434594941676, "grad_norm": 0.2974259555339813, "learning_rate": 4.686738312933688e-05, "loss": 0.182, "step": 13733 }, { "epoch": 0.24496129561588129, "grad_norm": 0.31503793597221375, "learning_rate": 4.686662868947794e-05, "loss": 0.2267, "step": 13734 }, { "epoch": 0.24497913173759497, "grad_norm": 0.2340630441904068, "learning_rate": 4.686587416485638e-05, "loss": 0.1891, "step": 13735 }, { "epoch": 0.24499696785930866, "grad_norm": 0.4805942177772522, "learning_rate": 4.686511955547515e-05, "loss": 0.1731, "step": 13736 }, { "epoch": 0.24501480398102238, "grad_norm": 0.21227367222309113, "learning_rate": 4.6864364861337165e-05, "loss": 0.1764, "step": 13737 }, { "epoch": 0.24503264010273607, "grad_norm": 0.26857635378837585, "learning_rate": 4.686361008244534e-05, "loss": 0.1785, "step": 13738 }, { "epoch": 0.24505047622444975, "grad_norm": 0.3060723543167114, "learning_rate": 4.686285521880263e-05, "loss": 0.1383, "step": 13739 }, { "epoch": 0.24506831234616344, "grad_norm": 0.2117903083562851, "learning_rate": 4.686210027041192e-05, "loss": 0.1427, "step": 13740 }, { "epoch": 0.24508614846787716, "grad_norm": 0.2373329997062683, "learning_rate": 4.686134523727617e-05, "loss": 0.1934, "step": 13741 }, { "epoch": 0.24510398458959085, "grad_norm": 0.3884005844593048, "learning_rate": 4.686059011939829e-05, "loss": 0.225, "step": 13742 }, { "epoch": 0.24512182071130453, "grad_norm": 0.2836616635322571, "learning_rate": 4.685983491678122e-05, "loss": 0.1636, "step": 13743 }, { "epoch": 0.24513965683301822, "grad_norm": 0.24956491589546204, "learning_rate": 4.685907962942787e-05, "loss": 0.1826, "step": 13744 }, { "epoch": 0.2451574929547319, "grad_norm": 0.2174319326877594, "learning_rate": 4.685832425734118e-05, "loss": 0.1401, "step": 13745 }, { "epoch": 0.24517532907644563, "grad_norm": 0.35470327734947205, "learning_rate": 4.6857568800524085e-05, "loss": 0.2204, "step": 13746 }, { "epoch": 0.2451931651981593, "grad_norm": 0.2872637212276459, "learning_rate": 4.6856813258979494e-05, "loss": 0.1851, "step": 13747 }, { "epoch": 0.245211001319873, "grad_norm": 0.32511037588119507, "learning_rate": 4.685605763271035e-05, "loss": 0.2241, "step": 13748 }, { "epoch": 0.2452288374415867, "grad_norm": 0.34901243448257446, "learning_rate": 4.685530192171958e-05, "loss": 0.1742, "step": 13749 }, { "epoch": 0.2452466735633004, "grad_norm": 0.23517319560050964, "learning_rate": 4.68545461260101e-05, "loss": 0.1704, "step": 13750 }, { "epoch": 0.2452645096850141, "grad_norm": 0.2833958864212036, "learning_rate": 4.685379024558486e-05, "loss": 0.1947, "step": 13751 }, { "epoch": 0.24528234580672778, "grad_norm": 0.3658854365348816, "learning_rate": 4.685303428044678e-05, "loss": 0.2113, "step": 13752 }, { "epoch": 0.24530018192844147, "grad_norm": 0.3165271282196045, "learning_rate": 4.685227823059879e-05, "loss": 0.2736, "step": 13753 }, { "epoch": 0.24531801805015518, "grad_norm": 0.22832855582237244, "learning_rate": 4.685152209604382e-05, "loss": 0.1839, "step": 13754 }, { "epoch": 0.24533585417186887, "grad_norm": 0.20947571098804474, "learning_rate": 4.685076587678481e-05, "loss": 0.2235, "step": 13755 }, { "epoch": 0.24535369029358256, "grad_norm": 0.3125641644001007, "learning_rate": 4.685000957282468e-05, "loss": 0.2254, "step": 13756 }, { "epoch": 0.24537152641529625, "grad_norm": 0.2678644061088562, "learning_rate": 4.6849253184166366e-05, "loss": 0.2227, "step": 13757 }, { "epoch": 0.24538936253700996, "grad_norm": 0.20531684160232544, "learning_rate": 4.68484967108128e-05, "loss": 0.177, "step": 13758 }, { "epoch": 0.24540719865872365, "grad_norm": 0.288449764251709, "learning_rate": 4.684774015276692e-05, "loss": 0.1627, "step": 13759 }, { "epoch": 0.24542503478043734, "grad_norm": 0.2778984606266022, "learning_rate": 4.684698351003164e-05, "loss": 0.196, "step": 13760 }, { "epoch": 0.24544287090215103, "grad_norm": 0.22770462930202484, "learning_rate": 4.6846226782609915e-05, "loss": 0.174, "step": 13761 }, { "epoch": 0.24546070702386474, "grad_norm": 0.3416846990585327, "learning_rate": 4.6845469970504675e-05, "loss": 0.1793, "step": 13762 }, { "epoch": 0.24547854314557843, "grad_norm": 0.2990380525588989, "learning_rate": 4.684471307371884e-05, "loss": 0.1673, "step": 13763 }, { "epoch": 0.24549637926729212, "grad_norm": 0.32841476798057556, "learning_rate": 4.6843956092255346e-05, "loss": 0.1477, "step": 13764 }, { "epoch": 0.2455142153890058, "grad_norm": 0.2731004059314728, "learning_rate": 4.6843199026117146e-05, "loss": 0.182, "step": 13765 }, { "epoch": 0.2455320515107195, "grad_norm": 0.2715333104133606, "learning_rate": 4.684244187530716e-05, "loss": 0.1863, "step": 13766 }, { "epoch": 0.2455498876324332, "grad_norm": 0.2028142809867859, "learning_rate": 4.684168463982832e-05, "loss": 0.1641, "step": 13767 }, { "epoch": 0.2455677237541469, "grad_norm": 0.23987728357315063, "learning_rate": 4.684092731968357e-05, "loss": 0.1654, "step": 13768 }, { "epoch": 0.2455855598758606, "grad_norm": 0.23922580480575562, "learning_rate": 4.684016991487584e-05, "loss": 0.198, "step": 13769 }, { "epoch": 0.24560339599757428, "grad_norm": 0.2323124259710312, "learning_rate": 4.683941242540807e-05, "loss": 0.142, "step": 13770 }, { "epoch": 0.245621232119288, "grad_norm": 0.2378569096326828, "learning_rate": 4.68386548512832e-05, "loss": 0.2355, "step": 13771 }, { "epoch": 0.24563906824100168, "grad_norm": 0.23443584144115448, "learning_rate": 4.6837897192504154e-05, "loss": 0.2043, "step": 13772 }, { "epoch": 0.24565690436271537, "grad_norm": 0.2111978828907013, "learning_rate": 4.6837139449073876e-05, "loss": 0.1934, "step": 13773 }, { "epoch": 0.24567474048442905, "grad_norm": 0.3395061790943146, "learning_rate": 4.6836381620995306e-05, "loss": 0.2197, "step": 13774 }, { "epoch": 0.24569257660614277, "grad_norm": 0.2784058153629303, "learning_rate": 4.683562370827138e-05, "loss": 0.2545, "step": 13775 }, { "epoch": 0.24571041272785646, "grad_norm": 0.291057288646698, "learning_rate": 4.683486571090503e-05, "loss": 0.2525, "step": 13776 }, { "epoch": 0.24572824884957015, "grad_norm": 0.3297308385372162, "learning_rate": 4.68341076288992e-05, "loss": 0.1446, "step": 13777 }, { "epoch": 0.24574608497128383, "grad_norm": 0.3100256621837616, "learning_rate": 4.6833349462256825e-05, "loss": 0.1808, "step": 13778 }, { "epoch": 0.24576392109299755, "grad_norm": 0.2140001505613327, "learning_rate": 4.6832591210980855e-05, "loss": 0.2233, "step": 13779 }, { "epoch": 0.24578175721471124, "grad_norm": 0.17805612087249756, "learning_rate": 4.683183287507421e-05, "loss": 0.1771, "step": 13780 }, { "epoch": 0.24579959333642493, "grad_norm": 0.23784080147743225, "learning_rate": 4.683107445453985e-05, "loss": 0.18, "step": 13781 }, { "epoch": 0.24581742945813861, "grad_norm": 0.23661647737026215, "learning_rate": 4.6830315949380696e-05, "loss": 0.1696, "step": 13782 }, { "epoch": 0.24583526557985233, "grad_norm": 0.22108282148838043, "learning_rate": 4.6829557359599705e-05, "loss": 0.1479, "step": 13783 }, { "epoch": 0.24585310170156602, "grad_norm": 0.327790766954422, "learning_rate": 4.682879868519981e-05, "loss": 0.1682, "step": 13784 }, { "epoch": 0.2458709378232797, "grad_norm": 0.3368685245513916, "learning_rate": 4.682803992618395e-05, "loss": 0.2027, "step": 13785 }, { "epoch": 0.2458887739449934, "grad_norm": 0.30198389291763306, "learning_rate": 4.682728108255506e-05, "loss": 0.1334, "step": 13786 }, { "epoch": 0.24590661006670708, "grad_norm": 0.2669535279273987, "learning_rate": 4.68265221543161e-05, "loss": 0.2092, "step": 13787 }, { "epoch": 0.2459244461884208, "grad_norm": 0.27204829454421997, "learning_rate": 4.682576314147e-05, "loss": 0.206, "step": 13788 }, { "epoch": 0.24594228231013449, "grad_norm": 0.30816584825515747, "learning_rate": 4.68250040440197e-05, "loss": 0.2155, "step": 13789 }, { "epoch": 0.24596011843184817, "grad_norm": 0.30811265110969543, "learning_rate": 4.6824244861968156e-05, "loss": 0.1887, "step": 13790 }, { "epoch": 0.24597795455356186, "grad_norm": 0.23519901931285858, "learning_rate": 4.682348559531829e-05, "loss": 0.2166, "step": 13791 }, { "epoch": 0.24599579067527558, "grad_norm": 0.3085532486438751, "learning_rate": 4.682272624407306e-05, "loss": 0.2236, "step": 13792 }, { "epoch": 0.24601362679698927, "grad_norm": 0.24834729731082916, "learning_rate": 4.682196680823541e-05, "loss": 0.1837, "step": 13793 }, { "epoch": 0.24603146291870295, "grad_norm": 0.2953791618347168, "learning_rate": 4.6821207287808274e-05, "loss": 0.182, "step": 13794 }, { "epoch": 0.24604929904041664, "grad_norm": 0.3037715256214142, "learning_rate": 4.68204476827946e-05, "loss": 0.2271, "step": 13795 }, { "epoch": 0.24606713516213036, "grad_norm": 0.23621883988380432, "learning_rate": 4.681968799319734e-05, "loss": 0.1707, "step": 13796 }, { "epoch": 0.24608497128384405, "grad_norm": 0.5135491490364075, "learning_rate": 4.681892821901943e-05, "loss": 0.1852, "step": 13797 }, { "epoch": 0.24610280740555773, "grad_norm": 0.21563830971717834, "learning_rate": 4.681816836026381e-05, "loss": 0.1778, "step": 13798 }, { "epoch": 0.24612064352727142, "grad_norm": 0.2344902902841568, "learning_rate": 4.681740841693345e-05, "loss": 0.1944, "step": 13799 }, { "epoch": 0.24613847964898514, "grad_norm": 0.20485229790210724, "learning_rate": 4.681664838903127e-05, "loss": 0.1774, "step": 13800 }, { "epoch": 0.24615631577069882, "grad_norm": 0.25551292300224304, "learning_rate": 4.681588827656023e-05, "loss": 0.1759, "step": 13801 }, { "epoch": 0.2461741518924125, "grad_norm": 0.2572571933269501, "learning_rate": 4.681512807952326e-05, "loss": 0.2027, "step": 13802 }, { "epoch": 0.2461919880141262, "grad_norm": 0.2675979733467102, "learning_rate": 4.681436779792333e-05, "loss": 0.1601, "step": 13803 }, { "epoch": 0.24620982413583992, "grad_norm": 0.27787625789642334, "learning_rate": 4.681360743176337e-05, "loss": 0.1713, "step": 13804 }, { "epoch": 0.2462276602575536, "grad_norm": 0.2584262788295746, "learning_rate": 4.6812846981046346e-05, "loss": 0.1799, "step": 13805 }, { "epoch": 0.2462454963792673, "grad_norm": 0.3029763400554657, "learning_rate": 4.6812086445775185e-05, "loss": 0.2036, "step": 13806 }, { "epoch": 0.24626333250098098, "grad_norm": 0.46581169962882996, "learning_rate": 4.6811325825952844e-05, "loss": 0.16, "step": 13807 }, { "epoch": 0.24628116862269467, "grad_norm": 0.3240189850330353, "learning_rate": 4.681056512158227e-05, "loss": 0.1892, "step": 13808 }, { "epoch": 0.24629900474440838, "grad_norm": 0.24673877656459808, "learning_rate": 4.680980433266641e-05, "loss": 0.1674, "step": 13809 }, { "epoch": 0.24631684086612207, "grad_norm": 0.20339319109916687, "learning_rate": 4.6809043459208216e-05, "loss": 0.0996, "step": 13810 }, { "epoch": 0.24633467698783576, "grad_norm": 0.22289863228797913, "learning_rate": 4.680828250121064e-05, "loss": 0.1752, "step": 13811 }, { "epoch": 0.24635251310954945, "grad_norm": 0.31208154559135437, "learning_rate": 4.680752145867663e-05, "loss": 0.1882, "step": 13812 }, { "epoch": 0.24637034923126316, "grad_norm": 0.2521425783634186, "learning_rate": 4.680676033160913e-05, "loss": 0.1818, "step": 13813 }, { "epoch": 0.24638818535297685, "grad_norm": 0.25903889536857605, "learning_rate": 4.6805999120011093e-05, "loss": 0.1709, "step": 13814 }, { "epoch": 0.24640602147469054, "grad_norm": 0.32050594687461853, "learning_rate": 4.680523782388548e-05, "loss": 0.1938, "step": 13815 }, { "epoch": 0.24642385759640423, "grad_norm": 0.31170520186424255, "learning_rate": 4.680447644323523e-05, "loss": 0.2025, "step": 13816 }, { "epoch": 0.24644169371811794, "grad_norm": 0.2918287217617035, "learning_rate": 4.68037149780633e-05, "loss": 0.1677, "step": 13817 }, { "epoch": 0.24645952983983163, "grad_norm": 0.3026082515716553, "learning_rate": 4.680295342837263e-05, "loss": 0.1649, "step": 13818 }, { "epoch": 0.24647736596154532, "grad_norm": 0.2618713974952698, "learning_rate": 4.680219179416619e-05, "loss": 0.1933, "step": 13819 }, { "epoch": 0.246495202083259, "grad_norm": 0.31092211604118347, "learning_rate": 4.680143007544693e-05, "loss": 0.2254, "step": 13820 }, { "epoch": 0.24651303820497272, "grad_norm": 0.27318161725997925, "learning_rate": 4.6800668272217795e-05, "loss": 0.1807, "step": 13821 }, { "epoch": 0.2465308743266864, "grad_norm": 0.2453472763299942, "learning_rate": 4.6799906384481735e-05, "loss": 0.1732, "step": 13822 }, { "epoch": 0.2465487104484001, "grad_norm": 0.32099649310112, "learning_rate": 4.679914441224171e-05, "loss": 0.1928, "step": 13823 }, { "epoch": 0.2465665465701138, "grad_norm": 0.35580796003341675, "learning_rate": 4.679838235550067e-05, "loss": 0.1881, "step": 13824 }, { "epoch": 0.2465843826918275, "grad_norm": 0.32416588068008423, "learning_rate": 4.6797620214261574e-05, "loss": 0.1656, "step": 13825 }, { "epoch": 0.2466022188135412, "grad_norm": 0.28813648223876953, "learning_rate": 4.679685798852738e-05, "loss": 0.1873, "step": 13826 }, { "epoch": 0.24662005493525488, "grad_norm": 0.30523252487182617, "learning_rate": 4.679609567830102e-05, "loss": 0.1882, "step": 13827 }, { "epoch": 0.24663789105696857, "grad_norm": 0.3084180951118469, "learning_rate": 4.679533328358547e-05, "loss": 0.2246, "step": 13828 }, { "epoch": 0.24665572717868225, "grad_norm": 0.3172524869441986, "learning_rate": 4.6794570804383685e-05, "loss": 0.2964, "step": 13829 }, { "epoch": 0.24667356330039597, "grad_norm": 0.2175578624010086, "learning_rate": 4.679380824069862e-05, "loss": 0.1844, "step": 13830 }, { "epoch": 0.24669139942210966, "grad_norm": 0.28899720311164856, "learning_rate": 4.6793045592533225e-05, "loss": 0.2349, "step": 13831 }, { "epoch": 0.24670923554382335, "grad_norm": 0.3691122829914093, "learning_rate": 4.679228285989045e-05, "loss": 0.2098, "step": 13832 }, { "epoch": 0.24672707166553703, "grad_norm": 0.24319787323474884, "learning_rate": 4.679152004277327e-05, "loss": 0.2177, "step": 13833 }, { "epoch": 0.24674490778725075, "grad_norm": 0.34025055170059204, "learning_rate": 4.6790757141184626e-05, "loss": 0.2257, "step": 13834 }, { "epoch": 0.24676274390896444, "grad_norm": 0.22080552577972412, "learning_rate": 4.678999415512748e-05, "loss": 0.1809, "step": 13835 }, { "epoch": 0.24678058003067813, "grad_norm": 0.2829233407974243, "learning_rate": 4.678923108460479e-05, "loss": 0.2224, "step": 13836 }, { "epoch": 0.24679841615239181, "grad_norm": 0.2799453139305115, "learning_rate": 4.678846792961952e-05, "loss": 0.1787, "step": 13837 }, { "epoch": 0.24681625227410553, "grad_norm": 0.25913748145103455, "learning_rate": 4.678770469017462e-05, "loss": 0.2047, "step": 13838 }, { "epoch": 0.24683408839581922, "grad_norm": 0.22933343052864075, "learning_rate": 4.678694136627305e-05, "loss": 0.188, "step": 13839 }, { "epoch": 0.2468519245175329, "grad_norm": 0.21951298415660858, "learning_rate": 4.678617795791777e-05, "loss": 0.1366, "step": 13840 }, { "epoch": 0.2468697606392466, "grad_norm": 0.24158070981502533, "learning_rate": 4.678541446511174e-05, "loss": 0.2009, "step": 13841 }, { "epoch": 0.2468875967609603, "grad_norm": 0.20856155455112457, "learning_rate": 4.6784650887857926e-05, "loss": 0.181, "step": 13842 }, { "epoch": 0.246905432882674, "grad_norm": 0.27052634954452515, "learning_rate": 4.6783887226159276e-05, "loss": 0.1397, "step": 13843 }, { "epoch": 0.24692326900438769, "grad_norm": 0.2849443256855011, "learning_rate": 4.678312348001875e-05, "loss": 0.1978, "step": 13844 }, { "epoch": 0.24694110512610137, "grad_norm": 0.26534637808799744, "learning_rate": 4.678235964943932e-05, "loss": 0.1231, "step": 13845 }, { "epoch": 0.24695894124781506, "grad_norm": 0.2178906798362732, "learning_rate": 4.678159573442394e-05, "loss": 0.1849, "step": 13846 }, { "epoch": 0.24697677736952878, "grad_norm": 0.2549191415309906, "learning_rate": 4.6780831734975566e-05, "loss": 0.1632, "step": 13847 }, { "epoch": 0.24699461349124247, "grad_norm": 0.31294724345207214, "learning_rate": 4.678006765109717e-05, "loss": 0.2104, "step": 13848 }, { "epoch": 0.24701244961295615, "grad_norm": 0.41884496808052063, "learning_rate": 4.677930348279171e-05, "loss": 0.1686, "step": 13849 }, { "epoch": 0.24703028573466984, "grad_norm": 0.2828907370567322, "learning_rate": 4.6778539230062144e-05, "loss": 0.2075, "step": 13850 }, { "epoch": 0.24704812185638356, "grad_norm": 0.2417198121547699, "learning_rate": 4.6777774892911443e-05, "loss": 0.2292, "step": 13851 }, { "epoch": 0.24706595797809724, "grad_norm": 0.2362624704837799, "learning_rate": 4.6777010471342566e-05, "loss": 0.208, "step": 13852 }, { "epoch": 0.24708379409981093, "grad_norm": 0.2034156620502472, "learning_rate": 4.677624596535847e-05, "loss": 0.1718, "step": 13853 }, { "epoch": 0.24710163022152462, "grad_norm": 0.2332681566476822, "learning_rate": 4.6775481374962113e-05, "loss": 0.1626, "step": 13854 }, { "epoch": 0.24711946634323834, "grad_norm": 0.42489734292030334, "learning_rate": 4.677471670015649e-05, "loss": 0.2102, "step": 13855 }, { "epoch": 0.24713730246495202, "grad_norm": 0.24739526212215424, "learning_rate": 4.677395194094453e-05, "loss": 0.1599, "step": 13856 }, { "epoch": 0.2471551385866657, "grad_norm": 0.2923029363155365, "learning_rate": 4.677318709732922e-05, "loss": 0.1866, "step": 13857 }, { "epoch": 0.2471729747083794, "grad_norm": 0.34048333764076233, "learning_rate": 4.677242216931351e-05, "loss": 0.234, "step": 13858 }, { "epoch": 0.24719081083009312, "grad_norm": 0.22982923686504364, "learning_rate": 4.677165715690038e-05, "loss": 0.1714, "step": 13859 }, { "epoch": 0.2472086469518068, "grad_norm": 0.24919088184833527, "learning_rate": 4.6770892060092774e-05, "loss": 0.1907, "step": 13860 }, { "epoch": 0.2472264830735205, "grad_norm": 0.2675471007823944, "learning_rate": 4.6770126878893684e-05, "loss": 0.1524, "step": 13861 }, { "epoch": 0.24724431919523418, "grad_norm": 1.0161223411560059, "learning_rate": 4.676936161330606e-05, "loss": 0.2187, "step": 13862 }, { "epoch": 0.2472621553169479, "grad_norm": 0.26066574454307556, "learning_rate": 4.676859626333287e-05, "loss": 0.1955, "step": 13863 }, { "epoch": 0.24727999143866158, "grad_norm": 0.2394704967737198, "learning_rate": 4.6767830828977076e-05, "loss": 0.2172, "step": 13864 }, { "epoch": 0.24729782756037527, "grad_norm": 0.22838497161865234, "learning_rate": 4.676706531024166e-05, "loss": 0.1336, "step": 13865 }, { "epoch": 0.24731566368208896, "grad_norm": 0.5352074503898621, "learning_rate": 4.6766299707129576e-05, "loss": 0.2227, "step": 13866 }, { "epoch": 0.24733349980380265, "grad_norm": 0.26973477005958557, "learning_rate": 4.6765534019643796e-05, "loss": 0.1862, "step": 13867 }, { "epoch": 0.24735133592551636, "grad_norm": 0.26896733045578003, "learning_rate": 4.6764768247787294e-05, "loss": 0.239, "step": 13868 }, { "epoch": 0.24736917204723005, "grad_norm": 0.2494468241930008, "learning_rate": 4.676400239156303e-05, "loss": 0.2067, "step": 13869 }, { "epoch": 0.24738700816894374, "grad_norm": 0.33378568291664124, "learning_rate": 4.676323645097398e-05, "loss": 0.2509, "step": 13870 }, { "epoch": 0.24740484429065743, "grad_norm": 0.22868886590003967, "learning_rate": 4.67624704260231e-05, "loss": 0.1771, "step": 13871 }, { "epoch": 0.24742268041237114, "grad_norm": 0.23843562602996826, "learning_rate": 4.676170431671337e-05, "loss": 0.1774, "step": 13872 }, { "epoch": 0.24744051653408483, "grad_norm": 0.22828061878681183, "learning_rate": 4.6760938123047763e-05, "loss": 0.2106, "step": 13873 }, { "epoch": 0.24745835265579852, "grad_norm": 0.23694480955600739, "learning_rate": 4.676017184502924e-05, "loss": 0.1786, "step": 13874 }, { "epoch": 0.2474761887775122, "grad_norm": 0.2634585201740265, "learning_rate": 4.675940548266078e-05, "loss": 0.177, "step": 13875 }, { "epoch": 0.24749402489922592, "grad_norm": 0.23659314215183258, "learning_rate": 4.675863903594534e-05, "loss": 0.1845, "step": 13876 }, { "epoch": 0.2475118610209396, "grad_norm": 0.42080968618392944, "learning_rate": 4.6757872504885906e-05, "loss": 0.1954, "step": 13877 }, { "epoch": 0.2475296971426533, "grad_norm": 0.24054062366485596, "learning_rate": 4.675710588948544e-05, "loss": 0.1877, "step": 13878 }, { "epoch": 0.247547533264367, "grad_norm": 0.3301849663257599, "learning_rate": 4.6756339189746925e-05, "loss": 0.2279, "step": 13879 }, { "epoch": 0.2475653693860807, "grad_norm": 0.2731056213378906, "learning_rate": 4.675557240567332e-05, "loss": 0.1846, "step": 13880 }, { "epoch": 0.2475832055077944, "grad_norm": 0.26883235573768616, "learning_rate": 4.67548055372676e-05, "loss": 0.1695, "step": 13881 }, { "epoch": 0.24760104162950808, "grad_norm": 0.2931753695011139, "learning_rate": 4.675403858453274e-05, "loss": 0.192, "step": 13882 }, { "epoch": 0.24761887775122177, "grad_norm": 0.2757262885570526, "learning_rate": 4.675327154747171e-05, "loss": 0.192, "step": 13883 }, { "epoch": 0.24763671387293548, "grad_norm": 0.2826712131500244, "learning_rate": 4.67525044260875e-05, "loss": 0.2447, "step": 13884 }, { "epoch": 0.24765454999464917, "grad_norm": 0.2587839663028717, "learning_rate": 4.6751737220383054e-05, "loss": 0.1586, "step": 13885 }, { "epoch": 0.24767238611636286, "grad_norm": 0.3076946437358856, "learning_rate": 4.675096993036137e-05, "loss": 0.2106, "step": 13886 }, { "epoch": 0.24769022223807655, "grad_norm": 0.30966705083847046, "learning_rate": 4.675020255602541e-05, "loss": 0.1426, "step": 13887 }, { "epoch": 0.24770805835979023, "grad_norm": 0.2736508548259735, "learning_rate": 4.674943509737815e-05, "loss": 0.1293, "step": 13888 }, { "epoch": 0.24772589448150395, "grad_norm": 0.2839398682117462, "learning_rate": 4.6748667554422575e-05, "loss": 0.1608, "step": 13889 }, { "epoch": 0.24774373060321764, "grad_norm": 0.2591922879219055, "learning_rate": 4.674789992716165e-05, "loss": 0.2201, "step": 13890 }, { "epoch": 0.24776156672493133, "grad_norm": 0.3511675000190735, "learning_rate": 4.674713221559836e-05, "loss": 0.2231, "step": 13891 }, { "epoch": 0.247779402846645, "grad_norm": 0.21819651126861572, "learning_rate": 4.674636441973566e-05, "loss": 0.1782, "step": 13892 }, { "epoch": 0.24779723896835873, "grad_norm": 0.28787264227867126, "learning_rate": 4.6745596539576546e-05, "loss": 0.208, "step": 13893 }, { "epoch": 0.24781507509007242, "grad_norm": 0.2524046301841736, "learning_rate": 4.6744828575124e-05, "loss": 0.1806, "step": 13894 }, { "epoch": 0.2478329112117861, "grad_norm": 0.27405714988708496, "learning_rate": 4.674406052638097e-05, "loss": 0.2005, "step": 13895 }, { "epoch": 0.2478507473334998, "grad_norm": 0.2722206115722656, "learning_rate": 4.674329239335046e-05, "loss": 0.2047, "step": 13896 }, { "epoch": 0.2478685834552135, "grad_norm": 0.200779527425766, "learning_rate": 4.674252417603544e-05, "loss": 0.1557, "step": 13897 }, { "epoch": 0.2478864195769272, "grad_norm": 0.32716962695121765, "learning_rate": 4.6741755874438885e-05, "loss": 0.2071, "step": 13898 }, { "epoch": 0.24790425569864089, "grad_norm": 0.3389391601085663, "learning_rate": 4.674098748856378e-05, "loss": 0.1776, "step": 13899 }, { "epoch": 0.24792209182035457, "grad_norm": 0.2137138843536377, "learning_rate": 4.674021901841309e-05, "loss": 0.176, "step": 13900 }, { "epoch": 0.2479399279420683, "grad_norm": 0.29083147644996643, "learning_rate": 4.673945046398981e-05, "loss": 0.1618, "step": 13901 }, { "epoch": 0.24795776406378198, "grad_norm": 0.3967060446739197, "learning_rate": 4.6738681825296904e-05, "loss": 0.2201, "step": 13902 }, { "epoch": 0.24797560018549566, "grad_norm": 0.29259899258613586, "learning_rate": 4.673791310233737e-05, "loss": 0.2026, "step": 13903 }, { "epoch": 0.24799343630720935, "grad_norm": 0.3274999260902405, "learning_rate": 4.6737144295114164e-05, "loss": 0.209, "step": 13904 }, { "epoch": 0.24801127242892307, "grad_norm": 0.3241322636604309, "learning_rate": 4.673637540363028e-05, "loss": 0.1981, "step": 13905 }, { "epoch": 0.24802910855063676, "grad_norm": 0.24350115656852722, "learning_rate": 4.6735606427888705e-05, "loss": 0.198, "step": 13906 }, { "epoch": 0.24804694467235044, "grad_norm": 0.21662767231464386, "learning_rate": 4.6734837367892416e-05, "loss": 0.1729, "step": 13907 }, { "epoch": 0.24806478079406413, "grad_norm": 0.30358198285102844, "learning_rate": 4.6734068223644375e-05, "loss": 0.1966, "step": 13908 }, { "epoch": 0.24808261691577782, "grad_norm": 0.24385471642017365, "learning_rate": 4.673329899514759e-05, "loss": 0.1605, "step": 13909 }, { "epoch": 0.24810045303749154, "grad_norm": 0.24042540788650513, "learning_rate": 4.673252968240503e-05, "loss": 0.1512, "step": 13910 }, { "epoch": 0.24811828915920522, "grad_norm": 0.22144049406051636, "learning_rate": 4.673176028541968e-05, "loss": 0.193, "step": 13911 }, { "epoch": 0.2481361252809189, "grad_norm": 0.24732889235019684, "learning_rate": 4.6730990804194516e-05, "loss": 0.2004, "step": 13912 }, { "epoch": 0.2481539614026326, "grad_norm": 0.28715449571609497, "learning_rate": 4.673022123873253e-05, "loss": 0.2223, "step": 13913 }, { "epoch": 0.24817179752434632, "grad_norm": 0.2218928039073944, "learning_rate": 4.67294515890367e-05, "loss": 0.1211, "step": 13914 }, { "epoch": 0.24818963364606, "grad_norm": 0.2684513032436371, "learning_rate": 4.672868185511001e-05, "loss": 0.2213, "step": 13915 }, { "epoch": 0.2482074697677737, "grad_norm": 0.333819717168808, "learning_rate": 4.672791203695545e-05, "loss": 0.2066, "step": 13916 }, { "epoch": 0.24822530588948738, "grad_norm": 0.34282979369163513, "learning_rate": 4.672714213457599e-05, "loss": 0.1565, "step": 13917 }, { "epoch": 0.2482431420112011, "grad_norm": 0.2980656325817108, "learning_rate": 4.672637214797463e-05, "loss": 0.2044, "step": 13918 }, { "epoch": 0.24826097813291478, "grad_norm": 0.36185529828071594, "learning_rate": 4.672560207715434e-05, "loss": 0.2064, "step": 13919 }, { "epoch": 0.24827881425462847, "grad_norm": 0.27268537878990173, "learning_rate": 4.672483192211812e-05, "loss": 0.1905, "step": 13920 }, { "epoch": 0.24829665037634216, "grad_norm": 0.26096203923225403, "learning_rate": 4.672406168286894e-05, "loss": 0.1015, "step": 13921 }, { "epoch": 0.24831448649805588, "grad_norm": 0.3976256847381592, "learning_rate": 4.672329135940979e-05, "loss": 0.1928, "step": 13922 }, { "epoch": 0.24833232261976956, "grad_norm": 0.26295849680900574, "learning_rate": 4.6722520951743675e-05, "loss": 0.1487, "step": 13923 }, { "epoch": 0.24835015874148325, "grad_norm": 0.3481968939304352, "learning_rate": 4.672175045987356e-05, "loss": 0.2325, "step": 13924 }, { "epoch": 0.24836799486319694, "grad_norm": 0.37518441677093506, "learning_rate": 4.6720979883802435e-05, "loss": 0.246, "step": 13925 }, { "epoch": 0.24838583098491063, "grad_norm": 0.36704546213150024, "learning_rate": 4.672020922353329e-05, "loss": 0.2161, "step": 13926 }, { "epoch": 0.24840366710662434, "grad_norm": 0.2759122848510742, "learning_rate": 4.671943847906911e-05, "loss": 0.1594, "step": 13927 }, { "epoch": 0.24842150322833803, "grad_norm": 0.33444783091545105, "learning_rate": 4.67186676504129e-05, "loss": 0.2176, "step": 13928 }, { "epoch": 0.24843933935005172, "grad_norm": 0.2398654967546463, "learning_rate": 4.671789673756761e-05, "loss": 0.1841, "step": 13929 }, { "epoch": 0.2484571754717654, "grad_norm": 0.30182909965515137, "learning_rate": 4.671712574053626e-05, "loss": 0.1439, "step": 13930 }, { "epoch": 0.24847501159347912, "grad_norm": 0.279514878988266, "learning_rate": 4.671635465932184e-05, "loss": 0.1532, "step": 13931 }, { "epoch": 0.2484928477151928, "grad_norm": 0.28951379656791687, "learning_rate": 4.671558349392732e-05, "loss": 0.1933, "step": 13932 }, { "epoch": 0.2485106838369065, "grad_norm": 0.31557130813598633, "learning_rate": 4.671481224435569e-05, "loss": 0.21, "step": 13933 }, { "epoch": 0.2485285199586202, "grad_norm": 0.24612878262996674, "learning_rate": 4.6714040910609956e-05, "loss": 0.2274, "step": 13934 }, { "epoch": 0.2485463560803339, "grad_norm": 0.25948524475097656, "learning_rate": 4.67132694926931e-05, "loss": 0.2098, "step": 13935 }, { "epoch": 0.2485641922020476, "grad_norm": 0.24691280722618103, "learning_rate": 4.671249799060812e-05, "loss": 0.1869, "step": 13936 }, { "epoch": 0.24858202832376128, "grad_norm": 0.2153436243534088, "learning_rate": 4.6711726404357984e-05, "loss": 0.1731, "step": 13937 }, { "epoch": 0.24859986444547497, "grad_norm": 0.23904675245285034, "learning_rate": 4.671095473394571e-05, "loss": 0.193, "step": 13938 }, { "epoch": 0.24861770056718868, "grad_norm": 0.26901721954345703, "learning_rate": 4.6710182979374266e-05, "loss": 0.224, "step": 13939 }, { "epoch": 0.24863553668890237, "grad_norm": 0.23426665365695953, "learning_rate": 4.670941114064666e-05, "loss": 0.1704, "step": 13940 }, { "epoch": 0.24865337281061606, "grad_norm": 0.26688480377197266, "learning_rate": 4.670863921776588e-05, "loss": 0.1688, "step": 13941 }, { "epoch": 0.24867120893232975, "grad_norm": 0.37944281101226807, "learning_rate": 4.670786721073491e-05, "loss": 0.1455, "step": 13942 }, { "epoch": 0.24868904505404346, "grad_norm": 0.3439227342605591, "learning_rate": 4.6707095119556754e-05, "loss": 0.1205, "step": 13943 }, { "epoch": 0.24870688117575715, "grad_norm": 0.31361326575279236, "learning_rate": 4.670632294423439e-05, "loss": 0.1954, "step": 13944 }, { "epoch": 0.24872471729747084, "grad_norm": 0.2773951590061188, "learning_rate": 4.6705550684770835e-05, "loss": 0.2428, "step": 13945 }, { "epoch": 0.24874255341918453, "grad_norm": 0.2939739227294922, "learning_rate": 4.670477834116906e-05, "loss": 0.1485, "step": 13946 }, { "epoch": 0.2487603895408982, "grad_norm": 0.2697181701660156, "learning_rate": 4.6704005913432076e-05, "loss": 0.1402, "step": 13947 }, { "epoch": 0.24877822566261193, "grad_norm": 0.3270324468612671, "learning_rate": 4.6703233401562864e-05, "loss": 0.2131, "step": 13948 }, { "epoch": 0.24879606178432562, "grad_norm": 0.21917343139648438, "learning_rate": 4.6702460805564416e-05, "loss": 0.1686, "step": 13949 }, { "epoch": 0.2488138979060393, "grad_norm": 0.2303187996149063, "learning_rate": 4.6701688125439746e-05, "loss": 0.1687, "step": 13950 }, { "epoch": 0.248831734027753, "grad_norm": 0.2199353575706482, "learning_rate": 4.670091536119183e-05, "loss": 0.1237, "step": 13951 }, { "epoch": 0.2488495701494667, "grad_norm": 0.22015100717544556, "learning_rate": 4.6700142512823676e-05, "loss": 0.1531, "step": 13952 }, { "epoch": 0.2488674062711804, "grad_norm": 0.43796634674072266, "learning_rate": 4.669936958033827e-05, "loss": 0.2684, "step": 13953 }, { "epoch": 0.24888524239289408, "grad_norm": 0.2625996768474579, "learning_rate": 4.669859656373862e-05, "loss": 0.1964, "step": 13954 }, { "epoch": 0.24890307851460777, "grad_norm": 0.2555631101131439, "learning_rate": 4.669782346302771e-05, "loss": 0.2439, "step": 13955 }, { "epoch": 0.2489209146363215, "grad_norm": 0.3084341287612915, "learning_rate": 4.669705027820854e-05, "loss": 0.1401, "step": 13956 }, { "epoch": 0.24893875075803518, "grad_norm": 0.30264008045196533, "learning_rate": 4.669627700928411e-05, "loss": 0.1897, "step": 13957 }, { "epoch": 0.24895658687974886, "grad_norm": 0.18788209557533264, "learning_rate": 4.669550365625742e-05, "loss": 0.1675, "step": 13958 }, { "epoch": 0.24897442300146255, "grad_norm": 0.3613806664943695, "learning_rate": 4.669473021913146e-05, "loss": 0.1809, "step": 13959 }, { "epoch": 0.24899225912317627, "grad_norm": 0.26294445991516113, "learning_rate": 4.6693956697909236e-05, "loss": 0.1808, "step": 13960 }, { "epoch": 0.24901009524488996, "grad_norm": 0.2537631690502167, "learning_rate": 4.669318309259374e-05, "loss": 0.2106, "step": 13961 }, { "epoch": 0.24902793136660364, "grad_norm": 0.32182762026786804, "learning_rate": 4.669240940318797e-05, "loss": 0.1997, "step": 13962 }, { "epoch": 0.24904576748831733, "grad_norm": 0.304286926984787, "learning_rate": 4.669163562969494e-05, "loss": 0.231, "step": 13963 }, { "epoch": 0.24906360361003105, "grad_norm": 0.23288606107234955, "learning_rate": 4.669086177211763e-05, "loss": 0.1375, "step": 13964 }, { "epoch": 0.24908143973174474, "grad_norm": 0.2737966477870941, "learning_rate": 4.6690087830459053e-05, "loss": 0.1584, "step": 13965 }, { "epoch": 0.24909927585345842, "grad_norm": 0.28110888600349426, "learning_rate": 4.6689313804722204e-05, "loss": 0.1887, "step": 13966 }, { "epoch": 0.2491171119751721, "grad_norm": 0.3648214340209961, "learning_rate": 4.6688539694910084e-05, "loss": 0.1903, "step": 13967 }, { "epoch": 0.2491349480968858, "grad_norm": 0.3305388391017914, "learning_rate": 4.668776550102568e-05, "loss": 0.1804, "step": 13968 }, { "epoch": 0.24915278421859952, "grad_norm": 0.3478490710258484, "learning_rate": 4.668699122307202e-05, "loss": 0.1593, "step": 13969 }, { "epoch": 0.2491706203403132, "grad_norm": 0.35136494040489197, "learning_rate": 4.668621686105209e-05, "loss": 0.2062, "step": 13970 }, { "epoch": 0.2491884564620269, "grad_norm": 0.2550809681415558, "learning_rate": 4.6685442414968895e-05, "loss": 0.2109, "step": 13971 }, { "epoch": 0.24920629258374058, "grad_norm": 0.3216593563556671, "learning_rate": 4.668466788482543e-05, "loss": 0.1921, "step": 13972 }, { "epoch": 0.2492241287054543, "grad_norm": 0.23060789704322815, "learning_rate": 4.66838932706247e-05, "loss": 0.1681, "step": 13973 }, { "epoch": 0.24924196482716798, "grad_norm": 0.4060821235179901, "learning_rate": 4.668311857236972e-05, "loss": 0.1633, "step": 13974 }, { "epoch": 0.24925980094888167, "grad_norm": 0.253292441368103, "learning_rate": 4.668234379006348e-05, "loss": 0.1831, "step": 13975 }, { "epoch": 0.24927763707059536, "grad_norm": 0.25369203090667725, "learning_rate": 4.668156892370898e-05, "loss": 0.1615, "step": 13976 }, { "epoch": 0.24929547319230907, "grad_norm": 0.31818199157714844, "learning_rate": 4.668079397330923e-05, "loss": 0.21, "step": 13977 }, { "epoch": 0.24931330931402276, "grad_norm": 0.3466501235961914, "learning_rate": 4.6680018938867246e-05, "loss": 0.2102, "step": 13978 }, { "epoch": 0.24933114543573645, "grad_norm": 0.2604408264160156, "learning_rate": 4.667924382038601e-05, "loss": 0.1582, "step": 13979 }, { "epoch": 0.24934898155745014, "grad_norm": 0.24600160121917725, "learning_rate": 4.6678468617868545e-05, "loss": 0.1945, "step": 13980 }, { "epoch": 0.24936681767916385, "grad_norm": 0.22015896439552307, "learning_rate": 4.667769333131784e-05, "loss": 0.1889, "step": 13981 }, { "epoch": 0.24938465380087754, "grad_norm": 0.20485515892505646, "learning_rate": 4.667691796073691e-05, "loss": 0.1903, "step": 13982 }, { "epoch": 0.24940248992259123, "grad_norm": 0.37513837218284607, "learning_rate": 4.667614250612876e-05, "loss": 0.2642, "step": 13983 }, { "epoch": 0.24942032604430492, "grad_norm": 0.23538362979888916, "learning_rate": 4.6675366967496405e-05, "loss": 0.1712, "step": 13984 }, { "epoch": 0.24943816216601863, "grad_norm": 0.2649373710155487, "learning_rate": 4.6674591344842824e-05, "loss": 0.2313, "step": 13985 }, { "epoch": 0.24945599828773232, "grad_norm": 0.2700726389884949, "learning_rate": 4.667381563817105e-05, "loss": 0.1567, "step": 13986 }, { "epoch": 0.249473834409446, "grad_norm": 0.3040487468242645, "learning_rate": 4.667303984748408e-05, "loss": 0.1894, "step": 13987 }, { "epoch": 0.2494916705311597, "grad_norm": 0.26262998580932617, "learning_rate": 4.6672263972784925e-05, "loss": 0.1648, "step": 13988 }, { "epoch": 0.2495095066528734, "grad_norm": 0.3377794623374939, "learning_rate": 4.667148801407658e-05, "loss": 0.2279, "step": 13989 }, { "epoch": 0.2495273427745871, "grad_norm": 0.3334151804447174, "learning_rate": 4.667071197136207e-05, "loss": 0.2328, "step": 13990 }, { "epoch": 0.2495451788963008, "grad_norm": 0.30604588985443115, "learning_rate": 4.6669935844644397e-05, "loss": 0.158, "step": 13991 }, { "epoch": 0.24956301501801448, "grad_norm": 0.28032323718070984, "learning_rate": 4.6669159633926564e-05, "loss": 0.1416, "step": 13992 }, { "epoch": 0.24958085113972817, "grad_norm": 0.264448344707489, "learning_rate": 4.6668383339211585e-05, "loss": 0.1688, "step": 13993 }, { "epoch": 0.24959868726144188, "grad_norm": 0.29496899247169495, "learning_rate": 4.6667606960502474e-05, "loss": 0.1994, "step": 13994 }, { "epoch": 0.24961652338315557, "grad_norm": 0.27877846360206604, "learning_rate": 4.6666830497802226e-05, "loss": 0.166, "step": 13995 }, { "epoch": 0.24963435950486926, "grad_norm": 0.2546389698982239, "learning_rate": 4.6666053951113864e-05, "loss": 0.2118, "step": 13996 }, { "epoch": 0.24965219562658295, "grad_norm": 0.28670454025268555, "learning_rate": 4.666527732044039e-05, "loss": 0.211, "step": 13997 }, { "epoch": 0.24967003174829666, "grad_norm": 0.3268781900405884, "learning_rate": 4.6664500605784825e-05, "loss": 0.1489, "step": 13998 }, { "epoch": 0.24968786787001035, "grad_norm": 0.2760900855064392, "learning_rate": 4.6663723807150165e-05, "loss": 0.1437, "step": 13999 }, { "epoch": 0.24970570399172404, "grad_norm": 0.29700177907943726, "learning_rate": 4.666294692453943e-05, "loss": 0.1863, "step": 14000 }, { "epoch": 0.24970570399172404, "eval_loss": 0.18124181032180786, "eval_runtime": 107.1097, "eval_samples_per_second": 9.56, "eval_steps_per_second": 1.596, "step": 14000 }, { "epoch": 0.24972354011343773, "grad_norm": 0.2769380211830139, "learning_rate": 4.6662169957955636e-05, "loss": 0.1687, "step": 14001 }, { "epoch": 0.24974137623515144, "grad_norm": 0.19689103960990906, "learning_rate": 4.666139290740179e-05, "loss": 0.1459, "step": 14002 }, { "epoch": 0.24975921235686513, "grad_norm": 0.27558207511901855, "learning_rate": 4.66606157728809e-05, "loss": 0.179, "step": 14003 }, { "epoch": 0.24977704847857882, "grad_norm": 0.3142256438732147, "learning_rate": 4.665983855439598e-05, "loss": 0.2099, "step": 14004 }, { "epoch": 0.2497948846002925, "grad_norm": 0.25825726985931396, "learning_rate": 4.665906125195004e-05, "loss": 0.1936, "step": 14005 }, { "epoch": 0.24981272072200622, "grad_norm": 0.32794031500816345, "learning_rate": 4.665828386554611e-05, "loss": 0.2615, "step": 14006 }, { "epoch": 0.2498305568437199, "grad_norm": 0.2211904674768448, "learning_rate": 4.665750639518719e-05, "loss": 0.1725, "step": 14007 }, { "epoch": 0.2498483929654336, "grad_norm": 0.3203650116920471, "learning_rate": 4.6656728840876285e-05, "loss": 0.2209, "step": 14008 }, { "epoch": 0.24986622908714728, "grad_norm": 0.2985434830188751, "learning_rate": 4.665595120261643e-05, "loss": 0.2001, "step": 14009 }, { "epoch": 0.24988406520886097, "grad_norm": 0.2967106103897095, "learning_rate": 4.665517348041062e-05, "loss": 0.2341, "step": 14010 }, { "epoch": 0.2499019013305747, "grad_norm": 0.278253972530365, "learning_rate": 4.665439567426188e-05, "loss": 0.2365, "step": 14011 }, { "epoch": 0.24991973745228838, "grad_norm": 0.23545487225055695, "learning_rate": 4.6653617784173226e-05, "loss": 0.1705, "step": 14012 }, { "epoch": 0.24993757357400206, "grad_norm": 0.26917949318885803, "learning_rate": 4.6652839810147666e-05, "loss": 0.2017, "step": 14013 }, { "epoch": 0.24995540969571575, "grad_norm": 0.28392860293388367, "learning_rate": 4.665206175218822e-05, "loss": 0.1958, "step": 14014 }, { "epoch": 0.24997324581742947, "grad_norm": 0.3414732813835144, "learning_rate": 4.6651283610297916e-05, "loss": 0.2426, "step": 14015 }, { "epoch": 0.24999108193914316, "grad_norm": 0.2396143674850464, "learning_rate": 4.665050538447975e-05, "loss": 0.1976, "step": 14016 }, { "epoch": 0.25000891806085684, "grad_norm": 0.30143946409225464, "learning_rate": 4.664972707473674e-05, "loss": 0.1727, "step": 14017 }, { "epoch": 0.25002675418257053, "grad_norm": 0.30427926778793335, "learning_rate": 4.664894868107192e-05, "loss": 0.212, "step": 14018 }, { "epoch": 0.2500445903042842, "grad_norm": 0.25081542134284973, "learning_rate": 4.66481702034883e-05, "loss": 0.1948, "step": 14019 }, { "epoch": 0.2500624264259979, "grad_norm": 0.28087326884269714, "learning_rate": 4.664739164198889e-05, "loss": 0.1889, "step": 14020 }, { "epoch": 0.25008026254771165, "grad_norm": 0.30892935395240784, "learning_rate": 4.664661299657671e-05, "loss": 0.1885, "step": 14021 }, { "epoch": 0.25009809866942534, "grad_norm": 0.2081720232963562, "learning_rate": 4.6645834267254785e-05, "loss": 0.1487, "step": 14022 }, { "epoch": 0.250115934791139, "grad_norm": 0.2211052030324936, "learning_rate": 4.6645055454026135e-05, "loss": 0.1445, "step": 14023 }, { "epoch": 0.2501337709128527, "grad_norm": 0.24615950882434845, "learning_rate": 4.664427655689376e-05, "loss": 0.2184, "step": 14024 }, { "epoch": 0.2501516070345664, "grad_norm": 0.24498583376407623, "learning_rate": 4.664349757586071e-05, "loss": 0.2185, "step": 14025 }, { "epoch": 0.2501694431562801, "grad_norm": 0.32786017656326294, "learning_rate": 4.664271851092998e-05, "loss": 0.2017, "step": 14026 }, { "epoch": 0.2501872792779938, "grad_norm": 0.22174333035945892, "learning_rate": 4.66419393621046e-05, "loss": 0.1657, "step": 14027 }, { "epoch": 0.25020511539970747, "grad_norm": 0.3068290948867798, "learning_rate": 4.664116012938758e-05, "loss": 0.1638, "step": 14028 }, { "epoch": 0.25022295152142116, "grad_norm": 0.26966479420661926, "learning_rate": 4.664038081278196e-05, "loss": 0.2089, "step": 14029 }, { "epoch": 0.2502407876431349, "grad_norm": 0.29627472162246704, "learning_rate": 4.663960141229075e-05, "loss": 0.2112, "step": 14030 }, { "epoch": 0.2502586237648486, "grad_norm": 0.3220474421977997, "learning_rate": 4.6638821927916966e-05, "loss": 0.2046, "step": 14031 }, { "epoch": 0.2502764598865623, "grad_norm": 0.30703917145729065, "learning_rate": 4.663804235966363e-05, "loss": 0.1178, "step": 14032 }, { "epoch": 0.25029429600827596, "grad_norm": 0.24785824120044708, "learning_rate": 4.663726270753377e-05, "loss": 0.1493, "step": 14033 }, { "epoch": 0.25031213212998965, "grad_norm": 0.27839118242263794, "learning_rate": 4.663648297153041e-05, "loss": 0.2109, "step": 14034 }, { "epoch": 0.25032996825170334, "grad_norm": 0.2667781412601471, "learning_rate": 4.663570315165657e-05, "loss": 0.1948, "step": 14035 }, { "epoch": 0.250347804373417, "grad_norm": 0.29322579503059387, "learning_rate": 4.663492324791527e-05, "loss": 0.2001, "step": 14036 }, { "epoch": 0.2503656404951307, "grad_norm": 0.206663116812706, "learning_rate": 4.6634143260309534e-05, "loss": 0.1887, "step": 14037 }, { "epoch": 0.25038347661684446, "grad_norm": 0.19180206954479218, "learning_rate": 4.663336318884239e-05, "loss": 0.1769, "step": 14038 }, { "epoch": 0.25040131273855815, "grad_norm": 0.31328073143959045, "learning_rate": 4.6632583033516855e-05, "loss": 0.2141, "step": 14039 }, { "epoch": 0.25041914886027183, "grad_norm": 0.2851082682609558, "learning_rate": 4.663180279433595e-05, "loss": 0.156, "step": 14040 }, { "epoch": 0.2504369849819855, "grad_norm": 0.34165433049201965, "learning_rate": 4.663102247130272e-05, "loss": 0.2319, "step": 14041 }, { "epoch": 0.2504548211036992, "grad_norm": 0.25247126817703247, "learning_rate": 4.663024206442017e-05, "loss": 0.2174, "step": 14042 }, { "epoch": 0.2504726572254129, "grad_norm": 0.31844043731689453, "learning_rate": 4.662946157369133e-05, "loss": 0.2612, "step": 14043 }, { "epoch": 0.2504904933471266, "grad_norm": 0.321954607963562, "learning_rate": 4.6628680999119226e-05, "loss": 0.2259, "step": 14044 }, { "epoch": 0.2505083294688403, "grad_norm": 0.23858575522899628, "learning_rate": 4.662790034070689e-05, "loss": 0.2276, "step": 14045 }, { "epoch": 0.250526165590554, "grad_norm": 0.43849530816078186, "learning_rate": 4.662711959845733e-05, "loss": 0.2025, "step": 14046 }, { "epoch": 0.2505440017122677, "grad_norm": 0.350673109292984, "learning_rate": 4.662633877237359e-05, "loss": 0.2841, "step": 14047 }, { "epoch": 0.2505618378339814, "grad_norm": 0.2753816843032837, "learning_rate": 4.6625557862458697e-05, "loss": 0.2114, "step": 14048 }, { "epoch": 0.2505796739556951, "grad_norm": 0.26957908272743225, "learning_rate": 4.662477686871567e-05, "loss": 0.229, "step": 14049 }, { "epoch": 0.25059751007740877, "grad_norm": 0.34246590733528137, "learning_rate": 4.6623995791147535e-05, "loss": 0.2168, "step": 14050 }, { "epoch": 0.25061534619912246, "grad_norm": 0.2836030423641205, "learning_rate": 4.662321462975733e-05, "loss": 0.1862, "step": 14051 }, { "epoch": 0.25063318232083615, "grad_norm": 0.24299070239067078, "learning_rate": 4.662243338454807e-05, "loss": 0.1729, "step": 14052 }, { "epoch": 0.25065101844254983, "grad_norm": 0.24866726994514465, "learning_rate": 4.662165205552279e-05, "loss": 0.2179, "step": 14053 }, { "epoch": 0.2506688545642635, "grad_norm": 0.23637741804122925, "learning_rate": 4.6620870642684525e-05, "loss": 0.1675, "step": 14054 }, { "epoch": 0.25068669068597726, "grad_norm": 0.3448881506919861, "learning_rate": 4.6620089146036294e-05, "loss": 0.1557, "step": 14055 }, { "epoch": 0.25070452680769095, "grad_norm": 0.2475176602602005, "learning_rate": 4.6619307565581126e-05, "loss": 0.1809, "step": 14056 }, { "epoch": 0.25072236292940464, "grad_norm": 0.3104894459247589, "learning_rate": 4.661852590132206e-05, "loss": 0.1852, "step": 14057 }, { "epoch": 0.25074019905111833, "grad_norm": 0.23213335871696472, "learning_rate": 4.661774415326212e-05, "loss": 0.1819, "step": 14058 }, { "epoch": 0.250758035172832, "grad_norm": 0.29872894287109375, "learning_rate": 4.661696232140434e-05, "loss": 0.1499, "step": 14059 }, { "epoch": 0.2507758712945457, "grad_norm": 0.4917716085910797, "learning_rate": 4.661618040575174e-05, "loss": 0.18, "step": 14060 }, { "epoch": 0.2507937074162594, "grad_norm": 0.2520187497138977, "learning_rate": 4.661539840630736e-05, "loss": 0.1463, "step": 14061 }, { "epoch": 0.2508115435379731, "grad_norm": 0.3358801305294037, "learning_rate": 4.661461632307424e-05, "loss": 0.2269, "step": 14062 }, { "epoch": 0.2508293796596868, "grad_norm": 0.29628220200538635, "learning_rate": 4.6613834156055396e-05, "loss": 0.2343, "step": 14063 }, { "epoch": 0.2508472157814005, "grad_norm": 0.22381003201007843, "learning_rate": 4.661305190525387e-05, "loss": 0.157, "step": 14064 }, { "epoch": 0.2508650519031142, "grad_norm": 0.2660071551799774, "learning_rate": 4.661226957067268e-05, "loss": 0.1248, "step": 14065 }, { "epoch": 0.2508828880248279, "grad_norm": 0.28703537583351135, "learning_rate": 4.661148715231487e-05, "loss": 0.1663, "step": 14066 }, { "epoch": 0.2509007241465416, "grad_norm": 2.0280601978302, "learning_rate": 4.661070465018348e-05, "loss": 0.183, "step": 14067 }, { "epoch": 0.25091856026825526, "grad_norm": 0.25482022762298584, "learning_rate": 4.660992206428153e-05, "loss": 0.1899, "step": 14068 }, { "epoch": 0.25093639638996895, "grad_norm": 0.27000340819358826, "learning_rate": 4.660913939461206e-05, "loss": 0.2151, "step": 14069 }, { "epoch": 0.25095423251168264, "grad_norm": 0.26275894045829773, "learning_rate": 4.6608356641178095e-05, "loss": 0.2011, "step": 14070 }, { "epoch": 0.25097206863339633, "grad_norm": 0.3071376383304596, "learning_rate": 4.6607573803982684e-05, "loss": 0.1868, "step": 14071 }, { "epoch": 0.25098990475511007, "grad_norm": 0.24079711735248566, "learning_rate": 4.660679088302885e-05, "loss": 0.1954, "step": 14072 }, { "epoch": 0.25100774087682376, "grad_norm": 0.3508301079273224, "learning_rate": 4.6606007878319634e-05, "loss": 0.2373, "step": 14073 }, { "epoch": 0.25102557699853745, "grad_norm": 0.2862575650215149, "learning_rate": 4.660522478985807e-05, "loss": 0.2352, "step": 14074 }, { "epoch": 0.25104341312025114, "grad_norm": 0.295369029045105, "learning_rate": 4.6604441617647185e-05, "loss": 0.2459, "step": 14075 }, { "epoch": 0.2510612492419648, "grad_norm": 0.22300460934638977, "learning_rate": 4.660365836169003e-05, "loss": 0.1982, "step": 14076 }, { "epoch": 0.2510790853636785, "grad_norm": 0.3116849958896637, "learning_rate": 4.660287502198963e-05, "loss": 0.1929, "step": 14077 }, { "epoch": 0.2510969214853922, "grad_norm": 0.2994225323200226, "learning_rate": 4.660209159854902e-05, "loss": 0.2041, "step": 14078 }, { "epoch": 0.2511147576071059, "grad_norm": 0.2952130436897278, "learning_rate": 4.660130809137125e-05, "loss": 0.1701, "step": 14079 }, { "epoch": 0.25113259372881963, "grad_norm": 0.18220709264278412, "learning_rate": 4.6600524500459355e-05, "loss": 0.1484, "step": 14080 }, { "epoch": 0.2511504298505333, "grad_norm": 0.24857710301876068, "learning_rate": 4.6599740825816354e-05, "loss": 0.2074, "step": 14081 }, { "epoch": 0.251168265972247, "grad_norm": 0.25650152564048767, "learning_rate": 4.6598957067445305e-05, "loss": 0.1814, "step": 14082 }, { "epoch": 0.2511861020939607, "grad_norm": 0.20584620535373688, "learning_rate": 4.659817322534924e-05, "loss": 0.1903, "step": 14083 }, { "epoch": 0.2512039382156744, "grad_norm": 0.2543705403804779, "learning_rate": 4.659738929953119e-05, "loss": 0.2189, "step": 14084 }, { "epoch": 0.25122177433738807, "grad_norm": 0.28457480669021606, "learning_rate": 4.65966052899942e-05, "loss": 0.2304, "step": 14085 }, { "epoch": 0.25123961045910176, "grad_norm": 0.23300664126873016, "learning_rate": 4.659582119674131e-05, "loss": 0.1641, "step": 14086 }, { "epoch": 0.25125744658081545, "grad_norm": 0.271808385848999, "learning_rate": 4.6595037019775554e-05, "loss": 0.1733, "step": 14087 }, { "epoch": 0.25127528270252913, "grad_norm": 0.44355881214141846, "learning_rate": 4.6594252759099974e-05, "loss": 0.1575, "step": 14088 }, { "epoch": 0.2512931188242429, "grad_norm": 0.28535714745521545, "learning_rate": 4.6593468414717624e-05, "loss": 0.1157, "step": 14089 }, { "epoch": 0.25131095494595657, "grad_norm": 0.32193076610565186, "learning_rate": 4.6592683986631524e-05, "loss": 0.1914, "step": 14090 }, { "epoch": 0.25132879106767025, "grad_norm": 0.25828805565834045, "learning_rate": 4.6591899474844726e-05, "loss": 0.1729, "step": 14091 }, { "epoch": 0.25134662718938394, "grad_norm": 0.32732248306274414, "learning_rate": 4.6591114879360265e-05, "loss": 0.2501, "step": 14092 }, { "epoch": 0.25136446331109763, "grad_norm": 0.21630723774433136, "learning_rate": 4.659033020018119e-05, "loss": 0.1578, "step": 14093 }, { "epoch": 0.2513822994328113, "grad_norm": 0.23651619255542755, "learning_rate": 4.6589545437310535e-05, "loss": 0.1892, "step": 14094 }, { "epoch": 0.251400135554525, "grad_norm": 0.20079255104064941, "learning_rate": 4.6588760590751346e-05, "loss": 0.1582, "step": 14095 }, { "epoch": 0.2514179716762387, "grad_norm": 0.2668065130710602, "learning_rate": 4.658797566050666e-05, "loss": 0.1923, "step": 14096 }, { "epoch": 0.25143580779795244, "grad_norm": 0.28694504499435425, "learning_rate": 4.658719064657952e-05, "loss": 0.207, "step": 14097 }, { "epoch": 0.2514536439196661, "grad_norm": 0.23585495352745056, "learning_rate": 4.658640554897299e-05, "loss": 0.1879, "step": 14098 }, { "epoch": 0.2514714800413798, "grad_norm": 0.30941495299339294, "learning_rate": 4.658562036769009e-05, "loss": 0.1921, "step": 14099 }, { "epoch": 0.2514893161630935, "grad_norm": 0.284712553024292, "learning_rate": 4.658483510273386e-05, "loss": 0.216, "step": 14100 }, { "epoch": 0.2515071522848072, "grad_norm": 0.217615008354187, "learning_rate": 4.658404975410736e-05, "loss": 0.1759, "step": 14101 }, { "epoch": 0.2515249884065209, "grad_norm": 0.3259997069835663, "learning_rate": 4.6583264321813634e-05, "loss": 0.2496, "step": 14102 }, { "epoch": 0.25154282452823457, "grad_norm": 0.31865620613098145, "learning_rate": 4.658247880585572e-05, "loss": 0.1605, "step": 14103 }, { "epoch": 0.25156066064994825, "grad_norm": 0.23795956373214722, "learning_rate": 4.6581693206236655e-05, "loss": 0.1717, "step": 14104 }, { "epoch": 0.251578496771662, "grad_norm": 0.27040642499923706, "learning_rate": 4.65809075229595e-05, "loss": 0.1955, "step": 14105 }, { "epoch": 0.2515963328933757, "grad_norm": 0.4033662974834442, "learning_rate": 4.6580121756027296e-05, "loss": 0.2579, "step": 14106 }, { "epoch": 0.2516141690150894, "grad_norm": 0.34568798542022705, "learning_rate": 4.657933590544308e-05, "loss": 0.213, "step": 14107 }, { "epoch": 0.25163200513680306, "grad_norm": 0.35102951526641846, "learning_rate": 4.6578549971209904e-05, "loss": 0.2091, "step": 14108 }, { "epoch": 0.25164984125851675, "grad_norm": 0.44054707884788513, "learning_rate": 4.6577763953330824e-05, "loss": 0.1845, "step": 14109 }, { "epoch": 0.25166767738023044, "grad_norm": 0.18548522889614105, "learning_rate": 4.6576977851808866e-05, "loss": 0.1632, "step": 14110 }, { "epoch": 0.2516855135019441, "grad_norm": 0.28369206190109253, "learning_rate": 4.6576191666647095e-05, "loss": 0.1716, "step": 14111 }, { "epoch": 0.2517033496236578, "grad_norm": 0.24996811151504517, "learning_rate": 4.657540539784856e-05, "loss": 0.1563, "step": 14112 }, { "epoch": 0.2517211857453715, "grad_norm": 0.2981433570384979, "learning_rate": 4.657461904541629e-05, "loss": 0.1759, "step": 14113 }, { "epoch": 0.25173902186708524, "grad_norm": 0.3042716681957245, "learning_rate": 4.657383260935335e-05, "loss": 0.1837, "step": 14114 }, { "epoch": 0.25175685798879893, "grad_norm": 0.21691346168518066, "learning_rate": 4.657304608966278e-05, "loss": 0.1632, "step": 14115 }, { "epoch": 0.2517746941105126, "grad_norm": 0.22383587062358856, "learning_rate": 4.6572259486347645e-05, "loss": 0.1602, "step": 14116 }, { "epoch": 0.2517925302322263, "grad_norm": 0.24241840839385986, "learning_rate": 4.657147279941097e-05, "loss": 0.1841, "step": 14117 }, { "epoch": 0.25181036635394, "grad_norm": 0.22580492496490479, "learning_rate": 4.657068602885582e-05, "loss": 0.1798, "step": 14118 }, { "epoch": 0.2518282024756537, "grad_norm": 0.3285813629627228, "learning_rate": 4.656989917468524e-05, "loss": 0.2322, "step": 14119 }, { "epoch": 0.25184603859736737, "grad_norm": 0.28502634167671204, "learning_rate": 4.656911223690228e-05, "loss": 0.2135, "step": 14120 }, { "epoch": 0.25186387471908106, "grad_norm": 0.28449690341949463, "learning_rate": 4.656832521550999e-05, "loss": 0.1913, "step": 14121 }, { "epoch": 0.2518817108407948, "grad_norm": 0.3513454794883728, "learning_rate": 4.656753811051142e-05, "loss": 0.133, "step": 14122 }, { "epoch": 0.2518995469625085, "grad_norm": 0.34927311539649963, "learning_rate": 4.656675092190963e-05, "loss": 0.252, "step": 14123 }, { "epoch": 0.2519173830842222, "grad_norm": 0.24004197120666504, "learning_rate": 4.6565963649707664e-05, "loss": 0.1867, "step": 14124 }, { "epoch": 0.25193521920593587, "grad_norm": 0.3098396062850952, "learning_rate": 4.656517629390856e-05, "loss": 0.1596, "step": 14125 }, { "epoch": 0.25195305532764956, "grad_norm": 0.2269553691148758, "learning_rate": 4.6564388854515404e-05, "loss": 0.1666, "step": 14126 }, { "epoch": 0.25197089144936324, "grad_norm": 0.3178076446056366, "learning_rate": 4.656360133153122e-05, "loss": 0.2216, "step": 14127 }, { "epoch": 0.25198872757107693, "grad_norm": 0.2884099781513214, "learning_rate": 4.6562813724959063e-05, "loss": 0.2332, "step": 14128 }, { "epoch": 0.2520065636927906, "grad_norm": 0.2716696858406067, "learning_rate": 4.6562026034802006e-05, "loss": 0.1289, "step": 14129 }, { "epoch": 0.2520243998145043, "grad_norm": 0.24681685864925385, "learning_rate": 4.656123826106308e-05, "loss": 0.1841, "step": 14130 }, { "epoch": 0.25204223593621805, "grad_norm": 0.3825761079788208, "learning_rate": 4.656045040374535e-05, "loss": 0.1923, "step": 14131 }, { "epoch": 0.25206007205793174, "grad_norm": 0.21614547073841095, "learning_rate": 4.655966246285187e-05, "loss": 0.1855, "step": 14132 }, { "epoch": 0.2520779081796454, "grad_norm": 0.3005830943584442, "learning_rate": 4.6558874438385684e-05, "loss": 0.1698, "step": 14133 }, { "epoch": 0.2520957443013591, "grad_norm": 0.2826884984970093, "learning_rate": 4.655808633034986e-05, "loss": 0.1551, "step": 14134 }, { "epoch": 0.2521135804230728, "grad_norm": 0.24026356637477875, "learning_rate": 4.655729813874745e-05, "loss": 0.1626, "step": 14135 }, { "epoch": 0.2521314165447865, "grad_norm": 0.3220154941082001, "learning_rate": 4.6556509863581496e-05, "loss": 0.1735, "step": 14136 }, { "epoch": 0.2521492526665002, "grad_norm": 0.24585796892642975, "learning_rate": 4.655572150485508e-05, "loss": 0.1848, "step": 14137 }, { "epoch": 0.25216708878821387, "grad_norm": 0.27170753479003906, "learning_rate": 4.6554933062571226e-05, "loss": 0.1278, "step": 14138 }, { "epoch": 0.2521849249099276, "grad_norm": 0.2308691442012787, "learning_rate": 4.655414453673302e-05, "loss": 0.1817, "step": 14139 }, { "epoch": 0.2522027610316413, "grad_norm": 0.30388322472572327, "learning_rate": 4.65533559273435e-05, "loss": 0.153, "step": 14140 }, { "epoch": 0.252220597153355, "grad_norm": 0.32387250661849976, "learning_rate": 4.655256723440573e-05, "loss": 0.1787, "step": 14141 }, { "epoch": 0.2522384332750687, "grad_norm": 0.23165884613990784, "learning_rate": 4.655177845792276e-05, "loss": 0.1319, "step": 14142 }, { "epoch": 0.25225626939678236, "grad_norm": 0.22295813262462616, "learning_rate": 4.655098959789765e-05, "loss": 0.2123, "step": 14143 }, { "epoch": 0.25227410551849605, "grad_norm": 0.23038272559642792, "learning_rate": 4.6550200654333474e-05, "loss": 0.1977, "step": 14144 }, { "epoch": 0.25229194164020974, "grad_norm": 0.2907399535179138, "learning_rate": 4.6549411627233266e-05, "loss": 0.1679, "step": 14145 }, { "epoch": 0.2523097777619234, "grad_norm": 0.26177433133125305, "learning_rate": 4.6548622516600106e-05, "loss": 0.1844, "step": 14146 }, { "epoch": 0.25232761388363717, "grad_norm": 0.2839551568031311, "learning_rate": 4.6547833322437036e-05, "loss": 0.2156, "step": 14147 }, { "epoch": 0.25234545000535086, "grad_norm": 0.22857435047626495, "learning_rate": 4.6547044044747125e-05, "loss": 0.1947, "step": 14148 }, { "epoch": 0.25236328612706455, "grad_norm": 0.3288918137550354, "learning_rate": 4.6546254683533416e-05, "loss": 0.1721, "step": 14149 }, { "epoch": 0.25238112224877823, "grad_norm": 0.2554515302181244, "learning_rate": 4.6545465238799e-05, "loss": 0.2082, "step": 14150 }, { "epoch": 0.2523989583704919, "grad_norm": 0.2436758279800415, "learning_rate": 4.654467571054691e-05, "loss": 0.1507, "step": 14151 }, { "epoch": 0.2524167944922056, "grad_norm": 0.21374143660068512, "learning_rate": 4.654388609878022e-05, "loss": 0.1662, "step": 14152 }, { "epoch": 0.2524346306139193, "grad_norm": 0.2745634913444519, "learning_rate": 4.654309640350198e-05, "loss": 0.1645, "step": 14153 }, { "epoch": 0.252452466735633, "grad_norm": 0.2654988467693329, "learning_rate": 4.654230662471526e-05, "loss": 0.1494, "step": 14154 }, { "epoch": 0.2524703028573467, "grad_norm": 0.2651924788951874, "learning_rate": 4.654151676242312e-05, "loss": 0.1743, "step": 14155 }, { "epoch": 0.2524881389790604, "grad_norm": 0.2538650333881378, "learning_rate": 4.654072681662862e-05, "loss": 0.15, "step": 14156 }, { "epoch": 0.2525059751007741, "grad_norm": 0.2426513284444809, "learning_rate": 4.653993678733483e-05, "loss": 0.1764, "step": 14157 }, { "epoch": 0.2525238112224878, "grad_norm": 0.2860927879810333, "learning_rate": 4.65391466745448e-05, "loss": 0.1428, "step": 14158 }, { "epoch": 0.2525416473442015, "grad_norm": 0.3454885184764862, "learning_rate": 4.653835647826159e-05, "loss": 0.1676, "step": 14159 }, { "epoch": 0.25255948346591517, "grad_norm": 0.2563824951648712, "learning_rate": 4.653756619848828e-05, "loss": 0.211, "step": 14160 }, { "epoch": 0.25257731958762886, "grad_norm": 0.21312856674194336, "learning_rate": 4.653677583522793e-05, "loss": 0.1752, "step": 14161 }, { "epoch": 0.25259515570934254, "grad_norm": 0.4886600375175476, "learning_rate": 4.6535985388483586e-05, "loss": 0.1739, "step": 14162 }, { "epoch": 0.25261299183105623, "grad_norm": 0.2462834119796753, "learning_rate": 4.653519485825833e-05, "loss": 0.1785, "step": 14163 }, { "epoch": 0.25263082795277, "grad_norm": 0.3622860908508301, "learning_rate": 4.653440424455522e-05, "loss": 0.2362, "step": 14164 }, { "epoch": 0.25264866407448366, "grad_norm": 0.2675084173679352, "learning_rate": 4.653361354737732e-05, "loss": 0.2223, "step": 14165 }, { "epoch": 0.25266650019619735, "grad_norm": 0.23243822157382965, "learning_rate": 4.65328227667277e-05, "loss": 0.1972, "step": 14166 }, { "epoch": 0.25268433631791104, "grad_norm": 0.22301937639713287, "learning_rate": 4.653203190260942e-05, "loss": 0.1758, "step": 14167 }, { "epoch": 0.25270217243962473, "grad_norm": 0.25945422053337097, "learning_rate": 4.6531240955025544e-05, "loss": 0.1572, "step": 14168 }, { "epoch": 0.2527200085613384, "grad_norm": 0.4287818372249603, "learning_rate": 4.6530449923979146e-05, "loss": 0.1891, "step": 14169 }, { "epoch": 0.2527378446830521, "grad_norm": 0.28560107946395874, "learning_rate": 4.6529658809473285e-05, "loss": 0.191, "step": 14170 }, { "epoch": 0.2527556808047658, "grad_norm": 0.30198612809181213, "learning_rate": 4.6528867611511036e-05, "loss": 0.1361, "step": 14171 }, { "epoch": 0.2527735169264795, "grad_norm": 0.2874630093574524, "learning_rate": 4.652807633009546e-05, "loss": 0.18, "step": 14172 }, { "epoch": 0.2527913530481932, "grad_norm": 0.4322587549686432, "learning_rate": 4.652728496522962e-05, "loss": 0.1552, "step": 14173 }, { "epoch": 0.2528091891699069, "grad_norm": 0.23630595207214355, "learning_rate": 4.6526493516916584e-05, "loss": 0.1443, "step": 14174 }, { "epoch": 0.2528270252916206, "grad_norm": 0.35395070910453796, "learning_rate": 4.6525701985159433e-05, "loss": 0.1965, "step": 14175 }, { "epoch": 0.2528448614133343, "grad_norm": 0.32523298263549805, "learning_rate": 4.6524910369961216e-05, "loss": 0.2032, "step": 14176 }, { "epoch": 0.252862697535048, "grad_norm": 0.18269342184066772, "learning_rate": 4.652411867132502e-05, "loss": 0.1205, "step": 14177 }, { "epoch": 0.25288053365676166, "grad_norm": 0.24428625404834747, "learning_rate": 4.652332688925391e-05, "loss": 0.1807, "step": 14178 }, { "epoch": 0.25289836977847535, "grad_norm": 0.2848491966724396, "learning_rate": 4.652253502375095e-05, "loss": 0.1517, "step": 14179 }, { "epoch": 0.25291620590018904, "grad_norm": 0.3086334764957428, "learning_rate": 4.65217430748192e-05, "loss": 0.2058, "step": 14180 }, { "epoch": 0.2529340420219028, "grad_norm": 0.3114648461341858, "learning_rate": 4.6520951042461745e-05, "loss": 0.2672, "step": 14181 }, { "epoch": 0.25295187814361647, "grad_norm": 0.2515789568424225, "learning_rate": 4.652015892668166e-05, "loss": 0.1688, "step": 14182 }, { "epoch": 0.25296971426533016, "grad_norm": 0.21046918630599976, "learning_rate": 4.651936672748199e-05, "loss": 0.1516, "step": 14183 }, { "epoch": 0.25298755038704385, "grad_norm": 0.3786681294441223, "learning_rate": 4.651857444486583e-05, "loss": 0.1811, "step": 14184 }, { "epoch": 0.25300538650875753, "grad_norm": 0.29727619886398315, "learning_rate": 4.6517782078836244e-05, "loss": 0.2105, "step": 14185 }, { "epoch": 0.2530232226304712, "grad_norm": 0.26006007194519043, "learning_rate": 4.65169896293963e-05, "loss": 0.1873, "step": 14186 }, { "epoch": 0.2530410587521849, "grad_norm": 0.2879183888435364, "learning_rate": 4.6516197096549076e-05, "loss": 0.1966, "step": 14187 }, { "epoch": 0.2530588948738986, "grad_norm": 0.2707684636116028, "learning_rate": 4.651540448029764e-05, "loss": 0.2031, "step": 14188 }, { "epoch": 0.2530767309956123, "grad_norm": 0.26511630415916443, "learning_rate": 4.6514611780645067e-05, "loss": 0.1419, "step": 14189 }, { "epoch": 0.25309456711732603, "grad_norm": 0.38170453906059265, "learning_rate": 4.651381899759442e-05, "loss": 0.1947, "step": 14190 }, { "epoch": 0.2531124032390397, "grad_norm": 0.2836182415485382, "learning_rate": 4.6513026131148786e-05, "loss": 0.1683, "step": 14191 }, { "epoch": 0.2531302393607534, "grad_norm": 0.2315625697374344, "learning_rate": 4.651223318131123e-05, "loss": 0.1733, "step": 14192 }, { "epoch": 0.2531480754824671, "grad_norm": 0.2781814634799957, "learning_rate": 4.651144014808483e-05, "loss": 0.1726, "step": 14193 }, { "epoch": 0.2531659116041808, "grad_norm": 0.27597060799598694, "learning_rate": 4.651064703147266e-05, "loss": 0.1941, "step": 14194 }, { "epoch": 0.25318374772589447, "grad_norm": 0.2447982281446457, "learning_rate": 4.650985383147779e-05, "loss": 0.1858, "step": 14195 }, { "epoch": 0.25320158384760816, "grad_norm": 0.2601865231990814, "learning_rate": 4.65090605481033e-05, "loss": 0.1952, "step": 14196 }, { "epoch": 0.25321941996932185, "grad_norm": 0.26416856050491333, "learning_rate": 4.650826718135226e-05, "loss": 0.1723, "step": 14197 }, { "epoch": 0.2532372560910356, "grad_norm": 0.299405962228775, "learning_rate": 4.650747373122775e-05, "loss": 0.2659, "step": 14198 }, { "epoch": 0.2532550922127493, "grad_norm": 0.2368491291999817, "learning_rate": 4.650668019773283e-05, "loss": 0.18, "step": 14199 }, { "epoch": 0.25327292833446297, "grad_norm": 0.375325471162796, "learning_rate": 4.6505886580870604e-05, "loss": 0.1724, "step": 14200 }, { "epoch": 0.25329076445617665, "grad_norm": 0.22692859172821045, "learning_rate": 4.650509288064413e-05, "loss": 0.1803, "step": 14201 }, { "epoch": 0.25330860057789034, "grad_norm": 0.29675620794296265, "learning_rate": 4.650429909705649e-05, "loss": 0.2412, "step": 14202 }, { "epoch": 0.25332643669960403, "grad_norm": 0.33006274700164795, "learning_rate": 4.650350523011076e-05, "loss": 0.1499, "step": 14203 }, { "epoch": 0.2533442728213177, "grad_norm": 0.30536776781082153, "learning_rate": 4.650271127981001e-05, "loss": 0.2154, "step": 14204 }, { "epoch": 0.2533621089430314, "grad_norm": 0.3404679000377655, "learning_rate": 4.650191724615733e-05, "loss": 0.1753, "step": 14205 }, { "epoch": 0.25337994506474515, "grad_norm": 0.20536650717258453, "learning_rate": 4.650112312915579e-05, "loss": 0.1947, "step": 14206 }, { "epoch": 0.25339778118645884, "grad_norm": 0.2819133698940277, "learning_rate": 4.650032892880847e-05, "loss": 0.1656, "step": 14207 }, { "epoch": 0.2534156173081725, "grad_norm": 0.29955440759658813, "learning_rate": 4.649953464511845e-05, "loss": 0.1229, "step": 14208 }, { "epoch": 0.2534334534298862, "grad_norm": 0.293802410364151, "learning_rate": 4.6498740278088816e-05, "loss": 0.1391, "step": 14209 }, { "epoch": 0.2534512895515999, "grad_norm": 0.2326391488313675, "learning_rate": 4.6497945827722626e-05, "loss": 0.1332, "step": 14210 }, { "epoch": 0.2534691256733136, "grad_norm": 0.2509083151817322, "learning_rate": 4.6497151294022976e-05, "loss": 0.1685, "step": 14211 }, { "epoch": 0.2534869617950273, "grad_norm": 0.43427351117134094, "learning_rate": 4.6496356676992944e-05, "loss": 0.2319, "step": 14212 }, { "epoch": 0.25350479791674096, "grad_norm": 0.2087259441614151, "learning_rate": 4.649556197663562e-05, "loss": 0.1308, "step": 14213 }, { "epoch": 0.25352263403845465, "grad_norm": 0.2480999231338501, "learning_rate": 4.649476719295406e-05, "loss": 0.1598, "step": 14214 }, { "epoch": 0.2535404701601684, "grad_norm": 0.22258156538009644, "learning_rate": 4.6493972325951366e-05, "loss": 0.1759, "step": 14215 }, { "epoch": 0.2535583062818821, "grad_norm": 0.38022181391716003, "learning_rate": 4.6493177375630605e-05, "loss": 0.1472, "step": 14216 }, { "epoch": 0.2535761424035958, "grad_norm": 0.1586291342973709, "learning_rate": 4.6492382341994865e-05, "loss": 0.1396, "step": 14217 }, { "epoch": 0.25359397852530946, "grad_norm": 0.3604293465614319, "learning_rate": 4.6491587225047227e-05, "loss": 0.1781, "step": 14218 }, { "epoch": 0.25361181464702315, "grad_norm": 0.28630000352859497, "learning_rate": 4.649079202479078e-05, "loss": 0.2377, "step": 14219 }, { "epoch": 0.25362965076873684, "grad_norm": 0.2291271686553955, "learning_rate": 4.64899967412286e-05, "loss": 0.1684, "step": 14220 }, { "epoch": 0.2536474868904505, "grad_norm": 0.2845189869403839, "learning_rate": 4.6489201374363766e-05, "loss": 0.1737, "step": 14221 }, { "epoch": 0.2536653230121642, "grad_norm": 0.25370433926582336, "learning_rate": 4.6488405924199364e-05, "loss": 0.2223, "step": 14222 }, { "epoch": 0.25368315913387796, "grad_norm": 0.27321720123291016, "learning_rate": 4.6487610390738487e-05, "loss": 0.1486, "step": 14223 }, { "epoch": 0.25370099525559164, "grad_norm": 0.3075837790966034, "learning_rate": 4.6486814773984204e-05, "loss": 0.2256, "step": 14224 }, { "epoch": 0.25371883137730533, "grad_norm": 0.2625052034854889, "learning_rate": 4.648601907393961e-05, "loss": 0.2179, "step": 14225 }, { "epoch": 0.253736667499019, "grad_norm": 0.25153347849845886, "learning_rate": 4.6485223290607785e-05, "loss": 0.188, "step": 14226 }, { "epoch": 0.2537545036207327, "grad_norm": 0.3153325319290161, "learning_rate": 4.648442742399181e-05, "loss": 0.2053, "step": 14227 }, { "epoch": 0.2537723397424464, "grad_norm": 0.3147667348384857, "learning_rate": 4.648363147409477e-05, "loss": 0.2391, "step": 14228 }, { "epoch": 0.2537901758641601, "grad_norm": 0.3161238729953766, "learning_rate": 4.648283544091976e-05, "loss": 0.2056, "step": 14229 }, { "epoch": 0.25380801198587377, "grad_norm": 0.22965151071548462, "learning_rate": 4.648203932446986e-05, "loss": 0.1516, "step": 14230 }, { "epoch": 0.25382584810758746, "grad_norm": 0.2387286126613617, "learning_rate": 4.6481243124748155e-05, "loss": 0.1715, "step": 14231 }, { "epoch": 0.2538436842293012, "grad_norm": 0.24368348717689514, "learning_rate": 4.648044684175773e-05, "loss": 0.1908, "step": 14232 }, { "epoch": 0.2538615203510149, "grad_norm": 0.2694433927536011, "learning_rate": 4.647965047550168e-05, "loss": 0.191, "step": 14233 }, { "epoch": 0.2538793564727286, "grad_norm": 0.2929813861846924, "learning_rate": 4.6478854025983075e-05, "loss": 0.2187, "step": 14234 }, { "epoch": 0.25389719259444227, "grad_norm": 0.2125709056854248, "learning_rate": 4.6478057493205026e-05, "loss": 0.1898, "step": 14235 }, { "epoch": 0.25391502871615595, "grad_norm": 0.36264047026634216, "learning_rate": 4.64772608771706e-05, "loss": 0.2234, "step": 14236 }, { "epoch": 0.25393286483786964, "grad_norm": 0.2551672160625458, "learning_rate": 4.647646417788289e-05, "loss": 0.1913, "step": 14237 }, { "epoch": 0.25395070095958333, "grad_norm": 0.2613697946071625, "learning_rate": 4.6475667395344994e-05, "loss": 0.169, "step": 14238 }, { "epoch": 0.253968537081297, "grad_norm": 0.4206896424293518, "learning_rate": 4.647487052955999e-05, "loss": 0.2123, "step": 14239 }, { "epoch": 0.25398637320301076, "grad_norm": 0.34507039189338684, "learning_rate": 4.647407358053097e-05, "loss": 0.2134, "step": 14240 }, { "epoch": 0.25400420932472445, "grad_norm": 0.3244677484035492, "learning_rate": 4.647327654826104e-05, "loss": 0.1939, "step": 14241 }, { "epoch": 0.25402204544643814, "grad_norm": 0.29289910197257996, "learning_rate": 4.6472479432753255e-05, "loss": 0.1963, "step": 14242 }, { "epoch": 0.2540398815681518, "grad_norm": 0.2476482391357422, "learning_rate": 4.647168223401073e-05, "loss": 0.1624, "step": 14243 }, { "epoch": 0.2540577176898655, "grad_norm": 0.24047306180000305, "learning_rate": 4.6470884952036544e-05, "loss": 0.2102, "step": 14244 }, { "epoch": 0.2540755538115792, "grad_norm": 0.33967599272727966, "learning_rate": 4.6470087586833796e-05, "loss": 0.2317, "step": 14245 }, { "epoch": 0.2540933899332929, "grad_norm": 0.24407899379730225, "learning_rate": 4.646929013840556e-05, "loss": 0.1793, "step": 14246 }, { "epoch": 0.2541112260550066, "grad_norm": 0.29077231884002686, "learning_rate": 4.646849260675496e-05, "loss": 0.1998, "step": 14247 }, { "epoch": 0.25412906217672027, "grad_norm": 0.34494513273239136, "learning_rate": 4.646769499188506e-05, "loss": 0.1575, "step": 14248 }, { "epoch": 0.254146898298434, "grad_norm": 0.6917784810066223, "learning_rate": 4.646689729379896e-05, "loss": 0.1663, "step": 14249 }, { "epoch": 0.2541647344201477, "grad_norm": 0.23537231981754303, "learning_rate": 4.646609951249975e-05, "loss": 0.1447, "step": 14250 }, { "epoch": 0.2541825705418614, "grad_norm": 0.2726292908191681, "learning_rate": 4.6465301647990525e-05, "loss": 0.1618, "step": 14251 }, { "epoch": 0.2542004066635751, "grad_norm": 0.22526873648166656, "learning_rate": 4.6464503700274376e-05, "loss": 0.2046, "step": 14252 }, { "epoch": 0.25421824278528876, "grad_norm": 0.23197723925113678, "learning_rate": 4.64637056693544e-05, "loss": 0.1549, "step": 14253 }, { "epoch": 0.25423607890700245, "grad_norm": 0.27988550066947937, "learning_rate": 4.646290755523368e-05, "loss": 0.1789, "step": 14254 }, { "epoch": 0.25425391502871614, "grad_norm": 0.32018154859542847, "learning_rate": 4.646210935791533e-05, "loss": 0.2266, "step": 14255 }, { "epoch": 0.2542717511504298, "grad_norm": 0.24952074885368347, "learning_rate": 4.646131107740242e-05, "loss": 0.1817, "step": 14256 }, { "epoch": 0.25428958727214357, "grad_norm": 0.31356120109558105, "learning_rate": 4.6460512713698055e-05, "loss": 0.1792, "step": 14257 }, { "epoch": 0.25430742339385726, "grad_norm": 0.37971311807632446, "learning_rate": 4.6459714266805346e-05, "loss": 0.2357, "step": 14258 }, { "epoch": 0.25432525951557095, "grad_norm": 0.3118349015712738, "learning_rate": 4.645891573672736e-05, "loss": 0.2425, "step": 14259 }, { "epoch": 0.25434309563728463, "grad_norm": 0.25742360949516296, "learning_rate": 4.64581171234672e-05, "loss": 0.195, "step": 14260 }, { "epoch": 0.2543609317589983, "grad_norm": 0.2648634910583496, "learning_rate": 4.6457318427027977e-05, "loss": 0.1821, "step": 14261 }, { "epoch": 0.254378767880712, "grad_norm": 0.2841801047325134, "learning_rate": 4.645651964741277e-05, "loss": 0.1772, "step": 14262 }, { "epoch": 0.2543966040024257, "grad_norm": 0.41761597990989685, "learning_rate": 4.645572078462469e-05, "loss": 0.1754, "step": 14263 }, { "epoch": 0.2544144401241394, "grad_norm": 0.22964833676815033, "learning_rate": 4.645492183866682e-05, "loss": 0.1489, "step": 14264 }, { "epoch": 0.25443227624585313, "grad_norm": 0.4118020832538605, "learning_rate": 4.645412280954226e-05, "loss": 0.2494, "step": 14265 }, { "epoch": 0.2544501123675668, "grad_norm": 0.35143670439720154, "learning_rate": 4.645332369725411e-05, "loss": 0.2322, "step": 14266 }, { "epoch": 0.2544679484892805, "grad_norm": 0.29790523648262024, "learning_rate": 4.6452524501805474e-05, "loss": 0.2103, "step": 14267 }, { "epoch": 0.2544857846109942, "grad_norm": 0.26203373074531555, "learning_rate": 4.6451725223199446e-05, "loss": 0.1729, "step": 14268 }, { "epoch": 0.2545036207327079, "grad_norm": 0.2693018317222595, "learning_rate": 4.645092586143911e-05, "loss": 0.2157, "step": 14269 }, { "epoch": 0.25452145685442157, "grad_norm": 0.2562446892261505, "learning_rate": 4.645012641652759e-05, "loss": 0.193, "step": 14270 }, { "epoch": 0.25453929297613526, "grad_norm": 0.2537136673927307, "learning_rate": 4.6449326888467956e-05, "loss": 0.1669, "step": 14271 }, { "epoch": 0.25455712909784894, "grad_norm": 0.2820917069911957, "learning_rate": 4.6448527277263335e-05, "loss": 0.2135, "step": 14272 }, { "epoch": 0.25457496521956263, "grad_norm": 0.3061208128929138, "learning_rate": 4.644772758291681e-05, "loss": 0.1796, "step": 14273 }, { "epoch": 0.2545928013412764, "grad_norm": 0.32747864723205566, "learning_rate": 4.6446927805431484e-05, "loss": 0.182, "step": 14274 }, { "epoch": 0.25461063746299006, "grad_norm": 0.34844040870666504, "learning_rate": 4.6446127944810456e-05, "loss": 0.2114, "step": 14275 }, { "epoch": 0.25462847358470375, "grad_norm": 0.3345884680747986, "learning_rate": 4.644532800105684e-05, "loss": 0.1389, "step": 14276 }, { "epoch": 0.25464630970641744, "grad_norm": 0.2675981819629669, "learning_rate": 4.644452797417371e-05, "loss": 0.184, "step": 14277 }, { "epoch": 0.2546641458281311, "grad_norm": 0.2570849657058716, "learning_rate": 4.644372786416419e-05, "loss": 0.2201, "step": 14278 }, { "epoch": 0.2546819819498448, "grad_norm": 0.8513707518577576, "learning_rate": 4.6442927671031376e-05, "loss": 0.4233, "step": 14279 }, { "epoch": 0.2546998180715585, "grad_norm": 0.23098179697990417, "learning_rate": 4.644212739477837e-05, "loss": 0.1789, "step": 14280 }, { "epoch": 0.2547176541932722, "grad_norm": 0.27836576104164124, "learning_rate": 4.6441327035408274e-05, "loss": 0.1846, "step": 14281 }, { "epoch": 0.25473549031498594, "grad_norm": 0.34817731380462646, "learning_rate": 4.644052659292418e-05, "loss": 0.2512, "step": 14282 }, { "epoch": 0.2547533264366996, "grad_norm": 0.24810580909252167, "learning_rate": 4.6439726067329205e-05, "loss": 0.1333, "step": 14283 }, { "epoch": 0.2547711625584133, "grad_norm": 0.2629035413265228, "learning_rate": 4.6438925458626445e-05, "loss": 0.1593, "step": 14284 }, { "epoch": 0.254788998680127, "grad_norm": 0.2998231053352356, "learning_rate": 4.6438124766819006e-05, "loss": 0.1861, "step": 14285 }, { "epoch": 0.2548068348018407, "grad_norm": 0.3157520294189453, "learning_rate": 4.643732399190999e-05, "loss": 0.1973, "step": 14286 }, { "epoch": 0.2548246709235544, "grad_norm": 0.30597442388534546, "learning_rate": 4.643652313390251e-05, "loss": 0.2054, "step": 14287 }, { "epoch": 0.25484250704526806, "grad_norm": 0.2914910316467285, "learning_rate": 4.643572219279965e-05, "loss": 0.1648, "step": 14288 }, { "epoch": 0.25486034316698175, "grad_norm": 0.24053938686847687, "learning_rate": 4.643492116860453e-05, "loss": 0.2136, "step": 14289 }, { "epoch": 0.25487817928869544, "grad_norm": 0.29190152883529663, "learning_rate": 4.643412006132026e-05, "loss": 0.2144, "step": 14290 }, { "epoch": 0.2548960154104092, "grad_norm": 0.3769576847553253, "learning_rate": 4.643331887094993e-05, "loss": 0.289, "step": 14291 }, { "epoch": 0.25491385153212287, "grad_norm": 0.26358842849731445, "learning_rate": 4.6432517597496654e-05, "loss": 0.1982, "step": 14292 }, { "epoch": 0.25493168765383656, "grad_norm": 0.26545488834381104, "learning_rate": 4.643171624096354e-05, "loss": 0.1866, "step": 14293 }, { "epoch": 0.25494952377555025, "grad_norm": 0.2900102138519287, "learning_rate": 4.643091480135369e-05, "loss": 0.1699, "step": 14294 }, { "epoch": 0.25496735989726393, "grad_norm": 0.33532676100730896, "learning_rate": 4.643011327867021e-05, "loss": 0.2373, "step": 14295 }, { "epoch": 0.2549851960189776, "grad_norm": 0.40710389614105225, "learning_rate": 4.6429311672916214e-05, "loss": 0.2046, "step": 14296 }, { "epoch": 0.2550030321406913, "grad_norm": 0.21490029990673065, "learning_rate": 4.642850998409481e-05, "loss": 0.1755, "step": 14297 }, { "epoch": 0.255020868262405, "grad_norm": 0.2678086459636688, "learning_rate": 4.6427708212209087e-05, "loss": 0.2155, "step": 14298 }, { "epoch": 0.25503870438411874, "grad_norm": 0.24348019063472748, "learning_rate": 4.642690635726217e-05, "loss": 0.2043, "step": 14299 }, { "epoch": 0.25505654050583243, "grad_norm": 0.35054877400398254, "learning_rate": 4.642610441925717e-05, "loss": 0.1627, "step": 14300 }, { "epoch": 0.2550743766275461, "grad_norm": 0.3422543704509735, "learning_rate": 4.642530239819718e-05, "loss": 0.2355, "step": 14301 }, { "epoch": 0.2550922127492598, "grad_norm": 0.2594767212867737, "learning_rate": 4.6424500294085315e-05, "loss": 0.1627, "step": 14302 }, { "epoch": 0.2551100488709735, "grad_norm": 0.23776432871818542, "learning_rate": 4.64236981069247e-05, "loss": 0.1653, "step": 14303 }, { "epoch": 0.2551278849926872, "grad_norm": 0.39940017461776733, "learning_rate": 4.642289583671842e-05, "loss": 0.205, "step": 14304 }, { "epoch": 0.25514572111440087, "grad_norm": 0.2671494483947754, "learning_rate": 4.6422093483469606e-05, "loss": 0.1934, "step": 14305 }, { "epoch": 0.25516355723611456, "grad_norm": 0.42209678888320923, "learning_rate": 4.642129104718135e-05, "loss": 0.1695, "step": 14306 }, { "epoch": 0.2551813933578283, "grad_norm": 0.36727413535118103, "learning_rate": 4.642048852785678e-05, "loss": 0.2219, "step": 14307 }, { "epoch": 0.255199229479542, "grad_norm": 0.21790213882923126, "learning_rate": 4.641968592549899e-05, "loss": 0.1695, "step": 14308 }, { "epoch": 0.2552170656012557, "grad_norm": 0.3148146867752075, "learning_rate": 4.64188832401111e-05, "loss": 0.2148, "step": 14309 }, { "epoch": 0.25523490172296937, "grad_norm": 0.23866064846515656, "learning_rate": 4.641808047169623e-05, "loss": 0.1901, "step": 14310 }, { "epoch": 0.25525273784468305, "grad_norm": 0.33334094285964966, "learning_rate": 4.641727762025747e-05, "loss": 0.1811, "step": 14311 }, { "epoch": 0.25527057396639674, "grad_norm": 0.385278582572937, "learning_rate": 4.641647468579795e-05, "loss": 0.2473, "step": 14312 }, { "epoch": 0.25528841008811043, "grad_norm": 0.1848461627960205, "learning_rate": 4.6415671668320784e-05, "loss": 0.1515, "step": 14313 }, { "epoch": 0.2553062462098241, "grad_norm": 0.29272663593292236, "learning_rate": 4.6414868567829076e-05, "loss": 0.2557, "step": 14314 }, { "epoch": 0.2553240823315378, "grad_norm": 0.3228347599506378, "learning_rate": 4.641406538432593e-05, "loss": 0.1904, "step": 14315 }, { "epoch": 0.25534191845325155, "grad_norm": 0.3674481511116028, "learning_rate": 4.641326211781448e-05, "loss": 0.2122, "step": 14316 }, { "epoch": 0.25535975457496524, "grad_norm": 0.26000264286994934, "learning_rate": 4.641245876829783e-05, "loss": 0.1987, "step": 14317 }, { "epoch": 0.2553775906966789, "grad_norm": 0.252028226852417, "learning_rate": 4.6411655335779085e-05, "loss": 0.1511, "step": 14318 }, { "epoch": 0.2553954268183926, "grad_norm": 0.23151366412639618, "learning_rate": 4.641085182026138e-05, "loss": 0.1802, "step": 14319 }, { "epoch": 0.2554132629401063, "grad_norm": 0.6375302672386169, "learning_rate": 4.6410048221747814e-05, "loss": 0.1879, "step": 14320 }, { "epoch": 0.25543109906182, "grad_norm": 0.2893012762069702, "learning_rate": 4.6409244540241507e-05, "loss": 0.2059, "step": 14321 }, { "epoch": 0.2554489351835337, "grad_norm": 0.2172849476337433, "learning_rate": 4.640844077574557e-05, "loss": 0.1663, "step": 14322 }, { "epoch": 0.25546677130524736, "grad_norm": 0.3070853650569916, "learning_rate": 4.640763692826312e-05, "loss": 0.1468, "step": 14323 }, { "epoch": 0.2554846074269611, "grad_norm": 0.33351191878318787, "learning_rate": 4.6406832997797275e-05, "loss": 0.2312, "step": 14324 }, { "epoch": 0.2555024435486748, "grad_norm": 0.2882409691810608, "learning_rate": 4.640602898435116e-05, "loss": 0.1617, "step": 14325 }, { "epoch": 0.2555202796703885, "grad_norm": 0.29604238271713257, "learning_rate": 4.640522488792788e-05, "loss": 0.2165, "step": 14326 }, { "epoch": 0.25553811579210217, "grad_norm": 0.9195283055305481, "learning_rate": 4.640442070853056e-05, "loss": 0.1609, "step": 14327 }, { "epoch": 0.25555595191381586, "grad_norm": 0.3302413821220398, "learning_rate": 4.64036164461623e-05, "loss": 0.197, "step": 14328 }, { "epoch": 0.25557378803552955, "grad_norm": 0.2042795717716217, "learning_rate": 4.6402812100826243e-05, "loss": 0.145, "step": 14329 }, { "epoch": 0.25559162415724324, "grad_norm": 0.27050572633743286, "learning_rate": 4.640200767252549e-05, "loss": 0.229, "step": 14330 }, { "epoch": 0.2556094602789569, "grad_norm": 0.2793087661266327, "learning_rate": 4.640120316126316e-05, "loss": 0.1239, "step": 14331 }, { "epoch": 0.2556272964006706, "grad_norm": 0.19903309643268585, "learning_rate": 4.640039856704238e-05, "loss": 0.1472, "step": 14332 }, { "epoch": 0.25564513252238436, "grad_norm": 0.26694461703300476, "learning_rate": 4.6399593889866254e-05, "loss": 0.1934, "step": 14333 }, { "epoch": 0.25566296864409804, "grad_norm": 0.2252333164215088, "learning_rate": 4.639878912973792e-05, "loss": 0.1846, "step": 14334 }, { "epoch": 0.25568080476581173, "grad_norm": 0.24357199668884277, "learning_rate": 4.639798428666049e-05, "loss": 0.1945, "step": 14335 }, { "epoch": 0.2556986408875254, "grad_norm": 0.26161786913871765, "learning_rate": 4.639717936063707e-05, "loss": 0.2121, "step": 14336 }, { "epoch": 0.2557164770092391, "grad_norm": 0.4109809696674347, "learning_rate": 4.6396374351670804e-05, "loss": 0.2219, "step": 14337 }, { "epoch": 0.2557343131309528, "grad_norm": 0.26998665928840637, "learning_rate": 4.63955692597648e-05, "loss": 0.221, "step": 14338 }, { "epoch": 0.2557521492526665, "grad_norm": 0.18102248013019562, "learning_rate": 4.639476408492217e-05, "loss": 0.1586, "step": 14339 }, { "epoch": 0.25576998537438017, "grad_norm": 0.18463407456874847, "learning_rate": 4.639395882714606e-05, "loss": 0.1348, "step": 14340 }, { "epoch": 0.2557878214960939, "grad_norm": 0.2712778151035309, "learning_rate": 4.639315348643957e-05, "loss": 0.1969, "step": 14341 }, { "epoch": 0.2558056576178076, "grad_norm": 0.24200566112995148, "learning_rate": 4.6392348062805824e-05, "loss": 0.1914, "step": 14342 }, { "epoch": 0.2558234937395213, "grad_norm": 0.30416885018348694, "learning_rate": 4.6391542556247945e-05, "loss": 0.1787, "step": 14343 }, { "epoch": 0.255841329861235, "grad_norm": 0.3629536032676697, "learning_rate": 4.6390736966769065e-05, "loss": 0.2515, "step": 14344 }, { "epoch": 0.25585916598294867, "grad_norm": 0.3306543529033661, "learning_rate": 4.63899312943723e-05, "loss": 0.2223, "step": 14345 }, { "epoch": 0.25587700210466235, "grad_norm": 0.24814771115779877, "learning_rate": 4.6389125539060774e-05, "loss": 0.1712, "step": 14346 }, { "epoch": 0.25589483822637604, "grad_norm": 0.28987741470336914, "learning_rate": 4.638831970083761e-05, "loss": 0.2323, "step": 14347 }, { "epoch": 0.25591267434808973, "grad_norm": 0.239081472158432, "learning_rate": 4.638751377970593e-05, "loss": 0.1363, "step": 14348 }, { "epoch": 0.2559305104698034, "grad_norm": 0.24164652824401855, "learning_rate": 4.6386707775668856e-05, "loss": 0.1956, "step": 14349 }, { "epoch": 0.25594834659151716, "grad_norm": 0.2343667596578598, "learning_rate": 4.6385901688729525e-05, "loss": 0.1861, "step": 14350 }, { "epoch": 0.25596618271323085, "grad_norm": 0.27585670351982117, "learning_rate": 4.6385095518891046e-05, "loss": 0.1813, "step": 14351 }, { "epoch": 0.25598401883494454, "grad_norm": 0.29659050703048706, "learning_rate": 4.6384289266156555e-05, "loss": 0.1842, "step": 14352 }, { "epoch": 0.2560018549566582, "grad_norm": 0.2685989737510681, "learning_rate": 4.6383482930529164e-05, "loss": 0.1583, "step": 14353 }, { "epoch": 0.2560196910783719, "grad_norm": 0.292593777179718, "learning_rate": 4.6382676512012016e-05, "loss": 0.2134, "step": 14354 }, { "epoch": 0.2560375272000856, "grad_norm": 0.3552425503730774, "learning_rate": 4.638187001060823e-05, "loss": 0.2069, "step": 14355 }, { "epoch": 0.2560553633217993, "grad_norm": 0.23550207912921906, "learning_rate": 4.6381063426320926e-05, "loss": 0.151, "step": 14356 }, { "epoch": 0.256073199443513, "grad_norm": 0.31950822472572327, "learning_rate": 4.638025675915323e-05, "loss": 0.1915, "step": 14357 }, { "epoch": 0.2560910355652267, "grad_norm": 0.2374105006456375, "learning_rate": 4.637945000910828e-05, "loss": 0.1816, "step": 14358 }, { "epoch": 0.2561088716869404, "grad_norm": 0.3021567165851593, "learning_rate": 4.637864317618921e-05, "loss": 0.1837, "step": 14359 }, { "epoch": 0.2561267078086541, "grad_norm": 0.26406076550483704, "learning_rate": 4.637783626039912e-05, "loss": 0.2214, "step": 14360 }, { "epoch": 0.2561445439303678, "grad_norm": 0.31598490476608276, "learning_rate": 4.6377029261741156e-05, "loss": 0.214, "step": 14361 }, { "epoch": 0.2561623800520815, "grad_norm": 0.3069753050804138, "learning_rate": 4.637622218021844e-05, "loss": 0.1505, "step": 14362 }, { "epoch": 0.25618021617379516, "grad_norm": 0.3438122272491455, "learning_rate": 4.637541501583411e-05, "loss": 0.2167, "step": 14363 }, { "epoch": 0.25619805229550885, "grad_norm": 0.24852631986141205, "learning_rate": 4.637460776859128e-05, "loss": 0.1926, "step": 14364 }, { "epoch": 0.25621588841722254, "grad_norm": 0.20911794900894165, "learning_rate": 4.63738004384931e-05, "loss": 0.1647, "step": 14365 }, { "epoch": 0.2562337245389363, "grad_norm": 0.21022996306419373, "learning_rate": 4.637299302554268e-05, "loss": 0.1727, "step": 14366 }, { "epoch": 0.25625156066064997, "grad_norm": 0.42533010244369507, "learning_rate": 4.6372185529743155e-05, "loss": 0.1711, "step": 14367 }, { "epoch": 0.25626939678236366, "grad_norm": 0.2320965677499771, "learning_rate": 4.637137795109766e-05, "loss": 0.1642, "step": 14368 }, { "epoch": 0.25628723290407734, "grad_norm": 0.27223172783851624, "learning_rate": 4.6370570289609324e-05, "loss": 0.128, "step": 14369 }, { "epoch": 0.25630506902579103, "grad_norm": 0.23713983595371246, "learning_rate": 4.636976254528127e-05, "loss": 0.1496, "step": 14370 }, { "epoch": 0.2563229051475047, "grad_norm": 0.28813818097114563, "learning_rate": 4.6368954718116644e-05, "loss": 0.1962, "step": 14371 }, { "epoch": 0.2563407412692184, "grad_norm": 0.39149922132492065, "learning_rate": 4.6368146808118566e-05, "loss": 0.2297, "step": 14372 }, { "epoch": 0.2563585773909321, "grad_norm": 0.24483299255371094, "learning_rate": 4.6367338815290174e-05, "loss": 0.2152, "step": 14373 }, { "epoch": 0.2563764135126458, "grad_norm": 0.2821146249771118, "learning_rate": 4.636653073963459e-05, "loss": 0.2063, "step": 14374 }, { "epoch": 0.25639424963435953, "grad_norm": 0.25984206795692444, "learning_rate": 4.636572258115496e-05, "loss": 0.1158, "step": 14375 }, { "epoch": 0.2564120857560732, "grad_norm": 0.34578901529312134, "learning_rate": 4.6364914339854405e-05, "loss": 0.2325, "step": 14376 }, { "epoch": 0.2564299218777869, "grad_norm": 0.3061266839504242, "learning_rate": 4.636410601573606e-05, "loss": 0.1775, "step": 14377 }, { "epoch": 0.2564477579995006, "grad_norm": 0.2818215787410736, "learning_rate": 4.636329760880306e-05, "loss": 0.175, "step": 14378 }, { "epoch": 0.2564655941212143, "grad_norm": 0.3151984214782715, "learning_rate": 4.636248911905855e-05, "loss": 0.1827, "step": 14379 }, { "epoch": 0.25648343024292797, "grad_norm": 0.29902344942092896, "learning_rate": 4.636168054650565e-05, "loss": 0.1939, "step": 14380 }, { "epoch": 0.25650126636464166, "grad_norm": 0.30194783210754395, "learning_rate": 4.636087189114749e-05, "loss": 0.187, "step": 14381 }, { "epoch": 0.25651910248635534, "grad_norm": 0.3023608326911926, "learning_rate": 4.6360063152987224e-05, "loss": 0.2081, "step": 14382 }, { "epoch": 0.2565369386080691, "grad_norm": 0.17881596088409424, "learning_rate": 4.6359254332027967e-05, "loss": 0.176, "step": 14383 }, { "epoch": 0.2565547747297828, "grad_norm": 0.2737482190132141, "learning_rate": 4.6358445428272865e-05, "loss": 0.2216, "step": 14384 }, { "epoch": 0.25657261085149646, "grad_norm": 0.34418344497680664, "learning_rate": 4.6357636441725056e-05, "loss": 0.1315, "step": 14385 }, { "epoch": 0.25659044697321015, "grad_norm": 0.352762371301651, "learning_rate": 4.6356827372387664e-05, "loss": 0.1617, "step": 14386 }, { "epoch": 0.25660828309492384, "grad_norm": 0.2598668932914734, "learning_rate": 4.635601822026384e-05, "loss": 0.1769, "step": 14387 }, { "epoch": 0.2566261192166375, "grad_norm": 0.22755123674869537, "learning_rate": 4.6355208985356716e-05, "loss": 0.1632, "step": 14388 }, { "epoch": 0.2566439553383512, "grad_norm": 0.24085275828838348, "learning_rate": 4.635439966766942e-05, "loss": 0.1666, "step": 14389 }, { "epoch": 0.2566617914600649, "grad_norm": 0.41727307438850403, "learning_rate": 4.63535902672051e-05, "loss": 0.2247, "step": 14390 }, { "epoch": 0.2566796275817786, "grad_norm": 0.2603924572467804, "learning_rate": 4.635278078396688e-05, "loss": 0.2157, "step": 14391 }, { "epoch": 0.25669746370349233, "grad_norm": 0.24139870703220367, "learning_rate": 4.6351971217957915e-05, "loss": 0.177, "step": 14392 }, { "epoch": 0.256715299825206, "grad_norm": 0.21795283257961273, "learning_rate": 4.6351161569181323e-05, "loss": 0.2322, "step": 14393 }, { "epoch": 0.2567331359469197, "grad_norm": 0.29549121856689453, "learning_rate": 4.635035183764027e-05, "loss": 0.179, "step": 14394 }, { "epoch": 0.2567509720686334, "grad_norm": 0.36327677965164185, "learning_rate": 4.634954202333787e-05, "loss": 0.2085, "step": 14395 }, { "epoch": 0.2567688081903471, "grad_norm": 0.2972378134727478, "learning_rate": 4.634873212627727e-05, "loss": 0.1834, "step": 14396 }, { "epoch": 0.2567866443120608, "grad_norm": 0.2354905903339386, "learning_rate": 4.6347922146461616e-05, "loss": 0.192, "step": 14397 }, { "epoch": 0.25680448043377446, "grad_norm": 0.22473953664302826, "learning_rate": 4.634711208389404e-05, "loss": 0.2009, "step": 14398 }, { "epoch": 0.25682231655548815, "grad_norm": 0.3361295461654663, "learning_rate": 4.634630193857768e-05, "loss": 0.1629, "step": 14399 }, { "epoch": 0.2568401526772019, "grad_norm": 0.23022682964801788, "learning_rate": 4.6345491710515686e-05, "loss": 0.1571, "step": 14400 }, { "epoch": 0.2568579887989156, "grad_norm": 0.21497762203216553, "learning_rate": 4.63446813997112e-05, "loss": 0.1869, "step": 14401 }, { "epoch": 0.25687582492062927, "grad_norm": 0.331078439950943, "learning_rate": 4.6343871006167344e-05, "loss": 0.1123, "step": 14402 }, { "epoch": 0.25689366104234296, "grad_norm": 0.307820200920105, "learning_rate": 4.634306052988728e-05, "loss": 0.1748, "step": 14403 }, { "epoch": 0.25691149716405665, "grad_norm": 0.2890065908432007, "learning_rate": 4.6342249970874144e-05, "loss": 0.2082, "step": 14404 }, { "epoch": 0.25692933328577033, "grad_norm": 0.37075090408325195, "learning_rate": 4.634143932913107e-05, "loss": 0.2356, "step": 14405 }, { "epoch": 0.256947169407484, "grad_norm": 0.27535951137542725, "learning_rate": 4.634062860466121e-05, "loss": 0.1945, "step": 14406 }, { "epoch": 0.2569650055291977, "grad_norm": 0.21282683312892914, "learning_rate": 4.6339817797467696e-05, "loss": 0.1408, "step": 14407 }, { "epoch": 0.25698284165091145, "grad_norm": 0.30652379989624023, "learning_rate": 4.633900690755368e-05, "loss": 0.2446, "step": 14408 }, { "epoch": 0.25700067777262514, "grad_norm": 0.2307320386171341, "learning_rate": 4.63381959349223e-05, "loss": 0.206, "step": 14409 }, { "epoch": 0.25701851389433883, "grad_norm": 0.28244829177856445, "learning_rate": 4.633738487957671e-05, "loss": 0.1916, "step": 14410 }, { "epoch": 0.2570363500160525, "grad_norm": 0.26034751534461975, "learning_rate": 4.633657374152005e-05, "loss": 0.1675, "step": 14411 }, { "epoch": 0.2570541861377662, "grad_norm": 0.2954724431037903, "learning_rate": 4.633576252075546e-05, "loss": 0.1631, "step": 14412 }, { "epoch": 0.2570720222594799, "grad_norm": 0.2681712806224823, "learning_rate": 4.633495121728607e-05, "loss": 0.1928, "step": 14413 }, { "epoch": 0.2570898583811936, "grad_norm": 0.39349910616874695, "learning_rate": 4.633413983111505e-05, "loss": 0.1165, "step": 14414 }, { "epoch": 0.25710769450290727, "grad_norm": 0.22883769869804382, "learning_rate": 4.6333328362245535e-05, "loss": 0.1484, "step": 14415 }, { "epoch": 0.25712553062462096, "grad_norm": 0.3104557991027832, "learning_rate": 4.633251681068067e-05, "loss": 0.2302, "step": 14416 }, { "epoch": 0.2571433667463347, "grad_norm": 0.34116917848587036, "learning_rate": 4.633170517642361e-05, "loss": 0.2161, "step": 14417 }, { "epoch": 0.2571612028680484, "grad_norm": 0.23141366243362427, "learning_rate": 4.633089345947749e-05, "loss": 0.1492, "step": 14418 }, { "epoch": 0.2571790389897621, "grad_norm": 0.2699582576751709, "learning_rate": 4.633008165984545e-05, "loss": 0.1906, "step": 14419 }, { "epoch": 0.25719687511147576, "grad_norm": 0.2940155565738678, "learning_rate": 4.632926977753065e-05, "loss": 0.1785, "step": 14420 }, { "epoch": 0.25721471123318945, "grad_norm": 0.21754339337348938, "learning_rate": 4.632845781253624e-05, "loss": 0.154, "step": 14421 }, { "epoch": 0.25723254735490314, "grad_norm": 0.23799261450767517, "learning_rate": 4.6327645764865354e-05, "loss": 0.1213, "step": 14422 }, { "epoch": 0.25725038347661683, "grad_norm": 0.3053324818611145, "learning_rate": 4.632683363452115e-05, "loss": 0.2201, "step": 14423 }, { "epoch": 0.2572682195983305, "grad_norm": 0.36479657888412476, "learning_rate": 4.632602142150677e-05, "loss": 0.2298, "step": 14424 }, { "epoch": 0.25728605572004426, "grad_norm": 0.33901506662368774, "learning_rate": 4.632520912582537e-05, "loss": 0.2352, "step": 14425 }, { "epoch": 0.25730389184175795, "grad_norm": 0.35692498087882996, "learning_rate": 4.632439674748009e-05, "loss": 0.2259, "step": 14426 }, { "epoch": 0.25732172796347164, "grad_norm": 0.29226434230804443, "learning_rate": 4.6323584286474086e-05, "loss": 0.1966, "step": 14427 }, { "epoch": 0.2573395640851853, "grad_norm": 0.2755231559276581, "learning_rate": 4.63227717428105e-05, "loss": 0.1094, "step": 14428 }, { "epoch": 0.257357400206899, "grad_norm": 0.3111165165901184, "learning_rate": 4.632195911649249e-05, "loss": 0.2024, "step": 14429 }, { "epoch": 0.2573752363286127, "grad_norm": 0.3994561731815338, "learning_rate": 4.6321146407523196e-05, "loss": 0.19, "step": 14430 }, { "epoch": 0.2573930724503264, "grad_norm": 0.33900538086891174, "learning_rate": 4.6320333615905786e-05, "loss": 0.1407, "step": 14431 }, { "epoch": 0.2574109085720401, "grad_norm": 0.33996281027793884, "learning_rate": 4.631952074164339e-05, "loss": 0.2368, "step": 14432 }, { "epoch": 0.25742874469375376, "grad_norm": 0.23042534291744232, "learning_rate": 4.631870778473917e-05, "loss": 0.1789, "step": 14433 }, { "epoch": 0.2574465808154675, "grad_norm": 0.26778653264045715, "learning_rate": 4.631789474519628e-05, "loss": 0.2056, "step": 14434 }, { "epoch": 0.2574644169371812, "grad_norm": 0.2652350962162018, "learning_rate": 4.631708162301786e-05, "loss": 0.1423, "step": 14435 }, { "epoch": 0.2574822530588949, "grad_norm": 0.2772522568702698, "learning_rate": 4.631626841820707e-05, "loss": 0.1752, "step": 14436 }, { "epoch": 0.25750008918060857, "grad_norm": 0.36391860246658325, "learning_rate": 4.631545513076706e-05, "loss": 0.2135, "step": 14437 }, { "epoch": 0.25751792530232226, "grad_norm": 0.20596438646316528, "learning_rate": 4.6314641760700995e-05, "loss": 0.1469, "step": 14438 }, { "epoch": 0.25753576142403595, "grad_norm": 0.33062639832496643, "learning_rate": 4.6313828308012005e-05, "loss": 0.1792, "step": 14439 }, { "epoch": 0.25755359754574964, "grad_norm": 0.48378390073776245, "learning_rate": 4.631301477270326e-05, "loss": 0.2645, "step": 14440 }, { "epoch": 0.2575714336674633, "grad_norm": 0.3870197534561157, "learning_rate": 4.63122011547779e-05, "loss": 0.2003, "step": 14441 }, { "epoch": 0.25758926978917707, "grad_norm": 0.2974582612514496, "learning_rate": 4.63113874542391e-05, "loss": 0.1891, "step": 14442 }, { "epoch": 0.25760710591089075, "grad_norm": 0.2086382806301117, "learning_rate": 4.631057367109e-05, "loss": 0.1559, "step": 14443 }, { "epoch": 0.25762494203260444, "grad_norm": 0.30199331045150757, "learning_rate": 4.630975980533374e-05, "loss": 0.2184, "step": 14444 }, { "epoch": 0.25764277815431813, "grad_norm": 0.4113059639930725, "learning_rate": 4.6308945856973505e-05, "loss": 0.2554, "step": 14445 }, { "epoch": 0.2576606142760318, "grad_norm": 0.2280125916004181, "learning_rate": 4.630813182601244e-05, "loss": 0.1617, "step": 14446 }, { "epoch": 0.2576784503977455, "grad_norm": 0.2942078709602356, "learning_rate": 4.6307317712453686e-05, "loss": 0.2219, "step": 14447 }, { "epoch": 0.2576962865194592, "grad_norm": 0.2534199655056, "learning_rate": 4.630650351630041e-05, "loss": 0.1639, "step": 14448 }, { "epoch": 0.2577141226411729, "grad_norm": 0.30035653710365295, "learning_rate": 4.630568923755577e-05, "loss": 0.1793, "step": 14449 }, { "epoch": 0.25773195876288657, "grad_norm": 0.2677864730358124, "learning_rate": 4.630487487622292e-05, "loss": 0.2001, "step": 14450 }, { "epoch": 0.2577497948846003, "grad_norm": 0.24835999310016632, "learning_rate": 4.6304060432305016e-05, "loss": 0.1675, "step": 14451 }, { "epoch": 0.257767631006314, "grad_norm": 0.2797510623931885, "learning_rate": 4.630324590580522e-05, "loss": 0.1328, "step": 14452 }, { "epoch": 0.2577854671280277, "grad_norm": 0.28699424862861633, "learning_rate": 4.6302431296726684e-05, "loss": 0.1877, "step": 14453 }, { "epoch": 0.2578033032497414, "grad_norm": 0.28767064213752747, "learning_rate": 4.630161660507256e-05, "loss": 0.1907, "step": 14454 }, { "epoch": 0.25782113937145507, "grad_norm": 0.2483808547258377, "learning_rate": 4.630080183084602e-05, "loss": 0.1943, "step": 14455 }, { "epoch": 0.25783897549316875, "grad_norm": 0.26998236775398254, "learning_rate": 4.6299986974050216e-05, "loss": 0.1855, "step": 14456 }, { "epoch": 0.25785681161488244, "grad_norm": 0.36403292417526245, "learning_rate": 4.62991720346883e-05, "loss": 0.1955, "step": 14457 }, { "epoch": 0.25787464773659613, "grad_norm": 0.244973823428154, "learning_rate": 4.629835701276344e-05, "loss": 0.1955, "step": 14458 }, { "epoch": 0.2578924838583099, "grad_norm": 0.3270580470561981, "learning_rate": 4.629754190827878e-05, "loss": 0.2258, "step": 14459 }, { "epoch": 0.25791031998002356, "grad_norm": 0.2520008981227875, "learning_rate": 4.629672672123751e-05, "loss": 0.1627, "step": 14460 }, { "epoch": 0.25792815610173725, "grad_norm": 0.2631997764110565, "learning_rate": 4.629591145164276e-05, "loss": 0.1792, "step": 14461 }, { "epoch": 0.25794599222345094, "grad_norm": 0.2808324992656708, "learning_rate": 4.629509609949771e-05, "loss": 0.2423, "step": 14462 }, { "epoch": 0.2579638283451646, "grad_norm": 0.25615525245666504, "learning_rate": 4.62942806648055e-05, "loss": 0.2088, "step": 14463 }, { "epoch": 0.2579816644668783, "grad_norm": 0.25935521721839905, "learning_rate": 4.629346514756931e-05, "loss": 0.1888, "step": 14464 }, { "epoch": 0.257999500588592, "grad_norm": 0.2394031137228012, "learning_rate": 4.62926495477923e-05, "loss": 0.1637, "step": 14465 }, { "epoch": 0.2580173367103057, "grad_norm": 0.2117544412612915, "learning_rate": 4.629183386547762e-05, "loss": 0.1785, "step": 14466 }, { "epoch": 0.25803517283201943, "grad_norm": 0.2255794256925583, "learning_rate": 4.629101810062844e-05, "loss": 0.2073, "step": 14467 }, { "epoch": 0.2580530089537331, "grad_norm": 0.23256143927574158, "learning_rate": 4.6290202253247915e-05, "loss": 0.1647, "step": 14468 }, { "epoch": 0.2580708450754468, "grad_norm": 0.25897982716560364, "learning_rate": 4.628938632333922e-05, "loss": 0.1574, "step": 14469 }, { "epoch": 0.2580886811971605, "grad_norm": 0.26567527651786804, "learning_rate": 4.628857031090551e-05, "loss": 0.1861, "step": 14470 }, { "epoch": 0.2581065173188742, "grad_norm": 0.29175615310668945, "learning_rate": 4.628775421594995e-05, "loss": 0.216, "step": 14471 }, { "epoch": 0.2581243534405879, "grad_norm": 0.24691833555698395, "learning_rate": 4.628693803847569e-05, "loss": 0.1951, "step": 14472 }, { "epoch": 0.25814218956230156, "grad_norm": 0.2945387661457062, "learning_rate": 4.628612177848592e-05, "loss": 0.1745, "step": 14473 }, { "epoch": 0.25816002568401525, "grad_norm": 0.21037498116493225, "learning_rate": 4.6285305435983785e-05, "loss": 0.1821, "step": 14474 }, { "epoch": 0.25817786180572894, "grad_norm": 0.45623844861984253, "learning_rate": 4.6284489010972455e-05, "loss": 0.18, "step": 14475 }, { "epoch": 0.2581956979274427, "grad_norm": 0.2608366012573242, "learning_rate": 4.628367250345509e-05, "loss": 0.1741, "step": 14476 }, { "epoch": 0.25821353404915637, "grad_norm": 0.20687773823738098, "learning_rate": 4.628285591343486e-05, "loss": 0.1601, "step": 14477 }, { "epoch": 0.25823137017087006, "grad_norm": 0.4461628496646881, "learning_rate": 4.6282039240914935e-05, "loss": 0.1804, "step": 14478 }, { "epoch": 0.25824920629258374, "grad_norm": 0.29956209659576416, "learning_rate": 4.628122248589847e-05, "loss": 0.1869, "step": 14479 }, { "epoch": 0.25826704241429743, "grad_norm": 0.2962927520275116, "learning_rate": 4.628040564838864e-05, "loss": 0.1573, "step": 14480 }, { "epoch": 0.2582848785360111, "grad_norm": 0.3870565593242645, "learning_rate": 4.627958872838861e-05, "loss": 0.1638, "step": 14481 }, { "epoch": 0.2583027146577248, "grad_norm": 0.32134172320365906, "learning_rate": 4.627877172590154e-05, "loss": 0.2385, "step": 14482 }, { "epoch": 0.2583205507794385, "grad_norm": 0.20753470063209534, "learning_rate": 4.62779546409306e-05, "loss": 0.1623, "step": 14483 }, { "epoch": 0.25833838690115224, "grad_norm": 0.330765962600708, "learning_rate": 4.627713747347896e-05, "loss": 0.2135, "step": 14484 }, { "epoch": 0.2583562230228659, "grad_norm": 0.45332831144332886, "learning_rate": 4.6276320223549793e-05, "loss": 0.1918, "step": 14485 }, { "epoch": 0.2583740591445796, "grad_norm": 0.29866304993629456, "learning_rate": 4.627550289114625e-05, "loss": 0.2422, "step": 14486 }, { "epoch": 0.2583918952662933, "grad_norm": 0.26704224944114685, "learning_rate": 4.627468547627152e-05, "loss": 0.1693, "step": 14487 }, { "epoch": 0.258409731388007, "grad_norm": 0.21081265807151794, "learning_rate": 4.627386797892875e-05, "loss": 0.2097, "step": 14488 }, { "epoch": 0.2584275675097207, "grad_norm": 0.26098814606666565, "learning_rate": 4.627305039912112e-05, "loss": 0.1999, "step": 14489 }, { "epoch": 0.25844540363143437, "grad_norm": 0.34457314014434814, "learning_rate": 4.6272232736851804e-05, "loss": 0.2224, "step": 14490 }, { "epoch": 0.25846323975314806, "grad_norm": 0.3143419027328491, "learning_rate": 4.6271414992123976e-05, "loss": 0.1551, "step": 14491 }, { "epoch": 0.25848107587486174, "grad_norm": 0.27008283138275146, "learning_rate": 4.6270597164940777e-05, "loss": 0.1689, "step": 14492 }, { "epoch": 0.2584989119965755, "grad_norm": 0.30262690782546997, "learning_rate": 4.626977925530541e-05, "loss": 0.1857, "step": 14493 }, { "epoch": 0.2585167481182892, "grad_norm": 0.2565249502658844, "learning_rate": 4.626896126322103e-05, "loss": 0.209, "step": 14494 }, { "epoch": 0.25853458424000286, "grad_norm": 0.25042498111724854, "learning_rate": 4.626814318869081e-05, "loss": 0.1541, "step": 14495 }, { "epoch": 0.25855242036171655, "grad_norm": 0.2768356502056122, "learning_rate": 4.6267325031717926e-05, "loss": 0.2116, "step": 14496 }, { "epoch": 0.25857025648343024, "grad_norm": 0.2424658238887787, "learning_rate": 4.626650679230553e-05, "loss": 0.1552, "step": 14497 }, { "epoch": 0.2585880926051439, "grad_norm": 0.23272937536239624, "learning_rate": 4.626568847045682e-05, "loss": 0.2005, "step": 14498 }, { "epoch": 0.2586059287268576, "grad_norm": 0.30662715435028076, "learning_rate": 4.626487006617496e-05, "loss": 0.16, "step": 14499 }, { "epoch": 0.2586237648485713, "grad_norm": 0.276518851518631, "learning_rate": 4.626405157946311e-05, "loss": 0.1994, "step": 14500 }, { "epoch": 0.25864160097028505, "grad_norm": 0.2522293031215668, "learning_rate": 4.6263233010324456e-05, "loss": 0.1682, "step": 14501 }, { "epoch": 0.25865943709199873, "grad_norm": 0.2535803020000458, "learning_rate": 4.6262414358762165e-05, "loss": 0.1662, "step": 14502 }, { "epoch": 0.2586772732137124, "grad_norm": 0.2939068675041199, "learning_rate": 4.626159562477941e-05, "loss": 0.2031, "step": 14503 }, { "epoch": 0.2586951093354261, "grad_norm": 0.2839019000530243, "learning_rate": 4.626077680837937e-05, "loss": 0.1746, "step": 14504 }, { "epoch": 0.2587129454571398, "grad_norm": 0.2303832322359085, "learning_rate": 4.625995790956522e-05, "loss": 0.1915, "step": 14505 }, { "epoch": 0.2587307815788535, "grad_norm": 0.2729906439781189, "learning_rate": 4.625913892834012e-05, "loss": 0.1714, "step": 14506 }, { "epoch": 0.2587486177005672, "grad_norm": 0.24131199717521667, "learning_rate": 4.625831986470726e-05, "loss": 0.183, "step": 14507 }, { "epoch": 0.25876645382228086, "grad_norm": 0.372177392244339, "learning_rate": 4.625750071866981e-05, "loss": 0.2103, "step": 14508 }, { "epoch": 0.2587842899439946, "grad_norm": 0.22164888679981232, "learning_rate": 4.6256681490230945e-05, "loss": 0.1672, "step": 14509 }, { "epoch": 0.2588021260657083, "grad_norm": 0.3263988196849823, "learning_rate": 4.625586217939384e-05, "loss": 0.2026, "step": 14510 }, { "epoch": 0.258819962187422, "grad_norm": 0.3316977620124817, "learning_rate": 4.6255042786161675e-05, "loss": 0.1813, "step": 14511 }, { "epoch": 0.25883779830913567, "grad_norm": 0.3202001750469208, "learning_rate": 4.625422331053762e-05, "loss": 0.1789, "step": 14512 }, { "epoch": 0.25885563443084936, "grad_norm": 0.23884595930576324, "learning_rate": 4.6253403752524855e-05, "loss": 0.1991, "step": 14513 }, { "epoch": 0.25887347055256305, "grad_norm": 0.26711705327033997, "learning_rate": 4.625258411212656e-05, "loss": 0.1802, "step": 14514 }, { "epoch": 0.25889130667427673, "grad_norm": 0.23574720323085785, "learning_rate": 4.62517643893459e-05, "loss": 0.1384, "step": 14515 }, { "epoch": 0.2589091427959904, "grad_norm": 0.33867430686950684, "learning_rate": 4.625094458418607e-05, "loss": 0.1882, "step": 14516 }, { "epoch": 0.2589269789177041, "grad_norm": 0.29728835821151733, "learning_rate": 4.6250124696650235e-05, "loss": 0.2051, "step": 14517 }, { "epoch": 0.25894481503941785, "grad_norm": 0.24823889136314392, "learning_rate": 4.624930472674158e-05, "loss": 0.2024, "step": 14518 }, { "epoch": 0.25896265116113154, "grad_norm": 0.3470822274684906, "learning_rate": 4.624848467446328e-05, "loss": 0.1622, "step": 14519 }, { "epoch": 0.25898048728284523, "grad_norm": 0.20998916029930115, "learning_rate": 4.6247664539818504e-05, "loss": 0.1778, "step": 14520 }, { "epoch": 0.2589983234045589, "grad_norm": 0.2235182821750641, "learning_rate": 4.6246844322810456e-05, "loss": 0.217, "step": 14521 }, { "epoch": 0.2590161595262726, "grad_norm": 0.23888911306858063, "learning_rate": 4.624602402344229e-05, "loss": 0.1934, "step": 14522 }, { "epoch": 0.2590339956479863, "grad_norm": 0.2445191740989685, "learning_rate": 4.6245203641717206e-05, "loss": 0.2209, "step": 14523 }, { "epoch": 0.2590518317697, "grad_norm": 0.2210816591978073, "learning_rate": 4.624438317763837e-05, "loss": 0.1818, "step": 14524 }, { "epoch": 0.25906966789141367, "grad_norm": 0.2667429745197296, "learning_rate": 4.624356263120897e-05, "loss": 0.1709, "step": 14525 }, { "epoch": 0.2590875040131274, "grad_norm": 0.2377333641052246, "learning_rate": 4.6242742002432176e-05, "loss": 0.2153, "step": 14526 }, { "epoch": 0.2591053401348411, "grad_norm": 0.2568122446537018, "learning_rate": 4.6241921291311184e-05, "loss": 0.1403, "step": 14527 }, { "epoch": 0.2591231762565548, "grad_norm": 0.35701045393943787, "learning_rate": 4.6241100497849165e-05, "loss": 0.2133, "step": 14528 }, { "epoch": 0.2591410123782685, "grad_norm": 0.3056379556655884, "learning_rate": 4.624027962204931e-05, "loss": 0.1688, "step": 14529 }, { "epoch": 0.25915884849998216, "grad_norm": 0.29644814133644104, "learning_rate": 4.623945866391479e-05, "loss": 0.2025, "step": 14530 }, { "epoch": 0.25917668462169585, "grad_norm": 0.3030856251716614, "learning_rate": 4.623863762344879e-05, "loss": 0.1644, "step": 14531 }, { "epoch": 0.25919452074340954, "grad_norm": 0.4780627191066742, "learning_rate": 4.6237816500654494e-05, "loss": 0.2265, "step": 14532 }, { "epoch": 0.25921235686512323, "grad_norm": 0.23053552210330963, "learning_rate": 4.62369952955351e-05, "loss": 0.1829, "step": 14533 }, { "epoch": 0.2592301929868369, "grad_norm": 0.22041228413581848, "learning_rate": 4.6236174008093764e-05, "loss": 0.1929, "step": 14534 }, { "epoch": 0.25924802910855066, "grad_norm": 0.23523174226284027, "learning_rate": 4.623535263833368e-05, "loss": 0.1761, "step": 14535 }, { "epoch": 0.25926586523026435, "grad_norm": 0.26908382773399353, "learning_rate": 4.623453118625804e-05, "loss": 0.1995, "step": 14536 }, { "epoch": 0.25928370135197804, "grad_norm": 0.2717522978782654, "learning_rate": 4.6233709651870026e-05, "loss": 0.1488, "step": 14537 }, { "epoch": 0.2593015374736917, "grad_norm": 0.2552168071269989, "learning_rate": 4.623288803517282e-05, "loss": 0.2113, "step": 14538 }, { "epoch": 0.2593193735954054, "grad_norm": 0.32829946279525757, "learning_rate": 4.6232066336169604e-05, "loss": 0.1177, "step": 14539 }, { "epoch": 0.2593372097171191, "grad_norm": 0.2889467477798462, "learning_rate": 4.623124455486357e-05, "loss": 0.2252, "step": 14540 }, { "epoch": 0.2593550458388328, "grad_norm": 0.22341616451740265, "learning_rate": 4.6230422691257893e-05, "loss": 0.1672, "step": 14541 }, { "epoch": 0.2593728819605465, "grad_norm": 0.30590370297431946, "learning_rate": 4.622960074535576e-05, "loss": 0.2161, "step": 14542 }, { "epoch": 0.2593907180822602, "grad_norm": 0.15577496588230133, "learning_rate": 4.622877871716037e-05, "loss": 0.1424, "step": 14543 }, { "epoch": 0.2594085542039739, "grad_norm": 0.2397838532924652, "learning_rate": 4.6227956606674905e-05, "loss": 0.2068, "step": 14544 }, { "epoch": 0.2594263903256876, "grad_norm": 0.3154611587524414, "learning_rate": 4.622713441390254e-05, "loss": 0.2012, "step": 14545 }, { "epoch": 0.2594442264474013, "grad_norm": 0.20436833798885345, "learning_rate": 4.6226312138846475e-05, "loss": 0.1531, "step": 14546 }, { "epoch": 0.25946206256911497, "grad_norm": 0.3715524673461914, "learning_rate": 4.622548978150989e-05, "loss": 0.1785, "step": 14547 }, { "epoch": 0.25947989869082866, "grad_norm": 0.2255830615758896, "learning_rate": 4.622466734189598e-05, "loss": 0.1252, "step": 14548 }, { "epoch": 0.25949773481254235, "grad_norm": 0.28741219639778137, "learning_rate": 4.6223844820007924e-05, "loss": 0.1994, "step": 14549 }, { "epoch": 0.25951557093425603, "grad_norm": 0.254725843667984, "learning_rate": 4.622302221584891e-05, "loss": 0.1266, "step": 14550 }, { "epoch": 0.2595334070559697, "grad_norm": 0.22727371752262115, "learning_rate": 4.6222199529422145e-05, "loss": 0.1739, "step": 14551 }, { "epoch": 0.25955124317768347, "grad_norm": 0.18915487825870514, "learning_rate": 4.622137676073079e-05, "loss": 0.1363, "step": 14552 }, { "epoch": 0.25956907929939715, "grad_norm": 0.21335235238075256, "learning_rate": 4.6220553909778065e-05, "loss": 0.1738, "step": 14553 }, { "epoch": 0.25958691542111084, "grad_norm": 0.28781911730766296, "learning_rate": 4.621973097656713e-05, "loss": 0.1828, "step": 14554 }, { "epoch": 0.25960475154282453, "grad_norm": 0.2517540156841278, "learning_rate": 4.62189079611012e-05, "loss": 0.186, "step": 14555 }, { "epoch": 0.2596225876645382, "grad_norm": 0.265123188495636, "learning_rate": 4.621808486338345e-05, "loss": 0.1472, "step": 14556 }, { "epoch": 0.2596404237862519, "grad_norm": 0.3527012765407562, "learning_rate": 4.621726168341707e-05, "loss": 0.2345, "step": 14557 }, { "epoch": 0.2596582599079656, "grad_norm": 0.2715505361557007, "learning_rate": 4.621643842120526e-05, "loss": 0.2043, "step": 14558 }, { "epoch": 0.2596760960296793, "grad_norm": 0.30191344022750854, "learning_rate": 4.6215615076751207e-05, "loss": 0.1913, "step": 14559 }, { "epoch": 0.259693932151393, "grad_norm": 0.2725018560886383, "learning_rate": 4.62147916500581e-05, "loss": 0.1931, "step": 14560 }, { "epoch": 0.2597117682731067, "grad_norm": 0.2341236174106598, "learning_rate": 4.6213968141129134e-05, "loss": 0.1782, "step": 14561 }, { "epoch": 0.2597296043948204, "grad_norm": 0.24975833296775818, "learning_rate": 4.6213144549967495e-05, "loss": 0.1879, "step": 14562 }, { "epoch": 0.2597474405165341, "grad_norm": 0.24866192042827606, "learning_rate": 4.6212320876576385e-05, "loss": 0.1764, "step": 14563 }, { "epoch": 0.2597652766382478, "grad_norm": 0.338969886302948, "learning_rate": 4.6211497120958996e-05, "loss": 0.2346, "step": 14564 }, { "epoch": 0.25978311275996147, "grad_norm": 0.2714281678199768, "learning_rate": 4.6210673283118514e-05, "loss": 0.1867, "step": 14565 }, { "epoch": 0.25980094888167515, "grad_norm": 0.2358761727809906, "learning_rate": 4.620984936305814e-05, "loss": 0.2094, "step": 14566 }, { "epoch": 0.25981878500338884, "grad_norm": 0.2552592158317566, "learning_rate": 4.6209025360781066e-05, "loss": 0.2277, "step": 14567 }, { "epoch": 0.2598366211251026, "grad_norm": 0.30145397782325745, "learning_rate": 4.620820127629048e-05, "loss": 0.1831, "step": 14568 }, { "epoch": 0.2598544572468163, "grad_norm": 0.23235675692558289, "learning_rate": 4.620737710958958e-05, "loss": 0.182, "step": 14569 }, { "epoch": 0.25987229336852996, "grad_norm": 0.5347310900688171, "learning_rate": 4.620655286068156e-05, "loss": 0.2004, "step": 14570 }, { "epoch": 0.25989012949024365, "grad_norm": 0.4997529685497284, "learning_rate": 4.620572852956963e-05, "loss": 0.1591, "step": 14571 }, { "epoch": 0.25990796561195734, "grad_norm": 0.23414045572280884, "learning_rate": 4.620490411625695e-05, "loss": 0.1612, "step": 14572 }, { "epoch": 0.259925801733671, "grad_norm": 0.1868872344493866, "learning_rate": 4.620407962074676e-05, "loss": 0.1527, "step": 14573 }, { "epoch": 0.2599436378553847, "grad_norm": 0.391832560300827, "learning_rate": 4.620325504304221e-05, "loss": 0.2218, "step": 14574 }, { "epoch": 0.2599614739770984, "grad_norm": 0.2683939039707184, "learning_rate": 4.620243038314654e-05, "loss": 0.1845, "step": 14575 }, { "epoch": 0.2599793100988121, "grad_norm": 0.25123825669288635, "learning_rate": 4.6201605641062915e-05, "loss": 0.1849, "step": 14576 }, { "epoch": 0.25999714622052583, "grad_norm": 0.3053325116634369, "learning_rate": 4.6200780816794554e-05, "loss": 0.1912, "step": 14577 }, { "epoch": 0.2600149823422395, "grad_norm": 0.25136125087738037, "learning_rate": 4.619995591034464e-05, "loss": 0.1653, "step": 14578 }, { "epoch": 0.2600328184639532, "grad_norm": 0.27196529507637024, "learning_rate": 4.619913092171637e-05, "loss": 0.2125, "step": 14579 }, { "epoch": 0.2600506545856669, "grad_norm": 0.27855661511421204, "learning_rate": 4.619830585091295e-05, "loss": 0.1745, "step": 14580 }, { "epoch": 0.2600684907073806, "grad_norm": 0.26694926619529724, "learning_rate": 4.6197480697937576e-05, "loss": 0.2441, "step": 14581 }, { "epoch": 0.26008632682909427, "grad_norm": 0.23903785645961761, "learning_rate": 4.619665546279345e-05, "loss": 0.2099, "step": 14582 }, { "epoch": 0.26010416295080796, "grad_norm": 0.2713642120361328, "learning_rate": 4.6195830145483754e-05, "loss": 0.1685, "step": 14583 }, { "epoch": 0.26012199907252165, "grad_norm": 0.27828171849250793, "learning_rate": 4.61950047460117e-05, "loss": 0.2328, "step": 14584 }, { "epoch": 0.2601398351942354, "grad_norm": 0.24836130440235138, "learning_rate": 4.61941792643805e-05, "loss": 0.1673, "step": 14585 }, { "epoch": 0.2601576713159491, "grad_norm": 0.2757941484451294, "learning_rate": 4.619335370059333e-05, "loss": 0.1338, "step": 14586 }, { "epoch": 0.26017550743766277, "grad_norm": 0.2903648912906647, "learning_rate": 4.619252805465341e-05, "loss": 0.1709, "step": 14587 }, { "epoch": 0.26019334355937646, "grad_norm": 0.223250150680542, "learning_rate": 4.619170232656393e-05, "loss": 0.1934, "step": 14588 }, { "epoch": 0.26021117968109014, "grad_norm": 0.19557170569896698, "learning_rate": 4.619087651632808e-05, "loss": 0.1606, "step": 14589 }, { "epoch": 0.26022901580280383, "grad_norm": 0.23488689959049225, "learning_rate": 4.619005062394909e-05, "loss": 0.1489, "step": 14590 }, { "epoch": 0.2602468519245175, "grad_norm": 0.30712801218032837, "learning_rate": 4.618922464943014e-05, "loss": 0.1975, "step": 14591 }, { "epoch": 0.2602646880462312, "grad_norm": 0.4634312689304352, "learning_rate": 4.618839859277443e-05, "loss": 0.2035, "step": 14592 }, { "epoch": 0.2602825241679449, "grad_norm": 0.3740062713623047, "learning_rate": 4.618757245398517e-05, "loss": 0.2023, "step": 14593 }, { "epoch": 0.26030036028965864, "grad_norm": 0.22786946594715118, "learning_rate": 4.618674623306557e-05, "loss": 0.1703, "step": 14594 }, { "epoch": 0.2603181964113723, "grad_norm": 0.2764323055744171, "learning_rate": 4.618591993001882e-05, "loss": 0.2282, "step": 14595 }, { "epoch": 0.260336032533086, "grad_norm": 0.2611202299594879, "learning_rate": 4.618509354484812e-05, "loss": 0.1974, "step": 14596 }, { "epoch": 0.2603538686547997, "grad_norm": 0.25055521726608276, "learning_rate": 4.618426707755669e-05, "loss": 0.1967, "step": 14597 }, { "epoch": 0.2603717047765134, "grad_norm": 0.23774002492427826, "learning_rate": 4.618344052814772e-05, "loss": 0.2276, "step": 14598 }, { "epoch": 0.2603895408982271, "grad_norm": 0.3096005916595459, "learning_rate": 4.618261389662442e-05, "loss": 0.2224, "step": 14599 }, { "epoch": 0.26040737701994077, "grad_norm": 0.2499011605978012, "learning_rate": 4.6181787182989986e-05, "loss": 0.2315, "step": 14600 }, { "epoch": 0.26042521314165445, "grad_norm": 0.3614709675312042, "learning_rate": 4.6180960387247635e-05, "loss": 0.1578, "step": 14601 }, { "epoch": 0.2604430492633682, "grad_norm": 0.3674885034561157, "learning_rate": 4.6180133509400565e-05, "loss": 0.2514, "step": 14602 }, { "epoch": 0.2604608853850819, "grad_norm": 0.26196131110191345, "learning_rate": 4.617930654945199e-05, "loss": 0.1972, "step": 14603 }, { "epoch": 0.2604787215067956, "grad_norm": 0.20775820314884186, "learning_rate": 4.6178479507405086e-05, "loss": 0.1799, "step": 14604 }, { "epoch": 0.26049655762850926, "grad_norm": 0.20737087726593018, "learning_rate": 4.6177652383263095e-05, "loss": 0.1558, "step": 14605 }, { "epoch": 0.26051439375022295, "grad_norm": 0.2366851568222046, "learning_rate": 4.617682517702921e-05, "loss": 0.1748, "step": 14606 }, { "epoch": 0.26053222987193664, "grad_norm": 0.37857529520988464, "learning_rate": 4.6175997888706634e-05, "loss": 0.1785, "step": 14607 }, { "epoch": 0.2605500659936503, "grad_norm": 0.31405702233314514, "learning_rate": 4.617517051829857e-05, "loss": 0.1392, "step": 14608 }, { "epoch": 0.260567902115364, "grad_norm": 0.26305896043777466, "learning_rate": 4.6174343065808247e-05, "loss": 0.1513, "step": 14609 }, { "epoch": 0.26058573823707776, "grad_norm": 0.24315159022808075, "learning_rate": 4.617351553123884e-05, "loss": 0.1626, "step": 14610 }, { "epoch": 0.26060357435879145, "grad_norm": 0.27016130089759827, "learning_rate": 4.617268791459358e-05, "loss": 0.1375, "step": 14611 }, { "epoch": 0.26062141048050513, "grad_norm": 0.2838382422924042, "learning_rate": 4.617186021587567e-05, "loss": 0.1815, "step": 14612 }, { "epoch": 0.2606392466022188, "grad_norm": 0.30013149976730347, "learning_rate": 4.6171032435088316e-05, "loss": 0.1915, "step": 14613 }, { "epoch": 0.2606570827239325, "grad_norm": 0.22592492401599884, "learning_rate": 4.617020457223473e-05, "loss": 0.181, "step": 14614 }, { "epoch": 0.2606749188456462, "grad_norm": 0.4308866262435913, "learning_rate": 4.6169376627318116e-05, "loss": 0.1961, "step": 14615 }, { "epoch": 0.2606927549673599, "grad_norm": 0.3728789985179901, "learning_rate": 4.616854860034169e-05, "loss": 0.1659, "step": 14616 }, { "epoch": 0.2607105910890736, "grad_norm": 0.24493689835071564, "learning_rate": 4.616772049130866e-05, "loss": 0.2097, "step": 14617 }, { "epoch": 0.26072842721078726, "grad_norm": 0.31610414385795593, "learning_rate": 4.6166892300222234e-05, "loss": 0.2272, "step": 14618 }, { "epoch": 0.260746263332501, "grad_norm": 0.31300243735313416, "learning_rate": 4.616606402708561e-05, "loss": 0.2174, "step": 14619 }, { "epoch": 0.2607640994542147, "grad_norm": 0.25682532787323, "learning_rate": 4.616523567190203e-05, "loss": 0.1827, "step": 14620 }, { "epoch": 0.2607819355759284, "grad_norm": 0.26203516125679016, "learning_rate": 4.616440723467468e-05, "loss": 0.2041, "step": 14621 }, { "epoch": 0.26079977169764207, "grad_norm": 0.17587150633335114, "learning_rate": 4.616357871540677e-05, "loss": 0.1749, "step": 14622 }, { "epoch": 0.26081760781935576, "grad_norm": 0.2796902358531952, "learning_rate": 4.6162750114101526e-05, "loss": 0.1966, "step": 14623 }, { "epoch": 0.26083544394106944, "grad_norm": 0.2103869915008545, "learning_rate": 4.616192143076214e-05, "loss": 0.1573, "step": 14624 }, { "epoch": 0.26085328006278313, "grad_norm": 0.24828559160232544, "learning_rate": 4.616109266539186e-05, "loss": 0.1716, "step": 14625 }, { "epoch": 0.2608711161844968, "grad_norm": 0.28482159972190857, "learning_rate": 4.6160263817993864e-05, "loss": 0.1845, "step": 14626 }, { "epoch": 0.26088895230621056, "grad_norm": 0.20587415993213654, "learning_rate": 4.615943488857137e-05, "loss": 0.1297, "step": 14627 }, { "epoch": 0.26090678842792425, "grad_norm": 0.24430646002292633, "learning_rate": 4.615860587712762e-05, "loss": 0.1856, "step": 14628 }, { "epoch": 0.26092462454963794, "grad_norm": 0.24143333733081818, "learning_rate": 4.6157776783665784e-05, "loss": 0.2147, "step": 14629 }, { "epoch": 0.26094246067135163, "grad_norm": 0.2912479639053345, "learning_rate": 4.6156947608189104e-05, "loss": 0.198, "step": 14630 }, { "epoch": 0.2609602967930653, "grad_norm": 0.2395295649766922, "learning_rate": 4.615611835070079e-05, "loss": 0.1227, "step": 14631 }, { "epoch": 0.260978132914779, "grad_norm": 0.29983842372894287, "learning_rate": 4.615528901120405e-05, "loss": 0.1272, "step": 14632 }, { "epoch": 0.2609959690364927, "grad_norm": 0.30038511753082275, "learning_rate": 4.615445958970211e-05, "loss": 0.1831, "step": 14633 }, { "epoch": 0.2610138051582064, "grad_norm": 0.27793991565704346, "learning_rate": 4.615363008619817e-05, "loss": 0.1801, "step": 14634 }, { "epoch": 0.26103164127992007, "grad_norm": 0.3724186420440674, "learning_rate": 4.6152800500695456e-05, "loss": 0.184, "step": 14635 }, { "epoch": 0.2610494774016338, "grad_norm": 0.2901003658771515, "learning_rate": 4.615197083319719e-05, "loss": 0.2038, "step": 14636 }, { "epoch": 0.2610673135233475, "grad_norm": 0.39684247970581055, "learning_rate": 4.615114108370657e-05, "loss": 0.1443, "step": 14637 }, { "epoch": 0.2610851496450612, "grad_norm": 0.26540371775627136, "learning_rate": 4.615031125222682e-05, "loss": 0.1815, "step": 14638 }, { "epoch": 0.2611029857667749, "grad_norm": 0.24698083102703094, "learning_rate": 4.614948133876117e-05, "loss": 0.1761, "step": 14639 }, { "epoch": 0.26112082188848856, "grad_norm": 0.3243093192577362, "learning_rate": 4.6148651343312815e-05, "loss": 0.1703, "step": 14640 }, { "epoch": 0.26113865801020225, "grad_norm": 0.2990565598011017, "learning_rate": 4.614782126588498e-05, "loss": 0.1899, "step": 14641 }, { "epoch": 0.26115649413191594, "grad_norm": 0.2859545946121216, "learning_rate": 4.61469911064809e-05, "loss": 0.1969, "step": 14642 }, { "epoch": 0.2611743302536296, "grad_norm": 0.30889734625816345, "learning_rate": 4.614616086510377e-05, "loss": 0.2243, "step": 14643 }, { "epoch": 0.26119216637534337, "grad_norm": 0.26461878418922424, "learning_rate": 4.6145330541756814e-05, "loss": 0.2149, "step": 14644 }, { "epoch": 0.26121000249705706, "grad_norm": 0.2526128590106964, "learning_rate": 4.614450013644326e-05, "loss": 0.173, "step": 14645 }, { "epoch": 0.26122783861877075, "grad_norm": 0.30586960911750793, "learning_rate": 4.614366964916631e-05, "loss": 0.2102, "step": 14646 }, { "epoch": 0.26124567474048443, "grad_norm": 0.3145003020763397, "learning_rate": 4.614283907992921e-05, "loss": 0.113, "step": 14647 }, { "epoch": 0.2612635108621981, "grad_norm": 0.4077993631362915, "learning_rate": 4.6142008428735154e-05, "loss": 0.2165, "step": 14648 }, { "epoch": 0.2612813469839118, "grad_norm": 0.2266421616077423, "learning_rate": 4.614117769558737e-05, "loss": 0.1591, "step": 14649 }, { "epoch": 0.2612991831056255, "grad_norm": 0.34424862265586853, "learning_rate": 4.614034688048908e-05, "loss": 0.2193, "step": 14650 }, { "epoch": 0.2613170192273392, "grad_norm": 0.4223160147666931, "learning_rate": 4.6139515983443506e-05, "loss": 0.2259, "step": 14651 }, { "epoch": 0.2613348553490529, "grad_norm": 0.2760487198829651, "learning_rate": 4.613868500445386e-05, "loss": 0.1846, "step": 14652 }, { "epoch": 0.2613526914707666, "grad_norm": 0.2091551423072815, "learning_rate": 4.613785394352337e-05, "loss": 0.1528, "step": 14653 }, { "epoch": 0.2613705275924803, "grad_norm": 0.27764102816581726, "learning_rate": 4.613702280065527e-05, "loss": 0.2424, "step": 14654 }, { "epoch": 0.261388363714194, "grad_norm": 0.22962181270122528, "learning_rate": 4.613619157585276e-05, "loss": 0.168, "step": 14655 }, { "epoch": 0.2614061998359077, "grad_norm": 0.2940472662448883, "learning_rate": 4.613536026911907e-05, "loss": 0.2298, "step": 14656 }, { "epoch": 0.26142403595762137, "grad_norm": 0.23704254627227783, "learning_rate": 4.613452888045743e-05, "loss": 0.148, "step": 14657 }, { "epoch": 0.26144187207933506, "grad_norm": 0.28653737902641296, "learning_rate": 4.6133697409871044e-05, "loss": 0.1515, "step": 14658 }, { "epoch": 0.26145970820104875, "grad_norm": 0.2383522242307663, "learning_rate": 4.613286585736316e-05, "loss": 0.1776, "step": 14659 }, { "epoch": 0.26147754432276243, "grad_norm": 0.3766253590583801, "learning_rate": 4.613203422293698e-05, "loss": 0.1528, "step": 14660 }, { "epoch": 0.2614953804444762, "grad_norm": 0.31416067481040955, "learning_rate": 4.613120250659575e-05, "loss": 0.1836, "step": 14661 }, { "epoch": 0.26151321656618987, "grad_norm": 0.3291376829147339, "learning_rate": 4.6130370708342665e-05, "loss": 0.135, "step": 14662 }, { "epoch": 0.26153105268790355, "grad_norm": 0.3606354892253876, "learning_rate": 4.612953882818097e-05, "loss": 0.177, "step": 14663 }, { "epoch": 0.26154888880961724, "grad_norm": 0.36071377992630005, "learning_rate": 4.612870686611389e-05, "loss": 0.1847, "step": 14664 }, { "epoch": 0.26156672493133093, "grad_norm": 0.3293820321559906, "learning_rate": 4.6127874822144644e-05, "loss": 0.1932, "step": 14665 }, { "epoch": 0.2615845610530446, "grad_norm": 0.2977519929409027, "learning_rate": 4.6127042696276453e-05, "loss": 0.199, "step": 14666 }, { "epoch": 0.2616023971747583, "grad_norm": 0.2657562792301178, "learning_rate": 4.612621048851255e-05, "loss": 0.1939, "step": 14667 }, { "epoch": 0.261620233296472, "grad_norm": 0.19356130063533783, "learning_rate": 4.6125378198856153e-05, "loss": 0.1556, "step": 14668 }, { "epoch": 0.26163806941818574, "grad_norm": 0.2492857277393341, "learning_rate": 4.612454582731051e-05, "loss": 0.1617, "step": 14669 }, { "epoch": 0.2616559055398994, "grad_norm": 0.25919419527053833, "learning_rate": 4.612371337387881e-05, "loss": 0.1589, "step": 14670 }, { "epoch": 0.2616737416616131, "grad_norm": 0.2730581760406494, "learning_rate": 4.612288083856431e-05, "loss": 0.2175, "step": 14671 }, { "epoch": 0.2616915777833268, "grad_norm": 0.3098990023136139, "learning_rate": 4.6122048221370226e-05, "loss": 0.1714, "step": 14672 }, { "epoch": 0.2617094139050405, "grad_norm": 0.3315688669681549, "learning_rate": 4.6121215522299796e-05, "loss": 0.1677, "step": 14673 }, { "epoch": 0.2617272500267542, "grad_norm": 0.22881069779396057, "learning_rate": 4.612038274135624e-05, "loss": 0.1676, "step": 14674 }, { "epoch": 0.26174508614846786, "grad_norm": 0.2312840223312378, "learning_rate": 4.611954987854278e-05, "loss": 0.1618, "step": 14675 }, { "epoch": 0.26176292227018155, "grad_norm": 0.407091349363327, "learning_rate": 4.611871693386264e-05, "loss": 0.2257, "step": 14676 }, { "epoch": 0.26178075839189524, "grad_norm": 0.317665159702301, "learning_rate": 4.611788390731907e-05, "loss": 0.1842, "step": 14677 }, { "epoch": 0.261798594513609, "grad_norm": 0.2729001045227051, "learning_rate": 4.61170507989153e-05, "loss": 0.2042, "step": 14678 }, { "epoch": 0.26181643063532267, "grad_norm": 0.26450785994529724, "learning_rate": 4.611621760865453e-05, "loss": 0.2182, "step": 14679 }, { "epoch": 0.26183426675703636, "grad_norm": 0.24056072533130646, "learning_rate": 4.6115384336540005e-05, "loss": 0.1514, "step": 14680 }, { "epoch": 0.26185210287875005, "grad_norm": 0.35893580317497253, "learning_rate": 4.611455098257497e-05, "loss": 0.2196, "step": 14681 }, { "epoch": 0.26186993900046374, "grad_norm": 0.23031428456306458, "learning_rate": 4.611371754676264e-05, "loss": 0.1942, "step": 14682 }, { "epoch": 0.2618877751221774, "grad_norm": 0.17973680794239044, "learning_rate": 4.611288402910624e-05, "loss": 0.1529, "step": 14683 }, { "epoch": 0.2619056112438911, "grad_norm": 0.25546473264694214, "learning_rate": 4.611205042960901e-05, "loss": 0.1887, "step": 14684 }, { "epoch": 0.2619234473656048, "grad_norm": 0.3437134325504303, "learning_rate": 4.611121674827419e-05, "loss": 0.2135, "step": 14685 }, { "epoch": 0.26194128348731854, "grad_norm": 0.2834162414073944, "learning_rate": 4.611038298510499e-05, "loss": 0.1867, "step": 14686 }, { "epoch": 0.26195911960903223, "grad_norm": 0.25561559200286865, "learning_rate": 4.610954914010467e-05, "loss": 0.174, "step": 14687 }, { "epoch": 0.2619769557307459, "grad_norm": 0.2823486924171448, "learning_rate": 4.610871521327644e-05, "loss": 0.2226, "step": 14688 }, { "epoch": 0.2619947918524596, "grad_norm": 0.3704608976840973, "learning_rate": 4.610788120462354e-05, "loss": 0.2391, "step": 14689 }, { "epoch": 0.2620126279741733, "grad_norm": 0.39324644207954407, "learning_rate": 4.6107047114149195e-05, "loss": 0.174, "step": 14690 }, { "epoch": 0.262030464095887, "grad_norm": 0.23658964037895203, "learning_rate": 4.6106212941856655e-05, "loss": 0.1857, "step": 14691 }, { "epoch": 0.26204830021760067, "grad_norm": 0.25668588280677795, "learning_rate": 4.6105378687749134e-05, "loss": 0.1974, "step": 14692 }, { "epoch": 0.26206613633931436, "grad_norm": 0.23021358251571655, "learning_rate": 4.610454435182988e-05, "loss": 0.2043, "step": 14693 }, { "epoch": 0.26208397246102805, "grad_norm": 0.3021219074726105, "learning_rate": 4.610370993410212e-05, "loss": 0.1289, "step": 14694 }, { "epoch": 0.2621018085827418, "grad_norm": 0.3289857506752014, "learning_rate": 4.6102875434569095e-05, "loss": 0.2288, "step": 14695 }, { "epoch": 0.2621196447044555, "grad_norm": 0.2981726825237274, "learning_rate": 4.610204085323404e-05, "loss": 0.1573, "step": 14696 }, { "epoch": 0.26213748082616917, "grad_norm": 0.23678918182849884, "learning_rate": 4.6101206190100175e-05, "loss": 0.1646, "step": 14697 }, { "epoch": 0.26215531694788285, "grad_norm": 0.31751492619514465, "learning_rate": 4.610037144517076e-05, "loss": 0.1601, "step": 14698 }, { "epoch": 0.26217315306959654, "grad_norm": 0.27791768312454224, "learning_rate": 4.609953661844901e-05, "loss": 0.1805, "step": 14699 }, { "epoch": 0.26219098919131023, "grad_norm": 0.2689625024795532, "learning_rate": 4.609870170993817e-05, "loss": 0.175, "step": 14700 }, { "epoch": 0.2622088253130239, "grad_norm": 0.3398139774799347, "learning_rate": 4.6097866719641474e-05, "loss": 0.1685, "step": 14701 }, { "epoch": 0.2622266614347376, "grad_norm": 0.28132590651512146, "learning_rate": 4.6097031647562164e-05, "loss": 0.1978, "step": 14702 }, { "epoch": 0.26224449755645135, "grad_norm": 0.1833999902009964, "learning_rate": 4.609619649370346e-05, "loss": 0.1689, "step": 14703 }, { "epoch": 0.26226233367816504, "grad_norm": 0.2901715040206909, "learning_rate": 4.6095361258068624e-05, "loss": 0.1558, "step": 14704 }, { "epoch": 0.2622801697998787, "grad_norm": 0.27557581663131714, "learning_rate": 4.6094525940660885e-05, "loss": 0.1686, "step": 14705 }, { "epoch": 0.2622980059215924, "grad_norm": 0.26411107182502747, "learning_rate": 4.6093690541483465e-05, "loss": 0.1837, "step": 14706 }, { "epoch": 0.2623158420433061, "grad_norm": 0.2018648236989975, "learning_rate": 4.609285506053962e-05, "loss": 0.1646, "step": 14707 }, { "epoch": 0.2623336781650198, "grad_norm": 0.28513509035110474, "learning_rate": 4.609201949783259e-05, "loss": 0.1896, "step": 14708 }, { "epoch": 0.2623515142867335, "grad_norm": 0.1935337334871292, "learning_rate": 4.609118385336559e-05, "loss": 0.1637, "step": 14709 }, { "epoch": 0.26236935040844717, "grad_norm": 0.36007270216941833, "learning_rate": 4.6090348127141895e-05, "loss": 0.2552, "step": 14710 }, { "epoch": 0.26238718653016085, "grad_norm": 0.3790152966976166, "learning_rate": 4.608951231916472e-05, "loss": 0.1821, "step": 14711 }, { "epoch": 0.2624050226518746, "grad_norm": 0.2814227342605591, "learning_rate": 4.6088676429437314e-05, "loss": 0.1775, "step": 14712 }, { "epoch": 0.2624228587735883, "grad_norm": 0.22261036932468414, "learning_rate": 4.608784045796291e-05, "loss": 0.1873, "step": 14713 }, { "epoch": 0.262440694895302, "grad_norm": 0.2574446201324463, "learning_rate": 4.608700440474475e-05, "loss": 0.1596, "step": 14714 }, { "epoch": 0.26245853101701566, "grad_norm": 0.2813780605792999, "learning_rate": 4.608616826978609e-05, "loss": 0.2092, "step": 14715 }, { "epoch": 0.26247636713872935, "grad_norm": 0.26358798146247864, "learning_rate": 4.6085332053090146e-05, "loss": 0.1605, "step": 14716 }, { "epoch": 0.26249420326044304, "grad_norm": 0.3280124366283417, "learning_rate": 4.608449575466018e-05, "loss": 0.2244, "step": 14717 }, { "epoch": 0.2625120393821567, "grad_norm": 0.3204196095466614, "learning_rate": 4.6083659374499424e-05, "loss": 0.1648, "step": 14718 }, { "epoch": 0.2625298755038704, "grad_norm": 0.24182642996311188, "learning_rate": 4.608282291261112e-05, "loss": 0.1921, "step": 14719 }, { "epoch": 0.26254771162558416, "grad_norm": 0.3450903296470642, "learning_rate": 4.608198636899851e-05, "loss": 0.2257, "step": 14720 }, { "epoch": 0.26256554774729784, "grad_norm": 0.30469459295272827, "learning_rate": 4.608114974366485e-05, "loss": 0.129, "step": 14721 }, { "epoch": 0.26258338386901153, "grad_norm": 0.32286977767944336, "learning_rate": 4.608031303661337e-05, "loss": 0.1124, "step": 14722 }, { "epoch": 0.2626012199907252, "grad_norm": 0.20940642058849335, "learning_rate": 4.60794762478473e-05, "loss": 0.1242, "step": 14723 }, { "epoch": 0.2626190561124389, "grad_norm": 0.2885874807834625, "learning_rate": 4.607863937736991e-05, "loss": 0.1918, "step": 14724 }, { "epoch": 0.2626368922341526, "grad_norm": 0.25926509499549866, "learning_rate": 4.6077802425184444e-05, "loss": 0.2039, "step": 14725 }, { "epoch": 0.2626547283558663, "grad_norm": 0.2674572467803955, "learning_rate": 4.6076965391294124e-05, "loss": 0.15, "step": 14726 }, { "epoch": 0.26267256447758, "grad_norm": 0.282661497592926, "learning_rate": 4.6076128275702205e-05, "loss": 0.1749, "step": 14727 }, { "epoch": 0.2626904005992937, "grad_norm": 0.37727436423301697, "learning_rate": 4.6075291078411945e-05, "loss": 0.2124, "step": 14728 }, { "epoch": 0.2627082367210074, "grad_norm": 0.2580815553665161, "learning_rate": 4.6074453799426564e-05, "loss": 0.192, "step": 14729 }, { "epoch": 0.2627260728427211, "grad_norm": 0.3615904748439789, "learning_rate": 4.607361643874932e-05, "loss": 0.1766, "step": 14730 }, { "epoch": 0.2627439089644348, "grad_norm": 0.2709302306175232, "learning_rate": 4.607277899638347e-05, "loss": 0.1882, "step": 14731 }, { "epoch": 0.26276174508614847, "grad_norm": 0.24105305969715118, "learning_rate": 4.607194147233225e-05, "loss": 0.1433, "step": 14732 }, { "epoch": 0.26277958120786216, "grad_norm": 0.20890876650810242, "learning_rate": 4.607110386659891e-05, "loss": 0.1813, "step": 14733 }, { "epoch": 0.26279741732957584, "grad_norm": 0.26178038120269775, "learning_rate": 4.607026617918668e-05, "loss": 0.1533, "step": 14734 }, { "epoch": 0.26281525345128953, "grad_norm": 0.23858247697353363, "learning_rate": 4.606942841009883e-05, "loss": 0.1827, "step": 14735 }, { "epoch": 0.2628330895730032, "grad_norm": 0.2497948259115219, "learning_rate": 4.60685905593386e-05, "loss": 0.1655, "step": 14736 }, { "epoch": 0.26285092569471696, "grad_norm": 0.23599396646022797, "learning_rate": 4.606775262690923e-05, "loss": 0.19, "step": 14737 }, { "epoch": 0.26286876181643065, "grad_norm": 0.2434827983379364, "learning_rate": 4.6066914612813974e-05, "loss": 0.1701, "step": 14738 }, { "epoch": 0.26288659793814434, "grad_norm": 0.2567709982395172, "learning_rate": 4.606607651705609e-05, "loss": 0.1936, "step": 14739 }, { "epoch": 0.262904434059858, "grad_norm": 0.3965451121330261, "learning_rate": 4.606523833963881e-05, "loss": 0.2325, "step": 14740 }, { "epoch": 0.2629222701815717, "grad_norm": 0.1909569650888443, "learning_rate": 4.6064400080565395e-05, "loss": 0.1411, "step": 14741 }, { "epoch": 0.2629401063032854, "grad_norm": 0.2688005566596985, "learning_rate": 4.606356173983908e-05, "loss": 0.1833, "step": 14742 }, { "epoch": 0.2629579424249991, "grad_norm": 0.2818892002105713, "learning_rate": 4.6062723317463136e-05, "loss": 0.1848, "step": 14743 }, { "epoch": 0.2629757785467128, "grad_norm": 0.24099214375019073, "learning_rate": 4.6061884813440796e-05, "loss": 0.1594, "step": 14744 }, { "epoch": 0.2629936146684265, "grad_norm": 0.266245037317276, "learning_rate": 4.6061046227775316e-05, "loss": 0.2316, "step": 14745 }, { "epoch": 0.2630114507901402, "grad_norm": 0.2762731909751892, "learning_rate": 4.606020756046995e-05, "loss": 0.1739, "step": 14746 }, { "epoch": 0.2630292869118539, "grad_norm": 0.31140822172164917, "learning_rate": 4.605936881152794e-05, "loss": 0.2014, "step": 14747 }, { "epoch": 0.2630471230335676, "grad_norm": 0.2451024204492569, "learning_rate": 4.605852998095255e-05, "loss": 0.1918, "step": 14748 }, { "epoch": 0.2630649591552813, "grad_norm": 0.4069274365901947, "learning_rate": 4.605769106874702e-05, "loss": 0.1374, "step": 14749 }, { "epoch": 0.26308279527699496, "grad_norm": 0.2511003315448761, "learning_rate": 4.60568520749146e-05, "loss": 0.1413, "step": 14750 }, { "epoch": 0.26310063139870865, "grad_norm": 0.226557195186615, "learning_rate": 4.605601299945856e-05, "loss": 0.2155, "step": 14751 }, { "epoch": 0.26311846752042234, "grad_norm": 0.262915700674057, "learning_rate": 4.605517384238214e-05, "loss": 0.1531, "step": 14752 }, { "epoch": 0.263136303642136, "grad_norm": 0.38432836532592773, "learning_rate": 4.6054334603688584e-05, "loss": 0.1539, "step": 14753 }, { "epoch": 0.26315413976384977, "grad_norm": 0.3766404092311859, "learning_rate": 4.605349528338116e-05, "loss": 0.1755, "step": 14754 }, { "epoch": 0.26317197588556346, "grad_norm": 0.2324799746274948, "learning_rate": 4.6052655881463126e-05, "loss": 0.1566, "step": 14755 }, { "epoch": 0.26318981200727715, "grad_norm": 0.3256889581680298, "learning_rate": 4.6051816397937714e-05, "loss": 0.1893, "step": 14756 }, { "epoch": 0.26320764812899083, "grad_norm": 0.2917213439941406, "learning_rate": 4.605097683280819e-05, "loss": 0.1898, "step": 14757 }, { "epoch": 0.2632254842507045, "grad_norm": 0.34756094217300415, "learning_rate": 4.605013718607782e-05, "loss": 0.2098, "step": 14758 }, { "epoch": 0.2632433203724182, "grad_norm": 0.3783144950866699, "learning_rate": 4.6049297457749844e-05, "loss": 0.2609, "step": 14759 }, { "epoch": 0.2632611564941319, "grad_norm": 0.4534520208835602, "learning_rate": 4.6048457647827515e-05, "loss": 0.1686, "step": 14760 }, { "epoch": 0.2632789926158456, "grad_norm": 0.24218110740184784, "learning_rate": 4.60476177563141e-05, "loss": 0.2359, "step": 14761 }, { "epoch": 0.26329682873755933, "grad_norm": 0.2579290568828583, "learning_rate": 4.604677778321285e-05, "loss": 0.1819, "step": 14762 }, { "epoch": 0.263314664859273, "grad_norm": 0.39508339762687683, "learning_rate": 4.6045937728527014e-05, "loss": 0.2125, "step": 14763 }, { "epoch": 0.2633325009809867, "grad_norm": 0.3167424499988556, "learning_rate": 4.604509759225986e-05, "loss": 0.151, "step": 14764 }, { "epoch": 0.2633503371027004, "grad_norm": 0.23372012376785278, "learning_rate": 4.6044257374414636e-05, "loss": 0.2015, "step": 14765 }, { "epoch": 0.2633681732244141, "grad_norm": 0.36966976523399353, "learning_rate": 4.6043417074994596e-05, "loss": 0.1999, "step": 14766 }, { "epoch": 0.26338600934612777, "grad_norm": 0.22495873272418976, "learning_rate": 4.6042576694003014e-05, "loss": 0.1575, "step": 14767 }, { "epoch": 0.26340384546784146, "grad_norm": 0.3362847566604614, "learning_rate": 4.6041736231443135e-05, "loss": 0.2179, "step": 14768 }, { "epoch": 0.26342168158955515, "grad_norm": 0.38222536444664, "learning_rate": 4.604089568731821e-05, "loss": 0.1585, "step": 14769 }, { "epoch": 0.2634395177112689, "grad_norm": 0.3842891752719879, "learning_rate": 4.604005506163152e-05, "loss": 0.2175, "step": 14770 }, { "epoch": 0.2634573538329826, "grad_norm": 0.33944371342658997, "learning_rate": 4.6039214354386296e-05, "loss": 0.1776, "step": 14771 }, { "epoch": 0.26347518995469627, "grad_norm": 0.28608548641204834, "learning_rate": 4.6038373565585816e-05, "loss": 0.1851, "step": 14772 }, { "epoch": 0.26349302607640995, "grad_norm": 0.2767927348613739, "learning_rate": 4.603753269523333e-05, "loss": 0.169, "step": 14773 }, { "epoch": 0.26351086219812364, "grad_norm": 0.22000938653945923, "learning_rate": 4.603669174333211e-05, "loss": 0.1863, "step": 14774 }, { "epoch": 0.26352869831983733, "grad_norm": 0.2553474009037018, "learning_rate": 4.60358507098854e-05, "loss": 0.1396, "step": 14775 }, { "epoch": 0.263546534441551, "grad_norm": 0.2102203667163849, "learning_rate": 4.603500959489647e-05, "loss": 0.1612, "step": 14776 }, { "epoch": 0.2635643705632647, "grad_norm": 0.3251095712184906, "learning_rate": 4.603416839836857e-05, "loss": 0.2209, "step": 14777 }, { "epoch": 0.2635822066849784, "grad_norm": 0.23818355798721313, "learning_rate": 4.603332712030498e-05, "loss": 0.1646, "step": 14778 }, { "epoch": 0.26360004280669214, "grad_norm": 0.36736127734184265, "learning_rate": 4.603248576070894e-05, "loss": 0.1883, "step": 14779 }, { "epoch": 0.2636178789284058, "grad_norm": 0.39555251598358154, "learning_rate": 4.603164431958373e-05, "loss": 0.1586, "step": 14780 }, { "epoch": 0.2636357150501195, "grad_norm": 0.3390832245349884, "learning_rate": 4.6030802796932594e-05, "loss": 0.1649, "step": 14781 }, { "epoch": 0.2636535511718332, "grad_norm": 0.21361882984638214, "learning_rate": 4.602996119275881e-05, "loss": 0.169, "step": 14782 }, { "epoch": 0.2636713872935469, "grad_norm": 0.2697640657424927, "learning_rate": 4.602911950706563e-05, "loss": 0.1596, "step": 14783 }, { "epoch": 0.2636892234152606, "grad_norm": 0.23366592824459076, "learning_rate": 4.6028277739856315e-05, "loss": 0.1736, "step": 14784 }, { "epoch": 0.26370705953697426, "grad_norm": 0.32984569668769836, "learning_rate": 4.602743589113413e-05, "loss": 0.1447, "step": 14785 }, { "epoch": 0.26372489565868795, "grad_norm": 0.2873001992702484, "learning_rate": 4.6026593960902356e-05, "loss": 0.2081, "step": 14786 }, { "epoch": 0.2637427317804017, "grad_norm": 0.3492843806743622, "learning_rate": 4.602575194916423e-05, "loss": 0.1993, "step": 14787 }, { "epoch": 0.2637605679021154, "grad_norm": 0.3649522364139557, "learning_rate": 4.6024909855923024e-05, "loss": 0.2071, "step": 14788 }, { "epoch": 0.26377840402382907, "grad_norm": 0.23045486211776733, "learning_rate": 4.6024067681182014e-05, "loss": 0.1525, "step": 14789 }, { "epoch": 0.26379624014554276, "grad_norm": 0.23058190941810608, "learning_rate": 4.602322542494446e-05, "loss": 0.1891, "step": 14790 }, { "epoch": 0.26381407626725645, "grad_norm": 0.3273070454597473, "learning_rate": 4.602238308721362e-05, "loss": 0.2048, "step": 14791 }, { "epoch": 0.26383191238897014, "grad_norm": 0.3690316081047058, "learning_rate": 4.602154066799275e-05, "loss": 0.2785, "step": 14792 }, { "epoch": 0.2638497485106838, "grad_norm": 0.3431062698364258, "learning_rate": 4.602069816728514e-05, "loss": 0.1687, "step": 14793 }, { "epoch": 0.2638675846323975, "grad_norm": 0.24640703201293945, "learning_rate": 4.601985558509404e-05, "loss": 0.2048, "step": 14794 }, { "epoch": 0.2638854207541112, "grad_norm": 0.23419922590255737, "learning_rate": 4.601901292142272e-05, "loss": 0.1385, "step": 14795 }, { "epoch": 0.26390325687582494, "grad_norm": 0.21126295626163483, "learning_rate": 4.6018170176274445e-05, "loss": 0.1834, "step": 14796 }, { "epoch": 0.26392109299753863, "grad_norm": 0.23499855399131775, "learning_rate": 4.601732734965248e-05, "loss": 0.1665, "step": 14797 }, { "epoch": 0.2639389291192523, "grad_norm": 0.641426146030426, "learning_rate": 4.6016484441560103e-05, "loss": 0.174, "step": 14798 }, { "epoch": 0.263956765240966, "grad_norm": 0.31874483823776245, "learning_rate": 4.601564145200057e-05, "loss": 0.2092, "step": 14799 }, { "epoch": 0.2639746013626797, "grad_norm": 0.41482603549957275, "learning_rate": 4.601479838097715e-05, "loss": 0.2215, "step": 14800 }, { "epoch": 0.2639924374843934, "grad_norm": 0.3151341378688812, "learning_rate": 4.6013955228493115e-05, "loss": 0.2024, "step": 14801 }, { "epoch": 0.26401027360610707, "grad_norm": 0.24253544211387634, "learning_rate": 4.6013111994551736e-05, "loss": 0.1869, "step": 14802 }, { "epoch": 0.26402810972782076, "grad_norm": 0.3533554673194885, "learning_rate": 4.601226867915627e-05, "loss": 0.1484, "step": 14803 }, { "epoch": 0.2640459458495345, "grad_norm": 0.29354128241539, "learning_rate": 4.6011425282309996e-05, "loss": 0.173, "step": 14804 }, { "epoch": 0.2640637819712482, "grad_norm": 0.25246462225914, "learning_rate": 4.601058180401619e-05, "loss": 0.1489, "step": 14805 }, { "epoch": 0.2640816180929619, "grad_norm": 0.30939918756484985, "learning_rate": 4.600973824427809e-05, "loss": 0.1946, "step": 14806 }, { "epoch": 0.26409945421467557, "grad_norm": 0.2330443263053894, "learning_rate": 4.600889460309901e-05, "loss": 0.1423, "step": 14807 }, { "epoch": 0.26411729033638925, "grad_norm": 0.24471040070056915, "learning_rate": 4.6008050880482184e-05, "loss": 0.1779, "step": 14808 }, { "epoch": 0.26413512645810294, "grad_norm": 0.31686288118362427, "learning_rate": 4.60072070764309e-05, "loss": 0.2071, "step": 14809 }, { "epoch": 0.26415296257981663, "grad_norm": 0.30483901500701904, "learning_rate": 4.600636319094843e-05, "loss": 0.1811, "step": 14810 }, { "epoch": 0.2641707987015303, "grad_norm": 0.2643861770629883, "learning_rate": 4.600551922403804e-05, "loss": 0.162, "step": 14811 }, { "epoch": 0.264188634823244, "grad_norm": 0.33887606859207153, "learning_rate": 4.6004675175702994e-05, "loss": 0.2067, "step": 14812 }, { "epoch": 0.26420647094495775, "grad_norm": 0.22127081453800201, "learning_rate": 4.6003831045946584e-05, "loss": 0.182, "step": 14813 }, { "epoch": 0.26422430706667144, "grad_norm": 0.27946820855140686, "learning_rate": 4.6002986834772066e-05, "loss": 0.2271, "step": 14814 }, { "epoch": 0.2642421431883851, "grad_norm": 0.3012564480304718, "learning_rate": 4.600214254218271e-05, "loss": 0.1568, "step": 14815 }, { "epoch": 0.2642599793100988, "grad_norm": 0.2642149031162262, "learning_rate": 4.6001298168181804e-05, "loss": 0.1863, "step": 14816 }, { "epoch": 0.2642778154318125, "grad_norm": 0.291229248046875, "learning_rate": 4.600045371277262e-05, "loss": 0.2003, "step": 14817 }, { "epoch": 0.2642956515535262, "grad_norm": 0.26569968461990356, "learning_rate": 4.599960917595841e-05, "loss": 0.1767, "step": 14818 }, { "epoch": 0.2643134876752399, "grad_norm": 0.4270315170288086, "learning_rate": 4.599876455774246e-05, "loss": 0.2409, "step": 14819 }, { "epoch": 0.26433132379695357, "grad_norm": 0.27255186438560486, "learning_rate": 4.5997919858128056e-05, "loss": 0.1935, "step": 14820 }, { "epoch": 0.2643491599186673, "grad_norm": 0.2789528965950012, "learning_rate": 4.599707507711846e-05, "loss": 0.1568, "step": 14821 }, { "epoch": 0.264366996040381, "grad_norm": 0.27071553468704224, "learning_rate": 4.5996230214716944e-05, "loss": 0.1357, "step": 14822 }, { "epoch": 0.2643848321620947, "grad_norm": 0.3277747631072998, "learning_rate": 4.599538527092679e-05, "loss": 0.1625, "step": 14823 }, { "epoch": 0.2644026682838084, "grad_norm": 0.28402459621429443, "learning_rate": 4.599454024575127e-05, "loss": 0.1828, "step": 14824 }, { "epoch": 0.26442050440552206, "grad_norm": 0.29564547538757324, "learning_rate": 4.599369513919367e-05, "loss": 0.192, "step": 14825 }, { "epoch": 0.26443834052723575, "grad_norm": 0.30252617597579956, "learning_rate": 4.5992849951257246e-05, "loss": 0.1997, "step": 14826 }, { "epoch": 0.26445617664894944, "grad_norm": 0.29845741391181946, "learning_rate": 4.599200468194529e-05, "loss": 0.2428, "step": 14827 }, { "epoch": 0.2644740127706631, "grad_norm": 0.23279666900634766, "learning_rate": 4.599115933126107e-05, "loss": 0.1603, "step": 14828 }, { "epoch": 0.26449184889237687, "grad_norm": 0.3187519907951355, "learning_rate": 4.599031389920787e-05, "loss": 0.1812, "step": 14829 }, { "epoch": 0.26450968501409056, "grad_norm": 0.4159899055957794, "learning_rate": 4.598946838578896e-05, "loss": 0.1809, "step": 14830 }, { "epoch": 0.26452752113580424, "grad_norm": 0.2733418345451355, "learning_rate": 4.598862279100762e-05, "loss": 0.2004, "step": 14831 }, { "epoch": 0.26454535725751793, "grad_norm": 0.2938709557056427, "learning_rate": 4.598777711486714e-05, "loss": 0.1865, "step": 14832 }, { "epoch": 0.2645631933792316, "grad_norm": 0.2521997094154358, "learning_rate": 4.598693135737078e-05, "loss": 0.1584, "step": 14833 }, { "epoch": 0.2645810295009453, "grad_norm": 0.3254789710044861, "learning_rate": 4.598608551852181e-05, "loss": 0.2071, "step": 14834 }, { "epoch": 0.264598865622659, "grad_norm": 0.2943840026855469, "learning_rate": 4.598523959832355e-05, "loss": 0.1723, "step": 14835 }, { "epoch": 0.2646167017443727, "grad_norm": 0.2356782704591751, "learning_rate": 4.598439359677924e-05, "loss": 0.1964, "step": 14836 }, { "epoch": 0.2646345378660864, "grad_norm": 0.3495713174343109, "learning_rate": 4.598354751389217e-05, "loss": 0.2269, "step": 14837 }, { "epoch": 0.2646523739878001, "grad_norm": 0.22048158943653107, "learning_rate": 4.598270134966562e-05, "loss": 0.163, "step": 14838 }, { "epoch": 0.2646702101095138, "grad_norm": 0.3752993047237396, "learning_rate": 4.598185510410289e-05, "loss": 0.1806, "step": 14839 }, { "epoch": 0.2646880462312275, "grad_norm": 0.29053422808647156, "learning_rate": 4.5981008777207225e-05, "loss": 0.1866, "step": 14840 }, { "epoch": 0.2647058823529412, "grad_norm": 0.23643746972084045, "learning_rate": 4.598016236898193e-05, "loss": 0.1688, "step": 14841 }, { "epoch": 0.26472371847465487, "grad_norm": 0.2562626302242279, "learning_rate": 4.597931587943029e-05, "loss": 0.1992, "step": 14842 }, { "epoch": 0.26474155459636856, "grad_norm": 0.30713966488838196, "learning_rate": 4.597846930855556e-05, "loss": 0.2131, "step": 14843 }, { "epoch": 0.26475939071808224, "grad_norm": 0.2793387770652771, "learning_rate": 4.597762265636104e-05, "loss": 0.1766, "step": 14844 }, { "epoch": 0.26477722683979593, "grad_norm": 0.23357027769088745, "learning_rate": 4.5976775922850014e-05, "loss": 0.2249, "step": 14845 }, { "epoch": 0.2647950629615097, "grad_norm": 0.2755261957645416, "learning_rate": 4.597592910802575e-05, "loss": 0.1462, "step": 14846 }, { "epoch": 0.26481289908322336, "grad_norm": 0.35624149441719055, "learning_rate": 4.597508221189155e-05, "loss": 0.1886, "step": 14847 }, { "epoch": 0.26483073520493705, "grad_norm": 0.2380143105983734, "learning_rate": 4.597423523445068e-05, "loss": 0.1912, "step": 14848 }, { "epoch": 0.26484857132665074, "grad_norm": 0.2834615111351013, "learning_rate": 4.597338817570643e-05, "loss": 0.1752, "step": 14849 }, { "epoch": 0.2648664074483644, "grad_norm": 0.26098644733428955, "learning_rate": 4.597254103566209e-05, "loss": 0.1943, "step": 14850 }, { "epoch": 0.2648842435700781, "grad_norm": 0.3464111089706421, "learning_rate": 4.5971693814320934e-05, "loss": 0.2229, "step": 14851 }, { "epoch": 0.2649020796917918, "grad_norm": 0.2722216844558716, "learning_rate": 4.597084651168625e-05, "loss": 0.2095, "step": 14852 }, { "epoch": 0.2649199158135055, "grad_norm": 0.25106334686279297, "learning_rate": 4.596999912776132e-05, "loss": 0.1757, "step": 14853 }, { "epoch": 0.2649377519352192, "grad_norm": 0.2816717028617859, "learning_rate": 4.5969151662549435e-05, "loss": 0.1591, "step": 14854 }, { "epoch": 0.2649555880569329, "grad_norm": 0.2325526475906372, "learning_rate": 4.596830411605387e-05, "loss": 0.142, "step": 14855 }, { "epoch": 0.2649734241786466, "grad_norm": 0.29756152629852295, "learning_rate": 4.596745648827792e-05, "loss": 0.195, "step": 14856 }, { "epoch": 0.2649912603003603, "grad_norm": 0.31311625242233276, "learning_rate": 4.596660877922486e-05, "loss": 0.1641, "step": 14857 }, { "epoch": 0.265009096422074, "grad_norm": 0.36467182636260986, "learning_rate": 4.596576098889799e-05, "loss": 0.2028, "step": 14858 }, { "epoch": 0.2650269325437877, "grad_norm": 0.2742144465446472, "learning_rate": 4.596491311730059e-05, "loss": 0.1742, "step": 14859 }, { "epoch": 0.26504476866550136, "grad_norm": 0.3176640272140503, "learning_rate": 4.596406516443594e-05, "loss": 0.1841, "step": 14860 }, { "epoch": 0.26506260478721505, "grad_norm": 0.23231923580169678, "learning_rate": 4.596321713030733e-05, "loss": 0.144, "step": 14861 }, { "epoch": 0.26508044090892874, "grad_norm": 0.18834541738033295, "learning_rate": 4.596236901491806e-05, "loss": 0.1796, "step": 14862 }, { "epoch": 0.2650982770306425, "grad_norm": 0.35587966442108154, "learning_rate": 4.5961520818271407e-05, "loss": 0.2077, "step": 14863 }, { "epoch": 0.26511611315235617, "grad_norm": 0.2952534854412079, "learning_rate": 4.596067254037065e-05, "loss": 0.2131, "step": 14864 }, { "epoch": 0.26513394927406986, "grad_norm": 0.21149951219558716, "learning_rate": 4.595982418121909e-05, "loss": 0.1828, "step": 14865 }, { "epoch": 0.26515178539578355, "grad_norm": 0.26897165179252625, "learning_rate": 4.595897574082002e-05, "loss": 0.1506, "step": 14866 }, { "epoch": 0.26516962151749723, "grad_norm": 0.32345300912857056, "learning_rate": 4.595812721917672e-05, "loss": 0.1937, "step": 14867 }, { "epoch": 0.2651874576392109, "grad_norm": 0.23734283447265625, "learning_rate": 4.5957278616292466e-05, "loss": 0.1758, "step": 14868 }, { "epoch": 0.2652052937609246, "grad_norm": 0.24701447784900665, "learning_rate": 4.595642993217057e-05, "loss": 0.1862, "step": 14869 }, { "epoch": 0.2652231298826383, "grad_norm": 0.28314924240112305, "learning_rate": 4.595558116681432e-05, "loss": 0.1851, "step": 14870 }, { "epoch": 0.26524096600435204, "grad_norm": 0.3213120996952057, "learning_rate": 4.595473232022699e-05, "loss": 0.2177, "step": 14871 }, { "epoch": 0.26525880212606573, "grad_norm": 0.225111186504364, "learning_rate": 4.5953883392411886e-05, "loss": 0.1938, "step": 14872 }, { "epoch": 0.2652766382477794, "grad_norm": 0.2745521366596222, "learning_rate": 4.5953034383372294e-05, "loss": 0.1955, "step": 14873 }, { "epoch": 0.2652944743694931, "grad_norm": 0.29025590419769287, "learning_rate": 4.595218529311149e-05, "loss": 0.1437, "step": 14874 }, { "epoch": 0.2653123104912068, "grad_norm": 0.43301862478256226, "learning_rate": 4.595133612163279e-05, "loss": 0.1717, "step": 14875 }, { "epoch": 0.2653301466129205, "grad_norm": 0.37087392807006836, "learning_rate": 4.595048686893948e-05, "loss": 0.2419, "step": 14876 }, { "epoch": 0.26534798273463417, "grad_norm": 0.272806316614151, "learning_rate": 4.594963753503484e-05, "loss": 0.1218, "step": 14877 }, { "epoch": 0.26536581885634786, "grad_norm": 0.22667503356933594, "learning_rate": 4.594878811992217e-05, "loss": 0.15, "step": 14878 }, { "epoch": 0.26538365497806155, "grad_norm": 0.2920016944408417, "learning_rate": 4.594793862360477e-05, "loss": 0.2233, "step": 14879 }, { "epoch": 0.2654014910997753, "grad_norm": 0.37423068284988403, "learning_rate": 4.594708904608591e-05, "loss": 0.1635, "step": 14880 }, { "epoch": 0.265419327221489, "grad_norm": 0.26330453157424927, "learning_rate": 4.5946239387368906e-05, "loss": 0.2019, "step": 14881 }, { "epoch": 0.26543716334320266, "grad_norm": 0.3152129650115967, "learning_rate": 4.594538964745704e-05, "loss": 0.1831, "step": 14882 }, { "epoch": 0.26545499946491635, "grad_norm": 0.2315073013305664, "learning_rate": 4.5944539826353614e-05, "loss": 0.1675, "step": 14883 }, { "epoch": 0.26547283558663004, "grad_norm": 0.41672590374946594, "learning_rate": 4.5943689924061915e-05, "loss": 0.2603, "step": 14884 }, { "epoch": 0.26549067170834373, "grad_norm": 0.5349219441413879, "learning_rate": 4.594283994058524e-05, "loss": 0.2172, "step": 14885 }, { "epoch": 0.2655085078300574, "grad_norm": 0.23204335570335388, "learning_rate": 4.5941989875926886e-05, "loss": 0.1608, "step": 14886 }, { "epoch": 0.2655263439517711, "grad_norm": 0.2690698802471161, "learning_rate": 4.594113973009014e-05, "loss": 0.1486, "step": 14887 }, { "epoch": 0.26554418007348485, "grad_norm": 0.32761090993881226, "learning_rate": 4.5940289503078313e-05, "loss": 0.2036, "step": 14888 }, { "epoch": 0.26556201619519854, "grad_norm": 0.315785676240921, "learning_rate": 4.593943919489469e-05, "loss": 0.2312, "step": 14889 }, { "epoch": 0.2655798523169122, "grad_norm": 0.2315066009759903, "learning_rate": 4.593858880554256e-05, "loss": 0.1903, "step": 14890 }, { "epoch": 0.2655976884386259, "grad_norm": 0.270658016204834, "learning_rate": 4.593773833502524e-05, "loss": 0.1951, "step": 14891 }, { "epoch": 0.2656155245603396, "grad_norm": 0.26397615671157837, "learning_rate": 4.5936887783346005e-05, "loss": 0.1643, "step": 14892 }, { "epoch": 0.2656333606820533, "grad_norm": 0.22954022884368896, "learning_rate": 4.593603715050816e-05, "loss": 0.1552, "step": 14893 }, { "epoch": 0.265651196803767, "grad_norm": 0.2739209234714508, "learning_rate": 4.593518643651501e-05, "loss": 0.1594, "step": 14894 }, { "epoch": 0.26566903292548066, "grad_norm": 0.24630504846572876, "learning_rate": 4.593433564136984e-05, "loss": 0.2015, "step": 14895 }, { "epoch": 0.26568686904719435, "grad_norm": 0.26596322655677795, "learning_rate": 4.593348476507596e-05, "loss": 0.1862, "step": 14896 }, { "epoch": 0.2657047051689081, "grad_norm": 0.21672040224075317, "learning_rate": 4.5932633807636664e-05, "loss": 0.1406, "step": 14897 }, { "epoch": 0.2657225412906218, "grad_norm": 0.21635863184928894, "learning_rate": 4.593178276905525e-05, "loss": 0.1915, "step": 14898 }, { "epoch": 0.26574037741233547, "grad_norm": 0.3071916699409485, "learning_rate": 4.593093164933501e-05, "loss": 0.2027, "step": 14899 }, { "epoch": 0.26575821353404916, "grad_norm": 0.26271358132362366, "learning_rate": 4.5930080448479254e-05, "loss": 0.1926, "step": 14900 }, { "epoch": 0.26577604965576285, "grad_norm": 0.18739892542362213, "learning_rate": 4.592922916649128e-05, "loss": 0.1722, "step": 14901 }, { "epoch": 0.26579388577747654, "grad_norm": 0.23717336356639862, "learning_rate": 4.592837780337438e-05, "loss": 0.2023, "step": 14902 }, { "epoch": 0.2658117218991902, "grad_norm": 0.23732641339302063, "learning_rate": 4.592752635913186e-05, "loss": 0.151, "step": 14903 }, { "epoch": 0.2658295580209039, "grad_norm": 0.2488204389810562, "learning_rate": 4.5926674833767026e-05, "loss": 0.1573, "step": 14904 }, { "epoch": 0.26584739414261765, "grad_norm": 0.29487520456314087, "learning_rate": 4.592582322728316e-05, "loss": 0.1747, "step": 14905 }, { "epoch": 0.26586523026433134, "grad_norm": 0.2556423842906952, "learning_rate": 4.592497153968358e-05, "loss": 0.1888, "step": 14906 }, { "epoch": 0.26588306638604503, "grad_norm": 0.27322086691856384, "learning_rate": 4.592411977097159e-05, "loss": 0.2006, "step": 14907 }, { "epoch": 0.2659009025077587, "grad_norm": 0.31154635548591614, "learning_rate": 4.592326792115048e-05, "loss": 0.2188, "step": 14908 }, { "epoch": 0.2659187386294724, "grad_norm": 0.2314896583557129, "learning_rate": 4.5922415990223556e-05, "loss": 0.2032, "step": 14909 }, { "epoch": 0.2659365747511861, "grad_norm": 0.265146940946579, "learning_rate": 4.5921563978194125e-05, "loss": 0.156, "step": 14910 }, { "epoch": 0.2659544108728998, "grad_norm": 0.2430349886417389, "learning_rate": 4.5920711885065485e-05, "loss": 0.2064, "step": 14911 }, { "epoch": 0.26597224699461347, "grad_norm": 0.22456717491149902, "learning_rate": 4.591985971084094e-05, "loss": 0.1786, "step": 14912 }, { "epoch": 0.26599008311632716, "grad_norm": 0.45749056339263916, "learning_rate": 4.5919007455523786e-05, "loss": 0.2117, "step": 14913 }, { "epoch": 0.2660079192380409, "grad_norm": 0.25490471720695496, "learning_rate": 4.591815511911734e-05, "loss": 0.1512, "step": 14914 }, { "epoch": 0.2660257553597546, "grad_norm": 0.21868674457073212, "learning_rate": 4.5917302701624896e-05, "loss": 0.1433, "step": 14915 }, { "epoch": 0.2660435914814683, "grad_norm": 0.2644864022731781, "learning_rate": 4.591645020304977e-05, "loss": 0.2045, "step": 14916 }, { "epoch": 0.26606142760318197, "grad_norm": 0.35034647583961487, "learning_rate": 4.591559762339526e-05, "loss": 0.2566, "step": 14917 }, { "epoch": 0.26607926372489565, "grad_norm": 0.3079293668270111, "learning_rate": 4.591474496266466e-05, "loss": 0.1552, "step": 14918 }, { "epoch": 0.26609709984660934, "grad_norm": 0.27546125650405884, "learning_rate": 4.5913892220861285e-05, "loss": 0.1505, "step": 14919 }, { "epoch": 0.26611493596832303, "grad_norm": 0.2736878991127014, "learning_rate": 4.5913039397988445e-05, "loss": 0.1617, "step": 14920 }, { "epoch": 0.2661327720900367, "grad_norm": 0.22687847912311554, "learning_rate": 4.591218649404944e-05, "loss": 0.1834, "step": 14921 }, { "epoch": 0.26615060821175046, "grad_norm": 0.2042544186115265, "learning_rate": 4.591133350904758e-05, "loss": 0.1951, "step": 14922 }, { "epoch": 0.26616844433346415, "grad_norm": 0.30572453141212463, "learning_rate": 4.591048044298617e-05, "loss": 0.2301, "step": 14923 }, { "epoch": 0.26618628045517784, "grad_norm": 0.28840720653533936, "learning_rate": 4.5909627295868506e-05, "loss": 0.1893, "step": 14924 }, { "epoch": 0.2662041165768915, "grad_norm": 0.34331366419792175, "learning_rate": 4.5908774067697915e-05, "loss": 0.1541, "step": 14925 }, { "epoch": 0.2662219526986052, "grad_norm": 0.35308146476745605, "learning_rate": 4.590792075847769e-05, "loss": 0.1943, "step": 14926 }, { "epoch": 0.2662397888203189, "grad_norm": 0.2985835373401642, "learning_rate": 4.590706736821114e-05, "loss": 0.2134, "step": 14927 }, { "epoch": 0.2662576249420326, "grad_norm": 0.2805352210998535, "learning_rate": 4.590621389690158e-05, "loss": 0.2112, "step": 14928 }, { "epoch": 0.2662754610637463, "grad_norm": 0.28096508979797363, "learning_rate": 4.5905360344552315e-05, "loss": 0.1767, "step": 14929 }, { "epoch": 0.26629329718546, "grad_norm": 0.2708877623081207, "learning_rate": 4.5904506711166644e-05, "loss": 0.2033, "step": 14930 }, { "epoch": 0.2663111333071737, "grad_norm": 0.22918345034122467, "learning_rate": 4.590365299674789e-05, "loss": 0.1471, "step": 14931 }, { "epoch": 0.2663289694288874, "grad_norm": 0.31197041273117065, "learning_rate": 4.590279920129936e-05, "loss": 0.2, "step": 14932 }, { "epoch": 0.2663468055506011, "grad_norm": 0.25469622015953064, "learning_rate": 4.590194532482436e-05, "loss": 0.2023, "step": 14933 }, { "epoch": 0.2663646416723148, "grad_norm": 0.2631215751171112, "learning_rate": 4.59010913673262e-05, "loss": 0.1821, "step": 14934 }, { "epoch": 0.26638247779402846, "grad_norm": 0.2114010453224182, "learning_rate": 4.5900237328808194e-05, "loss": 0.1653, "step": 14935 }, { "epoch": 0.26640031391574215, "grad_norm": 0.34483587741851807, "learning_rate": 4.589938320927364e-05, "loss": 0.2025, "step": 14936 }, { "epoch": 0.26641815003745584, "grad_norm": 0.22635510563850403, "learning_rate": 4.589852900872586e-05, "loss": 0.1765, "step": 14937 }, { "epoch": 0.2664359861591695, "grad_norm": 0.43880748748779297, "learning_rate": 4.589767472716817e-05, "loss": 0.1904, "step": 14938 }, { "epoch": 0.26645382228088327, "grad_norm": 0.2123252898454666, "learning_rate": 4.5896820364603874e-05, "loss": 0.1758, "step": 14939 }, { "epoch": 0.26647165840259696, "grad_norm": 0.35721907019615173, "learning_rate": 4.5895965921036285e-05, "loss": 0.2411, "step": 14940 }, { "epoch": 0.26648949452431064, "grad_norm": 0.31588804721832275, "learning_rate": 4.589511139646871e-05, "loss": 0.1815, "step": 14941 }, { "epoch": 0.26650733064602433, "grad_norm": 0.275342732667923, "learning_rate": 4.589425679090446e-05, "loss": 0.1408, "step": 14942 }, { "epoch": 0.266525166767738, "grad_norm": 0.23242709040641785, "learning_rate": 4.589340210434687e-05, "loss": 0.133, "step": 14943 }, { "epoch": 0.2665430028894517, "grad_norm": 0.268547385931015, "learning_rate": 4.589254733679923e-05, "loss": 0.1946, "step": 14944 }, { "epoch": 0.2665608390111654, "grad_norm": 0.27724701166152954, "learning_rate": 4.589169248826486e-05, "loss": 0.1901, "step": 14945 }, { "epoch": 0.2665786751328791, "grad_norm": 0.3501250445842743, "learning_rate": 4.589083755874708e-05, "loss": 0.2006, "step": 14946 }, { "epoch": 0.2665965112545928, "grad_norm": 0.22979986667633057, "learning_rate": 4.588998254824919e-05, "loss": 0.1394, "step": 14947 }, { "epoch": 0.2666143473763065, "grad_norm": 0.3628884255886078, "learning_rate": 4.588912745677452e-05, "loss": 0.255, "step": 14948 }, { "epoch": 0.2666321834980202, "grad_norm": 0.22819317877292633, "learning_rate": 4.588827228432637e-05, "loss": 0.1608, "step": 14949 }, { "epoch": 0.2666500196197339, "grad_norm": 0.29848426580429077, "learning_rate": 4.5887417030908067e-05, "loss": 0.201, "step": 14950 }, { "epoch": 0.2666678557414476, "grad_norm": 0.25532805919647217, "learning_rate": 4.588656169652292e-05, "loss": 0.1674, "step": 14951 }, { "epoch": 0.26668569186316127, "grad_norm": 0.2834542989730835, "learning_rate": 4.5885706281174244e-05, "loss": 0.1935, "step": 14952 }, { "epoch": 0.26670352798487496, "grad_norm": 0.2601635158061981, "learning_rate": 4.588485078486536e-05, "loss": 0.1945, "step": 14953 }, { "epoch": 0.26672136410658864, "grad_norm": 0.2619258463382721, "learning_rate": 4.588399520759957e-05, "loss": 0.1418, "step": 14954 }, { "epoch": 0.26673920022830233, "grad_norm": 0.34515440464019775, "learning_rate": 4.588313954938022e-05, "loss": 0.186, "step": 14955 }, { "epoch": 0.2667570363500161, "grad_norm": 0.22710682451725006, "learning_rate": 4.588228381021059e-05, "loss": 0.1874, "step": 14956 }, { "epoch": 0.26677487247172976, "grad_norm": 0.20383551716804504, "learning_rate": 4.5881427990094025e-05, "loss": 0.1646, "step": 14957 }, { "epoch": 0.26679270859344345, "grad_norm": 0.244561105966568, "learning_rate": 4.5880572089033835e-05, "loss": 0.1938, "step": 14958 }, { "epoch": 0.26681054471515714, "grad_norm": 0.2626098394393921, "learning_rate": 4.587971610703333e-05, "loss": 0.177, "step": 14959 }, { "epoch": 0.2668283808368708, "grad_norm": 0.29114869236946106, "learning_rate": 4.587886004409584e-05, "loss": 0.2041, "step": 14960 }, { "epoch": 0.2668462169585845, "grad_norm": 0.2355378419160843, "learning_rate": 4.587800390022467e-05, "loss": 0.1844, "step": 14961 }, { "epoch": 0.2668640530802982, "grad_norm": 0.24582120776176453, "learning_rate": 4.587714767542315e-05, "loss": 0.2125, "step": 14962 }, { "epoch": 0.2668818892020119, "grad_norm": 0.2756332755088806, "learning_rate": 4.587629136969459e-05, "loss": 0.1706, "step": 14963 }, { "epoch": 0.26689972532372563, "grad_norm": 0.2964188754558563, "learning_rate": 4.587543498304232e-05, "loss": 0.1788, "step": 14964 }, { "epoch": 0.2669175614454393, "grad_norm": 0.3456663191318512, "learning_rate": 4.5874578515469655e-05, "loss": 0.2347, "step": 14965 }, { "epoch": 0.266935397567153, "grad_norm": 0.24593006074428558, "learning_rate": 4.587372196697991e-05, "loss": 0.1323, "step": 14966 }, { "epoch": 0.2669532336888667, "grad_norm": 0.2978859841823578, "learning_rate": 4.587286533757641e-05, "loss": 0.1704, "step": 14967 }, { "epoch": 0.2669710698105804, "grad_norm": 0.38560938835144043, "learning_rate": 4.5872008627262476e-05, "loss": 0.1671, "step": 14968 }, { "epoch": 0.2669889059322941, "grad_norm": 0.28182223439216614, "learning_rate": 4.587115183604143e-05, "loss": 0.1983, "step": 14969 }, { "epoch": 0.26700674205400776, "grad_norm": 0.3404568135738373, "learning_rate": 4.587029496391658e-05, "loss": 0.1613, "step": 14970 }, { "epoch": 0.26702457817572145, "grad_norm": 0.31590014696121216, "learning_rate": 4.586943801089126e-05, "loss": 0.1919, "step": 14971 }, { "epoch": 0.2670424142974352, "grad_norm": 0.29793787002563477, "learning_rate": 4.58685809769688e-05, "loss": 0.1424, "step": 14972 }, { "epoch": 0.2670602504191489, "grad_norm": 0.28106406331062317, "learning_rate": 4.586772386215251e-05, "loss": 0.1901, "step": 14973 }, { "epoch": 0.26707808654086257, "grad_norm": 0.2836782932281494, "learning_rate": 4.586686666644571e-05, "loss": 0.231, "step": 14974 }, { "epoch": 0.26709592266257626, "grad_norm": 0.2543480694293976, "learning_rate": 4.586600938985174e-05, "loss": 0.1756, "step": 14975 }, { "epoch": 0.26711375878428995, "grad_norm": 0.3113311529159546, "learning_rate": 4.5865152032373895e-05, "loss": 0.1819, "step": 14976 }, { "epoch": 0.26713159490600363, "grad_norm": 0.2961636483669281, "learning_rate": 4.586429459401552e-05, "loss": 0.1429, "step": 14977 }, { "epoch": 0.2671494310277173, "grad_norm": 0.2527402639389038, "learning_rate": 4.586343707477994e-05, "loss": 0.143, "step": 14978 }, { "epoch": 0.267167267149431, "grad_norm": 0.3453488051891327, "learning_rate": 4.586257947467046e-05, "loss": 0.1915, "step": 14979 }, { "epoch": 0.2671851032711447, "grad_norm": 0.23477394878864288, "learning_rate": 4.586172179369042e-05, "loss": 0.1219, "step": 14980 }, { "epoch": 0.26720293939285844, "grad_norm": 0.193343847990036, "learning_rate": 4.586086403184314e-05, "loss": 0.1531, "step": 14981 }, { "epoch": 0.26722077551457213, "grad_norm": 0.28575000166893005, "learning_rate": 4.5860006189131955e-05, "loss": 0.206, "step": 14982 }, { "epoch": 0.2672386116362858, "grad_norm": 0.3874417841434479, "learning_rate": 4.585914826556017e-05, "loss": 0.1523, "step": 14983 }, { "epoch": 0.2672564477579995, "grad_norm": 0.22443947196006775, "learning_rate": 4.5858290261131124e-05, "loss": 0.1754, "step": 14984 }, { "epoch": 0.2672742838797132, "grad_norm": 0.37375736236572266, "learning_rate": 4.5857432175848146e-05, "loss": 0.1619, "step": 14985 }, { "epoch": 0.2672921200014269, "grad_norm": 0.21425506472587585, "learning_rate": 4.585657400971455e-05, "loss": 0.1754, "step": 14986 }, { "epoch": 0.26730995612314057, "grad_norm": 0.2879641652107239, "learning_rate": 4.585571576273368e-05, "loss": 0.1696, "step": 14987 }, { "epoch": 0.26732779224485426, "grad_norm": 0.23936870694160461, "learning_rate": 4.5854857434908846e-05, "loss": 0.184, "step": 14988 }, { "epoch": 0.267345628366568, "grad_norm": 0.4232967793941498, "learning_rate": 4.585399902624338e-05, "loss": 0.2345, "step": 14989 }, { "epoch": 0.2673634644882817, "grad_norm": 0.35915425419807434, "learning_rate": 4.5853140536740614e-05, "loss": 0.2141, "step": 14990 }, { "epoch": 0.2673813006099954, "grad_norm": 0.3376728892326355, "learning_rate": 4.585228196640387e-05, "loss": 0.1973, "step": 14991 }, { "epoch": 0.26739913673170906, "grad_norm": 0.26291733980178833, "learning_rate": 4.585142331523647e-05, "loss": 0.1653, "step": 14992 }, { "epoch": 0.26741697285342275, "grad_norm": 0.2760443687438965, "learning_rate": 4.585056458324177e-05, "loss": 0.1761, "step": 14993 }, { "epoch": 0.26743480897513644, "grad_norm": 0.2993004024028778, "learning_rate": 4.584970577042307e-05, "loss": 0.2215, "step": 14994 }, { "epoch": 0.26745264509685013, "grad_norm": 0.23597221076488495, "learning_rate": 4.584884687678371e-05, "loss": 0.177, "step": 14995 }, { "epoch": 0.2674704812185638, "grad_norm": 0.24855457246303558, "learning_rate": 4.584798790232702e-05, "loss": 0.218, "step": 14996 }, { "epoch": 0.2674883173402775, "grad_norm": 0.26287418603897095, "learning_rate": 4.584712884705633e-05, "loss": 0.1404, "step": 14997 }, { "epoch": 0.26750615346199125, "grad_norm": 0.24942946434020996, "learning_rate": 4.5846269710974963e-05, "loss": 0.1757, "step": 14998 }, { "epoch": 0.26752398958370494, "grad_norm": 0.29295971989631653, "learning_rate": 4.584541049408626e-05, "loss": 0.2236, "step": 14999 }, { "epoch": 0.2675418257054186, "grad_norm": 0.2524368166923523, "learning_rate": 4.584455119639354e-05, "loss": 0.1359, "step": 15000 }, { "epoch": 0.2675418257054186, "eval_loss": 0.17883256077766418, "eval_runtime": 107.1337, "eval_samples_per_second": 9.558, "eval_steps_per_second": 1.596, "step": 15000 }, { "epoch": 0.2675596618271323, "grad_norm": 0.20551513135433197, "learning_rate": 4.584369181790015e-05, "loss": 0.1701, "step": 15001 }, { "epoch": 0.267577497948846, "grad_norm": 0.33774229884147644, "learning_rate": 4.58428323586094e-05, "loss": 0.1623, "step": 15002 }, { "epoch": 0.2675953340705597, "grad_norm": 0.24167729914188385, "learning_rate": 4.584197281852464e-05, "loss": 0.1653, "step": 15003 }, { "epoch": 0.2676131701922734, "grad_norm": 0.29225096106529236, "learning_rate": 4.584111319764919e-05, "loss": 0.1321, "step": 15004 }, { "epoch": 0.26763100631398706, "grad_norm": 0.2736901640892029, "learning_rate": 4.584025349598639e-05, "loss": 0.181, "step": 15005 }, { "epoch": 0.2676488424357008, "grad_norm": 0.20935440063476562, "learning_rate": 4.5839393713539567e-05, "loss": 0.1692, "step": 15006 }, { "epoch": 0.2676666785574145, "grad_norm": 0.2919246256351471, "learning_rate": 4.583853385031206e-05, "loss": 0.2066, "step": 15007 }, { "epoch": 0.2676845146791282, "grad_norm": 0.2584631145000458, "learning_rate": 4.58376739063072e-05, "loss": 0.2106, "step": 15008 }, { "epoch": 0.26770235080084187, "grad_norm": 0.3070624768733978, "learning_rate": 4.5836813881528313e-05, "loss": 0.165, "step": 15009 }, { "epoch": 0.26772018692255556, "grad_norm": 0.28218236565589905, "learning_rate": 4.583595377597874e-05, "loss": 0.1462, "step": 15010 }, { "epoch": 0.26773802304426925, "grad_norm": 0.2848483920097351, "learning_rate": 4.5835093589661815e-05, "loss": 0.1612, "step": 15011 }, { "epoch": 0.26775585916598293, "grad_norm": 0.3022254705429077, "learning_rate": 4.583423332258087e-05, "loss": 0.1627, "step": 15012 }, { "epoch": 0.2677736952876966, "grad_norm": 0.25852662324905396, "learning_rate": 4.583337297473924e-05, "loss": 0.1949, "step": 15013 }, { "epoch": 0.2677915314094103, "grad_norm": 0.3122718334197998, "learning_rate": 4.5832512546140266e-05, "loss": 0.1799, "step": 15014 }, { "epoch": 0.26780936753112405, "grad_norm": 0.3488292992115021, "learning_rate": 4.583165203678728e-05, "loss": 0.1884, "step": 15015 }, { "epoch": 0.26782720365283774, "grad_norm": 0.25483936071395874, "learning_rate": 4.58307914466836e-05, "loss": 0.205, "step": 15016 }, { "epoch": 0.26784503977455143, "grad_norm": 0.3567766845226288, "learning_rate": 4.582993077583259e-05, "loss": 0.2572, "step": 15017 }, { "epoch": 0.2678628758962651, "grad_norm": 0.27144569158554077, "learning_rate": 4.582907002423757e-05, "loss": 0.2435, "step": 15018 }, { "epoch": 0.2678807120179788, "grad_norm": 0.35764238238334656, "learning_rate": 4.582820919190188e-05, "loss": 0.1753, "step": 15019 }, { "epoch": 0.2678985481396925, "grad_norm": 0.24760153889656067, "learning_rate": 4.5827348278828866e-05, "loss": 0.15, "step": 15020 }, { "epoch": 0.2679163842614062, "grad_norm": 0.23590341210365295, "learning_rate": 4.582648728502185e-05, "loss": 0.1474, "step": 15021 }, { "epoch": 0.26793422038311987, "grad_norm": 0.3167073428630829, "learning_rate": 4.582562621048417e-05, "loss": 0.1975, "step": 15022 }, { "epoch": 0.2679520565048336, "grad_norm": 0.1918695569038391, "learning_rate": 4.5824765055219175e-05, "loss": 0.132, "step": 15023 }, { "epoch": 0.2679698926265473, "grad_norm": 0.2530979812145233, "learning_rate": 4.5823903819230204e-05, "loss": 0.1491, "step": 15024 }, { "epoch": 0.267987728748261, "grad_norm": 0.3486992120742798, "learning_rate": 4.5823042502520585e-05, "loss": 0.2427, "step": 15025 }, { "epoch": 0.2680055648699747, "grad_norm": 0.4126201868057251, "learning_rate": 4.582218110509366e-05, "loss": 0.2455, "step": 15026 }, { "epoch": 0.26802340099168837, "grad_norm": 0.22779715061187744, "learning_rate": 4.582131962695277e-05, "loss": 0.1879, "step": 15027 }, { "epoch": 0.26804123711340205, "grad_norm": 0.27447912096977234, "learning_rate": 4.582045806810125e-05, "loss": 0.2182, "step": 15028 }, { "epoch": 0.26805907323511574, "grad_norm": 0.27242714166641235, "learning_rate": 4.581959642854245e-05, "loss": 0.147, "step": 15029 }, { "epoch": 0.26807690935682943, "grad_norm": 0.22823967039585114, "learning_rate": 4.5818734708279696e-05, "loss": 0.141, "step": 15030 }, { "epoch": 0.2680947454785432, "grad_norm": 0.29453587532043457, "learning_rate": 4.581787290731634e-05, "loss": 0.1765, "step": 15031 }, { "epoch": 0.26811258160025686, "grad_norm": 0.4526078999042511, "learning_rate": 4.581701102565572e-05, "loss": 0.1799, "step": 15032 }, { "epoch": 0.26813041772197055, "grad_norm": 0.3336029350757599, "learning_rate": 4.5816149063301175e-05, "loss": 0.2432, "step": 15033 }, { "epoch": 0.26814825384368424, "grad_norm": 0.45929092168807983, "learning_rate": 4.581528702025604e-05, "loss": 0.2501, "step": 15034 }, { "epoch": 0.2681660899653979, "grad_norm": 0.2934766411781311, "learning_rate": 4.581442489652367e-05, "loss": 0.2089, "step": 15035 }, { "epoch": 0.2681839260871116, "grad_norm": 0.1741962432861328, "learning_rate": 4.58135626921074e-05, "loss": 0.1378, "step": 15036 }, { "epoch": 0.2682017622088253, "grad_norm": 0.3707410991191864, "learning_rate": 4.581270040701057e-05, "loss": 0.2242, "step": 15037 }, { "epoch": 0.268219598330539, "grad_norm": 0.21595703065395355, "learning_rate": 4.581183804123652e-05, "loss": 0.1724, "step": 15038 }, { "epoch": 0.2682374344522527, "grad_norm": 0.25424498319625854, "learning_rate": 4.5810975594788606e-05, "loss": 0.1751, "step": 15039 }, { "epoch": 0.2682552705739664, "grad_norm": 0.2081408053636551, "learning_rate": 4.581011306767016e-05, "loss": 0.1529, "step": 15040 }, { "epoch": 0.2682731066956801, "grad_norm": 0.2633403241634369, "learning_rate": 4.580925045988453e-05, "loss": 0.1471, "step": 15041 }, { "epoch": 0.2682909428173938, "grad_norm": 0.2900846302509308, "learning_rate": 4.580838777143506e-05, "loss": 0.1743, "step": 15042 }, { "epoch": 0.2683087789391075, "grad_norm": 0.3206022381782532, "learning_rate": 4.580752500232508e-05, "loss": 0.1675, "step": 15043 }, { "epoch": 0.26832661506082117, "grad_norm": 0.26511016488075256, "learning_rate": 4.580666215255796e-05, "loss": 0.1284, "step": 15044 }, { "epoch": 0.26834445118253486, "grad_norm": 0.27598950266838074, "learning_rate": 4.5805799222137025e-05, "loss": 0.2067, "step": 15045 }, { "epoch": 0.26836228730424855, "grad_norm": 0.3592627942562103, "learning_rate": 4.580493621106562e-05, "loss": 0.2508, "step": 15046 }, { "epoch": 0.26838012342596224, "grad_norm": 0.27034735679626465, "learning_rate": 4.58040731193471e-05, "loss": 0.2061, "step": 15047 }, { "epoch": 0.268397959547676, "grad_norm": 0.26650625467300415, "learning_rate": 4.5803209946984814e-05, "loss": 0.1492, "step": 15048 }, { "epoch": 0.26841579566938967, "grad_norm": 0.28974902629852295, "learning_rate": 4.58023466939821e-05, "loss": 0.1454, "step": 15049 }, { "epoch": 0.26843363179110336, "grad_norm": 0.4166126549243927, "learning_rate": 4.58014833603423e-05, "loss": 0.2169, "step": 15050 }, { "epoch": 0.26845146791281704, "grad_norm": 0.21722766757011414, "learning_rate": 4.5800619946068766e-05, "loss": 0.1607, "step": 15051 }, { "epoch": 0.26846930403453073, "grad_norm": 0.2337527573108673, "learning_rate": 4.579975645116484e-05, "loss": 0.2084, "step": 15052 }, { "epoch": 0.2684871401562444, "grad_norm": 0.3176339566707611, "learning_rate": 4.579889287563389e-05, "loss": 0.2024, "step": 15053 }, { "epoch": 0.2685049762779581, "grad_norm": 0.30657845735549927, "learning_rate": 4.579802921947924e-05, "loss": 0.2619, "step": 15054 }, { "epoch": 0.2685228123996718, "grad_norm": 0.24663479626178741, "learning_rate": 4.579716548270424e-05, "loss": 0.1725, "step": 15055 }, { "epoch": 0.2685406485213855, "grad_norm": 0.24212126433849335, "learning_rate": 4.5796301665312244e-05, "loss": 0.1862, "step": 15056 }, { "epoch": 0.2685584846430992, "grad_norm": 0.3466230034828186, "learning_rate": 4.579543776730661e-05, "loss": 0.2001, "step": 15057 }, { "epoch": 0.2685763207648129, "grad_norm": 0.2574392557144165, "learning_rate": 4.579457378869066e-05, "loss": 0.1948, "step": 15058 }, { "epoch": 0.2685941568865266, "grad_norm": 0.3334366977214813, "learning_rate": 4.5793709729467776e-05, "loss": 0.1918, "step": 15059 }, { "epoch": 0.2686119930082403, "grad_norm": 0.3107346296310425, "learning_rate": 4.579284558964129e-05, "loss": 0.1651, "step": 15060 }, { "epoch": 0.268629829129954, "grad_norm": 0.2981800138950348, "learning_rate": 4.5791981369214546e-05, "loss": 0.2316, "step": 15061 }, { "epoch": 0.26864766525166767, "grad_norm": 0.351725697517395, "learning_rate": 4.57911170681909e-05, "loss": 0.1866, "step": 15062 }, { "epoch": 0.26866550137338135, "grad_norm": 0.5415006279945374, "learning_rate": 4.5790252686573705e-05, "loss": 0.232, "step": 15063 }, { "epoch": 0.26868333749509504, "grad_norm": 0.259854257106781, "learning_rate": 4.5789388224366315e-05, "loss": 0.1502, "step": 15064 }, { "epoch": 0.2687011736168088, "grad_norm": 0.3185490369796753, "learning_rate": 4.578852368157207e-05, "loss": 0.2438, "step": 15065 }, { "epoch": 0.2687190097385225, "grad_norm": 0.40953490138053894, "learning_rate": 4.578765905819432e-05, "loss": 0.2136, "step": 15066 }, { "epoch": 0.26873684586023616, "grad_norm": 0.22106356918811798, "learning_rate": 4.578679435423644e-05, "loss": 0.2012, "step": 15067 }, { "epoch": 0.26875468198194985, "grad_norm": 0.2520470917224884, "learning_rate": 4.578592956970176e-05, "loss": 0.1761, "step": 15068 }, { "epoch": 0.26877251810366354, "grad_norm": 0.33525723218917847, "learning_rate": 4.578506470459363e-05, "loss": 0.2346, "step": 15069 }, { "epoch": 0.2687903542253772, "grad_norm": 0.267304927110672, "learning_rate": 4.578419975891542e-05, "loss": 0.2033, "step": 15070 }, { "epoch": 0.2688081903470909, "grad_norm": 0.2740553319454193, "learning_rate": 4.578333473267047e-05, "loss": 0.1558, "step": 15071 }, { "epoch": 0.2688260264688046, "grad_norm": 0.3099888563156128, "learning_rate": 4.578246962586213e-05, "loss": 0.1418, "step": 15072 }, { "epoch": 0.26884386259051835, "grad_norm": 0.3670021593570709, "learning_rate": 4.5781604438493764e-05, "loss": 0.1844, "step": 15073 }, { "epoch": 0.26886169871223203, "grad_norm": 0.2935854494571686, "learning_rate": 4.578073917056872e-05, "loss": 0.1374, "step": 15074 }, { "epoch": 0.2688795348339457, "grad_norm": 0.24012885987758636, "learning_rate": 4.577987382209036e-05, "loss": 0.2023, "step": 15075 }, { "epoch": 0.2688973709556594, "grad_norm": 0.2169635146856308, "learning_rate": 4.5779008393062026e-05, "loss": 0.1584, "step": 15076 }, { "epoch": 0.2689152070773731, "grad_norm": 0.31807398796081543, "learning_rate": 4.577814288348708e-05, "loss": 0.1896, "step": 15077 }, { "epoch": 0.2689330431990868, "grad_norm": 0.2244410216808319, "learning_rate": 4.577727729336888e-05, "loss": 0.157, "step": 15078 }, { "epoch": 0.2689508793208005, "grad_norm": 0.30356472730636597, "learning_rate": 4.577641162271077e-05, "loss": 0.1837, "step": 15079 }, { "epoch": 0.26896871544251416, "grad_norm": 0.37213581800460815, "learning_rate": 4.5775545871516115e-05, "loss": 0.2653, "step": 15080 }, { "epoch": 0.26898655156422785, "grad_norm": 0.4679724872112274, "learning_rate": 4.577468003978827e-05, "loss": 0.2525, "step": 15081 }, { "epoch": 0.2690043876859416, "grad_norm": 0.2462843358516693, "learning_rate": 4.577381412753059e-05, "loss": 0.173, "step": 15082 }, { "epoch": 0.2690222238076553, "grad_norm": 0.21170172095298767, "learning_rate": 4.577294813474643e-05, "loss": 0.1549, "step": 15083 }, { "epoch": 0.26904005992936897, "grad_norm": 0.29097288846969604, "learning_rate": 4.577208206143915e-05, "loss": 0.2141, "step": 15084 }, { "epoch": 0.26905789605108266, "grad_norm": 0.3203429579734802, "learning_rate": 4.57712159076121e-05, "loss": 0.2536, "step": 15085 }, { "epoch": 0.26907573217279634, "grad_norm": 0.3312043249607086, "learning_rate": 4.577034967326865e-05, "loss": 0.2015, "step": 15086 }, { "epoch": 0.26909356829451003, "grad_norm": 0.24537771940231323, "learning_rate": 4.576948335841215e-05, "loss": 0.1288, "step": 15087 }, { "epoch": 0.2691114044162237, "grad_norm": 0.2414066642522812, "learning_rate": 4.576861696304595e-05, "loss": 0.1836, "step": 15088 }, { "epoch": 0.2691292405379374, "grad_norm": 0.2669928967952728, "learning_rate": 4.576775048717343e-05, "loss": 0.1354, "step": 15089 }, { "epoch": 0.26914707665965115, "grad_norm": 0.2919745445251465, "learning_rate": 4.576688393079793e-05, "loss": 0.231, "step": 15090 }, { "epoch": 0.26916491278136484, "grad_norm": 0.6345043182373047, "learning_rate": 4.576601729392281e-05, "loss": 0.2241, "step": 15091 }, { "epoch": 0.26918274890307853, "grad_norm": 0.30328649282455444, "learning_rate": 4.5765150576551444e-05, "loss": 0.1834, "step": 15092 }, { "epoch": 0.2692005850247922, "grad_norm": 0.2644284665584564, "learning_rate": 4.576428377868718e-05, "loss": 0.1738, "step": 15093 }, { "epoch": 0.2692184211465059, "grad_norm": 0.3032969534397125, "learning_rate": 4.5763416900333376e-05, "loss": 0.201, "step": 15094 }, { "epoch": 0.2692362572682196, "grad_norm": 0.2551535964012146, "learning_rate": 4.5762549941493406e-05, "loss": 0.1375, "step": 15095 }, { "epoch": 0.2692540933899333, "grad_norm": 0.27850770950317383, "learning_rate": 4.576168290217061e-05, "loss": 0.2196, "step": 15096 }, { "epoch": 0.26927192951164697, "grad_norm": 0.22138451039791107, "learning_rate": 4.576081578236836e-05, "loss": 0.1201, "step": 15097 }, { "epoch": 0.26928976563336066, "grad_norm": 0.3289961516857147, "learning_rate": 4.575994858209003e-05, "loss": 0.1409, "step": 15098 }, { "epoch": 0.2693076017550744, "grad_norm": 0.26809120178222656, "learning_rate": 4.575908130133896e-05, "loss": 0.2042, "step": 15099 }, { "epoch": 0.2693254378767881, "grad_norm": 0.3855358064174652, "learning_rate": 4.575821394011852e-05, "loss": 0.2078, "step": 15100 }, { "epoch": 0.2693432739985018, "grad_norm": 0.20090390741825104, "learning_rate": 4.5757346498432075e-05, "loss": 0.1927, "step": 15101 }, { "epoch": 0.26936111012021546, "grad_norm": 0.32857489585876465, "learning_rate": 4.5756478976282993e-05, "loss": 0.1751, "step": 15102 }, { "epoch": 0.26937894624192915, "grad_norm": 0.26462826132774353, "learning_rate": 4.575561137367462e-05, "loss": 0.1746, "step": 15103 }, { "epoch": 0.26939678236364284, "grad_norm": 0.3513377010822296, "learning_rate": 4.5754743690610324e-05, "loss": 0.2266, "step": 15104 }, { "epoch": 0.2694146184853565, "grad_norm": 0.2739737927913666, "learning_rate": 4.575387592709348e-05, "loss": 0.1854, "step": 15105 }, { "epoch": 0.2694324546070702, "grad_norm": 0.2547748386859894, "learning_rate": 4.575300808312744e-05, "loss": 0.1389, "step": 15106 }, { "epoch": 0.26945029072878396, "grad_norm": 0.2273988425731659, "learning_rate": 4.5752140158715585e-05, "loss": 0.1672, "step": 15107 }, { "epoch": 0.26946812685049765, "grad_norm": 0.24950018525123596, "learning_rate": 4.5751272153861265e-05, "loss": 0.1691, "step": 15108 }, { "epoch": 0.26948596297221133, "grad_norm": 0.273456871509552, "learning_rate": 4.5750404068567845e-05, "loss": 0.1877, "step": 15109 }, { "epoch": 0.269503799093925, "grad_norm": 0.3427591919898987, "learning_rate": 4.574953590283868e-05, "loss": 0.2205, "step": 15110 }, { "epoch": 0.2695216352156387, "grad_norm": 0.2696007788181305, "learning_rate": 4.574866765667716e-05, "loss": 0.1362, "step": 15111 }, { "epoch": 0.2695394713373524, "grad_norm": 0.2822125554084778, "learning_rate": 4.574779933008663e-05, "loss": 0.1755, "step": 15112 }, { "epoch": 0.2695573074590661, "grad_norm": 0.29417094588279724, "learning_rate": 4.5746930923070474e-05, "loss": 0.174, "step": 15113 }, { "epoch": 0.2695751435807798, "grad_norm": 0.32152101397514343, "learning_rate": 4.5746062435632035e-05, "loss": 0.161, "step": 15114 }, { "epoch": 0.26959297970249346, "grad_norm": 0.42608800530433655, "learning_rate": 4.57451938677747e-05, "loss": 0.1339, "step": 15115 }, { "epoch": 0.2696108158242072, "grad_norm": 0.4274521768093109, "learning_rate": 4.574432521950183e-05, "loss": 0.2179, "step": 15116 }, { "epoch": 0.2696286519459209, "grad_norm": 0.20234228670597076, "learning_rate": 4.5743456490816785e-05, "loss": 0.1807, "step": 15117 }, { "epoch": 0.2696464880676346, "grad_norm": 0.18298442661762238, "learning_rate": 4.5742587681722944e-05, "loss": 0.1327, "step": 15118 }, { "epoch": 0.26966432418934827, "grad_norm": 0.2521079480648041, "learning_rate": 4.5741718792223667e-05, "loss": 0.1772, "step": 15119 }, { "epoch": 0.26968216031106196, "grad_norm": 0.2876557409763336, "learning_rate": 4.574084982232232e-05, "loss": 0.189, "step": 15120 }, { "epoch": 0.26969999643277565, "grad_norm": 0.26367124915122986, "learning_rate": 4.5739980772022275e-05, "loss": 0.1821, "step": 15121 }, { "epoch": 0.26971783255448933, "grad_norm": 0.2989930510520935, "learning_rate": 4.573911164132691e-05, "loss": 0.2458, "step": 15122 }, { "epoch": 0.269735668676203, "grad_norm": 0.4386133849620819, "learning_rate": 4.5738242430239574e-05, "loss": 0.2609, "step": 15123 }, { "epoch": 0.26975350479791677, "grad_norm": 0.2555815875530243, "learning_rate": 4.5737373138763654e-05, "loss": 0.1937, "step": 15124 }, { "epoch": 0.26977134091963045, "grad_norm": 0.25332406163215637, "learning_rate": 4.573650376690252e-05, "loss": 0.1808, "step": 15125 }, { "epoch": 0.26978917704134414, "grad_norm": 0.27824124693870544, "learning_rate": 4.573563431465953e-05, "loss": 0.1418, "step": 15126 }, { "epoch": 0.26980701316305783, "grad_norm": 0.3003767132759094, "learning_rate": 4.573476478203805e-05, "loss": 0.2325, "step": 15127 }, { "epoch": 0.2698248492847715, "grad_norm": 0.21168194711208344, "learning_rate": 4.573389516904147e-05, "loss": 0.1574, "step": 15128 }, { "epoch": 0.2698426854064852, "grad_norm": 0.28841474652290344, "learning_rate": 4.573302547567315e-05, "loss": 0.1638, "step": 15129 }, { "epoch": 0.2698605215281989, "grad_norm": 0.24812036752700806, "learning_rate": 4.573215570193646e-05, "loss": 0.1493, "step": 15130 }, { "epoch": 0.2698783576499126, "grad_norm": 0.3030625581741333, "learning_rate": 4.573128584783477e-05, "loss": 0.1714, "step": 15131 }, { "epoch": 0.2698961937716263, "grad_norm": 0.23205551505088806, "learning_rate": 4.573041591337146e-05, "loss": 0.1574, "step": 15132 }, { "epoch": 0.26991402989334, "grad_norm": 0.201072558760643, "learning_rate": 4.5729545898549904e-05, "loss": 0.1601, "step": 15133 }, { "epoch": 0.2699318660150537, "grad_norm": 0.49314627051353455, "learning_rate": 4.5728675803373454e-05, "loss": 0.1971, "step": 15134 }, { "epoch": 0.2699497021367674, "grad_norm": 0.5175096392631531, "learning_rate": 4.572780562784551e-05, "loss": 0.1851, "step": 15135 }, { "epoch": 0.2699675382584811, "grad_norm": 0.23637612164020538, "learning_rate": 4.572693537196942e-05, "loss": 0.1976, "step": 15136 }, { "epoch": 0.26998537438019476, "grad_norm": 0.2552511394023895, "learning_rate": 4.572606503574859e-05, "loss": 0.211, "step": 15137 }, { "epoch": 0.27000321050190845, "grad_norm": 0.2515599727630615, "learning_rate": 4.5725194619186354e-05, "loss": 0.1816, "step": 15138 }, { "epoch": 0.27002104662362214, "grad_norm": 0.4017189145088196, "learning_rate": 4.572432412228612e-05, "loss": 0.1752, "step": 15139 }, { "epoch": 0.27003888274533583, "grad_norm": 0.24660325050354004, "learning_rate": 4.5723453545051236e-05, "loss": 0.1855, "step": 15140 }, { "epoch": 0.27005671886704957, "grad_norm": 0.25051072239875793, "learning_rate": 4.5722582887485085e-05, "loss": 0.2097, "step": 15141 }, { "epoch": 0.27007455498876326, "grad_norm": 0.3942745625972748, "learning_rate": 4.572171214959106e-05, "loss": 0.1987, "step": 15142 }, { "epoch": 0.27009239111047695, "grad_norm": 0.3423575758934021, "learning_rate": 4.572084133137251e-05, "loss": 0.1915, "step": 15143 }, { "epoch": 0.27011022723219064, "grad_norm": 0.20359322428703308, "learning_rate": 4.5719970432832834e-05, "loss": 0.1607, "step": 15144 }, { "epoch": 0.2701280633539043, "grad_norm": 0.5687680840492249, "learning_rate": 4.571909945397539e-05, "loss": 0.2197, "step": 15145 }, { "epoch": 0.270145899475618, "grad_norm": 0.27103379368782043, "learning_rate": 4.571822839480355e-05, "loss": 0.2201, "step": 15146 }, { "epoch": 0.2701637355973317, "grad_norm": 0.2533745765686035, "learning_rate": 4.5717357255320714e-05, "loss": 0.2099, "step": 15147 }, { "epoch": 0.2701815717190454, "grad_norm": 0.3978731632232666, "learning_rate": 4.5716486035530244e-05, "loss": 0.2324, "step": 15148 }, { "epoch": 0.27019940784075913, "grad_norm": 0.2688053548336029, "learning_rate": 4.571561473543552e-05, "loss": 0.1882, "step": 15149 }, { "epoch": 0.2702172439624728, "grad_norm": 0.24664700031280518, "learning_rate": 4.571474335503991e-05, "loss": 0.1757, "step": 15150 }, { "epoch": 0.2702350800841865, "grad_norm": 0.2718179225921631, "learning_rate": 4.57138718943468e-05, "loss": 0.1958, "step": 15151 }, { "epoch": 0.2702529162059002, "grad_norm": 0.245918408036232, "learning_rate": 4.571300035335958e-05, "loss": 0.1562, "step": 15152 }, { "epoch": 0.2702707523276139, "grad_norm": 0.23125310242176056, "learning_rate": 4.5712128732081614e-05, "loss": 0.1108, "step": 15153 }, { "epoch": 0.27028858844932757, "grad_norm": 0.3100898563861847, "learning_rate": 4.571125703051627e-05, "loss": 0.1576, "step": 15154 }, { "epoch": 0.27030642457104126, "grad_norm": 0.2776975631713867, "learning_rate": 4.571038524866695e-05, "loss": 0.1511, "step": 15155 }, { "epoch": 0.27032426069275495, "grad_norm": 0.25399282574653625, "learning_rate": 4.570951338653703e-05, "loss": 0.2046, "step": 15156 }, { "epoch": 0.27034209681446864, "grad_norm": 0.3336469233036041, "learning_rate": 4.570864144412987e-05, "loss": 0.1764, "step": 15157 }, { "epoch": 0.2703599329361824, "grad_norm": 0.3277715742588043, "learning_rate": 4.570776942144888e-05, "loss": 0.1828, "step": 15158 }, { "epoch": 0.27037776905789607, "grad_norm": 0.3869439959526062, "learning_rate": 4.570689731849741e-05, "loss": 0.1889, "step": 15159 }, { "epoch": 0.27039560517960975, "grad_norm": 0.2036662995815277, "learning_rate": 4.570602513527886e-05, "loss": 0.1538, "step": 15160 }, { "epoch": 0.27041344130132344, "grad_norm": 0.27623727917671204, "learning_rate": 4.57051528717966e-05, "loss": 0.1605, "step": 15161 }, { "epoch": 0.27043127742303713, "grad_norm": 0.24336528778076172, "learning_rate": 4.5704280528054015e-05, "loss": 0.1821, "step": 15162 }, { "epoch": 0.2704491135447508, "grad_norm": 0.23564158380031586, "learning_rate": 4.570340810405449e-05, "loss": 0.1986, "step": 15163 }, { "epoch": 0.2704669496664645, "grad_norm": 0.23940153419971466, "learning_rate": 4.570253559980141e-05, "loss": 0.1807, "step": 15164 }, { "epoch": 0.2704847857881782, "grad_norm": 0.253069669008255, "learning_rate": 4.570166301529815e-05, "loss": 0.1841, "step": 15165 }, { "epoch": 0.27050262190989194, "grad_norm": 0.3073471784591675, "learning_rate": 4.570079035054808e-05, "loss": 0.1857, "step": 15166 }, { "epoch": 0.2705204580316056, "grad_norm": 0.3081522285938263, "learning_rate": 4.5699917605554614e-05, "loss": 0.2263, "step": 15167 }, { "epoch": 0.2705382941533193, "grad_norm": 0.29746657609939575, "learning_rate": 4.569904478032111e-05, "loss": 0.1909, "step": 15168 }, { "epoch": 0.270556130275033, "grad_norm": 0.3431158661842346, "learning_rate": 4.569817187485096e-05, "loss": 0.1552, "step": 15169 }, { "epoch": 0.2705739663967467, "grad_norm": 0.37991803884506226, "learning_rate": 4.5697298889147556e-05, "loss": 0.2265, "step": 15170 }, { "epoch": 0.2705918025184604, "grad_norm": 0.2834690809249878, "learning_rate": 4.569642582321426e-05, "loss": 0.1491, "step": 15171 }, { "epoch": 0.27060963864017407, "grad_norm": 0.2840344309806824, "learning_rate": 4.5695552677054474e-05, "loss": 0.1902, "step": 15172 }, { "epoch": 0.27062747476188775, "grad_norm": 0.39299482107162476, "learning_rate": 4.569467945067158e-05, "loss": 0.2496, "step": 15173 }, { "epoch": 0.27064531088360144, "grad_norm": 0.24964050948619843, "learning_rate": 4.569380614406896e-05, "loss": 0.1858, "step": 15174 }, { "epoch": 0.2706631470053152, "grad_norm": 0.2006838023662567, "learning_rate": 4.5692932757249994e-05, "loss": 0.1675, "step": 15175 }, { "epoch": 0.2706809831270289, "grad_norm": 0.33417633175849915, "learning_rate": 4.569205929021808e-05, "loss": 0.1562, "step": 15176 }, { "epoch": 0.27069881924874256, "grad_norm": 0.46648111939430237, "learning_rate": 4.5691185742976596e-05, "loss": 0.1778, "step": 15177 }, { "epoch": 0.27071665537045625, "grad_norm": 0.26930585503578186, "learning_rate": 4.5690312115528936e-05, "loss": 0.2047, "step": 15178 }, { "epoch": 0.27073449149216994, "grad_norm": 0.24441000819206238, "learning_rate": 4.5689438407878464e-05, "loss": 0.1911, "step": 15179 }, { "epoch": 0.2707523276138836, "grad_norm": 0.23304849863052368, "learning_rate": 4.5688564620028595e-05, "loss": 0.2294, "step": 15180 }, { "epoch": 0.2707701637355973, "grad_norm": 0.24206621944904327, "learning_rate": 4.56876907519827e-05, "loss": 0.1727, "step": 15181 }, { "epoch": 0.270787999857311, "grad_norm": 0.23368129134178162, "learning_rate": 4.568681680374417e-05, "loss": 0.1525, "step": 15182 }, { "epoch": 0.27080583597902474, "grad_norm": 0.16269347071647644, "learning_rate": 4.568594277531639e-05, "loss": 0.1354, "step": 15183 }, { "epoch": 0.27082367210073843, "grad_norm": 0.27379167079925537, "learning_rate": 4.5685068666702756e-05, "loss": 0.2198, "step": 15184 }, { "epoch": 0.2708415082224521, "grad_norm": 0.5590804815292358, "learning_rate": 4.568419447790666e-05, "loss": 0.2431, "step": 15185 }, { "epoch": 0.2708593443441658, "grad_norm": 0.33675411343574524, "learning_rate": 4.568332020893147e-05, "loss": 0.167, "step": 15186 }, { "epoch": 0.2708771804658795, "grad_norm": 0.28820088505744934, "learning_rate": 4.568244585978059e-05, "loss": 0.2604, "step": 15187 }, { "epoch": 0.2708950165875932, "grad_norm": 0.29879653453826904, "learning_rate": 4.56815714304574e-05, "loss": 0.2001, "step": 15188 }, { "epoch": 0.2709128527093069, "grad_norm": 0.27476903796195984, "learning_rate": 4.5680696920965304e-05, "loss": 0.2205, "step": 15189 }, { "epoch": 0.27093068883102056, "grad_norm": 0.32600149512290955, "learning_rate": 4.5679822331307684e-05, "loss": 0.1543, "step": 15190 }, { "epoch": 0.2709485249527343, "grad_norm": 0.38172951340675354, "learning_rate": 4.567894766148792e-05, "loss": 0.2909, "step": 15191 }, { "epoch": 0.270966361074448, "grad_norm": 0.24965259432792664, "learning_rate": 4.567807291150943e-05, "loss": 0.1667, "step": 15192 }, { "epoch": 0.2709841971961617, "grad_norm": 0.36808347702026367, "learning_rate": 4.567719808137558e-05, "loss": 0.1784, "step": 15193 }, { "epoch": 0.27100203331787537, "grad_norm": 0.2753356993198395, "learning_rate": 4.5676323171089764e-05, "loss": 0.1845, "step": 15194 }, { "epoch": 0.27101986943958906, "grad_norm": 0.2599482536315918, "learning_rate": 4.567544818065538e-05, "loss": 0.1796, "step": 15195 }, { "epoch": 0.27103770556130274, "grad_norm": 0.2779335677623749, "learning_rate": 4.567457311007582e-05, "loss": 0.2152, "step": 15196 }, { "epoch": 0.27105554168301643, "grad_norm": 0.27003762125968933, "learning_rate": 4.5673697959354464e-05, "loss": 0.1888, "step": 15197 }, { "epoch": 0.2710733778047301, "grad_norm": 0.16075967252254486, "learning_rate": 4.567282272849473e-05, "loss": 0.149, "step": 15198 }, { "epoch": 0.2710912139264438, "grad_norm": 0.28088581562042236, "learning_rate": 4.5671947417499986e-05, "loss": 0.1578, "step": 15199 }, { "epoch": 0.27110905004815755, "grad_norm": 0.4064401090145111, "learning_rate": 4.567107202637364e-05, "loss": 0.1791, "step": 15200 }, { "epoch": 0.27112688616987124, "grad_norm": 0.3230174481868744, "learning_rate": 4.567019655511907e-05, "loss": 0.2062, "step": 15201 }, { "epoch": 0.2711447222915849, "grad_norm": 0.33291521668434143, "learning_rate": 4.566932100373968e-05, "loss": 0.1666, "step": 15202 }, { "epoch": 0.2711625584132986, "grad_norm": 0.2896880805492401, "learning_rate": 4.5668445372238876e-05, "loss": 0.2153, "step": 15203 }, { "epoch": 0.2711803945350123, "grad_norm": 0.28267180919647217, "learning_rate": 4.566756966062002e-05, "loss": 0.1751, "step": 15204 }, { "epoch": 0.271198230656726, "grad_norm": 0.32702046632766724, "learning_rate": 4.566669386888655e-05, "loss": 0.1712, "step": 15205 }, { "epoch": 0.2712160667784397, "grad_norm": 0.27298182249069214, "learning_rate": 4.566581799704182e-05, "loss": 0.1673, "step": 15206 }, { "epoch": 0.27123390290015337, "grad_norm": 0.3961528539657593, "learning_rate": 4.566494204508923e-05, "loss": 0.2041, "step": 15207 }, { "epoch": 0.2712517390218671, "grad_norm": 0.26520565152168274, "learning_rate": 4.56640660130322e-05, "loss": 0.1816, "step": 15208 }, { "epoch": 0.2712695751435808, "grad_norm": 0.24210646748542786, "learning_rate": 4.566318990087412e-05, "loss": 0.1788, "step": 15209 }, { "epoch": 0.2712874112652945, "grad_norm": 0.2602328360080719, "learning_rate": 4.566231370861838e-05, "loss": 0.1949, "step": 15210 }, { "epoch": 0.2713052473870082, "grad_norm": 0.3372642397880554, "learning_rate": 4.566143743626836e-05, "loss": 0.1502, "step": 15211 }, { "epoch": 0.27132308350872186, "grad_norm": 0.36982017755508423, "learning_rate": 4.566056108382748e-05, "loss": 0.1561, "step": 15212 }, { "epoch": 0.27134091963043555, "grad_norm": 0.22503159940242767, "learning_rate": 4.565968465129913e-05, "loss": 0.1762, "step": 15213 }, { "epoch": 0.27135875575214924, "grad_norm": 0.3456066846847534, "learning_rate": 4.565880813868671e-05, "loss": 0.2427, "step": 15214 }, { "epoch": 0.2713765918738629, "grad_norm": 0.2989911437034607, "learning_rate": 4.565793154599361e-05, "loss": 0.183, "step": 15215 }, { "epoch": 0.2713944279955766, "grad_norm": 0.3059474527835846, "learning_rate": 4.5657054873223234e-05, "loss": 0.1904, "step": 15216 }, { "epoch": 0.27141226411729036, "grad_norm": 0.24165715277194977, "learning_rate": 4.565617812037898e-05, "loss": 0.1909, "step": 15217 }, { "epoch": 0.27143010023900405, "grad_norm": 0.2177954465150833, "learning_rate": 4.565530128746424e-05, "loss": 0.2142, "step": 15218 }, { "epoch": 0.27144793636071773, "grad_norm": 0.2765772342681885, "learning_rate": 4.565442437448242e-05, "loss": 0.1707, "step": 15219 }, { "epoch": 0.2714657724824314, "grad_norm": 0.31171122193336487, "learning_rate": 4.565354738143692e-05, "loss": 0.1785, "step": 15220 }, { "epoch": 0.2714836086041451, "grad_norm": 0.22197510302066803, "learning_rate": 4.5652670308331135e-05, "loss": 0.1732, "step": 15221 }, { "epoch": 0.2715014447258588, "grad_norm": 0.2603869140148163, "learning_rate": 4.5651793155168463e-05, "loss": 0.1393, "step": 15222 }, { "epoch": 0.2715192808475725, "grad_norm": 0.3855699896812439, "learning_rate": 4.5650915921952315e-05, "loss": 0.2192, "step": 15223 }, { "epoch": 0.2715371169692862, "grad_norm": 0.2805880308151245, "learning_rate": 4.5650038608686084e-05, "loss": 0.1452, "step": 15224 }, { "epoch": 0.2715549530909999, "grad_norm": 0.23132579028606415, "learning_rate": 4.564916121537317e-05, "loss": 0.216, "step": 15225 }, { "epoch": 0.2715727892127136, "grad_norm": 0.2955576479434967, "learning_rate": 4.564828374201697e-05, "loss": 0.1325, "step": 15226 }, { "epoch": 0.2715906253344273, "grad_norm": 0.38246214389801025, "learning_rate": 4.564740618862089e-05, "loss": 0.2069, "step": 15227 }, { "epoch": 0.271608461456141, "grad_norm": 0.26435205340385437, "learning_rate": 4.564652855518834e-05, "loss": 0.1954, "step": 15228 }, { "epoch": 0.27162629757785467, "grad_norm": 0.2857755124568939, "learning_rate": 4.564565084172271e-05, "loss": 0.1077, "step": 15229 }, { "epoch": 0.27164413369956836, "grad_norm": 0.2121935486793518, "learning_rate": 4.5644773048227406e-05, "loss": 0.1745, "step": 15230 }, { "epoch": 0.27166196982128205, "grad_norm": 0.2694613039493561, "learning_rate": 4.564389517470583e-05, "loss": 0.1895, "step": 15231 }, { "epoch": 0.27167980594299573, "grad_norm": 0.32408225536346436, "learning_rate": 4.564301722116139e-05, "loss": 0.187, "step": 15232 }, { "epoch": 0.2716976420647095, "grad_norm": 0.2772509753704071, "learning_rate": 4.5642139187597484e-05, "loss": 0.1702, "step": 15233 }, { "epoch": 0.27171547818642316, "grad_norm": 0.2935529351234436, "learning_rate": 4.564126107401751e-05, "loss": 0.1639, "step": 15234 }, { "epoch": 0.27173331430813685, "grad_norm": 0.33130311965942383, "learning_rate": 4.564038288042489e-05, "loss": 0.1792, "step": 15235 }, { "epoch": 0.27175115042985054, "grad_norm": 0.30109965801239014, "learning_rate": 4.5639504606823016e-05, "loss": 0.1646, "step": 15236 }, { "epoch": 0.27176898655156423, "grad_norm": 0.3628794848918915, "learning_rate": 4.563862625321529e-05, "loss": 0.2068, "step": 15237 }, { "epoch": 0.2717868226732779, "grad_norm": 0.21711215376853943, "learning_rate": 4.563774781960511e-05, "loss": 0.1474, "step": 15238 }, { "epoch": 0.2718046587949916, "grad_norm": 0.3367788791656494, "learning_rate": 4.563686930599591e-05, "loss": 0.1977, "step": 15239 }, { "epoch": 0.2718224949167053, "grad_norm": 0.374269962310791, "learning_rate": 4.5635990712391064e-05, "loss": 0.1788, "step": 15240 }, { "epoch": 0.271840331038419, "grad_norm": 0.33344024419784546, "learning_rate": 4.5635112038794e-05, "loss": 0.2051, "step": 15241 }, { "epoch": 0.2718581671601327, "grad_norm": 0.27690979838371277, "learning_rate": 4.5634233285208104e-05, "loss": 0.1796, "step": 15242 }, { "epoch": 0.2718760032818464, "grad_norm": 0.37408357858657837, "learning_rate": 4.56333544516368e-05, "loss": 0.1933, "step": 15243 }, { "epoch": 0.2718938394035601, "grad_norm": 0.27190762758255005, "learning_rate": 4.5632475538083486e-05, "loss": 0.2238, "step": 15244 }, { "epoch": 0.2719116755252738, "grad_norm": 0.28789904713630676, "learning_rate": 4.563159654455157e-05, "loss": 0.1419, "step": 15245 }, { "epoch": 0.2719295116469875, "grad_norm": 0.2511211931705475, "learning_rate": 4.563071747104446e-05, "loss": 0.2109, "step": 15246 }, { "epoch": 0.27194734776870116, "grad_norm": 0.3084287941455841, "learning_rate": 4.5629838317565566e-05, "loss": 0.2023, "step": 15247 }, { "epoch": 0.27196518389041485, "grad_norm": 0.41966989636421204, "learning_rate": 4.5628959084118294e-05, "loss": 0.1518, "step": 15248 }, { "epoch": 0.27198302001212854, "grad_norm": 0.2750261127948761, "learning_rate": 4.562807977070604e-05, "loss": 0.1979, "step": 15249 }, { "epoch": 0.2720008561338423, "grad_norm": 0.3373103141784668, "learning_rate": 4.5627200377332235e-05, "loss": 0.1892, "step": 15250 }, { "epoch": 0.27201869225555597, "grad_norm": 0.3220573663711548, "learning_rate": 4.562632090400028e-05, "loss": 0.1906, "step": 15251 }, { "epoch": 0.27203652837726966, "grad_norm": 0.25796929001808167, "learning_rate": 4.562544135071357e-05, "loss": 0.197, "step": 15252 }, { "epoch": 0.27205436449898335, "grad_norm": 0.30647778511047363, "learning_rate": 4.5624561717475535e-05, "loss": 0.184, "step": 15253 }, { "epoch": 0.27207220062069704, "grad_norm": 0.2855292856693268, "learning_rate": 4.562368200428957e-05, "loss": 0.1901, "step": 15254 }, { "epoch": 0.2720900367424107, "grad_norm": 0.295500248670578, "learning_rate": 4.56228022111591e-05, "loss": 0.165, "step": 15255 }, { "epoch": 0.2721078728641244, "grad_norm": 0.3099755346775055, "learning_rate": 4.5621922338087513e-05, "loss": 0.1815, "step": 15256 }, { "epoch": 0.2721257089858381, "grad_norm": 0.28054386377334595, "learning_rate": 4.562104238507824e-05, "loss": 0.1618, "step": 15257 }, { "epoch": 0.2721435451075518, "grad_norm": 0.32812485098838806, "learning_rate": 4.562016235213468e-05, "loss": 0.1871, "step": 15258 }, { "epoch": 0.27216138122926553, "grad_norm": 0.35932666063308716, "learning_rate": 4.561928223926025e-05, "loss": 0.158, "step": 15259 }, { "epoch": 0.2721792173509792, "grad_norm": 0.3113170862197876, "learning_rate": 4.561840204645836e-05, "loss": 0.1981, "step": 15260 }, { "epoch": 0.2721970534726929, "grad_norm": 0.2167622447013855, "learning_rate": 4.5617521773732416e-05, "loss": 0.1643, "step": 15261 }, { "epoch": 0.2722148895944066, "grad_norm": 0.22766174376010895, "learning_rate": 4.561664142108585e-05, "loss": 0.1486, "step": 15262 }, { "epoch": 0.2722327257161203, "grad_norm": 0.2789682149887085, "learning_rate": 4.561576098852206e-05, "loss": 0.2049, "step": 15263 }, { "epoch": 0.27225056183783397, "grad_norm": 0.2753037214279175, "learning_rate": 4.561488047604445e-05, "loss": 0.1539, "step": 15264 }, { "epoch": 0.27226839795954766, "grad_norm": 0.3413105607032776, "learning_rate": 4.561399988365645e-05, "loss": 0.1999, "step": 15265 }, { "epoch": 0.27228623408126135, "grad_norm": 0.31180474162101746, "learning_rate": 4.5613119211361464e-05, "loss": 0.1791, "step": 15266 }, { "epoch": 0.2723040702029751, "grad_norm": 0.2991024851799011, "learning_rate": 4.56122384591629e-05, "loss": 0.1634, "step": 15267 }, { "epoch": 0.2723219063246888, "grad_norm": 0.2772076427936554, "learning_rate": 4.56113576270642e-05, "loss": 0.1863, "step": 15268 }, { "epoch": 0.27233974244640247, "grad_norm": 0.3939161002635956, "learning_rate": 4.5610476715068745e-05, "loss": 0.229, "step": 15269 }, { "epoch": 0.27235757856811615, "grad_norm": 0.2530250549316406, "learning_rate": 4.560959572317996e-05, "loss": 0.1737, "step": 15270 }, { "epoch": 0.27237541468982984, "grad_norm": 0.29270896315574646, "learning_rate": 4.560871465140128e-05, "loss": 0.1937, "step": 15271 }, { "epoch": 0.27239325081154353, "grad_norm": 0.25215375423431396, "learning_rate": 4.5607833499736094e-05, "loss": 0.1648, "step": 15272 }, { "epoch": 0.2724110869332572, "grad_norm": 0.2080620378255844, "learning_rate": 4.5606952268187823e-05, "loss": 0.1888, "step": 15273 }, { "epoch": 0.2724289230549709, "grad_norm": 0.26196444034576416, "learning_rate": 4.5606070956759894e-05, "loss": 0.1699, "step": 15274 }, { "epoch": 0.2724467591766846, "grad_norm": 0.214683398604393, "learning_rate": 4.560518956545572e-05, "loss": 0.1599, "step": 15275 }, { "epoch": 0.27246459529839834, "grad_norm": 0.2834594249725342, "learning_rate": 4.5604308094278706e-05, "loss": 0.1734, "step": 15276 }, { "epoch": 0.272482431420112, "grad_norm": 0.3563289940357208, "learning_rate": 4.5603426543232284e-05, "loss": 0.1581, "step": 15277 }, { "epoch": 0.2725002675418257, "grad_norm": 0.23578090965747833, "learning_rate": 4.5602544912319865e-05, "loss": 0.1598, "step": 15278 }, { "epoch": 0.2725181036635394, "grad_norm": 0.24709348380565643, "learning_rate": 4.560166320154486e-05, "loss": 0.1961, "step": 15279 }, { "epoch": 0.2725359397852531, "grad_norm": 0.240645632147789, "learning_rate": 4.56007814109107e-05, "loss": 0.1769, "step": 15280 }, { "epoch": 0.2725537759069668, "grad_norm": 0.37232837080955505, "learning_rate": 4.559989954042079e-05, "loss": 0.1247, "step": 15281 }, { "epoch": 0.27257161202868047, "grad_norm": 0.3669794499874115, "learning_rate": 4.559901759007855e-05, "loss": 0.2142, "step": 15282 }, { "epoch": 0.27258944815039415, "grad_norm": 0.2820383310317993, "learning_rate": 4.559813555988741e-05, "loss": 0.1648, "step": 15283 }, { "epoch": 0.2726072842721079, "grad_norm": 0.2757430970668793, "learning_rate": 4.5597253449850785e-05, "loss": 0.2069, "step": 15284 }, { "epoch": 0.2726251203938216, "grad_norm": 0.2687505781650543, "learning_rate": 4.559637125997209e-05, "loss": 0.1682, "step": 15285 }, { "epoch": 0.2726429565155353, "grad_norm": 0.2668740451335907, "learning_rate": 4.559548899025474e-05, "loss": 0.1528, "step": 15286 }, { "epoch": 0.27266079263724896, "grad_norm": 0.22336526215076447, "learning_rate": 4.559460664070217e-05, "loss": 0.1671, "step": 15287 }, { "epoch": 0.27267862875896265, "grad_norm": 0.29438310861587524, "learning_rate": 4.5593724211317775e-05, "loss": 0.1762, "step": 15288 }, { "epoch": 0.27269646488067634, "grad_norm": 0.3443536162376404, "learning_rate": 4.5592841702105014e-05, "loss": 0.1756, "step": 15289 }, { "epoch": 0.27271430100239, "grad_norm": 0.3066597580909729, "learning_rate": 4.559195911306727e-05, "loss": 0.2097, "step": 15290 }, { "epoch": 0.2727321371241037, "grad_norm": 0.3857879638671875, "learning_rate": 4.559107644420799e-05, "loss": 0.2785, "step": 15291 }, { "epoch": 0.27274997324581746, "grad_norm": 0.3418494760990143, "learning_rate": 4.559019369553058e-05, "loss": 0.1861, "step": 15292 }, { "epoch": 0.27276780936753114, "grad_norm": 0.27305635809898376, "learning_rate": 4.558931086703847e-05, "loss": 0.2264, "step": 15293 }, { "epoch": 0.27278564548924483, "grad_norm": 0.2765255272388458, "learning_rate": 4.558842795873508e-05, "loss": 0.177, "step": 15294 }, { "epoch": 0.2728034816109585, "grad_norm": 0.32875490188598633, "learning_rate": 4.5587544970623833e-05, "loss": 0.1948, "step": 15295 }, { "epoch": 0.2728213177326722, "grad_norm": 0.29781773686408997, "learning_rate": 4.558666190270815e-05, "loss": 0.1923, "step": 15296 }, { "epoch": 0.2728391538543859, "grad_norm": 0.2744108736515045, "learning_rate": 4.558577875499146e-05, "loss": 0.1761, "step": 15297 }, { "epoch": 0.2728569899760996, "grad_norm": 0.24439026415348053, "learning_rate": 4.5584895527477175e-05, "loss": 0.1542, "step": 15298 }, { "epoch": 0.2728748260978133, "grad_norm": 0.28602665662765503, "learning_rate": 4.558401222016873e-05, "loss": 0.1762, "step": 15299 }, { "epoch": 0.27289266221952696, "grad_norm": 0.21261803805828094, "learning_rate": 4.558312883306953e-05, "loss": 0.1741, "step": 15300 }, { "epoch": 0.2729104983412407, "grad_norm": 0.329238623380661, "learning_rate": 4.558224536618303e-05, "loss": 0.1951, "step": 15301 }, { "epoch": 0.2729283344629544, "grad_norm": 0.20495079457759857, "learning_rate": 4.558136181951263e-05, "loss": 0.147, "step": 15302 }, { "epoch": 0.2729461705846681, "grad_norm": 0.25885844230651855, "learning_rate": 4.558047819306177e-05, "loss": 0.1913, "step": 15303 }, { "epoch": 0.27296400670638177, "grad_norm": 0.22849741578102112, "learning_rate": 4.557959448683386e-05, "loss": 0.0968, "step": 15304 }, { "epoch": 0.27298184282809546, "grad_norm": 0.3090474605560303, "learning_rate": 4.557871070083234e-05, "loss": 0.1379, "step": 15305 }, { "epoch": 0.27299967894980914, "grad_norm": 0.3825876712799072, "learning_rate": 4.5577826835060625e-05, "loss": 0.2259, "step": 15306 }, { "epoch": 0.27301751507152283, "grad_norm": 0.218611940741539, "learning_rate": 4.557694288952215e-05, "loss": 0.1691, "step": 15307 }, { "epoch": 0.2730353511932365, "grad_norm": 0.24547012150287628, "learning_rate": 4.557605886422033e-05, "loss": 0.1994, "step": 15308 }, { "epoch": 0.27305318731495026, "grad_norm": 0.3120046555995941, "learning_rate": 4.5575174759158604e-05, "loss": 0.2648, "step": 15309 }, { "epoch": 0.27307102343666395, "grad_norm": 0.2578418254852295, "learning_rate": 4.5574290574340395e-05, "loss": 0.2256, "step": 15310 }, { "epoch": 0.27308885955837764, "grad_norm": 0.2318110316991806, "learning_rate": 4.557340630976913e-05, "loss": 0.1761, "step": 15311 }, { "epoch": 0.2731066956800913, "grad_norm": 0.2423015534877777, "learning_rate": 4.5572521965448237e-05, "loss": 0.1689, "step": 15312 }, { "epoch": 0.273124531801805, "grad_norm": 0.1946888267993927, "learning_rate": 4.557163754138114e-05, "loss": 0.1528, "step": 15313 }, { "epoch": 0.2731423679235187, "grad_norm": 0.29533758759498596, "learning_rate": 4.557075303757127e-05, "loss": 0.2106, "step": 15314 }, { "epoch": 0.2731602040452324, "grad_norm": 0.26054847240448, "learning_rate": 4.556986845402206e-05, "loss": 0.174, "step": 15315 }, { "epoch": 0.2731780401669461, "grad_norm": 0.27883899211883545, "learning_rate": 4.556898379073693e-05, "loss": 0.2354, "step": 15316 }, { "epoch": 0.27319587628865977, "grad_norm": 0.43866193294525146, "learning_rate": 4.556809904771933e-05, "loss": 0.2317, "step": 15317 }, { "epoch": 0.2732137124103735, "grad_norm": 0.28204163908958435, "learning_rate": 4.5567214224972653e-05, "loss": 0.2131, "step": 15318 }, { "epoch": 0.2732315485320872, "grad_norm": 0.3200967311859131, "learning_rate": 4.5566329322500365e-05, "loss": 0.1531, "step": 15319 }, { "epoch": 0.2732493846538009, "grad_norm": 0.2608736753463745, "learning_rate": 4.556544434030587e-05, "loss": 0.1412, "step": 15320 }, { "epoch": 0.2732672207755146, "grad_norm": 0.30826514959335327, "learning_rate": 4.556455927839261e-05, "loss": 0.2068, "step": 15321 }, { "epoch": 0.27328505689722826, "grad_norm": 0.47373446822166443, "learning_rate": 4.556367413676402e-05, "loss": 0.1911, "step": 15322 }, { "epoch": 0.27330289301894195, "grad_norm": 0.5966895222663879, "learning_rate": 4.556278891542354e-05, "loss": 0.216, "step": 15323 }, { "epoch": 0.27332072914065564, "grad_norm": 0.26217979192733765, "learning_rate": 4.556190361437457e-05, "loss": 0.1843, "step": 15324 }, { "epoch": 0.2733385652623693, "grad_norm": 0.3170042634010315, "learning_rate": 4.5561018233620566e-05, "loss": 0.1974, "step": 15325 }, { "epoch": 0.27335640138408307, "grad_norm": 0.4135138988494873, "learning_rate": 4.556013277316495e-05, "loss": 0.1779, "step": 15326 }, { "epoch": 0.27337423750579676, "grad_norm": 0.17939463257789612, "learning_rate": 4.555924723301116e-05, "loss": 0.1834, "step": 15327 }, { "epoch": 0.27339207362751045, "grad_norm": 0.2103712409734726, "learning_rate": 4.555836161316263e-05, "loss": 0.2001, "step": 15328 }, { "epoch": 0.27340990974922413, "grad_norm": 0.30819979310035706, "learning_rate": 4.5557475913622785e-05, "loss": 0.146, "step": 15329 }, { "epoch": 0.2734277458709378, "grad_norm": 0.3447093069553375, "learning_rate": 4.5556590134395075e-05, "loss": 0.1514, "step": 15330 }, { "epoch": 0.2734455819926515, "grad_norm": 0.23838426172733307, "learning_rate": 4.555570427548291e-05, "loss": 0.187, "step": 15331 }, { "epoch": 0.2734634181143652, "grad_norm": 0.25891539454460144, "learning_rate": 4.555481833688973e-05, "loss": 0.1719, "step": 15332 }, { "epoch": 0.2734812542360789, "grad_norm": 0.3054523766040802, "learning_rate": 4.5553932318618984e-05, "loss": 0.1706, "step": 15333 }, { "epoch": 0.27349909035779263, "grad_norm": 0.36198678612709045, "learning_rate": 4.55530462206741e-05, "loss": 0.2099, "step": 15334 }, { "epoch": 0.2735169264795063, "grad_norm": 0.2814268469810486, "learning_rate": 4.55521600430585e-05, "loss": 0.1516, "step": 15335 }, { "epoch": 0.27353476260122, "grad_norm": 0.3233660161495209, "learning_rate": 4.555127378577564e-05, "loss": 0.1371, "step": 15336 }, { "epoch": 0.2735525987229337, "grad_norm": 0.35173270106315613, "learning_rate": 4.5550387448828936e-05, "loss": 0.2267, "step": 15337 }, { "epoch": 0.2735704348446474, "grad_norm": 0.23610329627990723, "learning_rate": 4.5549501032221836e-05, "loss": 0.197, "step": 15338 }, { "epoch": 0.27358827096636107, "grad_norm": 0.26902562379837036, "learning_rate": 4.554861453595777e-05, "loss": 0.1797, "step": 15339 }, { "epoch": 0.27360610708807476, "grad_norm": 0.28548333048820496, "learning_rate": 4.5547727960040185e-05, "loss": 0.1887, "step": 15340 }, { "epoch": 0.27362394320978844, "grad_norm": 0.2753860652446747, "learning_rate": 4.55468413044725e-05, "loss": 0.2439, "step": 15341 }, { "epoch": 0.27364177933150213, "grad_norm": 0.25360167026519775, "learning_rate": 4.5545954569258163e-05, "loss": 0.1943, "step": 15342 }, { "epoch": 0.2736596154532159, "grad_norm": 0.26458024978637695, "learning_rate": 4.554506775440062e-05, "loss": 0.1502, "step": 15343 }, { "epoch": 0.27367745157492956, "grad_norm": 0.19540803134441376, "learning_rate": 4.554418085990328e-05, "loss": 0.1574, "step": 15344 }, { "epoch": 0.27369528769664325, "grad_norm": 0.428216814994812, "learning_rate": 4.554329388576961e-05, "loss": 0.1838, "step": 15345 }, { "epoch": 0.27371312381835694, "grad_norm": 0.24133911728858948, "learning_rate": 4.5542406832003035e-05, "loss": 0.2111, "step": 15346 }, { "epoch": 0.27373095994007063, "grad_norm": 0.37168097496032715, "learning_rate": 4.554151969860701e-05, "loss": 0.2317, "step": 15347 }, { "epoch": 0.2737487960617843, "grad_norm": 0.248093381524086, "learning_rate": 4.5540632485584944e-05, "loss": 0.1809, "step": 15348 }, { "epoch": 0.273766632183498, "grad_norm": 0.303943395614624, "learning_rate": 4.55397451929403e-05, "loss": 0.1953, "step": 15349 }, { "epoch": 0.2737844683052117, "grad_norm": 0.30130651593208313, "learning_rate": 4.5538857820676495e-05, "loss": 0.178, "step": 15350 }, { "epoch": 0.27380230442692544, "grad_norm": 0.24781788885593414, "learning_rate": 4.5537970368796995e-05, "loss": 0.1645, "step": 15351 }, { "epoch": 0.2738201405486391, "grad_norm": 0.30300620198249817, "learning_rate": 4.553708283730523e-05, "loss": 0.205, "step": 15352 }, { "epoch": 0.2738379766703528, "grad_norm": 0.2765817642211914, "learning_rate": 4.5536195226204634e-05, "loss": 0.1982, "step": 15353 }, { "epoch": 0.2738558127920665, "grad_norm": 0.23178695142269135, "learning_rate": 4.553530753549865e-05, "loss": 0.1983, "step": 15354 }, { "epoch": 0.2738736489137802, "grad_norm": 0.34527337551116943, "learning_rate": 4.553441976519073e-05, "loss": 0.1876, "step": 15355 }, { "epoch": 0.2738914850354939, "grad_norm": 0.5574958920478821, "learning_rate": 4.55335319152843e-05, "loss": 0.252, "step": 15356 }, { "epoch": 0.27390932115720756, "grad_norm": 0.27578604221343994, "learning_rate": 4.553264398578281e-05, "loss": 0.2463, "step": 15357 }, { "epoch": 0.27392715727892125, "grad_norm": 0.2662498652935028, "learning_rate": 4.55317559766897e-05, "loss": 0.2237, "step": 15358 }, { "epoch": 0.27394499340063494, "grad_norm": 0.25444018840789795, "learning_rate": 4.553086788800841e-05, "loss": 0.1478, "step": 15359 }, { "epoch": 0.2739628295223487, "grad_norm": 0.31608253717422485, "learning_rate": 4.552997971974239e-05, "loss": 0.1769, "step": 15360 }, { "epoch": 0.27398066564406237, "grad_norm": 0.20223009586334229, "learning_rate": 4.552909147189507e-05, "loss": 0.1619, "step": 15361 }, { "epoch": 0.27399850176577606, "grad_norm": 0.353495717048645, "learning_rate": 4.55282031444699e-05, "loss": 0.1505, "step": 15362 }, { "epoch": 0.27401633788748975, "grad_norm": 0.35909226536750793, "learning_rate": 4.552731473747034e-05, "loss": 0.2422, "step": 15363 }, { "epoch": 0.27403417400920344, "grad_norm": 0.2929408848285675, "learning_rate": 4.552642625089981e-05, "loss": 0.1617, "step": 15364 }, { "epoch": 0.2740520101309171, "grad_norm": 0.27406468987464905, "learning_rate": 4.5525537684761754e-05, "loss": 0.2184, "step": 15365 }, { "epoch": 0.2740698462526308, "grad_norm": 0.27739614248275757, "learning_rate": 4.552464903905964e-05, "loss": 0.2242, "step": 15366 }, { "epoch": 0.2740876823743445, "grad_norm": 0.28953036665916443, "learning_rate": 4.5523760313796884e-05, "loss": 0.174, "step": 15367 }, { "epoch": 0.27410551849605824, "grad_norm": 0.23054704070091248, "learning_rate": 4.552287150897695e-05, "loss": 0.1622, "step": 15368 }, { "epoch": 0.27412335461777193, "grad_norm": 0.2942816913127899, "learning_rate": 4.5521982624603274e-05, "loss": 0.2083, "step": 15369 }, { "epoch": 0.2741411907394856, "grad_norm": 0.28185173869132996, "learning_rate": 4.552109366067931e-05, "loss": 0.2161, "step": 15370 }, { "epoch": 0.2741590268611993, "grad_norm": 0.29522964358329773, "learning_rate": 4.5520204617208496e-05, "loss": 0.1486, "step": 15371 }, { "epoch": 0.274176862982913, "grad_norm": 0.22718365490436554, "learning_rate": 4.551931549419428e-05, "loss": 0.1784, "step": 15372 }, { "epoch": 0.2741946991046267, "grad_norm": 0.32544374465942383, "learning_rate": 4.551842629164012e-05, "loss": 0.1805, "step": 15373 }, { "epoch": 0.27421253522634037, "grad_norm": 0.1977655440568924, "learning_rate": 4.5517537009549436e-05, "loss": 0.1325, "step": 15374 }, { "epoch": 0.27423037134805406, "grad_norm": 0.3883865773677826, "learning_rate": 4.551664764792571e-05, "loss": 0.2036, "step": 15375 }, { "epoch": 0.27424820746976775, "grad_norm": 0.23773835599422455, "learning_rate": 4.5515758206772364e-05, "loss": 0.1717, "step": 15376 }, { "epoch": 0.2742660435914815, "grad_norm": 0.2205808162689209, "learning_rate": 4.551486868609285e-05, "loss": 0.1158, "step": 15377 }, { "epoch": 0.2742838797131952, "grad_norm": 0.2984471619129181, "learning_rate": 4.5513979085890626e-05, "loss": 0.1635, "step": 15378 }, { "epoch": 0.27430171583490887, "grad_norm": 0.270412415266037, "learning_rate": 4.551308940616912e-05, "loss": 0.195, "step": 15379 }, { "epoch": 0.27431955195662255, "grad_norm": 0.24950411915779114, "learning_rate": 4.5512199646931807e-05, "loss": 0.1818, "step": 15380 }, { "epoch": 0.27433738807833624, "grad_norm": 0.25902676582336426, "learning_rate": 4.5511309808182125e-05, "loss": 0.1581, "step": 15381 }, { "epoch": 0.27435522420004993, "grad_norm": 0.24803592264652252, "learning_rate": 4.551041988992352e-05, "loss": 0.1702, "step": 15382 }, { "epoch": 0.2743730603217636, "grad_norm": 0.2569483518600464, "learning_rate": 4.5509529892159435e-05, "loss": 0.1739, "step": 15383 }, { "epoch": 0.2743908964434773, "grad_norm": 0.2723129987716675, "learning_rate": 4.550863981489333e-05, "loss": 0.236, "step": 15384 }, { "epoch": 0.27440873256519105, "grad_norm": 0.21242138743400574, "learning_rate": 4.550774965812866e-05, "loss": 0.1555, "step": 15385 }, { "epoch": 0.27442656868690474, "grad_norm": 0.3287215232849121, "learning_rate": 4.550685942186887e-05, "loss": 0.2197, "step": 15386 }, { "epoch": 0.2744444048086184, "grad_norm": 0.23314982652664185, "learning_rate": 4.550596910611741e-05, "loss": 0.1672, "step": 15387 }, { "epoch": 0.2744622409303321, "grad_norm": 0.35281679034233093, "learning_rate": 4.5505078710877726e-05, "loss": 0.2133, "step": 15388 }, { "epoch": 0.2744800770520458, "grad_norm": 0.31634366512298584, "learning_rate": 4.550418823615327e-05, "loss": 0.1904, "step": 15389 }, { "epoch": 0.2744979131737595, "grad_norm": 0.22860662639141083, "learning_rate": 4.550329768194751e-05, "loss": 0.162, "step": 15390 }, { "epoch": 0.2745157492954732, "grad_norm": 0.25230541825294495, "learning_rate": 4.5502407048263875e-05, "loss": 0.1578, "step": 15391 }, { "epoch": 0.27453358541718687, "grad_norm": 0.30084991455078125, "learning_rate": 4.550151633510584e-05, "loss": 0.1828, "step": 15392 }, { "epoch": 0.2745514215389006, "grad_norm": 0.2516980469226837, "learning_rate": 4.550062554247684e-05, "loss": 0.1682, "step": 15393 }, { "epoch": 0.2745692576606143, "grad_norm": 0.39343249797821045, "learning_rate": 4.549973467038034e-05, "loss": 0.1658, "step": 15394 }, { "epoch": 0.274587093782328, "grad_norm": 0.23363885283470154, "learning_rate": 4.549884371881978e-05, "loss": 0.1896, "step": 15395 }, { "epoch": 0.2746049299040417, "grad_norm": 0.24483560025691986, "learning_rate": 4.549795268779863e-05, "loss": 0.1577, "step": 15396 }, { "epoch": 0.27462276602575536, "grad_norm": 0.2151115983724594, "learning_rate": 4.549706157732033e-05, "loss": 0.1572, "step": 15397 }, { "epoch": 0.27464060214746905, "grad_norm": 0.3536268174648285, "learning_rate": 4.549617038738835e-05, "loss": 0.1829, "step": 15398 }, { "epoch": 0.27465843826918274, "grad_norm": 0.2710700035095215, "learning_rate": 4.5495279118006123e-05, "loss": 0.1582, "step": 15399 }, { "epoch": 0.2746762743908964, "grad_norm": 0.25989460945129395, "learning_rate": 4.549438776917712e-05, "loss": 0.1704, "step": 15400 }, { "epoch": 0.2746941105126101, "grad_norm": 0.43564146757125854, "learning_rate": 4.54934963409048e-05, "loss": 0.2206, "step": 15401 }, { "epoch": 0.27471194663432386, "grad_norm": 0.26961979269981384, "learning_rate": 4.549260483319259e-05, "loss": 0.1404, "step": 15402 }, { "epoch": 0.27472978275603754, "grad_norm": 0.2905063033103943, "learning_rate": 4.5491713246043975e-05, "loss": 0.2108, "step": 15403 }, { "epoch": 0.27474761887775123, "grad_norm": 0.25492873787879944, "learning_rate": 4.549082157946241e-05, "loss": 0.1747, "step": 15404 }, { "epoch": 0.2747654549994649, "grad_norm": 0.35250574350357056, "learning_rate": 4.548992983345133e-05, "loss": 0.1797, "step": 15405 }, { "epoch": 0.2747832911211786, "grad_norm": 0.35146665573120117, "learning_rate": 4.5489038008014214e-05, "loss": 0.1605, "step": 15406 }, { "epoch": 0.2748011272428923, "grad_norm": 0.2627125680446625, "learning_rate": 4.5488146103154506e-05, "loss": 0.1681, "step": 15407 }, { "epoch": 0.274818963364606, "grad_norm": 0.2415877878665924, "learning_rate": 4.548725411887567e-05, "loss": 0.1982, "step": 15408 }, { "epoch": 0.27483679948631967, "grad_norm": 0.2854273319244385, "learning_rate": 4.5486362055181155e-05, "loss": 0.1669, "step": 15409 }, { "epoch": 0.2748546356080334, "grad_norm": 0.29682010412216187, "learning_rate": 4.548546991207444e-05, "loss": 0.1937, "step": 15410 }, { "epoch": 0.2748724717297471, "grad_norm": 0.3292175829410553, "learning_rate": 4.548457768955895e-05, "loss": 0.2037, "step": 15411 }, { "epoch": 0.2748903078514608, "grad_norm": 0.3024621605873108, "learning_rate": 4.548368538763817e-05, "loss": 0.1943, "step": 15412 }, { "epoch": 0.2749081439731745, "grad_norm": 0.3101816475391388, "learning_rate": 4.548279300631555e-05, "loss": 0.1604, "step": 15413 }, { "epoch": 0.27492598009488817, "grad_norm": 0.30465930700302124, "learning_rate": 4.548190054559455e-05, "loss": 0.2189, "step": 15414 }, { "epoch": 0.27494381621660186, "grad_norm": 0.26891347765922546, "learning_rate": 4.548100800547863e-05, "loss": 0.2279, "step": 15415 }, { "epoch": 0.27496165233831554, "grad_norm": 0.2779199779033661, "learning_rate": 4.548011538597124e-05, "loss": 0.1995, "step": 15416 }, { "epoch": 0.27497948846002923, "grad_norm": 0.45538026094436646, "learning_rate": 4.547922268707586e-05, "loss": 0.1889, "step": 15417 }, { "epoch": 0.2749973245817429, "grad_norm": 0.6173567771911621, "learning_rate": 4.547832990879594e-05, "loss": 0.1916, "step": 15418 }, { "epoch": 0.27501516070345666, "grad_norm": 0.23143932223320007, "learning_rate": 4.547743705113494e-05, "loss": 0.1951, "step": 15419 }, { "epoch": 0.27503299682517035, "grad_norm": 0.24382197856903076, "learning_rate": 4.547654411409632e-05, "loss": 0.1634, "step": 15420 }, { "epoch": 0.27505083294688404, "grad_norm": 0.28351467847824097, "learning_rate": 4.5475651097683534e-05, "loss": 0.1678, "step": 15421 }, { "epoch": 0.2750686690685977, "grad_norm": 0.2747480571269989, "learning_rate": 4.547475800190006e-05, "loss": 0.1621, "step": 15422 }, { "epoch": 0.2750865051903114, "grad_norm": 0.29847392439842224, "learning_rate": 4.5473864826749354e-05, "loss": 0.221, "step": 15423 }, { "epoch": 0.2751043413120251, "grad_norm": 0.22456280887126923, "learning_rate": 4.547297157223488e-05, "loss": 0.2083, "step": 15424 }, { "epoch": 0.2751221774337388, "grad_norm": 0.2684950828552246, "learning_rate": 4.547207823836009e-05, "loss": 0.2121, "step": 15425 }, { "epoch": 0.2751400135554525, "grad_norm": 0.31478646397590637, "learning_rate": 4.5471184825128454e-05, "loss": 0.1617, "step": 15426 }, { "epoch": 0.2751578496771662, "grad_norm": 0.24621573090553284, "learning_rate": 4.5470291332543434e-05, "loss": 0.1471, "step": 15427 }, { "epoch": 0.2751756857988799, "grad_norm": 0.2672198712825775, "learning_rate": 4.54693977606085e-05, "loss": 0.1447, "step": 15428 }, { "epoch": 0.2751935219205936, "grad_norm": 0.32548636198043823, "learning_rate": 4.54685041093271e-05, "loss": 0.2174, "step": 15429 }, { "epoch": 0.2752113580423073, "grad_norm": 0.2495693415403366, "learning_rate": 4.546761037870272e-05, "loss": 0.176, "step": 15430 }, { "epoch": 0.275229194164021, "grad_norm": 0.27465543150901794, "learning_rate": 4.546671656873881e-05, "loss": 0.1838, "step": 15431 }, { "epoch": 0.27524703028573466, "grad_norm": 0.3896167278289795, "learning_rate": 4.546582267943883e-05, "loss": 0.2049, "step": 15432 }, { "epoch": 0.27526486640744835, "grad_norm": 0.33411529660224915, "learning_rate": 4.546492871080627e-05, "loss": 0.188, "step": 15433 }, { "epoch": 0.27528270252916204, "grad_norm": 0.3175964653491974, "learning_rate": 4.546403466284456e-05, "loss": 0.2119, "step": 15434 }, { "epoch": 0.2753005386508758, "grad_norm": 0.2661793529987335, "learning_rate": 4.546314053555719e-05, "loss": 0.1817, "step": 15435 }, { "epoch": 0.27531837477258947, "grad_norm": 0.36906781792640686, "learning_rate": 4.546224632894762e-05, "loss": 0.2069, "step": 15436 }, { "epoch": 0.27533621089430316, "grad_norm": 0.27329039573669434, "learning_rate": 4.546135204301931e-05, "loss": 0.1883, "step": 15437 }, { "epoch": 0.27535404701601685, "grad_norm": 0.2112458050251007, "learning_rate": 4.5460457677775746e-05, "loss": 0.1551, "step": 15438 }, { "epoch": 0.27537188313773053, "grad_norm": 0.2697179913520813, "learning_rate": 4.545956323322037e-05, "loss": 0.2061, "step": 15439 }, { "epoch": 0.2753897192594442, "grad_norm": 0.302211195230484, "learning_rate": 4.5458668709356664e-05, "loss": 0.1756, "step": 15440 }, { "epoch": 0.2754075553811579, "grad_norm": 0.26720723509788513, "learning_rate": 4.545777410618809e-05, "loss": 0.1657, "step": 15441 }, { "epoch": 0.2754253915028716, "grad_norm": 0.30111950635910034, "learning_rate": 4.5456879423718126e-05, "loss": 0.2163, "step": 15442 }, { "epoch": 0.2754432276245853, "grad_norm": 0.24537427723407745, "learning_rate": 4.545598466195022e-05, "loss": 0.1667, "step": 15443 }, { "epoch": 0.27546106374629903, "grad_norm": 0.3326100707054138, "learning_rate": 4.5455089820887853e-05, "loss": 0.2425, "step": 15444 }, { "epoch": 0.2754788998680127, "grad_norm": 0.2835599184036255, "learning_rate": 4.5454194900534495e-05, "loss": 0.2094, "step": 15445 }, { "epoch": 0.2754967359897264, "grad_norm": 0.250205397605896, "learning_rate": 4.545329990089362e-05, "loss": 0.162, "step": 15446 }, { "epoch": 0.2755145721114401, "grad_norm": 0.34966766834259033, "learning_rate": 4.545240482196868e-05, "loss": 0.1729, "step": 15447 }, { "epoch": 0.2755324082331538, "grad_norm": 0.3049478530883789, "learning_rate": 4.545150966376317e-05, "loss": 0.1948, "step": 15448 }, { "epoch": 0.27555024435486747, "grad_norm": 0.22935433685779572, "learning_rate": 4.545061442628054e-05, "loss": 0.2233, "step": 15449 }, { "epoch": 0.27556808047658116, "grad_norm": 0.19765464961528778, "learning_rate": 4.5449719109524245e-05, "loss": 0.1745, "step": 15450 }, { "epoch": 0.27558591659829484, "grad_norm": 0.21852731704711914, "learning_rate": 4.54488237134978e-05, "loss": 0.1762, "step": 15451 }, { "epoch": 0.2756037527200086, "grad_norm": 0.31670308113098145, "learning_rate": 4.544792823820464e-05, "loss": 0.1639, "step": 15452 }, { "epoch": 0.2756215888417223, "grad_norm": 0.28105905652046204, "learning_rate": 4.544703268364825e-05, "loss": 0.2124, "step": 15453 }, { "epoch": 0.27563942496343596, "grad_norm": 0.26987016201019287, "learning_rate": 4.544613704983209e-05, "loss": 0.1766, "step": 15454 }, { "epoch": 0.27565726108514965, "grad_norm": 0.2731398046016693, "learning_rate": 4.544524133675966e-05, "loss": 0.166, "step": 15455 }, { "epoch": 0.27567509720686334, "grad_norm": 0.26097047328948975, "learning_rate": 4.5444345544434395e-05, "loss": 0.1943, "step": 15456 }, { "epoch": 0.27569293332857703, "grad_norm": 0.31116968393325806, "learning_rate": 4.544344967285979e-05, "loss": 0.2096, "step": 15457 }, { "epoch": 0.2757107694502907, "grad_norm": 0.26668640971183777, "learning_rate": 4.5442553722039327e-05, "loss": 0.1619, "step": 15458 }, { "epoch": 0.2757286055720044, "grad_norm": 0.3781563639640808, "learning_rate": 4.544165769197645e-05, "loss": 0.1345, "step": 15459 }, { "epoch": 0.2757464416937181, "grad_norm": 0.247103750705719, "learning_rate": 4.544076158267465e-05, "loss": 0.1285, "step": 15460 }, { "epoch": 0.27576427781543184, "grad_norm": 0.28072378039360046, "learning_rate": 4.54398653941374e-05, "loss": 0.1613, "step": 15461 }, { "epoch": 0.2757821139371455, "grad_norm": 0.2977953553199768, "learning_rate": 4.543896912636817e-05, "loss": 0.1716, "step": 15462 }, { "epoch": 0.2757999500588592, "grad_norm": 0.2700483798980713, "learning_rate": 4.543807277937044e-05, "loss": 0.1382, "step": 15463 }, { "epoch": 0.2758177861805729, "grad_norm": 0.31720682978630066, "learning_rate": 4.543717635314768e-05, "loss": 0.1381, "step": 15464 }, { "epoch": 0.2758356223022866, "grad_norm": 0.2514737844467163, "learning_rate": 4.5436279847703364e-05, "loss": 0.182, "step": 15465 }, { "epoch": 0.2758534584240003, "grad_norm": 0.40441063046455383, "learning_rate": 4.5435383263040975e-05, "loss": 0.1906, "step": 15466 }, { "epoch": 0.27587129454571396, "grad_norm": 0.3640082776546478, "learning_rate": 4.5434486599163977e-05, "loss": 0.1983, "step": 15467 }, { "epoch": 0.27588913066742765, "grad_norm": 0.2816970646381378, "learning_rate": 4.543358985607585e-05, "loss": 0.1765, "step": 15468 }, { "epoch": 0.2759069667891414, "grad_norm": 0.3926188349723816, "learning_rate": 4.543269303378007e-05, "loss": 0.1824, "step": 15469 }, { "epoch": 0.2759248029108551, "grad_norm": 0.2605321407318115, "learning_rate": 4.5431796132280116e-05, "loss": 0.1742, "step": 15470 }, { "epoch": 0.27594263903256877, "grad_norm": 0.2637878358364105, "learning_rate": 4.543089915157946e-05, "loss": 0.1741, "step": 15471 }, { "epoch": 0.27596047515428246, "grad_norm": 0.23655986785888672, "learning_rate": 4.543000209168159e-05, "loss": 0.1625, "step": 15472 }, { "epoch": 0.27597831127599615, "grad_norm": 0.28845351934432983, "learning_rate": 4.5429104952589976e-05, "loss": 0.1911, "step": 15473 }, { "epoch": 0.27599614739770983, "grad_norm": 0.30237093567848206, "learning_rate": 4.5428207734308094e-05, "loss": 0.1581, "step": 15474 }, { "epoch": 0.2760139835194235, "grad_norm": 0.28601041436195374, "learning_rate": 4.542731043683942e-05, "loss": 0.1492, "step": 15475 }, { "epoch": 0.2760318196411372, "grad_norm": 0.30665090680122375, "learning_rate": 4.5426413060187435e-05, "loss": 0.1735, "step": 15476 }, { "epoch": 0.2760496557628509, "grad_norm": 0.3497749865055084, "learning_rate": 4.542551560435562e-05, "loss": 0.194, "step": 15477 }, { "epoch": 0.27606749188456464, "grad_norm": 0.2997104227542877, "learning_rate": 4.542461806934745e-05, "loss": 0.1253, "step": 15478 }, { "epoch": 0.27608532800627833, "grad_norm": 0.27530500292778015, "learning_rate": 4.542372045516641e-05, "loss": 0.2024, "step": 15479 }, { "epoch": 0.276103164127992, "grad_norm": 0.30195263028144836, "learning_rate": 4.5422822761815966e-05, "loss": 0.1515, "step": 15480 }, { "epoch": 0.2761210002497057, "grad_norm": 0.38357216119766235, "learning_rate": 4.542192498929961e-05, "loss": 0.1988, "step": 15481 }, { "epoch": 0.2761388363714194, "grad_norm": 0.32098227739334106, "learning_rate": 4.542102713762082e-05, "loss": 0.1824, "step": 15482 }, { "epoch": 0.2761566724931331, "grad_norm": 0.19651976227760315, "learning_rate": 4.542012920678308e-05, "loss": 0.1555, "step": 15483 }, { "epoch": 0.27617450861484677, "grad_norm": 0.23746676743030548, "learning_rate": 4.541923119678987e-05, "loss": 0.1517, "step": 15484 }, { "epoch": 0.27619234473656046, "grad_norm": 0.3970274031162262, "learning_rate": 4.5418333107644656e-05, "loss": 0.1928, "step": 15485 }, { "epoch": 0.2762101808582742, "grad_norm": 0.3195289969444275, "learning_rate": 4.5417434939350936e-05, "loss": 0.2204, "step": 15486 }, { "epoch": 0.2762280169799879, "grad_norm": 0.3507113456726074, "learning_rate": 4.541653669191219e-05, "loss": 0.1888, "step": 15487 }, { "epoch": 0.2762458531017016, "grad_norm": 0.32902273535728455, "learning_rate": 4.541563836533189e-05, "loss": 0.2118, "step": 15488 }, { "epoch": 0.27626368922341527, "grad_norm": 0.36764469742774963, "learning_rate": 4.541473995961353e-05, "loss": 0.1885, "step": 15489 }, { "epoch": 0.27628152534512895, "grad_norm": 0.26530030369758606, "learning_rate": 4.5413841474760586e-05, "loss": 0.1571, "step": 15490 }, { "epoch": 0.27629936146684264, "grad_norm": 0.22426503896713257, "learning_rate": 4.5412942910776535e-05, "loss": 0.1767, "step": 15491 }, { "epoch": 0.27631719758855633, "grad_norm": 0.24545851349830627, "learning_rate": 4.541204426766486e-05, "loss": 0.1417, "step": 15492 }, { "epoch": 0.27633503371027, "grad_norm": 0.26633673906326294, "learning_rate": 4.541114554542907e-05, "loss": 0.162, "step": 15493 }, { "epoch": 0.27635286983198376, "grad_norm": 0.262577086687088, "learning_rate": 4.541024674407262e-05, "loss": 0.1978, "step": 15494 }, { "epoch": 0.27637070595369745, "grad_norm": 0.33074212074279785, "learning_rate": 4.540934786359901e-05, "loss": 0.2299, "step": 15495 }, { "epoch": 0.27638854207541114, "grad_norm": 0.28678977489471436, "learning_rate": 4.540844890401171e-05, "loss": 0.1768, "step": 15496 }, { "epoch": 0.2764063781971248, "grad_norm": 0.35482659935951233, "learning_rate": 4.5407549865314225e-05, "loss": 0.2379, "step": 15497 }, { "epoch": 0.2764242143188385, "grad_norm": 0.2754009962081909, "learning_rate": 4.540665074751003e-05, "loss": 0.2044, "step": 15498 }, { "epoch": 0.2764420504405522, "grad_norm": 0.21383485198020935, "learning_rate": 4.540575155060259e-05, "loss": 0.1742, "step": 15499 }, { "epoch": 0.2764598865622659, "grad_norm": 0.21688519418239594, "learning_rate": 4.540485227459542e-05, "loss": 0.1672, "step": 15500 }, { "epoch": 0.2764777226839796, "grad_norm": 0.2701101005077362, "learning_rate": 4.5403952919492e-05, "loss": 0.1964, "step": 15501 }, { "epoch": 0.27649555880569326, "grad_norm": 0.21089604496955872, "learning_rate": 4.54030534852958e-05, "loss": 0.1828, "step": 15502 }, { "epoch": 0.276513394927407, "grad_norm": 0.2518959939479828, "learning_rate": 4.540215397201032e-05, "loss": 0.176, "step": 15503 }, { "epoch": 0.2765312310491207, "grad_norm": 0.1921340972185135, "learning_rate": 4.540125437963905e-05, "loss": 0.1622, "step": 15504 }, { "epoch": 0.2765490671708344, "grad_norm": 0.22540751099586487, "learning_rate": 4.540035470818547e-05, "loss": 0.1558, "step": 15505 }, { "epoch": 0.27656690329254807, "grad_norm": 0.31669074296951294, "learning_rate": 4.539945495765307e-05, "loss": 0.2498, "step": 15506 }, { "epoch": 0.27658473941426176, "grad_norm": 0.25251826643943787, "learning_rate": 4.539855512804534e-05, "loss": 0.183, "step": 15507 }, { "epoch": 0.27660257553597545, "grad_norm": 0.2978300452232361, "learning_rate": 4.5397655219365756e-05, "loss": 0.1113, "step": 15508 }, { "epoch": 0.27662041165768914, "grad_norm": 0.2912184000015259, "learning_rate": 4.5396755231617814e-05, "loss": 0.1499, "step": 15509 }, { "epoch": 0.2766382477794028, "grad_norm": 0.25617778301239014, "learning_rate": 4.539585516480501e-05, "loss": 0.1744, "step": 15510 }, { "epoch": 0.27665608390111657, "grad_norm": 0.3530201315879822, "learning_rate": 4.539495501893083e-05, "loss": 0.1697, "step": 15511 }, { "epoch": 0.27667392002283026, "grad_norm": 0.25744280219078064, "learning_rate": 4.539405479399875e-05, "loss": 0.1812, "step": 15512 }, { "epoch": 0.27669175614454394, "grad_norm": 0.3635775148868561, "learning_rate": 4.539315449001228e-05, "loss": 0.2089, "step": 15513 }, { "epoch": 0.27670959226625763, "grad_norm": 0.2810732424259186, "learning_rate": 4.53922541069749e-05, "loss": 0.1799, "step": 15514 }, { "epoch": 0.2767274283879713, "grad_norm": 0.2714241147041321, "learning_rate": 4.539135364489009e-05, "loss": 0.2276, "step": 15515 }, { "epoch": 0.276745264509685, "grad_norm": 0.22411157190799713, "learning_rate": 4.539045310376136e-05, "loss": 0.1452, "step": 15516 }, { "epoch": 0.2767631006313987, "grad_norm": 0.31750720739364624, "learning_rate": 4.538955248359219e-05, "loss": 0.1752, "step": 15517 }, { "epoch": 0.2767809367531124, "grad_norm": 0.2537008225917816, "learning_rate": 4.5388651784386066e-05, "loss": 0.1992, "step": 15518 }, { "epoch": 0.27679877287482607, "grad_norm": 0.2894752025604248, "learning_rate": 4.538775100614649e-05, "loss": 0.2175, "step": 15519 }, { "epoch": 0.2768166089965398, "grad_norm": 0.22802633047103882, "learning_rate": 4.5386850148876944e-05, "loss": 0.1735, "step": 15520 }, { "epoch": 0.2768344451182535, "grad_norm": 0.2781098484992981, "learning_rate": 4.538594921258094e-05, "loss": 0.1879, "step": 15521 }, { "epoch": 0.2768522812399672, "grad_norm": 0.2885425388813019, "learning_rate": 4.538504819726194e-05, "loss": 0.1765, "step": 15522 }, { "epoch": 0.2768701173616809, "grad_norm": 0.2876160144805908, "learning_rate": 4.5384147102923454e-05, "loss": 0.2395, "step": 15523 }, { "epoch": 0.27688795348339457, "grad_norm": 0.3026312291622162, "learning_rate": 4.538324592956898e-05, "loss": 0.175, "step": 15524 }, { "epoch": 0.27690578960510825, "grad_norm": 0.23294112086296082, "learning_rate": 4.5382344677202e-05, "loss": 0.2215, "step": 15525 }, { "epoch": 0.27692362572682194, "grad_norm": 0.27057570219039917, "learning_rate": 4.538144334582601e-05, "loss": 0.1385, "step": 15526 }, { "epoch": 0.27694146184853563, "grad_norm": 0.23334906995296478, "learning_rate": 4.5380541935444514e-05, "loss": 0.1769, "step": 15527 }, { "epoch": 0.2769592979702494, "grad_norm": 0.3130313456058502, "learning_rate": 4.5379640446061e-05, "loss": 0.2065, "step": 15528 }, { "epoch": 0.27697713409196306, "grad_norm": 0.2886606454849243, "learning_rate": 4.537873887767895e-05, "loss": 0.1943, "step": 15529 }, { "epoch": 0.27699497021367675, "grad_norm": 0.22477389872074127, "learning_rate": 4.537783723030188e-05, "loss": 0.1631, "step": 15530 }, { "epoch": 0.27701280633539044, "grad_norm": 0.2547028362751007, "learning_rate": 4.5376935503933265e-05, "loss": 0.1769, "step": 15531 }, { "epoch": 0.2770306424571041, "grad_norm": 0.25012221932411194, "learning_rate": 4.537603369857662e-05, "loss": 0.1017, "step": 15532 }, { "epoch": 0.2770484785788178, "grad_norm": 0.26341575384140015, "learning_rate": 4.5375131814235415e-05, "loss": 0.1959, "step": 15533 }, { "epoch": 0.2770663147005315, "grad_norm": 0.21279609203338623, "learning_rate": 4.5374229850913174e-05, "loss": 0.1385, "step": 15534 }, { "epoch": 0.2770841508222452, "grad_norm": 0.2950650751590729, "learning_rate": 4.537332780861338e-05, "loss": 0.1786, "step": 15535 }, { "epoch": 0.2771019869439589, "grad_norm": 0.27218860387802124, "learning_rate": 4.537242568733952e-05, "loss": 0.176, "step": 15536 }, { "epoch": 0.2771198230656726, "grad_norm": 0.22721554338932037, "learning_rate": 4.537152348709512e-05, "loss": 0.1416, "step": 15537 }, { "epoch": 0.2771376591873863, "grad_norm": 0.2503151595592499, "learning_rate": 4.537062120788365e-05, "loss": 0.1197, "step": 15538 }, { "epoch": 0.2771554953091, "grad_norm": 0.32465431094169617, "learning_rate": 4.536971884970862e-05, "loss": 0.2091, "step": 15539 }, { "epoch": 0.2771733314308137, "grad_norm": 0.4016757607460022, "learning_rate": 4.5368816412573515e-05, "loss": 0.1305, "step": 15540 }, { "epoch": 0.2771911675525274, "grad_norm": 0.24984855949878693, "learning_rate": 4.536791389648185e-05, "loss": 0.1744, "step": 15541 }, { "epoch": 0.27720900367424106, "grad_norm": 0.18738994002342224, "learning_rate": 4.536701130143711e-05, "loss": 0.1325, "step": 15542 }, { "epoch": 0.27722683979595475, "grad_norm": 0.30558136105537415, "learning_rate": 4.536610862744281e-05, "loss": 0.1701, "step": 15543 }, { "epoch": 0.27724467591766844, "grad_norm": 0.24732215702533722, "learning_rate": 4.536520587450243e-05, "loss": 0.1781, "step": 15544 }, { "epoch": 0.2772625120393822, "grad_norm": 0.26350006461143494, "learning_rate": 4.536430304261948e-05, "loss": 0.1562, "step": 15545 }, { "epoch": 0.27728034816109587, "grad_norm": 0.20535536110401154, "learning_rate": 4.536340013179746e-05, "loss": 0.1432, "step": 15546 }, { "epoch": 0.27729818428280956, "grad_norm": 0.33243194222450256, "learning_rate": 4.536249714203986e-05, "loss": 0.218, "step": 15547 }, { "epoch": 0.27731602040452324, "grad_norm": 0.20961210131645203, "learning_rate": 4.53615940733502e-05, "loss": 0.1231, "step": 15548 }, { "epoch": 0.27733385652623693, "grad_norm": 0.2833684980869293, "learning_rate": 4.5360690925731964e-05, "loss": 0.1159, "step": 15549 }, { "epoch": 0.2773516926479506, "grad_norm": 0.2917201519012451, "learning_rate": 4.5359787699188656e-05, "loss": 0.1843, "step": 15550 }, { "epoch": 0.2773695287696643, "grad_norm": 0.4551113545894623, "learning_rate": 4.535888439372378e-05, "loss": 0.1996, "step": 15551 }, { "epoch": 0.277387364891378, "grad_norm": 0.2640388011932373, "learning_rate": 4.535798100934083e-05, "loss": 0.1745, "step": 15552 }, { "epoch": 0.27740520101309174, "grad_norm": 0.23592965304851532, "learning_rate": 4.5357077546043323e-05, "loss": 0.1919, "step": 15553 }, { "epoch": 0.27742303713480543, "grad_norm": 0.2902146279811859, "learning_rate": 4.535617400383475e-05, "loss": 0.1736, "step": 15554 }, { "epoch": 0.2774408732565191, "grad_norm": 0.3253636956214905, "learning_rate": 4.535527038271862e-05, "loss": 0.2047, "step": 15555 }, { "epoch": 0.2774587093782328, "grad_norm": 0.25144073367118835, "learning_rate": 4.5354366682698426e-05, "loss": 0.2093, "step": 15556 }, { "epoch": 0.2774765454999465, "grad_norm": 0.20756272971630096, "learning_rate": 4.535346290377768e-05, "loss": 0.1816, "step": 15557 }, { "epoch": 0.2774943816216602, "grad_norm": 0.22271545231342316, "learning_rate": 4.535255904595988e-05, "loss": 0.1624, "step": 15558 }, { "epoch": 0.27751221774337387, "grad_norm": 0.23617413640022278, "learning_rate": 4.5351655109248526e-05, "loss": 0.1784, "step": 15559 }, { "epoch": 0.27753005386508756, "grad_norm": 0.34566089510917664, "learning_rate": 4.535075109364713e-05, "loss": 0.2234, "step": 15560 }, { "epoch": 0.27754788998680124, "grad_norm": 0.19219085574150085, "learning_rate": 4.5349846999159194e-05, "loss": 0.1348, "step": 15561 }, { "epoch": 0.277565726108515, "grad_norm": 0.2243836373090744, "learning_rate": 4.534894282578822e-05, "loss": 0.1744, "step": 15562 }, { "epoch": 0.2775835622302287, "grad_norm": 0.3449123501777649, "learning_rate": 4.534803857353772e-05, "loss": 0.1953, "step": 15563 }, { "epoch": 0.27760139835194236, "grad_norm": 0.24644972383975983, "learning_rate": 4.53471342424112e-05, "loss": 0.1751, "step": 15564 }, { "epoch": 0.27761923447365605, "grad_norm": 0.26189693808555603, "learning_rate": 4.534622983241215e-05, "loss": 0.2029, "step": 15565 }, { "epoch": 0.27763707059536974, "grad_norm": 0.35615599155426025, "learning_rate": 4.534532534354409e-05, "loss": 0.228, "step": 15566 }, { "epoch": 0.2776549067170834, "grad_norm": 0.2755829393863678, "learning_rate": 4.534442077581053e-05, "loss": 0.194, "step": 15567 }, { "epoch": 0.2776727428387971, "grad_norm": 0.23754830658435822, "learning_rate": 4.534351612921496e-05, "loss": 0.1795, "step": 15568 }, { "epoch": 0.2776905789605108, "grad_norm": 0.2579927146434784, "learning_rate": 4.534261140376089e-05, "loss": 0.1785, "step": 15569 }, { "epoch": 0.27770841508222455, "grad_norm": 0.2636464834213257, "learning_rate": 4.534170659945184e-05, "loss": 0.2161, "step": 15570 }, { "epoch": 0.27772625120393823, "grad_norm": 0.2908783257007599, "learning_rate": 4.5340801716291305e-05, "loss": 0.1814, "step": 15571 }, { "epoch": 0.2777440873256519, "grad_norm": 0.37598586082458496, "learning_rate": 4.53398967542828e-05, "loss": 0.1054, "step": 15572 }, { "epoch": 0.2777619234473656, "grad_norm": 0.26995182037353516, "learning_rate": 4.533899171342983e-05, "loss": 0.1885, "step": 15573 }, { "epoch": 0.2777797595690793, "grad_norm": 0.2756546139717102, "learning_rate": 4.5338086593735904e-05, "loss": 0.1493, "step": 15574 }, { "epoch": 0.277797595690793, "grad_norm": 0.23395726084709167, "learning_rate": 4.533718139520452e-05, "loss": 0.174, "step": 15575 }, { "epoch": 0.2778154318125067, "grad_norm": 0.3361474573612213, "learning_rate": 4.5336276117839206e-05, "loss": 0.2173, "step": 15576 }, { "epoch": 0.27783326793422036, "grad_norm": 0.25557228922843933, "learning_rate": 4.533537076164346e-05, "loss": 0.1562, "step": 15577 }, { "epoch": 0.27785110405593405, "grad_norm": 0.3303571939468384, "learning_rate": 4.533446532662079e-05, "loss": 0.191, "step": 15578 }, { "epoch": 0.2778689401776478, "grad_norm": 0.30470383167266846, "learning_rate": 4.533355981277472e-05, "loss": 0.159, "step": 15579 }, { "epoch": 0.2778867762993615, "grad_norm": 0.36051464080810547, "learning_rate": 4.5332654220108736e-05, "loss": 0.1502, "step": 15580 }, { "epoch": 0.27790461242107517, "grad_norm": 0.209768146276474, "learning_rate": 4.5331748548626374e-05, "loss": 0.1705, "step": 15581 }, { "epoch": 0.27792244854278886, "grad_norm": 0.3213937282562256, "learning_rate": 4.5330842798331126e-05, "loss": 0.2146, "step": 15582 }, { "epoch": 0.27794028466450255, "grad_norm": 0.25211167335510254, "learning_rate": 4.53299369692265e-05, "loss": 0.1991, "step": 15583 }, { "epoch": 0.27795812078621623, "grad_norm": 0.2216213494539261, "learning_rate": 4.5329031061316035e-05, "loss": 0.1583, "step": 15584 }, { "epoch": 0.2779759569079299, "grad_norm": 0.24817034602165222, "learning_rate": 4.532812507460321e-05, "loss": 0.1894, "step": 15585 }, { "epoch": 0.2779937930296436, "grad_norm": 0.24385593831539154, "learning_rate": 4.532721900909156e-05, "loss": 0.1846, "step": 15586 }, { "epoch": 0.27801162915135735, "grad_norm": 0.24522389471530914, "learning_rate": 4.532631286478458e-05, "loss": 0.1614, "step": 15587 }, { "epoch": 0.27802946527307104, "grad_norm": 0.3663196861743927, "learning_rate": 4.5325406641685796e-05, "loss": 0.2239, "step": 15588 }, { "epoch": 0.27804730139478473, "grad_norm": 0.36047157645225525, "learning_rate": 4.5324500339798715e-05, "loss": 0.2091, "step": 15589 }, { "epoch": 0.2780651375164984, "grad_norm": 0.20961324870586395, "learning_rate": 4.5323593959126857e-05, "loss": 0.1354, "step": 15590 }, { "epoch": 0.2780829736382121, "grad_norm": 0.2699880599975586, "learning_rate": 4.5322687499673724e-05, "loss": 0.2045, "step": 15591 }, { "epoch": 0.2781008097599258, "grad_norm": 0.29620838165283203, "learning_rate": 4.532178096144283e-05, "loss": 0.1992, "step": 15592 }, { "epoch": 0.2781186458816395, "grad_norm": 0.2632938623428345, "learning_rate": 4.5320874344437705e-05, "loss": 0.1829, "step": 15593 }, { "epoch": 0.27813648200335317, "grad_norm": 0.27483099699020386, "learning_rate": 4.5319967648661845e-05, "loss": 0.1805, "step": 15594 }, { "epoch": 0.2781543181250669, "grad_norm": 0.21178576350212097, "learning_rate": 4.5319060874118766e-05, "loss": 0.1407, "step": 15595 }, { "epoch": 0.2781721542467806, "grad_norm": 0.2852518558502197, "learning_rate": 4.5318154020811996e-05, "loss": 0.1534, "step": 15596 }, { "epoch": 0.2781899903684943, "grad_norm": 0.2494693398475647, "learning_rate": 4.531724708874504e-05, "loss": 0.1846, "step": 15597 }, { "epoch": 0.278207826490208, "grad_norm": 0.23659773170948029, "learning_rate": 4.531634007792143e-05, "loss": 0.1688, "step": 15598 }, { "epoch": 0.27822566261192166, "grad_norm": 0.3187173008918762, "learning_rate": 4.531543298834465e-05, "loss": 0.2202, "step": 15599 }, { "epoch": 0.27824349873363535, "grad_norm": 0.2896851599216461, "learning_rate": 4.5314525820018244e-05, "loss": 0.2044, "step": 15600 }, { "epoch": 0.27826133485534904, "grad_norm": 0.2916419804096222, "learning_rate": 4.531361857294572e-05, "loss": 0.223, "step": 15601 }, { "epoch": 0.27827917097706273, "grad_norm": 0.23943808674812317, "learning_rate": 4.53127112471306e-05, "loss": 0.1811, "step": 15602 }, { "epoch": 0.2782970070987764, "grad_norm": 0.22779546678066254, "learning_rate": 4.5311803842576385e-05, "loss": 0.1674, "step": 15603 }, { "epoch": 0.27831484322049016, "grad_norm": 0.320279061794281, "learning_rate": 4.5310896359286605e-05, "loss": 0.1746, "step": 15604 }, { "epoch": 0.27833267934220385, "grad_norm": 0.2758757472038269, "learning_rate": 4.530998879726478e-05, "loss": 0.1649, "step": 15605 }, { "epoch": 0.27835051546391754, "grad_norm": 0.2524808347225189, "learning_rate": 4.530908115651442e-05, "loss": 0.1967, "step": 15606 }, { "epoch": 0.2783683515856312, "grad_norm": 0.2577757239341736, "learning_rate": 4.530817343703905e-05, "loss": 0.2039, "step": 15607 }, { "epoch": 0.2783861877073449, "grad_norm": 0.29628923535346985, "learning_rate": 4.530726563884218e-05, "loss": 0.1545, "step": 15608 }, { "epoch": 0.2784040238290586, "grad_norm": 0.33466726541519165, "learning_rate": 4.5306357761927345e-05, "loss": 0.1666, "step": 15609 }, { "epoch": 0.2784218599507723, "grad_norm": 0.373579740524292, "learning_rate": 4.530544980629804e-05, "loss": 0.2333, "step": 15610 }, { "epoch": 0.278439696072486, "grad_norm": 0.2565092444419861, "learning_rate": 4.530454177195781e-05, "loss": 0.1665, "step": 15611 }, { "epoch": 0.2784575321941997, "grad_norm": 0.30497944355010986, "learning_rate": 4.530363365891015e-05, "loss": 0.2114, "step": 15612 }, { "epoch": 0.2784753683159134, "grad_norm": 0.2598390579223633, "learning_rate": 4.5302725467158604e-05, "loss": 0.1372, "step": 15613 }, { "epoch": 0.2784932044376271, "grad_norm": 0.2783115804195404, "learning_rate": 4.530181719670667e-05, "loss": 0.1664, "step": 15614 }, { "epoch": 0.2785110405593408, "grad_norm": 0.4061029851436615, "learning_rate": 4.53009088475579e-05, "loss": 0.1523, "step": 15615 }, { "epoch": 0.27852887668105447, "grad_norm": 0.2874487638473511, "learning_rate": 4.530000041971578e-05, "loss": 0.1768, "step": 15616 }, { "epoch": 0.27854671280276816, "grad_norm": 0.27280646562576294, "learning_rate": 4.529909191318385e-05, "loss": 0.1952, "step": 15617 }, { "epoch": 0.27856454892448185, "grad_norm": 0.28535526990890503, "learning_rate": 4.529818332796564e-05, "loss": 0.203, "step": 15618 }, { "epoch": 0.27858238504619554, "grad_norm": 0.32303035259246826, "learning_rate": 4.529727466406465e-05, "loss": 0.2003, "step": 15619 }, { "epoch": 0.2786002211679092, "grad_norm": 0.29018232226371765, "learning_rate": 4.529636592148441e-05, "loss": 0.1635, "step": 15620 }, { "epoch": 0.27861805728962297, "grad_norm": 0.2523662745952606, "learning_rate": 4.5295457100228456e-05, "loss": 0.1109, "step": 15621 }, { "epoch": 0.27863589341133665, "grad_norm": 0.2685641348361969, "learning_rate": 4.529454820030029e-05, "loss": 0.1804, "step": 15622 }, { "epoch": 0.27865372953305034, "grad_norm": 0.28229889273643494, "learning_rate": 4.529363922170346e-05, "loss": 0.1954, "step": 15623 }, { "epoch": 0.27867156565476403, "grad_norm": 0.23529836535453796, "learning_rate": 4.5292730164441455e-05, "loss": 0.1969, "step": 15624 }, { "epoch": 0.2786894017764777, "grad_norm": 0.2972094416618347, "learning_rate": 4.5291821028517834e-05, "loss": 0.2335, "step": 15625 }, { "epoch": 0.2787072378981914, "grad_norm": 0.3776022493839264, "learning_rate": 4.52909118139361e-05, "loss": 0.2171, "step": 15626 }, { "epoch": 0.2787250740199051, "grad_norm": 0.39162972569465637, "learning_rate": 4.529000252069978e-05, "loss": 0.1781, "step": 15627 }, { "epoch": 0.2787429101416188, "grad_norm": 0.21789129078388214, "learning_rate": 4.5289093148812414e-05, "loss": 0.1551, "step": 15628 }, { "epoch": 0.2787607462633325, "grad_norm": 0.27025988698005676, "learning_rate": 4.528818369827751e-05, "loss": 0.2257, "step": 15629 }, { "epoch": 0.2787785823850462, "grad_norm": 0.2933149039745331, "learning_rate": 4.528727416909859e-05, "loss": 0.1475, "step": 15630 }, { "epoch": 0.2787964185067599, "grad_norm": 0.2323552668094635, "learning_rate": 4.52863645612792e-05, "loss": 0.1631, "step": 15631 }, { "epoch": 0.2788142546284736, "grad_norm": 0.262197345495224, "learning_rate": 4.528545487482285e-05, "loss": 0.1252, "step": 15632 }, { "epoch": 0.2788320907501873, "grad_norm": 0.27623292803764343, "learning_rate": 4.528454510973307e-05, "loss": 0.1817, "step": 15633 }, { "epoch": 0.27884992687190097, "grad_norm": 0.2861792743206024, "learning_rate": 4.528363526601339e-05, "loss": 0.192, "step": 15634 }, { "epoch": 0.27886776299361465, "grad_norm": 0.26510319113731384, "learning_rate": 4.528272534366733e-05, "loss": 0.1959, "step": 15635 }, { "epoch": 0.27888559911532834, "grad_norm": 0.44476866722106934, "learning_rate": 4.528181534269842e-05, "loss": 0.1873, "step": 15636 }, { "epoch": 0.27890343523704203, "grad_norm": 0.2552759647369385, "learning_rate": 4.5280905263110194e-05, "loss": 0.1552, "step": 15637 }, { "epoch": 0.2789212713587558, "grad_norm": 1.0172224044799805, "learning_rate": 4.527999510490617e-05, "loss": 0.1539, "step": 15638 }, { "epoch": 0.27893910748046946, "grad_norm": 0.28688690066337585, "learning_rate": 4.527908486808988e-05, "loss": 0.1587, "step": 15639 }, { "epoch": 0.27895694360218315, "grad_norm": 0.31575825810432434, "learning_rate": 4.527817455266485e-05, "loss": 0.1909, "step": 15640 }, { "epoch": 0.27897477972389684, "grad_norm": 0.232138529419899, "learning_rate": 4.527726415863462e-05, "loss": 0.1821, "step": 15641 }, { "epoch": 0.2789926158456105, "grad_norm": 0.2726914882659912, "learning_rate": 4.527635368600271e-05, "loss": 0.2031, "step": 15642 }, { "epoch": 0.2790104519673242, "grad_norm": 0.3221610188484192, "learning_rate": 4.527544313477265e-05, "loss": 0.2329, "step": 15643 }, { "epoch": 0.2790282880890379, "grad_norm": 0.23013746738433838, "learning_rate": 4.527453250494797e-05, "loss": 0.188, "step": 15644 }, { "epoch": 0.2790461242107516, "grad_norm": 0.34944698214530945, "learning_rate": 4.5273621796532196e-05, "loss": 0.1697, "step": 15645 }, { "epoch": 0.27906396033246533, "grad_norm": 0.2999511659145355, "learning_rate": 4.527271100952886e-05, "loss": 0.2223, "step": 15646 }, { "epoch": 0.279081796454179, "grad_norm": 0.278818815946579, "learning_rate": 4.527180014394149e-05, "loss": 0.2096, "step": 15647 }, { "epoch": 0.2790996325758927, "grad_norm": 0.2949967384338379, "learning_rate": 4.5270889199773626e-05, "loss": 0.1548, "step": 15648 }, { "epoch": 0.2791174686976064, "grad_norm": 0.2322419136762619, "learning_rate": 4.52699781770288e-05, "loss": 0.1427, "step": 15649 }, { "epoch": 0.2791353048193201, "grad_norm": 0.30145666003227234, "learning_rate": 4.526906707571053e-05, "loss": 0.1613, "step": 15650 }, { "epoch": 0.2791531409410338, "grad_norm": 0.33624911308288574, "learning_rate": 4.5268155895822355e-05, "loss": 0.1834, "step": 15651 }, { "epoch": 0.27917097706274746, "grad_norm": 0.27684760093688965, "learning_rate": 4.526724463736781e-05, "loss": 0.1665, "step": 15652 }, { "epoch": 0.27918881318446115, "grad_norm": 0.287435382604599, "learning_rate": 4.526633330035043e-05, "loss": 0.1928, "step": 15653 }, { "epoch": 0.2792066493061749, "grad_norm": 0.38228148221969604, "learning_rate": 4.526542188477373e-05, "loss": 0.1746, "step": 15654 }, { "epoch": 0.2792244854278886, "grad_norm": 0.274532288312912, "learning_rate": 4.526451039064127e-05, "loss": 0.1586, "step": 15655 }, { "epoch": 0.27924232154960227, "grad_norm": 0.28259244561195374, "learning_rate": 4.5263598817956555e-05, "loss": 0.2192, "step": 15656 }, { "epoch": 0.27926015767131596, "grad_norm": 0.27577927708625793, "learning_rate": 4.526268716672314e-05, "loss": 0.1721, "step": 15657 }, { "epoch": 0.27927799379302964, "grad_norm": 0.2167995572090149, "learning_rate": 4.5261775436944554e-05, "loss": 0.2397, "step": 15658 }, { "epoch": 0.27929582991474333, "grad_norm": 0.30169492959976196, "learning_rate": 4.526086362862432e-05, "loss": 0.259, "step": 15659 }, { "epoch": 0.279313666036457, "grad_norm": 0.21073132753372192, "learning_rate": 4.525995174176598e-05, "loss": 0.2074, "step": 15660 }, { "epoch": 0.2793315021581707, "grad_norm": 0.25644275546073914, "learning_rate": 4.525903977637308e-05, "loss": 0.1562, "step": 15661 }, { "epoch": 0.2793493382798844, "grad_norm": 0.3562442660331726, "learning_rate": 4.525812773244914e-05, "loss": 0.2401, "step": 15662 }, { "epoch": 0.27936717440159814, "grad_norm": 0.21228370070457458, "learning_rate": 4.5257215609997694e-05, "loss": 0.1583, "step": 15663 }, { "epoch": 0.2793850105233118, "grad_norm": 0.21904005110263824, "learning_rate": 4.525630340902229e-05, "loss": 0.1509, "step": 15664 }, { "epoch": 0.2794028466450255, "grad_norm": 0.3077412545681, "learning_rate": 4.525539112952645e-05, "loss": 0.2425, "step": 15665 }, { "epoch": 0.2794206827667392, "grad_norm": 0.25142520666122437, "learning_rate": 4.525447877151373e-05, "loss": 0.2044, "step": 15666 }, { "epoch": 0.2794385188884529, "grad_norm": 0.2700698673725128, "learning_rate": 4.525356633498764e-05, "loss": 0.2162, "step": 15667 }, { "epoch": 0.2794563550101666, "grad_norm": 0.4805814027786255, "learning_rate": 4.5252653819951745e-05, "loss": 0.1724, "step": 15668 }, { "epoch": 0.27947419113188027, "grad_norm": 0.35741961002349854, "learning_rate": 4.525174122640956e-05, "loss": 0.1872, "step": 15669 }, { "epoch": 0.27949202725359396, "grad_norm": 0.2249939739704132, "learning_rate": 4.525082855436464e-05, "loss": 0.1577, "step": 15670 }, { "epoch": 0.2795098633753077, "grad_norm": 0.27167457342147827, "learning_rate": 4.524991580382051e-05, "loss": 0.1447, "step": 15671 }, { "epoch": 0.2795276994970214, "grad_norm": 0.24171313643455505, "learning_rate": 4.524900297478071e-05, "loss": 0.2023, "step": 15672 }, { "epoch": 0.2795455356187351, "grad_norm": 0.3178898096084595, "learning_rate": 4.524809006724878e-05, "loss": 0.2133, "step": 15673 }, { "epoch": 0.27956337174044876, "grad_norm": 0.27155107259750366, "learning_rate": 4.5247177081228264e-05, "loss": 0.1887, "step": 15674 }, { "epoch": 0.27958120786216245, "grad_norm": 0.19774875044822693, "learning_rate": 4.5246264016722696e-05, "loss": 0.1397, "step": 15675 }, { "epoch": 0.27959904398387614, "grad_norm": 0.19590790569782257, "learning_rate": 4.524535087373561e-05, "loss": 0.1796, "step": 15676 }, { "epoch": 0.2796168801055898, "grad_norm": 0.31075024604797363, "learning_rate": 4.524443765227055e-05, "loss": 0.1954, "step": 15677 }, { "epoch": 0.2796347162273035, "grad_norm": 0.28407710790634155, "learning_rate": 4.5243524352331066e-05, "loss": 0.1575, "step": 15678 }, { "epoch": 0.2796525523490172, "grad_norm": 0.23158155381679535, "learning_rate": 4.5242610973920685e-05, "loss": 0.1668, "step": 15679 }, { "epoch": 0.27967038847073095, "grad_norm": 0.3562926948070526, "learning_rate": 4.524169751704296e-05, "loss": 0.191, "step": 15680 }, { "epoch": 0.27968822459244463, "grad_norm": 0.3110958933830261, "learning_rate": 4.524078398170141e-05, "loss": 0.1832, "step": 15681 }, { "epoch": 0.2797060607141583, "grad_norm": 0.35577085614204407, "learning_rate": 4.52398703678996e-05, "loss": 0.1431, "step": 15682 }, { "epoch": 0.279723896835872, "grad_norm": 0.2610926926136017, "learning_rate": 4.523895667564106e-05, "loss": 0.1898, "step": 15683 }, { "epoch": 0.2797417329575857, "grad_norm": 0.24440820515155792, "learning_rate": 4.5238042904929334e-05, "loss": 0.1796, "step": 15684 }, { "epoch": 0.2797595690792994, "grad_norm": 0.34617117047309875, "learning_rate": 4.5237129055767965e-05, "loss": 0.223, "step": 15685 }, { "epoch": 0.2797774052010131, "grad_norm": 0.2710441052913666, "learning_rate": 4.523621512816049e-05, "loss": 0.1601, "step": 15686 }, { "epoch": 0.27979524132272676, "grad_norm": 0.25729870796203613, "learning_rate": 4.5235301122110465e-05, "loss": 0.1394, "step": 15687 }, { "epoch": 0.2798130774444405, "grad_norm": 0.299956351518631, "learning_rate": 4.5234387037621415e-05, "loss": 0.1697, "step": 15688 }, { "epoch": 0.2798309135661542, "grad_norm": 0.24831891059875488, "learning_rate": 4.523347287469689e-05, "loss": 0.1732, "step": 15689 }, { "epoch": 0.2798487496878679, "grad_norm": 0.33137819170951843, "learning_rate": 4.5232558633340436e-05, "loss": 0.2079, "step": 15690 }, { "epoch": 0.27986658580958157, "grad_norm": 0.2328789383172989, "learning_rate": 4.52316443135556e-05, "loss": 0.2031, "step": 15691 }, { "epoch": 0.27988442193129526, "grad_norm": 0.2618556320667267, "learning_rate": 4.5230729915345924e-05, "loss": 0.1432, "step": 15692 }, { "epoch": 0.27990225805300895, "grad_norm": 0.257379949092865, "learning_rate": 4.522981543871495e-05, "loss": 0.1899, "step": 15693 }, { "epoch": 0.27992009417472263, "grad_norm": 0.18314041197299957, "learning_rate": 4.5228900883666224e-05, "loss": 0.1298, "step": 15694 }, { "epoch": 0.2799379302964363, "grad_norm": 0.3744262158870697, "learning_rate": 4.52279862502033e-05, "loss": 0.2006, "step": 15695 }, { "epoch": 0.27995576641815006, "grad_norm": 0.2649403512477875, "learning_rate": 4.52270715383297e-05, "loss": 0.1696, "step": 15696 }, { "epoch": 0.27997360253986375, "grad_norm": 0.2586050033569336, "learning_rate": 4.5226156748049e-05, "loss": 0.1774, "step": 15697 }, { "epoch": 0.27999143866157744, "grad_norm": 0.31558698415756226, "learning_rate": 4.5225241879364724e-05, "loss": 0.2203, "step": 15698 }, { "epoch": 0.28000927478329113, "grad_norm": 0.2562309503555298, "learning_rate": 4.5224326932280414e-05, "loss": 0.2075, "step": 15699 }, { "epoch": 0.2800271109050048, "grad_norm": 0.27281156182289124, "learning_rate": 4.522341190679964e-05, "loss": 0.1466, "step": 15700 }, { "epoch": 0.2800449470267185, "grad_norm": 0.24587777256965637, "learning_rate": 4.522249680292593e-05, "loss": 0.2128, "step": 15701 }, { "epoch": 0.2800627831484322, "grad_norm": 0.2370014786720276, "learning_rate": 4.5221581620662845e-05, "loss": 0.1666, "step": 15702 }, { "epoch": 0.2800806192701459, "grad_norm": 0.3908672034740448, "learning_rate": 4.522066636001392e-05, "loss": 0.1888, "step": 15703 }, { "epoch": 0.28009845539185957, "grad_norm": 0.556275486946106, "learning_rate": 4.52197510209827e-05, "loss": 0.1988, "step": 15704 }, { "epoch": 0.2801162915135733, "grad_norm": 0.17236942052841187, "learning_rate": 4.521883560357276e-05, "loss": 0.1236, "step": 15705 }, { "epoch": 0.280134127635287, "grad_norm": 0.3039679229259491, "learning_rate": 4.521792010778761e-05, "loss": 0.1851, "step": 15706 }, { "epoch": 0.2801519637570007, "grad_norm": 0.27140864729881287, "learning_rate": 4.521700453363083e-05, "loss": 0.1735, "step": 15707 }, { "epoch": 0.2801697998787144, "grad_norm": 0.3555116355419159, "learning_rate": 4.521608888110597e-05, "loss": 0.2011, "step": 15708 }, { "epoch": 0.28018763600042806, "grad_norm": 0.33275192975997925, "learning_rate": 4.521517315021655e-05, "loss": 0.1947, "step": 15709 }, { "epoch": 0.28020547212214175, "grad_norm": 0.23747345805168152, "learning_rate": 4.5214257340966134e-05, "loss": 0.1444, "step": 15710 }, { "epoch": 0.28022330824385544, "grad_norm": 0.2809229791164398, "learning_rate": 4.521334145335828e-05, "loss": 0.1698, "step": 15711 }, { "epoch": 0.28024114436556913, "grad_norm": 0.2740756869316101, "learning_rate": 4.521242548739654e-05, "loss": 0.1943, "step": 15712 }, { "epoch": 0.28025898048728287, "grad_norm": 0.2608456611633301, "learning_rate": 4.5211509443084456e-05, "loss": 0.1756, "step": 15713 }, { "epoch": 0.28027681660899656, "grad_norm": 0.20253300666809082, "learning_rate": 4.5210593320425576e-05, "loss": 0.2273, "step": 15714 }, { "epoch": 0.28029465273071025, "grad_norm": 0.3151959180831909, "learning_rate": 4.520967711942345e-05, "loss": 0.1583, "step": 15715 }, { "epoch": 0.28031248885242394, "grad_norm": 0.2235242873430252, "learning_rate": 4.520876084008164e-05, "loss": 0.1596, "step": 15716 }, { "epoch": 0.2803303249741376, "grad_norm": 0.2532644271850586, "learning_rate": 4.52078444824037e-05, "loss": 0.1838, "step": 15717 }, { "epoch": 0.2803481610958513, "grad_norm": 0.27217957377433777, "learning_rate": 4.520692804639317e-05, "loss": 0.1935, "step": 15718 }, { "epoch": 0.280365997217565, "grad_norm": 0.42854583263397217, "learning_rate": 4.5206011532053606e-05, "loss": 0.183, "step": 15719 }, { "epoch": 0.2803838333392787, "grad_norm": 0.30840596556663513, "learning_rate": 4.5205094939388563e-05, "loss": 0.1908, "step": 15720 }, { "epoch": 0.2804016694609924, "grad_norm": 0.20168592035770416, "learning_rate": 4.5204178268401596e-05, "loss": 0.1695, "step": 15721 }, { "epoch": 0.2804195055827061, "grad_norm": 0.2682773470878601, "learning_rate": 4.520326151909625e-05, "loss": 0.172, "step": 15722 }, { "epoch": 0.2804373417044198, "grad_norm": 0.3022426962852478, "learning_rate": 4.52023446914761e-05, "loss": 0.1383, "step": 15723 }, { "epoch": 0.2804551778261335, "grad_norm": 0.3393450081348419, "learning_rate": 4.520142778554467e-05, "loss": 0.191, "step": 15724 }, { "epoch": 0.2804730139478472, "grad_norm": 0.35501599311828613, "learning_rate": 4.520051080130553e-05, "loss": 0.2231, "step": 15725 }, { "epoch": 0.28049085006956087, "grad_norm": 0.3100959062576294, "learning_rate": 4.5199593738762236e-05, "loss": 0.1522, "step": 15726 }, { "epoch": 0.28050868619127456, "grad_norm": 0.37229984998703003, "learning_rate": 4.5198676597918334e-05, "loss": 0.1593, "step": 15727 }, { "epoch": 0.28052652231298825, "grad_norm": 0.33646801114082336, "learning_rate": 4.519775937877739e-05, "loss": 0.2099, "step": 15728 }, { "epoch": 0.28054435843470193, "grad_norm": 0.2767389416694641, "learning_rate": 4.5196842081342955e-05, "loss": 0.245, "step": 15729 }, { "epoch": 0.2805621945564157, "grad_norm": 0.24052877724170685, "learning_rate": 4.5195924705618585e-05, "loss": 0.2056, "step": 15730 }, { "epoch": 0.28058003067812937, "grad_norm": 0.31136706471443176, "learning_rate": 4.519500725160783e-05, "loss": 0.2343, "step": 15731 }, { "epoch": 0.28059786679984305, "grad_norm": 0.258151650428772, "learning_rate": 4.519408971931426e-05, "loss": 0.2266, "step": 15732 }, { "epoch": 0.28061570292155674, "grad_norm": 0.27437925338745117, "learning_rate": 4.5193172108741415e-05, "loss": 0.1958, "step": 15733 }, { "epoch": 0.28063353904327043, "grad_norm": 0.2852654457092285, "learning_rate": 4.5192254419892865e-05, "loss": 0.1703, "step": 15734 }, { "epoch": 0.2806513751649841, "grad_norm": 0.36328765749931335, "learning_rate": 4.519133665277216e-05, "loss": 0.1946, "step": 15735 }, { "epoch": 0.2806692112866978, "grad_norm": 0.19724664092063904, "learning_rate": 4.519041880738287e-05, "loss": 0.1359, "step": 15736 }, { "epoch": 0.2806870474084115, "grad_norm": 0.29698875546455383, "learning_rate": 4.5189500883728534e-05, "loss": 0.1542, "step": 15737 }, { "epoch": 0.2807048835301252, "grad_norm": 0.38125431537628174, "learning_rate": 4.518858288181272e-05, "loss": 0.1667, "step": 15738 }, { "epoch": 0.2807227196518389, "grad_norm": 0.24442516267299652, "learning_rate": 4.5187664801638984e-05, "loss": 0.1305, "step": 15739 }, { "epoch": 0.2807405557735526, "grad_norm": 0.40529268980026245, "learning_rate": 4.518674664321089e-05, "loss": 0.2371, "step": 15740 }, { "epoch": 0.2807583918952663, "grad_norm": 0.25345057249069214, "learning_rate": 4.518582840653199e-05, "loss": 0.1759, "step": 15741 }, { "epoch": 0.28077622801698, "grad_norm": 0.19947724044322968, "learning_rate": 4.518491009160585e-05, "loss": 0.1532, "step": 15742 }, { "epoch": 0.2807940641386937, "grad_norm": 0.2914084196090698, "learning_rate": 4.5183991698436035e-05, "loss": 0.2232, "step": 15743 }, { "epoch": 0.28081190026040737, "grad_norm": 0.2936772108078003, "learning_rate": 4.5183073227026084e-05, "loss": 0.1872, "step": 15744 }, { "epoch": 0.28082973638212105, "grad_norm": 0.3514951169490814, "learning_rate": 4.5182154677379576e-05, "loss": 0.2488, "step": 15745 }, { "epoch": 0.28084757250383474, "grad_norm": 0.27922895550727844, "learning_rate": 4.518123604950006e-05, "loss": 0.1941, "step": 15746 }, { "epoch": 0.2808654086255485, "grad_norm": 0.30157873034477234, "learning_rate": 4.518031734339111e-05, "loss": 0.1995, "step": 15747 }, { "epoch": 0.2808832447472622, "grad_norm": 0.19730991125106812, "learning_rate": 4.517939855905628e-05, "loss": 0.1369, "step": 15748 }, { "epoch": 0.28090108086897586, "grad_norm": 0.3510364890098572, "learning_rate": 4.517847969649913e-05, "loss": 0.2193, "step": 15749 }, { "epoch": 0.28091891699068955, "grad_norm": 0.18678249418735504, "learning_rate": 4.5177560755723226e-05, "loss": 0.1495, "step": 15750 }, { "epoch": 0.28093675311240324, "grad_norm": 0.3212592601776123, "learning_rate": 4.5176641736732116e-05, "loss": 0.1873, "step": 15751 }, { "epoch": 0.2809545892341169, "grad_norm": 0.25958016514778137, "learning_rate": 4.5175722639529386e-05, "loss": 0.1817, "step": 15752 }, { "epoch": 0.2809724253558306, "grad_norm": 0.23775915801525116, "learning_rate": 4.517480346411858e-05, "loss": 0.1857, "step": 15753 }, { "epoch": 0.2809902614775443, "grad_norm": 0.3295690715312958, "learning_rate": 4.517388421050327e-05, "loss": 0.1632, "step": 15754 }, { "epoch": 0.28100809759925804, "grad_norm": 0.36992278695106506, "learning_rate": 4.517296487868702e-05, "loss": 0.2102, "step": 15755 }, { "epoch": 0.28102593372097173, "grad_norm": 0.23655936121940613, "learning_rate": 4.517204546867338e-05, "loss": 0.1871, "step": 15756 }, { "epoch": 0.2810437698426854, "grad_norm": 0.24623289704322815, "learning_rate": 4.517112598046593e-05, "loss": 0.1502, "step": 15757 }, { "epoch": 0.2810616059643991, "grad_norm": 0.3252638578414917, "learning_rate": 4.517020641406824e-05, "loss": 0.1408, "step": 15758 }, { "epoch": 0.2810794420861128, "grad_norm": 0.2367999404668808, "learning_rate": 4.516928676948385e-05, "loss": 0.1159, "step": 15759 }, { "epoch": 0.2810972782078265, "grad_norm": 0.253127783536911, "learning_rate": 4.516836704671634e-05, "loss": 0.1875, "step": 15760 }, { "epoch": 0.28111511432954017, "grad_norm": 0.3281935751438141, "learning_rate": 4.516744724576928e-05, "loss": 0.1416, "step": 15761 }, { "epoch": 0.28113295045125386, "grad_norm": 0.593634843826294, "learning_rate": 4.516652736664623e-05, "loss": 0.2005, "step": 15762 }, { "epoch": 0.28115078657296755, "grad_norm": 0.38164636492729187, "learning_rate": 4.516560740935074e-05, "loss": 0.1912, "step": 15763 }, { "epoch": 0.2811686226946813, "grad_norm": 0.4401412308216095, "learning_rate": 4.5164687373886403e-05, "loss": 0.1904, "step": 15764 }, { "epoch": 0.281186458816395, "grad_norm": 0.2834833860397339, "learning_rate": 4.5163767260256774e-05, "loss": 0.1962, "step": 15765 }, { "epoch": 0.28120429493810867, "grad_norm": 0.36803925037384033, "learning_rate": 4.516284706846541e-05, "loss": 0.2246, "step": 15766 }, { "epoch": 0.28122213105982236, "grad_norm": 0.3717997670173645, "learning_rate": 4.516192679851589e-05, "loss": 0.1965, "step": 15767 }, { "epoch": 0.28123996718153604, "grad_norm": 0.5259448885917664, "learning_rate": 4.516100645041178e-05, "loss": 0.1999, "step": 15768 }, { "epoch": 0.28125780330324973, "grad_norm": 0.23925939202308655, "learning_rate": 4.5160086024156644e-05, "loss": 0.152, "step": 15769 }, { "epoch": 0.2812756394249634, "grad_norm": 0.26688462495803833, "learning_rate": 4.515916551975406e-05, "loss": 0.2263, "step": 15770 }, { "epoch": 0.2812934755466771, "grad_norm": 0.21828119456768036, "learning_rate": 4.515824493720757e-05, "loss": 0.1737, "step": 15771 }, { "epoch": 0.28131131166839085, "grad_norm": 0.26910701394081116, "learning_rate": 4.515732427652077e-05, "loss": 0.162, "step": 15772 }, { "epoch": 0.28132914779010454, "grad_norm": 0.3421975374221802, "learning_rate": 4.515640353769722e-05, "loss": 0.1402, "step": 15773 }, { "epoch": 0.2813469839118182, "grad_norm": 0.23494797945022583, "learning_rate": 4.515548272074049e-05, "loss": 0.2232, "step": 15774 }, { "epoch": 0.2813648200335319, "grad_norm": 0.24409538507461548, "learning_rate": 4.5154561825654144e-05, "loss": 0.165, "step": 15775 }, { "epoch": 0.2813826561552456, "grad_norm": 0.23689110577106476, "learning_rate": 4.515364085244176e-05, "loss": 0.2181, "step": 15776 }, { "epoch": 0.2814004922769593, "grad_norm": 0.3461141884326935, "learning_rate": 4.51527198011069e-05, "loss": 0.1654, "step": 15777 }, { "epoch": 0.281418328398673, "grad_norm": 0.33549702167510986, "learning_rate": 4.5151798671653134e-05, "loss": 0.1857, "step": 15778 }, { "epoch": 0.28143616452038667, "grad_norm": 0.22499185800552368, "learning_rate": 4.515087746408404e-05, "loss": 0.1889, "step": 15779 }, { "epoch": 0.28145400064210035, "grad_norm": 0.25467947125434875, "learning_rate": 4.514995617840318e-05, "loss": 0.2109, "step": 15780 }, { "epoch": 0.2814718367638141, "grad_norm": 0.2677212059497833, "learning_rate": 4.514903481461414e-05, "loss": 0.1932, "step": 15781 }, { "epoch": 0.2814896728855278, "grad_norm": 0.23083485662937164, "learning_rate": 4.514811337272048e-05, "loss": 0.1351, "step": 15782 }, { "epoch": 0.2815075090072415, "grad_norm": 0.24680112302303314, "learning_rate": 4.514719185272577e-05, "loss": 0.1678, "step": 15783 }, { "epoch": 0.28152534512895516, "grad_norm": 0.3104582726955414, "learning_rate": 4.5146270254633584e-05, "loss": 0.1704, "step": 15784 }, { "epoch": 0.28154318125066885, "grad_norm": 0.33675116300582886, "learning_rate": 4.5145348578447495e-05, "loss": 0.1836, "step": 15785 }, { "epoch": 0.28156101737238254, "grad_norm": 0.1662835329771042, "learning_rate": 4.514442682417108e-05, "loss": 0.1127, "step": 15786 }, { "epoch": 0.2815788534940962, "grad_norm": 0.36040014028549194, "learning_rate": 4.51435049918079e-05, "loss": 0.1752, "step": 15787 }, { "epoch": 0.2815966896158099, "grad_norm": 0.2832331657409668, "learning_rate": 4.514258308136156e-05, "loss": 0.1729, "step": 15788 }, { "epoch": 0.28161452573752366, "grad_norm": 0.4248417615890503, "learning_rate": 4.51416610928356e-05, "loss": 0.1724, "step": 15789 }, { "epoch": 0.28163236185923735, "grad_norm": 0.27826637029647827, "learning_rate": 4.514073902623359e-05, "loss": 0.133, "step": 15790 }, { "epoch": 0.28165019798095103, "grad_norm": 0.26783037185668945, "learning_rate": 4.5139816881559137e-05, "loss": 0.1979, "step": 15791 }, { "epoch": 0.2816680341026647, "grad_norm": 0.25952044129371643, "learning_rate": 4.513889465881579e-05, "loss": 0.1656, "step": 15792 }, { "epoch": 0.2816858702243784, "grad_norm": 0.2946554720401764, "learning_rate": 4.513797235800713e-05, "loss": 0.205, "step": 15793 }, { "epoch": 0.2817037063460921, "grad_norm": 0.32601019740104675, "learning_rate": 4.513704997913673e-05, "loss": 0.2318, "step": 15794 }, { "epoch": 0.2817215424678058, "grad_norm": 0.3385814130306244, "learning_rate": 4.513612752220818e-05, "loss": 0.1569, "step": 15795 }, { "epoch": 0.2817393785895195, "grad_norm": 0.36846330761909485, "learning_rate": 4.5135204987225044e-05, "loss": 0.1862, "step": 15796 }, { "epoch": 0.2817572147112332, "grad_norm": 0.4911366105079651, "learning_rate": 4.5134282374190896e-05, "loss": 0.1768, "step": 15797 }, { "epoch": 0.2817750508329469, "grad_norm": 0.3188064992427826, "learning_rate": 4.5133359683109316e-05, "loss": 0.2016, "step": 15798 }, { "epoch": 0.2817928869546606, "grad_norm": 0.209141805768013, "learning_rate": 4.5132436913983875e-05, "loss": 0.1693, "step": 15799 }, { "epoch": 0.2818107230763743, "grad_norm": 0.24392643570899963, "learning_rate": 4.513151406681817e-05, "loss": 0.1903, "step": 15800 }, { "epoch": 0.28182855919808797, "grad_norm": 0.3080965280532837, "learning_rate": 4.513059114161575e-05, "loss": 0.1791, "step": 15801 }, { "epoch": 0.28184639531980166, "grad_norm": 0.5254833698272705, "learning_rate": 4.512966813838021e-05, "loss": 0.2863, "step": 15802 }, { "epoch": 0.28186423144151534, "grad_norm": 0.2654857337474823, "learning_rate": 4.512874505711512e-05, "loss": 0.1607, "step": 15803 }, { "epoch": 0.28188206756322903, "grad_norm": 0.33769118785858154, "learning_rate": 4.512782189782406e-05, "loss": 0.174, "step": 15804 }, { "epoch": 0.2818999036849427, "grad_norm": 0.30186372995376587, "learning_rate": 4.512689866051062e-05, "loss": 0.1966, "step": 15805 }, { "epoch": 0.28191773980665646, "grad_norm": 0.2846188247203827, "learning_rate": 4.512597534517836e-05, "loss": 0.1708, "step": 15806 }, { "epoch": 0.28193557592837015, "grad_norm": 0.2984698414802551, "learning_rate": 4.512505195183088e-05, "loss": 0.1305, "step": 15807 }, { "epoch": 0.28195341205008384, "grad_norm": 0.27422988414764404, "learning_rate": 4.5124128480471735e-05, "loss": 0.1965, "step": 15808 }, { "epoch": 0.28197124817179753, "grad_norm": 0.24732251465320587, "learning_rate": 4.5123204931104524e-05, "loss": 0.1759, "step": 15809 }, { "epoch": 0.2819890842935112, "grad_norm": 0.24280230700969696, "learning_rate": 4.5122281303732816e-05, "loss": 0.1907, "step": 15810 }, { "epoch": 0.2820069204152249, "grad_norm": 0.2686242163181305, "learning_rate": 4.5121357598360195e-05, "loss": 0.2081, "step": 15811 }, { "epoch": 0.2820247565369386, "grad_norm": 0.2145245224237442, "learning_rate": 4.5120433814990246e-05, "loss": 0.1621, "step": 15812 }, { "epoch": 0.2820425926586523, "grad_norm": 0.300889790058136, "learning_rate": 4.511950995362655e-05, "loss": 0.1775, "step": 15813 }, { "epoch": 0.282060428780366, "grad_norm": 0.18463751673698425, "learning_rate": 4.511858601427268e-05, "loss": 0.1686, "step": 15814 }, { "epoch": 0.2820782649020797, "grad_norm": 0.28218504786491394, "learning_rate": 4.511766199693222e-05, "loss": 0.1936, "step": 15815 }, { "epoch": 0.2820961010237934, "grad_norm": 0.27262285351753235, "learning_rate": 4.511673790160875e-05, "loss": 0.1374, "step": 15816 }, { "epoch": 0.2821139371455071, "grad_norm": 0.29053056240081787, "learning_rate": 4.5115813728305865e-05, "loss": 0.138, "step": 15817 }, { "epoch": 0.2821317732672208, "grad_norm": 0.3268433213233948, "learning_rate": 4.511488947702714e-05, "loss": 0.1758, "step": 15818 }, { "epoch": 0.28214960938893446, "grad_norm": 0.3032713234424591, "learning_rate": 4.511396514777615e-05, "loss": 0.1281, "step": 15819 }, { "epoch": 0.28216744551064815, "grad_norm": 0.41434139013290405, "learning_rate": 4.511304074055648e-05, "loss": 0.1791, "step": 15820 }, { "epoch": 0.28218528163236184, "grad_norm": 0.31003856658935547, "learning_rate": 4.511211625537172e-05, "loss": 0.1785, "step": 15821 }, { "epoch": 0.2822031177540755, "grad_norm": 0.34010785818099976, "learning_rate": 4.511119169222545e-05, "loss": 0.2364, "step": 15822 }, { "epoch": 0.28222095387578927, "grad_norm": 0.2527099847793579, "learning_rate": 4.511026705112126e-05, "loss": 0.186, "step": 15823 }, { "epoch": 0.28223878999750296, "grad_norm": 0.3292764723300934, "learning_rate": 4.510934233206273e-05, "loss": 0.1783, "step": 15824 }, { "epoch": 0.28225662611921665, "grad_norm": 0.22024333477020264, "learning_rate": 4.5108417535053436e-05, "loss": 0.128, "step": 15825 }, { "epoch": 0.28227446224093033, "grad_norm": 0.24847577512264252, "learning_rate": 4.510749266009697e-05, "loss": 0.1374, "step": 15826 }, { "epoch": 0.282292298362644, "grad_norm": 0.26582664251327515, "learning_rate": 4.510656770719693e-05, "loss": 0.2296, "step": 15827 }, { "epoch": 0.2823101344843577, "grad_norm": 0.28152212500572205, "learning_rate": 4.510564267635688e-05, "loss": 0.1467, "step": 15828 }, { "epoch": 0.2823279706060714, "grad_norm": 0.31269505620002747, "learning_rate": 4.5104717567580415e-05, "loss": 0.1937, "step": 15829 }, { "epoch": 0.2823458067277851, "grad_norm": 0.2444497048854828, "learning_rate": 4.510379238087112e-05, "loss": 0.1641, "step": 15830 }, { "epoch": 0.28236364284949883, "grad_norm": 0.2732645869255066, "learning_rate": 4.5102867116232586e-05, "loss": 0.2163, "step": 15831 }, { "epoch": 0.2823814789712125, "grad_norm": 0.20664900541305542, "learning_rate": 4.5101941773668396e-05, "loss": 0.1303, "step": 15832 }, { "epoch": 0.2823993150929262, "grad_norm": 0.31389695405960083, "learning_rate": 4.510101635318213e-05, "loss": 0.1555, "step": 15833 }, { "epoch": 0.2824171512146399, "grad_norm": 0.28889691829681396, "learning_rate": 4.510009085477739e-05, "loss": 0.2385, "step": 15834 }, { "epoch": 0.2824349873363536, "grad_norm": 0.3446749448776245, "learning_rate": 4.5099165278457746e-05, "loss": 0.2041, "step": 15835 }, { "epoch": 0.28245282345806727, "grad_norm": 0.3042996823787689, "learning_rate": 4.50982396242268e-05, "loss": 0.2148, "step": 15836 }, { "epoch": 0.28247065957978096, "grad_norm": 0.2645481526851654, "learning_rate": 4.5097313892088136e-05, "loss": 0.1815, "step": 15837 }, { "epoch": 0.28248849570149465, "grad_norm": 0.3444783389568329, "learning_rate": 4.509638808204535e-05, "loss": 0.2059, "step": 15838 }, { "epoch": 0.28250633182320833, "grad_norm": 0.265800803899765, "learning_rate": 4.509546219410201e-05, "loss": 0.1775, "step": 15839 }, { "epoch": 0.2825241679449221, "grad_norm": 0.3954220414161682, "learning_rate": 4.509453622826172e-05, "loss": 0.1609, "step": 15840 }, { "epoch": 0.28254200406663577, "grad_norm": 0.27506983280181885, "learning_rate": 4.509361018452807e-05, "loss": 0.2039, "step": 15841 }, { "epoch": 0.28255984018834945, "grad_norm": 0.25239405035972595, "learning_rate": 4.509268406290465e-05, "loss": 0.183, "step": 15842 }, { "epoch": 0.28257767631006314, "grad_norm": 0.236806720495224, "learning_rate": 4.5091757863395045e-05, "loss": 0.1698, "step": 15843 }, { "epoch": 0.28259551243177683, "grad_norm": 0.24222493171691895, "learning_rate": 4.509083158600285e-05, "loss": 0.1739, "step": 15844 }, { "epoch": 0.2826133485534905, "grad_norm": 0.23812562227249146, "learning_rate": 4.508990523073164e-05, "loss": 0.163, "step": 15845 }, { "epoch": 0.2826311846752042, "grad_norm": 0.28616097569465637, "learning_rate": 4.508897879758502e-05, "loss": 0.1929, "step": 15846 }, { "epoch": 0.2826490207969179, "grad_norm": 0.24026015400886536, "learning_rate": 4.5088052286566596e-05, "loss": 0.1424, "step": 15847 }, { "epoch": 0.28266685691863164, "grad_norm": 0.2622207701206207, "learning_rate": 4.508712569767993e-05, "loss": 0.1647, "step": 15848 }, { "epoch": 0.2826846930403453, "grad_norm": 0.21435169875621796, "learning_rate": 4.5086199030928635e-05, "loss": 0.1697, "step": 15849 }, { "epoch": 0.282702529162059, "grad_norm": 0.24638047814369202, "learning_rate": 4.508527228631629e-05, "loss": 0.181, "step": 15850 }, { "epoch": 0.2827203652837727, "grad_norm": 0.26158127188682556, "learning_rate": 4.508434546384649e-05, "loss": 0.1694, "step": 15851 }, { "epoch": 0.2827382014054864, "grad_norm": 0.2531074285507202, "learning_rate": 4.508341856352283e-05, "loss": 0.1967, "step": 15852 }, { "epoch": 0.2827560375272001, "grad_norm": 0.31923919916152954, "learning_rate": 4.5082491585348904e-05, "loss": 0.2228, "step": 15853 }, { "epoch": 0.28277387364891376, "grad_norm": 0.3040517568588257, "learning_rate": 4.508156452932831e-05, "loss": 0.1731, "step": 15854 }, { "epoch": 0.28279170977062745, "grad_norm": 0.25326067209243774, "learning_rate": 4.508063739546463e-05, "loss": 0.2227, "step": 15855 }, { "epoch": 0.2828095458923412, "grad_norm": 0.26069650053977966, "learning_rate": 4.507971018376147e-05, "loss": 0.1718, "step": 15856 }, { "epoch": 0.2828273820140549, "grad_norm": 0.33046719431877136, "learning_rate": 4.507878289422242e-05, "loss": 0.2234, "step": 15857 }, { "epoch": 0.2828452181357686, "grad_norm": 0.266009658575058, "learning_rate": 4.507785552685106e-05, "loss": 0.1793, "step": 15858 }, { "epoch": 0.28286305425748226, "grad_norm": 0.20960992574691772, "learning_rate": 4.507692808165101e-05, "loss": 0.1672, "step": 15859 }, { "epoch": 0.28288089037919595, "grad_norm": 0.29292190074920654, "learning_rate": 4.507600055862584e-05, "loss": 0.1763, "step": 15860 }, { "epoch": 0.28289872650090964, "grad_norm": 0.24633224308490753, "learning_rate": 4.5075072957779166e-05, "loss": 0.1986, "step": 15861 }, { "epoch": 0.2829165626226233, "grad_norm": 0.24920177459716797, "learning_rate": 4.5074145279114574e-05, "loss": 0.1443, "step": 15862 }, { "epoch": 0.282934398744337, "grad_norm": 0.2822324335575104, "learning_rate": 4.507321752263566e-05, "loss": 0.1893, "step": 15863 }, { "epoch": 0.2829522348660507, "grad_norm": 0.27641648054122925, "learning_rate": 4.507228968834602e-05, "loss": 0.1848, "step": 15864 }, { "epoch": 0.28297007098776444, "grad_norm": 0.4224631190299988, "learning_rate": 4.5071361776249253e-05, "loss": 0.2127, "step": 15865 }, { "epoch": 0.28298790710947813, "grad_norm": 0.2558792531490326, "learning_rate": 4.5070433786348964e-05, "loss": 0.1771, "step": 15866 }, { "epoch": 0.2830057432311918, "grad_norm": 0.2531677782535553, "learning_rate": 4.506950571864873e-05, "loss": 0.1854, "step": 15867 }, { "epoch": 0.2830235793529055, "grad_norm": 0.22042444348335266, "learning_rate": 4.506857757315217e-05, "loss": 0.1594, "step": 15868 }, { "epoch": 0.2830414154746192, "grad_norm": 0.2552710473537445, "learning_rate": 4.506764934986287e-05, "loss": 0.1441, "step": 15869 }, { "epoch": 0.2830592515963329, "grad_norm": 0.27962878346443176, "learning_rate": 4.506672104878442e-05, "loss": 0.1876, "step": 15870 }, { "epoch": 0.28307708771804657, "grad_norm": 0.29624566435813904, "learning_rate": 4.5065792669920434e-05, "loss": 0.206, "step": 15871 }, { "epoch": 0.28309492383976026, "grad_norm": 0.3222978413105011, "learning_rate": 4.506486421327451e-05, "loss": 0.1811, "step": 15872 }, { "epoch": 0.283112759961474, "grad_norm": 0.2283131331205368, "learning_rate": 4.506393567885024e-05, "loss": 0.1801, "step": 15873 }, { "epoch": 0.2831305960831877, "grad_norm": 0.2531171143054962, "learning_rate": 4.506300706665122e-05, "loss": 0.178, "step": 15874 }, { "epoch": 0.2831484322049014, "grad_norm": 0.2347114235162735, "learning_rate": 4.506207837668106e-05, "loss": 0.1604, "step": 15875 }, { "epoch": 0.28316626832661507, "grad_norm": 0.26539233326911926, "learning_rate": 4.506114960894335e-05, "loss": 0.1486, "step": 15876 }, { "epoch": 0.28318410444832876, "grad_norm": 0.30372369289398193, "learning_rate": 4.5060220763441706e-05, "loss": 0.1856, "step": 15877 }, { "epoch": 0.28320194057004244, "grad_norm": 0.2657814621925354, "learning_rate": 4.5059291840179705e-05, "loss": 0.1849, "step": 15878 }, { "epoch": 0.28321977669175613, "grad_norm": 0.2818485200405121, "learning_rate": 4.5058362839160974e-05, "loss": 0.1619, "step": 15879 }, { "epoch": 0.2832376128134698, "grad_norm": 0.22816427052021027, "learning_rate": 4.505743376038909e-05, "loss": 0.1468, "step": 15880 }, { "epoch": 0.2832554489351835, "grad_norm": 0.43867549300193787, "learning_rate": 4.505650460386767e-05, "loss": 0.1784, "step": 15881 }, { "epoch": 0.28327328505689725, "grad_norm": 0.2857999801635742, "learning_rate": 4.5055575369600314e-05, "loss": 0.1998, "step": 15882 }, { "epoch": 0.28329112117861094, "grad_norm": 0.41786709427833557, "learning_rate": 4.505464605759061e-05, "loss": 0.2069, "step": 15883 }, { "epoch": 0.2833089573003246, "grad_norm": 0.32303425669670105, "learning_rate": 4.505371666784218e-05, "loss": 0.2014, "step": 15884 }, { "epoch": 0.2833267934220383, "grad_norm": 0.1937633454799652, "learning_rate": 4.505278720035862e-05, "loss": 0.1999, "step": 15885 }, { "epoch": 0.283344629543752, "grad_norm": 0.36285242438316345, "learning_rate": 4.5051857655143525e-05, "loss": 0.1679, "step": 15886 }, { "epoch": 0.2833624656654657, "grad_norm": 0.3296329975128174, "learning_rate": 4.50509280322005e-05, "loss": 0.2202, "step": 15887 }, { "epoch": 0.2833803017871794, "grad_norm": 0.27836042642593384, "learning_rate": 4.504999833153316e-05, "loss": 0.1962, "step": 15888 }, { "epoch": 0.28339813790889307, "grad_norm": 0.1825222373008728, "learning_rate": 4.50490685531451e-05, "loss": 0.16, "step": 15889 }, { "epoch": 0.2834159740306068, "grad_norm": 0.3483729660511017, "learning_rate": 4.504813869703992e-05, "loss": 0.2131, "step": 15890 }, { "epoch": 0.2834338101523205, "grad_norm": 0.2789473831653595, "learning_rate": 4.504720876322124e-05, "loss": 0.1977, "step": 15891 }, { "epoch": 0.2834516462740342, "grad_norm": 0.31971925497055054, "learning_rate": 4.504627875169265e-05, "loss": 0.2226, "step": 15892 }, { "epoch": 0.2834694823957479, "grad_norm": 0.35134172439575195, "learning_rate": 4.504534866245776e-05, "loss": 0.1759, "step": 15893 }, { "epoch": 0.28348731851746156, "grad_norm": 0.31223055720329285, "learning_rate": 4.504441849552018e-05, "loss": 0.2221, "step": 15894 }, { "epoch": 0.28350515463917525, "grad_norm": 0.24739018082618713, "learning_rate": 4.50434882508835e-05, "loss": 0.1831, "step": 15895 }, { "epoch": 0.28352299076088894, "grad_norm": 0.3248162269592285, "learning_rate": 4.504255792855134e-05, "loss": 0.1543, "step": 15896 }, { "epoch": 0.2835408268826026, "grad_norm": 0.18326567113399506, "learning_rate": 4.504162752852731e-05, "loss": 0.1481, "step": 15897 }, { "epoch": 0.28355866300431637, "grad_norm": 0.24415653944015503, "learning_rate": 4.5040697050815e-05, "loss": 0.2105, "step": 15898 }, { "epoch": 0.28357649912603006, "grad_norm": 0.27825552225112915, "learning_rate": 4.503976649541803e-05, "loss": 0.2015, "step": 15899 }, { "epoch": 0.28359433524774375, "grad_norm": 0.3033321797847748, "learning_rate": 4.503883586234001e-05, "loss": 0.2465, "step": 15900 }, { "epoch": 0.28361217136945743, "grad_norm": 0.29683420062065125, "learning_rate": 4.503790515158453e-05, "loss": 0.2302, "step": 15901 }, { "epoch": 0.2836300074911711, "grad_norm": 0.2019442319869995, "learning_rate": 4.503697436315522e-05, "loss": 0.1817, "step": 15902 }, { "epoch": 0.2836478436128848, "grad_norm": 0.23974061012268066, "learning_rate": 4.503604349705567e-05, "loss": 0.1731, "step": 15903 }, { "epoch": 0.2836656797345985, "grad_norm": 0.3597039580345154, "learning_rate": 4.5035112553289495e-05, "loss": 0.1756, "step": 15904 }, { "epoch": 0.2836835158563122, "grad_norm": 0.30218592286109924, "learning_rate": 4.50341815318603e-05, "loss": 0.1947, "step": 15905 }, { "epoch": 0.2837013519780259, "grad_norm": 0.38055992126464844, "learning_rate": 4.503325043277171e-05, "loss": 0.1545, "step": 15906 }, { "epoch": 0.2837191880997396, "grad_norm": 0.2936986982822418, "learning_rate": 4.5032319256027314e-05, "loss": 0.1945, "step": 15907 }, { "epoch": 0.2837370242214533, "grad_norm": 0.2647310793399811, "learning_rate": 4.5031388001630726e-05, "loss": 0.1562, "step": 15908 }, { "epoch": 0.283754860343167, "grad_norm": 0.23462165892124176, "learning_rate": 4.5030456669585564e-05, "loss": 0.1405, "step": 15909 }, { "epoch": 0.2837726964648807, "grad_norm": 0.20205175876617432, "learning_rate": 4.502952525989543e-05, "loss": 0.1506, "step": 15910 }, { "epoch": 0.28379053258659437, "grad_norm": 0.2899705171585083, "learning_rate": 4.502859377256395e-05, "loss": 0.1772, "step": 15911 }, { "epoch": 0.28380836870830806, "grad_norm": 0.3019639849662781, "learning_rate": 4.502766220759471e-05, "loss": 0.1599, "step": 15912 }, { "epoch": 0.28382620483002174, "grad_norm": 0.28742510080337524, "learning_rate": 4.5026730564991334e-05, "loss": 0.1881, "step": 15913 }, { "epoch": 0.28384404095173543, "grad_norm": 0.3420644998550415, "learning_rate": 4.5025798844757444e-05, "loss": 0.1596, "step": 15914 }, { "epoch": 0.2838618770734492, "grad_norm": 0.3835827708244324, "learning_rate": 4.5024867046896636e-05, "loss": 0.187, "step": 15915 }, { "epoch": 0.28387971319516286, "grad_norm": 0.2941904067993164, "learning_rate": 4.502393517141252e-05, "loss": 0.1844, "step": 15916 }, { "epoch": 0.28389754931687655, "grad_norm": 0.31132662296295166, "learning_rate": 4.502300321830872e-05, "loss": 0.1877, "step": 15917 }, { "epoch": 0.28391538543859024, "grad_norm": 0.2771371006965637, "learning_rate": 4.5022071187588854e-05, "loss": 0.1941, "step": 15918 }, { "epoch": 0.28393322156030393, "grad_norm": 0.24142910540103912, "learning_rate": 4.5021139079256515e-05, "loss": 0.1386, "step": 15919 }, { "epoch": 0.2839510576820176, "grad_norm": 0.28513261675834656, "learning_rate": 4.5020206893315325e-05, "loss": 0.1107, "step": 15920 }, { "epoch": 0.2839688938037313, "grad_norm": 0.23792928457260132, "learning_rate": 4.50192746297689e-05, "loss": 0.1243, "step": 15921 }, { "epoch": 0.283986729925445, "grad_norm": 0.20691817998886108, "learning_rate": 4.501834228862085e-05, "loss": 0.1779, "step": 15922 }, { "epoch": 0.2840045660471587, "grad_norm": 0.28103986382484436, "learning_rate": 4.5017409869874795e-05, "loss": 0.1761, "step": 15923 }, { "epoch": 0.2840224021688724, "grad_norm": 0.2802456021308899, "learning_rate": 4.501647737353434e-05, "loss": 0.1595, "step": 15924 }, { "epoch": 0.2840402382905861, "grad_norm": 0.27944374084472656, "learning_rate": 4.501554479960312e-05, "loss": 0.1916, "step": 15925 }, { "epoch": 0.2840580744122998, "grad_norm": 0.2746027112007141, "learning_rate": 4.501461214808472e-05, "loss": 0.144, "step": 15926 }, { "epoch": 0.2840759105340135, "grad_norm": 0.26997241377830505, "learning_rate": 4.501367941898277e-05, "loss": 0.1855, "step": 15927 }, { "epoch": 0.2840937466557272, "grad_norm": 0.5294459462165833, "learning_rate": 4.5012746612300896e-05, "loss": 0.2166, "step": 15928 }, { "epoch": 0.28411158277744086, "grad_norm": 0.2522684335708618, "learning_rate": 4.50118137280427e-05, "loss": 0.2079, "step": 15929 }, { "epoch": 0.28412941889915455, "grad_norm": 0.2531207203865051, "learning_rate": 4.501088076621179e-05, "loss": 0.1759, "step": 15930 }, { "epoch": 0.28414725502086824, "grad_norm": 0.2434559315443039, "learning_rate": 4.5009947726811805e-05, "loss": 0.1976, "step": 15931 }, { "epoch": 0.284165091142582, "grad_norm": 0.24981777369976044, "learning_rate": 4.5009014609846355e-05, "loss": 0.1526, "step": 15932 }, { "epoch": 0.28418292726429567, "grad_norm": 0.2622455656528473, "learning_rate": 4.500808141531905e-05, "loss": 0.1646, "step": 15933 }, { "epoch": 0.28420076338600936, "grad_norm": 0.3121867775917053, "learning_rate": 4.500714814323351e-05, "loss": 0.1749, "step": 15934 }, { "epoch": 0.28421859950772305, "grad_norm": 0.29981881380081177, "learning_rate": 4.500621479359336e-05, "loss": 0.2304, "step": 15935 }, { "epoch": 0.28423643562943673, "grad_norm": 0.34250420331954956, "learning_rate": 4.500528136640221e-05, "loss": 0.2151, "step": 15936 }, { "epoch": 0.2842542717511504, "grad_norm": 0.23781858384609222, "learning_rate": 4.5004347861663673e-05, "loss": 0.1872, "step": 15937 }, { "epoch": 0.2842721078728641, "grad_norm": 0.2146347612142563, "learning_rate": 4.500341427938137e-05, "loss": 0.191, "step": 15938 }, { "epoch": 0.2842899439945778, "grad_norm": 0.22418838739395142, "learning_rate": 4.500248061955894e-05, "loss": 0.1292, "step": 15939 }, { "epoch": 0.2843077801162915, "grad_norm": 0.3149854838848114, "learning_rate": 4.500154688219997e-05, "loss": 0.1538, "step": 15940 }, { "epoch": 0.28432561623800523, "grad_norm": 0.24618928134441376, "learning_rate": 4.500061306730811e-05, "loss": 0.1931, "step": 15941 }, { "epoch": 0.2843434523597189, "grad_norm": 0.26659172773361206, "learning_rate": 4.499967917488696e-05, "loss": 0.1544, "step": 15942 }, { "epoch": 0.2843612884814326, "grad_norm": 0.30760619044303894, "learning_rate": 4.4998745204940146e-05, "loss": 0.1881, "step": 15943 }, { "epoch": 0.2843791246031463, "grad_norm": 0.3104797601699829, "learning_rate": 4.499781115747129e-05, "loss": 0.1781, "step": 15944 }, { "epoch": 0.28439696072486, "grad_norm": 0.2967168092727661, "learning_rate": 4.499687703248401e-05, "loss": 0.1874, "step": 15945 }, { "epoch": 0.28441479684657367, "grad_norm": 0.2629615366458893, "learning_rate": 4.4995942829981926e-05, "loss": 0.1304, "step": 15946 }, { "epoch": 0.28443263296828736, "grad_norm": 0.20061103999614716, "learning_rate": 4.499500854996867e-05, "loss": 0.1746, "step": 15947 }, { "epoch": 0.28445046909000105, "grad_norm": 0.3300549387931824, "learning_rate": 4.4994074192447836e-05, "loss": 0.1365, "step": 15948 }, { "epoch": 0.2844683052117148, "grad_norm": 0.30984750390052795, "learning_rate": 4.4993139757423077e-05, "loss": 0.1467, "step": 15949 }, { "epoch": 0.2844861413334285, "grad_norm": 0.29576873779296875, "learning_rate": 4.4992205244898e-05, "loss": 0.2013, "step": 15950 }, { "epoch": 0.28450397745514217, "grad_norm": 0.2682577967643738, "learning_rate": 4.4991270654876234e-05, "loss": 0.1347, "step": 15951 }, { "epoch": 0.28452181357685585, "grad_norm": 0.3534597158432007, "learning_rate": 4.49903359873614e-05, "loss": 0.1551, "step": 15952 }, { "epoch": 0.28453964969856954, "grad_norm": 0.26593804359436035, "learning_rate": 4.498940124235711e-05, "loss": 0.1761, "step": 15953 }, { "epoch": 0.28455748582028323, "grad_norm": 0.25501754879951477, "learning_rate": 4.498846641986701e-05, "loss": 0.1961, "step": 15954 }, { "epoch": 0.2845753219419969, "grad_norm": 0.25656792521476746, "learning_rate": 4.49875315198947e-05, "loss": 0.1671, "step": 15955 }, { "epoch": 0.2845931580637106, "grad_norm": 0.338867723941803, "learning_rate": 4.498659654244381e-05, "loss": 0.2031, "step": 15956 }, { "epoch": 0.28461099418542435, "grad_norm": 0.2237289696931839, "learning_rate": 4.498566148751798e-05, "loss": 0.2255, "step": 15957 }, { "epoch": 0.28462883030713804, "grad_norm": 0.31040847301483154, "learning_rate": 4.4984726355120814e-05, "loss": 0.1375, "step": 15958 }, { "epoch": 0.2846466664288517, "grad_norm": 0.29024532437324524, "learning_rate": 4.498379114525595e-05, "loss": 0.1746, "step": 15959 }, { "epoch": 0.2846645025505654, "grad_norm": 0.2775183916091919, "learning_rate": 4.4982855857927014e-05, "loss": 0.1702, "step": 15960 }, { "epoch": 0.2846823386722791, "grad_norm": 0.27296754717826843, "learning_rate": 4.498192049313762e-05, "loss": 0.1466, "step": 15961 }, { "epoch": 0.2847001747939928, "grad_norm": 0.23766563832759857, "learning_rate": 4.498098505089141e-05, "loss": 0.1397, "step": 15962 }, { "epoch": 0.2847180109157065, "grad_norm": 0.2712920308113098, "learning_rate": 4.4980049531191993e-05, "loss": 0.2018, "step": 15963 }, { "epoch": 0.28473584703742016, "grad_norm": 0.3951716125011444, "learning_rate": 4.4979113934043004e-05, "loss": 0.2187, "step": 15964 }, { "epoch": 0.28475368315913385, "grad_norm": 0.19918771088123322, "learning_rate": 4.4978178259448064e-05, "loss": 0.1585, "step": 15965 }, { "epoch": 0.2847715192808476, "grad_norm": 0.3238455653190613, "learning_rate": 4.497724250741081e-05, "loss": 0.2022, "step": 15966 }, { "epoch": 0.2847893554025613, "grad_norm": 0.2711409330368042, "learning_rate": 4.497630667793486e-05, "loss": 0.2327, "step": 15967 }, { "epoch": 0.28480719152427497, "grad_norm": 0.3973959982395172, "learning_rate": 4.497537077102386e-05, "loss": 0.1422, "step": 15968 }, { "epoch": 0.28482502764598866, "grad_norm": 0.2645553946495056, "learning_rate": 4.497443478668141e-05, "loss": 0.1579, "step": 15969 }, { "epoch": 0.28484286376770235, "grad_norm": 0.2623996138572693, "learning_rate": 4.497349872491116e-05, "loss": 0.1766, "step": 15970 }, { "epoch": 0.28486069988941604, "grad_norm": 0.2854841947555542, "learning_rate": 4.497256258571672e-05, "loss": 0.2177, "step": 15971 }, { "epoch": 0.2848785360111297, "grad_norm": 0.23708291351795197, "learning_rate": 4.497162636910174e-05, "loss": 0.1779, "step": 15972 }, { "epoch": 0.2848963721328434, "grad_norm": 0.25569671392440796, "learning_rate": 4.497069007506983e-05, "loss": 0.1841, "step": 15973 }, { "epoch": 0.28491420825455716, "grad_norm": 0.2253493219614029, "learning_rate": 4.496975370362463e-05, "loss": 0.1947, "step": 15974 }, { "epoch": 0.28493204437627084, "grad_norm": 0.38804617524147034, "learning_rate": 4.4968817254769766e-05, "loss": 0.1749, "step": 15975 }, { "epoch": 0.28494988049798453, "grad_norm": 0.3247669041156769, "learning_rate": 4.496788072850887e-05, "loss": 0.2042, "step": 15976 }, { "epoch": 0.2849677166196982, "grad_norm": 0.6287062168121338, "learning_rate": 4.496694412484558e-05, "loss": 0.2862, "step": 15977 }, { "epoch": 0.2849855527414119, "grad_norm": 0.3077385425567627, "learning_rate": 4.496600744378351e-05, "loss": 0.1751, "step": 15978 }, { "epoch": 0.2850033888631256, "grad_norm": 0.2535353899002075, "learning_rate": 4.49650706853263e-05, "loss": 0.1449, "step": 15979 }, { "epoch": 0.2850212249848393, "grad_norm": 0.24810250103473663, "learning_rate": 4.496413384947758e-05, "loss": 0.2034, "step": 15980 }, { "epoch": 0.28503906110655297, "grad_norm": 0.2122296690940857, "learning_rate": 4.496319693624098e-05, "loss": 0.1753, "step": 15981 }, { "epoch": 0.28505689722826666, "grad_norm": 0.29140132665634155, "learning_rate": 4.496225994562013e-05, "loss": 0.1273, "step": 15982 }, { "epoch": 0.2850747333499804, "grad_norm": 0.1990610808134079, "learning_rate": 4.4961322877618676e-05, "loss": 0.1636, "step": 15983 }, { "epoch": 0.2850925694716941, "grad_norm": 0.2398446500301361, "learning_rate": 4.496038573224024e-05, "loss": 0.1714, "step": 15984 }, { "epoch": 0.2851104055934078, "grad_norm": 0.2520081400871277, "learning_rate": 4.495944850948845e-05, "loss": 0.1854, "step": 15985 }, { "epoch": 0.28512824171512147, "grad_norm": 0.25864988565444946, "learning_rate": 4.4958511209366944e-05, "loss": 0.187, "step": 15986 }, { "epoch": 0.28514607783683515, "grad_norm": 0.3334422707557678, "learning_rate": 4.4957573831879356e-05, "loss": 0.1846, "step": 15987 }, { "epoch": 0.28516391395854884, "grad_norm": 0.24120378494262695, "learning_rate": 4.4956636377029314e-05, "loss": 0.1847, "step": 15988 }, { "epoch": 0.28518175008026253, "grad_norm": 0.31107571721076965, "learning_rate": 4.4955698844820465e-05, "loss": 0.2415, "step": 15989 }, { "epoch": 0.2851995862019762, "grad_norm": 0.4167276620864868, "learning_rate": 4.4954761235256434e-05, "loss": 0.1459, "step": 15990 }, { "epoch": 0.28521742232368996, "grad_norm": 0.2610337436199188, "learning_rate": 4.4953823548340845e-05, "loss": 0.1661, "step": 15991 }, { "epoch": 0.28523525844540365, "grad_norm": 0.4616617262363434, "learning_rate": 4.495288578407736e-05, "loss": 0.1934, "step": 15992 }, { "epoch": 0.28525309456711734, "grad_norm": 0.26704883575439453, "learning_rate": 4.495194794246959e-05, "loss": 0.1524, "step": 15993 }, { "epoch": 0.285270930688831, "grad_norm": 0.3242391347885132, "learning_rate": 4.495101002352118e-05, "loss": 0.1772, "step": 15994 }, { "epoch": 0.2852887668105447, "grad_norm": 0.24882343411445618, "learning_rate": 4.4950072027235753e-05, "loss": 0.165, "step": 15995 }, { "epoch": 0.2853066029322584, "grad_norm": 0.21131683886051178, "learning_rate": 4.494913395361697e-05, "loss": 0.1413, "step": 15996 }, { "epoch": 0.2853244390539721, "grad_norm": 0.2591877281665802, "learning_rate": 4.4948195802668456e-05, "loss": 0.1668, "step": 15997 }, { "epoch": 0.2853422751756858, "grad_norm": 0.31166186928749084, "learning_rate": 4.4947257574393836e-05, "loss": 0.2193, "step": 15998 }, { "epoch": 0.28536011129739947, "grad_norm": 0.2110806405544281, "learning_rate": 4.494631926879676e-05, "loss": 0.1728, "step": 15999 }, { "epoch": 0.2853779474191132, "grad_norm": 0.2495652586221695, "learning_rate": 4.4945380885880863e-05, "loss": 0.189, "step": 16000 }, { "epoch": 0.2853779474191132, "eval_loss": 0.17411202192306519, "eval_runtime": 107.1863, "eval_samples_per_second": 9.553, "eval_steps_per_second": 1.595, "step": 16000 }, { "epoch": 0.2853957835408269, "grad_norm": 0.3134092092514038, "learning_rate": 4.4944442425649775e-05, "loss": 0.213, "step": 16001 }, { "epoch": 0.2854136196625406, "grad_norm": 0.29065507650375366, "learning_rate": 4.494350388810714e-05, "loss": 0.1983, "step": 16002 }, { "epoch": 0.2854314557842543, "grad_norm": 0.25940176844596863, "learning_rate": 4.49425652732566e-05, "loss": 0.1747, "step": 16003 }, { "epoch": 0.28544929190596796, "grad_norm": 0.35320574045181274, "learning_rate": 4.494162658110179e-05, "loss": 0.1682, "step": 16004 }, { "epoch": 0.28546712802768165, "grad_norm": 0.3480782210826874, "learning_rate": 4.494068781164634e-05, "loss": 0.2342, "step": 16005 }, { "epoch": 0.28548496414939534, "grad_norm": 0.19702798128128052, "learning_rate": 4.49397489648939e-05, "loss": 0.1505, "step": 16006 }, { "epoch": 0.285502800271109, "grad_norm": 0.3501552641391754, "learning_rate": 4.4938810040848115e-05, "loss": 0.2263, "step": 16007 }, { "epoch": 0.28552063639282277, "grad_norm": 0.2384253889322281, "learning_rate": 4.4937871039512606e-05, "loss": 0.1625, "step": 16008 }, { "epoch": 0.28553847251453646, "grad_norm": 0.31768542528152466, "learning_rate": 4.4936931960891026e-05, "loss": 0.1873, "step": 16009 }, { "epoch": 0.28555630863625014, "grad_norm": 0.2659898102283478, "learning_rate": 4.4935992804987014e-05, "loss": 0.181, "step": 16010 }, { "epoch": 0.28557414475796383, "grad_norm": 0.25258708000183105, "learning_rate": 4.49350535718042e-05, "loss": 0.1571, "step": 16011 }, { "epoch": 0.2855919808796775, "grad_norm": 0.35919901728630066, "learning_rate": 4.493411426134624e-05, "loss": 0.1783, "step": 16012 }, { "epoch": 0.2856098170013912, "grad_norm": 0.23456792533397675, "learning_rate": 4.4933174873616766e-05, "loss": 0.148, "step": 16013 }, { "epoch": 0.2856276531231049, "grad_norm": 0.29800763726234436, "learning_rate": 4.4932235408619426e-05, "loss": 0.1768, "step": 16014 }, { "epoch": 0.2856454892448186, "grad_norm": 0.30805203318595886, "learning_rate": 4.493129586635785e-05, "loss": 0.1925, "step": 16015 }, { "epoch": 0.28566332536653233, "grad_norm": 0.2858664095401764, "learning_rate": 4.493035624683569e-05, "loss": 0.2074, "step": 16016 }, { "epoch": 0.285681161488246, "grad_norm": 0.2952503263950348, "learning_rate": 4.492941655005658e-05, "loss": 0.1275, "step": 16017 }, { "epoch": 0.2856989976099597, "grad_norm": 0.24600088596343994, "learning_rate": 4.492847677602418e-05, "loss": 0.1842, "step": 16018 }, { "epoch": 0.2857168337316734, "grad_norm": 0.292516827583313, "learning_rate": 4.492753692474211e-05, "loss": 0.1338, "step": 16019 }, { "epoch": 0.2857346698533871, "grad_norm": 0.29206234216690063, "learning_rate": 4.492659699621403e-05, "loss": 0.1858, "step": 16020 }, { "epoch": 0.28575250597510077, "grad_norm": 0.23379239439964294, "learning_rate": 4.4925656990443576e-05, "loss": 0.166, "step": 16021 }, { "epoch": 0.28577034209681446, "grad_norm": 0.3362060785293579, "learning_rate": 4.4924716907434397e-05, "loss": 0.1836, "step": 16022 }, { "epoch": 0.28578817821852814, "grad_norm": 0.3493463099002838, "learning_rate": 4.4923776747190124e-05, "loss": 0.2229, "step": 16023 }, { "epoch": 0.28580601434024183, "grad_norm": 0.25529617071151733, "learning_rate": 4.4922836509714424e-05, "loss": 0.1619, "step": 16024 }, { "epoch": 0.2858238504619556, "grad_norm": 0.29880794882774353, "learning_rate": 4.492189619501093e-05, "loss": 0.2155, "step": 16025 }, { "epoch": 0.28584168658366926, "grad_norm": 0.23340637981891632, "learning_rate": 4.492095580308327e-05, "loss": 0.1734, "step": 16026 }, { "epoch": 0.28585952270538295, "grad_norm": 0.29430294036865234, "learning_rate": 4.4920015333935114e-05, "loss": 0.1968, "step": 16027 }, { "epoch": 0.28587735882709664, "grad_norm": 0.249177947640419, "learning_rate": 4.491907478757009e-05, "loss": 0.1598, "step": 16028 }, { "epoch": 0.2858951949488103, "grad_norm": 0.23493894934654236, "learning_rate": 4.4918134163991866e-05, "loss": 0.1939, "step": 16029 }, { "epoch": 0.285913031070524, "grad_norm": 0.23006486892700195, "learning_rate": 4.4917193463204065e-05, "loss": 0.1708, "step": 16030 }, { "epoch": 0.2859308671922377, "grad_norm": 0.39275267720222473, "learning_rate": 4.491625268521035e-05, "loss": 0.2042, "step": 16031 }, { "epoch": 0.2859487033139514, "grad_norm": 0.25420838594436646, "learning_rate": 4.491531183001435e-05, "loss": 0.1773, "step": 16032 }, { "epoch": 0.28596653943566513, "grad_norm": 0.2234090268611908, "learning_rate": 4.491437089761973e-05, "loss": 0.1402, "step": 16033 }, { "epoch": 0.2859843755573788, "grad_norm": 0.3079547882080078, "learning_rate": 4.491342988803013e-05, "loss": 0.1741, "step": 16034 }, { "epoch": 0.2860022116790925, "grad_norm": 0.31431812047958374, "learning_rate": 4.49124888012492e-05, "loss": 0.153, "step": 16035 }, { "epoch": 0.2860200478008062, "grad_norm": 0.18872922658920288, "learning_rate": 4.491154763728058e-05, "loss": 0.1713, "step": 16036 }, { "epoch": 0.2860378839225199, "grad_norm": 0.44715237617492676, "learning_rate": 4.491060639612793e-05, "loss": 0.2204, "step": 16037 }, { "epoch": 0.2860557200442336, "grad_norm": 0.23845680058002472, "learning_rate": 4.490966507779488e-05, "loss": 0.1751, "step": 16038 }, { "epoch": 0.28607355616594726, "grad_norm": 0.2513500154018402, "learning_rate": 4.49087236822851e-05, "loss": 0.1917, "step": 16039 }, { "epoch": 0.28609139228766095, "grad_norm": 0.24431303143501282, "learning_rate": 4.4907782209602234e-05, "loss": 0.1629, "step": 16040 }, { "epoch": 0.28610922840937464, "grad_norm": 0.27447381615638733, "learning_rate": 4.490684065974993e-05, "loss": 0.1129, "step": 16041 }, { "epoch": 0.2861270645310884, "grad_norm": 0.2231118232011795, "learning_rate": 4.490589903273184e-05, "loss": 0.1583, "step": 16042 }, { "epoch": 0.28614490065280207, "grad_norm": 0.36416152119636536, "learning_rate": 4.490495732855159e-05, "loss": 0.1911, "step": 16043 }, { "epoch": 0.28616273677451576, "grad_norm": 0.25639608502388, "learning_rate": 4.4904015547212866e-05, "loss": 0.206, "step": 16044 }, { "epoch": 0.28618057289622945, "grad_norm": 0.25020232796669006, "learning_rate": 4.49030736887193e-05, "loss": 0.148, "step": 16045 }, { "epoch": 0.28619840901794313, "grad_norm": 0.29758134484291077, "learning_rate": 4.490213175307455e-05, "loss": 0.2019, "step": 16046 }, { "epoch": 0.2862162451396568, "grad_norm": 0.20389901101589203, "learning_rate": 4.490118974028226e-05, "loss": 0.1575, "step": 16047 }, { "epoch": 0.2862340812613705, "grad_norm": 0.22760283946990967, "learning_rate": 4.490024765034608e-05, "loss": 0.1788, "step": 16048 }, { "epoch": 0.2862519173830842, "grad_norm": 0.24378296732902527, "learning_rate": 4.4899305483269673e-05, "loss": 0.1735, "step": 16049 }, { "epoch": 0.28626975350479794, "grad_norm": 0.3251751959323883, "learning_rate": 4.489836323905668e-05, "loss": 0.1411, "step": 16050 }, { "epoch": 0.28628758962651163, "grad_norm": 0.22391058504581451, "learning_rate": 4.489742091771076e-05, "loss": 0.1652, "step": 16051 }, { "epoch": 0.2863054257482253, "grad_norm": 0.26425278186798096, "learning_rate": 4.489647851923557e-05, "loss": 0.216, "step": 16052 }, { "epoch": 0.286323261869939, "grad_norm": 0.24013806879520416, "learning_rate": 4.489553604363475e-05, "loss": 0.1704, "step": 16053 }, { "epoch": 0.2863410979916527, "grad_norm": 0.2567487061023712, "learning_rate": 4.489459349091196e-05, "loss": 0.2114, "step": 16054 }, { "epoch": 0.2863589341133664, "grad_norm": 0.2368803471326828, "learning_rate": 4.4893650861070855e-05, "loss": 0.1473, "step": 16055 }, { "epoch": 0.28637677023508007, "grad_norm": 0.235275000333786, "learning_rate": 4.489270815411509e-05, "loss": 0.1667, "step": 16056 }, { "epoch": 0.28639460635679376, "grad_norm": 0.22388041019439697, "learning_rate": 4.489176537004832e-05, "loss": 0.1786, "step": 16057 }, { "epoch": 0.2864124424785075, "grad_norm": 0.2587563991546631, "learning_rate": 4.48908225088742e-05, "loss": 0.1922, "step": 16058 }, { "epoch": 0.2864302786002212, "grad_norm": 0.2618034780025482, "learning_rate": 4.488987957059638e-05, "loss": 0.1584, "step": 16059 }, { "epoch": 0.2864481147219349, "grad_norm": 0.6321316957473755, "learning_rate": 4.488893655521851e-05, "loss": 0.1972, "step": 16060 }, { "epoch": 0.28646595084364856, "grad_norm": 0.254079669713974, "learning_rate": 4.488799346274426e-05, "loss": 0.1788, "step": 16061 }, { "epoch": 0.28648378696536225, "grad_norm": 0.2672015130519867, "learning_rate": 4.488705029317727e-05, "loss": 0.1884, "step": 16062 }, { "epoch": 0.28650162308707594, "grad_norm": 0.2810853123664856, "learning_rate": 4.488610704652121e-05, "loss": 0.1715, "step": 16063 }, { "epoch": 0.28651945920878963, "grad_norm": 0.31855452060699463, "learning_rate": 4.488516372277973e-05, "loss": 0.2235, "step": 16064 }, { "epoch": 0.2865372953305033, "grad_norm": 0.2011541724205017, "learning_rate": 4.4884220321956486e-05, "loss": 0.1828, "step": 16065 }, { "epoch": 0.286555131452217, "grad_norm": 0.23755288124084473, "learning_rate": 4.4883276844055144e-05, "loss": 0.1869, "step": 16066 }, { "epoch": 0.28657296757393075, "grad_norm": 0.2659914791584015, "learning_rate": 4.488233328907935e-05, "loss": 0.2029, "step": 16067 }, { "epoch": 0.28659080369564444, "grad_norm": 0.334911972284317, "learning_rate": 4.4881389657032754e-05, "loss": 0.175, "step": 16068 }, { "epoch": 0.2866086398173581, "grad_norm": 0.22663113474845886, "learning_rate": 4.488044594791904e-05, "loss": 0.1851, "step": 16069 }, { "epoch": 0.2866264759390718, "grad_norm": 0.1934032142162323, "learning_rate": 4.487950216174184e-05, "loss": 0.1067, "step": 16070 }, { "epoch": 0.2866443120607855, "grad_norm": 0.34028154611587524, "learning_rate": 4.4878558298504825e-05, "loss": 0.1625, "step": 16071 }, { "epoch": 0.2866621481824992, "grad_norm": 0.23633654415607452, "learning_rate": 4.4877614358211653e-05, "loss": 0.1582, "step": 16072 }, { "epoch": 0.2866799843042129, "grad_norm": 0.2550041377544403, "learning_rate": 4.487667034086599e-05, "loss": 0.1841, "step": 16073 }, { "epoch": 0.28669782042592656, "grad_norm": 0.2965671718120575, "learning_rate": 4.4875726246471476e-05, "loss": 0.1795, "step": 16074 }, { "epoch": 0.2867156565476403, "grad_norm": 0.3106358051300049, "learning_rate": 4.487478207503179e-05, "loss": 0.1938, "step": 16075 }, { "epoch": 0.286733492669354, "grad_norm": 0.2519557476043701, "learning_rate": 4.487383782655058e-05, "loss": 0.1653, "step": 16076 }, { "epoch": 0.2867513287910677, "grad_norm": 0.2858811318874359, "learning_rate": 4.487289350103151e-05, "loss": 0.1887, "step": 16077 }, { "epoch": 0.28676916491278137, "grad_norm": 0.20570501685142517, "learning_rate": 4.4871949098478246e-05, "loss": 0.1403, "step": 16078 }, { "epoch": 0.28678700103449506, "grad_norm": 0.23417896032333374, "learning_rate": 4.4871004618894444e-05, "loss": 0.1596, "step": 16079 }, { "epoch": 0.28680483715620875, "grad_norm": 0.2807213068008423, "learning_rate": 4.4870060062283755e-05, "loss": 0.1617, "step": 16080 }, { "epoch": 0.28682267327792244, "grad_norm": 0.21266649663448334, "learning_rate": 4.486911542864986e-05, "loss": 0.1935, "step": 16081 }, { "epoch": 0.2868405093996361, "grad_norm": 0.30692699551582336, "learning_rate": 4.4868170717996405e-05, "loss": 0.2132, "step": 16082 }, { "epoch": 0.2868583455213498, "grad_norm": 0.2792120575904846, "learning_rate": 4.486722593032706e-05, "loss": 0.1397, "step": 16083 }, { "epoch": 0.28687618164306355, "grad_norm": 0.2881179451942444, "learning_rate": 4.486628106564549e-05, "loss": 0.1825, "step": 16084 }, { "epoch": 0.28689401776477724, "grad_norm": 0.2929091453552246, "learning_rate": 4.486533612395535e-05, "loss": 0.1632, "step": 16085 }, { "epoch": 0.28691185388649093, "grad_norm": 0.2305385321378708, "learning_rate": 4.486439110526031e-05, "loss": 0.1824, "step": 16086 }, { "epoch": 0.2869296900082046, "grad_norm": 0.29803115129470825, "learning_rate": 4.486344600956402e-05, "loss": 0.1523, "step": 16087 }, { "epoch": 0.2869475261299183, "grad_norm": 0.25572335720062256, "learning_rate": 4.486250083687016e-05, "loss": 0.1771, "step": 16088 }, { "epoch": 0.286965362251632, "grad_norm": 0.24149270355701447, "learning_rate": 4.486155558718238e-05, "loss": 0.1915, "step": 16089 }, { "epoch": 0.2869831983733457, "grad_norm": 0.2909892797470093, "learning_rate": 4.4860610260504356e-05, "loss": 0.1773, "step": 16090 }, { "epoch": 0.28700103449505937, "grad_norm": 0.24814195930957794, "learning_rate": 4.485966485683975e-05, "loss": 0.1479, "step": 16091 }, { "epoch": 0.2870188706167731, "grad_norm": 0.3109689950942993, "learning_rate": 4.485871937619222e-05, "loss": 0.1627, "step": 16092 }, { "epoch": 0.2870367067384868, "grad_norm": 0.25675228238105774, "learning_rate": 4.4857773818565426e-05, "loss": 0.1865, "step": 16093 }, { "epoch": 0.2870545428602005, "grad_norm": 0.2775726914405823, "learning_rate": 4.485682818396305e-05, "loss": 0.216, "step": 16094 }, { "epoch": 0.2870723789819142, "grad_norm": 0.22096911072731018, "learning_rate": 4.485588247238875e-05, "loss": 0.164, "step": 16095 }, { "epoch": 0.28709021510362787, "grad_norm": 0.3239266872406006, "learning_rate": 4.485493668384619e-05, "loss": 0.1526, "step": 16096 }, { "epoch": 0.28710805122534155, "grad_norm": 0.22390705347061157, "learning_rate": 4.4853990818339036e-05, "loss": 0.1629, "step": 16097 }, { "epoch": 0.28712588734705524, "grad_norm": 0.21042995154857635, "learning_rate": 4.4853044875870956e-05, "loss": 0.1609, "step": 16098 }, { "epoch": 0.28714372346876893, "grad_norm": 0.2583172917366028, "learning_rate": 4.485209885644562e-05, "loss": 0.2085, "step": 16099 }, { "epoch": 0.2871615595904826, "grad_norm": 0.4621339440345764, "learning_rate": 4.4851152760066696e-05, "loss": 0.1751, "step": 16100 }, { "epoch": 0.28717939571219636, "grad_norm": 0.27855831384658813, "learning_rate": 4.485020658673784e-05, "loss": 0.1377, "step": 16101 }, { "epoch": 0.28719723183391005, "grad_norm": 0.31874874234199524, "learning_rate": 4.484926033646273e-05, "loss": 0.196, "step": 16102 }, { "epoch": 0.28721506795562374, "grad_norm": 0.21516236662864685, "learning_rate": 4.484831400924503e-05, "loss": 0.1759, "step": 16103 }, { "epoch": 0.2872329040773374, "grad_norm": 0.2575733959674835, "learning_rate": 4.484736760508842e-05, "loss": 0.1793, "step": 16104 }, { "epoch": 0.2872507401990511, "grad_norm": 0.33717402815818787, "learning_rate": 4.4846421123996546e-05, "loss": 0.1958, "step": 16105 }, { "epoch": 0.2872685763207648, "grad_norm": 0.1957182139158249, "learning_rate": 4.484547456597309e-05, "loss": 0.1361, "step": 16106 }, { "epoch": 0.2872864124424785, "grad_norm": 0.3578245937824249, "learning_rate": 4.484452793102173e-05, "loss": 0.1373, "step": 16107 }, { "epoch": 0.2873042485641922, "grad_norm": 0.28512781858444214, "learning_rate": 4.484358121914611e-05, "loss": 0.1987, "step": 16108 }, { "epoch": 0.2873220846859059, "grad_norm": 0.21096031367778778, "learning_rate": 4.4842634430349925e-05, "loss": 0.1641, "step": 16109 }, { "epoch": 0.2873399208076196, "grad_norm": 0.29081612825393677, "learning_rate": 4.484168756463684e-05, "loss": 0.1778, "step": 16110 }, { "epoch": 0.2873577569293333, "grad_norm": 0.28361403942108154, "learning_rate": 4.4840740622010515e-05, "loss": 0.1702, "step": 16111 }, { "epoch": 0.287375593051047, "grad_norm": 0.1937485784292221, "learning_rate": 4.4839793602474625e-05, "loss": 0.1301, "step": 16112 }, { "epoch": 0.2873934291727607, "grad_norm": 0.2616284489631653, "learning_rate": 4.483884650603284e-05, "loss": 0.1587, "step": 16113 }, { "epoch": 0.28741126529447436, "grad_norm": 0.2699330449104309, "learning_rate": 4.4837899332688836e-05, "loss": 0.1927, "step": 16114 }, { "epoch": 0.28742910141618805, "grad_norm": 0.34607216715812683, "learning_rate": 4.483695208244629e-05, "loss": 0.1709, "step": 16115 }, { "epoch": 0.28744693753790174, "grad_norm": 0.28219762444496155, "learning_rate": 4.483600475530886e-05, "loss": 0.1984, "step": 16116 }, { "epoch": 0.2874647736596155, "grad_norm": 0.2999812662601471, "learning_rate": 4.483505735128023e-05, "loss": 0.2003, "step": 16117 }, { "epoch": 0.28748260978132917, "grad_norm": 0.2562468647956848, "learning_rate": 4.483410987036406e-05, "loss": 0.138, "step": 16118 }, { "epoch": 0.28750044590304286, "grad_norm": 0.2881334125995636, "learning_rate": 4.4833162312564033e-05, "loss": 0.2223, "step": 16119 }, { "epoch": 0.28751828202475654, "grad_norm": 0.3034203052520752, "learning_rate": 4.483221467788381e-05, "loss": 0.2634, "step": 16120 }, { "epoch": 0.28753611814647023, "grad_norm": 0.2534237802028656, "learning_rate": 4.483126696632708e-05, "loss": 0.156, "step": 16121 }, { "epoch": 0.2875539542681839, "grad_norm": 0.30473223328590393, "learning_rate": 4.4830319177897514e-05, "loss": 0.1968, "step": 16122 }, { "epoch": 0.2875717903898976, "grad_norm": 0.216000497341156, "learning_rate": 4.482937131259878e-05, "loss": 0.1824, "step": 16123 }, { "epoch": 0.2875896265116113, "grad_norm": 0.27592262625694275, "learning_rate": 4.482842337043455e-05, "loss": 0.1582, "step": 16124 }, { "epoch": 0.287607462633325, "grad_norm": 0.37166082859039307, "learning_rate": 4.482747535140851e-05, "loss": 0.1667, "step": 16125 }, { "epoch": 0.2876252987550387, "grad_norm": 0.2891044616699219, "learning_rate": 4.482652725552432e-05, "loss": 0.1693, "step": 16126 }, { "epoch": 0.2876431348767524, "grad_norm": 0.34204724431037903, "learning_rate": 4.482557908278566e-05, "loss": 0.1022, "step": 16127 }, { "epoch": 0.2876609709984661, "grad_norm": 0.3074796497821808, "learning_rate": 4.482463083319621e-05, "loss": 0.1205, "step": 16128 }, { "epoch": 0.2876788071201798, "grad_norm": 0.32623523473739624, "learning_rate": 4.4823682506759646e-05, "loss": 0.1947, "step": 16129 }, { "epoch": 0.2876966432418935, "grad_norm": 0.3176324963569641, "learning_rate": 4.4822734103479643e-05, "loss": 0.1607, "step": 16130 }, { "epoch": 0.28771447936360717, "grad_norm": 0.30377528071403503, "learning_rate": 4.482178562335988e-05, "loss": 0.166, "step": 16131 }, { "epoch": 0.28773231548532086, "grad_norm": 0.22573193907737732, "learning_rate": 4.482083706640402e-05, "loss": 0.1679, "step": 16132 }, { "epoch": 0.28775015160703454, "grad_norm": 0.30298227071762085, "learning_rate": 4.481988843261575e-05, "loss": 0.1517, "step": 16133 }, { "epoch": 0.2877679877287483, "grad_norm": 0.32469886541366577, "learning_rate": 4.4818939721998754e-05, "loss": 0.1794, "step": 16134 }, { "epoch": 0.287785823850462, "grad_norm": 0.2826855778694153, "learning_rate": 4.4817990934556695e-05, "loss": 0.1383, "step": 16135 }, { "epoch": 0.28780365997217566, "grad_norm": 0.2807585895061493, "learning_rate": 4.481704207029327e-05, "loss": 0.1975, "step": 16136 }, { "epoch": 0.28782149609388935, "grad_norm": 0.20302768051624298, "learning_rate": 4.481609312921213e-05, "loss": 0.1598, "step": 16137 }, { "epoch": 0.28783933221560304, "grad_norm": 0.29405003786087036, "learning_rate": 4.4815144111316986e-05, "loss": 0.1407, "step": 16138 }, { "epoch": 0.2878571683373167, "grad_norm": 0.25270095467567444, "learning_rate": 4.481419501661149e-05, "loss": 0.2086, "step": 16139 }, { "epoch": 0.2878750044590304, "grad_norm": 0.26641079783439636, "learning_rate": 4.481324584509933e-05, "loss": 0.1359, "step": 16140 }, { "epoch": 0.2878928405807441, "grad_norm": 0.2650429606437683, "learning_rate": 4.4812296596784185e-05, "loss": 0.1878, "step": 16141 }, { "epoch": 0.2879106767024578, "grad_norm": 0.3187381327152252, "learning_rate": 4.4811347271669735e-05, "loss": 0.1534, "step": 16142 }, { "epoch": 0.28792851282417153, "grad_norm": 0.25793227553367615, "learning_rate": 4.481039786975967e-05, "loss": 0.1866, "step": 16143 }, { "epoch": 0.2879463489458852, "grad_norm": 0.29353317618370056, "learning_rate": 4.4809448391057646e-05, "loss": 0.1588, "step": 16144 }, { "epoch": 0.2879641850675989, "grad_norm": 0.3036663234233856, "learning_rate": 4.480849883556737e-05, "loss": 0.1698, "step": 16145 }, { "epoch": 0.2879820211893126, "grad_norm": 0.23433850705623627, "learning_rate": 4.4807549203292496e-05, "loss": 0.2282, "step": 16146 }, { "epoch": 0.2879998573110263, "grad_norm": 0.21231277287006378, "learning_rate": 4.4806599494236735e-05, "loss": 0.171, "step": 16147 }, { "epoch": 0.28801769343274, "grad_norm": 0.24991388618946075, "learning_rate": 4.480564970840375e-05, "loss": 0.178, "step": 16148 }, { "epoch": 0.28803552955445366, "grad_norm": 0.28997746109962463, "learning_rate": 4.4804699845797227e-05, "loss": 0.1636, "step": 16149 }, { "epoch": 0.28805336567616735, "grad_norm": 0.2965392470359802, "learning_rate": 4.4803749906420846e-05, "loss": 0.1616, "step": 16150 }, { "epoch": 0.2880712017978811, "grad_norm": 0.21225705742835999, "learning_rate": 4.480279989027828e-05, "loss": 0.1572, "step": 16151 }, { "epoch": 0.2880890379195948, "grad_norm": 0.32420283555984497, "learning_rate": 4.480184979737323e-05, "loss": 0.1416, "step": 16152 }, { "epoch": 0.28810687404130847, "grad_norm": 0.30400407314300537, "learning_rate": 4.4800899627709375e-05, "loss": 0.191, "step": 16153 }, { "epoch": 0.28812471016302216, "grad_norm": 0.26624125242233276, "learning_rate": 4.479994938129039e-05, "loss": 0.1799, "step": 16154 }, { "epoch": 0.28814254628473585, "grad_norm": 0.21302437782287598, "learning_rate": 4.479899905811996e-05, "loss": 0.1694, "step": 16155 }, { "epoch": 0.28816038240644953, "grad_norm": 0.2593947649002075, "learning_rate": 4.4798048658201776e-05, "loss": 0.1931, "step": 16156 }, { "epoch": 0.2881782185281632, "grad_norm": 0.2870843708515167, "learning_rate": 4.479709818153952e-05, "loss": 0.2059, "step": 16157 }, { "epoch": 0.2881960546498769, "grad_norm": 0.21844667196273804, "learning_rate": 4.4796147628136864e-05, "loss": 0.1499, "step": 16158 }, { "epoch": 0.28821389077159065, "grad_norm": 0.27017688751220703, "learning_rate": 4.479519699799751e-05, "loss": 0.1787, "step": 16159 }, { "epoch": 0.28823172689330434, "grad_norm": 0.27435439825057983, "learning_rate": 4.4794246291125134e-05, "loss": 0.1957, "step": 16160 }, { "epoch": 0.28824956301501803, "grad_norm": 0.26444387435913086, "learning_rate": 4.479329550752342e-05, "loss": 0.1461, "step": 16161 }, { "epoch": 0.2882673991367317, "grad_norm": 0.3331790864467621, "learning_rate": 4.4792344647196064e-05, "loss": 0.1208, "step": 16162 }, { "epoch": 0.2882852352584454, "grad_norm": 0.48470979928970337, "learning_rate": 4.479139371014673e-05, "loss": 0.158, "step": 16163 }, { "epoch": 0.2883030713801591, "grad_norm": 0.4501863420009613, "learning_rate": 4.479044269637913e-05, "loss": 0.1614, "step": 16164 }, { "epoch": 0.2883209075018728, "grad_norm": 0.23002241551876068, "learning_rate": 4.4789491605896935e-05, "loss": 0.1716, "step": 16165 }, { "epoch": 0.28833874362358647, "grad_norm": 0.3586452901363373, "learning_rate": 4.478854043870384e-05, "loss": 0.1334, "step": 16166 }, { "epoch": 0.28835657974530016, "grad_norm": 0.3058027923107147, "learning_rate": 4.478758919480352e-05, "loss": 0.1636, "step": 16167 }, { "epoch": 0.2883744158670139, "grad_norm": 0.28845784068107605, "learning_rate": 4.4786637874199676e-05, "loss": 0.1559, "step": 16168 }, { "epoch": 0.2883922519887276, "grad_norm": 0.22414234280586243, "learning_rate": 4.4785686476895984e-05, "loss": 0.1958, "step": 16169 }, { "epoch": 0.2884100881104413, "grad_norm": 0.2731773257255554, "learning_rate": 4.478473500289614e-05, "loss": 0.1979, "step": 16170 }, { "epoch": 0.28842792423215496, "grad_norm": 0.2643968462944031, "learning_rate": 4.478378345220383e-05, "loss": 0.1899, "step": 16171 }, { "epoch": 0.28844576035386865, "grad_norm": 0.3066263794898987, "learning_rate": 4.478283182482274e-05, "loss": 0.1757, "step": 16172 }, { "epoch": 0.28846359647558234, "grad_norm": 0.23743121325969696, "learning_rate": 4.4781880120756565e-05, "loss": 0.22, "step": 16173 }, { "epoch": 0.28848143259729603, "grad_norm": 0.2775743901729584, "learning_rate": 4.4780928340008986e-05, "loss": 0.2179, "step": 16174 }, { "epoch": 0.2884992687190097, "grad_norm": 0.24497991800308228, "learning_rate": 4.4779976482583695e-05, "loss": 0.1511, "step": 16175 }, { "epoch": 0.28851710484072346, "grad_norm": 0.2688525915145874, "learning_rate": 4.477902454848439e-05, "loss": 0.1542, "step": 16176 }, { "epoch": 0.28853494096243715, "grad_norm": 0.2902068495750427, "learning_rate": 4.477807253771476e-05, "loss": 0.2002, "step": 16177 }, { "epoch": 0.28855277708415084, "grad_norm": 0.40720242261886597, "learning_rate": 4.4777120450278476e-05, "loss": 0.2339, "step": 16178 }, { "epoch": 0.2885706132058645, "grad_norm": 0.1990911215543747, "learning_rate": 4.477616828617924e-05, "loss": 0.1847, "step": 16179 }, { "epoch": 0.2885884493275782, "grad_norm": 0.2967304587364197, "learning_rate": 4.477521604542076e-05, "loss": 0.1953, "step": 16180 }, { "epoch": 0.2886062854492919, "grad_norm": 0.361038476228714, "learning_rate": 4.4774263728006707e-05, "loss": 0.1747, "step": 16181 }, { "epoch": 0.2886241215710056, "grad_norm": 0.2786751687526703, "learning_rate": 4.477331133394078e-05, "loss": 0.154, "step": 16182 }, { "epoch": 0.2886419576927193, "grad_norm": 0.2897767424583435, "learning_rate": 4.477235886322666e-05, "loss": 0.1081, "step": 16183 }, { "epoch": 0.28865979381443296, "grad_norm": 0.2561451494693756, "learning_rate": 4.477140631586806e-05, "loss": 0.183, "step": 16184 }, { "epoch": 0.2886776299361467, "grad_norm": 0.27485552430152893, "learning_rate": 4.4770453691868653e-05, "loss": 0.1809, "step": 16185 }, { "epoch": 0.2886954660578604, "grad_norm": 0.2473611682653427, "learning_rate": 4.476950099123213e-05, "loss": 0.2197, "step": 16186 }, { "epoch": 0.2887133021795741, "grad_norm": 0.2448200136423111, "learning_rate": 4.476854821396221e-05, "loss": 0.1991, "step": 16187 }, { "epoch": 0.28873113830128777, "grad_norm": 0.28758755326271057, "learning_rate": 4.476759536006256e-05, "loss": 0.214, "step": 16188 }, { "epoch": 0.28874897442300146, "grad_norm": 0.29136964678764343, "learning_rate": 4.476664242953688e-05, "loss": 0.2366, "step": 16189 }, { "epoch": 0.28876681054471515, "grad_norm": 0.23921862244606018, "learning_rate": 4.476568942238887e-05, "loss": 0.2174, "step": 16190 }, { "epoch": 0.28878464666642883, "grad_norm": 0.4133034348487854, "learning_rate": 4.4764736338622224e-05, "loss": 0.1984, "step": 16191 }, { "epoch": 0.2888024827881425, "grad_norm": 0.30323541164398193, "learning_rate": 4.4763783178240635e-05, "loss": 0.2293, "step": 16192 }, { "epoch": 0.28882031890985627, "grad_norm": 0.34238171577453613, "learning_rate": 4.476282994124779e-05, "loss": 0.169, "step": 16193 }, { "epoch": 0.28883815503156995, "grad_norm": 0.2112024873495102, "learning_rate": 4.476187662764739e-05, "loss": 0.1441, "step": 16194 }, { "epoch": 0.28885599115328364, "grad_norm": 0.3371756970882416, "learning_rate": 4.476092323744314e-05, "loss": 0.1603, "step": 16195 }, { "epoch": 0.28887382727499733, "grad_norm": 0.27282217144966125, "learning_rate": 4.475996977063872e-05, "loss": 0.1527, "step": 16196 }, { "epoch": 0.288891663396711, "grad_norm": 0.19841310381889343, "learning_rate": 4.475901622723783e-05, "loss": 0.1299, "step": 16197 }, { "epoch": 0.2889094995184247, "grad_norm": 0.22017700970172882, "learning_rate": 4.475806260724417e-05, "loss": 0.1582, "step": 16198 }, { "epoch": 0.2889273356401384, "grad_norm": 0.26420333981513977, "learning_rate": 4.475710891066144e-05, "loss": 0.1791, "step": 16199 }, { "epoch": 0.2889451717618521, "grad_norm": 0.2915920913219452, "learning_rate": 4.475615513749333e-05, "loss": 0.1552, "step": 16200 }, { "epoch": 0.28896300788356577, "grad_norm": 0.19410520792007446, "learning_rate": 4.4755201287743534e-05, "loss": 0.1395, "step": 16201 }, { "epoch": 0.2889808440052795, "grad_norm": 0.23600637912750244, "learning_rate": 4.475424736141576e-05, "loss": 0.1798, "step": 16202 }, { "epoch": 0.2889986801269932, "grad_norm": 0.28963592648506165, "learning_rate": 4.475329335851369e-05, "loss": 0.1606, "step": 16203 }, { "epoch": 0.2890165162487069, "grad_norm": 0.3423754572868347, "learning_rate": 4.475233927904105e-05, "loss": 0.2047, "step": 16204 }, { "epoch": 0.2890343523704206, "grad_norm": 0.24831396341323853, "learning_rate": 4.475138512300151e-05, "loss": 0.2355, "step": 16205 }, { "epoch": 0.28905218849213427, "grad_norm": 0.22182853519916534, "learning_rate": 4.475043089039878e-05, "loss": 0.183, "step": 16206 }, { "epoch": 0.28907002461384795, "grad_norm": 0.2502618730068207, "learning_rate": 4.474947658123656e-05, "loss": 0.183, "step": 16207 }, { "epoch": 0.28908786073556164, "grad_norm": 0.23544780910015106, "learning_rate": 4.474852219551854e-05, "loss": 0.1449, "step": 16208 }, { "epoch": 0.28910569685727533, "grad_norm": 0.20856021344661713, "learning_rate": 4.474756773324844e-05, "loss": 0.1731, "step": 16209 }, { "epoch": 0.2891235329789891, "grad_norm": 0.3059997856616974, "learning_rate": 4.474661319442994e-05, "loss": 0.2296, "step": 16210 }, { "epoch": 0.28914136910070276, "grad_norm": 0.4440188705921173, "learning_rate": 4.474565857906675e-05, "loss": 0.1758, "step": 16211 }, { "epoch": 0.28915920522241645, "grad_norm": 0.3737524151802063, "learning_rate": 4.474470388716256e-05, "loss": 0.2154, "step": 16212 }, { "epoch": 0.28917704134413014, "grad_norm": 0.4202517569065094, "learning_rate": 4.4743749118721084e-05, "loss": 0.189, "step": 16213 }, { "epoch": 0.2891948774658438, "grad_norm": 0.3657699525356293, "learning_rate": 4.474279427374602e-05, "loss": 0.1644, "step": 16214 }, { "epoch": 0.2892127135875575, "grad_norm": 0.40968450903892517, "learning_rate": 4.4741839352241056e-05, "loss": 0.1779, "step": 16215 }, { "epoch": 0.2892305497092712, "grad_norm": 0.17167669534683228, "learning_rate": 4.4740884354209914e-05, "loss": 0.1364, "step": 16216 }, { "epoch": 0.2892483858309849, "grad_norm": 0.3087173104286194, "learning_rate": 4.473992927965628e-05, "loss": 0.1835, "step": 16217 }, { "epoch": 0.28926622195269863, "grad_norm": 0.22113212943077087, "learning_rate": 4.4738974128583866e-05, "loss": 0.1723, "step": 16218 }, { "epoch": 0.2892840580744123, "grad_norm": 0.33859553933143616, "learning_rate": 4.473801890099637e-05, "loss": 0.185, "step": 16219 }, { "epoch": 0.289301894196126, "grad_norm": 0.2641701102256775, "learning_rate": 4.473706359689749e-05, "loss": 0.1809, "step": 16220 }, { "epoch": 0.2893197303178397, "grad_norm": 0.22278887033462524, "learning_rate": 4.473610821629094e-05, "loss": 0.1823, "step": 16221 }, { "epoch": 0.2893375664395534, "grad_norm": 1.4023253917694092, "learning_rate": 4.473515275918042e-05, "loss": 0.7099, "step": 16222 }, { "epoch": 0.28935540256126707, "grad_norm": 0.2187526375055313, "learning_rate": 4.473419722556963e-05, "loss": 0.1743, "step": 16223 }, { "epoch": 0.28937323868298076, "grad_norm": 0.22437356412410736, "learning_rate": 4.473324161546227e-05, "loss": 0.1676, "step": 16224 }, { "epoch": 0.28939107480469445, "grad_norm": 0.20635618269443512, "learning_rate": 4.473228592886206e-05, "loss": 0.1131, "step": 16225 }, { "epoch": 0.28940891092640814, "grad_norm": 0.2504064738750458, "learning_rate": 4.473133016577269e-05, "loss": 0.1852, "step": 16226 }, { "epoch": 0.2894267470481219, "grad_norm": 0.3178398311138153, "learning_rate": 4.473037432619787e-05, "loss": 0.2277, "step": 16227 }, { "epoch": 0.28944458316983557, "grad_norm": 0.3087739050388336, "learning_rate": 4.4729418410141296e-05, "loss": 0.1435, "step": 16228 }, { "epoch": 0.28946241929154926, "grad_norm": 0.2511499226093292, "learning_rate": 4.4728462417606684e-05, "loss": 0.2073, "step": 16229 }, { "epoch": 0.28948025541326294, "grad_norm": 0.35206353664398193, "learning_rate": 4.472750634859775e-05, "loss": 0.2011, "step": 16230 }, { "epoch": 0.28949809153497663, "grad_norm": 0.43800655007362366, "learning_rate": 4.472655020311818e-05, "loss": 0.1755, "step": 16231 }, { "epoch": 0.2895159276566903, "grad_norm": 0.23357662558555603, "learning_rate": 4.4725593981171685e-05, "loss": 0.2205, "step": 16232 }, { "epoch": 0.289533763778404, "grad_norm": 0.2701023817062378, "learning_rate": 4.4724637682761976e-05, "loss": 0.1527, "step": 16233 }, { "epoch": 0.2895515999001177, "grad_norm": 0.2746738791465759, "learning_rate": 4.4723681307892764e-05, "loss": 0.1606, "step": 16234 }, { "epoch": 0.28956943602183144, "grad_norm": 0.22271159291267395, "learning_rate": 4.472272485656774e-05, "loss": 0.1603, "step": 16235 }, { "epoch": 0.2895872721435451, "grad_norm": 0.2857767939567566, "learning_rate": 4.472176832879064e-05, "loss": 0.1911, "step": 16236 }, { "epoch": 0.2896051082652588, "grad_norm": 0.2315034419298172, "learning_rate": 4.472081172456514e-05, "loss": 0.1822, "step": 16237 }, { "epoch": 0.2896229443869725, "grad_norm": 0.2705047130584717, "learning_rate": 4.4719855043894964e-05, "loss": 0.1686, "step": 16238 }, { "epoch": 0.2896407805086862, "grad_norm": 0.36118659377098083, "learning_rate": 4.4718898286783825e-05, "loss": 0.1684, "step": 16239 }, { "epoch": 0.2896586166303999, "grad_norm": 0.24807317554950714, "learning_rate": 4.4717941453235424e-05, "loss": 0.1541, "step": 16240 }, { "epoch": 0.28967645275211357, "grad_norm": 0.30158498883247375, "learning_rate": 4.471698454325346e-05, "loss": 0.1844, "step": 16241 }, { "epoch": 0.28969428887382725, "grad_norm": 0.4301516115665436, "learning_rate": 4.4716027556841666e-05, "loss": 0.2252, "step": 16242 }, { "epoch": 0.28971212499554094, "grad_norm": 0.24146386981010437, "learning_rate": 4.471507049400374e-05, "loss": 0.1745, "step": 16243 }, { "epoch": 0.2897299611172547, "grad_norm": 0.2901882827281952, "learning_rate": 4.471411335474338e-05, "loss": 0.1803, "step": 16244 }, { "epoch": 0.2897477972389684, "grad_norm": 0.2132776528596878, "learning_rate": 4.471315613906432e-05, "loss": 0.1488, "step": 16245 }, { "epoch": 0.28976563336068206, "grad_norm": 0.24833212792873383, "learning_rate": 4.4712198846970256e-05, "loss": 0.1674, "step": 16246 }, { "epoch": 0.28978346948239575, "grad_norm": 0.2625966966152191, "learning_rate": 4.47112414784649e-05, "loss": 0.1672, "step": 16247 }, { "epoch": 0.28980130560410944, "grad_norm": 0.27172932028770447, "learning_rate": 4.4710284033551965e-05, "loss": 0.1671, "step": 16248 }, { "epoch": 0.2898191417258231, "grad_norm": 0.336216002702713, "learning_rate": 4.470932651223516e-05, "loss": 0.2142, "step": 16249 }, { "epoch": 0.2898369778475368, "grad_norm": 0.28624820709228516, "learning_rate": 4.4708368914518196e-05, "loss": 0.1741, "step": 16250 }, { "epoch": 0.2898548139692505, "grad_norm": 0.2580035328865051, "learning_rate": 4.4707411240404784e-05, "loss": 0.1656, "step": 16251 }, { "epoch": 0.28987265009096425, "grad_norm": 0.2556898593902588, "learning_rate": 4.470645348989864e-05, "loss": 0.119, "step": 16252 }, { "epoch": 0.28989048621267793, "grad_norm": 0.36033087968826294, "learning_rate": 4.470549566300348e-05, "loss": 0.2276, "step": 16253 }, { "epoch": 0.2899083223343916, "grad_norm": 0.2873746454715729, "learning_rate": 4.4704537759723014e-05, "loss": 0.1493, "step": 16254 }, { "epoch": 0.2899261584561053, "grad_norm": 0.3023276925086975, "learning_rate": 4.470357978006096e-05, "loss": 0.1978, "step": 16255 }, { "epoch": 0.289943994577819, "grad_norm": 0.30281975865364075, "learning_rate": 4.470262172402101e-05, "loss": 0.2077, "step": 16256 }, { "epoch": 0.2899618306995327, "grad_norm": 0.2511160969734192, "learning_rate": 4.47016635916069e-05, "loss": 0.1688, "step": 16257 }, { "epoch": 0.2899796668212464, "grad_norm": 0.20737150311470032, "learning_rate": 4.470070538282234e-05, "loss": 0.1596, "step": 16258 }, { "epoch": 0.28999750294296006, "grad_norm": 0.4527236819267273, "learning_rate": 4.4699747097671034e-05, "loss": 0.2811, "step": 16259 }, { "epoch": 0.2900153390646738, "grad_norm": 0.23146802186965942, "learning_rate": 4.4698788736156714e-05, "loss": 0.194, "step": 16260 }, { "epoch": 0.2900331751863875, "grad_norm": 0.18410581350326538, "learning_rate": 4.469783029828308e-05, "loss": 0.1449, "step": 16261 }, { "epoch": 0.2900510113081012, "grad_norm": 0.21852976083755493, "learning_rate": 4.469687178405385e-05, "loss": 0.1694, "step": 16262 }, { "epoch": 0.29006884742981487, "grad_norm": 0.35814619064331055, "learning_rate": 4.469591319347275e-05, "loss": 0.2019, "step": 16263 }, { "epoch": 0.29008668355152856, "grad_norm": 0.2415800392627716, "learning_rate": 4.469495452654348e-05, "loss": 0.1998, "step": 16264 }, { "epoch": 0.29010451967324224, "grad_norm": 0.38169074058532715, "learning_rate": 4.4693995783269766e-05, "loss": 0.1764, "step": 16265 }, { "epoch": 0.29012235579495593, "grad_norm": 0.22764852643013, "learning_rate": 4.469303696365532e-05, "loss": 0.1802, "step": 16266 }, { "epoch": 0.2901401919166696, "grad_norm": 0.3218633532524109, "learning_rate": 4.469207806770387e-05, "loss": 0.1703, "step": 16267 }, { "epoch": 0.2901580280383833, "grad_norm": 0.24549725651741028, "learning_rate": 4.469111909541911e-05, "loss": 0.1816, "step": 16268 }, { "epoch": 0.29017586416009705, "grad_norm": 0.31724241375923157, "learning_rate": 4.469016004680478e-05, "loss": 0.1985, "step": 16269 }, { "epoch": 0.29019370028181074, "grad_norm": 0.2785017490386963, "learning_rate": 4.468920092186459e-05, "loss": 0.1515, "step": 16270 }, { "epoch": 0.29021153640352443, "grad_norm": 0.2354467809200287, "learning_rate": 4.468824172060225e-05, "loss": 0.1558, "step": 16271 }, { "epoch": 0.2902293725252381, "grad_norm": 0.2722052335739136, "learning_rate": 4.468728244302149e-05, "loss": 0.1722, "step": 16272 }, { "epoch": 0.2902472086469518, "grad_norm": 0.3607449233531952, "learning_rate": 4.468632308912602e-05, "loss": 0.2074, "step": 16273 }, { "epoch": 0.2902650447686655, "grad_norm": 0.325567364692688, "learning_rate": 4.468536365891957e-05, "loss": 0.1837, "step": 16274 }, { "epoch": 0.2902828808903792, "grad_norm": 0.2977498173713684, "learning_rate": 4.4684404152405845e-05, "loss": 0.1841, "step": 16275 }, { "epoch": 0.29030071701209287, "grad_norm": 0.3762569725513458, "learning_rate": 4.468344456958857e-05, "loss": 0.182, "step": 16276 }, { "epoch": 0.2903185531338066, "grad_norm": 0.22992998361587524, "learning_rate": 4.4682484910471474e-05, "loss": 0.2141, "step": 16277 }, { "epoch": 0.2903363892555203, "grad_norm": 0.2527478337287903, "learning_rate": 4.468152517505826e-05, "loss": 0.1895, "step": 16278 }, { "epoch": 0.290354225377234, "grad_norm": 0.2643086612224579, "learning_rate": 4.4680565363352656e-05, "loss": 0.1533, "step": 16279 }, { "epoch": 0.2903720614989477, "grad_norm": 0.32514500617980957, "learning_rate": 4.4679605475358385e-05, "loss": 0.2437, "step": 16280 }, { "epoch": 0.29038989762066136, "grad_norm": 0.3089638352394104, "learning_rate": 4.467864551107917e-05, "loss": 0.1694, "step": 16281 }, { "epoch": 0.29040773374237505, "grad_norm": 0.3204690217971802, "learning_rate": 4.4677685470518725e-05, "loss": 0.1727, "step": 16282 }, { "epoch": 0.29042556986408874, "grad_norm": 0.29917922616004944, "learning_rate": 4.4676725353680776e-05, "loss": 0.2017, "step": 16283 }, { "epoch": 0.2904434059858024, "grad_norm": 0.2321767657995224, "learning_rate": 4.467576516056904e-05, "loss": 0.164, "step": 16284 }, { "epoch": 0.2904612421075161, "grad_norm": 0.22574694454669952, "learning_rate": 4.467480489118725e-05, "loss": 0.1909, "step": 16285 }, { "epoch": 0.29047907822922986, "grad_norm": 0.8753074407577515, "learning_rate": 4.467384454553911e-05, "loss": 0.3279, "step": 16286 }, { "epoch": 0.29049691435094355, "grad_norm": 0.22182875871658325, "learning_rate": 4.467288412362836e-05, "loss": 0.1634, "step": 16287 }, { "epoch": 0.29051475047265723, "grad_norm": 0.45023661851882935, "learning_rate": 4.4671923625458715e-05, "loss": 0.2519, "step": 16288 }, { "epoch": 0.2905325865943709, "grad_norm": 0.22804221510887146, "learning_rate": 4.46709630510339e-05, "loss": 0.1292, "step": 16289 }, { "epoch": 0.2905504227160846, "grad_norm": 0.2859315574169159, "learning_rate": 4.4670002400357634e-05, "loss": 0.1802, "step": 16290 }, { "epoch": 0.2905682588377983, "grad_norm": 0.2908354103565216, "learning_rate": 4.4669041673433654e-05, "loss": 0.1816, "step": 16291 }, { "epoch": 0.290586094959512, "grad_norm": 0.1963014006614685, "learning_rate": 4.466808087026567e-05, "loss": 0.155, "step": 16292 }, { "epoch": 0.2906039310812257, "grad_norm": 0.25148773193359375, "learning_rate": 4.466711999085741e-05, "loss": 0.1798, "step": 16293 }, { "epoch": 0.2906217672029394, "grad_norm": 0.2758645713329315, "learning_rate": 4.46661590352126e-05, "loss": 0.1609, "step": 16294 }, { "epoch": 0.2906396033246531, "grad_norm": 0.40784990787506104, "learning_rate": 4.466519800333497e-05, "loss": 0.1995, "step": 16295 }, { "epoch": 0.2906574394463668, "grad_norm": 0.23751278221607208, "learning_rate": 4.466423689522824e-05, "loss": 0.2073, "step": 16296 }, { "epoch": 0.2906752755680805, "grad_norm": 0.30521532893180847, "learning_rate": 4.4663275710896126e-05, "loss": 0.1759, "step": 16297 }, { "epoch": 0.29069311168979417, "grad_norm": 0.2473895251750946, "learning_rate": 4.4662314450342365e-05, "loss": 0.1801, "step": 16298 }, { "epoch": 0.29071094781150786, "grad_norm": 0.2924625277519226, "learning_rate": 4.466135311357069e-05, "loss": 0.1567, "step": 16299 }, { "epoch": 0.29072878393322155, "grad_norm": 0.2705855369567871, "learning_rate": 4.4660391700584826e-05, "loss": 0.2148, "step": 16300 }, { "epoch": 0.29074662005493523, "grad_norm": 0.24080908298492432, "learning_rate": 4.465943021138848e-05, "loss": 0.1723, "step": 16301 }, { "epoch": 0.2907644561766489, "grad_norm": 0.22023464739322662, "learning_rate": 4.465846864598539e-05, "loss": 0.1538, "step": 16302 }, { "epoch": 0.29078229229836267, "grad_norm": 0.19990479946136475, "learning_rate": 4.46575070043793e-05, "loss": 0.1498, "step": 16303 }, { "epoch": 0.29080012842007635, "grad_norm": 0.30216798186302185, "learning_rate": 4.465654528657392e-05, "loss": 0.1869, "step": 16304 }, { "epoch": 0.29081796454179004, "grad_norm": 0.26628923416137695, "learning_rate": 4.465558349257297e-05, "loss": 0.1812, "step": 16305 }, { "epoch": 0.29083580066350373, "grad_norm": 0.3141874670982361, "learning_rate": 4.46546216223802e-05, "loss": 0.1824, "step": 16306 }, { "epoch": 0.2908536367852174, "grad_norm": 0.31333398818969727, "learning_rate": 4.4653659675999326e-05, "loss": 0.1777, "step": 16307 }, { "epoch": 0.2908714729069311, "grad_norm": 0.19809776544570923, "learning_rate": 4.465269765343408e-05, "loss": 0.1858, "step": 16308 }, { "epoch": 0.2908893090286448, "grad_norm": 0.3457790017127991, "learning_rate": 4.4651735554688186e-05, "loss": 0.1896, "step": 16309 }, { "epoch": 0.2909071451503585, "grad_norm": 0.3072323203086853, "learning_rate": 4.4650773379765374e-05, "loss": 0.2637, "step": 16310 }, { "epoch": 0.2909249812720722, "grad_norm": 0.19440221786499023, "learning_rate": 4.4649811128669384e-05, "loss": 0.1386, "step": 16311 }, { "epoch": 0.2909428173937859, "grad_norm": 0.2634267210960388, "learning_rate": 4.464884880140394e-05, "loss": 0.173, "step": 16312 }, { "epoch": 0.2909606535154996, "grad_norm": 0.20517735183238983, "learning_rate": 4.464788639797277e-05, "loss": 0.1873, "step": 16313 }, { "epoch": 0.2909784896372133, "grad_norm": 0.28856387734413147, "learning_rate": 4.464692391837961e-05, "loss": 0.1789, "step": 16314 }, { "epoch": 0.290996325758927, "grad_norm": 0.41785359382629395, "learning_rate": 4.464596136262818e-05, "loss": 0.1681, "step": 16315 }, { "epoch": 0.29101416188064066, "grad_norm": 0.28234413266181946, "learning_rate": 4.464499873072222e-05, "loss": 0.2008, "step": 16316 }, { "epoch": 0.29103199800235435, "grad_norm": 0.21627850830554962, "learning_rate": 4.4644036022665456e-05, "loss": 0.1726, "step": 16317 }, { "epoch": 0.29104983412406804, "grad_norm": 0.2704116106033325, "learning_rate": 4.464307323846163e-05, "loss": 0.1629, "step": 16318 }, { "epoch": 0.2910676702457818, "grad_norm": 0.2909679114818573, "learning_rate": 4.464211037811447e-05, "loss": 0.1834, "step": 16319 }, { "epoch": 0.2910855063674955, "grad_norm": 0.20703548192977905, "learning_rate": 4.46411474416277e-05, "loss": 0.1561, "step": 16320 }, { "epoch": 0.29110334248920916, "grad_norm": 0.29566439986228943, "learning_rate": 4.464018442900506e-05, "loss": 0.2282, "step": 16321 }, { "epoch": 0.29112117861092285, "grad_norm": 0.2513722777366638, "learning_rate": 4.4639221340250284e-05, "loss": 0.2127, "step": 16322 }, { "epoch": 0.29113901473263654, "grad_norm": 0.27034732699394226, "learning_rate": 4.46382581753671e-05, "loss": 0.212, "step": 16323 }, { "epoch": 0.2911568508543502, "grad_norm": 0.3016135096549988, "learning_rate": 4.463729493435925e-05, "loss": 0.1854, "step": 16324 }, { "epoch": 0.2911746869760639, "grad_norm": 0.22934338450431824, "learning_rate": 4.463633161723045e-05, "loss": 0.1349, "step": 16325 }, { "epoch": 0.2911925230977776, "grad_norm": 0.27083444595336914, "learning_rate": 4.463536822398446e-05, "loss": 0.1493, "step": 16326 }, { "epoch": 0.2912103592194913, "grad_norm": 0.23818299174308777, "learning_rate": 4.463440475462499e-05, "loss": 0.2084, "step": 16327 }, { "epoch": 0.29122819534120503, "grad_norm": 0.2544391453266144, "learning_rate": 4.463344120915579e-05, "loss": 0.1728, "step": 16328 }, { "epoch": 0.2912460314629187, "grad_norm": 0.27769050002098083, "learning_rate": 4.4632477587580596e-05, "loss": 0.1524, "step": 16329 }, { "epoch": 0.2912638675846324, "grad_norm": 0.2713675796985626, "learning_rate": 4.463151388990313e-05, "loss": 0.1907, "step": 16330 }, { "epoch": 0.2912817037063461, "grad_norm": 0.25703898072242737, "learning_rate": 4.463055011612715e-05, "loss": 0.2383, "step": 16331 }, { "epoch": 0.2912995398280598, "grad_norm": 0.32257533073425293, "learning_rate": 4.462958626625636e-05, "loss": 0.1434, "step": 16332 }, { "epoch": 0.29131737594977347, "grad_norm": 0.18602409958839417, "learning_rate": 4.4628622340294526e-05, "loss": 0.1671, "step": 16333 }, { "epoch": 0.29133521207148716, "grad_norm": 0.34978222846984863, "learning_rate": 4.462765833824536e-05, "loss": 0.2345, "step": 16334 }, { "epoch": 0.29135304819320085, "grad_norm": 0.2618108093738556, "learning_rate": 4.4626694260112625e-05, "loss": 0.1806, "step": 16335 }, { "epoch": 0.2913708843149146, "grad_norm": 0.23017603158950806, "learning_rate": 4.4625730105900034e-05, "loss": 0.1656, "step": 16336 }, { "epoch": 0.2913887204366283, "grad_norm": 0.3865819275379181, "learning_rate": 4.462476587561134e-05, "loss": 0.2095, "step": 16337 }, { "epoch": 0.29140655655834197, "grad_norm": 0.2807057499885559, "learning_rate": 4.462380156925027e-05, "loss": 0.156, "step": 16338 }, { "epoch": 0.29142439268005566, "grad_norm": 0.3241732716560364, "learning_rate": 4.4622837186820574e-05, "loss": 0.1944, "step": 16339 }, { "epoch": 0.29144222880176934, "grad_norm": 0.26081281900405884, "learning_rate": 4.462187272832597e-05, "loss": 0.1735, "step": 16340 }, { "epoch": 0.29146006492348303, "grad_norm": 0.3468260169029236, "learning_rate": 4.4620908193770225e-05, "loss": 0.2009, "step": 16341 }, { "epoch": 0.2914779010451967, "grad_norm": 0.25653132796287537, "learning_rate": 4.461994358315706e-05, "loss": 0.2034, "step": 16342 }, { "epoch": 0.2914957371669104, "grad_norm": 0.2266485095024109, "learning_rate": 4.461897889649021e-05, "loss": 0.171, "step": 16343 }, { "epoch": 0.2915135732886241, "grad_norm": 0.26951655745506287, "learning_rate": 4.4618014133773435e-05, "loss": 0.2089, "step": 16344 }, { "epoch": 0.29153140941033784, "grad_norm": 0.4335484206676483, "learning_rate": 4.4617049295010446e-05, "loss": 0.1743, "step": 16345 }, { "epoch": 0.2915492455320515, "grad_norm": 0.2608528137207031, "learning_rate": 4.4616084380205013e-05, "loss": 0.198, "step": 16346 }, { "epoch": 0.2915670816537652, "grad_norm": 0.24536316096782684, "learning_rate": 4.461511938936085e-05, "loss": 0.2209, "step": 16347 }, { "epoch": 0.2915849177754789, "grad_norm": 0.26365524530410767, "learning_rate": 4.461415432248172e-05, "loss": 0.207, "step": 16348 }, { "epoch": 0.2916027538971926, "grad_norm": 0.16918693482875824, "learning_rate": 4.461318917957135e-05, "loss": 0.1601, "step": 16349 }, { "epoch": 0.2916205900189063, "grad_norm": 0.25554358959198, "learning_rate": 4.461222396063348e-05, "loss": 0.185, "step": 16350 }, { "epoch": 0.29163842614061997, "grad_norm": 0.25917184352874756, "learning_rate": 4.461125866567185e-05, "loss": 0.1603, "step": 16351 }, { "epoch": 0.29165626226233365, "grad_norm": 0.34760043025016785, "learning_rate": 4.461029329469022e-05, "loss": 0.2043, "step": 16352 }, { "epoch": 0.2916740983840474, "grad_norm": 0.32935231924057007, "learning_rate": 4.460932784769232e-05, "loss": 0.1553, "step": 16353 }, { "epoch": 0.2916919345057611, "grad_norm": 0.31214600801467896, "learning_rate": 4.4608362324681885e-05, "loss": 0.2333, "step": 16354 }, { "epoch": 0.2917097706274748, "grad_norm": 0.25096604228019714, "learning_rate": 4.4607396725662665e-05, "loss": 0.1409, "step": 16355 }, { "epoch": 0.29172760674918846, "grad_norm": 0.27495333552360535, "learning_rate": 4.4606431050638406e-05, "loss": 0.1643, "step": 16356 }, { "epoch": 0.29174544287090215, "grad_norm": 0.31411153078079224, "learning_rate": 4.460546529961285e-05, "loss": 0.2227, "step": 16357 }, { "epoch": 0.29176327899261584, "grad_norm": 0.4533727765083313, "learning_rate": 4.460449947258974e-05, "loss": 0.2987, "step": 16358 }, { "epoch": 0.2917811151143295, "grad_norm": 0.23517009615898132, "learning_rate": 4.4603533569572815e-05, "loss": 0.2008, "step": 16359 }, { "epoch": 0.2917989512360432, "grad_norm": 0.22573870420455933, "learning_rate": 4.4602567590565827e-05, "loss": 0.1616, "step": 16360 }, { "epoch": 0.2918167873577569, "grad_norm": 0.21691378951072693, "learning_rate": 4.4601601535572515e-05, "loss": 0.17, "step": 16361 }, { "epoch": 0.29183462347947065, "grad_norm": 0.365568071603775, "learning_rate": 4.460063540459663e-05, "loss": 0.2, "step": 16362 }, { "epoch": 0.29185245960118433, "grad_norm": 0.2749923765659332, "learning_rate": 4.45996691976419e-05, "loss": 0.1711, "step": 16363 }, { "epoch": 0.291870295722898, "grad_norm": 0.3170697093009949, "learning_rate": 4.459870291471209e-05, "loss": 0.1839, "step": 16364 }, { "epoch": 0.2918881318446117, "grad_norm": 0.23263536393642426, "learning_rate": 4.459773655581094e-05, "loss": 0.1599, "step": 16365 }, { "epoch": 0.2919059679663254, "grad_norm": 0.3439905047416687, "learning_rate": 4.459677012094219e-05, "loss": 0.2006, "step": 16366 }, { "epoch": 0.2919238040880391, "grad_norm": 0.2759602963924408, "learning_rate": 4.459580361010959e-05, "loss": 0.162, "step": 16367 }, { "epoch": 0.2919416402097528, "grad_norm": 0.247183158993721, "learning_rate": 4.4594837023316896e-05, "loss": 0.1624, "step": 16368 }, { "epoch": 0.29195947633146646, "grad_norm": 0.2692636549472809, "learning_rate": 4.4593870360567836e-05, "loss": 0.1234, "step": 16369 }, { "epoch": 0.2919773124531802, "grad_norm": 0.37884974479675293, "learning_rate": 4.4592903621866165e-05, "loss": 0.1377, "step": 16370 }, { "epoch": 0.2919951485748939, "grad_norm": 0.23933671414852142, "learning_rate": 4.459193680721564e-05, "loss": 0.133, "step": 16371 }, { "epoch": 0.2920129846966076, "grad_norm": 0.3173424005508423, "learning_rate": 4.459096991662e-05, "loss": 0.1654, "step": 16372 }, { "epoch": 0.29203082081832127, "grad_norm": 0.3288251459598541, "learning_rate": 4.459000295008299e-05, "loss": 0.1909, "step": 16373 }, { "epoch": 0.29204865694003496, "grad_norm": 0.3331115245819092, "learning_rate": 4.4589035907608365e-05, "loss": 0.2064, "step": 16374 }, { "epoch": 0.29206649306174864, "grad_norm": 0.2873155176639557, "learning_rate": 4.4588068789199875e-05, "loss": 0.2163, "step": 16375 }, { "epoch": 0.29208432918346233, "grad_norm": 0.24663789570331573, "learning_rate": 4.4587101594861266e-05, "loss": 0.183, "step": 16376 }, { "epoch": 0.292102165305176, "grad_norm": 0.35439860820770264, "learning_rate": 4.4586134324596276e-05, "loss": 0.2341, "step": 16377 }, { "epoch": 0.29212000142688976, "grad_norm": 0.2999183237552643, "learning_rate": 4.4585166978408674e-05, "loss": 0.162, "step": 16378 }, { "epoch": 0.29213783754860345, "grad_norm": 0.29912668466567993, "learning_rate": 4.45841995563022e-05, "loss": 0.217, "step": 16379 }, { "epoch": 0.29215567367031714, "grad_norm": 0.23091265559196472, "learning_rate": 4.4583232058280594e-05, "loss": 0.1503, "step": 16380 }, { "epoch": 0.2921735097920308, "grad_norm": 0.2601783275604248, "learning_rate": 4.4582264484347625e-05, "loss": 0.1588, "step": 16381 }, { "epoch": 0.2921913459137445, "grad_norm": 0.4871737062931061, "learning_rate": 4.458129683450703e-05, "loss": 0.2216, "step": 16382 }, { "epoch": 0.2922091820354582, "grad_norm": 0.2018498182296753, "learning_rate": 4.458032910876258e-05, "loss": 0.1197, "step": 16383 }, { "epoch": 0.2922270181571719, "grad_norm": 0.28650692105293274, "learning_rate": 4.4579361307117994e-05, "loss": 0.197, "step": 16384 }, { "epoch": 0.2922448542788856, "grad_norm": 0.3046768307685852, "learning_rate": 4.457839342957705e-05, "loss": 0.2482, "step": 16385 }, { "epoch": 0.29226269040059927, "grad_norm": 0.19669987261295319, "learning_rate": 4.4577425476143484e-05, "loss": 0.1836, "step": 16386 }, { "epoch": 0.292280526522313, "grad_norm": 0.18548311293125153, "learning_rate": 4.4576457446821065e-05, "loss": 0.1525, "step": 16387 }, { "epoch": 0.2922983626440267, "grad_norm": 0.3578382134437561, "learning_rate": 4.457548934161353e-05, "loss": 0.1644, "step": 16388 }, { "epoch": 0.2923161987657404, "grad_norm": 0.2788953185081482, "learning_rate": 4.457452116052463e-05, "loss": 0.1934, "step": 16389 }, { "epoch": 0.2923340348874541, "grad_norm": 0.20626167953014374, "learning_rate": 4.457355290355814e-05, "loss": 0.1689, "step": 16390 }, { "epoch": 0.29235187100916776, "grad_norm": 0.3277955949306488, "learning_rate": 4.4572584570717786e-05, "loss": 0.1789, "step": 16391 }, { "epoch": 0.29236970713088145, "grad_norm": 0.24848464131355286, "learning_rate": 4.457161616200733e-05, "loss": 0.1735, "step": 16392 }, { "epoch": 0.29238754325259514, "grad_norm": 0.3722819983959198, "learning_rate": 4.457064767743055e-05, "loss": 0.2024, "step": 16393 }, { "epoch": 0.2924053793743088, "grad_norm": 0.2871563136577606, "learning_rate": 4.456967911699117e-05, "loss": 0.1575, "step": 16394 }, { "epoch": 0.29242321549602257, "grad_norm": 0.2498369663953781, "learning_rate": 4.456871048069295e-05, "loss": 0.1658, "step": 16395 }, { "epoch": 0.29244105161773626, "grad_norm": 0.3902013301849365, "learning_rate": 4.456774176853965e-05, "loss": 0.1969, "step": 16396 }, { "epoch": 0.29245888773944995, "grad_norm": 0.2376020848751068, "learning_rate": 4.4566772980535035e-05, "loss": 0.1741, "step": 16397 }, { "epoch": 0.29247672386116363, "grad_norm": 0.31966328620910645, "learning_rate": 4.456580411668284e-05, "loss": 0.1481, "step": 16398 }, { "epoch": 0.2924945599828773, "grad_norm": 0.2637951076030731, "learning_rate": 4.456483517698683e-05, "loss": 0.1791, "step": 16399 }, { "epoch": 0.292512396104591, "grad_norm": 0.24155883491039276, "learning_rate": 4.4563866161450764e-05, "loss": 0.154, "step": 16400 }, { "epoch": 0.2925302322263047, "grad_norm": 0.2941102087497711, "learning_rate": 4.456289707007839e-05, "loss": 0.1922, "step": 16401 }, { "epoch": 0.2925480683480184, "grad_norm": 0.3191867768764496, "learning_rate": 4.456192790287348e-05, "loss": 0.2534, "step": 16402 }, { "epoch": 0.2925659044697321, "grad_norm": 0.23790162801742554, "learning_rate": 4.456095865983978e-05, "loss": 0.1942, "step": 16403 }, { "epoch": 0.2925837405914458, "grad_norm": 0.26743048429489136, "learning_rate": 4.4559989340981045e-05, "loss": 0.1774, "step": 16404 }, { "epoch": 0.2926015767131595, "grad_norm": 0.27811184525489807, "learning_rate": 4.455901994630103e-05, "loss": 0.22, "step": 16405 }, { "epoch": 0.2926194128348732, "grad_norm": 0.3244760036468506, "learning_rate": 4.4558050475803505e-05, "loss": 0.16, "step": 16406 }, { "epoch": 0.2926372489565869, "grad_norm": 0.2791687250137329, "learning_rate": 4.455708092949222e-05, "loss": 0.1528, "step": 16407 }, { "epoch": 0.29265508507830057, "grad_norm": 0.27157244086265564, "learning_rate": 4.455611130737093e-05, "loss": 0.1666, "step": 16408 }, { "epoch": 0.29267292120001426, "grad_norm": 0.31352323293685913, "learning_rate": 4.4555141609443406e-05, "loss": 0.2554, "step": 16409 }, { "epoch": 0.29269075732172795, "grad_norm": 0.2395026534795761, "learning_rate": 4.45541718357134e-05, "loss": 0.1732, "step": 16410 }, { "epoch": 0.29270859344344163, "grad_norm": 0.3108391761779785, "learning_rate": 4.455320198618466e-05, "loss": 0.1944, "step": 16411 }, { "epoch": 0.2927264295651554, "grad_norm": 0.38609376549720764, "learning_rate": 4.455223206086097e-05, "loss": 0.1585, "step": 16412 }, { "epoch": 0.29274426568686907, "grad_norm": 0.28550460934638977, "learning_rate": 4.4551262059746056e-05, "loss": 0.191, "step": 16413 }, { "epoch": 0.29276210180858275, "grad_norm": 0.270965039730072, "learning_rate": 4.455029198284371e-05, "loss": 0.1811, "step": 16414 }, { "epoch": 0.29277993793029644, "grad_norm": 0.3054213225841522, "learning_rate": 4.4549321830157674e-05, "loss": 0.1804, "step": 16415 }, { "epoch": 0.29279777405201013, "grad_norm": 0.1913258284330368, "learning_rate": 4.4548351601691726e-05, "loss": 0.1709, "step": 16416 }, { "epoch": 0.2928156101737238, "grad_norm": 0.2570454478263855, "learning_rate": 4.45473812974496e-05, "loss": 0.1335, "step": 16417 }, { "epoch": 0.2928334462954375, "grad_norm": 0.2797536253929138, "learning_rate": 4.454641091743509e-05, "loss": 0.1657, "step": 16418 }, { "epoch": 0.2928512824171512, "grad_norm": 0.20599474012851715, "learning_rate": 4.454544046165192e-05, "loss": 0.1553, "step": 16419 }, { "epoch": 0.29286911853886494, "grad_norm": 0.2767390012741089, "learning_rate": 4.454446993010389e-05, "loss": 0.167, "step": 16420 }, { "epoch": 0.2928869546605786, "grad_norm": 0.2473897486925125, "learning_rate": 4.454349932279474e-05, "loss": 0.2034, "step": 16421 }, { "epoch": 0.2929047907822923, "grad_norm": 0.3787566125392914, "learning_rate": 4.4542528639728226e-05, "loss": 0.1621, "step": 16422 }, { "epoch": 0.292922626904006, "grad_norm": 0.31824785470962524, "learning_rate": 4.454155788090813e-05, "loss": 0.1533, "step": 16423 }, { "epoch": 0.2929404630257197, "grad_norm": 0.3889113962650299, "learning_rate": 4.454058704633821e-05, "loss": 0.1981, "step": 16424 }, { "epoch": 0.2929582991474334, "grad_norm": 0.20811069011688232, "learning_rate": 4.453961613602221e-05, "loss": 0.145, "step": 16425 }, { "epoch": 0.29297613526914706, "grad_norm": 0.26095858216285706, "learning_rate": 4.453864514996392e-05, "loss": 0.1521, "step": 16426 }, { "epoch": 0.29299397139086075, "grad_norm": 0.2778921127319336, "learning_rate": 4.453767408816709e-05, "loss": 0.1668, "step": 16427 }, { "epoch": 0.29301180751257444, "grad_norm": 0.2969571053981781, "learning_rate": 4.4536702950635494e-05, "loss": 0.2083, "step": 16428 }, { "epoch": 0.2930296436342882, "grad_norm": 0.2110590934753418, "learning_rate": 4.453573173737289e-05, "loss": 0.188, "step": 16429 }, { "epoch": 0.29304747975600187, "grad_norm": 0.2499048411846161, "learning_rate": 4.4534760448383026e-05, "loss": 0.2003, "step": 16430 }, { "epoch": 0.29306531587771556, "grad_norm": 0.3023947775363922, "learning_rate": 4.45337890836697e-05, "loss": 0.1991, "step": 16431 }, { "epoch": 0.29308315199942925, "grad_norm": 0.28928518295288086, "learning_rate": 4.453281764323666e-05, "loss": 0.1671, "step": 16432 }, { "epoch": 0.29310098812114294, "grad_norm": 0.22792431712150574, "learning_rate": 4.453184612708766e-05, "loss": 0.151, "step": 16433 }, { "epoch": 0.2931188242428566, "grad_norm": 0.1910247653722763, "learning_rate": 4.453087453522649e-05, "loss": 0.124, "step": 16434 }, { "epoch": 0.2931366603645703, "grad_norm": 0.2786944508552551, "learning_rate": 4.4529902867656906e-05, "loss": 0.1801, "step": 16435 }, { "epoch": 0.293154496486284, "grad_norm": 0.284684419631958, "learning_rate": 4.4528931124382666e-05, "loss": 0.1949, "step": 16436 }, { "epoch": 0.29317233260799774, "grad_norm": 0.23884810507297516, "learning_rate": 4.452795930540754e-05, "loss": 0.1779, "step": 16437 }, { "epoch": 0.29319016872971143, "grad_norm": 0.2093641757965088, "learning_rate": 4.452698741073531e-05, "loss": 0.1926, "step": 16438 }, { "epoch": 0.2932080048514251, "grad_norm": 0.21617797017097473, "learning_rate": 4.452601544036972e-05, "loss": 0.1285, "step": 16439 }, { "epoch": 0.2932258409731388, "grad_norm": 0.3104894459247589, "learning_rate": 4.452504339431456e-05, "loss": 0.1905, "step": 16440 }, { "epoch": 0.2932436770948525, "grad_norm": 0.2498467117547989, "learning_rate": 4.4524071272573586e-05, "loss": 0.2091, "step": 16441 }, { "epoch": 0.2932615132165662, "grad_norm": 0.23353685438632965, "learning_rate": 4.4523099075150563e-05, "loss": 0.1366, "step": 16442 }, { "epoch": 0.29327934933827987, "grad_norm": 0.4609735906124115, "learning_rate": 4.452212680204927e-05, "loss": 0.2211, "step": 16443 }, { "epoch": 0.29329718545999356, "grad_norm": 0.2026536762714386, "learning_rate": 4.452115445327347e-05, "loss": 0.1319, "step": 16444 }, { "epoch": 0.29331502158170725, "grad_norm": 0.23939964175224304, "learning_rate": 4.452018202882694e-05, "loss": 0.1968, "step": 16445 }, { "epoch": 0.293332857703421, "grad_norm": 0.458141028881073, "learning_rate": 4.4519209528713436e-05, "loss": 0.2106, "step": 16446 }, { "epoch": 0.2933506938251347, "grad_norm": 0.21322348713874817, "learning_rate": 4.451823695293673e-05, "loss": 0.1871, "step": 16447 }, { "epoch": 0.29336852994684837, "grad_norm": 0.22875843942165375, "learning_rate": 4.451726430150061e-05, "loss": 0.1857, "step": 16448 }, { "epoch": 0.29338636606856205, "grad_norm": 0.23780286312103271, "learning_rate": 4.4516291574408815e-05, "loss": 0.1526, "step": 16449 }, { "epoch": 0.29340420219027574, "grad_norm": 0.22688932716846466, "learning_rate": 4.4515318771665134e-05, "loss": 0.1538, "step": 16450 }, { "epoch": 0.29342203831198943, "grad_norm": 0.25661998987197876, "learning_rate": 4.451434589327335e-05, "loss": 0.2017, "step": 16451 }, { "epoch": 0.2934398744337031, "grad_norm": 0.28685420751571655, "learning_rate": 4.4513372939237217e-05, "loss": 0.1947, "step": 16452 }, { "epoch": 0.2934577105554168, "grad_norm": 0.2017870396375656, "learning_rate": 4.45123999095605e-05, "loss": 0.1575, "step": 16453 }, { "epoch": 0.29347554667713055, "grad_norm": 0.17266018688678741, "learning_rate": 4.451142680424699e-05, "loss": 0.1315, "step": 16454 }, { "epoch": 0.29349338279884424, "grad_norm": 0.24347445368766785, "learning_rate": 4.4510453623300455e-05, "loss": 0.1705, "step": 16455 }, { "epoch": 0.2935112189205579, "grad_norm": 0.34976738691329956, "learning_rate": 4.450948036672466e-05, "loss": 0.2077, "step": 16456 }, { "epoch": 0.2935290550422716, "grad_norm": 0.25179556012153625, "learning_rate": 4.450850703452338e-05, "loss": 0.1719, "step": 16457 }, { "epoch": 0.2935468911639853, "grad_norm": 0.26511818170547485, "learning_rate": 4.450753362670039e-05, "loss": 0.2081, "step": 16458 }, { "epoch": 0.293564727285699, "grad_norm": 0.192202627658844, "learning_rate": 4.450656014325946e-05, "loss": 0.1345, "step": 16459 }, { "epoch": 0.2935825634074127, "grad_norm": 0.19687429070472717, "learning_rate": 4.450558658420436e-05, "loss": 0.1406, "step": 16460 }, { "epoch": 0.29360039952912637, "grad_norm": 0.22294986248016357, "learning_rate": 4.450461294953888e-05, "loss": 0.2098, "step": 16461 }, { "epoch": 0.29361823565084005, "grad_norm": 0.2620992660522461, "learning_rate": 4.450363923926678e-05, "loss": 0.209, "step": 16462 }, { "epoch": 0.2936360717725538, "grad_norm": 0.2383774369955063, "learning_rate": 4.4502665453391835e-05, "loss": 0.1484, "step": 16463 }, { "epoch": 0.2936539078942675, "grad_norm": 0.16639412939548492, "learning_rate": 4.450169159191783e-05, "loss": 0.1278, "step": 16464 }, { "epoch": 0.2936717440159812, "grad_norm": 0.32829856872558594, "learning_rate": 4.450071765484852e-05, "loss": 0.1683, "step": 16465 }, { "epoch": 0.29368958013769486, "grad_norm": 0.3034323751926422, "learning_rate": 4.449974364218771e-05, "loss": 0.1302, "step": 16466 }, { "epoch": 0.29370741625940855, "grad_norm": 0.3968747556209564, "learning_rate": 4.449876955393914e-05, "loss": 0.1726, "step": 16467 }, { "epoch": 0.29372525238112224, "grad_norm": 0.25039324164390564, "learning_rate": 4.449779539010661e-05, "loss": 0.1478, "step": 16468 }, { "epoch": 0.2937430885028359, "grad_norm": 0.2308914214372635, "learning_rate": 4.44968211506939e-05, "loss": 0.1033, "step": 16469 }, { "epoch": 0.2937609246245496, "grad_norm": 0.2924995720386505, "learning_rate": 4.449584683570477e-05, "loss": 0.2233, "step": 16470 }, { "epoch": 0.29377876074626336, "grad_norm": 0.2871107757091522, "learning_rate": 4.449487244514301e-05, "loss": 0.2046, "step": 16471 }, { "epoch": 0.29379659686797704, "grad_norm": 0.2542024254798889, "learning_rate": 4.4493897979012386e-05, "loss": 0.1109, "step": 16472 }, { "epoch": 0.29381443298969073, "grad_norm": 0.296866774559021, "learning_rate": 4.449292343731668e-05, "loss": 0.1886, "step": 16473 }, { "epoch": 0.2938322691114044, "grad_norm": 0.32440072298049927, "learning_rate": 4.449194882005967e-05, "loss": 0.192, "step": 16474 }, { "epoch": 0.2938501052331181, "grad_norm": 0.35222840309143066, "learning_rate": 4.449097412724513e-05, "loss": 0.2455, "step": 16475 }, { "epoch": 0.2938679413548318, "grad_norm": 0.2419472187757492, "learning_rate": 4.4489999358876855e-05, "loss": 0.1846, "step": 16476 }, { "epoch": 0.2938857774765455, "grad_norm": 0.29099488258361816, "learning_rate": 4.44890245149586e-05, "loss": 0.1922, "step": 16477 }, { "epoch": 0.2939036135982592, "grad_norm": 0.2634369432926178, "learning_rate": 4.448804959549416e-05, "loss": 0.2269, "step": 16478 }, { "epoch": 0.2939214497199729, "grad_norm": 0.2559466063976288, "learning_rate": 4.44870746004873e-05, "loss": 0.1766, "step": 16479 }, { "epoch": 0.2939392858416866, "grad_norm": 0.253835529088974, "learning_rate": 4.4486099529941825e-05, "loss": 0.1702, "step": 16480 }, { "epoch": 0.2939571219634003, "grad_norm": 0.256608784198761, "learning_rate": 4.4485124383861485e-05, "loss": 0.2, "step": 16481 }, { "epoch": 0.293974958085114, "grad_norm": 0.24180968105793, "learning_rate": 4.448414916225008e-05, "loss": 0.1519, "step": 16482 }, { "epoch": 0.29399279420682767, "grad_norm": 0.2222093939781189, "learning_rate": 4.448317386511137e-05, "loss": 0.1588, "step": 16483 }, { "epoch": 0.29401063032854136, "grad_norm": 0.3812173902988434, "learning_rate": 4.448219849244916e-05, "loss": 0.223, "step": 16484 }, { "epoch": 0.29402846645025504, "grad_norm": 0.2957015037536621, "learning_rate": 4.4481223044267216e-05, "loss": 0.1788, "step": 16485 }, { "epoch": 0.29404630257196873, "grad_norm": 0.2898963391780853, "learning_rate": 4.448024752056933e-05, "loss": 0.1845, "step": 16486 }, { "epoch": 0.2940641386936824, "grad_norm": 0.21822543442249298, "learning_rate": 4.4479271921359275e-05, "loss": 0.1415, "step": 16487 }, { "epoch": 0.29408197481539616, "grad_norm": 0.26728910207748413, "learning_rate": 4.447829624664083e-05, "loss": 0.2164, "step": 16488 }, { "epoch": 0.29409981093710985, "grad_norm": 0.2585594356060028, "learning_rate": 4.447732049641778e-05, "loss": 0.1235, "step": 16489 }, { "epoch": 0.29411764705882354, "grad_norm": 0.26438191533088684, "learning_rate": 4.44763446706939e-05, "loss": 0.1874, "step": 16490 }, { "epoch": 0.2941354831805372, "grad_norm": 0.2708672285079956, "learning_rate": 4.4475368769473e-05, "loss": 0.236, "step": 16491 }, { "epoch": 0.2941533193022509, "grad_norm": 0.16733378171920776, "learning_rate": 4.447439279275884e-05, "loss": 0.1544, "step": 16492 }, { "epoch": 0.2941711554239646, "grad_norm": 0.21120314300060272, "learning_rate": 4.44734167405552e-05, "loss": 0.191, "step": 16493 }, { "epoch": 0.2941889915456783, "grad_norm": 0.3308258354663849, "learning_rate": 4.4472440612865865e-05, "loss": 0.2124, "step": 16494 }, { "epoch": 0.294206827667392, "grad_norm": 0.264169305562973, "learning_rate": 4.4471464409694635e-05, "loss": 0.1778, "step": 16495 }, { "epoch": 0.2942246637891057, "grad_norm": 0.22388221323490143, "learning_rate": 4.447048813104528e-05, "loss": 0.2072, "step": 16496 }, { "epoch": 0.2942424999108194, "grad_norm": 0.2517063617706299, "learning_rate": 4.446951177692159e-05, "loss": 0.2238, "step": 16497 }, { "epoch": 0.2942603360325331, "grad_norm": 0.2153603434562683, "learning_rate": 4.446853534732735e-05, "loss": 0.1738, "step": 16498 }, { "epoch": 0.2942781721542468, "grad_norm": 0.241929829120636, "learning_rate": 4.446755884226635e-05, "loss": 0.1987, "step": 16499 }, { "epoch": 0.2942960082759605, "grad_norm": 0.2276146560907364, "learning_rate": 4.446658226174235e-05, "loss": 0.1773, "step": 16500 }, { "epoch": 0.29431384439767416, "grad_norm": 0.2956530451774597, "learning_rate": 4.446560560575917e-05, "loss": 0.1881, "step": 16501 }, { "epoch": 0.29433168051938785, "grad_norm": 0.36412230134010315, "learning_rate": 4.446462887432056e-05, "loss": 0.1959, "step": 16502 }, { "epoch": 0.29434951664110154, "grad_norm": 0.20981663465499878, "learning_rate": 4.4463652067430336e-05, "loss": 0.1561, "step": 16503 }, { "epoch": 0.2943673527628152, "grad_norm": 0.2986816465854645, "learning_rate": 4.446267518509228e-05, "loss": 0.1741, "step": 16504 }, { "epoch": 0.29438518888452897, "grad_norm": 0.2284894436597824, "learning_rate": 4.4461698227310164e-05, "loss": 0.1851, "step": 16505 }, { "epoch": 0.29440302500624266, "grad_norm": 0.2480347603559494, "learning_rate": 4.4460721194087785e-05, "loss": 0.1842, "step": 16506 }, { "epoch": 0.29442086112795635, "grad_norm": 0.3044646978378296, "learning_rate": 4.4459744085428935e-05, "loss": 0.1632, "step": 16507 }, { "epoch": 0.29443869724967003, "grad_norm": 0.30180442333221436, "learning_rate": 4.445876690133739e-05, "loss": 0.2162, "step": 16508 }, { "epoch": 0.2944565333713837, "grad_norm": 0.29936933517456055, "learning_rate": 4.445778964181695e-05, "loss": 0.1624, "step": 16509 }, { "epoch": 0.2944743694930974, "grad_norm": 0.26864272356033325, "learning_rate": 4.445681230687139e-05, "loss": 0.1276, "step": 16510 }, { "epoch": 0.2944922056148111, "grad_norm": 0.2878575026988983, "learning_rate": 4.445583489650451e-05, "loss": 0.1557, "step": 16511 }, { "epoch": 0.2945100417365248, "grad_norm": 0.3502851128578186, "learning_rate": 4.44548574107201e-05, "loss": 0.1874, "step": 16512 }, { "epoch": 0.29452787785823853, "grad_norm": 0.22545097768306732, "learning_rate": 4.445387984952193e-05, "loss": 0.1757, "step": 16513 }, { "epoch": 0.2945457139799522, "grad_norm": 0.20361314713954926, "learning_rate": 4.445290221291381e-05, "loss": 0.1689, "step": 16514 }, { "epoch": 0.2945635501016659, "grad_norm": 0.2162192314863205, "learning_rate": 4.445192450089952e-05, "loss": 0.183, "step": 16515 }, { "epoch": 0.2945813862233796, "grad_norm": 0.2695448696613312, "learning_rate": 4.445094671348285e-05, "loss": 0.1736, "step": 16516 }, { "epoch": 0.2945992223450933, "grad_norm": 0.32857251167297363, "learning_rate": 4.4449968850667595e-05, "loss": 0.1821, "step": 16517 }, { "epoch": 0.29461705846680697, "grad_norm": 0.36193594336509705, "learning_rate": 4.444899091245754e-05, "loss": 0.2042, "step": 16518 }, { "epoch": 0.29463489458852066, "grad_norm": 0.24590055644512177, "learning_rate": 4.444801289885648e-05, "loss": 0.1672, "step": 16519 }, { "epoch": 0.29465273071023435, "grad_norm": 0.29725736379623413, "learning_rate": 4.444703480986821e-05, "loss": 0.2214, "step": 16520 }, { "epoch": 0.2946705668319481, "grad_norm": 0.3494088053703308, "learning_rate": 4.4446056645496515e-05, "loss": 0.1908, "step": 16521 }, { "epoch": 0.2946884029536618, "grad_norm": 0.22640679776668549, "learning_rate": 4.4445078405745186e-05, "loss": 0.1945, "step": 16522 }, { "epoch": 0.29470623907537546, "grad_norm": 0.1627998650074005, "learning_rate": 4.4444100090618014e-05, "loss": 0.1245, "step": 16523 }, { "epoch": 0.29472407519708915, "grad_norm": 0.29106754064559937, "learning_rate": 4.4443121700118795e-05, "loss": 0.1671, "step": 16524 }, { "epoch": 0.29474191131880284, "grad_norm": 0.38282686471939087, "learning_rate": 4.444214323425133e-05, "loss": 0.2336, "step": 16525 }, { "epoch": 0.29475974744051653, "grad_norm": 0.22827591001987457, "learning_rate": 4.4441164693019385e-05, "loss": 0.1762, "step": 16526 }, { "epoch": 0.2947775835622302, "grad_norm": 0.3506973087787628, "learning_rate": 4.444018607642679e-05, "loss": 0.16, "step": 16527 }, { "epoch": 0.2947954196839439, "grad_norm": 0.2809859812259674, "learning_rate": 4.4439207384477313e-05, "loss": 0.1977, "step": 16528 }, { "epoch": 0.2948132558056576, "grad_norm": 0.21978464722633362, "learning_rate": 4.443822861717475e-05, "loss": 0.1747, "step": 16529 }, { "epoch": 0.29483109192737134, "grad_norm": 0.27921798825263977, "learning_rate": 4.44372497745229e-05, "loss": 0.1817, "step": 16530 }, { "epoch": 0.294848928049085, "grad_norm": 0.23094773292541504, "learning_rate": 4.4436270856525555e-05, "loss": 0.1646, "step": 16531 }, { "epoch": 0.2948667641707987, "grad_norm": 0.31329360604286194, "learning_rate": 4.443529186318651e-05, "loss": 0.154, "step": 16532 }, { "epoch": 0.2948846002925124, "grad_norm": 0.3997662663459778, "learning_rate": 4.443431279450957e-05, "loss": 0.2434, "step": 16533 }, { "epoch": 0.2949024364142261, "grad_norm": 0.287011981010437, "learning_rate": 4.443333365049851e-05, "loss": 0.1735, "step": 16534 }, { "epoch": 0.2949202725359398, "grad_norm": 0.24818311631679535, "learning_rate": 4.443235443115715e-05, "loss": 0.1732, "step": 16535 }, { "epoch": 0.29493810865765346, "grad_norm": 0.256538450717926, "learning_rate": 4.4431375136489264e-05, "loss": 0.1892, "step": 16536 }, { "epoch": 0.29495594477936715, "grad_norm": 0.31276530027389526, "learning_rate": 4.4430395766498654e-05, "loss": 0.1708, "step": 16537 }, { "epoch": 0.2949737809010809, "grad_norm": 0.3159414529800415, "learning_rate": 4.442941632118912e-05, "loss": 0.1441, "step": 16538 }, { "epoch": 0.2949916170227946, "grad_norm": 0.21607443690299988, "learning_rate": 4.4428436800564464e-05, "loss": 0.1666, "step": 16539 }, { "epoch": 0.29500945314450827, "grad_norm": 0.287865549325943, "learning_rate": 4.442745720462847e-05, "loss": 0.138, "step": 16540 }, { "epoch": 0.29502728926622196, "grad_norm": 0.2994736433029175, "learning_rate": 4.4426477533384944e-05, "loss": 0.2064, "step": 16541 }, { "epoch": 0.29504512538793565, "grad_norm": 0.2550939917564392, "learning_rate": 4.4425497786837685e-05, "loss": 0.1722, "step": 16542 }, { "epoch": 0.29506296150964934, "grad_norm": 0.26640585064888, "learning_rate": 4.4424517964990486e-05, "loss": 0.1566, "step": 16543 }, { "epoch": 0.295080797631363, "grad_norm": 0.21472834050655365, "learning_rate": 4.4423538067847146e-05, "loss": 0.1894, "step": 16544 }, { "epoch": 0.2950986337530767, "grad_norm": 0.2908587157726288, "learning_rate": 4.442255809541146e-05, "loss": 0.1816, "step": 16545 }, { "epoch": 0.2951164698747904, "grad_norm": 0.3289947807788849, "learning_rate": 4.442157804768723e-05, "loss": 0.2153, "step": 16546 }, { "epoch": 0.29513430599650414, "grad_norm": 0.24969275295734406, "learning_rate": 4.442059792467827e-05, "loss": 0.1676, "step": 16547 }, { "epoch": 0.29515214211821783, "grad_norm": 0.23231945931911469, "learning_rate": 4.441961772638834e-05, "loss": 0.1153, "step": 16548 }, { "epoch": 0.2951699782399315, "grad_norm": 0.2817572057247162, "learning_rate": 4.441863745282128e-05, "loss": 0.1612, "step": 16549 }, { "epoch": 0.2951878143616452, "grad_norm": 0.29330912232398987, "learning_rate": 4.441765710398087e-05, "loss": 0.1628, "step": 16550 }, { "epoch": 0.2952056504833589, "grad_norm": 0.26147744059562683, "learning_rate": 4.441667667987092e-05, "loss": 0.1777, "step": 16551 }, { "epoch": 0.2952234866050726, "grad_norm": 0.2868327498435974, "learning_rate": 4.4415696180495225e-05, "loss": 0.1817, "step": 16552 }, { "epoch": 0.29524132272678627, "grad_norm": 0.29684051871299744, "learning_rate": 4.441471560585758e-05, "loss": 0.2259, "step": 16553 }, { "epoch": 0.29525915884849996, "grad_norm": 0.23250903189182281, "learning_rate": 4.4413734955961795e-05, "loss": 0.1579, "step": 16554 }, { "epoch": 0.2952769949702137, "grad_norm": 0.29615867137908936, "learning_rate": 4.441275423081166e-05, "loss": 0.173, "step": 16555 }, { "epoch": 0.2952948310919274, "grad_norm": 0.20359086990356445, "learning_rate": 4.4411773430410997e-05, "loss": 0.1345, "step": 16556 }, { "epoch": 0.2953126672136411, "grad_norm": 0.19083966314792633, "learning_rate": 4.441079255476359e-05, "loss": 0.1509, "step": 16557 }, { "epoch": 0.29533050333535477, "grad_norm": 0.36032634973526, "learning_rate": 4.440981160387324e-05, "loss": 0.1599, "step": 16558 }, { "epoch": 0.29534833945706845, "grad_norm": 0.3039087951183319, "learning_rate": 4.440883057774377e-05, "loss": 0.1573, "step": 16559 }, { "epoch": 0.29536617557878214, "grad_norm": 0.4068549573421478, "learning_rate": 4.440784947637896e-05, "loss": 0.206, "step": 16560 }, { "epoch": 0.29538401170049583, "grad_norm": 0.21798011660575867, "learning_rate": 4.440686829978262e-05, "loss": 0.1665, "step": 16561 }, { "epoch": 0.2954018478222095, "grad_norm": 0.19881121814250946, "learning_rate": 4.4405887047958564e-05, "loss": 0.1605, "step": 16562 }, { "epoch": 0.2954196839439232, "grad_norm": 0.25778138637542725, "learning_rate": 4.440490572091058e-05, "loss": 0.1866, "step": 16563 }, { "epoch": 0.29543752006563695, "grad_norm": 0.2487366944551468, "learning_rate": 4.440392431864248e-05, "loss": 0.1643, "step": 16564 }, { "epoch": 0.29545535618735064, "grad_norm": 0.25507956743240356, "learning_rate": 4.4402942841158065e-05, "loss": 0.1417, "step": 16565 }, { "epoch": 0.2954731923090643, "grad_norm": 0.23990201950073242, "learning_rate": 4.4401961288461156e-05, "loss": 0.1547, "step": 16566 }, { "epoch": 0.295491028430778, "grad_norm": 0.2873901128768921, "learning_rate": 4.4400979660555533e-05, "loss": 0.1344, "step": 16567 }, { "epoch": 0.2955088645524917, "grad_norm": 0.2868228554725647, "learning_rate": 4.4399997957445014e-05, "loss": 0.214, "step": 16568 }, { "epoch": 0.2955267006742054, "grad_norm": 0.27751776576042175, "learning_rate": 4.4399016179133404e-05, "loss": 0.183, "step": 16569 }, { "epoch": 0.2955445367959191, "grad_norm": 0.2701549828052521, "learning_rate": 4.43980343256245e-05, "loss": 0.2012, "step": 16570 }, { "epoch": 0.29556237291763277, "grad_norm": 0.302306205034256, "learning_rate": 4.439705239692212e-05, "loss": 0.1199, "step": 16571 }, { "epoch": 0.2955802090393465, "grad_norm": 0.2979692220687866, "learning_rate": 4.439607039303006e-05, "loss": 0.1567, "step": 16572 }, { "epoch": 0.2955980451610602, "grad_norm": 0.34140437841415405, "learning_rate": 4.439508831395214e-05, "loss": 0.2476, "step": 16573 }, { "epoch": 0.2956158812827739, "grad_norm": 0.35357666015625, "learning_rate": 4.439410615969216e-05, "loss": 0.1466, "step": 16574 }, { "epoch": 0.2956337174044876, "grad_norm": 0.25594770908355713, "learning_rate": 4.4393123930253924e-05, "loss": 0.1402, "step": 16575 }, { "epoch": 0.29565155352620126, "grad_norm": 0.2982681095600128, "learning_rate": 4.439214162564124e-05, "loss": 0.1896, "step": 16576 }, { "epoch": 0.29566938964791495, "grad_norm": 0.281310111284256, "learning_rate": 4.439115924585792e-05, "loss": 0.1501, "step": 16577 }, { "epoch": 0.29568722576962864, "grad_norm": 0.30204230546951294, "learning_rate": 4.439017679090775e-05, "loss": 0.171, "step": 16578 }, { "epoch": 0.2957050618913423, "grad_norm": 0.20857369899749756, "learning_rate": 4.438919426079458e-05, "loss": 0.2046, "step": 16579 }, { "epoch": 0.29572289801305607, "grad_norm": 0.17907004058361053, "learning_rate": 4.438821165552219e-05, "loss": 0.1425, "step": 16580 }, { "epoch": 0.29574073413476976, "grad_norm": 0.20068205893039703, "learning_rate": 4.43872289750944e-05, "loss": 0.1806, "step": 16581 }, { "epoch": 0.29575857025648344, "grad_norm": 0.281717985868454, "learning_rate": 4.4386246219515e-05, "loss": 0.1969, "step": 16582 }, { "epoch": 0.29577640637819713, "grad_norm": 0.26462844014167786, "learning_rate": 4.438526338878783e-05, "loss": 0.1409, "step": 16583 }, { "epoch": 0.2957942424999108, "grad_norm": 0.2483363151550293, "learning_rate": 4.438428048291667e-05, "loss": 0.2049, "step": 16584 }, { "epoch": 0.2958120786216245, "grad_norm": 0.19238242506980896, "learning_rate": 4.438329750190535e-05, "loss": 0.1468, "step": 16585 }, { "epoch": 0.2958299147433382, "grad_norm": 0.1948329210281372, "learning_rate": 4.438231444575768e-05, "loss": 0.1252, "step": 16586 }, { "epoch": 0.2958477508650519, "grad_norm": 0.27889081835746765, "learning_rate": 4.438133131447746e-05, "loss": 0.1692, "step": 16587 }, { "epoch": 0.29586558698676557, "grad_norm": 0.27356308698654175, "learning_rate": 4.4380348108068506e-05, "loss": 0.149, "step": 16588 }, { "epoch": 0.2958834231084793, "grad_norm": 0.33214470744132996, "learning_rate": 4.437936482653463e-05, "loss": 0.1724, "step": 16589 }, { "epoch": 0.295901259230193, "grad_norm": 0.2601211369037628, "learning_rate": 4.437838146987964e-05, "loss": 0.1972, "step": 16590 }, { "epoch": 0.2959190953519067, "grad_norm": 0.19905738532543182, "learning_rate": 4.437739803810735e-05, "loss": 0.1441, "step": 16591 }, { "epoch": 0.2959369314736204, "grad_norm": 0.38575562834739685, "learning_rate": 4.4376414531221574e-05, "loss": 0.2653, "step": 16592 }, { "epoch": 0.29595476759533407, "grad_norm": 0.279782235622406, "learning_rate": 4.4375430949226114e-05, "loss": 0.1601, "step": 16593 }, { "epoch": 0.29597260371704776, "grad_norm": 0.27133673429489136, "learning_rate": 4.4374447292124806e-05, "loss": 0.1995, "step": 16594 }, { "epoch": 0.29599043983876144, "grad_norm": 0.26143351197242737, "learning_rate": 4.437346355992144e-05, "loss": 0.202, "step": 16595 }, { "epoch": 0.29600827596047513, "grad_norm": 0.2220582515001297, "learning_rate": 4.437247975261984e-05, "loss": 0.1458, "step": 16596 }, { "epoch": 0.2960261120821889, "grad_norm": 0.2788536846637726, "learning_rate": 4.437149587022382e-05, "loss": 0.2483, "step": 16597 }, { "epoch": 0.29604394820390256, "grad_norm": 0.261518657207489, "learning_rate": 4.4370511912737186e-05, "loss": 0.1945, "step": 16598 }, { "epoch": 0.29606178432561625, "grad_norm": 0.24918675422668457, "learning_rate": 4.436952788016376e-05, "loss": 0.194, "step": 16599 }, { "epoch": 0.29607962044732994, "grad_norm": 0.2793594300746918, "learning_rate": 4.4368543772507355e-05, "loss": 0.1964, "step": 16600 }, { "epoch": 0.2960974565690436, "grad_norm": 0.2500112056732178, "learning_rate": 4.436755958977179e-05, "loss": 0.1993, "step": 16601 }, { "epoch": 0.2961152926907573, "grad_norm": 0.3161695897579193, "learning_rate": 4.436657533196087e-05, "loss": 0.1831, "step": 16602 }, { "epoch": 0.296133128812471, "grad_norm": 0.23257575929164886, "learning_rate": 4.436559099907841e-05, "loss": 0.1338, "step": 16603 }, { "epoch": 0.2961509649341847, "grad_norm": 0.28345972299575806, "learning_rate": 4.4364606591128236e-05, "loss": 0.2257, "step": 16604 }, { "epoch": 0.2961688010558984, "grad_norm": 0.28399041295051575, "learning_rate": 4.436362210811416e-05, "loss": 0.1781, "step": 16605 }, { "epoch": 0.2961866371776121, "grad_norm": 0.25728896260261536, "learning_rate": 4.436263755003999e-05, "loss": 0.143, "step": 16606 }, { "epoch": 0.2962044732993258, "grad_norm": 0.27097266912460327, "learning_rate": 4.4361652916909555e-05, "loss": 0.1712, "step": 16607 }, { "epoch": 0.2962223094210395, "grad_norm": 0.26441726088523865, "learning_rate": 4.436066820872666e-05, "loss": 0.2159, "step": 16608 }, { "epoch": 0.2962401455427532, "grad_norm": 0.25861290097236633, "learning_rate": 4.435968342549514e-05, "loss": 0.1559, "step": 16609 }, { "epoch": 0.2962579816644669, "grad_norm": 0.25006774067878723, "learning_rate": 4.4358698567218785e-05, "loss": 0.2064, "step": 16610 }, { "epoch": 0.29627581778618056, "grad_norm": 0.19449922442436218, "learning_rate": 4.435771363390143e-05, "loss": 0.1565, "step": 16611 }, { "epoch": 0.29629365390789425, "grad_norm": 0.22116151452064514, "learning_rate": 4.43567286255469e-05, "loss": 0.1754, "step": 16612 }, { "epoch": 0.29631149002960794, "grad_norm": 0.33348163962364197, "learning_rate": 4.4355743542159e-05, "loss": 0.1679, "step": 16613 }, { "epoch": 0.2963293261513217, "grad_norm": 0.2816435396671295, "learning_rate": 4.435475838374156e-05, "loss": 0.1403, "step": 16614 }, { "epoch": 0.29634716227303537, "grad_norm": 0.345577210187912, "learning_rate": 4.435377315029838e-05, "loss": 0.1647, "step": 16615 }, { "epoch": 0.29636499839474906, "grad_norm": 0.2473231703042984, "learning_rate": 4.43527878418333e-05, "loss": 0.1687, "step": 16616 }, { "epoch": 0.29638283451646275, "grad_norm": 0.28243395686149597, "learning_rate": 4.4351802458350124e-05, "loss": 0.2179, "step": 16617 }, { "epoch": 0.29640067063817643, "grad_norm": 0.3431648910045624, "learning_rate": 4.435081699985268e-05, "loss": 0.1982, "step": 16618 }, { "epoch": 0.2964185067598901, "grad_norm": 0.3261888325214386, "learning_rate": 4.434983146634478e-05, "loss": 0.1631, "step": 16619 }, { "epoch": 0.2964363428816038, "grad_norm": 0.31556057929992676, "learning_rate": 4.4348845857830254e-05, "loss": 0.1522, "step": 16620 }, { "epoch": 0.2964541790033175, "grad_norm": 0.25531819462776184, "learning_rate": 4.434786017431293e-05, "loss": 0.125, "step": 16621 }, { "epoch": 0.29647201512503124, "grad_norm": 0.2754364609718323, "learning_rate": 4.4346874415796605e-05, "loss": 0.154, "step": 16622 }, { "epoch": 0.29648985124674493, "grad_norm": 0.26889702677726746, "learning_rate": 4.4345888582285114e-05, "loss": 0.1686, "step": 16623 }, { "epoch": 0.2965076873684586, "grad_norm": 0.19343748688697815, "learning_rate": 4.434490267378227e-05, "loss": 0.1785, "step": 16624 }, { "epoch": 0.2965255234901723, "grad_norm": 0.2791062295436859, "learning_rate": 4.434391669029192e-05, "loss": 0.1993, "step": 16625 }, { "epoch": 0.296543359611886, "grad_norm": 0.27494120597839355, "learning_rate": 4.4342930631817854e-05, "loss": 0.1642, "step": 16626 }, { "epoch": 0.2965611957335997, "grad_norm": 0.269661545753479, "learning_rate": 4.4341944498363907e-05, "loss": 0.1818, "step": 16627 }, { "epoch": 0.29657903185531337, "grad_norm": 0.2954808473587036, "learning_rate": 4.434095828993391e-05, "loss": 0.159, "step": 16628 }, { "epoch": 0.29659686797702706, "grad_norm": 0.25478166341781616, "learning_rate": 4.433997200653168e-05, "loss": 0.1598, "step": 16629 }, { "epoch": 0.29661470409874074, "grad_norm": 0.42966318130493164, "learning_rate": 4.433898564816103e-05, "loss": 0.1727, "step": 16630 }, { "epoch": 0.2966325402204545, "grad_norm": 0.2271733582019806, "learning_rate": 4.4337999214825796e-05, "loss": 0.1625, "step": 16631 }, { "epoch": 0.2966503763421682, "grad_norm": 0.32869628071784973, "learning_rate": 4.4337012706529804e-05, "loss": 0.2204, "step": 16632 }, { "epoch": 0.29666821246388186, "grad_norm": 0.38243845105171204, "learning_rate": 4.4336026123276865e-05, "loss": 0.1501, "step": 16633 }, { "epoch": 0.29668604858559555, "grad_norm": 0.4442615211009979, "learning_rate": 4.433503946507081e-05, "loss": 0.1714, "step": 16634 }, { "epoch": 0.29670388470730924, "grad_norm": 0.25172099471092224, "learning_rate": 4.4334052731915466e-05, "loss": 0.1538, "step": 16635 }, { "epoch": 0.29672172082902293, "grad_norm": 0.28673067688941956, "learning_rate": 4.4333065923814656e-05, "loss": 0.199, "step": 16636 }, { "epoch": 0.2967395569507366, "grad_norm": 0.2143595665693283, "learning_rate": 4.43320790407722e-05, "loss": 0.1428, "step": 16637 }, { "epoch": 0.2967573930724503, "grad_norm": 0.29313308000564575, "learning_rate": 4.433109208279194e-05, "loss": 0.1875, "step": 16638 }, { "epoch": 0.29677522919416405, "grad_norm": 0.28862738609313965, "learning_rate": 4.433010504987768e-05, "loss": 0.1761, "step": 16639 }, { "epoch": 0.29679306531587774, "grad_norm": 0.29017379879951477, "learning_rate": 4.432911794203326e-05, "loss": 0.206, "step": 16640 }, { "epoch": 0.2968109014375914, "grad_norm": 0.18667002022266388, "learning_rate": 4.43281307592625e-05, "loss": 0.136, "step": 16641 }, { "epoch": 0.2968287375593051, "grad_norm": 0.3088679611682892, "learning_rate": 4.4327143501569234e-05, "loss": 0.1462, "step": 16642 }, { "epoch": 0.2968465736810188, "grad_norm": 0.2738398313522339, "learning_rate": 4.4326156168957285e-05, "loss": 0.1648, "step": 16643 }, { "epoch": 0.2968644098027325, "grad_norm": 0.394050657749176, "learning_rate": 4.4325168761430476e-05, "loss": 0.2519, "step": 16644 }, { "epoch": 0.2968822459244462, "grad_norm": 0.26776477694511414, "learning_rate": 4.4324181278992635e-05, "loss": 0.1636, "step": 16645 }, { "epoch": 0.29690008204615986, "grad_norm": 0.22168299555778503, "learning_rate": 4.43231937216476e-05, "loss": 0.1896, "step": 16646 }, { "epoch": 0.29691791816787355, "grad_norm": 0.34589311480522156, "learning_rate": 4.432220608939919e-05, "loss": 0.2142, "step": 16647 }, { "epoch": 0.2969357542895873, "grad_norm": 0.2140132635831833, "learning_rate": 4.432121838225123e-05, "loss": 0.1885, "step": 16648 }, { "epoch": 0.296953590411301, "grad_norm": 0.27988100051879883, "learning_rate": 4.4320230600207565e-05, "loss": 0.1425, "step": 16649 }, { "epoch": 0.29697142653301467, "grad_norm": 0.29920223355293274, "learning_rate": 4.4319242743272e-05, "loss": 0.2004, "step": 16650 }, { "epoch": 0.29698926265472836, "grad_norm": 0.33628174662590027, "learning_rate": 4.431825481144839e-05, "loss": 0.1209, "step": 16651 }, { "epoch": 0.29700709877644205, "grad_norm": 0.30754542350769043, "learning_rate": 4.431726680474054e-05, "loss": 0.1978, "step": 16652 }, { "epoch": 0.29702493489815573, "grad_norm": 0.2443932145833969, "learning_rate": 4.4316278723152306e-05, "loss": 0.1013, "step": 16653 }, { "epoch": 0.2970427710198694, "grad_norm": 0.21593210101127625, "learning_rate": 4.4315290566687497e-05, "loss": 0.1627, "step": 16654 }, { "epoch": 0.2970606071415831, "grad_norm": 0.287570983171463, "learning_rate": 4.431430233534995e-05, "loss": 0.1532, "step": 16655 }, { "epoch": 0.29707844326329685, "grad_norm": 0.24677424132823944, "learning_rate": 4.4313314029143496e-05, "loss": 0.1573, "step": 16656 }, { "epoch": 0.29709627938501054, "grad_norm": 0.21025590598583221, "learning_rate": 4.431232564807197e-05, "loss": 0.1646, "step": 16657 }, { "epoch": 0.29711411550672423, "grad_norm": 0.23474650084972382, "learning_rate": 4.431133719213919e-05, "loss": 0.1729, "step": 16658 }, { "epoch": 0.2971319516284379, "grad_norm": 0.3554365038871765, "learning_rate": 4.4310348661349007e-05, "loss": 0.1624, "step": 16659 }, { "epoch": 0.2971497877501516, "grad_norm": 0.3594236671924591, "learning_rate": 4.430936005570524e-05, "loss": 0.2111, "step": 16660 }, { "epoch": 0.2971676238718653, "grad_norm": 0.3476164937019348, "learning_rate": 4.430837137521172e-05, "loss": 0.1833, "step": 16661 }, { "epoch": 0.297185459993579, "grad_norm": 0.31929105520248413, "learning_rate": 4.430738261987229e-05, "loss": 0.2164, "step": 16662 }, { "epoch": 0.29720329611529267, "grad_norm": 0.1930297166109085, "learning_rate": 4.430639378969077e-05, "loss": 0.1435, "step": 16663 }, { "epoch": 0.29722113223700636, "grad_norm": 0.20953087508678436, "learning_rate": 4.430540488467101e-05, "loss": 0.1386, "step": 16664 }, { "epoch": 0.2972389683587201, "grad_norm": 0.2832101285457611, "learning_rate": 4.430441590481682e-05, "loss": 0.1871, "step": 16665 }, { "epoch": 0.2972568044804338, "grad_norm": 0.3205028474330902, "learning_rate": 4.4303426850132056e-05, "loss": 0.189, "step": 16666 }, { "epoch": 0.2972746406021475, "grad_norm": 0.27176329493522644, "learning_rate": 4.4302437720620536e-05, "loss": 0.1738, "step": 16667 }, { "epoch": 0.29729247672386117, "grad_norm": 0.3001042604446411, "learning_rate": 4.4301448516286104e-05, "loss": 0.2204, "step": 16668 }, { "epoch": 0.29731031284557485, "grad_norm": 0.24722713232040405, "learning_rate": 4.4300459237132594e-05, "loss": 0.181, "step": 16669 }, { "epoch": 0.29732814896728854, "grad_norm": 0.29141971468925476, "learning_rate": 4.429946988316383e-05, "loss": 0.1428, "step": 16670 }, { "epoch": 0.29734598508900223, "grad_norm": 0.23951655626296997, "learning_rate": 4.4298480454383664e-05, "loss": 0.1623, "step": 16671 }, { "epoch": 0.2973638212107159, "grad_norm": 0.2430630326271057, "learning_rate": 4.429749095079591e-05, "loss": 0.1554, "step": 16672 }, { "epoch": 0.29738165733242966, "grad_norm": 0.2622222900390625, "learning_rate": 4.4296501372404427e-05, "loss": 0.164, "step": 16673 }, { "epoch": 0.29739949345414335, "grad_norm": 0.23790578544139862, "learning_rate": 4.429551171921303e-05, "loss": 0.1582, "step": 16674 }, { "epoch": 0.29741732957585704, "grad_norm": 0.3748508393764496, "learning_rate": 4.429452199122557e-05, "loss": 0.1759, "step": 16675 }, { "epoch": 0.2974351656975707, "grad_norm": 0.2436283528804779, "learning_rate": 4.4293532188445884e-05, "loss": 0.2123, "step": 16676 }, { "epoch": 0.2974530018192844, "grad_norm": 0.28571587800979614, "learning_rate": 4.42925423108778e-05, "loss": 0.1597, "step": 16677 }, { "epoch": 0.2974708379409981, "grad_norm": 0.25470608472824097, "learning_rate": 4.429155235852516e-05, "loss": 0.1993, "step": 16678 }, { "epoch": 0.2974886740627118, "grad_norm": 0.21140186488628387, "learning_rate": 4.4290562331391797e-05, "loss": 0.1526, "step": 16679 }, { "epoch": 0.2975065101844255, "grad_norm": 0.2538129687309265, "learning_rate": 4.4289572229481555e-05, "loss": 0.1846, "step": 16680 }, { "epoch": 0.2975243463061392, "grad_norm": 0.28790101408958435, "learning_rate": 4.428858205279826e-05, "loss": 0.1863, "step": 16681 }, { "epoch": 0.2975421824278529, "grad_norm": 0.2624432444572449, "learning_rate": 4.428759180134577e-05, "loss": 0.1708, "step": 16682 }, { "epoch": 0.2975600185495666, "grad_norm": 0.18219736218452454, "learning_rate": 4.428660147512791e-05, "loss": 0.1936, "step": 16683 }, { "epoch": 0.2975778546712803, "grad_norm": 0.28530970215797424, "learning_rate": 4.428561107414852e-05, "loss": 0.152, "step": 16684 }, { "epoch": 0.29759569079299397, "grad_norm": 0.2296566367149353, "learning_rate": 4.428462059841143e-05, "loss": 0.1694, "step": 16685 }, { "epoch": 0.29761352691470766, "grad_norm": 0.2159237563610077, "learning_rate": 4.42836300479205e-05, "loss": 0.186, "step": 16686 }, { "epoch": 0.29763136303642135, "grad_norm": 0.2759743630886078, "learning_rate": 4.428263942267956e-05, "loss": 0.1812, "step": 16687 }, { "epoch": 0.29764919915813504, "grad_norm": 0.24413438141345978, "learning_rate": 4.4281648722692445e-05, "loss": 0.18, "step": 16688 }, { "epoch": 0.2976670352798487, "grad_norm": 0.2134544998407364, "learning_rate": 4.428065794796301e-05, "loss": 0.1829, "step": 16689 }, { "epoch": 0.29768487140156247, "grad_norm": 0.2694886326789856, "learning_rate": 4.427966709849508e-05, "loss": 0.2157, "step": 16690 }, { "epoch": 0.29770270752327616, "grad_norm": 0.23693469166755676, "learning_rate": 4.42786761742925e-05, "loss": 0.1481, "step": 16691 }, { "epoch": 0.29772054364498984, "grad_norm": 0.2122371643781662, "learning_rate": 4.427768517535911e-05, "loss": 0.1281, "step": 16692 }, { "epoch": 0.29773837976670353, "grad_norm": 0.2773756682872772, "learning_rate": 4.427669410169876e-05, "loss": 0.1849, "step": 16693 }, { "epoch": 0.2977562158884172, "grad_norm": 0.2172859162092209, "learning_rate": 4.427570295331528e-05, "loss": 0.1818, "step": 16694 }, { "epoch": 0.2977740520101309, "grad_norm": 0.2926592230796814, "learning_rate": 4.4274711730212516e-05, "loss": 0.1552, "step": 16695 }, { "epoch": 0.2977918881318446, "grad_norm": 0.22938333451747894, "learning_rate": 4.427372043239432e-05, "loss": 0.1658, "step": 16696 }, { "epoch": 0.2978097242535583, "grad_norm": 0.22436851263046265, "learning_rate": 4.427272905986452e-05, "loss": 0.1596, "step": 16697 }, { "epoch": 0.297827560375272, "grad_norm": 0.28358304500579834, "learning_rate": 4.427173761262697e-05, "loss": 0.1611, "step": 16698 }, { "epoch": 0.2978453964969857, "grad_norm": 0.2514285445213318, "learning_rate": 4.42707460906855e-05, "loss": 0.1726, "step": 16699 }, { "epoch": 0.2978632326186994, "grad_norm": 0.7242552638053894, "learning_rate": 4.426975449404397e-05, "loss": 0.1726, "step": 16700 }, { "epoch": 0.2978810687404131, "grad_norm": 0.28280892968177795, "learning_rate": 4.4268762822706223e-05, "loss": 0.1546, "step": 16701 }, { "epoch": 0.2978989048621268, "grad_norm": 0.2746674716472626, "learning_rate": 4.426777107667608e-05, "loss": 0.1929, "step": 16702 }, { "epoch": 0.29791674098384047, "grad_norm": 0.24106605350971222, "learning_rate": 4.4266779255957416e-05, "loss": 0.2159, "step": 16703 }, { "epoch": 0.29793457710555415, "grad_norm": 0.26024553179740906, "learning_rate": 4.426578736055405e-05, "loss": 0.1772, "step": 16704 }, { "epoch": 0.29795241322726784, "grad_norm": 0.31141164898872375, "learning_rate": 4.4264795390469845e-05, "loss": 0.1689, "step": 16705 }, { "epoch": 0.29797024934898153, "grad_norm": 0.4564592242240906, "learning_rate": 4.426380334570864e-05, "loss": 0.1468, "step": 16706 }, { "epoch": 0.2979880854706953, "grad_norm": 0.2418922781944275, "learning_rate": 4.426281122627427e-05, "loss": 0.1907, "step": 16707 }, { "epoch": 0.29800592159240896, "grad_norm": 0.24018177390098572, "learning_rate": 4.4261819032170605e-05, "loss": 0.2136, "step": 16708 }, { "epoch": 0.29802375771412265, "grad_norm": 1.0854966640472412, "learning_rate": 4.426082676340147e-05, "loss": 0.1976, "step": 16709 }, { "epoch": 0.29804159383583634, "grad_norm": 0.21242307126522064, "learning_rate": 4.425983441997071e-05, "loss": 0.1648, "step": 16710 }, { "epoch": 0.29805942995755, "grad_norm": 0.28974631428718567, "learning_rate": 4.425884200188219e-05, "loss": 0.2355, "step": 16711 }, { "epoch": 0.2980772660792637, "grad_norm": 0.31530699133872986, "learning_rate": 4.4257849509139743e-05, "loss": 0.1647, "step": 16712 }, { "epoch": 0.2980951022009774, "grad_norm": 0.27969062328338623, "learning_rate": 4.4256856941747215e-05, "loss": 0.2199, "step": 16713 }, { "epoch": 0.2981129383226911, "grad_norm": 0.5517001748085022, "learning_rate": 4.4255864299708465e-05, "loss": 0.174, "step": 16714 }, { "epoch": 0.29813077444440483, "grad_norm": 0.2415282428264618, "learning_rate": 4.4254871583027336e-05, "loss": 0.1789, "step": 16715 }, { "epoch": 0.2981486105661185, "grad_norm": 0.34333327412605286, "learning_rate": 4.425387879170767e-05, "loss": 0.2178, "step": 16716 }, { "epoch": 0.2981664466878322, "grad_norm": 0.2230244278907776, "learning_rate": 4.425288592575332e-05, "loss": 0.1383, "step": 16717 }, { "epoch": 0.2981842828095459, "grad_norm": 0.2376767247915268, "learning_rate": 4.425189298516813e-05, "loss": 0.1889, "step": 16718 }, { "epoch": 0.2982021189312596, "grad_norm": 0.24361565709114075, "learning_rate": 4.425089996995596e-05, "loss": 0.1784, "step": 16719 }, { "epoch": 0.2982199550529733, "grad_norm": 0.27075791358947754, "learning_rate": 4.424990688012066e-05, "loss": 0.1605, "step": 16720 }, { "epoch": 0.29823779117468696, "grad_norm": 0.21255682408809662, "learning_rate": 4.424891371566606e-05, "loss": 0.184, "step": 16721 }, { "epoch": 0.29825562729640065, "grad_norm": 0.2611226439476013, "learning_rate": 4.4247920476596025e-05, "loss": 0.1443, "step": 16722 }, { "epoch": 0.2982734634181144, "grad_norm": 0.25044888257980347, "learning_rate": 4.4246927162914406e-05, "loss": 0.177, "step": 16723 }, { "epoch": 0.2982912995398281, "grad_norm": 0.26633119583129883, "learning_rate": 4.424593377462504e-05, "loss": 0.1774, "step": 16724 }, { "epoch": 0.29830913566154177, "grad_norm": 0.39802661538124084, "learning_rate": 4.42449403117318e-05, "loss": 0.2054, "step": 16725 }, { "epoch": 0.29832697178325546, "grad_norm": 0.2094593495130539, "learning_rate": 4.424394677423851e-05, "loss": 0.1875, "step": 16726 }, { "epoch": 0.29834480790496914, "grad_norm": 0.23581956326961517, "learning_rate": 4.424295316214905e-05, "loss": 0.1857, "step": 16727 }, { "epoch": 0.29836264402668283, "grad_norm": 0.24323752522468567, "learning_rate": 4.424195947546725e-05, "loss": 0.1358, "step": 16728 }, { "epoch": 0.2983804801483965, "grad_norm": 0.2332221269607544, "learning_rate": 4.424096571419697e-05, "loss": 0.1281, "step": 16729 }, { "epoch": 0.2983983162701102, "grad_norm": 0.24572543799877167, "learning_rate": 4.4239971878342054e-05, "loss": 0.1474, "step": 16730 }, { "epoch": 0.2984161523918239, "grad_norm": 0.34758806228637695, "learning_rate": 4.423897796790637e-05, "loss": 0.2457, "step": 16731 }, { "epoch": 0.29843398851353764, "grad_norm": 0.255517840385437, "learning_rate": 4.4237983982893765e-05, "loss": 0.1393, "step": 16732 }, { "epoch": 0.29845182463525133, "grad_norm": 0.251751571893692, "learning_rate": 4.423698992330809e-05, "loss": 0.1901, "step": 16733 }, { "epoch": 0.298469660756965, "grad_norm": 0.2226800173521042, "learning_rate": 4.4235995789153195e-05, "loss": 0.1723, "step": 16734 }, { "epoch": 0.2984874968786787, "grad_norm": 0.2699423134326935, "learning_rate": 4.4235001580432934e-05, "loss": 0.1955, "step": 16735 }, { "epoch": 0.2985053330003924, "grad_norm": 0.3194892704486847, "learning_rate": 4.423400729715116e-05, "loss": 0.1612, "step": 16736 }, { "epoch": 0.2985231691221061, "grad_norm": 0.22573094069957733, "learning_rate": 4.423301293931173e-05, "loss": 0.1321, "step": 16737 }, { "epoch": 0.29854100524381977, "grad_norm": 0.2501831352710724, "learning_rate": 4.423201850691851e-05, "loss": 0.1387, "step": 16738 }, { "epoch": 0.29855884136553346, "grad_norm": 0.2960270643234253, "learning_rate": 4.423102399997534e-05, "loss": 0.1522, "step": 16739 }, { "epoch": 0.2985766774872472, "grad_norm": 0.29597237706184387, "learning_rate": 4.4230029418486075e-05, "loss": 0.1981, "step": 16740 }, { "epoch": 0.2985945136089609, "grad_norm": 0.2796803116798401, "learning_rate": 4.422903476245457e-05, "loss": 0.1678, "step": 16741 }, { "epoch": 0.2986123497306746, "grad_norm": 0.20084957778453827, "learning_rate": 4.42280400318847e-05, "loss": 0.1496, "step": 16742 }, { "epoch": 0.29863018585238826, "grad_norm": 0.31416070461273193, "learning_rate": 4.42270452267803e-05, "loss": 0.1259, "step": 16743 }, { "epoch": 0.29864802197410195, "grad_norm": 0.2169618010520935, "learning_rate": 4.422605034714522e-05, "loss": 0.1553, "step": 16744 }, { "epoch": 0.29866585809581564, "grad_norm": 0.23236408829689026, "learning_rate": 4.4225055392983336e-05, "loss": 0.1581, "step": 16745 }, { "epoch": 0.2986836942175293, "grad_norm": 0.2908601760864258, "learning_rate": 4.4224060364298496e-05, "loss": 0.141, "step": 16746 }, { "epoch": 0.298701530339243, "grad_norm": 0.28868016600608826, "learning_rate": 4.422306526109456e-05, "loss": 0.1788, "step": 16747 }, { "epoch": 0.2987193664609567, "grad_norm": 0.334247350692749, "learning_rate": 4.422207008337539e-05, "loss": 0.1952, "step": 16748 }, { "epoch": 0.29873720258267045, "grad_norm": 0.32186469435691833, "learning_rate": 4.422107483114482e-05, "loss": 0.2393, "step": 16749 }, { "epoch": 0.29875503870438413, "grad_norm": 0.27745431661605835, "learning_rate": 4.422007950440674e-05, "loss": 0.16, "step": 16750 }, { "epoch": 0.2987728748260978, "grad_norm": 0.30679234862327576, "learning_rate": 4.4219084103164996e-05, "loss": 0.2074, "step": 16751 }, { "epoch": 0.2987907109478115, "grad_norm": 0.25089892745018005, "learning_rate": 4.4218088627423437e-05, "loss": 0.2453, "step": 16752 }, { "epoch": 0.2988085470695252, "grad_norm": 0.3112054169178009, "learning_rate": 4.421709307718592e-05, "loss": 0.2154, "step": 16753 }, { "epoch": 0.2988263831912389, "grad_norm": 0.25213146209716797, "learning_rate": 4.421609745245633e-05, "loss": 0.1803, "step": 16754 }, { "epoch": 0.2988442193129526, "grad_norm": 0.2951754033565521, "learning_rate": 4.4215101753238494e-05, "loss": 0.156, "step": 16755 }, { "epoch": 0.29886205543466626, "grad_norm": 0.24440592527389526, "learning_rate": 4.4214105979536305e-05, "loss": 0.192, "step": 16756 }, { "epoch": 0.29887989155638, "grad_norm": 0.25787267088890076, "learning_rate": 4.4213110131353586e-05, "loss": 0.1851, "step": 16757 }, { "epoch": 0.2988977276780937, "grad_norm": 0.2564343512058258, "learning_rate": 4.421211420869423e-05, "loss": 0.1735, "step": 16758 }, { "epoch": 0.2989155637998074, "grad_norm": 0.4714498519897461, "learning_rate": 4.4211118211562074e-05, "loss": 0.1955, "step": 16759 }, { "epoch": 0.29893339992152107, "grad_norm": 0.24753375351428986, "learning_rate": 4.421012213996099e-05, "loss": 0.1915, "step": 16760 }, { "epoch": 0.29895123604323476, "grad_norm": 0.4700700044631958, "learning_rate": 4.4209125993894845e-05, "loss": 0.2206, "step": 16761 }, { "epoch": 0.29896907216494845, "grad_norm": 0.2219686061143875, "learning_rate": 4.420812977336748e-05, "loss": 0.1901, "step": 16762 }, { "epoch": 0.29898690828666213, "grad_norm": 0.3039661943912506, "learning_rate": 4.4207133478382785e-05, "loss": 0.1666, "step": 16763 }, { "epoch": 0.2990047444083758, "grad_norm": 0.24209414422512054, "learning_rate": 4.42061371089446e-05, "loss": 0.1197, "step": 16764 }, { "epoch": 0.2990225805300895, "grad_norm": 0.33031749725341797, "learning_rate": 4.4205140665056786e-05, "loss": 0.145, "step": 16765 }, { "epoch": 0.29904041665180325, "grad_norm": 0.18115060031414032, "learning_rate": 4.420414414672322e-05, "loss": 0.1396, "step": 16766 }, { "epoch": 0.29905825277351694, "grad_norm": 0.21951572597026825, "learning_rate": 4.420314755394776e-05, "loss": 0.1743, "step": 16767 }, { "epoch": 0.29907608889523063, "grad_norm": 0.33737912774086, "learning_rate": 4.4202150886734274e-05, "loss": 0.1397, "step": 16768 }, { "epoch": 0.2990939250169443, "grad_norm": 0.23330223560333252, "learning_rate": 4.420115414508661e-05, "loss": 0.1646, "step": 16769 }, { "epoch": 0.299111761138658, "grad_norm": 0.31918492913246155, "learning_rate": 4.420015732900864e-05, "loss": 0.1593, "step": 16770 }, { "epoch": 0.2991295972603717, "grad_norm": 0.22612160444259644, "learning_rate": 4.419916043850423e-05, "loss": 0.1884, "step": 16771 }, { "epoch": 0.2991474333820854, "grad_norm": 0.23343156278133392, "learning_rate": 4.419816347357725e-05, "loss": 0.1685, "step": 16772 }, { "epoch": 0.29916526950379907, "grad_norm": 0.2565903067588806, "learning_rate": 4.4197166434231554e-05, "loss": 0.1493, "step": 16773 }, { "epoch": 0.2991831056255128, "grad_norm": 0.2400580793619156, "learning_rate": 4.4196169320471e-05, "loss": 0.1677, "step": 16774 }, { "epoch": 0.2992009417472265, "grad_norm": 0.32715415954589844, "learning_rate": 4.4195172132299475e-05, "loss": 0.163, "step": 16775 }, { "epoch": 0.2992187778689402, "grad_norm": 0.35406193137168884, "learning_rate": 4.4194174869720826e-05, "loss": 0.1695, "step": 16776 }, { "epoch": 0.2992366139906539, "grad_norm": 0.1691502332687378, "learning_rate": 4.419317753273893e-05, "loss": 0.1366, "step": 16777 }, { "epoch": 0.29925445011236756, "grad_norm": 0.3945567011833191, "learning_rate": 4.419218012135765e-05, "loss": 0.1989, "step": 16778 }, { "epoch": 0.29927228623408125, "grad_norm": 0.2872392535209656, "learning_rate": 4.419118263558085e-05, "loss": 0.1513, "step": 16779 }, { "epoch": 0.29929012235579494, "grad_norm": 0.18428415060043335, "learning_rate": 4.41901850754124e-05, "loss": 0.1533, "step": 16780 }, { "epoch": 0.29930795847750863, "grad_norm": 0.24294564127922058, "learning_rate": 4.4189187440856165e-05, "loss": 0.1587, "step": 16781 }, { "epoch": 0.2993257945992224, "grad_norm": 0.25132066011428833, "learning_rate": 4.418818973191601e-05, "loss": 0.202, "step": 16782 }, { "epoch": 0.29934363072093606, "grad_norm": 0.2090282142162323, "learning_rate": 4.4187191948595794e-05, "loss": 0.1752, "step": 16783 }, { "epoch": 0.29936146684264975, "grad_norm": 0.3071610629558563, "learning_rate": 4.41861940908994e-05, "loss": 0.2051, "step": 16784 }, { "epoch": 0.29937930296436344, "grad_norm": 0.2720433473587036, "learning_rate": 4.41851961588307e-05, "loss": 0.1328, "step": 16785 }, { "epoch": 0.2993971390860771, "grad_norm": 0.3585069477558136, "learning_rate": 4.4184198152393544e-05, "loss": 0.1685, "step": 16786 }, { "epoch": 0.2994149752077908, "grad_norm": 0.35692310333251953, "learning_rate": 4.4183200071591815e-05, "loss": 0.1749, "step": 16787 }, { "epoch": 0.2994328113295045, "grad_norm": 0.2433602660894394, "learning_rate": 4.4182201916429375e-05, "loss": 0.1872, "step": 16788 }, { "epoch": 0.2994506474512182, "grad_norm": 0.34218353033065796, "learning_rate": 4.41812036869101e-05, "loss": 0.1715, "step": 16789 }, { "epoch": 0.2994684835729319, "grad_norm": 0.30541422963142395, "learning_rate": 4.418020538303785e-05, "loss": 0.2172, "step": 16790 }, { "epoch": 0.2994863196946456, "grad_norm": 0.32555091381073, "learning_rate": 4.41792070048165e-05, "loss": 0.1927, "step": 16791 }, { "epoch": 0.2995041558163593, "grad_norm": 0.3671061098575592, "learning_rate": 4.4178208552249915e-05, "loss": 0.2207, "step": 16792 }, { "epoch": 0.299521991938073, "grad_norm": 0.21439440548419952, "learning_rate": 4.4177210025341974e-05, "loss": 0.1656, "step": 16793 }, { "epoch": 0.2995398280597867, "grad_norm": 0.3094068765640259, "learning_rate": 4.4176211424096545e-05, "loss": 0.1635, "step": 16794 }, { "epoch": 0.29955766418150037, "grad_norm": 0.4374065101146698, "learning_rate": 4.417521274851749e-05, "loss": 0.2412, "step": 16795 }, { "epoch": 0.29957550030321406, "grad_norm": 0.1831519901752472, "learning_rate": 4.41742139986087e-05, "loss": 0.1686, "step": 16796 }, { "epoch": 0.29959333642492775, "grad_norm": 0.319697767496109, "learning_rate": 4.4173215174374025e-05, "loss": 0.1702, "step": 16797 }, { "epoch": 0.29961117254664144, "grad_norm": 0.2710077166557312, "learning_rate": 4.417221627581735e-05, "loss": 0.184, "step": 16798 }, { "epoch": 0.2996290086683552, "grad_norm": 0.2965708374977112, "learning_rate": 4.4171217302942534e-05, "loss": 0.181, "step": 16799 }, { "epoch": 0.29964684479006887, "grad_norm": 0.2833975851535797, "learning_rate": 4.417021825575347e-05, "loss": 0.1589, "step": 16800 }, { "epoch": 0.29966468091178255, "grad_norm": 0.2365575134754181, "learning_rate": 4.416921913425401e-05, "loss": 0.1444, "step": 16801 }, { "epoch": 0.29968251703349624, "grad_norm": 0.29016876220703125, "learning_rate": 4.416821993844804e-05, "loss": 0.2005, "step": 16802 }, { "epoch": 0.29970035315520993, "grad_norm": 0.295404851436615, "learning_rate": 4.416722066833943e-05, "loss": 0.1074, "step": 16803 }, { "epoch": 0.2997181892769236, "grad_norm": 0.24563342332839966, "learning_rate": 4.4166221323932045e-05, "loss": 0.1864, "step": 16804 }, { "epoch": 0.2997360253986373, "grad_norm": 0.1913757175207138, "learning_rate": 4.4165221905229775e-05, "loss": 0.1764, "step": 16805 }, { "epoch": 0.299753861520351, "grad_norm": 0.30059218406677246, "learning_rate": 4.416422241223648e-05, "loss": 0.1176, "step": 16806 }, { "epoch": 0.2997716976420647, "grad_norm": 0.23344455659389496, "learning_rate": 4.416322284495604e-05, "loss": 0.177, "step": 16807 }, { "epoch": 0.2997895337637784, "grad_norm": 0.31662899255752563, "learning_rate": 4.416222320339234e-05, "loss": 0.1532, "step": 16808 }, { "epoch": 0.2998073698854921, "grad_norm": 0.2756274342536926, "learning_rate": 4.416122348754923e-05, "loss": 0.1861, "step": 16809 }, { "epoch": 0.2998252060072058, "grad_norm": 0.25212037563323975, "learning_rate": 4.416022369743061e-05, "loss": 0.1799, "step": 16810 }, { "epoch": 0.2998430421289195, "grad_norm": 0.21718133985996246, "learning_rate": 4.415922383304034e-05, "loss": 0.1476, "step": 16811 }, { "epoch": 0.2998608782506332, "grad_norm": 0.3229748606681824, "learning_rate": 4.41582238943823e-05, "loss": 0.1899, "step": 16812 }, { "epoch": 0.29987871437234687, "grad_norm": 0.26731520891189575, "learning_rate": 4.415722388146037e-05, "loss": 0.1704, "step": 16813 }, { "epoch": 0.29989655049406055, "grad_norm": 0.27418631315231323, "learning_rate": 4.4156223794278426e-05, "loss": 0.1823, "step": 16814 }, { "epoch": 0.29991438661577424, "grad_norm": 0.36438408493995667, "learning_rate": 4.4155223632840334e-05, "loss": 0.1565, "step": 16815 }, { "epoch": 0.299932222737488, "grad_norm": 0.40100619196891785, "learning_rate": 4.415422339714999e-05, "loss": 0.2397, "step": 16816 }, { "epoch": 0.2999500588592017, "grad_norm": 0.26747944951057434, "learning_rate": 4.4153223087211257e-05, "loss": 0.1773, "step": 16817 }, { "epoch": 0.29996789498091536, "grad_norm": 0.21279700100421906, "learning_rate": 4.415222270302801e-05, "loss": 0.1777, "step": 16818 }, { "epoch": 0.29998573110262905, "grad_norm": 0.2474087029695511, "learning_rate": 4.415122224460414e-05, "loss": 0.1784, "step": 16819 }, { "epoch": 0.30000356722434274, "grad_norm": 0.33131587505340576, "learning_rate": 4.415022171194351e-05, "loss": 0.1669, "step": 16820 }, { "epoch": 0.3000214033460564, "grad_norm": 0.3297050893306732, "learning_rate": 4.414922110505001e-05, "loss": 0.2364, "step": 16821 }, { "epoch": 0.3000392394677701, "grad_norm": 0.24292252957820892, "learning_rate": 4.414822042392752e-05, "loss": 0.1897, "step": 16822 }, { "epoch": 0.3000570755894838, "grad_norm": 0.22840245068073273, "learning_rate": 4.414721966857991e-05, "loss": 0.176, "step": 16823 }, { "epoch": 0.3000749117111975, "grad_norm": 0.33364999294281006, "learning_rate": 4.414621883901106e-05, "loss": 0.163, "step": 16824 }, { "epoch": 0.30009274783291123, "grad_norm": 0.2814142405986786, "learning_rate": 4.414521793522486e-05, "loss": 0.2098, "step": 16825 }, { "epoch": 0.3001105839546249, "grad_norm": 0.293827623128891, "learning_rate": 4.4144216957225185e-05, "loss": 0.223, "step": 16826 }, { "epoch": 0.3001284200763386, "grad_norm": 0.22876468300819397, "learning_rate": 4.41432159050159e-05, "loss": 0.2196, "step": 16827 }, { "epoch": 0.3001462561980523, "grad_norm": 0.21677803993225098, "learning_rate": 4.41422147786009e-05, "loss": 0.1742, "step": 16828 }, { "epoch": 0.300164092319766, "grad_norm": 0.3324127495288849, "learning_rate": 4.414121357798408e-05, "loss": 0.1657, "step": 16829 }, { "epoch": 0.3001819284414797, "grad_norm": 0.2706266939640045, "learning_rate": 4.4140212303169295e-05, "loss": 0.1961, "step": 16830 }, { "epoch": 0.30019976456319336, "grad_norm": 0.2568056583404541, "learning_rate": 4.413921095416042e-05, "loss": 0.1777, "step": 16831 }, { "epoch": 0.30021760068490705, "grad_norm": 0.27072325348854065, "learning_rate": 4.413820953096138e-05, "loss": 0.1561, "step": 16832 }, { "epoch": 0.3002354368066208, "grad_norm": 0.24612703919410706, "learning_rate": 4.413720803357602e-05, "loss": 0.1712, "step": 16833 }, { "epoch": 0.3002532729283345, "grad_norm": 0.35841497778892517, "learning_rate": 4.413620646200822e-05, "loss": 0.1531, "step": 16834 }, { "epoch": 0.30027110905004817, "grad_norm": 0.34456342458724976, "learning_rate": 4.413520481626189e-05, "loss": 0.1665, "step": 16835 }, { "epoch": 0.30028894517176186, "grad_norm": 0.24517013132572174, "learning_rate": 4.413420309634089e-05, "loss": 0.2115, "step": 16836 }, { "epoch": 0.30030678129347554, "grad_norm": 0.24718232452869415, "learning_rate": 4.4133201302249113e-05, "loss": 0.2094, "step": 16837 }, { "epoch": 0.30032461741518923, "grad_norm": 0.2879747748374939, "learning_rate": 4.413219943399044e-05, "loss": 0.1718, "step": 16838 }, { "epoch": 0.3003424535369029, "grad_norm": 0.4600638747215271, "learning_rate": 4.413119749156875e-05, "loss": 0.2177, "step": 16839 }, { "epoch": 0.3003602896586166, "grad_norm": 0.29009267687797546, "learning_rate": 4.4130195474987934e-05, "loss": 0.1403, "step": 16840 }, { "epoch": 0.30037812578033035, "grad_norm": 0.2331254929304123, "learning_rate": 4.4129193384251874e-05, "loss": 0.1742, "step": 16841 }, { "epoch": 0.30039596190204404, "grad_norm": 0.33358296751976013, "learning_rate": 4.412819121936445e-05, "loss": 0.1584, "step": 16842 }, { "epoch": 0.3004137980237577, "grad_norm": 0.3982353210449219, "learning_rate": 4.412718898032955e-05, "loss": 0.1226, "step": 16843 }, { "epoch": 0.3004316341454714, "grad_norm": 0.2488405704498291, "learning_rate": 4.412618666715106e-05, "loss": 0.1862, "step": 16844 }, { "epoch": 0.3004494702671851, "grad_norm": 0.288212388753891, "learning_rate": 4.4125184279832864e-05, "loss": 0.2595, "step": 16845 }, { "epoch": 0.3004673063888988, "grad_norm": 0.2693442404270172, "learning_rate": 4.412418181837885e-05, "loss": 0.2192, "step": 16846 }, { "epoch": 0.3004851425106125, "grad_norm": 0.24485181272029877, "learning_rate": 4.41231792827929e-05, "loss": 0.2134, "step": 16847 }, { "epoch": 0.30050297863232617, "grad_norm": 0.33057427406311035, "learning_rate": 4.41221766730789e-05, "loss": 0.1735, "step": 16848 }, { "epoch": 0.30052081475403986, "grad_norm": 0.24340255558490753, "learning_rate": 4.412117398924074e-05, "loss": 0.2448, "step": 16849 }, { "epoch": 0.3005386508757536, "grad_norm": 0.19123055040836334, "learning_rate": 4.412017123128231e-05, "loss": 0.1649, "step": 16850 }, { "epoch": 0.3005564869974673, "grad_norm": 0.278317928314209, "learning_rate": 4.411916839920749e-05, "loss": 0.1513, "step": 16851 }, { "epoch": 0.300574323119181, "grad_norm": 0.24234922230243683, "learning_rate": 4.411816549302017e-05, "loss": 0.1623, "step": 16852 }, { "epoch": 0.30059215924089466, "grad_norm": 0.26652705669403076, "learning_rate": 4.4117162512724236e-05, "loss": 0.1527, "step": 16853 }, { "epoch": 0.30060999536260835, "grad_norm": 0.27909377217292786, "learning_rate": 4.411615945832358e-05, "loss": 0.1931, "step": 16854 }, { "epoch": 0.30062783148432204, "grad_norm": 0.2569110691547394, "learning_rate": 4.411515632982208e-05, "loss": 0.1482, "step": 16855 }, { "epoch": 0.3006456676060357, "grad_norm": 0.32553431391716003, "learning_rate": 4.411415312722364e-05, "loss": 0.194, "step": 16856 }, { "epoch": 0.3006635037277494, "grad_norm": 0.4370788633823395, "learning_rate": 4.411314985053214e-05, "loss": 0.2551, "step": 16857 }, { "epoch": 0.30068133984946316, "grad_norm": 0.23892571032047272, "learning_rate": 4.4112146499751465e-05, "loss": 0.2054, "step": 16858 }, { "epoch": 0.30069917597117685, "grad_norm": 0.20712657272815704, "learning_rate": 4.411114307488551e-05, "loss": 0.1528, "step": 16859 }, { "epoch": 0.30071701209289053, "grad_norm": 0.2771521806716919, "learning_rate": 4.411013957593817e-05, "loss": 0.1666, "step": 16860 }, { "epoch": 0.3007348482146042, "grad_norm": 0.2017921805381775, "learning_rate": 4.410913600291332e-05, "loss": 0.1416, "step": 16861 }, { "epoch": 0.3007526843363179, "grad_norm": 0.35346201062202454, "learning_rate": 4.4108132355814864e-05, "loss": 0.1382, "step": 16862 }, { "epoch": 0.3007705204580316, "grad_norm": 0.22023862600326538, "learning_rate": 4.410712863464668e-05, "loss": 0.1864, "step": 16863 }, { "epoch": 0.3007883565797453, "grad_norm": 0.22985489666461945, "learning_rate": 4.410612483941268e-05, "loss": 0.1269, "step": 16864 }, { "epoch": 0.300806192701459, "grad_norm": 0.4157508611679077, "learning_rate": 4.410512097011673e-05, "loss": 0.2055, "step": 16865 }, { "epoch": 0.30082402882317266, "grad_norm": 0.24915878474712372, "learning_rate": 4.4104117026762734e-05, "loss": 0.1615, "step": 16866 }, { "epoch": 0.3008418649448864, "grad_norm": 0.2501599192619324, "learning_rate": 4.410311300935459e-05, "loss": 0.1782, "step": 16867 }, { "epoch": 0.3008597010666001, "grad_norm": 0.26404523849487305, "learning_rate": 4.4102108917896165e-05, "loss": 0.1854, "step": 16868 }, { "epoch": 0.3008775371883138, "grad_norm": 0.2470647692680359, "learning_rate": 4.410110475239139e-05, "loss": 0.1605, "step": 16869 }, { "epoch": 0.30089537331002747, "grad_norm": 0.28237083554267883, "learning_rate": 4.4100100512844116e-05, "loss": 0.1612, "step": 16870 }, { "epoch": 0.30091320943174116, "grad_norm": 0.32255420088768005, "learning_rate": 4.409909619925827e-05, "loss": 0.1972, "step": 16871 }, { "epoch": 0.30093104555345485, "grad_norm": 0.25200021266937256, "learning_rate": 4.409809181163772e-05, "loss": 0.1604, "step": 16872 }, { "epoch": 0.30094888167516853, "grad_norm": 0.2677861750125885, "learning_rate": 4.4097087349986376e-05, "loss": 0.1711, "step": 16873 }, { "epoch": 0.3009667177968822, "grad_norm": 0.38924846053123474, "learning_rate": 4.409608281430812e-05, "loss": 0.1854, "step": 16874 }, { "epoch": 0.30098455391859597, "grad_norm": 0.3989528715610504, "learning_rate": 4.409507820460686e-05, "loss": 0.1908, "step": 16875 }, { "epoch": 0.30100239004030965, "grad_norm": 0.32209452986717224, "learning_rate": 4.409407352088647e-05, "loss": 0.1957, "step": 16876 }, { "epoch": 0.30102022616202334, "grad_norm": 0.2875838577747345, "learning_rate": 4.409306876315087e-05, "loss": 0.1689, "step": 16877 }, { "epoch": 0.30103806228373703, "grad_norm": 0.46301335096359253, "learning_rate": 4.4092063931403924e-05, "loss": 0.1897, "step": 16878 }, { "epoch": 0.3010558984054507, "grad_norm": 0.30506759881973267, "learning_rate": 4.4091059025649564e-05, "loss": 0.1541, "step": 16879 }, { "epoch": 0.3010737345271644, "grad_norm": 0.3367038369178772, "learning_rate": 4.409005404589165e-05, "loss": 0.2031, "step": 16880 }, { "epoch": 0.3010915706488781, "grad_norm": 0.2865173816680908, "learning_rate": 4.40890489921341e-05, "loss": 0.1877, "step": 16881 }, { "epoch": 0.3011094067705918, "grad_norm": 0.28864696621894836, "learning_rate": 4.40880438643808e-05, "loss": 0.1728, "step": 16882 }, { "epoch": 0.3011272428923055, "grad_norm": 0.3322356939315796, "learning_rate": 4.408703866263565e-05, "loss": 0.2066, "step": 16883 }, { "epoch": 0.3011450790140192, "grad_norm": 0.28695881366729736, "learning_rate": 4.408603338690255e-05, "loss": 0.191, "step": 16884 }, { "epoch": 0.3011629151357329, "grad_norm": 0.2675721347332001, "learning_rate": 4.408502803718538e-05, "loss": 0.2096, "step": 16885 }, { "epoch": 0.3011807512574466, "grad_norm": 0.2305581122636795, "learning_rate": 4.408402261348806e-05, "loss": 0.1986, "step": 16886 }, { "epoch": 0.3011985873791603, "grad_norm": 0.29373911023139954, "learning_rate": 4.4083017115814474e-05, "loss": 0.2142, "step": 16887 }, { "epoch": 0.30121642350087396, "grad_norm": 0.25480425357818604, "learning_rate": 4.408201154416853e-05, "loss": 0.1703, "step": 16888 }, { "epoch": 0.30123425962258765, "grad_norm": 0.2277686595916748, "learning_rate": 4.4081005898554106e-05, "loss": 0.1317, "step": 16889 }, { "epoch": 0.30125209574430134, "grad_norm": 0.2672290503978729, "learning_rate": 4.4080000178975126e-05, "loss": 0.1957, "step": 16890 }, { "epoch": 0.30126993186601503, "grad_norm": 0.41452622413635254, "learning_rate": 4.407899438543547e-05, "loss": 0.1771, "step": 16891 }, { "epoch": 0.30128776798772877, "grad_norm": 0.3050772249698639, "learning_rate": 4.407798851793904e-05, "loss": 0.2246, "step": 16892 }, { "epoch": 0.30130560410944246, "grad_norm": 0.25255250930786133, "learning_rate": 4.407698257648973e-05, "loss": 0.1535, "step": 16893 }, { "epoch": 0.30132344023115615, "grad_norm": 0.24606359004974365, "learning_rate": 4.407597656109146e-05, "loss": 0.1419, "step": 16894 }, { "epoch": 0.30134127635286984, "grad_norm": 0.22171658277511597, "learning_rate": 4.4074970471748114e-05, "loss": 0.1746, "step": 16895 }, { "epoch": 0.3013591124745835, "grad_norm": 0.36377984285354614, "learning_rate": 4.407396430846358e-05, "loss": 0.1746, "step": 16896 }, { "epoch": 0.3013769485962972, "grad_norm": 0.40162044763565063, "learning_rate": 4.407295807124179e-05, "loss": 0.2032, "step": 16897 }, { "epoch": 0.3013947847180109, "grad_norm": 0.24698717892169952, "learning_rate": 4.4071951760086615e-05, "loss": 0.1909, "step": 16898 }, { "epoch": 0.3014126208397246, "grad_norm": 0.23228327929973602, "learning_rate": 4.407094537500197e-05, "loss": 0.1582, "step": 16899 }, { "epoch": 0.30143045696143833, "grad_norm": 0.32421815395355225, "learning_rate": 4.4069938915991756e-05, "loss": 0.2042, "step": 16900 }, { "epoch": 0.301448293083152, "grad_norm": 0.3217966854572296, "learning_rate": 4.406893238305988e-05, "loss": 0.1732, "step": 16901 }, { "epoch": 0.3014661292048657, "grad_norm": 0.21149370074272156, "learning_rate": 4.4067925776210226e-05, "loss": 0.1763, "step": 16902 }, { "epoch": 0.3014839653265794, "grad_norm": 0.31402158737182617, "learning_rate": 4.406691909544671e-05, "loss": 0.1626, "step": 16903 }, { "epoch": 0.3015018014482931, "grad_norm": 0.2893681824207306, "learning_rate": 4.406591234077323e-05, "loss": 0.1634, "step": 16904 }, { "epoch": 0.30151963757000677, "grad_norm": 0.24363847076892853, "learning_rate": 4.406490551219368e-05, "loss": 0.158, "step": 16905 }, { "epoch": 0.30153747369172046, "grad_norm": 0.2350783348083496, "learning_rate": 4.4063898609711986e-05, "loss": 0.1762, "step": 16906 }, { "epoch": 0.30155530981343415, "grad_norm": 0.2344968467950821, "learning_rate": 4.406289163333203e-05, "loss": 0.1593, "step": 16907 }, { "epoch": 0.30157314593514783, "grad_norm": 0.2735636234283447, "learning_rate": 4.406188458305771e-05, "loss": 0.2033, "step": 16908 }, { "epoch": 0.3015909820568616, "grad_norm": 0.18813778460025787, "learning_rate": 4.4060877458892954e-05, "loss": 0.1818, "step": 16909 }, { "epoch": 0.30160881817857527, "grad_norm": 0.25619176030158997, "learning_rate": 4.4059870260841654e-05, "loss": 0.1856, "step": 16910 }, { "epoch": 0.30162665430028895, "grad_norm": 0.22614216804504395, "learning_rate": 4.4058862988907715e-05, "loss": 0.2121, "step": 16911 }, { "epoch": 0.30164449042200264, "grad_norm": 0.36545228958129883, "learning_rate": 4.4057855643095034e-05, "loss": 0.1449, "step": 16912 }, { "epoch": 0.30166232654371633, "grad_norm": 0.3354528546333313, "learning_rate": 4.405684822340753e-05, "loss": 0.1603, "step": 16913 }, { "epoch": 0.30168016266543, "grad_norm": 0.21166838705539703, "learning_rate": 4.40558407298491e-05, "loss": 0.1556, "step": 16914 }, { "epoch": 0.3016979987871437, "grad_norm": 0.21349474787712097, "learning_rate": 4.405483316242364e-05, "loss": 0.1481, "step": 16915 }, { "epoch": 0.3017158349088574, "grad_norm": 0.3126707673072815, "learning_rate": 4.4053825521135066e-05, "loss": 0.2052, "step": 16916 }, { "epoch": 0.30173367103057114, "grad_norm": 0.28107452392578125, "learning_rate": 4.405281780598729e-05, "loss": 0.1964, "step": 16917 }, { "epoch": 0.3017515071522848, "grad_norm": 0.21973201632499695, "learning_rate": 4.405181001698421e-05, "loss": 0.1209, "step": 16918 }, { "epoch": 0.3017693432739985, "grad_norm": 0.2201208770275116, "learning_rate": 4.4050802154129734e-05, "loss": 0.2096, "step": 16919 }, { "epoch": 0.3017871793957122, "grad_norm": 0.27609607577323914, "learning_rate": 4.4049794217427764e-05, "loss": 0.1694, "step": 16920 }, { "epoch": 0.3018050155174259, "grad_norm": 0.2822144329547882, "learning_rate": 4.404878620688222e-05, "loss": 0.1164, "step": 16921 }, { "epoch": 0.3018228516391396, "grad_norm": 0.2591968774795532, "learning_rate": 4.4047778122497e-05, "loss": 0.1594, "step": 16922 }, { "epoch": 0.30184068776085327, "grad_norm": 0.3573529124259949, "learning_rate": 4.404676996427601e-05, "loss": 0.233, "step": 16923 }, { "epoch": 0.30185852388256695, "grad_norm": 0.7974376678466797, "learning_rate": 4.4045761732223165e-05, "loss": 0.3663, "step": 16924 }, { "epoch": 0.30187636000428064, "grad_norm": 0.26573729515075684, "learning_rate": 4.404475342634236e-05, "loss": 0.1647, "step": 16925 }, { "epoch": 0.3018941961259944, "grad_norm": 0.24243800342082977, "learning_rate": 4.404374504663752e-05, "loss": 0.1596, "step": 16926 }, { "epoch": 0.3019120322477081, "grad_norm": 0.2948571443557739, "learning_rate": 4.4042736593112544e-05, "loss": 0.2185, "step": 16927 }, { "epoch": 0.30192986836942176, "grad_norm": 0.25806403160095215, "learning_rate": 4.404172806577135e-05, "loss": 0.1774, "step": 16928 }, { "epoch": 0.30194770449113545, "grad_norm": 0.28467893600463867, "learning_rate": 4.404071946461784e-05, "loss": 0.1841, "step": 16929 }, { "epoch": 0.30196554061284914, "grad_norm": 0.20621229708194733, "learning_rate": 4.4039710789655916e-05, "loss": 0.1508, "step": 16930 }, { "epoch": 0.3019833767345628, "grad_norm": 0.2832823395729065, "learning_rate": 4.403870204088951e-05, "loss": 0.1672, "step": 16931 }, { "epoch": 0.3020012128562765, "grad_norm": 0.244558185338974, "learning_rate": 4.4037693218322506e-05, "loss": 0.1845, "step": 16932 }, { "epoch": 0.3020190489779902, "grad_norm": 0.2347700595855713, "learning_rate": 4.403668432195883e-05, "loss": 0.1458, "step": 16933 }, { "epoch": 0.30203688509970394, "grad_norm": 0.2341766655445099, "learning_rate": 4.4035675351802396e-05, "loss": 0.154, "step": 16934 }, { "epoch": 0.30205472122141763, "grad_norm": 0.2508016526699066, "learning_rate": 4.403466630785711e-05, "loss": 0.1407, "step": 16935 }, { "epoch": 0.3020725573431313, "grad_norm": 0.25141480565071106, "learning_rate": 4.403365719012688e-05, "loss": 0.1419, "step": 16936 }, { "epoch": 0.302090393464845, "grad_norm": 0.4405762851238251, "learning_rate": 4.4032647998615623e-05, "loss": 0.1858, "step": 16937 }, { "epoch": 0.3021082295865587, "grad_norm": 0.27560028433799744, "learning_rate": 4.403163873332725e-05, "loss": 0.2225, "step": 16938 }, { "epoch": 0.3021260657082724, "grad_norm": 0.33127138018608093, "learning_rate": 4.4030629394265666e-05, "loss": 0.1982, "step": 16939 }, { "epoch": 0.3021439018299861, "grad_norm": 0.31129294633865356, "learning_rate": 4.402961998143479e-05, "loss": 0.1921, "step": 16940 }, { "epoch": 0.30216173795169976, "grad_norm": 0.30967044830322266, "learning_rate": 4.402861049483854e-05, "loss": 0.1767, "step": 16941 }, { "epoch": 0.3021795740734135, "grad_norm": 0.2037944197654724, "learning_rate": 4.402760093448082e-05, "loss": 0.1484, "step": 16942 }, { "epoch": 0.3021974101951272, "grad_norm": 0.25828155875205994, "learning_rate": 4.4026591300365545e-05, "loss": 0.1489, "step": 16943 }, { "epoch": 0.3022152463168409, "grad_norm": 0.2212415188550949, "learning_rate": 4.4025581592496635e-05, "loss": 0.1618, "step": 16944 }, { "epoch": 0.30223308243855457, "grad_norm": 0.20023676753044128, "learning_rate": 4.4024571810878e-05, "loss": 0.1984, "step": 16945 }, { "epoch": 0.30225091856026826, "grad_norm": 0.2713087499141693, "learning_rate": 4.402356195551355e-05, "loss": 0.172, "step": 16946 }, { "epoch": 0.30226875468198194, "grad_norm": 0.33247488737106323, "learning_rate": 4.4022552026407204e-05, "loss": 0.2011, "step": 16947 }, { "epoch": 0.30228659080369563, "grad_norm": 0.24287216365337372, "learning_rate": 4.402154202356288e-05, "loss": 0.133, "step": 16948 }, { "epoch": 0.3023044269254093, "grad_norm": 0.20639853179454803, "learning_rate": 4.4020531946984476e-05, "loss": 0.1544, "step": 16949 }, { "epoch": 0.302322263047123, "grad_norm": 0.2537766695022583, "learning_rate": 4.4019521796675936e-05, "loss": 0.1678, "step": 16950 }, { "epoch": 0.30234009916883675, "grad_norm": 0.23143869638442993, "learning_rate": 4.401851157264115e-05, "loss": 0.1496, "step": 16951 }, { "epoch": 0.30235793529055044, "grad_norm": 0.2683291435241699, "learning_rate": 4.401750127488405e-05, "loss": 0.1121, "step": 16952 }, { "epoch": 0.3023757714122641, "grad_norm": 0.2808206081390381, "learning_rate": 4.401649090340855e-05, "loss": 0.1899, "step": 16953 }, { "epoch": 0.3023936075339778, "grad_norm": 0.23837272822856903, "learning_rate": 4.4015480458218564e-05, "loss": 0.1872, "step": 16954 }, { "epoch": 0.3024114436556915, "grad_norm": 0.27478933334350586, "learning_rate": 4.4014469939318e-05, "loss": 0.1489, "step": 16955 }, { "epoch": 0.3024292797774052, "grad_norm": 0.2566738724708557, "learning_rate": 4.401345934671078e-05, "loss": 0.2003, "step": 16956 }, { "epoch": 0.3024471158991189, "grad_norm": 0.28140363097190857, "learning_rate": 4.4012448680400835e-05, "loss": 0.1738, "step": 16957 }, { "epoch": 0.30246495202083257, "grad_norm": 0.2634008824825287, "learning_rate": 4.401143794039207e-05, "loss": 0.2415, "step": 16958 }, { "epoch": 0.3024827881425463, "grad_norm": 0.3733348846435547, "learning_rate": 4.40104271266884e-05, "loss": 0.2047, "step": 16959 }, { "epoch": 0.30250062426426, "grad_norm": 0.27941736578941345, "learning_rate": 4.4009416239293756e-05, "loss": 0.1713, "step": 16960 }, { "epoch": 0.3025184603859737, "grad_norm": 0.23974740505218506, "learning_rate": 4.400840527821204e-05, "loss": 0.2106, "step": 16961 }, { "epoch": 0.3025362965076874, "grad_norm": 0.29771092534065247, "learning_rate": 4.400739424344719e-05, "loss": 0.1727, "step": 16962 }, { "epoch": 0.30255413262940106, "grad_norm": 0.27438756823539734, "learning_rate": 4.4006383135003106e-05, "loss": 0.2312, "step": 16963 }, { "epoch": 0.30257196875111475, "grad_norm": 0.1773771047592163, "learning_rate": 4.4005371952883725e-05, "loss": 0.1468, "step": 16964 }, { "epoch": 0.30258980487282844, "grad_norm": 0.24084240198135376, "learning_rate": 4.400436069709295e-05, "loss": 0.1759, "step": 16965 }, { "epoch": 0.3026076409945421, "grad_norm": 0.29347968101501465, "learning_rate": 4.400334936763471e-05, "loss": 0.1842, "step": 16966 }, { "epoch": 0.3026254771162558, "grad_norm": 0.2621789872646332, "learning_rate": 4.4002337964512926e-05, "loss": 0.16, "step": 16967 }, { "epoch": 0.30264331323796956, "grad_norm": 0.3632178008556366, "learning_rate": 4.400132648773151e-05, "loss": 0.2045, "step": 16968 }, { "epoch": 0.30266114935968325, "grad_norm": 0.24869462847709656, "learning_rate": 4.400031493729441e-05, "loss": 0.1782, "step": 16969 }, { "epoch": 0.30267898548139693, "grad_norm": 0.32511067390441895, "learning_rate": 4.399930331320551e-05, "loss": 0.2122, "step": 16970 }, { "epoch": 0.3026968216031106, "grad_norm": 0.3275891840457916, "learning_rate": 4.3998291615468746e-05, "loss": 0.1884, "step": 16971 }, { "epoch": 0.3027146577248243, "grad_norm": 0.2646700441837311, "learning_rate": 4.399727984408805e-05, "loss": 0.1741, "step": 16972 }, { "epoch": 0.302732493846538, "grad_norm": 0.2906220555305481, "learning_rate": 4.399626799906733e-05, "loss": 0.1499, "step": 16973 }, { "epoch": 0.3027503299682517, "grad_norm": 0.3187178373336792, "learning_rate": 4.399525608041052e-05, "loss": 0.2222, "step": 16974 }, { "epoch": 0.3027681660899654, "grad_norm": 0.24243609607219696, "learning_rate": 4.399424408812154e-05, "loss": 0.1795, "step": 16975 }, { "epoch": 0.3027860022116791, "grad_norm": 0.2514675557613373, "learning_rate": 4.39932320222043e-05, "loss": 0.176, "step": 16976 }, { "epoch": 0.3028038383333928, "grad_norm": 0.31117239594459534, "learning_rate": 4.399221988266273e-05, "loss": 0.1634, "step": 16977 }, { "epoch": 0.3028216744551065, "grad_norm": 0.2353062927722931, "learning_rate": 4.399120766950077e-05, "loss": 0.159, "step": 16978 }, { "epoch": 0.3028395105768202, "grad_norm": 0.1824413239955902, "learning_rate": 4.399019538272232e-05, "loss": 0.1327, "step": 16979 }, { "epoch": 0.30285734669853387, "grad_norm": 0.3388562500476837, "learning_rate": 4.3989183022331315e-05, "loss": 0.1105, "step": 16980 }, { "epoch": 0.30287518282024756, "grad_norm": 0.1896107941865921, "learning_rate": 4.398817058833168e-05, "loss": 0.1198, "step": 16981 }, { "epoch": 0.30289301894196125, "grad_norm": 0.23556606471538544, "learning_rate": 4.398715808072734e-05, "loss": 0.1921, "step": 16982 }, { "epoch": 0.30291085506367493, "grad_norm": 0.29156801104545593, "learning_rate": 4.3986145499522216e-05, "loss": 0.197, "step": 16983 }, { "epoch": 0.3029286911853887, "grad_norm": 0.41407445073127747, "learning_rate": 4.398513284472023e-05, "loss": 0.17, "step": 16984 }, { "epoch": 0.30294652730710236, "grad_norm": 0.19947557151317596, "learning_rate": 4.398412011632531e-05, "loss": 0.1516, "step": 16985 }, { "epoch": 0.30296436342881605, "grad_norm": 0.2209668606519699, "learning_rate": 4.398310731434139e-05, "loss": 0.156, "step": 16986 }, { "epoch": 0.30298219955052974, "grad_norm": 0.3041382133960724, "learning_rate": 4.398209443877239e-05, "loss": 0.1352, "step": 16987 }, { "epoch": 0.30300003567224343, "grad_norm": 0.2699025273323059, "learning_rate": 4.398108148962223e-05, "loss": 0.1499, "step": 16988 }, { "epoch": 0.3030178717939571, "grad_norm": 0.21555422246456146, "learning_rate": 4.398006846689484e-05, "loss": 0.1698, "step": 16989 }, { "epoch": 0.3030357079156708, "grad_norm": 0.4011364281177521, "learning_rate": 4.397905537059416e-05, "loss": 0.2007, "step": 16990 }, { "epoch": 0.3030535440373845, "grad_norm": 0.2812120020389557, "learning_rate": 4.39780422007241e-05, "loss": 0.1468, "step": 16991 }, { "epoch": 0.3030713801590982, "grad_norm": 0.2659045457839966, "learning_rate": 4.397702895728859e-05, "loss": 0.1718, "step": 16992 }, { "epoch": 0.3030892162808119, "grad_norm": 0.2492925375699997, "learning_rate": 4.3976015640291566e-05, "loss": 0.1778, "step": 16993 }, { "epoch": 0.3031070524025256, "grad_norm": 0.30119457840919495, "learning_rate": 4.3975002249736955e-05, "loss": 0.1842, "step": 16994 }, { "epoch": 0.3031248885242393, "grad_norm": 0.37086451053619385, "learning_rate": 4.397398878562867e-05, "loss": 0.1545, "step": 16995 }, { "epoch": 0.303142724645953, "grad_norm": 0.20378327369689941, "learning_rate": 4.397297524797066e-05, "loss": 0.1358, "step": 16996 }, { "epoch": 0.3031605607676667, "grad_norm": 0.3548892140388489, "learning_rate": 4.397196163676685e-05, "loss": 0.22, "step": 16997 }, { "epoch": 0.30317839688938036, "grad_norm": 0.21069128811359406, "learning_rate": 4.3970947952021154e-05, "loss": 0.1642, "step": 16998 }, { "epoch": 0.30319623301109405, "grad_norm": 0.23091521859169006, "learning_rate": 4.3969934193737516e-05, "loss": 0.2219, "step": 16999 }, { "epoch": 0.30321406913280774, "grad_norm": 0.2196367084980011, "learning_rate": 4.3968920361919865e-05, "loss": 0.1685, "step": 17000 }, { "epoch": 0.30321406913280774, "eval_loss": 0.16822969913482666, "eval_runtime": 106.0413, "eval_samples_per_second": 9.657, "eval_steps_per_second": 1.613, "step": 17000 }, { "epoch": 0.3032319052545215, "grad_norm": 0.25229987502098083, "learning_rate": 4.396790645657212e-05, "loss": 0.1903, "step": 17001 }, { "epoch": 0.30324974137623517, "grad_norm": 0.32935184240341187, "learning_rate": 4.3966892477698216e-05, "loss": 0.1428, "step": 17002 }, { "epoch": 0.30326757749794886, "grad_norm": 0.2073621153831482, "learning_rate": 4.3965878425302085e-05, "loss": 0.1182, "step": 17003 }, { "epoch": 0.30328541361966255, "grad_norm": 0.1906985491514206, "learning_rate": 4.396486429938766e-05, "loss": 0.2033, "step": 17004 }, { "epoch": 0.30330324974137624, "grad_norm": 0.2603358030319214, "learning_rate": 4.3963850099958884e-05, "loss": 0.1933, "step": 17005 }, { "epoch": 0.3033210858630899, "grad_norm": 0.23725542426109314, "learning_rate": 4.396283582701967e-05, "loss": 0.1724, "step": 17006 }, { "epoch": 0.3033389219848036, "grad_norm": 0.23338156938552856, "learning_rate": 4.396182148057394e-05, "loss": 0.1604, "step": 17007 }, { "epoch": 0.3033567581065173, "grad_norm": 0.29763856530189514, "learning_rate": 4.396080706062565e-05, "loss": 0.1257, "step": 17008 }, { "epoch": 0.303374594228231, "grad_norm": 0.2634035050868988, "learning_rate": 4.395979256717873e-05, "loss": 0.1998, "step": 17009 }, { "epoch": 0.30339243034994473, "grad_norm": 0.28416603803634644, "learning_rate": 4.39587780002371e-05, "loss": 0.1675, "step": 17010 }, { "epoch": 0.3034102664716584, "grad_norm": 0.2536405920982361, "learning_rate": 4.3957763359804695e-05, "loss": 0.1796, "step": 17011 }, { "epoch": 0.3034281025933721, "grad_norm": 0.2704066336154938, "learning_rate": 4.3956748645885455e-05, "loss": 0.1731, "step": 17012 }, { "epoch": 0.3034459387150858, "grad_norm": 0.27453184127807617, "learning_rate": 4.395573385848331e-05, "loss": 0.2455, "step": 17013 }, { "epoch": 0.3034637748367995, "grad_norm": 0.23233869671821594, "learning_rate": 4.395471899760219e-05, "loss": 0.1834, "step": 17014 }, { "epoch": 0.30348161095851317, "grad_norm": 0.26142966747283936, "learning_rate": 4.395370406324603e-05, "loss": 0.1477, "step": 17015 }, { "epoch": 0.30349944708022686, "grad_norm": 0.39710143208503723, "learning_rate": 4.395268905541877e-05, "loss": 0.1755, "step": 17016 }, { "epoch": 0.30351728320194055, "grad_norm": 0.3166236877441406, "learning_rate": 4.3951673974124346e-05, "loss": 0.187, "step": 17017 }, { "epoch": 0.3035351193236543, "grad_norm": 0.26329943537712097, "learning_rate": 4.395065881936669e-05, "loss": 0.1379, "step": 17018 }, { "epoch": 0.303552955445368, "grad_norm": 0.2653287351131439, "learning_rate": 4.394964359114972e-05, "loss": 0.148, "step": 17019 }, { "epoch": 0.30357079156708167, "grad_norm": 0.21244986355304718, "learning_rate": 4.39486282894774e-05, "loss": 0.135, "step": 17020 }, { "epoch": 0.30358862768879535, "grad_norm": 0.31565529108047485, "learning_rate": 4.3947612914353654e-05, "loss": 0.1928, "step": 17021 }, { "epoch": 0.30360646381050904, "grad_norm": 0.18823598325252533, "learning_rate": 4.3946597465782403e-05, "loss": 0.1371, "step": 17022 }, { "epoch": 0.30362429993222273, "grad_norm": 0.22636570036411285, "learning_rate": 4.39455819437676e-05, "loss": 0.1774, "step": 17023 }, { "epoch": 0.3036421360539364, "grad_norm": 0.2921881377696991, "learning_rate": 4.394456634831319e-05, "loss": 0.2319, "step": 17024 }, { "epoch": 0.3036599721756501, "grad_norm": 0.24715429544448853, "learning_rate": 4.3943550679423085e-05, "loss": 0.1943, "step": 17025 }, { "epoch": 0.3036778082973638, "grad_norm": 0.1877565234899521, "learning_rate": 4.3942534937101235e-05, "loss": 0.1729, "step": 17026 }, { "epoch": 0.30369564441907754, "grad_norm": 0.3187621235847473, "learning_rate": 4.394151912135158e-05, "loss": 0.1537, "step": 17027 }, { "epoch": 0.3037134805407912, "grad_norm": 0.2842850089073181, "learning_rate": 4.394050323217806e-05, "loss": 0.1646, "step": 17028 }, { "epoch": 0.3037313166625049, "grad_norm": 0.2343442589044571, "learning_rate": 4.39394872695846e-05, "loss": 0.1509, "step": 17029 }, { "epoch": 0.3037491527842186, "grad_norm": 0.2878730893135071, "learning_rate": 4.393847123357515e-05, "loss": 0.1865, "step": 17030 }, { "epoch": 0.3037669889059323, "grad_norm": 0.22307166457176208, "learning_rate": 4.3937455124153645e-05, "loss": 0.1549, "step": 17031 }, { "epoch": 0.303784825027646, "grad_norm": 0.2224445641040802, "learning_rate": 4.3936438941324024e-05, "loss": 0.1291, "step": 17032 }, { "epoch": 0.30380266114935967, "grad_norm": 0.26214656233787537, "learning_rate": 4.3935422685090215e-05, "loss": 0.1418, "step": 17033 }, { "epoch": 0.30382049727107335, "grad_norm": 0.26912835240364075, "learning_rate": 4.3934406355456184e-05, "loss": 0.1797, "step": 17034 }, { "epoch": 0.3038383333927871, "grad_norm": 0.27315858006477356, "learning_rate": 4.393338995242584e-05, "loss": 0.2291, "step": 17035 }, { "epoch": 0.3038561695145008, "grad_norm": 0.2199607938528061, "learning_rate": 4.393237347600314e-05, "loss": 0.2154, "step": 17036 }, { "epoch": 0.3038740056362145, "grad_norm": 0.3718980550765991, "learning_rate": 4.393135692619202e-05, "loss": 0.2124, "step": 17037 }, { "epoch": 0.30389184175792816, "grad_norm": 0.47460609674453735, "learning_rate": 4.393034030299643e-05, "loss": 0.2096, "step": 17038 }, { "epoch": 0.30390967787964185, "grad_norm": 0.2550341486930847, "learning_rate": 4.39293236064203e-05, "loss": 0.1941, "step": 17039 }, { "epoch": 0.30392751400135554, "grad_norm": 0.23543892800807953, "learning_rate": 4.392830683646757e-05, "loss": 0.1297, "step": 17040 }, { "epoch": 0.3039453501230692, "grad_norm": 0.2201838344335556, "learning_rate": 4.3927289993142185e-05, "loss": 0.1795, "step": 17041 }, { "epoch": 0.3039631862447829, "grad_norm": 0.3389892280101776, "learning_rate": 4.392627307644809e-05, "loss": 0.1682, "step": 17042 }, { "epoch": 0.30398102236649666, "grad_norm": 0.25401630997657776, "learning_rate": 4.392525608638922e-05, "loss": 0.2027, "step": 17043 }, { "epoch": 0.30399885848821034, "grad_norm": 0.3649926483631134, "learning_rate": 4.392423902296953e-05, "loss": 0.2452, "step": 17044 }, { "epoch": 0.30401669460992403, "grad_norm": 0.25108101963996887, "learning_rate": 4.3923221886192945e-05, "loss": 0.1491, "step": 17045 }, { "epoch": 0.3040345307316377, "grad_norm": 0.3020821809768677, "learning_rate": 4.3922204676063415e-05, "loss": 0.192, "step": 17046 }, { "epoch": 0.3040523668533514, "grad_norm": 0.18951134383678436, "learning_rate": 4.3921187392584884e-05, "loss": 0.1634, "step": 17047 }, { "epoch": 0.3040702029750651, "grad_norm": 0.2740190923213959, "learning_rate": 4.39201700357613e-05, "loss": 0.1755, "step": 17048 }, { "epoch": 0.3040880390967788, "grad_norm": 0.29754915833473206, "learning_rate": 4.39191526055966e-05, "loss": 0.1942, "step": 17049 }, { "epoch": 0.30410587521849247, "grad_norm": 0.39213114976882935, "learning_rate": 4.3918135102094736e-05, "loss": 0.2362, "step": 17050 }, { "epoch": 0.30412371134020616, "grad_norm": 0.299716591835022, "learning_rate": 4.391711752525964e-05, "loss": 0.1955, "step": 17051 }, { "epoch": 0.3041415474619199, "grad_norm": 0.26223134994506836, "learning_rate": 4.391609987509526e-05, "loss": 0.1917, "step": 17052 }, { "epoch": 0.3041593835836336, "grad_norm": 0.3177604675292969, "learning_rate": 4.391508215160555e-05, "loss": 0.1793, "step": 17053 }, { "epoch": 0.3041772197053473, "grad_norm": 0.21626578271389008, "learning_rate": 4.391406435479444e-05, "loss": 0.1666, "step": 17054 }, { "epoch": 0.30419505582706097, "grad_norm": 0.30217376351356506, "learning_rate": 4.391304648466589e-05, "loss": 0.1758, "step": 17055 }, { "epoch": 0.30421289194877466, "grad_norm": 0.204359769821167, "learning_rate": 4.3912028541223844e-05, "loss": 0.1664, "step": 17056 }, { "epoch": 0.30423072807048834, "grad_norm": 0.18372325599193573, "learning_rate": 4.391101052447224e-05, "loss": 0.1368, "step": 17057 }, { "epoch": 0.30424856419220203, "grad_norm": 0.22900182008743286, "learning_rate": 4.390999243441502e-05, "loss": 0.1176, "step": 17058 }, { "epoch": 0.3042664003139157, "grad_norm": 0.24883610010147095, "learning_rate": 4.3908974271056145e-05, "loss": 0.2003, "step": 17059 }, { "epoch": 0.30428423643562946, "grad_norm": 0.3380500376224518, "learning_rate": 4.390795603439955e-05, "loss": 0.1279, "step": 17060 }, { "epoch": 0.30430207255734315, "grad_norm": 0.1848413646221161, "learning_rate": 4.390693772444919e-05, "loss": 0.1643, "step": 17061 }, { "epoch": 0.30431990867905684, "grad_norm": 0.25698333978652954, "learning_rate": 4.390591934120901e-05, "loss": 0.166, "step": 17062 }, { "epoch": 0.3043377448007705, "grad_norm": 0.36611783504486084, "learning_rate": 4.3904900884682966e-05, "loss": 0.1765, "step": 17063 }, { "epoch": 0.3043555809224842, "grad_norm": 0.29192814230918884, "learning_rate": 4.390388235487498e-05, "loss": 0.1683, "step": 17064 }, { "epoch": 0.3043734170441979, "grad_norm": 0.24549239873886108, "learning_rate": 4.390286375178903e-05, "loss": 0.1958, "step": 17065 }, { "epoch": 0.3043912531659116, "grad_norm": 0.24323663115501404, "learning_rate": 4.390184507542904e-05, "loss": 0.121, "step": 17066 }, { "epoch": 0.3044090892876253, "grad_norm": 0.24716830253601074, "learning_rate": 4.3900826325798974e-05, "loss": 0.1688, "step": 17067 }, { "epoch": 0.30442692540933897, "grad_norm": 0.2796953320503235, "learning_rate": 4.389980750290278e-05, "loss": 0.1656, "step": 17068 }, { "epoch": 0.3044447615310527, "grad_norm": 0.2372235506772995, "learning_rate": 4.38987886067444e-05, "loss": 0.1685, "step": 17069 }, { "epoch": 0.3044625976527664, "grad_norm": 0.3148409128189087, "learning_rate": 4.389776963732779e-05, "loss": 0.2177, "step": 17070 }, { "epoch": 0.3044804337744801, "grad_norm": 0.2756401002407074, "learning_rate": 4.38967505946569e-05, "loss": 0.1725, "step": 17071 }, { "epoch": 0.3044982698961938, "grad_norm": 0.3646738529205322, "learning_rate": 4.3895731478735675e-05, "loss": 0.1571, "step": 17072 }, { "epoch": 0.30451610601790746, "grad_norm": 0.28116896748542786, "learning_rate": 4.389471228956807e-05, "loss": 0.2026, "step": 17073 }, { "epoch": 0.30453394213962115, "grad_norm": 0.3507188856601715, "learning_rate": 4.3893693027158035e-05, "loss": 0.1763, "step": 17074 }, { "epoch": 0.30455177826133484, "grad_norm": 0.20710495114326477, "learning_rate": 4.389267369150951e-05, "loss": 0.175, "step": 17075 }, { "epoch": 0.3045696143830485, "grad_norm": 0.29449790716171265, "learning_rate": 4.3891654282626474e-05, "loss": 0.2129, "step": 17076 }, { "epoch": 0.30458745050476227, "grad_norm": 0.18216033279895782, "learning_rate": 4.389063480051285e-05, "loss": 0.1475, "step": 17077 }, { "epoch": 0.30460528662647596, "grad_norm": 0.2358478307723999, "learning_rate": 4.38896152451726e-05, "loss": 0.1749, "step": 17078 }, { "epoch": 0.30462312274818965, "grad_norm": 0.21464504301548004, "learning_rate": 4.388859561660969e-05, "loss": 0.1624, "step": 17079 }, { "epoch": 0.30464095886990333, "grad_norm": 0.2703222930431366, "learning_rate": 4.3887575914828036e-05, "loss": 0.1453, "step": 17080 }, { "epoch": 0.304658794991617, "grad_norm": 0.27065274119377136, "learning_rate": 4.388655613983163e-05, "loss": 0.1824, "step": 17081 }, { "epoch": 0.3046766311133307, "grad_norm": 0.26811903715133667, "learning_rate": 4.388553629162441e-05, "loss": 0.1799, "step": 17082 }, { "epoch": 0.3046944672350444, "grad_norm": 0.35675516724586487, "learning_rate": 4.3884516370210325e-05, "loss": 0.1683, "step": 17083 }, { "epoch": 0.3047123033567581, "grad_norm": 0.2833549678325653, "learning_rate": 4.388349637559334e-05, "loss": 0.2409, "step": 17084 }, { "epoch": 0.30473013947847183, "grad_norm": 0.3995152711868286, "learning_rate": 4.388247630777739e-05, "loss": 0.2324, "step": 17085 }, { "epoch": 0.3047479756001855, "grad_norm": 0.2744006812572479, "learning_rate": 4.388145616676644e-05, "loss": 0.2162, "step": 17086 }, { "epoch": 0.3047658117218992, "grad_norm": 0.2544262707233429, "learning_rate": 4.388043595256445e-05, "loss": 0.2443, "step": 17087 }, { "epoch": 0.3047836478436129, "grad_norm": 0.30234959721565247, "learning_rate": 4.387941566517536e-05, "loss": 0.1803, "step": 17088 }, { "epoch": 0.3048014839653266, "grad_norm": 0.27694976329803467, "learning_rate": 4.387839530460315e-05, "loss": 0.1879, "step": 17089 }, { "epoch": 0.30481932008704027, "grad_norm": 0.3590095043182373, "learning_rate": 4.387737487085175e-05, "loss": 0.1256, "step": 17090 }, { "epoch": 0.30483715620875396, "grad_norm": 0.2435324341058731, "learning_rate": 4.3876354363925124e-05, "loss": 0.174, "step": 17091 }, { "epoch": 0.30485499233046764, "grad_norm": 0.21426516771316528, "learning_rate": 4.387533378382723e-05, "loss": 0.1253, "step": 17092 }, { "epoch": 0.30487282845218133, "grad_norm": 0.2532746493816376, "learning_rate": 4.3874313130562014e-05, "loss": 0.2078, "step": 17093 }, { "epoch": 0.3048906645738951, "grad_norm": 0.28635111451148987, "learning_rate": 4.3873292404133457e-05, "loss": 0.1921, "step": 17094 }, { "epoch": 0.30490850069560876, "grad_norm": 0.2923727333545685, "learning_rate": 4.387227160454549e-05, "loss": 0.1847, "step": 17095 }, { "epoch": 0.30492633681732245, "grad_norm": 0.21463806927204132, "learning_rate": 4.387125073180208e-05, "loss": 0.2156, "step": 17096 }, { "epoch": 0.30494417293903614, "grad_norm": 0.22355765104293823, "learning_rate": 4.387022978590719e-05, "loss": 0.1733, "step": 17097 }, { "epoch": 0.30496200906074983, "grad_norm": 0.28531867265701294, "learning_rate": 4.386920876686478e-05, "loss": 0.1947, "step": 17098 }, { "epoch": 0.3049798451824635, "grad_norm": 0.2557225525379181, "learning_rate": 4.3868187674678784e-05, "loss": 0.1881, "step": 17099 }, { "epoch": 0.3049976813041772, "grad_norm": 0.2729876637458801, "learning_rate": 4.386716650935318e-05, "loss": 0.1497, "step": 17100 }, { "epoch": 0.3050155174258909, "grad_norm": 0.3143730163574219, "learning_rate": 4.3866145270891924e-05, "loss": 0.1619, "step": 17101 }, { "epoch": 0.30503335354760464, "grad_norm": 0.30513739585876465, "learning_rate": 4.386512395929897e-05, "loss": 0.1733, "step": 17102 }, { "epoch": 0.3050511896693183, "grad_norm": 0.2931326925754547, "learning_rate": 4.386410257457828e-05, "loss": 0.2307, "step": 17103 }, { "epoch": 0.305069025791032, "grad_norm": 0.28533676266670227, "learning_rate": 4.386308111673382e-05, "loss": 0.1861, "step": 17104 }, { "epoch": 0.3050868619127457, "grad_norm": 0.2460443526506424, "learning_rate": 4.3862059585769534e-05, "loss": 0.1598, "step": 17105 }, { "epoch": 0.3051046980344594, "grad_norm": 0.3634937107563019, "learning_rate": 4.386103798168939e-05, "loss": 0.1424, "step": 17106 }, { "epoch": 0.3051225341561731, "grad_norm": 0.28374966979026794, "learning_rate": 4.3860016304497354e-05, "loss": 0.1848, "step": 17107 }, { "epoch": 0.30514037027788676, "grad_norm": 0.2175067961215973, "learning_rate": 4.385899455419738e-05, "loss": 0.1523, "step": 17108 }, { "epoch": 0.30515820639960045, "grad_norm": 0.24766366183757782, "learning_rate": 4.3857972730793426e-05, "loss": 0.2065, "step": 17109 }, { "epoch": 0.30517604252131414, "grad_norm": 0.2719930112361908, "learning_rate": 4.385695083428946e-05, "loss": 0.1626, "step": 17110 }, { "epoch": 0.3051938786430279, "grad_norm": 0.27074936032295227, "learning_rate": 4.3855928864689435e-05, "loss": 0.1483, "step": 17111 }, { "epoch": 0.30521171476474157, "grad_norm": 0.2806743383407593, "learning_rate": 4.385490682199732e-05, "loss": 0.1627, "step": 17112 }, { "epoch": 0.30522955088645526, "grad_norm": 0.28043878078460693, "learning_rate": 4.3853884706217074e-05, "loss": 0.1478, "step": 17113 }, { "epoch": 0.30524738700816895, "grad_norm": 0.2837179899215698, "learning_rate": 4.385286251735266e-05, "loss": 0.1718, "step": 17114 }, { "epoch": 0.30526522312988263, "grad_norm": 0.2241058051586151, "learning_rate": 4.385184025540804e-05, "loss": 0.1681, "step": 17115 }, { "epoch": 0.3052830592515963, "grad_norm": 0.24122250080108643, "learning_rate": 4.385081792038717e-05, "loss": 0.1488, "step": 17116 }, { "epoch": 0.30530089537331, "grad_norm": 0.3232344686985016, "learning_rate": 4.3849795512294025e-05, "loss": 0.1572, "step": 17117 }, { "epoch": 0.3053187314950237, "grad_norm": 0.24130931496620178, "learning_rate": 4.384877303113256e-05, "loss": 0.154, "step": 17118 }, { "epoch": 0.30533656761673744, "grad_norm": 0.21955129504203796, "learning_rate": 4.384775047690674e-05, "loss": 0.1614, "step": 17119 }, { "epoch": 0.30535440373845113, "grad_norm": 0.2242218255996704, "learning_rate": 4.3846727849620527e-05, "loss": 0.202, "step": 17120 }, { "epoch": 0.3053722398601648, "grad_norm": 0.3091730773448944, "learning_rate": 4.3845705149277895e-05, "loss": 0.2094, "step": 17121 }, { "epoch": 0.3053900759818785, "grad_norm": 0.22022242844104767, "learning_rate": 4.38446823758828e-05, "loss": 0.1751, "step": 17122 }, { "epoch": 0.3054079121035922, "grad_norm": 0.24822382628917694, "learning_rate": 4.3843659529439193e-05, "loss": 0.1522, "step": 17123 }, { "epoch": 0.3054257482253059, "grad_norm": 0.2767241597175598, "learning_rate": 4.384263660995107e-05, "loss": 0.2572, "step": 17124 }, { "epoch": 0.30544358434701957, "grad_norm": 0.21422463655471802, "learning_rate": 4.384161361742237e-05, "loss": 0.1706, "step": 17125 }, { "epoch": 0.30546142046873326, "grad_norm": 0.2786320447921753, "learning_rate": 4.384059055185708e-05, "loss": 0.1848, "step": 17126 }, { "epoch": 0.30547925659044695, "grad_norm": 0.30014023184776306, "learning_rate": 4.383956741325914e-05, "loss": 0.2029, "step": 17127 }, { "epoch": 0.3054970927121607, "grad_norm": 0.22989259660243988, "learning_rate": 4.383854420163253e-05, "loss": 0.1599, "step": 17128 }, { "epoch": 0.3055149288338744, "grad_norm": 0.2684986889362335, "learning_rate": 4.383752091698122e-05, "loss": 0.1779, "step": 17129 }, { "epoch": 0.30553276495558807, "grad_norm": 0.2732818126678467, "learning_rate": 4.3836497559309175e-05, "loss": 0.1662, "step": 17130 }, { "epoch": 0.30555060107730175, "grad_norm": 0.3699675500392914, "learning_rate": 4.383547412862036e-05, "loss": 0.2001, "step": 17131 }, { "epoch": 0.30556843719901544, "grad_norm": 0.22524546086788177, "learning_rate": 4.3834450624918735e-05, "loss": 0.2368, "step": 17132 }, { "epoch": 0.30558627332072913, "grad_norm": 0.2979113459587097, "learning_rate": 4.3833427048208284e-05, "loss": 0.2163, "step": 17133 }, { "epoch": 0.3056041094424428, "grad_norm": 0.3119056820869446, "learning_rate": 4.383240339849296e-05, "loss": 0.1366, "step": 17134 }, { "epoch": 0.3056219455641565, "grad_norm": 0.34714776277542114, "learning_rate": 4.383137967577673e-05, "loss": 0.1378, "step": 17135 }, { "epoch": 0.30563978168587025, "grad_norm": 0.33409959077835083, "learning_rate": 4.3830355880063576e-05, "loss": 0.2002, "step": 17136 }, { "epoch": 0.30565761780758394, "grad_norm": 0.22078469395637512, "learning_rate": 4.3829332011357456e-05, "loss": 0.171, "step": 17137 }, { "epoch": 0.3056754539292976, "grad_norm": 0.27064353227615356, "learning_rate": 4.382830806966234e-05, "loss": 0.1886, "step": 17138 }, { "epoch": 0.3056932900510113, "grad_norm": 0.2435254603624344, "learning_rate": 4.38272840549822e-05, "loss": 0.2141, "step": 17139 }, { "epoch": 0.305711126172725, "grad_norm": 0.23598533868789673, "learning_rate": 4.3826259967321e-05, "loss": 0.1696, "step": 17140 }, { "epoch": 0.3057289622944387, "grad_norm": 0.23116660118103027, "learning_rate": 4.382523580668273e-05, "loss": 0.1612, "step": 17141 }, { "epoch": 0.3057467984161524, "grad_norm": 0.32354721426963806, "learning_rate": 4.3824211573071324e-05, "loss": 0.2126, "step": 17142 }, { "epoch": 0.30576463453786606, "grad_norm": 0.2540068030357361, "learning_rate": 4.3823187266490784e-05, "loss": 0.1455, "step": 17143 }, { "epoch": 0.3057824706595798, "grad_norm": 0.2564283609390259, "learning_rate": 4.382216288694507e-05, "loss": 0.1593, "step": 17144 }, { "epoch": 0.3058003067812935, "grad_norm": 0.2832253873348236, "learning_rate": 4.382113843443815e-05, "loss": 0.1416, "step": 17145 }, { "epoch": 0.3058181429030072, "grad_norm": 0.2201293259859085, "learning_rate": 4.382011390897399e-05, "loss": 0.1622, "step": 17146 }, { "epoch": 0.30583597902472087, "grad_norm": 0.290938138961792, "learning_rate": 4.381908931055657e-05, "loss": 0.1608, "step": 17147 }, { "epoch": 0.30585381514643456, "grad_norm": 0.2617526352405548, "learning_rate": 4.381806463918987e-05, "loss": 0.1552, "step": 17148 }, { "epoch": 0.30587165126814825, "grad_norm": 0.2418537139892578, "learning_rate": 4.3817039894877845e-05, "loss": 0.1533, "step": 17149 }, { "epoch": 0.30588948738986194, "grad_norm": 0.3873763680458069, "learning_rate": 4.3816015077624474e-05, "loss": 0.2121, "step": 17150 }, { "epoch": 0.3059073235115756, "grad_norm": 0.242770254611969, "learning_rate": 4.3814990187433726e-05, "loss": 0.2069, "step": 17151 }, { "epoch": 0.3059251596332893, "grad_norm": 0.25278064608573914, "learning_rate": 4.3813965224309586e-05, "loss": 0.2042, "step": 17152 }, { "epoch": 0.30594299575500306, "grad_norm": 0.3051237463951111, "learning_rate": 4.381294018825601e-05, "loss": 0.1854, "step": 17153 }, { "epoch": 0.30596083187671674, "grad_norm": 0.40557852387428284, "learning_rate": 4.3811915079276986e-05, "loss": 0.1503, "step": 17154 }, { "epoch": 0.30597866799843043, "grad_norm": 0.2661967873573303, "learning_rate": 4.381088989737649e-05, "loss": 0.1682, "step": 17155 }, { "epoch": 0.3059965041201441, "grad_norm": 0.19283799827098846, "learning_rate": 4.3809864642558466e-05, "loss": 0.1478, "step": 17156 }, { "epoch": 0.3060143402418578, "grad_norm": 0.30515480041503906, "learning_rate": 4.380883931482692e-05, "loss": 0.188, "step": 17157 }, { "epoch": 0.3060321763635715, "grad_norm": 0.3168274164199829, "learning_rate": 4.380781391418582e-05, "loss": 0.1695, "step": 17158 }, { "epoch": 0.3060500124852852, "grad_norm": 0.2728056311607361, "learning_rate": 4.380678844063913e-05, "loss": 0.1714, "step": 17159 }, { "epoch": 0.30606784860699887, "grad_norm": 0.3205297291278839, "learning_rate": 4.3805762894190845e-05, "loss": 0.1787, "step": 17160 }, { "epoch": 0.3060856847287126, "grad_norm": 0.20630665123462677, "learning_rate": 4.380473727484492e-05, "loss": 0.1685, "step": 17161 }, { "epoch": 0.3061035208504263, "grad_norm": 0.26877710223197937, "learning_rate": 4.380371158260533e-05, "loss": 0.2076, "step": 17162 }, { "epoch": 0.30612135697214, "grad_norm": 0.2504344582557678, "learning_rate": 4.380268581747608e-05, "loss": 0.2159, "step": 17163 }, { "epoch": 0.3061391930938537, "grad_norm": 0.2371448129415512, "learning_rate": 4.3801659979461106e-05, "loss": 0.168, "step": 17164 }, { "epoch": 0.30615702921556737, "grad_norm": 0.2899780571460724, "learning_rate": 4.380063406856441e-05, "loss": 0.1826, "step": 17165 }, { "epoch": 0.30617486533728105, "grad_norm": 0.2562422752380371, "learning_rate": 4.3799608084789965e-05, "loss": 0.1702, "step": 17166 }, { "epoch": 0.30619270145899474, "grad_norm": 0.19431371986865997, "learning_rate": 4.379858202814174e-05, "loss": 0.1566, "step": 17167 }, { "epoch": 0.30621053758070843, "grad_norm": 0.32905110716819763, "learning_rate": 4.379755589862373e-05, "loss": 0.2069, "step": 17168 }, { "epoch": 0.3062283737024221, "grad_norm": 0.233099102973938, "learning_rate": 4.37965296962399e-05, "loss": 0.1482, "step": 17169 }, { "epoch": 0.30624620982413586, "grad_norm": 0.253312349319458, "learning_rate": 4.379550342099422e-05, "loss": 0.1815, "step": 17170 }, { "epoch": 0.30626404594584955, "grad_norm": 0.34701377153396606, "learning_rate": 4.3794477072890674e-05, "loss": 0.1758, "step": 17171 }, { "epoch": 0.30628188206756324, "grad_norm": 0.29600778222084045, "learning_rate": 4.3793450651933257e-05, "loss": 0.139, "step": 17172 }, { "epoch": 0.3062997181892769, "grad_norm": 0.35207584500312805, "learning_rate": 4.379242415812592e-05, "loss": 0.1272, "step": 17173 }, { "epoch": 0.3063175543109906, "grad_norm": 0.3003990948200226, "learning_rate": 4.3791397591472666e-05, "loss": 0.1526, "step": 17174 }, { "epoch": 0.3063353904327043, "grad_norm": 0.29370614886283875, "learning_rate": 4.379037095197746e-05, "loss": 0.169, "step": 17175 }, { "epoch": 0.306353226554418, "grad_norm": 0.2670826315879822, "learning_rate": 4.3789344239644294e-05, "loss": 0.1456, "step": 17176 }, { "epoch": 0.3063710626761317, "grad_norm": 0.36985883116722107, "learning_rate": 4.378831745447713e-05, "loss": 0.1643, "step": 17177 }, { "epoch": 0.3063888987978454, "grad_norm": 0.29399147629737854, "learning_rate": 4.378729059647996e-05, "loss": 0.1661, "step": 17178 }, { "epoch": 0.3064067349195591, "grad_norm": 0.431037575006485, "learning_rate": 4.378626366565677e-05, "loss": 0.1877, "step": 17179 }, { "epoch": 0.3064245710412728, "grad_norm": 0.23802313208580017, "learning_rate": 4.378523666201152e-05, "loss": 0.142, "step": 17180 }, { "epoch": 0.3064424071629865, "grad_norm": 0.25053784251213074, "learning_rate": 4.3784209585548216e-05, "loss": 0.1811, "step": 17181 }, { "epoch": 0.3064602432847002, "grad_norm": 0.2363741248846054, "learning_rate": 4.378318243627083e-05, "loss": 0.1885, "step": 17182 }, { "epoch": 0.30647807940641386, "grad_norm": 0.2585192322731018, "learning_rate": 4.378215521418333e-05, "loss": 0.1299, "step": 17183 }, { "epoch": 0.30649591552812755, "grad_norm": 0.30256012082099915, "learning_rate": 4.378112791928972e-05, "loss": 0.1838, "step": 17184 }, { "epoch": 0.30651375164984124, "grad_norm": 0.27657467126846313, "learning_rate": 4.378010055159396e-05, "loss": 0.2155, "step": 17185 }, { "epoch": 0.306531587771555, "grad_norm": 0.4010425806045532, "learning_rate": 4.3779073111100055e-05, "loss": 0.2099, "step": 17186 }, { "epoch": 0.30654942389326867, "grad_norm": 0.2496582418680191, "learning_rate": 4.3778045597811964e-05, "loss": 0.1639, "step": 17187 }, { "epoch": 0.30656726001498236, "grad_norm": 0.30885085463523865, "learning_rate": 4.37770180117337e-05, "loss": 0.1791, "step": 17188 }, { "epoch": 0.30658509613669604, "grad_norm": 0.26914405822753906, "learning_rate": 4.377599035286921e-05, "loss": 0.1732, "step": 17189 }, { "epoch": 0.30660293225840973, "grad_norm": 0.26386067271232605, "learning_rate": 4.377496262122251e-05, "loss": 0.1529, "step": 17190 }, { "epoch": 0.3066207683801234, "grad_norm": 0.20294250547885895, "learning_rate": 4.377393481679757e-05, "loss": 0.1964, "step": 17191 }, { "epoch": 0.3066386045018371, "grad_norm": 0.2744026184082031, "learning_rate": 4.3772906939598367e-05, "loss": 0.1844, "step": 17192 }, { "epoch": 0.3066564406235508, "grad_norm": 0.23701460659503937, "learning_rate": 4.37718789896289e-05, "loss": 0.1898, "step": 17193 }, { "epoch": 0.3066742767452645, "grad_norm": 0.2524645924568176, "learning_rate": 4.377085096689314e-05, "loss": 0.1885, "step": 17194 }, { "epoch": 0.30669211286697823, "grad_norm": 0.2189350575208664, "learning_rate": 4.376982287139508e-05, "loss": 0.1358, "step": 17195 }, { "epoch": 0.3067099489886919, "grad_norm": 0.32083654403686523, "learning_rate": 4.37687947031387e-05, "loss": 0.1954, "step": 17196 }, { "epoch": 0.3067277851104056, "grad_norm": 0.21298474073410034, "learning_rate": 4.3767766462128e-05, "loss": 0.1522, "step": 17197 }, { "epoch": 0.3067456212321193, "grad_norm": 0.2039504051208496, "learning_rate": 4.376673814836695e-05, "loss": 0.1351, "step": 17198 }, { "epoch": 0.306763457353833, "grad_norm": 0.28641849756240845, "learning_rate": 4.3765709761859534e-05, "loss": 0.1752, "step": 17199 }, { "epoch": 0.30678129347554667, "grad_norm": 0.3778807818889618, "learning_rate": 4.376468130260976e-05, "loss": 0.1751, "step": 17200 }, { "epoch": 0.30679912959726036, "grad_norm": 0.22659938037395477, "learning_rate": 4.376365277062159e-05, "loss": 0.1534, "step": 17201 }, { "epoch": 0.30681696571897404, "grad_norm": 0.25706738233566284, "learning_rate": 4.376262416589902e-05, "loss": 0.1812, "step": 17202 }, { "epoch": 0.3068348018406878, "grad_norm": 0.3632875680923462, "learning_rate": 4.376159548844604e-05, "loss": 0.2162, "step": 17203 }, { "epoch": 0.3068526379624015, "grad_norm": 0.2262653112411499, "learning_rate": 4.3760566738266635e-05, "loss": 0.1542, "step": 17204 }, { "epoch": 0.30687047408411516, "grad_norm": 0.4392228424549103, "learning_rate": 4.37595379153648e-05, "loss": 0.1254, "step": 17205 }, { "epoch": 0.30688831020582885, "grad_norm": 0.26066115498542786, "learning_rate": 4.375850901974451e-05, "loss": 0.1238, "step": 17206 }, { "epoch": 0.30690614632754254, "grad_norm": 0.29774630069732666, "learning_rate": 4.375748005140976e-05, "loss": 0.1976, "step": 17207 }, { "epoch": 0.3069239824492562, "grad_norm": 0.2691112458705902, "learning_rate": 4.375645101036454e-05, "loss": 0.1854, "step": 17208 }, { "epoch": 0.3069418185709699, "grad_norm": 0.426113098859787, "learning_rate": 4.375542189661284e-05, "loss": 0.1784, "step": 17209 }, { "epoch": 0.3069596546926836, "grad_norm": 0.2552269697189331, "learning_rate": 4.3754392710158644e-05, "loss": 0.1699, "step": 17210 }, { "epoch": 0.3069774908143973, "grad_norm": 0.26327285170555115, "learning_rate": 4.3753363451005944e-05, "loss": 0.1784, "step": 17211 }, { "epoch": 0.30699532693611103, "grad_norm": 0.2895050346851349, "learning_rate": 4.3752334119158736e-05, "loss": 0.1336, "step": 17212 }, { "epoch": 0.3070131630578247, "grad_norm": 0.20796377956867218, "learning_rate": 4.3751304714621e-05, "loss": 0.1743, "step": 17213 }, { "epoch": 0.3070309991795384, "grad_norm": 0.7539920806884766, "learning_rate": 4.375027523739672e-05, "loss": 0.1892, "step": 17214 }, { "epoch": 0.3070488353012521, "grad_norm": 0.2287214696407318, "learning_rate": 4.3749245687489915e-05, "loss": 0.2039, "step": 17215 }, { "epoch": 0.3070666714229658, "grad_norm": 0.21478188037872314, "learning_rate": 4.374821606490454e-05, "loss": 0.1654, "step": 17216 }, { "epoch": 0.3070845075446795, "grad_norm": 0.23590540885925293, "learning_rate": 4.3747186369644624e-05, "loss": 0.166, "step": 17217 }, { "epoch": 0.30710234366639316, "grad_norm": 0.2834855318069458, "learning_rate": 4.374615660171413e-05, "loss": 0.1892, "step": 17218 }, { "epoch": 0.30712017978810685, "grad_norm": 0.3188692629337311, "learning_rate": 4.3745126761117054e-05, "loss": 0.1828, "step": 17219 }, { "epoch": 0.3071380159098206, "grad_norm": 0.2887386381626129, "learning_rate": 4.37440968478574e-05, "loss": 0.1477, "step": 17220 }, { "epoch": 0.3071558520315343, "grad_norm": 0.2905057370662689, "learning_rate": 4.374306686193914e-05, "loss": 0.1556, "step": 17221 }, { "epoch": 0.30717368815324797, "grad_norm": 0.28902292251586914, "learning_rate": 4.374203680336629e-05, "loss": 0.1583, "step": 17222 }, { "epoch": 0.30719152427496166, "grad_norm": 0.31475913524627686, "learning_rate": 4.374100667214283e-05, "loss": 0.193, "step": 17223 }, { "epoch": 0.30720936039667535, "grad_norm": 0.3393841087818146, "learning_rate": 4.373997646827276e-05, "loss": 0.1628, "step": 17224 }, { "epoch": 0.30722719651838903, "grad_norm": 0.3062064051628113, "learning_rate": 4.3738946191760055e-05, "loss": 0.1806, "step": 17225 }, { "epoch": 0.3072450326401027, "grad_norm": 0.2175796627998352, "learning_rate": 4.373791584260873e-05, "loss": 0.1511, "step": 17226 }, { "epoch": 0.3072628687618164, "grad_norm": 0.22644783556461334, "learning_rate": 4.373688542082278e-05, "loss": 0.1762, "step": 17227 }, { "epoch": 0.3072807048835301, "grad_norm": 0.26100754737854004, "learning_rate": 4.373585492640618e-05, "loss": 0.2004, "step": 17228 }, { "epoch": 0.30729854100524384, "grad_norm": 0.24516786634922028, "learning_rate": 4.3734824359362936e-05, "loss": 0.1916, "step": 17229 }, { "epoch": 0.30731637712695753, "grad_norm": 0.23080916702747345, "learning_rate": 4.3733793719697047e-05, "loss": 0.1986, "step": 17230 }, { "epoch": 0.3073342132486712, "grad_norm": 0.24642398953437805, "learning_rate": 4.3732763007412495e-05, "loss": 0.1859, "step": 17231 }, { "epoch": 0.3073520493703849, "grad_norm": 0.3539651930332184, "learning_rate": 4.3731732222513286e-05, "loss": 0.1669, "step": 17232 }, { "epoch": 0.3073698854920986, "grad_norm": 0.26970309019088745, "learning_rate": 4.3730701365003425e-05, "loss": 0.2197, "step": 17233 }, { "epoch": 0.3073877216138123, "grad_norm": 0.28341996669769287, "learning_rate": 4.372967043488688e-05, "loss": 0.1459, "step": 17234 }, { "epoch": 0.30740555773552597, "grad_norm": 0.26668596267700195, "learning_rate": 4.3728639432167675e-05, "loss": 0.1805, "step": 17235 }, { "epoch": 0.30742339385723966, "grad_norm": 0.30018532276153564, "learning_rate": 4.372760835684978e-05, "loss": 0.179, "step": 17236 }, { "epoch": 0.3074412299789534, "grad_norm": 0.2692541778087616, "learning_rate": 4.372657720893722e-05, "loss": 0.1888, "step": 17237 }, { "epoch": 0.3074590661006671, "grad_norm": 0.24511288106441498, "learning_rate": 4.3725545988433974e-05, "loss": 0.2257, "step": 17238 }, { "epoch": 0.3074769022223808, "grad_norm": 0.26978081464767456, "learning_rate": 4.372451469534404e-05, "loss": 0.1928, "step": 17239 }, { "epoch": 0.30749473834409446, "grad_norm": 0.2563156187534332, "learning_rate": 4.372348332967143e-05, "loss": 0.182, "step": 17240 }, { "epoch": 0.30751257446580815, "grad_norm": 0.33181315660476685, "learning_rate": 4.372245189142012e-05, "loss": 0.1829, "step": 17241 }, { "epoch": 0.30753041058752184, "grad_norm": 0.21629559993743896, "learning_rate": 4.3721420380594135e-05, "loss": 0.1821, "step": 17242 }, { "epoch": 0.30754824670923553, "grad_norm": 0.34097108244895935, "learning_rate": 4.3720388797197455e-05, "loss": 0.2506, "step": 17243 }, { "epoch": 0.3075660828309492, "grad_norm": 0.24103644490242004, "learning_rate": 4.371935714123407e-05, "loss": 0.1105, "step": 17244 }, { "epoch": 0.30758391895266296, "grad_norm": 0.3936353921890259, "learning_rate": 4.3718325412708e-05, "loss": 0.1796, "step": 17245 }, { "epoch": 0.30760175507437665, "grad_norm": 0.2143954038619995, "learning_rate": 4.3717293611623236e-05, "loss": 0.1783, "step": 17246 }, { "epoch": 0.30761959119609034, "grad_norm": 0.25006744265556335, "learning_rate": 4.371626173798378e-05, "loss": 0.159, "step": 17247 }, { "epoch": 0.307637427317804, "grad_norm": 0.29980263113975525, "learning_rate": 4.371522979179362e-05, "loss": 0.1539, "step": 17248 }, { "epoch": 0.3076552634395177, "grad_norm": 0.21040914952754974, "learning_rate": 4.371419777305677e-05, "loss": 0.2223, "step": 17249 }, { "epoch": 0.3076730995612314, "grad_norm": 0.27245983481407166, "learning_rate": 4.3713165681777224e-05, "loss": 0.1717, "step": 17250 }, { "epoch": 0.3076909356829451, "grad_norm": 0.24427540600299835, "learning_rate": 4.371213351795899e-05, "loss": 0.1641, "step": 17251 }, { "epoch": 0.3077087718046588, "grad_norm": 0.2430589348077774, "learning_rate": 4.371110128160606e-05, "loss": 0.1879, "step": 17252 }, { "epoch": 0.30772660792637246, "grad_norm": 0.2516360878944397, "learning_rate": 4.371006897272244e-05, "loss": 0.1809, "step": 17253 }, { "epoch": 0.3077444440480862, "grad_norm": 0.3043888509273529, "learning_rate": 4.3709036591312125e-05, "loss": 0.18, "step": 17254 }, { "epoch": 0.3077622801697999, "grad_norm": 0.28843557834625244, "learning_rate": 4.370800413737912e-05, "loss": 0.213, "step": 17255 }, { "epoch": 0.3077801162915136, "grad_norm": 0.2079518437385559, "learning_rate": 4.370697161092744e-05, "loss": 0.1728, "step": 17256 }, { "epoch": 0.30779795241322727, "grad_norm": 0.2885398864746094, "learning_rate": 4.370593901196107e-05, "loss": 0.1863, "step": 17257 }, { "epoch": 0.30781578853494096, "grad_norm": 0.3651898503303528, "learning_rate": 4.370490634048403e-05, "loss": 0.1501, "step": 17258 }, { "epoch": 0.30783362465665465, "grad_norm": 0.360970675945282, "learning_rate": 4.37038735965003e-05, "loss": 0.1497, "step": 17259 }, { "epoch": 0.30785146077836834, "grad_norm": 0.22116002440452576, "learning_rate": 4.37028407800139e-05, "loss": 0.2013, "step": 17260 }, { "epoch": 0.307869296900082, "grad_norm": 0.29022619128227234, "learning_rate": 4.3701807891028836e-05, "loss": 0.2063, "step": 17261 }, { "epoch": 0.30788713302179577, "grad_norm": 0.22850602865219116, "learning_rate": 4.370077492954909e-05, "loss": 0.1719, "step": 17262 }, { "epoch": 0.30790496914350945, "grad_norm": 0.2563099265098572, "learning_rate": 4.369974189557869e-05, "loss": 0.1956, "step": 17263 }, { "epoch": 0.30792280526522314, "grad_norm": 0.30301570892333984, "learning_rate": 4.3698708789121634e-05, "loss": 0.2086, "step": 17264 }, { "epoch": 0.30794064138693683, "grad_norm": 0.3367981016635895, "learning_rate": 4.369767561018192e-05, "loss": 0.2608, "step": 17265 }, { "epoch": 0.3079584775086505, "grad_norm": 0.33726802468299866, "learning_rate": 4.369664235876356e-05, "loss": 0.1827, "step": 17266 }, { "epoch": 0.3079763136303642, "grad_norm": 0.31587645411491394, "learning_rate": 4.369560903487056e-05, "loss": 0.2149, "step": 17267 }, { "epoch": 0.3079941497520779, "grad_norm": 0.2821025848388672, "learning_rate": 4.369457563850692e-05, "loss": 0.1538, "step": 17268 }, { "epoch": 0.3080119858737916, "grad_norm": 0.27898287773132324, "learning_rate": 4.369354216967665e-05, "loss": 0.214, "step": 17269 }, { "epoch": 0.30802982199550527, "grad_norm": 0.24345724284648895, "learning_rate": 4.369250862838374e-05, "loss": 0.1739, "step": 17270 }, { "epoch": 0.308047658117219, "grad_norm": 0.2954026758670807, "learning_rate": 4.369147501463223e-05, "loss": 0.1588, "step": 17271 }, { "epoch": 0.3080654942389327, "grad_norm": 0.2502005994319916, "learning_rate": 4.369044132842609e-05, "loss": 0.1401, "step": 17272 }, { "epoch": 0.3080833303606464, "grad_norm": 0.29373979568481445, "learning_rate": 4.368940756976936e-05, "loss": 0.1795, "step": 17273 }, { "epoch": 0.3081011664823601, "grad_norm": 0.27883976697921753, "learning_rate": 4.3688373738666025e-05, "loss": 0.1644, "step": 17274 }, { "epoch": 0.30811900260407377, "grad_norm": 0.23872879147529602, "learning_rate": 4.368733983512009e-05, "loss": 0.1419, "step": 17275 }, { "epoch": 0.30813683872578745, "grad_norm": 0.32773467898368835, "learning_rate": 4.368630585913558e-05, "loss": 0.1985, "step": 17276 }, { "epoch": 0.30815467484750114, "grad_norm": 0.252444863319397, "learning_rate": 4.368527181071649e-05, "loss": 0.1241, "step": 17277 }, { "epoch": 0.30817251096921483, "grad_norm": 0.2558678090572357, "learning_rate": 4.3684237689866837e-05, "loss": 0.1577, "step": 17278 }, { "epoch": 0.3081903470909286, "grad_norm": 0.2435806840658188, "learning_rate": 4.3683203496590626e-05, "loss": 0.1546, "step": 17279 }, { "epoch": 0.30820818321264226, "grad_norm": 0.2824627161026001, "learning_rate": 4.368216923089186e-05, "loss": 0.1921, "step": 17280 }, { "epoch": 0.30822601933435595, "grad_norm": 0.35193753242492676, "learning_rate": 4.368113489277455e-05, "loss": 0.2017, "step": 17281 }, { "epoch": 0.30824385545606964, "grad_norm": 0.24292060732841492, "learning_rate": 4.368010048224273e-05, "loss": 0.2002, "step": 17282 }, { "epoch": 0.3082616915777833, "grad_norm": 0.28214728832244873, "learning_rate": 4.3679065999300365e-05, "loss": 0.1605, "step": 17283 }, { "epoch": 0.308279527699497, "grad_norm": 0.4588879942893982, "learning_rate": 4.367803144395149e-05, "loss": 0.2342, "step": 17284 }, { "epoch": 0.3082973638212107, "grad_norm": 0.2713479697704315, "learning_rate": 4.367699681620013e-05, "loss": 0.2488, "step": 17285 }, { "epoch": 0.3083151999429244, "grad_norm": 0.20883305370807648, "learning_rate": 4.367596211605027e-05, "loss": 0.197, "step": 17286 }, { "epoch": 0.3083330360646381, "grad_norm": 0.22023865580558777, "learning_rate": 4.3674927343505936e-05, "loss": 0.1833, "step": 17287 }, { "epoch": 0.3083508721863518, "grad_norm": 0.21465279161930084, "learning_rate": 4.367389249857112e-05, "loss": 0.1819, "step": 17288 }, { "epoch": 0.3083687083080655, "grad_norm": 0.29342034459114075, "learning_rate": 4.367285758124986e-05, "loss": 0.1844, "step": 17289 }, { "epoch": 0.3083865444297792, "grad_norm": 0.3028952181339264, "learning_rate": 4.367182259154615e-05, "loss": 0.2116, "step": 17290 }, { "epoch": 0.3084043805514929, "grad_norm": 0.20493605732917786, "learning_rate": 4.3670787529464005e-05, "loss": 0.1623, "step": 17291 }, { "epoch": 0.3084222166732066, "grad_norm": 0.22596469521522522, "learning_rate": 4.366975239500745e-05, "loss": 0.1513, "step": 17292 }, { "epoch": 0.30844005279492026, "grad_norm": 0.2786181569099426, "learning_rate": 4.3668717188180476e-05, "loss": 0.1435, "step": 17293 }, { "epoch": 0.30845788891663395, "grad_norm": 0.2698395252227783, "learning_rate": 4.36676819089871e-05, "loss": 0.1784, "step": 17294 }, { "epoch": 0.30847572503834764, "grad_norm": 0.20768164098262787, "learning_rate": 4.366664655743136e-05, "loss": 0.1471, "step": 17295 }, { "epoch": 0.3084935611600614, "grad_norm": 0.3134932518005371, "learning_rate": 4.366561113351723e-05, "loss": 0.1872, "step": 17296 }, { "epoch": 0.30851139728177507, "grad_norm": 0.2926786243915558, "learning_rate": 4.366457563724876e-05, "loss": 0.1931, "step": 17297 }, { "epoch": 0.30852923340348876, "grad_norm": 0.24032816290855408, "learning_rate": 4.366354006862994e-05, "loss": 0.1605, "step": 17298 }, { "epoch": 0.30854706952520244, "grad_norm": 0.37049993872642517, "learning_rate": 4.3662504427664796e-05, "loss": 0.186, "step": 17299 }, { "epoch": 0.30856490564691613, "grad_norm": 0.2395107001066208, "learning_rate": 4.366146871435733e-05, "loss": 0.1417, "step": 17300 }, { "epoch": 0.3085827417686298, "grad_norm": 0.22518615424633026, "learning_rate": 4.366043292871158e-05, "loss": 0.1417, "step": 17301 }, { "epoch": 0.3086005778903435, "grad_norm": 0.2506428062915802, "learning_rate": 4.3659397070731536e-05, "loss": 0.1312, "step": 17302 }, { "epoch": 0.3086184140120572, "grad_norm": 0.2959437668323517, "learning_rate": 4.365836114042123e-05, "loss": 0.1882, "step": 17303 }, { "epoch": 0.30863625013377094, "grad_norm": 0.269821435213089, "learning_rate": 4.365732513778467e-05, "loss": 0.1459, "step": 17304 }, { "epoch": 0.3086540862554846, "grad_norm": 0.4256460964679718, "learning_rate": 4.365628906282587e-05, "loss": 0.2011, "step": 17305 }, { "epoch": 0.3086719223771983, "grad_norm": 0.2815365493297577, "learning_rate": 4.3655252915548864e-05, "loss": 0.1517, "step": 17306 }, { "epoch": 0.308689758498912, "grad_norm": 0.2836959660053253, "learning_rate": 4.365421669595764e-05, "loss": 0.1126, "step": 17307 }, { "epoch": 0.3087075946206257, "grad_norm": 0.3010525405406952, "learning_rate": 4.365318040405623e-05, "loss": 0.1686, "step": 17308 }, { "epoch": 0.3087254307423394, "grad_norm": 0.34020987153053284, "learning_rate": 4.3652144039848654e-05, "loss": 0.1472, "step": 17309 }, { "epoch": 0.30874326686405307, "grad_norm": 0.3223326802253723, "learning_rate": 4.3651107603338924e-05, "loss": 0.2415, "step": 17310 }, { "epoch": 0.30876110298576676, "grad_norm": 0.20760977268218994, "learning_rate": 4.3650071094531064e-05, "loss": 0.1644, "step": 17311 }, { "epoch": 0.30877893910748044, "grad_norm": 0.22713865339756012, "learning_rate": 4.364903451342908e-05, "loss": 0.1622, "step": 17312 }, { "epoch": 0.3087967752291942, "grad_norm": 0.31559303402900696, "learning_rate": 4.3647997860037e-05, "loss": 0.1898, "step": 17313 }, { "epoch": 0.3088146113509079, "grad_norm": 0.25354933738708496, "learning_rate": 4.3646961134358844e-05, "loss": 0.1771, "step": 17314 }, { "epoch": 0.30883244747262156, "grad_norm": 0.24059563875198364, "learning_rate": 4.364592433639862e-05, "loss": 0.1636, "step": 17315 }, { "epoch": 0.30885028359433525, "grad_norm": 0.38591647148132324, "learning_rate": 4.364488746616036e-05, "loss": 0.2283, "step": 17316 }, { "epoch": 0.30886811971604894, "grad_norm": 0.2507249712944031, "learning_rate": 4.364385052364807e-05, "loss": 0.1879, "step": 17317 }, { "epoch": 0.3088859558377626, "grad_norm": 0.3062756061553955, "learning_rate": 4.3642813508865774e-05, "loss": 0.1718, "step": 17318 }, { "epoch": 0.3089037919594763, "grad_norm": 0.31196069717407227, "learning_rate": 4.3641776421817495e-05, "loss": 0.2159, "step": 17319 }, { "epoch": 0.30892162808119, "grad_norm": 0.3008164167404175, "learning_rate": 4.364073926250726e-05, "loss": 0.1425, "step": 17320 }, { "epoch": 0.30893946420290375, "grad_norm": 0.26491355895996094, "learning_rate": 4.3639702030939065e-05, "loss": 0.1523, "step": 17321 }, { "epoch": 0.30895730032461743, "grad_norm": 0.3596571683883667, "learning_rate": 4.363866472711696e-05, "loss": 0.1549, "step": 17322 }, { "epoch": 0.3089751364463311, "grad_norm": 0.24343734979629517, "learning_rate": 4.363762735104495e-05, "loss": 0.1802, "step": 17323 }, { "epoch": 0.3089929725680448, "grad_norm": 0.22927075624465942, "learning_rate": 4.363658990272706e-05, "loss": 0.1251, "step": 17324 }, { "epoch": 0.3090108086897585, "grad_norm": 0.2397066354751587, "learning_rate": 4.363555238216731e-05, "loss": 0.1629, "step": 17325 }, { "epoch": 0.3090286448114722, "grad_norm": 0.36761361360549927, "learning_rate": 4.363451478936973e-05, "loss": 0.1735, "step": 17326 }, { "epoch": 0.3090464809331859, "grad_norm": 0.4123034179210663, "learning_rate": 4.363347712433832e-05, "loss": 0.187, "step": 17327 }, { "epoch": 0.30906431705489956, "grad_norm": 0.31158921122550964, "learning_rate": 4.363243938707713e-05, "loss": 0.2051, "step": 17328 }, { "epoch": 0.30908215317661325, "grad_norm": 0.26064300537109375, "learning_rate": 4.363140157759016e-05, "loss": 0.1735, "step": 17329 }, { "epoch": 0.309099989298327, "grad_norm": 0.30597352981567383, "learning_rate": 4.363036369588145e-05, "loss": 0.1712, "step": 17330 }, { "epoch": 0.3091178254200407, "grad_norm": 0.29409223794937134, "learning_rate": 4.362932574195501e-05, "loss": 0.238, "step": 17331 }, { "epoch": 0.30913566154175437, "grad_norm": 0.333935022354126, "learning_rate": 4.362828771581487e-05, "loss": 0.1698, "step": 17332 }, { "epoch": 0.30915349766346806, "grad_norm": 0.35833579301834106, "learning_rate": 4.362724961746505e-05, "loss": 0.1768, "step": 17333 }, { "epoch": 0.30917133378518175, "grad_norm": 0.28956031799316406, "learning_rate": 4.362621144690958e-05, "loss": 0.1254, "step": 17334 }, { "epoch": 0.30918916990689543, "grad_norm": 0.3301190733909607, "learning_rate": 4.362517320415248e-05, "loss": 0.1549, "step": 17335 }, { "epoch": 0.3092070060286091, "grad_norm": 0.2762179672718048, "learning_rate": 4.362413488919778e-05, "loss": 0.1428, "step": 17336 }, { "epoch": 0.3092248421503228, "grad_norm": 0.3325973451137543, "learning_rate": 4.36230965020495e-05, "loss": 0.1582, "step": 17337 }, { "epoch": 0.30924267827203655, "grad_norm": 0.2459629476070404, "learning_rate": 4.3622058042711666e-05, "loss": 0.1622, "step": 17338 }, { "epoch": 0.30926051439375024, "grad_norm": 0.3135835826396942, "learning_rate": 4.36210195111883e-05, "loss": 0.1808, "step": 17339 }, { "epoch": 0.30927835051546393, "grad_norm": 0.3161329925060272, "learning_rate": 4.361998090748342e-05, "loss": 0.1572, "step": 17340 }, { "epoch": 0.3092961866371776, "grad_norm": 0.20491014420986176, "learning_rate": 4.3618942231601086e-05, "loss": 0.1568, "step": 17341 }, { "epoch": 0.3093140227588913, "grad_norm": 0.22547106444835663, "learning_rate": 4.361790348354529e-05, "loss": 0.16, "step": 17342 }, { "epoch": 0.309331858880605, "grad_norm": 0.30976414680480957, "learning_rate": 4.361686466332007e-05, "loss": 0.2246, "step": 17343 }, { "epoch": 0.3093496950023187, "grad_norm": 0.32153210043907166, "learning_rate": 4.3615825770929454e-05, "loss": 0.1568, "step": 17344 }, { "epoch": 0.30936753112403237, "grad_norm": 0.25154581665992737, "learning_rate": 4.361478680637746e-05, "loss": 0.1949, "step": 17345 }, { "epoch": 0.3093853672457461, "grad_norm": 0.26917651295661926, "learning_rate": 4.361374776966813e-05, "loss": 0.1809, "step": 17346 }, { "epoch": 0.3094032033674598, "grad_norm": 0.27427390217781067, "learning_rate": 4.361270866080548e-05, "loss": 0.2064, "step": 17347 }, { "epoch": 0.3094210394891735, "grad_norm": 0.25331488251686096, "learning_rate": 4.361166947979355e-05, "loss": 0.2109, "step": 17348 }, { "epoch": 0.3094388756108872, "grad_norm": 0.42039036750793457, "learning_rate": 4.361063022663635e-05, "loss": 0.2599, "step": 17349 }, { "epoch": 0.30945671173260086, "grad_norm": 0.2885431945323944, "learning_rate": 4.3609590901337926e-05, "loss": 0.0975, "step": 17350 }, { "epoch": 0.30947454785431455, "grad_norm": 0.20504805445671082, "learning_rate": 4.3608551503902306e-05, "loss": 0.1643, "step": 17351 }, { "epoch": 0.30949238397602824, "grad_norm": 0.26486822962760925, "learning_rate": 4.36075120343335e-05, "loss": 0.1607, "step": 17352 }, { "epoch": 0.30951022009774193, "grad_norm": 0.2809804677963257, "learning_rate": 4.360647249263556e-05, "loss": 0.1803, "step": 17353 }, { "epoch": 0.3095280562194556, "grad_norm": 0.2266402244567871, "learning_rate": 4.36054328788125e-05, "loss": 0.1397, "step": 17354 }, { "epoch": 0.30954589234116936, "grad_norm": 0.24769458174705505, "learning_rate": 4.3604393192868365e-05, "loss": 0.1794, "step": 17355 }, { "epoch": 0.30956372846288305, "grad_norm": 0.21029578149318695, "learning_rate": 4.3603353434807174e-05, "loss": 0.1573, "step": 17356 }, { "epoch": 0.30958156458459674, "grad_norm": 0.22738248109817505, "learning_rate": 4.360231360463295e-05, "loss": 0.1322, "step": 17357 }, { "epoch": 0.3095994007063104, "grad_norm": 0.2758842706680298, "learning_rate": 4.3601273702349743e-05, "loss": 0.1968, "step": 17358 }, { "epoch": 0.3096172368280241, "grad_norm": 0.2754844129085541, "learning_rate": 4.360023372796157e-05, "loss": 0.1805, "step": 17359 }, { "epoch": 0.3096350729497378, "grad_norm": 0.41801783442497253, "learning_rate": 4.359919368147247e-05, "loss": 0.1818, "step": 17360 }, { "epoch": 0.3096529090714515, "grad_norm": 0.28461480140686035, "learning_rate": 4.3598153562886465e-05, "loss": 0.1524, "step": 17361 }, { "epoch": 0.3096707451931652, "grad_norm": 0.16162221133708954, "learning_rate": 4.359711337220761e-05, "loss": 0.1228, "step": 17362 }, { "epoch": 0.3096885813148789, "grad_norm": 0.2785652279853821, "learning_rate": 4.35960731094399e-05, "loss": 0.1512, "step": 17363 }, { "epoch": 0.3097064174365926, "grad_norm": 0.19198106229305267, "learning_rate": 4.35950327745874e-05, "loss": 0.1469, "step": 17364 }, { "epoch": 0.3097242535583063, "grad_norm": 0.3432587683200836, "learning_rate": 4.359399236765412e-05, "loss": 0.2441, "step": 17365 }, { "epoch": 0.30974208968002, "grad_norm": 0.27107396721839905, "learning_rate": 4.359295188864411e-05, "loss": 0.1463, "step": 17366 }, { "epoch": 0.30975992580173367, "grad_norm": 0.2346051186323166, "learning_rate": 4.3591911337561395e-05, "loss": 0.1431, "step": 17367 }, { "epoch": 0.30977776192344736, "grad_norm": 0.29845526814460754, "learning_rate": 4.359087071441002e-05, "loss": 0.2149, "step": 17368 }, { "epoch": 0.30979559804516105, "grad_norm": 0.27903053164482117, "learning_rate": 4.3589830019194e-05, "loss": 0.1745, "step": 17369 }, { "epoch": 0.30981343416687473, "grad_norm": 0.27437329292297363, "learning_rate": 4.358878925191737e-05, "loss": 0.1697, "step": 17370 }, { "epoch": 0.3098312702885884, "grad_norm": 0.22781068086624146, "learning_rate": 4.3587748412584186e-05, "loss": 0.1764, "step": 17371 }, { "epoch": 0.30984910641030217, "grad_norm": 0.3300037682056427, "learning_rate": 4.358670750119847e-05, "loss": 0.1777, "step": 17372 }, { "epoch": 0.30986694253201585, "grad_norm": 0.28571605682373047, "learning_rate": 4.358566651776424e-05, "loss": 0.1556, "step": 17373 }, { "epoch": 0.30988477865372954, "grad_norm": 0.22507601976394653, "learning_rate": 4.358462546228557e-05, "loss": 0.172, "step": 17374 }, { "epoch": 0.30990261477544323, "grad_norm": 0.29112255573272705, "learning_rate": 4.358358433476646e-05, "loss": 0.1888, "step": 17375 }, { "epoch": 0.3099204508971569, "grad_norm": 0.3871815800666809, "learning_rate": 4.358254313521095e-05, "loss": 0.1649, "step": 17376 }, { "epoch": 0.3099382870188706, "grad_norm": 0.3167188763618469, "learning_rate": 4.3581501863623096e-05, "loss": 0.202, "step": 17377 }, { "epoch": 0.3099561231405843, "grad_norm": 0.19899222254753113, "learning_rate": 4.358046052000693e-05, "loss": 0.1685, "step": 17378 }, { "epoch": 0.309973959262298, "grad_norm": 0.3238217830657959, "learning_rate": 4.3579419104366463e-05, "loss": 0.1283, "step": 17379 }, { "epoch": 0.3099917953840117, "grad_norm": 0.24417483806610107, "learning_rate": 4.357837761670576e-05, "loss": 0.1574, "step": 17380 }, { "epoch": 0.3100096315057254, "grad_norm": 0.20129568874835968, "learning_rate": 4.357733605702885e-05, "loss": 0.1268, "step": 17381 }, { "epoch": 0.3100274676274391, "grad_norm": 0.25981321930885315, "learning_rate": 4.357629442533977e-05, "loss": 0.1876, "step": 17382 }, { "epoch": 0.3100453037491528, "grad_norm": 0.2805124521255493, "learning_rate": 4.357525272164255e-05, "loss": 0.1063, "step": 17383 }, { "epoch": 0.3100631398708665, "grad_norm": 0.33776137232780457, "learning_rate": 4.357421094594124e-05, "loss": 0.1321, "step": 17384 }, { "epoch": 0.31008097599258017, "grad_norm": 0.20445755124092102, "learning_rate": 4.357316909823988e-05, "loss": 0.1703, "step": 17385 }, { "epoch": 0.31009881211429385, "grad_norm": 0.21691961586475372, "learning_rate": 4.3572127178542487e-05, "loss": 0.1214, "step": 17386 }, { "epoch": 0.31011664823600754, "grad_norm": 0.2529012858867645, "learning_rate": 4.357108518685312e-05, "loss": 0.1548, "step": 17387 }, { "epoch": 0.31013448435772123, "grad_norm": 0.23972001671791077, "learning_rate": 4.357004312317581e-05, "loss": 0.1363, "step": 17388 }, { "epoch": 0.310152320479435, "grad_norm": 0.3075212836265564, "learning_rate": 4.356900098751461e-05, "loss": 0.1396, "step": 17389 }, { "epoch": 0.31017015660114866, "grad_norm": 0.27812814712524414, "learning_rate": 4.356795877987354e-05, "loss": 0.1708, "step": 17390 }, { "epoch": 0.31018799272286235, "grad_norm": 0.2706345021724701, "learning_rate": 4.356691650025665e-05, "loss": 0.1799, "step": 17391 }, { "epoch": 0.31020582884457604, "grad_norm": 0.3186160624027252, "learning_rate": 4.356587414866798e-05, "loss": 0.2028, "step": 17392 }, { "epoch": 0.3102236649662897, "grad_norm": 0.27843767404556274, "learning_rate": 4.3564831725111565e-05, "loss": 0.1757, "step": 17393 }, { "epoch": 0.3102415010880034, "grad_norm": 0.23253169655799866, "learning_rate": 4.356378922959146e-05, "loss": 0.1302, "step": 17394 }, { "epoch": 0.3102593372097171, "grad_norm": 0.27413132786750793, "learning_rate": 4.3562746662111684e-05, "loss": 0.1621, "step": 17395 }, { "epoch": 0.3102771733314308, "grad_norm": 0.3344685137271881, "learning_rate": 4.3561704022676296e-05, "loss": 0.173, "step": 17396 }, { "epoch": 0.31029500945314453, "grad_norm": 0.2624615728855133, "learning_rate": 4.356066131128933e-05, "loss": 0.1745, "step": 17397 }, { "epoch": 0.3103128455748582, "grad_norm": 0.2921142876148224, "learning_rate": 4.3559618527954834e-05, "loss": 0.1806, "step": 17398 }, { "epoch": 0.3103306816965719, "grad_norm": 0.22078359127044678, "learning_rate": 4.3558575672676844e-05, "loss": 0.1887, "step": 17399 }, { "epoch": 0.3103485178182856, "grad_norm": 0.2526251971721649, "learning_rate": 4.3557532745459404e-05, "loss": 0.1724, "step": 17400 }, { "epoch": 0.3103663539399993, "grad_norm": 0.2232562154531479, "learning_rate": 4.355648974630656e-05, "loss": 0.1628, "step": 17401 }, { "epoch": 0.310384190061713, "grad_norm": 0.3004521131515503, "learning_rate": 4.355544667522235e-05, "loss": 0.2032, "step": 17402 }, { "epoch": 0.31040202618342666, "grad_norm": 0.2784850597381592, "learning_rate": 4.355440353221082e-05, "loss": 0.1416, "step": 17403 }, { "epoch": 0.31041986230514035, "grad_norm": 0.39136961102485657, "learning_rate": 4.355336031727602e-05, "loss": 0.2049, "step": 17404 }, { "epoch": 0.3104376984268541, "grad_norm": 0.24422284960746765, "learning_rate": 4.355231703042198e-05, "loss": 0.1931, "step": 17405 }, { "epoch": 0.3104555345485678, "grad_norm": 0.3265067934989929, "learning_rate": 4.355127367165275e-05, "loss": 0.2096, "step": 17406 }, { "epoch": 0.31047337067028147, "grad_norm": 0.2704797387123108, "learning_rate": 4.355023024097238e-05, "loss": 0.2306, "step": 17407 }, { "epoch": 0.31049120679199516, "grad_norm": 0.6578344106674194, "learning_rate": 4.3549186738384913e-05, "loss": 0.1838, "step": 17408 }, { "epoch": 0.31050904291370884, "grad_norm": 0.20448662340641022, "learning_rate": 4.3548143163894385e-05, "loss": 0.1397, "step": 17409 }, { "epoch": 0.31052687903542253, "grad_norm": 0.3903447985649109, "learning_rate": 4.3547099517504855e-05, "loss": 0.1963, "step": 17410 }, { "epoch": 0.3105447151571362, "grad_norm": 0.16988320648670197, "learning_rate": 4.354605579922035e-05, "loss": 0.1134, "step": 17411 }, { "epoch": 0.3105625512788499, "grad_norm": 0.26739633083343506, "learning_rate": 4.354501200904494e-05, "loss": 0.1788, "step": 17412 }, { "epoch": 0.3105803874005636, "grad_norm": 0.33215096592903137, "learning_rate": 4.354396814698265e-05, "loss": 0.218, "step": 17413 }, { "epoch": 0.31059822352227734, "grad_norm": 0.2763465344905853, "learning_rate": 4.354292421303754e-05, "loss": 0.1544, "step": 17414 }, { "epoch": 0.310616059643991, "grad_norm": 0.23078393936157227, "learning_rate": 4.354188020721365e-05, "loss": 0.0807, "step": 17415 }, { "epoch": 0.3106338957657047, "grad_norm": 0.23900263011455536, "learning_rate": 4.354083612951503e-05, "loss": 0.1406, "step": 17416 }, { "epoch": 0.3106517318874184, "grad_norm": 0.29303503036499023, "learning_rate": 4.353979197994572e-05, "loss": 0.136, "step": 17417 }, { "epoch": 0.3106695680091321, "grad_norm": 0.4580296576023102, "learning_rate": 4.353874775850977e-05, "loss": 0.17, "step": 17418 }, { "epoch": 0.3106874041308458, "grad_norm": 0.30153709650039673, "learning_rate": 4.353770346521124e-05, "loss": 0.1571, "step": 17419 }, { "epoch": 0.31070524025255947, "grad_norm": 0.2793455719947815, "learning_rate": 4.353665910005416e-05, "loss": 0.1329, "step": 17420 }, { "epoch": 0.31072307637427315, "grad_norm": 0.27351051568984985, "learning_rate": 4.353561466304259e-05, "loss": 0.1701, "step": 17421 }, { "epoch": 0.3107409124959869, "grad_norm": 0.364711195230484, "learning_rate": 4.3534570154180575e-05, "loss": 0.1723, "step": 17422 }, { "epoch": 0.3107587486177006, "grad_norm": 0.29340437054634094, "learning_rate": 4.3533525573472165e-05, "loss": 0.1945, "step": 17423 }, { "epoch": 0.3107765847394143, "grad_norm": 0.2669852077960968, "learning_rate": 4.3532480920921416e-05, "loss": 0.1245, "step": 17424 }, { "epoch": 0.31079442086112796, "grad_norm": 0.25489360094070435, "learning_rate": 4.353143619653236e-05, "loss": 0.1515, "step": 17425 }, { "epoch": 0.31081225698284165, "grad_norm": 0.22249282896518707, "learning_rate": 4.353039140030906e-05, "loss": 0.1599, "step": 17426 }, { "epoch": 0.31083009310455534, "grad_norm": 0.39028260111808777, "learning_rate": 4.3529346532255564e-05, "loss": 0.1624, "step": 17427 }, { "epoch": 0.310847929226269, "grad_norm": 0.3274352252483368, "learning_rate": 4.352830159237592e-05, "loss": 0.1655, "step": 17428 }, { "epoch": 0.3108657653479827, "grad_norm": 0.24809467792510986, "learning_rate": 4.352725658067418e-05, "loss": 0.1797, "step": 17429 }, { "epoch": 0.3108836014696964, "grad_norm": 0.26507964730262756, "learning_rate": 4.35262114971544e-05, "loss": 0.1354, "step": 17430 }, { "epoch": 0.31090143759141015, "grad_norm": 0.28164511919021606, "learning_rate": 4.3525166341820615e-05, "loss": 0.1574, "step": 17431 }, { "epoch": 0.31091927371312383, "grad_norm": 0.22720032930374146, "learning_rate": 4.3524121114676894e-05, "loss": 0.1394, "step": 17432 }, { "epoch": 0.3109371098348375, "grad_norm": 0.38103148341178894, "learning_rate": 4.3523075815727275e-05, "loss": 0.1851, "step": 17433 }, { "epoch": 0.3109549459565512, "grad_norm": 0.2739613354206085, "learning_rate": 4.3522030444975826e-05, "loss": 0.1162, "step": 17434 }, { "epoch": 0.3109727820782649, "grad_norm": 0.22264060378074646, "learning_rate": 4.3520985002426585e-05, "loss": 0.1542, "step": 17435 }, { "epoch": 0.3109906181999786, "grad_norm": 0.300814688205719, "learning_rate": 4.35199394880836e-05, "loss": 0.2375, "step": 17436 }, { "epoch": 0.3110084543216923, "grad_norm": 0.40395182371139526, "learning_rate": 4.351889390195095e-05, "loss": 0.1801, "step": 17437 }, { "epoch": 0.31102629044340596, "grad_norm": 0.23144060373306274, "learning_rate": 4.351784824403266e-05, "loss": 0.1936, "step": 17438 }, { "epoch": 0.3110441265651197, "grad_norm": 0.3169465959072113, "learning_rate": 4.3516802514332794e-05, "loss": 0.1873, "step": 17439 }, { "epoch": 0.3110619626868334, "grad_norm": 0.25428691506385803, "learning_rate": 4.351575671285541e-05, "loss": 0.1489, "step": 17440 }, { "epoch": 0.3110797988085471, "grad_norm": 0.31068018078804016, "learning_rate": 4.3514710839604556e-05, "loss": 0.1771, "step": 17441 }, { "epoch": 0.31109763493026077, "grad_norm": 0.4185318648815155, "learning_rate": 4.351366489458429e-05, "loss": 0.282, "step": 17442 }, { "epoch": 0.31111547105197446, "grad_norm": 0.31771883368492126, "learning_rate": 4.351261887779866e-05, "loss": 0.168, "step": 17443 }, { "epoch": 0.31113330717368815, "grad_norm": 0.3612484037876129, "learning_rate": 4.351157278925173e-05, "loss": 0.2258, "step": 17444 }, { "epoch": 0.31115114329540183, "grad_norm": 0.26935887336730957, "learning_rate": 4.3510526628947544e-05, "loss": 0.1536, "step": 17445 }, { "epoch": 0.3111689794171155, "grad_norm": 0.21014304459095, "learning_rate": 4.3509480396890175e-05, "loss": 0.1443, "step": 17446 }, { "epoch": 0.31118681553882926, "grad_norm": 0.33301955461502075, "learning_rate": 4.350843409308366e-05, "loss": 0.1771, "step": 17447 }, { "epoch": 0.31120465166054295, "grad_norm": 0.26040083169937134, "learning_rate": 4.350738771753206e-05, "loss": 0.1623, "step": 17448 }, { "epoch": 0.31122248778225664, "grad_norm": 0.23314322531223297, "learning_rate": 4.350634127023944e-05, "loss": 0.17, "step": 17449 }, { "epoch": 0.31124032390397033, "grad_norm": 0.27478617429733276, "learning_rate": 4.350529475120983e-05, "loss": 0.1801, "step": 17450 }, { "epoch": 0.311258160025684, "grad_norm": 0.22157971560955048, "learning_rate": 4.3504248160447326e-05, "loss": 0.1781, "step": 17451 }, { "epoch": 0.3112759961473977, "grad_norm": 0.3089222311973572, "learning_rate": 4.350320149795596e-05, "loss": 0.1754, "step": 17452 }, { "epoch": 0.3112938322691114, "grad_norm": 0.31412917375564575, "learning_rate": 4.350215476373979e-05, "loss": 0.1494, "step": 17453 }, { "epoch": 0.3113116683908251, "grad_norm": 0.3011000156402588, "learning_rate": 4.350110795780289e-05, "loss": 0.239, "step": 17454 }, { "epoch": 0.31132950451253877, "grad_norm": 0.2290801703929901, "learning_rate": 4.350006108014929e-05, "loss": 0.1547, "step": 17455 }, { "epoch": 0.3113473406342525, "grad_norm": 0.20584842562675476, "learning_rate": 4.349901413078307e-05, "loss": 0.1444, "step": 17456 }, { "epoch": 0.3113651767559662, "grad_norm": 0.2979958951473236, "learning_rate": 4.349796710970828e-05, "loss": 0.1964, "step": 17457 }, { "epoch": 0.3113830128776799, "grad_norm": 0.23865488171577454, "learning_rate": 4.3496920016928985e-05, "loss": 0.19, "step": 17458 }, { "epoch": 0.3114008489993936, "grad_norm": 0.2655833959579468, "learning_rate": 4.3495872852449237e-05, "loss": 0.1498, "step": 17459 }, { "epoch": 0.31141868512110726, "grad_norm": 0.29966792464256287, "learning_rate": 4.34948256162731e-05, "loss": 0.2114, "step": 17460 }, { "epoch": 0.31143652124282095, "grad_norm": 0.22978246212005615, "learning_rate": 4.349377830840463e-05, "loss": 0.1428, "step": 17461 }, { "epoch": 0.31145435736453464, "grad_norm": 0.31427451968193054, "learning_rate": 4.349273092884788e-05, "loss": 0.164, "step": 17462 }, { "epoch": 0.3114721934862483, "grad_norm": 0.2699473202228546, "learning_rate": 4.349168347760692e-05, "loss": 0.1706, "step": 17463 }, { "epoch": 0.31149002960796207, "grad_norm": 0.22348356246948242, "learning_rate": 4.349063595468582e-05, "loss": 0.1734, "step": 17464 }, { "epoch": 0.31150786572967576, "grad_norm": 0.31114187836647034, "learning_rate": 4.348958836008862e-05, "loss": 0.2079, "step": 17465 }, { "epoch": 0.31152570185138945, "grad_norm": 0.26533836126327515, "learning_rate": 4.348854069381939e-05, "loss": 0.1868, "step": 17466 }, { "epoch": 0.31154353797310314, "grad_norm": 0.25131261348724365, "learning_rate": 4.348749295588219e-05, "loss": 0.1315, "step": 17467 }, { "epoch": 0.3115613740948168, "grad_norm": 0.3486991822719574, "learning_rate": 4.348644514628108e-05, "loss": 0.2017, "step": 17468 }, { "epoch": 0.3115792102165305, "grad_norm": 0.2648819386959076, "learning_rate": 4.348539726502012e-05, "loss": 0.1758, "step": 17469 }, { "epoch": 0.3115970463382442, "grad_norm": 0.2868684232234955, "learning_rate": 4.348434931210339e-05, "loss": 0.241, "step": 17470 }, { "epoch": 0.3116148824599579, "grad_norm": 0.38698622584342957, "learning_rate": 4.348330128753493e-05, "loss": 0.2512, "step": 17471 }, { "epoch": 0.3116327185816716, "grad_norm": 0.26002180576324463, "learning_rate": 4.3482253191318803e-05, "loss": 0.1401, "step": 17472 }, { "epoch": 0.3116505547033853, "grad_norm": 0.2657237946987152, "learning_rate": 4.3481205023459086e-05, "loss": 0.137, "step": 17473 }, { "epoch": 0.311668390825099, "grad_norm": 0.32502228021621704, "learning_rate": 4.3480156783959835e-05, "loss": 0.1943, "step": 17474 }, { "epoch": 0.3116862269468127, "grad_norm": 0.20528516173362732, "learning_rate": 4.347910847282511e-05, "loss": 0.1575, "step": 17475 }, { "epoch": 0.3117040630685264, "grad_norm": 0.2571054697036743, "learning_rate": 4.3478060090058986e-05, "loss": 0.141, "step": 17476 }, { "epoch": 0.31172189919024007, "grad_norm": 0.3063930869102478, "learning_rate": 4.347701163566551e-05, "loss": 0.1138, "step": 17477 }, { "epoch": 0.31173973531195376, "grad_norm": 0.25448077917099, "learning_rate": 4.347596310964877e-05, "loss": 0.1762, "step": 17478 }, { "epoch": 0.31175757143366745, "grad_norm": 0.2913854122161865, "learning_rate": 4.34749145120128e-05, "loss": 0.1364, "step": 17479 }, { "epoch": 0.31177540755538113, "grad_norm": 0.4470829963684082, "learning_rate": 4.347386584276169e-05, "loss": 0.143, "step": 17480 }, { "epoch": 0.3117932436770949, "grad_norm": 0.27488455176353455, "learning_rate": 4.347281710189948e-05, "loss": 0.1593, "step": 17481 }, { "epoch": 0.31181107979880857, "grad_norm": 0.2817961275577545, "learning_rate": 4.347176828943026e-05, "loss": 0.1863, "step": 17482 }, { "epoch": 0.31182891592052225, "grad_norm": 0.23810303211212158, "learning_rate": 4.3470719405358095e-05, "loss": 0.1594, "step": 17483 }, { "epoch": 0.31184675204223594, "grad_norm": 0.2999715805053711, "learning_rate": 4.3469670449687026e-05, "loss": 0.1703, "step": 17484 }, { "epoch": 0.31186458816394963, "grad_norm": 0.23091301321983337, "learning_rate": 4.3468621422421155e-05, "loss": 0.165, "step": 17485 }, { "epoch": 0.3118824242856633, "grad_norm": 0.2133583426475525, "learning_rate": 4.346757232356451e-05, "loss": 0.1373, "step": 17486 }, { "epoch": 0.311900260407377, "grad_norm": 0.27956530451774597, "learning_rate": 4.3466523153121186e-05, "loss": 0.1996, "step": 17487 }, { "epoch": 0.3119180965290907, "grad_norm": 0.22427357733249664, "learning_rate": 4.3465473911095234e-05, "loss": 0.1129, "step": 17488 }, { "epoch": 0.3119359326508044, "grad_norm": 0.29926085472106934, "learning_rate": 4.3464424597490735e-05, "loss": 0.1381, "step": 17489 }, { "epoch": 0.3119537687725181, "grad_norm": 0.32807281613349915, "learning_rate": 4.346337521231174e-05, "loss": 0.1662, "step": 17490 }, { "epoch": 0.3119716048942318, "grad_norm": 0.2632836401462555, "learning_rate": 4.346232575556233e-05, "loss": 0.2041, "step": 17491 }, { "epoch": 0.3119894410159455, "grad_norm": 0.2649456262588501, "learning_rate": 4.346127622724657e-05, "loss": 0.1755, "step": 17492 }, { "epoch": 0.3120072771376592, "grad_norm": 0.45056968927383423, "learning_rate": 4.346022662736853e-05, "loss": 0.1996, "step": 17493 }, { "epoch": 0.3120251132593729, "grad_norm": 0.33872950077056885, "learning_rate": 4.3459176955932267e-05, "loss": 0.2236, "step": 17494 }, { "epoch": 0.31204294938108657, "grad_norm": 0.33895570039749146, "learning_rate": 4.3458127212941864e-05, "loss": 0.1254, "step": 17495 }, { "epoch": 0.31206078550280025, "grad_norm": 0.41798847913742065, "learning_rate": 4.345707739840138e-05, "loss": 0.2177, "step": 17496 }, { "epoch": 0.31207862162451394, "grad_norm": 0.22913534939289093, "learning_rate": 4.3456027512314894e-05, "loss": 0.195, "step": 17497 }, { "epoch": 0.3120964577462277, "grad_norm": 0.28038740158081055, "learning_rate": 4.345497755468647e-05, "loss": 0.186, "step": 17498 }, { "epoch": 0.3121142938679414, "grad_norm": 0.29573291540145874, "learning_rate": 4.345392752552018e-05, "loss": 0.212, "step": 17499 }, { "epoch": 0.31213212998965506, "grad_norm": 0.2219371348619461, "learning_rate": 4.3452877424820094e-05, "loss": 0.198, "step": 17500 }, { "epoch": 0.31214996611136875, "grad_norm": 0.21954099833965302, "learning_rate": 4.345182725259027e-05, "loss": 0.1816, "step": 17501 }, { "epoch": 0.31216780223308244, "grad_norm": 0.23090429604053497, "learning_rate": 4.345077700883481e-05, "loss": 0.1688, "step": 17502 }, { "epoch": 0.3121856383547961, "grad_norm": 0.258075475692749, "learning_rate": 4.344972669355775e-05, "loss": 0.1479, "step": 17503 }, { "epoch": 0.3122034744765098, "grad_norm": 0.33138346672058105, "learning_rate": 4.3448676306763184e-05, "loss": 0.1712, "step": 17504 }, { "epoch": 0.3122213105982235, "grad_norm": 0.22639347612857819, "learning_rate": 4.344762584845518e-05, "loss": 0.1812, "step": 17505 }, { "epoch": 0.31223914671993724, "grad_norm": 0.2866867780685425, "learning_rate": 4.344657531863779e-05, "loss": 0.1932, "step": 17506 }, { "epoch": 0.31225698284165093, "grad_norm": 0.2818053066730499, "learning_rate": 4.3445524717315125e-05, "loss": 0.1793, "step": 17507 }, { "epoch": 0.3122748189633646, "grad_norm": 0.3275313377380371, "learning_rate": 4.3444474044491215e-05, "loss": 0.1701, "step": 17508 }, { "epoch": 0.3122926550850783, "grad_norm": 0.32207781076431274, "learning_rate": 4.3443423300170175e-05, "loss": 0.2237, "step": 17509 }, { "epoch": 0.312310491206792, "grad_norm": 0.3252350986003876, "learning_rate": 4.3442372484356044e-05, "loss": 0.2112, "step": 17510 }, { "epoch": 0.3123283273285057, "grad_norm": 0.3287123143672943, "learning_rate": 4.3441321597052895e-05, "loss": 0.2224, "step": 17511 }, { "epoch": 0.31234616345021937, "grad_norm": 0.33139023184776306, "learning_rate": 4.3440270638264834e-05, "loss": 0.1603, "step": 17512 }, { "epoch": 0.31236399957193306, "grad_norm": 0.23307359218597412, "learning_rate": 4.343921960799591e-05, "loss": 0.1743, "step": 17513 }, { "epoch": 0.31238183569364675, "grad_norm": 0.2689676880836487, "learning_rate": 4.34381685062502e-05, "loss": 0.1283, "step": 17514 }, { "epoch": 0.3123996718153605, "grad_norm": 0.2605196237564087, "learning_rate": 4.343711733303178e-05, "loss": 0.1613, "step": 17515 }, { "epoch": 0.3124175079370742, "grad_norm": 0.2356814295053482, "learning_rate": 4.343606608834472e-05, "loss": 0.1716, "step": 17516 }, { "epoch": 0.31243534405878787, "grad_norm": 0.327284038066864, "learning_rate": 4.3435014772193106e-05, "loss": 0.1708, "step": 17517 }, { "epoch": 0.31245318018050156, "grad_norm": 0.33501574397087097, "learning_rate": 4.343396338458101e-05, "loss": 0.1536, "step": 17518 }, { "epoch": 0.31247101630221524, "grad_norm": 0.2738770544528961, "learning_rate": 4.34329119255125e-05, "loss": 0.1775, "step": 17519 }, { "epoch": 0.31248885242392893, "grad_norm": 0.27891844511032104, "learning_rate": 4.343186039499166e-05, "loss": 0.1788, "step": 17520 }, { "epoch": 0.3125066885456426, "grad_norm": 0.20665976405143738, "learning_rate": 4.343080879302256e-05, "loss": 0.1285, "step": 17521 }, { "epoch": 0.3125245246673563, "grad_norm": 0.9507989287376404, "learning_rate": 4.342975711960928e-05, "loss": 0.1991, "step": 17522 }, { "epoch": 0.31254236078907005, "grad_norm": 0.25839972496032715, "learning_rate": 4.34287053747559e-05, "loss": 0.1852, "step": 17523 }, { "epoch": 0.31256019691078374, "grad_norm": 0.2607276141643524, "learning_rate": 4.342765355846649e-05, "loss": 0.16, "step": 17524 }, { "epoch": 0.3125780330324974, "grad_norm": 0.3024839460849762, "learning_rate": 4.342660167074513e-05, "loss": 0.1396, "step": 17525 }, { "epoch": 0.3125958691542111, "grad_norm": 0.29394084215164185, "learning_rate": 4.3425549711595896e-05, "loss": 0.1725, "step": 17526 }, { "epoch": 0.3126137052759248, "grad_norm": 0.2145133912563324, "learning_rate": 4.342449768102287e-05, "loss": 0.1586, "step": 17527 }, { "epoch": 0.3126315413976385, "grad_norm": 0.26951074600219727, "learning_rate": 4.342344557903013e-05, "loss": 0.1677, "step": 17528 }, { "epoch": 0.3126493775193522, "grad_norm": 0.3360919654369354, "learning_rate": 4.3422393405621744e-05, "loss": 0.2025, "step": 17529 }, { "epoch": 0.31266721364106587, "grad_norm": 0.20517614483833313, "learning_rate": 4.34213411608018e-05, "loss": 0.1693, "step": 17530 }, { "epoch": 0.31268504976277955, "grad_norm": 0.22563259303569794, "learning_rate": 4.342028884457438e-05, "loss": 0.1703, "step": 17531 }, { "epoch": 0.3127028858844933, "grad_norm": 0.20919504761695862, "learning_rate": 4.3419236456943556e-05, "loss": 0.151, "step": 17532 }, { "epoch": 0.312720722006207, "grad_norm": 0.221780464053154, "learning_rate": 4.3418183997913406e-05, "loss": 0.1782, "step": 17533 }, { "epoch": 0.3127385581279207, "grad_norm": 0.26816293597221375, "learning_rate": 4.341713146748802e-05, "loss": 0.175, "step": 17534 }, { "epoch": 0.31275639424963436, "grad_norm": 0.25535818934440613, "learning_rate": 4.341607886567147e-05, "loss": 0.1361, "step": 17535 }, { "epoch": 0.31277423037134805, "grad_norm": 0.32535871863365173, "learning_rate": 4.3415026192467835e-05, "loss": 0.2581, "step": 17536 }, { "epoch": 0.31279206649306174, "grad_norm": 0.25587764382362366, "learning_rate": 4.34139734478812e-05, "loss": 0.175, "step": 17537 }, { "epoch": 0.3128099026147754, "grad_norm": 0.2733164429664612, "learning_rate": 4.341292063191564e-05, "loss": 0.1765, "step": 17538 }, { "epoch": 0.3128277387364891, "grad_norm": 0.25258323550224304, "learning_rate": 4.3411867744575246e-05, "loss": 0.1478, "step": 17539 }, { "epoch": 0.31284557485820286, "grad_norm": 0.3264720141887665, "learning_rate": 4.341081478586409e-05, "loss": 0.1432, "step": 17540 }, { "epoch": 0.31286341097991655, "grad_norm": 0.2438495010137558, "learning_rate": 4.340976175578626e-05, "loss": 0.2233, "step": 17541 }, { "epoch": 0.31288124710163023, "grad_norm": 0.2688412368297577, "learning_rate": 4.340870865434583e-05, "loss": 0.1874, "step": 17542 }, { "epoch": 0.3128990832233439, "grad_norm": 0.3897169530391693, "learning_rate": 4.340765548154689e-05, "loss": 0.2205, "step": 17543 }, { "epoch": 0.3129169193450576, "grad_norm": 0.28889134526252747, "learning_rate": 4.340660223739352e-05, "loss": 0.1996, "step": 17544 }, { "epoch": 0.3129347554667713, "grad_norm": 0.20531803369522095, "learning_rate": 4.34055489218898e-05, "loss": 0.1715, "step": 17545 }, { "epoch": 0.312952591588485, "grad_norm": 0.2323165386915207, "learning_rate": 4.3404495535039814e-05, "loss": 0.1754, "step": 17546 }, { "epoch": 0.3129704277101987, "grad_norm": 0.2058759480714798, "learning_rate": 4.340344207684765e-05, "loss": 0.1793, "step": 17547 }, { "epoch": 0.3129882638319124, "grad_norm": 0.28017207980155945, "learning_rate": 4.340238854731738e-05, "loss": 0.1798, "step": 17548 }, { "epoch": 0.3130060999536261, "grad_norm": 0.32554730772972107, "learning_rate": 4.340133494645311e-05, "loss": 0.1691, "step": 17549 }, { "epoch": 0.3130239360753398, "grad_norm": 0.2488563358783722, "learning_rate": 4.34002812742589e-05, "loss": 0.1813, "step": 17550 }, { "epoch": 0.3130417721970535, "grad_norm": 0.2750335931777954, "learning_rate": 4.339922753073885e-05, "loss": 0.1705, "step": 17551 }, { "epoch": 0.31305960831876717, "grad_norm": 0.20450814068317413, "learning_rate": 4.339817371589704e-05, "loss": 0.1333, "step": 17552 }, { "epoch": 0.31307744444048086, "grad_norm": 0.23762629926204681, "learning_rate": 4.3397119829737555e-05, "loss": 0.1686, "step": 17553 }, { "epoch": 0.31309528056219454, "grad_norm": 0.18951566517353058, "learning_rate": 4.339606587226447e-05, "loss": 0.1309, "step": 17554 }, { "epoch": 0.31311311668390823, "grad_norm": 0.3012663722038269, "learning_rate": 4.3395011843481884e-05, "loss": 0.168, "step": 17555 }, { "epoch": 0.3131309528056219, "grad_norm": 0.2693444788455963, "learning_rate": 4.3393957743393886e-05, "loss": 0.1865, "step": 17556 }, { "epoch": 0.31314878892733566, "grad_norm": 0.27569329738616943, "learning_rate": 4.3392903572004545e-05, "loss": 0.187, "step": 17557 }, { "epoch": 0.31316662504904935, "grad_norm": 0.2794274389743805, "learning_rate": 4.339184932931796e-05, "loss": 0.2003, "step": 17558 }, { "epoch": 0.31318446117076304, "grad_norm": 0.2437286674976349, "learning_rate": 4.339079501533821e-05, "loss": 0.1588, "step": 17559 }, { "epoch": 0.31320229729247673, "grad_norm": 0.2621024250984192, "learning_rate": 4.33897406300694e-05, "loss": 0.2289, "step": 17560 }, { "epoch": 0.3132201334141904, "grad_norm": 0.24582625925540924, "learning_rate": 4.3388686173515596e-05, "loss": 0.1654, "step": 17561 }, { "epoch": 0.3132379695359041, "grad_norm": 0.23326288163661957, "learning_rate": 4.338763164568089e-05, "loss": 0.1669, "step": 17562 }, { "epoch": 0.3132558056576178, "grad_norm": 0.1829545646905899, "learning_rate": 4.3386577046569376e-05, "loss": 0.1585, "step": 17563 }, { "epoch": 0.3132736417793315, "grad_norm": 0.223091259598732, "learning_rate": 4.338552237618514e-05, "loss": 0.1699, "step": 17564 }, { "epoch": 0.3132914779010452, "grad_norm": 0.2604628801345825, "learning_rate": 4.338446763453226e-05, "loss": 0.195, "step": 17565 }, { "epoch": 0.3133093140227589, "grad_norm": 0.2905992269515991, "learning_rate": 4.338341282161485e-05, "loss": 0.1771, "step": 17566 }, { "epoch": 0.3133271501444726, "grad_norm": 0.4702034890651703, "learning_rate": 4.338235793743697e-05, "loss": 0.1737, "step": 17567 }, { "epoch": 0.3133449862661863, "grad_norm": 0.23991736769676208, "learning_rate": 4.338130298200273e-05, "loss": 0.1416, "step": 17568 }, { "epoch": 0.3133628223879, "grad_norm": 1.385790228843689, "learning_rate": 4.338024795531621e-05, "loss": 0.1783, "step": 17569 }, { "epoch": 0.31338065850961366, "grad_norm": 0.26064804196357727, "learning_rate": 4.337919285738149e-05, "loss": 0.1635, "step": 17570 }, { "epoch": 0.31339849463132735, "grad_norm": 0.4337914288043976, "learning_rate": 4.337813768820268e-05, "loss": 0.2102, "step": 17571 }, { "epoch": 0.31341633075304104, "grad_norm": 0.2873128652572632, "learning_rate": 4.337708244778386e-05, "loss": 0.1841, "step": 17572 }, { "epoch": 0.3134341668747547, "grad_norm": 0.21467001736164093, "learning_rate": 4.337602713612912e-05, "loss": 0.1805, "step": 17573 }, { "epoch": 0.31345200299646847, "grad_norm": 0.2837502658367157, "learning_rate": 4.337497175324255e-05, "loss": 0.128, "step": 17574 }, { "epoch": 0.31346983911818216, "grad_norm": 0.2778398394584656, "learning_rate": 4.337391629912825e-05, "loss": 0.138, "step": 17575 }, { "epoch": 0.31348767523989585, "grad_norm": 0.34074631333351135, "learning_rate": 4.3372860773790296e-05, "loss": 0.1775, "step": 17576 }, { "epoch": 0.31350551136160953, "grad_norm": 0.2707691192626953, "learning_rate": 4.3371805177232785e-05, "loss": 0.1607, "step": 17577 }, { "epoch": 0.3135233474833232, "grad_norm": 0.2902049422264099, "learning_rate": 4.337074950945982e-05, "loss": 0.1887, "step": 17578 }, { "epoch": 0.3135411836050369, "grad_norm": 0.3427014648914337, "learning_rate": 4.336969377047548e-05, "loss": 0.2461, "step": 17579 }, { "epoch": 0.3135590197267506, "grad_norm": 0.2717701494693756, "learning_rate": 4.336863796028387e-05, "loss": 0.1928, "step": 17580 }, { "epoch": 0.3135768558484643, "grad_norm": 0.27019140124320984, "learning_rate": 4.336758207888907e-05, "loss": 0.1902, "step": 17581 }, { "epoch": 0.31359469197017803, "grad_norm": 0.24754968285560608, "learning_rate": 4.336652612629517e-05, "loss": 0.1503, "step": 17582 }, { "epoch": 0.3136125280918917, "grad_norm": 0.22130009531974792, "learning_rate": 4.336547010250628e-05, "loss": 0.1346, "step": 17583 }, { "epoch": 0.3136303642136054, "grad_norm": 0.26831161975860596, "learning_rate": 4.336441400752649e-05, "loss": 0.1379, "step": 17584 }, { "epoch": 0.3136482003353191, "grad_norm": 0.23630496859550476, "learning_rate": 4.3363357841359874e-05, "loss": 0.1802, "step": 17585 }, { "epoch": 0.3136660364570328, "grad_norm": 0.32348525524139404, "learning_rate": 4.3362301604010554e-05, "loss": 0.1893, "step": 17586 }, { "epoch": 0.31368387257874647, "grad_norm": 0.23855532705783844, "learning_rate": 4.33612452954826e-05, "loss": 0.206, "step": 17587 }, { "epoch": 0.31370170870046016, "grad_norm": 0.3500916659832001, "learning_rate": 4.3360188915780126e-05, "loss": 0.2904, "step": 17588 }, { "epoch": 0.31371954482217385, "grad_norm": 0.3429223597049713, "learning_rate": 4.335913246490722e-05, "loss": 0.2181, "step": 17589 }, { "epoch": 0.31373738094388753, "grad_norm": 0.2651059031486511, "learning_rate": 4.335807594286797e-05, "loss": 0.1865, "step": 17590 }, { "epoch": 0.3137552170656013, "grad_norm": 0.34736937284469604, "learning_rate": 4.335701934966647e-05, "loss": 0.2246, "step": 17591 }, { "epoch": 0.31377305318731497, "grad_norm": 0.37737807631492615, "learning_rate": 4.3355962685306825e-05, "loss": 0.2229, "step": 17592 }, { "epoch": 0.31379088930902865, "grad_norm": 0.20219923555850983, "learning_rate": 4.335490594979314e-05, "loss": 0.1033, "step": 17593 }, { "epoch": 0.31380872543074234, "grad_norm": 0.2116667926311493, "learning_rate": 4.335384914312949e-05, "loss": 0.1664, "step": 17594 }, { "epoch": 0.31382656155245603, "grad_norm": 0.2642044425010681, "learning_rate": 4.3352792265319987e-05, "loss": 0.1694, "step": 17595 }, { "epoch": 0.3138443976741697, "grad_norm": 0.2847067713737488, "learning_rate": 4.3351735316368726e-05, "loss": 0.1503, "step": 17596 }, { "epoch": 0.3138622337958834, "grad_norm": 0.4029041528701782, "learning_rate": 4.335067829627979e-05, "loss": 0.1862, "step": 17597 }, { "epoch": 0.3138800699175971, "grad_norm": 0.2765880525112152, "learning_rate": 4.334962120505729e-05, "loss": 0.2553, "step": 17598 }, { "epoch": 0.31389790603931084, "grad_norm": 0.23255957663059235, "learning_rate": 4.334856404270532e-05, "loss": 0.1656, "step": 17599 }, { "epoch": 0.3139157421610245, "grad_norm": 0.19842900335788727, "learning_rate": 4.3347506809227984e-05, "loss": 0.1774, "step": 17600 }, { "epoch": 0.3139335782827382, "grad_norm": 0.30295294523239136, "learning_rate": 4.3346449504629375e-05, "loss": 0.2032, "step": 17601 }, { "epoch": 0.3139514144044519, "grad_norm": 0.2216023951768875, "learning_rate": 4.334539212891359e-05, "loss": 0.1693, "step": 17602 }, { "epoch": 0.3139692505261656, "grad_norm": 0.3422231674194336, "learning_rate": 4.3344334682084716e-05, "loss": 0.1765, "step": 17603 }, { "epoch": 0.3139870866478793, "grad_norm": 0.22290359437465668, "learning_rate": 4.334327716414688e-05, "loss": 0.1474, "step": 17604 }, { "epoch": 0.31400492276959296, "grad_norm": 0.2919217348098755, "learning_rate": 4.334221957510416e-05, "loss": 0.185, "step": 17605 }, { "epoch": 0.31402275889130665, "grad_norm": 0.5418250560760498, "learning_rate": 4.334116191496066e-05, "loss": 0.205, "step": 17606 }, { "epoch": 0.3140405950130204, "grad_norm": 0.3279336988925934, "learning_rate": 4.3340104183720484e-05, "loss": 0.1088, "step": 17607 }, { "epoch": 0.3140584311347341, "grad_norm": 0.28544193506240845, "learning_rate": 4.333904638138773e-05, "loss": 0.191, "step": 17608 }, { "epoch": 0.31407626725644777, "grad_norm": 0.19915661215782166, "learning_rate": 4.33379885079665e-05, "loss": 0.129, "step": 17609 }, { "epoch": 0.31409410337816146, "grad_norm": 0.2938247323036194, "learning_rate": 4.333693056346089e-05, "loss": 0.1913, "step": 17610 }, { "epoch": 0.31411193949987515, "grad_norm": 0.23385438323020935, "learning_rate": 4.3335872547875e-05, "loss": 0.1343, "step": 17611 }, { "epoch": 0.31412977562158884, "grad_norm": 0.29365524649620056, "learning_rate": 4.333481446121294e-05, "loss": 0.1626, "step": 17612 }, { "epoch": 0.3141476117433025, "grad_norm": 0.25309011340141296, "learning_rate": 4.3333756303478815e-05, "loss": 0.1856, "step": 17613 }, { "epoch": 0.3141654478650162, "grad_norm": 0.30498093366622925, "learning_rate": 4.33326980746767e-05, "loss": 0.1858, "step": 17614 }, { "epoch": 0.3141832839867299, "grad_norm": 0.26543283462524414, "learning_rate": 4.333163977481073e-05, "loss": 0.1621, "step": 17615 }, { "epoch": 0.31420112010844364, "grad_norm": 0.37338709831237793, "learning_rate": 4.3330581403884984e-05, "loss": 0.1444, "step": 17616 }, { "epoch": 0.31421895623015733, "grad_norm": 0.27330338954925537, "learning_rate": 4.332952296190358e-05, "loss": 0.2034, "step": 17617 }, { "epoch": 0.314236792351871, "grad_norm": 0.46846622228622437, "learning_rate": 4.332846444887061e-05, "loss": 0.1368, "step": 17618 }, { "epoch": 0.3142546284735847, "grad_norm": 0.2901749610900879, "learning_rate": 4.332740586479018e-05, "loss": 0.1834, "step": 17619 }, { "epoch": 0.3142724645952984, "grad_norm": 0.30629175901412964, "learning_rate": 4.33263472096664e-05, "loss": 0.16, "step": 17620 }, { "epoch": 0.3142903007170121, "grad_norm": 0.24373897910118103, "learning_rate": 4.332528848350337e-05, "loss": 0.1846, "step": 17621 }, { "epoch": 0.31430813683872577, "grad_norm": 0.22953234612941742, "learning_rate": 4.3324229686305186e-05, "loss": 0.1867, "step": 17622 }, { "epoch": 0.31432597296043946, "grad_norm": 0.3272095024585724, "learning_rate": 4.332317081807595e-05, "loss": 0.1377, "step": 17623 }, { "epoch": 0.3143438090821532, "grad_norm": 0.2385309487581253, "learning_rate": 4.3322111878819797e-05, "loss": 0.1565, "step": 17624 }, { "epoch": 0.3143616452038669, "grad_norm": 0.20896194875240326, "learning_rate": 4.33210528685408e-05, "loss": 0.1624, "step": 17625 }, { "epoch": 0.3143794813255806, "grad_norm": 0.3582569658756256, "learning_rate": 4.3319993787243066e-05, "loss": 0.2019, "step": 17626 }, { "epoch": 0.31439731744729427, "grad_norm": 0.27730584144592285, "learning_rate": 4.3318934634930716e-05, "loss": 0.1937, "step": 17627 }, { "epoch": 0.31441515356900795, "grad_norm": 0.2537687122821808, "learning_rate": 4.3317875411607853e-05, "loss": 0.18, "step": 17628 }, { "epoch": 0.31443298969072164, "grad_norm": 0.26678311824798584, "learning_rate": 4.331681611727857e-05, "loss": 0.1561, "step": 17629 }, { "epoch": 0.31445082581243533, "grad_norm": 0.2528023421764374, "learning_rate": 4.331575675194698e-05, "loss": 0.1749, "step": 17630 }, { "epoch": 0.314468661934149, "grad_norm": 0.22930273413658142, "learning_rate": 4.33146973156172e-05, "loss": 0.1321, "step": 17631 }, { "epoch": 0.3144864980558627, "grad_norm": 0.22550135850906372, "learning_rate": 4.3313637808293326e-05, "loss": 0.1569, "step": 17632 }, { "epoch": 0.31450433417757645, "grad_norm": 0.3391514718532562, "learning_rate": 4.331257822997946e-05, "loss": 0.1693, "step": 17633 }, { "epoch": 0.31452217029929014, "grad_norm": 0.3209320306777954, "learning_rate": 4.331151858067972e-05, "loss": 0.192, "step": 17634 }, { "epoch": 0.3145400064210038, "grad_norm": 0.2813419997692108, "learning_rate": 4.331045886039821e-05, "loss": 0.1798, "step": 17635 }, { "epoch": 0.3145578425427175, "grad_norm": 0.20678496360778809, "learning_rate": 4.330939906913904e-05, "loss": 0.1898, "step": 17636 }, { "epoch": 0.3145756786644312, "grad_norm": 0.23105883598327637, "learning_rate": 4.3308339206906303e-05, "loss": 0.1355, "step": 17637 }, { "epoch": 0.3145935147861449, "grad_norm": 0.3000940680503845, "learning_rate": 4.330727927370413e-05, "loss": 0.1435, "step": 17638 }, { "epoch": 0.3146113509078586, "grad_norm": 0.29369547963142395, "learning_rate": 4.330621926953662e-05, "loss": 0.1724, "step": 17639 }, { "epoch": 0.31462918702957227, "grad_norm": 0.4290458559989929, "learning_rate": 4.330515919440787e-05, "loss": 0.1768, "step": 17640 }, { "epoch": 0.314647023151286, "grad_norm": 0.2907050848007202, "learning_rate": 4.330409904832202e-05, "loss": 0.185, "step": 17641 }, { "epoch": 0.3146648592729997, "grad_norm": 0.29109328985214233, "learning_rate": 4.330303883128315e-05, "loss": 0.1892, "step": 17642 }, { "epoch": 0.3146826953947134, "grad_norm": 0.19267438352108002, "learning_rate": 4.3301978543295375e-05, "loss": 0.1169, "step": 17643 }, { "epoch": 0.3147005315164271, "grad_norm": 0.24215887486934662, "learning_rate": 4.330091818436281e-05, "loss": 0.1646, "step": 17644 }, { "epoch": 0.31471836763814076, "grad_norm": 0.30494052171707153, "learning_rate": 4.329985775448957e-05, "loss": 0.1715, "step": 17645 }, { "epoch": 0.31473620375985445, "grad_norm": 0.3105016052722931, "learning_rate": 4.3298797253679766e-05, "loss": 0.1915, "step": 17646 }, { "epoch": 0.31475403988156814, "grad_norm": 0.27297598123550415, "learning_rate": 4.3297736681937494e-05, "loss": 0.1255, "step": 17647 }, { "epoch": 0.3147718760032818, "grad_norm": 0.24972057342529297, "learning_rate": 4.329667603926688e-05, "loss": 0.1931, "step": 17648 }, { "epoch": 0.3147897121249955, "grad_norm": 0.20854078233242035, "learning_rate": 4.3295615325672026e-05, "loss": 0.1909, "step": 17649 }, { "epoch": 0.31480754824670926, "grad_norm": 0.27245384454727173, "learning_rate": 4.329455454115705e-05, "loss": 0.1602, "step": 17650 }, { "epoch": 0.31482538436842294, "grad_norm": 0.38677680492401123, "learning_rate": 4.329349368572606e-05, "loss": 0.2028, "step": 17651 }, { "epoch": 0.31484322049013663, "grad_norm": 0.28773415088653564, "learning_rate": 4.329243275938317e-05, "loss": 0.1821, "step": 17652 }, { "epoch": 0.3148610566118503, "grad_norm": 0.2797043025493622, "learning_rate": 4.32913717621325e-05, "loss": 0.2245, "step": 17653 }, { "epoch": 0.314878892733564, "grad_norm": 0.29197850823402405, "learning_rate": 4.3290310693978155e-05, "loss": 0.1761, "step": 17654 }, { "epoch": 0.3148967288552777, "grad_norm": 0.29030150175094604, "learning_rate": 4.3289249554924236e-05, "loss": 0.1636, "step": 17655 }, { "epoch": 0.3149145649769914, "grad_norm": 0.3648754954338074, "learning_rate": 4.328818834497488e-05, "loss": 0.2098, "step": 17656 }, { "epoch": 0.3149324010987051, "grad_norm": 0.31052032113075256, "learning_rate": 4.3287127064134185e-05, "loss": 0.1907, "step": 17657 }, { "epoch": 0.3149502372204188, "grad_norm": 0.31157252192497253, "learning_rate": 4.328606571240627e-05, "loss": 0.13, "step": 17658 }, { "epoch": 0.3149680733421325, "grad_norm": 0.26587140560150146, "learning_rate": 4.328500428979525e-05, "loss": 0.1884, "step": 17659 }, { "epoch": 0.3149859094638462, "grad_norm": 0.2714605927467346, "learning_rate": 4.328394279630524e-05, "loss": 0.2003, "step": 17660 }, { "epoch": 0.3150037455855599, "grad_norm": 0.22059091925621033, "learning_rate": 4.328288123194034e-05, "loss": 0.1353, "step": 17661 }, { "epoch": 0.31502158170727357, "grad_norm": 0.268187940120697, "learning_rate": 4.3281819596704694e-05, "loss": 0.1595, "step": 17662 }, { "epoch": 0.31503941782898726, "grad_norm": 0.22585149109363556, "learning_rate": 4.3280757890602394e-05, "loss": 0.1785, "step": 17663 }, { "epoch": 0.31505725395070094, "grad_norm": 0.36876484751701355, "learning_rate": 4.327969611363756e-05, "loss": 0.2152, "step": 17664 }, { "epoch": 0.31507509007241463, "grad_norm": 0.2486571967601776, "learning_rate": 4.327863426581431e-05, "loss": 0.186, "step": 17665 }, { "epoch": 0.3150929261941284, "grad_norm": 0.4804657995700836, "learning_rate": 4.3277572347136766e-05, "loss": 0.1796, "step": 17666 }, { "epoch": 0.31511076231584206, "grad_norm": 0.3148822784423828, "learning_rate": 4.3276510357609035e-05, "loss": 0.155, "step": 17667 }, { "epoch": 0.31512859843755575, "grad_norm": 0.2525266408920288, "learning_rate": 4.3275448297235246e-05, "loss": 0.133, "step": 17668 }, { "epoch": 0.31514643455926944, "grad_norm": 0.2096313238143921, "learning_rate": 4.3274386166019496e-05, "loss": 0.1442, "step": 17669 }, { "epoch": 0.3151642706809831, "grad_norm": 0.24418789148330688, "learning_rate": 4.3273323963965914e-05, "loss": 0.1286, "step": 17670 }, { "epoch": 0.3151821068026968, "grad_norm": 0.32168519496917725, "learning_rate": 4.327226169107862e-05, "loss": 0.1855, "step": 17671 }, { "epoch": 0.3151999429244105, "grad_norm": 0.2948327362537384, "learning_rate": 4.327119934736173e-05, "loss": 0.1876, "step": 17672 }, { "epoch": 0.3152177790461242, "grad_norm": 0.28940072655677795, "learning_rate": 4.327013693281936e-05, "loss": 0.0912, "step": 17673 }, { "epoch": 0.3152356151678379, "grad_norm": 0.2675217092037201, "learning_rate": 4.326907444745563e-05, "loss": 0.1323, "step": 17674 }, { "epoch": 0.3152534512895516, "grad_norm": 0.4579310715198517, "learning_rate": 4.3268011891274654e-05, "loss": 0.2177, "step": 17675 }, { "epoch": 0.3152712874112653, "grad_norm": 0.2734963297843933, "learning_rate": 4.326694926428055e-05, "loss": 0.1759, "step": 17676 }, { "epoch": 0.315289123532979, "grad_norm": 0.26294007897377014, "learning_rate": 4.326588656647745e-05, "loss": 0.1719, "step": 17677 }, { "epoch": 0.3153069596546927, "grad_norm": 0.29418545961380005, "learning_rate": 4.3264823797869463e-05, "loss": 0.1734, "step": 17678 }, { "epoch": 0.3153247957764064, "grad_norm": 0.2187463343143463, "learning_rate": 4.326376095846071e-05, "loss": 0.1579, "step": 17679 }, { "epoch": 0.31534263189812006, "grad_norm": 0.2335316389799118, "learning_rate": 4.3262698048255314e-05, "loss": 0.1878, "step": 17680 }, { "epoch": 0.31536046801983375, "grad_norm": 0.33935675024986267, "learning_rate": 4.3261635067257386e-05, "loss": 0.1843, "step": 17681 }, { "epoch": 0.31537830414154744, "grad_norm": 0.2613784372806549, "learning_rate": 4.326057201547106e-05, "loss": 0.1872, "step": 17682 }, { "epoch": 0.3153961402632612, "grad_norm": 0.22758524119853973, "learning_rate": 4.325950889290045e-05, "loss": 0.1857, "step": 17683 }, { "epoch": 0.31541397638497487, "grad_norm": 0.2417377084493637, "learning_rate": 4.325844569954967e-05, "loss": 0.2008, "step": 17684 }, { "epoch": 0.31543181250668856, "grad_norm": 0.2967318594455719, "learning_rate": 4.325738243542285e-05, "loss": 0.1754, "step": 17685 }, { "epoch": 0.31544964862840225, "grad_norm": 0.3004534840583801, "learning_rate": 4.3256319100524115e-05, "loss": 0.1342, "step": 17686 }, { "epoch": 0.31546748475011593, "grad_norm": 0.25647005438804626, "learning_rate": 4.325525569485758e-05, "loss": 0.1879, "step": 17687 }, { "epoch": 0.3154853208718296, "grad_norm": 0.22697731852531433, "learning_rate": 4.325419221842736e-05, "loss": 0.1732, "step": 17688 }, { "epoch": 0.3155031569935433, "grad_norm": 0.2968234121799469, "learning_rate": 4.32531286712376e-05, "loss": 0.1347, "step": 17689 }, { "epoch": 0.315520993115257, "grad_norm": 0.2540208697319031, "learning_rate": 4.32520650532924e-05, "loss": 0.1925, "step": 17690 }, { "epoch": 0.3155388292369707, "grad_norm": 0.2718205451965332, "learning_rate": 4.325100136459589e-05, "loss": 0.1272, "step": 17691 }, { "epoch": 0.31555666535868443, "grad_norm": 0.3270924687385559, "learning_rate": 4.32499376051522e-05, "loss": 0.1611, "step": 17692 }, { "epoch": 0.3155745014803981, "grad_norm": 0.22599878907203674, "learning_rate": 4.324887377496545e-05, "loss": 0.1767, "step": 17693 }, { "epoch": 0.3155923376021118, "grad_norm": 0.21597085893154144, "learning_rate": 4.324780987403976e-05, "loss": 0.1682, "step": 17694 }, { "epoch": 0.3156101737238255, "grad_norm": 0.2529672384262085, "learning_rate": 4.3246745902379256e-05, "loss": 0.1325, "step": 17695 }, { "epoch": 0.3156280098455392, "grad_norm": 0.2538890242576599, "learning_rate": 4.3245681859988065e-05, "loss": 0.1582, "step": 17696 }, { "epoch": 0.31564584596725287, "grad_norm": 0.1813114881515503, "learning_rate": 4.32446177468703e-05, "loss": 0.121, "step": 17697 }, { "epoch": 0.31566368208896656, "grad_norm": 0.22774241864681244, "learning_rate": 4.3243553563030103e-05, "loss": 0.1608, "step": 17698 }, { "epoch": 0.31568151821068025, "grad_norm": 0.2458595484495163, "learning_rate": 4.324248930847159e-05, "loss": 0.1473, "step": 17699 }, { "epoch": 0.315699354332394, "grad_norm": 0.4012024700641632, "learning_rate": 4.324142498319889e-05, "loss": 0.2038, "step": 17700 }, { "epoch": 0.3157171904541077, "grad_norm": 0.20740777254104614, "learning_rate": 4.3240360587216125e-05, "loss": 0.1125, "step": 17701 }, { "epoch": 0.31573502657582136, "grad_norm": 0.3383517861366272, "learning_rate": 4.323929612052742e-05, "loss": 0.1655, "step": 17702 }, { "epoch": 0.31575286269753505, "grad_norm": 0.20333009958267212, "learning_rate": 4.32382315831369e-05, "loss": 0.1242, "step": 17703 }, { "epoch": 0.31577069881924874, "grad_norm": 0.36358100175857544, "learning_rate": 4.32371669750487e-05, "loss": 0.1657, "step": 17704 }, { "epoch": 0.31578853494096243, "grad_norm": 0.27666720747947693, "learning_rate": 4.323610229626695e-05, "loss": 0.2082, "step": 17705 }, { "epoch": 0.3158063710626761, "grad_norm": 0.2461199015378952, "learning_rate": 4.323503754679576e-05, "loss": 0.1647, "step": 17706 }, { "epoch": 0.3158242071843898, "grad_norm": 0.3557877540588379, "learning_rate": 4.323397272663927e-05, "loss": 0.2248, "step": 17707 }, { "epoch": 0.31584204330610355, "grad_norm": 0.22189338505268097, "learning_rate": 4.32329078358016e-05, "loss": 0.1736, "step": 17708 }, { "epoch": 0.31585987942781724, "grad_norm": 0.2568211555480957, "learning_rate": 4.323184287428688e-05, "loss": 0.2251, "step": 17709 }, { "epoch": 0.3158777155495309, "grad_norm": 0.1955392062664032, "learning_rate": 4.323077784209925e-05, "loss": 0.1407, "step": 17710 }, { "epoch": 0.3158955516712446, "grad_norm": 0.2459288388490677, "learning_rate": 4.322971273924282e-05, "loss": 0.1621, "step": 17711 }, { "epoch": 0.3159133877929583, "grad_norm": 0.21687082946300507, "learning_rate": 4.322864756572173e-05, "loss": 0.1394, "step": 17712 }, { "epoch": 0.315931223914672, "grad_norm": 0.34187471866607666, "learning_rate": 4.32275823215401e-05, "loss": 0.2061, "step": 17713 }, { "epoch": 0.3159490600363857, "grad_norm": 0.25744685530662537, "learning_rate": 4.3226517006702074e-05, "loss": 0.1817, "step": 17714 }, { "epoch": 0.31596689615809936, "grad_norm": 0.3716115951538086, "learning_rate": 4.322545162121177e-05, "loss": 0.1637, "step": 17715 }, { "epoch": 0.31598473227981305, "grad_norm": 0.31630340218544006, "learning_rate": 4.322438616507332e-05, "loss": 0.2016, "step": 17716 }, { "epoch": 0.3160025684015268, "grad_norm": 0.35728713870048523, "learning_rate": 4.322332063829085e-05, "loss": 0.1806, "step": 17717 }, { "epoch": 0.3160204045232405, "grad_norm": 0.33885928988456726, "learning_rate": 4.32222550408685e-05, "loss": 0.2021, "step": 17718 }, { "epoch": 0.31603824064495417, "grad_norm": 0.3298684060573578, "learning_rate": 4.3221189372810387e-05, "loss": 0.2422, "step": 17719 }, { "epoch": 0.31605607676666786, "grad_norm": 0.20445676147937775, "learning_rate": 4.322012363412067e-05, "loss": 0.1507, "step": 17720 }, { "epoch": 0.31607391288838155, "grad_norm": 0.2581339478492737, "learning_rate": 4.3219057824803445e-05, "loss": 0.2137, "step": 17721 }, { "epoch": 0.31609174901009524, "grad_norm": 0.3025096356868744, "learning_rate": 4.321799194486286e-05, "loss": 0.193, "step": 17722 }, { "epoch": 0.3161095851318089, "grad_norm": 0.27923035621643066, "learning_rate": 4.321692599430305e-05, "loss": 0.2062, "step": 17723 }, { "epoch": 0.3161274212535226, "grad_norm": 0.22612594068050385, "learning_rate": 4.321585997312815e-05, "loss": 0.1573, "step": 17724 }, { "epoch": 0.31614525737523635, "grad_norm": 0.32120323181152344, "learning_rate": 4.3214793881342273e-05, "loss": 0.1743, "step": 17725 }, { "epoch": 0.31616309349695004, "grad_norm": 0.32446640729904175, "learning_rate": 4.321372771894957e-05, "loss": 0.1627, "step": 17726 }, { "epoch": 0.31618092961866373, "grad_norm": 0.2871161103248596, "learning_rate": 4.3212661485954166e-05, "loss": 0.2228, "step": 17727 }, { "epoch": 0.3161987657403774, "grad_norm": 0.29146233201026917, "learning_rate": 4.3211595182360194e-05, "loss": 0.1833, "step": 17728 }, { "epoch": 0.3162166018620911, "grad_norm": 0.2407662272453308, "learning_rate": 4.321052880817179e-05, "loss": 0.1793, "step": 17729 }, { "epoch": 0.3162344379838048, "grad_norm": 0.3406382203102112, "learning_rate": 4.320946236339308e-05, "loss": 0.2022, "step": 17730 }, { "epoch": 0.3162522741055185, "grad_norm": 0.21744000911712646, "learning_rate": 4.3208395848028215e-05, "loss": 0.1589, "step": 17731 }, { "epoch": 0.31627011022723217, "grad_norm": 0.3789297938346863, "learning_rate": 4.320732926208132e-05, "loss": 0.1698, "step": 17732 }, { "epoch": 0.31628794634894586, "grad_norm": 0.31260377168655396, "learning_rate": 4.320626260555652e-05, "loss": 0.1602, "step": 17733 }, { "epoch": 0.3163057824706596, "grad_norm": 0.25988101959228516, "learning_rate": 4.320519587845796e-05, "loss": 0.1633, "step": 17734 }, { "epoch": 0.3163236185923733, "grad_norm": 0.26145365834236145, "learning_rate": 4.320412908078978e-05, "loss": 0.1523, "step": 17735 }, { "epoch": 0.316341454714087, "grad_norm": 0.24997448921203613, "learning_rate": 4.32030622125561e-05, "loss": 0.1667, "step": 17736 }, { "epoch": 0.31635929083580067, "grad_norm": 0.2432536482810974, "learning_rate": 4.3201995273761066e-05, "loss": 0.1629, "step": 17737 }, { "epoch": 0.31637712695751435, "grad_norm": 0.2759473919868469, "learning_rate": 4.3200928264408814e-05, "loss": 0.187, "step": 17738 }, { "epoch": 0.31639496307922804, "grad_norm": 0.29780685901641846, "learning_rate": 4.3199861184503474e-05, "loss": 0.1673, "step": 17739 }, { "epoch": 0.31641279920094173, "grad_norm": 0.3092334270477295, "learning_rate": 4.319879403404919e-05, "loss": 0.2258, "step": 17740 }, { "epoch": 0.3164306353226554, "grad_norm": 0.32397884130477905, "learning_rate": 4.3197726813050086e-05, "loss": 0.2177, "step": 17741 }, { "epoch": 0.31644847144436916, "grad_norm": 0.2735349237918854, "learning_rate": 4.319665952151032e-05, "loss": 0.1838, "step": 17742 }, { "epoch": 0.31646630756608285, "grad_norm": 0.20680665969848633, "learning_rate": 4.3195592159434005e-05, "loss": 0.1378, "step": 17743 }, { "epoch": 0.31648414368779654, "grad_norm": 0.2287045568227768, "learning_rate": 4.31945247268253e-05, "loss": 0.1724, "step": 17744 }, { "epoch": 0.3165019798095102, "grad_norm": 0.3623093366622925, "learning_rate": 4.3193457223688325e-05, "loss": 0.2417, "step": 17745 }, { "epoch": 0.3165198159312239, "grad_norm": 0.2668391466140747, "learning_rate": 4.319238965002723e-05, "loss": 0.1979, "step": 17746 }, { "epoch": 0.3165376520529376, "grad_norm": 0.28566113114356995, "learning_rate": 4.319132200584615e-05, "loss": 0.132, "step": 17747 }, { "epoch": 0.3165554881746513, "grad_norm": 0.2487669438123703, "learning_rate": 4.3190254291149225e-05, "loss": 0.1735, "step": 17748 }, { "epoch": 0.316573324296365, "grad_norm": 0.27945441007614136, "learning_rate": 4.318918650594059e-05, "loss": 0.1551, "step": 17749 }, { "epoch": 0.31659116041807867, "grad_norm": 0.2320530265569687, "learning_rate": 4.318811865022438e-05, "loss": 0.1885, "step": 17750 }, { "epoch": 0.3166089965397924, "grad_norm": 0.22671692073345184, "learning_rate": 4.318705072400474e-05, "loss": 0.1763, "step": 17751 }, { "epoch": 0.3166268326615061, "grad_norm": 0.31538254022598267, "learning_rate": 4.318598272728582e-05, "loss": 0.2117, "step": 17752 }, { "epoch": 0.3166446687832198, "grad_norm": 0.29472115635871887, "learning_rate": 4.318491466007174e-05, "loss": 0.1251, "step": 17753 }, { "epoch": 0.3166625049049335, "grad_norm": 0.24860672652721405, "learning_rate": 4.318384652236665e-05, "loss": 0.2087, "step": 17754 }, { "epoch": 0.31668034102664716, "grad_norm": 0.2869946360588074, "learning_rate": 4.31827783141747e-05, "loss": 0.2165, "step": 17755 }, { "epoch": 0.31669817714836085, "grad_norm": 0.2744007706642151, "learning_rate": 4.318171003550001e-05, "loss": 0.2106, "step": 17756 }, { "epoch": 0.31671601327007454, "grad_norm": 0.1817733347415924, "learning_rate": 4.318064168634675e-05, "loss": 0.1692, "step": 17757 }, { "epoch": 0.3167338493917882, "grad_norm": 0.29223644733428955, "learning_rate": 4.317957326671902e-05, "loss": 0.1847, "step": 17758 }, { "epoch": 0.31675168551350197, "grad_norm": 0.3327445387840271, "learning_rate": 4.3178504776621e-05, "loss": 0.1526, "step": 17759 }, { "epoch": 0.31676952163521566, "grad_norm": 0.2552843391895294, "learning_rate": 4.317743621605681e-05, "loss": 0.1326, "step": 17760 }, { "epoch": 0.31678735775692934, "grad_norm": 0.255779892206192, "learning_rate": 4.3176367585030605e-05, "loss": 0.2056, "step": 17761 }, { "epoch": 0.31680519387864303, "grad_norm": 0.31038182973861694, "learning_rate": 4.317529888354652e-05, "loss": 0.1719, "step": 17762 }, { "epoch": 0.3168230300003567, "grad_norm": 0.2724645435810089, "learning_rate": 4.317423011160869e-05, "loss": 0.2022, "step": 17763 }, { "epoch": 0.3168408661220704, "grad_norm": 0.20053817331790924, "learning_rate": 4.317316126922127e-05, "loss": 0.158, "step": 17764 }, { "epoch": 0.3168587022437841, "grad_norm": 0.30450016260147095, "learning_rate": 4.317209235638841e-05, "loss": 0.1726, "step": 17765 }, { "epoch": 0.3168765383654978, "grad_norm": 0.2536017596721649, "learning_rate": 4.317102337311424e-05, "loss": 0.1745, "step": 17766 }, { "epoch": 0.3168943744872115, "grad_norm": 0.2314152866601944, "learning_rate": 4.3169954319402906e-05, "loss": 0.1474, "step": 17767 }, { "epoch": 0.3169122106089252, "grad_norm": 0.18448486924171448, "learning_rate": 4.316888519525855e-05, "loss": 0.1371, "step": 17768 }, { "epoch": 0.3169300467306389, "grad_norm": 0.239515021443367, "learning_rate": 4.3167816000685325e-05, "loss": 0.114, "step": 17769 }, { "epoch": 0.3169478828523526, "grad_norm": 0.28303033113479614, "learning_rate": 4.316674673568736e-05, "loss": 0.177, "step": 17770 }, { "epoch": 0.3169657189740663, "grad_norm": 0.2584919035434723, "learning_rate": 4.3165677400268824e-05, "loss": 0.1718, "step": 17771 }, { "epoch": 0.31698355509577997, "grad_norm": 0.25381705164909363, "learning_rate": 4.316460799443383e-05, "loss": 0.1954, "step": 17772 }, { "epoch": 0.31700139121749366, "grad_norm": 0.28671392798423767, "learning_rate": 4.3163538518186566e-05, "loss": 0.1434, "step": 17773 }, { "epoch": 0.31701922733920734, "grad_norm": 0.34216809272766113, "learning_rate": 4.3162468971531135e-05, "loss": 0.1491, "step": 17774 }, { "epoch": 0.31703706346092103, "grad_norm": 0.2896101772785187, "learning_rate": 4.31613993544717e-05, "loss": 0.1783, "step": 17775 }, { "epoch": 0.3170548995826348, "grad_norm": 0.34536120295524597, "learning_rate": 4.3160329667012425e-05, "loss": 0.2219, "step": 17776 }, { "epoch": 0.31707273570434846, "grad_norm": 0.2488478720188141, "learning_rate": 4.3159259909157427e-05, "loss": 0.1626, "step": 17777 }, { "epoch": 0.31709057182606215, "grad_norm": 0.2681307792663574, "learning_rate": 4.3158190080910866e-05, "loss": 0.1798, "step": 17778 }, { "epoch": 0.31710840794777584, "grad_norm": 0.4173508882522583, "learning_rate": 4.315712018227689e-05, "loss": 0.1876, "step": 17779 }, { "epoch": 0.3171262440694895, "grad_norm": 0.27446678280830383, "learning_rate": 4.315605021325965e-05, "loss": 0.2041, "step": 17780 }, { "epoch": 0.3171440801912032, "grad_norm": 0.3628494143486023, "learning_rate": 4.315498017386328e-05, "loss": 0.1822, "step": 17781 }, { "epoch": 0.3171619163129169, "grad_norm": 0.23815074563026428, "learning_rate": 4.315391006409194e-05, "loss": 0.1902, "step": 17782 }, { "epoch": 0.3171797524346306, "grad_norm": 0.2864932119846344, "learning_rate": 4.315283988394977e-05, "loss": 0.1888, "step": 17783 }, { "epoch": 0.31719758855634433, "grad_norm": 0.3273811638355255, "learning_rate": 4.315176963344093e-05, "loss": 0.2123, "step": 17784 }, { "epoch": 0.317215424678058, "grad_norm": 0.2772671580314636, "learning_rate": 4.315069931256957e-05, "loss": 0.1828, "step": 17785 }, { "epoch": 0.3172332607997717, "grad_norm": 0.26042822003364563, "learning_rate": 4.3149628921339815e-05, "loss": 0.1836, "step": 17786 }, { "epoch": 0.3172510969214854, "grad_norm": 0.3124202787876129, "learning_rate": 4.314855845975583e-05, "loss": 0.1257, "step": 17787 }, { "epoch": 0.3172689330431991, "grad_norm": 0.2609250545501709, "learning_rate": 4.3147487927821775e-05, "loss": 0.1878, "step": 17788 }, { "epoch": 0.3172867691649128, "grad_norm": 0.4113426208496094, "learning_rate": 4.3146417325541776e-05, "loss": 0.1534, "step": 17789 }, { "epoch": 0.31730460528662646, "grad_norm": 0.2329111248254776, "learning_rate": 4.314534665292001e-05, "loss": 0.1401, "step": 17790 }, { "epoch": 0.31732244140834015, "grad_norm": 0.29550719261169434, "learning_rate": 4.3144275909960595e-05, "loss": 0.2085, "step": 17791 }, { "epoch": 0.31734027753005384, "grad_norm": 0.2701817750930786, "learning_rate": 4.3143205096667714e-05, "loss": 0.1978, "step": 17792 }, { "epoch": 0.3173581136517676, "grad_norm": 0.27645787596702576, "learning_rate": 4.314213421304549e-05, "loss": 0.1815, "step": 17793 }, { "epoch": 0.31737594977348127, "grad_norm": 0.36288371682167053, "learning_rate": 4.31410632590981e-05, "loss": 0.1781, "step": 17794 }, { "epoch": 0.31739378589519496, "grad_norm": 0.22071725130081177, "learning_rate": 4.313999223482969e-05, "loss": 0.1495, "step": 17795 }, { "epoch": 0.31741162201690865, "grad_norm": 0.3010807931423187, "learning_rate": 4.313892114024439e-05, "loss": 0.1505, "step": 17796 }, { "epoch": 0.31742945813862233, "grad_norm": 0.4616325795650482, "learning_rate": 4.313784997534637e-05, "loss": 0.1538, "step": 17797 }, { "epoch": 0.317447294260336, "grad_norm": 0.36488229036331177, "learning_rate": 4.3136778740139785e-05, "loss": 0.181, "step": 17798 }, { "epoch": 0.3174651303820497, "grad_norm": 0.27671733498573303, "learning_rate": 4.313570743462877e-05, "loss": 0.1997, "step": 17799 }, { "epoch": 0.3174829665037634, "grad_norm": 0.3063640892505646, "learning_rate": 4.3134636058817504e-05, "loss": 0.1537, "step": 17800 }, { "epoch": 0.31750080262547714, "grad_norm": 0.33603960275650024, "learning_rate": 4.313356461271011e-05, "loss": 0.1663, "step": 17801 }, { "epoch": 0.31751863874719083, "grad_norm": 0.3382076025009155, "learning_rate": 4.3132493096310765e-05, "loss": 0.1612, "step": 17802 }, { "epoch": 0.3175364748689045, "grad_norm": 0.23141314089298248, "learning_rate": 4.3131421509623616e-05, "loss": 0.1684, "step": 17803 }, { "epoch": 0.3175543109906182, "grad_norm": 0.18326106667518616, "learning_rate": 4.3130349852652804e-05, "loss": 0.1437, "step": 17804 }, { "epoch": 0.3175721471123319, "grad_norm": 0.2562151551246643, "learning_rate": 4.31292781254025e-05, "loss": 0.212, "step": 17805 }, { "epoch": 0.3175899832340456, "grad_norm": 0.2702171206474304, "learning_rate": 4.312820632787686e-05, "loss": 0.1528, "step": 17806 }, { "epoch": 0.31760781935575927, "grad_norm": 0.25395190715789795, "learning_rate": 4.312713446008002e-05, "loss": 0.1776, "step": 17807 }, { "epoch": 0.31762565547747296, "grad_norm": 0.2341102808713913, "learning_rate": 4.3126062522016156e-05, "loss": 0.1867, "step": 17808 }, { "epoch": 0.3176434915991867, "grad_norm": 0.2933019995689392, "learning_rate": 4.31249905136894e-05, "loss": 0.2105, "step": 17809 }, { "epoch": 0.3176613277209004, "grad_norm": 0.2942465841770172, "learning_rate": 4.312391843510393e-05, "loss": 0.1832, "step": 17810 }, { "epoch": 0.3176791638426141, "grad_norm": 0.24671952426433563, "learning_rate": 4.31228462862639e-05, "loss": 0.1907, "step": 17811 }, { "epoch": 0.31769699996432776, "grad_norm": 0.2603171169757843, "learning_rate": 4.3121774067173446e-05, "loss": 0.1489, "step": 17812 }, { "epoch": 0.31771483608604145, "grad_norm": 0.35887011885643005, "learning_rate": 4.312070177783674e-05, "loss": 0.1108, "step": 17813 }, { "epoch": 0.31773267220775514, "grad_norm": 0.2506074607372284, "learning_rate": 4.3119629418257936e-05, "loss": 0.1893, "step": 17814 }, { "epoch": 0.31775050832946883, "grad_norm": 0.32645896077156067, "learning_rate": 4.3118556988441185e-05, "loss": 0.1811, "step": 17815 }, { "epoch": 0.3177683444511825, "grad_norm": 0.2704823613166809, "learning_rate": 4.311748448839066e-05, "loss": 0.1686, "step": 17816 }, { "epoch": 0.3177861805728962, "grad_norm": 0.2628668546676636, "learning_rate": 4.311641191811049e-05, "loss": 0.1914, "step": 17817 }, { "epoch": 0.31780401669460995, "grad_norm": 0.31883901357650757, "learning_rate": 4.311533927760487e-05, "loss": 0.161, "step": 17818 }, { "epoch": 0.31782185281632364, "grad_norm": 0.21375906467437744, "learning_rate": 4.311426656687793e-05, "loss": 0.1419, "step": 17819 }, { "epoch": 0.3178396889380373, "grad_norm": 0.30643463134765625, "learning_rate": 4.311319378593383e-05, "loss": 0.1577, "step": 17820 }, { "epoch": 0.317857525059751, "grad_norm": 0.30004793405532837, "learning_rate": 4.311212093477674e-05, "loss": 0.1798, "step": 17821 }, { "epoch": 0.3178753611814647, "grad_norm": 0.29790979623794556, "learning_rate": 4.3111048013410814e-05, "loss": 0.1829, "step": 17822 }, { "epoch": 0.3178931973031784, "grad_norm": 0.24758180975914001, "learning_rate": 4.310997502184021e-05, "loss": 0.1608, "step": 17823 }, { "epoch": 0.3179110334248921, "grad_norm": 0.294776052236557, "learning_rate": 4.310890196006909e-05, "loss": 0.1232, "step": 17824 }, { "epoch": 0.31792886954660576, "grad_norm": 0.3303149938583374, "learning_rate": 4.31078288281016e-05, "loss": 0.2212, "step": 17825 }, { "epoch": 0.3179467056683195, "grad_norm": 0.21274681389331818, "learning_rate": 4.310675562594193e-05, "loss": 0.1606, "step": 17826 }, { "epoch": 0.3179645417900332, "grad_norm": 0.21112030744552612, "learning_rate": 4.310568235359421e-05, "loss": 0.1681, "step": 17827 }, { "epoch": 0.3179823779117469, "grad_norm": 0.2439056932926178, "learning_rate": 4.3104609011062615e-05, "loss": 0.1296, "step": 17828 }, { "epoch": 0.31800021403346057, "grad_norm": 0.2420627623796463, "learning_rate": 4.31035355983513e-05, "loss": 0.192, "step": 17829 }, { "epoch": 0.31801805015517426, "grad_norm": 0.2784542441368103, "learning_rate": 4.310246211546443e-05, "loss": 0.1746, "step": 17830 }, { "epoch": 0.31803588627688795, "grad_norm": 0.27748286724090576, "learning_rate": 4.310138856240616e-05, "loss": 0.1465, "step": 17831 }, { "epoch": 0.31805372239860163, "grad_norm": 0.30073028802871704, "learning_rate": 4.310031493918066e-05, "loss": 0.152, "step": 17832 }, { "epoch": 0.3180715585203153, "grad_norm": 0.3731083273887634, "learning_rate": 4.309924124579209e-05, "loss": 0.111, "step": 17833 }, { "epoch": 0.318089394642029, "grad_norm": 0.6728917956352234, "learning_rate": 4.3098167482244605e-05, "loss": 0.2232, "step": 17834 }, { "epoch": 0.31810723076374275, "grad_norm": 0.21494494378566742, "learning_rate": 4.3097093648542376e-05, "loss": 0.1661, "step": 17835 }, { "epoch": 0.31812506688545644, "grad_norm": 0.20629215240478516, "learning_rate": 4.3096019744689555e-05, "loss": 0.1403, "step": 17836 }, { "epoch": 0.31814290300717013, "grad_norm": 0.2372989058494568, "learning_rate": 4.309494577069032e-05, "loss": 0.1099, "step": 17837 }, { "epoch": 0.3181607391288838, "grad_norm": 0.2816111445426941, "learning_rate": 4.309387172654882e-05, "loss": 0.1788, "step": 17838 }, { "epoch": 0.3181785752505975, "grad_norm": 0.2464326173067093, "learning_rate": 4.309279761226922e-05, "loss": 0.1546, "step": 17839 }, { "epoch": 0.3181964113723112, "grad_norm": 0.26137423515319824, "learning_rate": 4.30917234278557e-05, "loss": 0.1549, "step": 17840 }, { "epoch": 0.3182142474940249, "grad_norm": 0.21578945219516754, "learning_rate": 4.30906491733124e-05, "loss": 0.1648, "step": 17841 }, { "epoch": 0.31823208361573857, "grad_norm": 0.2710860073566437, "learning_rate": 4.30895748486435e-05, "loss": 0.2181, "step": 17842 }, { "epoch": 0.3182499197374523, "grad_norm": 0.2509026527404785, "learning_rate": 4.3088500453853154e-05, "loss": 0.1794, "step": 17843 }, { "epoch": 0.318267755859166, "grad_norm": 0.3039661645889282, "learning_rate": 4.308742598894554e-05, "loss": 0.2058, "step": 17844 }, { "epoch": 0.3182855919808797, "grad_norm": 0.27470862865448, "learning_rate": 4.3086351453924815e-05, "loss": 0.1643, "step": 17845 }, { "epoch": 0.3183034281025934, "grad_norm": 0.24679537117481232, "learning_rate": 4.308527684879514e-05, "loss": 0.1375, "step": 17846 }, { "epoch": 0.31832126422430707, "grad_norm": 0.2020549774169922, "learning_rate": 4.308420217356069e-05, "loss": 0.1621, "step": 17847 }, { "epoch": 0.31833910034602075, "grad_norm": 0.34716740250587463, "learning_rate": 4.3083127428225626e-05, "loss": 0.2077, "step": 17848 }, { "epoch": 0.31835693646773444, "grad_norm": 0.29732653498649597, "learning_rate": 4.308205261279411e-05, "loss": 0.2329, "step": 17849 }, { "epoch": 0.31837477258944813, "grad_norm": 0.22680331766605377, "learning_rate": 4.308097772727032e-05, "loss": 0.1742, "step": 17850 }, { "epoch": 0.3183926087111618, "grad_norm": 0.22886672616004944, "learning_rate": 4.307990277165841e-05, "loss": 0.2042, "step": 17851 }, { "epoch": 0.31841044483287556, "grad_norm": 0.23851677775382996, "learning_rate": 4.3078827745962556e-05, "loss": 0.1967, "step": 17852 }, { "epoch": 0.31842828095458925, "grad_norm": 0.24372057616710663, "learning_rate": 4.307775265018692e-05, "loss": 0.1926, "step": 17853 }, { "epoch": 0.31844611707630294, "grad_norm": 0.32449987530708313, "learning_rate": 4.307667748433567e-05, "loss": 0.1918, "step": 17854 }, { "epoch": 0.3184639531980166, "grad_norm": 0.24032112956047058, "learning_rate": 4.3075602248412975e-05, "loss": 0.1259, "step": 17855 }, { "epoch": 0.3184817893197303, "grad_norm": 0.2844131886959076, "learning_rate": 4.3074526942423e-05, "loss": 0.2125, "step": 17856 }, { "epoch": 0.318499625441444, "grad_norm": 0.23106344044208527, "learning_rate": 4.3073451566369915e-05, "loss": 0.1889, "step": 17857 }, { "epoch": 0.3185174615631577, "grad_norm": 0.26120078563690186, "learning_rate": 4.3072376120257895e-05, "loss": 0.1333, "step": 17858 }, { "epoch": 0.3185352976848714, "grad_norm": 0.2818737328052521, "learning_rate": 4.30713006040911e-05, "loss": 0.1384, "step": 17859 }, { "epoch": 0.3185531338065851, "grad_norm": 0.2503306567668915, "learning_rate": 4.30702250178737e-05, "loss": 0.1587, "step": 17860 }, { "epoch": 0.3185709699282988, "grad_norm": 0.4025648534297943, "learning_rate": 4.3069149361609876e-05, "loss": 0.1353, "step": 17861 }, { "epoch": 0.3185888060500125, "grad_norm": 0.3140362501144409, "learning_rate": 4.3068073635303775e-05, "loss": 0.2106, "step": 17862 }, { "epoch": 0.3186066421717262, "grad_norm": 0.24204808473587036, "learning_rate": 4.306699783895959e-05, "loss": 0.1582, "step": 17863 }, { "epoch": 0.3186244782934399, "grad_norm": 0.2421853244304657, "learning_rate": 4.306592197258148e-05, "loss": 0.1856, "step": 17864 }, { "epoch": 0.31864231441515356, "grad_norm": 0.3057219088077545, "learning_rate": 4.306484603617361e-05, "loss": 0.2206, "step": 17865 }, { "epoch": 0.31866015053686725, "grad_norm": 0.25261229276657104, "learning_rate": 4.3063770029740164e-05, "loss": 0.1655, "step": 17866 }, { "epoch": 0.31867798665858094, "grad_norm": 0.2884041368961334, "learning_rate": 4.306269395328531e-05, "loss": 0.1751, "step": 17867 }, { "epoch": 0.3186958227802947, "grad_norm": 0.26545092463493347, "learning_rate": 4.30616178068132e-05, "loss": 0.1926, "step": 17868 }, { "epoch": 0.31871365890200837, "grad_norm": 0.23007084429264069, "learning_rate": 4.306054159032803e-05, "loss": 0.1805, "step": 17869 }, { "epoch": 0.31873149502372206, "grad_norm": 0.3021833598613739, "learning_rate": 4.3059465303833965e-05, "loss": 0.1996, "step": 17870 }, { "epoch": 0.31874933114543574, "grad_norm": 0.33298414945602417, "learning_rate": 4.3058388947335175e-05, "loss": 0.2013, "step": 17871 }, { "epoch": 0.31876716726714943, "grad_norm": 0.23252364993095398, "learning_rate": 4.3057312520835834e-05, "loss": 0.144, "step": 17872 }, { "epoch": 0.3187850033888631, "grad_norm": 0.180417999625206, "learning_rate": 4.305623602434011e-05, "loss": 0.1488, "step": 17873 }, { "epoch": 0.3188028395105768, "grad_norm": 0.23891793191432953, "learning_rate": 4.3055159457852176e-05, "loss": 0.1902, "step": 17874 }, { "epoch": 0.3188206756322905, "grad_norm": 0.24990332126617432, "learning_rate": 4.305408282137621e-05, "loss": 0.1906, "step": 17875 }, { "epoch": 0.3188385117540042, "grad_norm": 0.3409096598625183, "learning_rate": 4.305300611491638e-05, "loss": 0.2136, "step": 17876 }, { "epoch": 0.3188563478757179, "grad_norm": 0.2706303894519806, "learning_rate": 4.305192933847687e-05, "loss": 0.1568, "step": 17877 }, { "epoch": 0.3188741839974316, "grad_norm": 0.2687775790691376, "learning_rate": 4.305085249206184e-05, "loss": 0.1418, "step": 17878 }, { "epoch": 0.3188920201191453, "grad_norm": 0.43368518352508545, "learning_rate": 4.3049775575675474e-05, "loss": 0.1, "step": 17879 }, { "epoch": 0.318909856240859, "grad_norm": 0.31225666403770447, "learning_rate": 4.304869858932195e-05, "loss": 0.1754, "step": 17880 }, { "epoch": 0.3189276923625727, "grad_norm": 0.22142820060253143, "learning_rate": 4.304762153300543e-05, "loss": 0.1537, "step": 17881 }, { "epoch": 0.31894552848428637, "grad_norm": 0.2720910608768463, "learning_rate": 4.30465444067301e-05, "loss": 0.1557, "step": 17882 }, { "epoch": 0.31896336460600005, "grad_norm": 0.2982274293899536, "learning_rate": 4.3045467210500125e-05, "loss": 0.1774, "step": 17883 }, { "epoch": 0.31898120072771374, "grad_norm": 0.22959260642528534, "learning_rate": 4.304438994431969e-05, "loss": 0.1626, "step": 17884 }, { "epoch": 0.3189990368494275, "grad_norm": 0.28869178891181946, "learning_rate": 4.304331260819297e-05, "loss": 0.1382, "step": 17885 }, { "epoch": 0.3190168729711412, "grad_norm": 0.2013123631477356, "learning_rate": 4.304223520212413e-05, "loss": 0.1295, "step": 17886 }, { "epoch": 0.31903470909285486, "grad_norm": 0.26151391863822937, "learning_rate": 4.304115772611736e-05, "loss": 0.1606, "step": 17887 }, { "epoch": 0.31905254521456855, "grad_norm": 0.2295144945383072, "learning_rate": 4.304008018017683e-05, "loss": 0.1762, "step": 17888 }, { "epoch": 0.31907038133628224, "grad_norm": 0.23897890746593475, "learning_rate": 4.303900256430672e-05, "loss": 0.1516, "step": 17889 }, { "epoch": 0.3190882174579959, "grad_norm": 0.2275541126728058, "learning_rate": 4.30379248785112e-05, "loss": 0.1847, "step": 17890 }, { "epoch": 0.3191060535797096, "grad_norm": 0.3158656358718872, "learning_rate": 4.303684712279446e-05, "loss": 0.2082, "step": 17891 }, { "epoch": 0.3191238897014233, "grad_norm": 0.29559525847435, "learning_rate": 4.303576929716067e-05, "loss": 0.1768, "step": 17892 }, { "epoch": 0.319141725823137, "grad_norm": 0.2867787778377533, "learning_rate": 4.3034691401614e-05, "loss": 0.179, "step": 17893 }, { "epoch": 0.31915956194485073, "grad_norm": 0.23168174922466278, "learning_rate": 4.303361343615865e-05, "loss": 0.1506, "step": 17894 }, { "epoch": 0.3191773980665644, "grad_norm": 0.24208880960941315, "learning_rate": 4.303253540079878e-05, "loss": 0.2005, "step": 17895 }, { "epoch": 0.3191952341882781, "grad_norm": 0.3277707099914551, "learning_rate": 4.303145729553858e-05, "loss": 0.1329, "step": 17896 }, { "epoch": 0.3192130703099918, "grad_norm": 0.24475747346878052, "learning_rate": 4.3030379120382216e-05, "loss": 0.1708, "step": 17897 }, { "epoch": 0.3192309064317055, "grad_norm": 0.3401337265968323, "learning_rate": 4.3029300875333875e-05, "loss": 0.161, "step": 17898 }, { "epoch": 0.3192487425534192, "grad_norm": 0.2839389145374298, "learning_rate": 4.302822256039774e-05, "loss": 0.2711, "step": 17899 }, { "epoch": 0.31926657867513286, "grad_norm": 0.29284125566482544, "learning_rate": 4.3027144175577984e-05, "loss": 0.1699, "step": 17900 }, { "epoch": 0.31928441479684655, "grad_norm": 0.2563447952270508, "learning_rate": 4.3026065720878796e-05, "loss": 0.1682, "step": 17901 }, { "epoch": 0.3193022509185603, "grad_norm": 0.24518202245235443, "learning_rate": 4.3024987196304344e-05, "loss": 0.1915, "step": 17902 }, { "epoch": 0.319320087040274, "grad_norm": 0.22739307582378387, "learning_rate": 4.302390860185883e-05, "loss": 0.169, "step": 17903 }, { "epoch": 0.31933792316198767, "grad_norm": 0.25473618507385254, "learning_rate": 4.3022829937546404e-05, "loss": 0.1501, "step": 17904 }, { "epoch": 0.31935575928370136, "grad_norm": 0.23113799095153809, "learning_rate": 4.302175120337128e-05, "loss": 0.1184, "step": 17905 }, { "epoch": 0.31937359540541504, "grad_norm": 0.23945507407188416, "learning_rate": 4.3020672399337616e-05, "loss": 0.1226, "step": 17906 }, { "epoch": 0.31939143152712873, "grad_norm": 0.38307005167007446, "learning_rate": 4.3019593525449596e-05, "loss": 0.1922, "step": 17907 }, { "epoch": 0.3194092676488424, "grad_norm": 0.43980878591537476, "learning_rate": 4.301851458171141e-05, "loss": 0.2004, "step": 17908 }, { "epoch": 0.3194271037705561, "grad_norm": 0.3586626648902893, "learning_rate": 4.3017435568127246e-05, "loss": 0.134, "step": 17909 }, { "epoch": 0.31944493989226985, "grad_norm": 0.2454012632369995, "learning_rate": 4.301635648470127e-05, "loss": 0.1572, "step": 17910 }, { "epoch": 0.31946277601398354, "grad_norm": 0.26246950030326843, "learning_rate": 4.3015277331437675e-05, "loss": 0.1736, "step": 17911 }, { "epoch": 0.31948061213569723, "grad_norm": 0.5175898671150208, "learning_rate": 4.301419810834065e-05, "loss": 0.262, "step": 17912 }, { "epoch": 0.3194984482574109, "grad_norm": 0.17227597534656525, "learning_rate": 4.3013118815414365e-05, "loss": 0.1416, "step": 17913 }, { "epoch": 0.3195162843791246, "grad_norm": 0.2439941018819809, "learning_rate": 4.3012039452663014e-05, "loss": 0.1202, "step": 17914 }, { "epoch": 0.3195341205008383, "grad_norm": 0.3642473816871643, "learning_rate": 4.301096002009077e-05, "loss": 0.1716, "step": 17915 }, { "epoch": 0.319551956622552, "grad_norm": 0.29081660509109497, "learning_rate": 4.3009880517701836e-05, "loss": 0.2104, "step": 17916 }, { "epoch": 0.31956979274426567, "grad_norm": 0.26018771529197693, "learning_rate": 4.300880094550037e-05, "loss": 0.1824, "step": 17917 }, { "epoch": 0.31958762886597936, "grad_norm": 0.26684725284576416, "learning_rate": 4.3007721303490586e-05, "loss": 0.1701, "step": 17918 }, { "epoch": 0.3196054649876931, "grad_norm": 0.2873651087284088, "learning_rate": 4.3006641591676645e-05, "loss": 0.2203, "step": 17919 }, { "epoch": 0.3196233011094068, "grad_norm": 0.2741551399230957, "learning_rate": 4.3005561810062745e-05, "loss": 0.1628, "step": 17920 }, { "epoch": 0.3196411372311205, "grad_norm": 0.22419938445091248, "learning_rate": 4.3004481958653065e-05, "loss": 0.159, "step": 17921 }, { "epoch": 0.31965897335283416, "grad_norm": 0.2226715385913849, "learning_rate": 4.30034020374518e-05, "loss": 0.1063, "step": 17922 }, { "epoch": 0.31967680947454785, "grad_norm": 0.33590811491012573, "learning_rate": 4.3002322046463125e-05, "loss": 0.1508, "step": 17923 }, { "epoch": 0.31969464559626154, "grad_norm": 0.23108139634132385, "learning_rate": 4.3001241985691234e-05, "loss": 0.1979, "step": 17924 }, { "epoch": 0.3197124817179752, "grad_norm": 0.18934516608715057, "learning_rate": 4.3000161855140315e-05, "loss": 0.2001, "step": 17925 }, { "epoch": 0.3197303178396889, "grad_norm": 0.255266934633255, "learning_rate": 4.299908165481455e-05, "loss": 0.1581, "step": 17926 }, { "epoch": 0.31974815396140266, "grad_norm": 0.24443794786930084, "learning_rate": 4.299800138471812e-05, "loss": 0.1445, "step": 17927 }, { "epoch": 0.31976599008311635, "grad_norm": 0.273963063955307, "learning_rate": 4.299692104485523e-05, "loss": 0.1677, "step": 17928 }, { "epoch": 0.31978382620483004, "grad_norm": 0.272589772939682, "learning_rate": 4.299584063523006e-05, "loss": 0.1651, "step": 17929 }, { "epoch": 0.3198016623265437, "grad_norm": 0.5103332996368408, "learning_rate": 4.299476015584679e-05, "loss": 0.2404, "step": 17930 }, { "epoch": 0.3198194984482574, "grad_norm": 0.21750696003437042, "learning_rate": 4.299367960670961e-05, "loss": 0.184, "step": 17931 }, { "epoch": 0.3198373345699711, "grad_norm": 0.23367401957511902, "learning_rate": 4.2992598987822725e-05, "loss": 0.1895, "step": 17932 }, { "epoch": 0.3198551706916848, "grad_norm": 0.23731425404548645, "learning_rate": 4.2991518299190305e-05, "loss": 0.1918, "step": 17933 }, { "epoch": 0.3198730068133985, "grad_norm": 0.2755047678947449, "learning_rate": 4.2990437540816546e-05, "loss": 0.2193, "step": 17934 }, { "epoch": 0.31989084293511216, "grad_norm": 0.24228043854236603, "learning_rate": 4.2989356712705636e-05, "loss": 0.1282, "step": 17935 }, { "epoch": 0.3199086790568259, "grad_norm": 0.22782939672470093, "learning_rate": 4.298827581486177e-05, "loss": 0.1886, "step": 17936 }, { "epoch": 0.3199265151785396, "grad_norm": 0.24840177595615387, "learning_rate": 4.298719484728913e-05, "loss": 0.1684, "step": 17937 }, { "epoch": 0.3199443513002533, "grad_norm": 0.3157387971878052, "learning_rate": 4.298611380999191e-05, "loss": 0.1706, "step": 17938 }, { "epoch": 0.31996218742196697, "grad_norm": 0.2063383311033249, "learning_rate": 4.2985032702974303e-05, "loss": 0.1452, "step": 17939 }, { "epoch": 0.31998002354368066, "grad_norm": 0.3949030935764313, "learning_rate": 4.29839515262405e-05, "loss": 0.2054, "step": 17940 }, { "epoch": 0.31999785966539435, "grad_norm": 0.31042471528053284, "learning_rate": 4.2982870279794684e-05, "loss": 0.2162, "step": 17941 }, { "epoch": 0.32001569578710803, "grad_norm": 0.292270302772522, "learning_rate": 4.2981788963641055e-05, "loss": 0.127, "step": 17942 }, { "epoch": 0.3200335319088217, "grad_norm": 0.2829902768135071, "learning_rate": 4.2980707577783805e-05, "loss": 0.1843, "step": 17943 }, { "epoch": 0.32005136803053547, "grad_norm": 0.3233446180820465, "learning_rate": 4.297962612222712e-05, "loss": 0.1795, "step": 17944 }, { "epoch": 0.32006920415224915, "grad_norm": 0.24719838798046112, "learning_rate": 4.297854459697519e-05, "loss": 0.1604, "step": 17945 }, { "epoch": 0.32008704027396284, "grad_norm": 0.2635244131088257, "learning_rate": 4.2977463002032214e-05, "loss": 0.1642, "step": 17946 }, { "epoch": 0.32010487639567653, "grad_norm": 0.2845657467842102, "learning_rate": 4.297638133740238e-05, "loss": 0.2115, "step": 17947 }, { "epoch": 0.3201227125173902, "grad_norm": 0.26517805457115173, "learning_rate": 4.297529960308988e-05, "loss": 0.2183, "step": 17948 }, { "epoch": 0.3201405486391039, "grad_norm": 0.2887776494026184, "learning_rate": 4.297421779909892e-05, "loss": 0.1804, "step": 17949 }, { "epoch": 0.3201583847608176, "grad_norm": 0.29620370268821716, "learning_rate": 4.297313592543368e-05, "loss": 0.1338, "step": 17950 }, { "epoch": 0.3201762208825313, "grad_norm": 0.3269090950489044, "learning_rate": 4.297205398209836e-05, "loss": 0.1748, "step": 17951 }, { "epoch": 0.32019405700424497, "grad_norm": 0.4068647623062134, "learning_rate": 4.297097196909714e-05, "loss": 0.1703, "step": 17952 }, { "epoch": 0.3202118931259587, "grad_norm": 0.2973577678203583, "learning_rate": 4.2969889886434236e-05, "loss": 0.2525, "step": 17953 }, { "epoch": 0.3202297292476724, "grad_norm": 0.27513816952705383, "learning_rate": 4.296880773411383e-05, "loss": 0.1818, "step": 17954 }, { "epoch": 0.3202475653693861, "grad_norm": 0.30197274684906006, "learning_rate": 4.296772551214012e-05, "loss": 0.1695, "step": 17955 }, { "epoch": 0.3202654014910998, "grad_norm": 0.2208343744277954, "learning_rate": 4.29666432205173e-05, "loss": 0.114, "step": 17956 }, { "epoch": 0.32028323761281347, "grad_norm": 0.31481072306632996, "learning_rate": 4.2965560859249566e-05, "loss": 0.2217, "step": 17957 }, { "epoch": 0.32030107373452715, "grad_norm": 0.2951738238334656, "learning_rate": 4.2964478428341104e-05, "loss": 0.2324, "step": 17958 }, { "epoch": 0.32031890985624084, "grad_norm": 0.39909499883651733, "learning_rate": 4.2963395927796125e-05, "loss": 0.1922, "step": 17959 }, { "epoch": 0.32033674597795453, "grad_norm": 0.2836631238460541, "learning_rate": 4.2962313357618824e-05, "loss": 0.1942, "step": 17960 }, { "epoch": 0.3203545820996683, "grad_norm": 0.22878852486610413, "learning_rate": 4.296123071781339e-05, "loss": 0.1746, "step": 17961 }, { "epoch": 0.32037241822138196, "grad_norm": 0.19215981662273407, "learning_rate": 4.2960148008384014e-05, "loss": 0.1215, "step": 17962 }, { "epoch": 0.32039025434309565, "grad_norm": 0.3427749574184418, "learning_rate": 4.2959065229334913e-05, "loss": 0.189, "step": 17963 }, { "epoch": 0.32040809046480934, "grad_norm": 0.31281399726867676, "learning_rate": 4.295798238067026e-05, "loss": 0.1841, "step": 17964 }, { "epoch": 0.320425926586523, "grad_norm": 0.4562949240207672, "learning_rate": 4.2956899462394275e-05, "loss": 0.1742, "step": 17965 }, { "epoch": 0.3204437627082367, "grad_norm": 0.33912232518196106, "learning_rate": 4.295581647451115e-05, "loss": 0.1455, "step": 17966 }, { "epoch": 0.3204615988299504, "grad_norm": 0.2856380045413971, "learning_rate": 4.2954733417025065e-05, "loss": 0.1739, "step": 17967 }, { "epoch": 0.3204794349516641, "grad_norm": 0.3533669114112854, "learning_rate": 4.295365028994024e-05, "loss": 0.1713, "step": 17968 }, { "epoch": 0.32049727107337783, "grad_norm": 0.267322838306427, "learning_rate": 4.2952567093260864e-05, "loss": 0.1324, "step": 17969 }, { "epoch": 0.3205151071950915, "grad_norm": 0.31280526518821716, "learning_rate": 4.2951483826991135e-05, "loss": 0.1922, "step": 17970 }, { "epoch": 0.3205329433168052, "grad_norm": 0.251764178276062, "learning_rate": 4.295040049113526e-05, "loss": 0.2005, "step": 17971 }, { "epoch": 0.3205507794385189, "grad_norm": 0.2617865800857544, "learning_rate": 4.2949317085697426e-05, "loss": 0.1818, "step": 17972 }, { "epoch": 0.3205686155602326, "grad_norm": 0.35505223274230957, "learning_rate": 4.294823361068184e-05, "loss": 0.2046, "step": 17973 }, { "epoch": 0.32058645168194627, "grad_norm": 0.4510609805583954, "learning_rate": 4.29471500660927e-05, "loss": 0.1637, "step": 17974 }, { "epoch": 0.32060428780365996, "grad_norm": 0.2971402108669281, "learning_rate": 4.294606645193422e-05, "loss": 0.2013, "step": 17975 }, { "epoch": 0.32062212392537365, "grad_norm": 0.32822614908218384, "learning_rate": 4.2944982768210576e-05, "loss": 0.1625, "step": 17976 }, { "epoch": 0.32063996004708734, "grad_norm": 0.2518485486507416, "learning_rate": 4.294389901492598e-05, "loss": 0.1526, "step": 17977 }, { "epoch": 0.3206577961688011, "grad_norm": 0.2990824282169342, "learning_rate": 4.294281519208464e-05, "loss": 0.1523, "step": 17978 }, { "epoch": 0.32067563229051477, "grad_norm": 0.33606094121932983, "learning_rate": 4.294173129969075e-05, "loss": 0.1609, "step": 17979 }, { "epoch": 0.32069346841222846, "grad_norm": 0.2032424807548523, "learning_rate": 4.294064733774851e-05, "loss": 0.1142, "step": 17980 }, { "epoch": 0.32071130453394214, "grad_norm": 0.23036698997020721, "learning_rate": 4.2939563306262126e-05, "loss": 0.1897, "step": 17981 }, { "epoch": 0.32072914065565583, "grad_norm": 0.2209492176771164, "learning_rate": 4.2938479205235803e-05, "loss": 0.1688, "step": 17982 }, { "epoch": 0.3207469767773695, "grad_norm": 0.2883176803588867, "learning_rate": 4.2937395034673734e-05, "loss": 0.1577, "step": 17983 }, { "epoch": 0.3207648128990832, "grad_norm": 0.19783201813697815, "learning_rate": 4.2936310794580125e-05, "loss": 0.145, "step": 17984 }, { "epoch": 0.3207826490207969, "grad_norm": 0.2921576201915741, "learning_rate": 4.293522648495918e-05, "loss": 0.1858, "step": 17985 }, { "epoch": 0.32080048514251064, "grad_norm": 0.3217620849609375, "learning_rate": 4.293414210581511e-05, "loss": 0.1595, "step": 17986 }, { "epoch": 0.3208183212642243, "grad_norm": 0.2626230716705322, "learning_rate": 4.29330576571521e-05, "loss": 0.1856, "step": 17987 }, { "epoch": 0.320836157385938, "grad_norm": 0.30646297335624695, "learning_rate": 4.293197313897438e-05, "loss": 0.1783, "step": 17988 }, { "epoch": 0.3208539935076517, "grad_norm": 0.23388750851154327, "learning_rate": 4.293088855128612e-05, "loss": 0.164, "step": 17989 }, { "epoch": 0.3208718296293654, "grad_norm": 0.26077958941459656, "learning_rate": 4.2929803894091555e-05, "loss": 0.1739, "step": 17990 }, { "epoch": 0.3208896657510791, "grad_norm": 0.2520488202571869, "learning_rate": 4.292871916739487e-05, "loss": 0.1531, "step": 17991 }, { "epoch": 0.32090750187279277, "grad_norm": 0.276787668466568, "learning_rate": 4.292763437120029e-05, "loss": 0.184, "step": 17992 }, { "epoch": 0.32092533799450645, "grad_norm": 0.2858846187591553, "learning_rate": 4.292654950551199e-05, "loss": 0.1568, "step": 17993 }, { "epoch": 0.32094317411622014, "grad_norm": 0.301724910736084, "learning_rate": 4.292546457033421e-05, "loss": 0.1696, "step": 17994 }, { "epoch": 0.3209610102379339, "grad_norm": 0.24382881820201874, "learning_rate": 4.292437956567113e-05, "loss": 0.1827, "step": 17995 }, { "epoch": 0.3209788463596476, "grad_norm": 0.3244045674800873, "learning_rate": 4.292329449152696e-05, "loss": 0.1548, "step": 17996 }, { "epoch": 0.32099668248136126, "grad_norm": 0.24747657775878906, "learning_rate": 4.2922209347905907e-05, "loss": 0.1354, "step": 17997 }, { "epoch": 0.32101451860307495, "grad_norm": 0.2584995627403259, "learning_rate": 4.292112413481218e-05, "loss": 0.2232, "step": 17998 }, { "epoch": 0.32103235472478864, "grad_norm": 0.23093536496162415, "learning_rate": 4.292003885225e-05, "loss": 0.1428, "step": 17999 }, { "epoch": 0.3210501908465023, "grad_norm": 0.23213060200214386, "learning_rate": 4.291895350022356e-05, "loss": 0.1582, "step": 18000 }, { "epoch": 0.3210501908465023, "eval_loss": 0.16559597849845886, "eval_runtime": 106.6244, "eval_samples_per_second": 9.604, "eval_steps_per_second": 1.604, "step": 18000 }, { "epoch": 0.321068026968216, "grad_norm": 0.5585169196128845, "learning_rate": 4.2917868078737056e-05, "loss": 0.215, "step": 18001 }, { "epoch": 0.3210858630899297, "grad_norm": 0.36891061067581177, "learning_rate": 4.2916782587794705e-05, "loss": 0.1554, "step": 18002 }, { "epoch": 0.32110369921164345, "grad_norm": 0.24026136100292206, "learning_rate": 4.291569702740073e-05, "loss": 0.1681, "step": 18003 }, { "epoch": 0.32112153533335713, "grad_norm": 0.31819137930870056, "learning_rate": 4.291461139755931e-05, "loss": 0.1581, "step": 18004 }, { "epoch": 0.3211393714550708, "grad_norm": 0.3256298005580902, "learning_rate": 4.291352569827467e-05, "loss": 0.1932, "step": 18005 }, { "epoch": 0.3211572075767845, "grad_norm": 0.31874772906303406, "learning_rate": 4.291243992955103e-05, "loss": 0.2097, "step": 18006 }, { "epoch": 0.3211750436984982, "grad_norm": 0.2279292643070221, "learning_rate": 4.291135409139258e-05, "loss": 0.179, "step": 18007 }, { "epoch": 0.3211928798202119, "grad_norm": 0.24464398622512817, "learning_rate": 4.2910268183803535e-05, "loss": 0.2034, "step": 18008 }, { "epoch": 0.3212107159419256, "grad_norm": 0.26906678080558777, "learning_rate": 4.29091822067881e-05, "loss": 0.1958, "step": 18009 }, { "epoch": 0.32122855206363926, "grad_norm": 0.2615545094013214, "learning_rate": 4.29080961603505e-05, "loss": 0.1901, "step": 18010 }, { "epoch": 0.321246388185353, "grad_norm": 0.23821043968200684, "learning_rate": 4.290701004449492e-05, "loss": 0.1626, "step": 18011 }, { "epoch": 0.3212642243070667, "grad_norm": 0.2942897379398346, "learning_rate": 4.2905923859225595e-05, "loss": 0.1616, "step": 18012 }, { "epoch": 0.3212820604287804, "grad_norm": 0.21033979952335358, "learning_rate": 4.2904837604546724e-05, "loss": 0.1649, "step": 18013 }, { "epoch": 0.32129989655049407, "grad_norm": 0.36140936613082886, "learning_rate": 4.290375128046251e-05, "loss": 0.1705, "step": 18014 }, { "epoch": 0.32131773267220776, "grad_norm": 0.3626006245613098, "learning_rate": 4.2902664886977185e-05, "loss": 0.143, "step": 18015 }, { "epoch": 0.32133556879392144, "grad_norm": 0.34682902693748474, "learning_rate": 4.290157842409493e-05, "loss": 0.1967, "step": 18016 }, { "epoch": 0.32135340491563513, "grad_norm": 0.23780466616153717, "learning_rate": 4.290049189181999e-05, "loss": 0.1848, "step": 18017 }, { "epoch": 0.3213712410373488, "grad_norm": 0.346086323261261, "learning_rate": 4.2899405290156555e-05, "loss": 0.1945, "step": 18018 }, { "epoch": 0.3213890771590625, "grad_norm": 0.6469464898109436, "learning_rate": 4.289831861910885e-05, "loss": 0.146, "step": 18019 }, { "epoch": 0.32140691328077625, "grad_norm": 0.26612937450408936, "learning_rate": 4.2897231878681064e-05, "loss": 0.1983, "step": 18020 }, { "epoch": 0.32142474940248994, "grad_norm": 0.28209903836250305, "learning_rate": 4.289614506887743e-05, "loss": 0.2231, "step": 18021 }, { "epoch": 0.32144258552420363, "grad_norm": 0.22776180505752563, "learning_rate": 4.2895058189702163e-05, "loss": 0.1589, "step": 18022 }, { "epoch": 0.3214604216459173, "grad_norm": 0.23473331332206726, "learning_rate": 4.289397124115947e-05, "loss": 0.2098, "step": 18023 }, { "epoch": 0.321478257767631, "grad_norm": 0.3704344928264618, "learning_rate": 4.2892884223253565e-05, "loss": 0.2588, "step": 18024 }, { "epoch": 0.3214960938893447, "grad_norm": 0.22363069653511047, "learning_rate": 4.289179713598865e-05, "loss": 0.1614, "step": 18025 }, { "epoch": 0.3215139300110584, "grad_norm": 0.30653658509254456, "learning_rate": 4.289070997936897e-05, "loss": 0.1998, "step": 18026 }, { "epoch": 0.32153176613277207, "grad_norm": 0.2966502606868744, "learning_rate": 4.2889622753398703e-05, "loss": 0.2054, "step": 18027 }, { "epoch": 0.3215496022544858, "grad_norm": 0.23826436698436737, "learning_rate": 4.288853545808208e-05, "loss": 0.1506, "step": 18028 }, { "epoch": 0.3215674383761995, "grad_norm": 0.22722485661506653, "learning_rate": 4.288744809342332e-05, "loss": 0.1643, "step": 18029 }, { "epoch": 0.3215852744979132, "grad_norm": 0.32047396898269653, "learning_rate": 4.288636065942663e-05, "loss": 0.143, "step": 18030 }, { "epoch": 0.3216031106196269, "grad_norm": 0.28161951899528503, "learning_rate": 4.2885273156096226e-05, "loss": 0.1583, "step": 18031 }, { "epoch": 0.32162094674134056, "grad_norm": 0.2806845009326935, "learning_rate": 4.288418558343633e-05, "loss": 0.1753, "step": 18032 }, { "epoch": 0.32163878286305425, "grad_norm": 0.3080390691757202, "learning_rate": 4.2883097941451155e-05, "loss": 0.166, "step": 18033 }, { "epoch": 0.32165661898476794, "grad_norm": 0.30956411361694336, "learning_rate": 4.288201023014492e-05, "loss": 0.2139, "step": 18034 }, { "epoch": 0.3216744551064816, "grad_norm": 0.2906337380409241, "learning_rate": 4.288092244952182e-05, "loss": 0.1867, "step": 18035 }, { "epoch": 0.3216922912281953, "grad_norm": 0.22089581191539764, "learning_rate": 4.28798345995861e-05, "loss": 0.1377, "step": 18036 }, { "epoch": 0.32171012734990906, "grad_norm": 0.2225026786327362, "learning_rate": 4.287874668034197e-05, "loss": 0.133, "step": 18037 }, { "epoch": 0.32172796347162275, "grad_norm": 0.27453580498695374, "learning_rate": 4.287765869179364e-05, "loss": 0.1601, "step": 18038 }, { "epoch": 0.32174579959333643, "grad_norm": 0.25948649644851685, "learning_rate": 4.287657063394532e-05, "loss": 0.1507, "step": 18039 }, { "epoch": 0.3217636357150501, "grad_norm": 0.2517499327659607, "learning_rate": 4.287548250680124e-05, "loss": 0.1689, "step": 18040 }, { "epoch": 0.3217814718367638, "grad_norm": 0.23956286907196045, "learning_rate": 4.2874394310365626e-05, "loss": 0.176, "step": 18041 }, { "epoch": 0.3217993079584775, "grad_norm": 0.22735492885112762, "learning_rate": 4.2873306044642687e-05, "loss": 0.1352, "step": 18042 }, { "epoch": 0.3218171440801912, "grad_norm": 0.2797132134437561, "learning_rate": 4.287221770963663e-05, "loss": 0.1664, "step": 18043 }, { "epoch": 0.3218349802019049, "grad_norm": 0.24348405003547668, "learning_rate": 4.2871129305351694e-05, "loss": 0.1413, "step": 18044 }, { "epoch": 0.3218528163236186, "grad_norm": 0.21892757713794708, "learning_rate": 4.287004083179208e-05, "loss": 0.1761, "step": 18045 }, { "epoch": 0.3218706524453323, "grad_norm": 0.31122085452079773, "learning_rate": 4.286895228896202e-05, "loss": 0.1875, "step": 18046 }, { "epoch": 0.321888488567046, "grad_norm": 0.20841598510742188, "learning_rate": 4.2867863676865724e-05, "loss": 0.1378, "step": 18047 }, { "epoch": 0.3219063246887597, "grad_norm": 0.33254650235176086, "learning_rate": 4.286677499550743e-05, "loss": 0.2204, "step": 18048 }, { "epoch": 0.32192416081047337, "grad_norm": 0.17228543758392334, "learning_rate": 4.2865686244891334e-05, "loss": 0.0946, "step": 18049 }, { "epoch": 0.32194199693218706, "grad_norm": 0.2636859714984894, "learning_rate": 4.2864597425021666e-05, "loss": 0.1819, "step": 18050 }, { "epoch": 0.32195983305390075, "grad_norm": 0.2608453929424286, "learning_rate": 4.286350853590266e-05, "loss": 0.2011, "step": 18051 }, { "epoch": 0.32197766917561443, "grad_norm": 0.27565479278564453, "learning_rate": 4.2862419577538516e-05, "loss": 0.1788, "step": 18052 }, { "epoch": 0.3219955052973281, "grad_norm": 0.3326278328895569, "learning_rate": 4.286133054993346e-05, "loss": 0.1506, "step": 18053 }, { "epoch": 0.32201334141904187, "grad_norm": 0.3037939965724945, "learning_rate": 4.2860241453091726e-05, "loss": 0.2235, "step": 18054 }, { "epoch": 0.32203117754075555, "grad_norm": 0.191414475440979, "learning_rate": 4.285915228701752e-05, "loss": 0.1257, "step": 18055 }, { "epoch": 0.32204901366246924, "grad_norm": 0.28974199295043945, "learning_rate": 4.285806305171508e-05, "loss": 0.1823, "step": 18056 }, { "epoch": 0.32206684978418293, "grad_norm": 0.24226349592208862, "learning_rate": 4.285697374718862e-05, "loss": 0.1901, "step": 18057 }, { "epoch": 0.3220846859058966, "grad_norm": 0.376336932182312, "learning_rate": 4.285588437344236e-05, "loss": 0.248, "step": 18058 }, { "epoch": 0.3221025220276103, "grad_norm": 0.36374542117118835, "learning_rate": 4.285479493048052e-05, "loss": 0.1721, "step": 18059 }, { "epoch": 0.322120358149324, "grad_norm": 0.25528842210769653, "learning_rate": 4.285370541830733e-05, "loss": 0.175, "step": 18060 }, { "epoch": 0.3221381942710377, "grad_norm": 0.25853872299194336, "learning_rate": 4.2852615836927015e-05, "loss": 0.147, "step": 18061 }, { "epoch": 0.3221560303927514, "grad_norm": 0.3118174374103546, "learning_rate": 4.2851526186343785e-05, "loss": 0.1511, "step": 18062 }, { "epoch": 0.3221738665144651, "grad_norm": 0.2361362725496292, "learning_rate": 4.2850436466561886e-05, "loss": 0.1922, "step": 18063 }, { "epoch": 0.3221917026361788, "grad_norm": 0.2701319754123688, "learning_rate": 4.284934667758552e-05, "loss": 0.165, "step": 18064 }, { "epoch": 0.3222095387578925, "grad_norm": 0.2908462584018707, "learning_rate": 4.284825681941893e-05, "loss": 0.1274, "step": 18065 }, { "epoch": 0.3222273748796062, "grad_norm": 0.23834800720214844, "learning_rate": 4.284716689206633e-05, "loss": 0.1738, "step": 18066 }, { "epoch": 0.32224521100131986, "grad_norm": 0.27812689542770386, "learning_rate": 4.284607689553194e-05, "loss": 0.1772, "step": 18067 }, { "epoch": 0.32226304712303355, "grad_norm": 0.5655857920646667, "learning_rate": 4.284498682982e-05, "loss": 0.1929, "step": 18068 }, { "epoch": 0.32228088324474724, "grad_norm": 0.2239859402179718, "learning_rate": 4.2843896694934725e-05, "loss": 0.1858, "step": 18069 }, { "epoch": 0.322298719366461, "grad_norm": 0.28623759746551514, "learning_rate": 4.284280649088034e-05, "loss": 0.1417, "step": 18070 }, { "epoch": 0.32231655548817467, "grad_norm": 0.2485145926475525, "learning_rate": 4.284171621766108e-05, "loss": 0.1754, "step": 18071 }, { "epoch": 0.32233439160988836, "grad_norm": 0.45249781012535095, "learning_rate": 4.284062587528116e-05, "loss": 0.1542, "step": 18072 }, { "epoch": 0.32235222773160205, "grad_norm": 0.22429540753364563, "learning_rate": 4.283953546374482e-05, "loss": 0.1805, "step": 18073 }, { "epoch": 0.32237006385331574, "grad_norm": 0.3011311888694763, "learning_rate": 4.283844498305627e-05, "loss": 0.1842, "step": 18074 }, { "epoch": 0.3223878999750294, "grad_norm": 0.28535404801368713, "learning_rate": 4.283735443321975e-05, "loss": 0.1747, "step": 18075 }, { "epoch": 0.3224057360967431, "grad_norm": 0.16767239570617676, "learning_rate": 4.2836263814239485e-05, "loss": 0.1412, "step": 18076 }, { "epoch": 0.3224235722184568, "grad_norm": 0.2633972764015198, "learning_rate": 4.28351731261197e-05, "loss": 0.196, "step": 18077 }, { "epoch": 0.3224414083401705, "grad_norm": 0.2889309525489807, "learning_rate": 4.283408236886462e-05, "loss": 0.1878, "step": 18078 }, { "epoch": 0.32245924446188423, "grad_norm": 0.3738020956516266, "learning_rate": 4.283299154247849e-05, "loss": 0.2166, "step": 18079 }, { "epoch": 0.3224770805835979, "grad_norm": 0.26379501819610596, "learning_rate": 4.2831900646965506e-05, "loss": 0.1685, "step": 18080 }, { "epoch": 0.3224949167053116, "grad_norm": 0.27132490277290344, "learning_rate": 4.2830809682329926e-05, "loss": 0.1802, "step": 18081 }, { "epoch": 0.3225127528270253, "grad_norm": 0.25412696599960327, "learning_rate": 4.282971864857597e-05, "loss": 0.1609, "step": 18082 }, { "epoch": 0.322530588948739, "grad_norm": 0.30349597334861755, "learning_rate": 4.282862754570787e-05, "loss": 0.2038, "step": 18083 }, { "epoch": 0.32254842507045267, "grad_norm": 0.24025368690490723, "learning_rate": 4.282753637372984e-05, "loss": 0.1752, "step": 18084 }, { "epoch": 0.32256626119216636, "grad_norm": 0.30440738797187805, "learning_rate": 4.282644513264613e-05, "loss": 0.169, "step": 18085 }, { "epoch": 0.32258409731388005, "grad_norm": 0.2684110105037689, "learning_rate": 4.2825353822460965e-05, "loss": 0.1651, "step": 18086 }, { "epoch": 0.3226019334355938, "grad_norm": 0.25851064920425415, "learning_rate": 4.282426244317857e-05, "loss": 0.159, "step": 18087 }, { "epoch": 0.3226197695573075, "grad_norm": 0.25092485547065735, "learning_rate": 4.282317099480317e-05, "loss": 0.1722, "step": 18088 }, { "epoch": 0.32263760567902117, "grad_norm": 0.5327639579772949, "learning_rate": 4.282207947733901e-05, "loss": 0.2262, "step": 18089 }, { "epoch": 0.32265544180073485, "grad_norm": 0.2172667533159256, "learning_rate": 4.282098789079031e-05, "loss": 0.1423, "step": 18090 }, { "epoch": 0.32267327792244854, "grad_norm": 0.20758669078350067, "learning_rate": 4.2819896235161305e-05, "loss": 0.1493, "step": 18091 }, { "epoch": 0.32269111404416223, "grad_norm": 0.252105176448822, "learning_rate": 4.2818804510456235e-05, "loss": 0.1884, "step": 18092 }, { "epoch": 0.3227089501658759, "grad_norm": 0.2258213609457016, "learning_rate": 4.2817712716679314e-05, "loss": 0.1361, "step": 18093 }, { "epoch": 0.3227267862875896, "grad_norm": 0.20108555257320404, "learning_rate": 4.28166208538348e-05, "loss": 0.1683, "step": 18094 }, { "epoch": 0.3227446224093033, "grad_norm": 0.25176340341567993, "learning_rate": 4.2815528921926896e-05, "loss": 0.1391, "step": 18095 }, { "epoch": 0.32276245853101704, "grad_norm": 0.19316431879997253, "learning_rate": 4.2814436920959855e-05, "loss": 0.1304, "step": 18096 }, { "epoch": 0.3227802946527307, "grad_norm": 0.2648905813694, "learning_rate": 4.28133448509379e-05, "loss": 0.1787, "step": 18097 }, { "epoch": 0.3227981307744444, "grad_norm": 0.2939186692237854, "learning_rate": 4.2812252711865265e-05, "loss": 0.1871, "step": 18098 }, { "epoch": 0.3228159668961581, "grad_norm": 0.3293476700782776, "learning_rate": 4.281116050374619e-05, "loss": 0.1517, "step": 18099 }, { "epoch": 0.3228338030178718, "grad_norm": 0.23596060276031494, "learning_rate": 4.281006822658491e-05, "loss": 0.1495, "step": 18100 }, { "epoch": 0.3228516391395855, "grad_norm": 0.34427961707115173, "learning_rate": 4.280897588038565e-05, "loss": 0.1968, "step": 18101 }, { "epoch": 0.32286947526129917, "grad_norm": 0.27826881408691406, "learning_rate": 4.280788346515265e-05, "loss": 0.1635, "step": 18102 }, { "epoch": 0.32288731138301285, "grad_norm": 0.26027441024780273, "learning_rate": 4.2806790980890144e-05, "loss": 0.1905, "step": 18103 }, { "epoch": 0.3229051475047266, "grad_norm": 0.36218979954719543, "learning_rate": 4.280569842760236e-05, "loss": 0.1578, "step": 18104 }, { "epoch": 0.3229229836264403, "grad_norm": 0.29456502199172974, "learning_rate": 4.280460580529354e-05, "loss": 0.1556, "step": 18105 }, { "epoch": 0.322940819748154, "grad_norm": 0.33097246289253235, "learning_rate": 4.280351311396792e-05, "loss": 0.1665, "step": 18106 }, { "epoch": 0.32295865586986766, "grad_norm": 0.27826792001724243, "learning_rate": 4.2802420353629733e-05, "loss": 0.1446, "step": 18107 }, { "epoch": 0.32297649199158135, "grad_norm": 0.3184787333011627, "learning_rate": 4.280132752428322e-05, "loss": 0.1545, "step": 18108 }, { "epoch": 0.32299432811329504, "grad_norm": 0.3142584264278412, "learning_rate": 4.280023462593261e-05, "loss": 0.2329, "step": 18109 }, { "epoch": 0.3230121642350087, "grad_norm": 0.24121132493019104, "learning_rate": 4.2799141658582144e-05, "loss": 0.1601, "step": 18110 }, { "epoch": 0.3230300003567224, "grad_norm": 0.24124382436275482, "learning_rate": 4.279804862223606e-05, "loss": 0.1654, "step": 18111 }, { "epoch": 0.3230478364784361, "grad_norm": 0.24795156717300415, "learning_rate": 4.2796955516898584e-05, "loss": 0.1704, "step": 18112 }, { "epoch": 0.32306567260014984, "grad_norm": 0.20948849618434906, "learning_rate": 4.279586234257397e-05, "loss": 0.1289, "step": 18113 }, { "epoch": 0.32308350872186353, "grad_norm": 0.2522103190422058, "learning_rate": 4.279476909926644e-05, "loss": 0.1711, "step": 18114 }, { "epoch": 0.3231013448435772, "grad_norm": 0.2595446705818176, "learning_rate": 4.2793675786980244e-05, "loss": 0.1255, "step": 18115 }, { "epoch": 0.3231191809652909, "grad_norm": 0.2632864713668823, "learning_rate": 4.279258240571962e-05, "loss": 0.1573, "step": 18116 }, { "epoch": 0.3231370170870046, "grad_norm": 0.2851024866104126, "learning_rate": 4.279148895548879e-05, "loss": 0.1646, "step": 18117 }, { "epoch": 0.3231548532087183, "grad_norm": 0.24163654446601868, "learning_rate": 4.279039543629201e-05, "loss": 0.1779, "step": 18118 }, { "epoch": 0.323172689330432, "grad_norm": 0.28096872568130493, "learning_rate": 4.278930184813351e-05, "loss": 0.1786, "step": 18119 }, { "epoch": 0.32319052545214566, "grad_norm": 0.3556716740131378, "learning_rate": 4.278820819101753e-05, "loss": 0.1745, "step": 18120 }, { "epoch": 0.3232083615738594, "grad_norm": 0.3623379170894623, "learning_rate": 4.278711446494832e-05, "loss": 0.1723, "step": 18121 }, { "epoch": 0.3232261976955731, "grad_norm": 0.20797869563102722, "learning_rate": 4.27860206699301e-05, "loss": 0.1804, "step": 18122 }, { "epoch": 0.3232440338172868, "grad_norm": 0.35048994421958923, "learning_rate": 4.278492680596713e-05, "loss": 0.1948, "step": 18123 }, { "epoch": 0.32326186993900047, "grad_norm": 0.5778950452804565, "learning_rate": 4.2783832873063635e-05, "loss": 0.2105, "step": 18124 }, { "epoch": 0.32327970606071416, "grad_norm": 0.25398486852645874, "learning_rate": 4.278273887122386e-05, "loss": 0.2011, "step": 18125 }, { "epoch": 0.32329754218242784, "grad_norm": 0.26170650124549866, "learning_rate": 4.2781644800452055e-05, "loss": 0.1197, "step": 18126 }, { "epoch": 0.32331537830414153, "grad_norm": 0.26197847723960876, "learning_rate": 4.278055066075245e-05, "loss": 0.1496, "step": 18127 }, { "epoch": 0.3233332144258552, "grad_norm": 0.32092928886413574, "learning_rate": 4.2779456452129286e-05, "loss": 0.1655, "step": 18128 }, { "epoch": 0.32335105054756896, "grad_norm": 0.2509569227695465, "learning_rate": 4.2778362174586805e-05, "loss": 0.165, "step": 18129 }, { "epoch": 0.32336888666928265, "grad_norm": 0.2260201871395111, "learning_rate": 4.277726782812926e-05, "loss": 0.1573, "step": 18130 }, { "epoch": 0.32338672279099634, "grad_norm": 0.43924370408058167, "learning_rate": 4.277617341276088e-05, "loss": 0.2046, "step": 18131 }, { "epoch": 0.32340455891271, "grad_norm": 0.18697215616703033, "learning_rate": 4.2775078928485915e-05, "loss": 0.1201, "step": 18132 }, { "epoch": 0.3234223950344237, "grad_norm": 0.3008309304714203, "learning_rate": 4.277398437530861e-05, "loss": 0.2267, "step": 18133 }, { "epoch": 0.3234402311561374, "grad_norm": 0.2293943166732788, "learning_rate": 4.277288975323319e-05, "loss": 0.1186, "step": 18134 }, { "epoch": 0.3234580672778511, "grad_norm": 0.25892719626426697, "learning_rate": 4.277179506226392e-05, "loss": 0.1293, "step": 18135 }, { "epoch": 0.3234759033995648, "grad_norm": 0.18936260044574738, "learning_rate": 4.277070030240503e-05, "loss": 0.1765, "step": 18136 }, { "epoch": 0.32349373952127847, "grad_norm": 0.28534770011901855, "learning_rate": 4.276960547366077e-05, "loss": 0.1825, "step": 18137 }, { "epoch": 0.3235115756429922, "grad_norm": 0.265889436006546, "learning_rate": 4.2768510576035384e-05, "loss": 0.1744, "step": 18138 }, { "epoch": 0.3235294117647059, "grad_norm": 0.21549122035503387, "learning_rate": 4.27674156095331e-05, "loss": 0.1406, "step": 18139 }, { "epoch": 0.3235472478864196, "grad_norm": 0.21736140549182892, "learning_rate": 4.276632057415819e-05, "loss": 0.1534, "step": 18140 }, { "epoch": 0.3235650840081333, "grad_norm": 0.45892786979675293, "learning_rate": 4.276522546991488e-05, "loss": 0.1832, "step": 18141 }, { "epoch": 0.32358292012984696, "grad_norm": 0.2051456868648529, "learning_rate": 4.276413029680743e-05, "loss": 0.135, "step": 18142 }, { "epoch": 0.32360075625156065, "grad_norm": 0.39524784684181213, "learning_rate": 4.2763035054840063e-05, "loss": 0.1868, "step": 18143 }, { "epoch": 0.32361859237327434, "grad_norm": 0.2699621021747589, "learning_rate": 4.2761939744017046e-05, "loss": 0.1755, "step": 18144 }, { "epoch": 0.323636428494988, "grad_norm": 0.4233923554420471, "learning_rate": 4.276084436434261e-05, "loss": 0.1819, "step": 18145 }, { "epoch": 0.32365426461670177, "grad_norm": 0.23033460974693298, "learning_rate": 4.275974891582101e-05, "loss": 0.1716, "step": 18146 }, { "epoch": 0.32367210073841546, "grad_norm": 0.19312624633312225, "learning_rate": 4.275865339845648e-05, "loss": 0.1179, "step": 18147 }, { "epoch": 0.32368993686012915, "grad_norm": 0.22600796818733215, "learning_rate": 4.275755781225329e-05, "loss": 0.1694, "step": 18148 }, { "epoch": 0.32370777298184283, "grad_norm": 0.23917296528816223, "learning_rate": 4.2756462157215663e-05, "loss": 0.1751, "step": 18149 }, { "epoch": 0.3237256091035565, "grad_norm": 0.29062923789024353, "learning_rate": 4.275536643334786e-05, "loss": 0.1288, "step": 18150 }, { "epoch": 0.3237434452252702, "grad_norm": 0.25291189551353455, "learning_rate": 4.2754270640654125e-05, "loss": 0.1705, "step": 18151 }, { "epoch": 0.3237612813469839, "grad_norm": 0.30322882533073425, "learning_rate": 4.275317477913871e-05, "loss": 0.1068, "step": 18152 }, { "epoch": 0.3237791174686976, "grad_norm": 0.3245062828063965, "learning_rate": 4.275207884880584e-05, "loss": 0.1843, "step": 18153 }, { "epoch": 0.3237969535904113, "grad_norm": 0.20447225868701935, "learning_rate": 4.2750982849659795e-05, "loss": 0.1439, "step": 18154 }, { "epoch": 0.323814789712125, "grad_norm": 0.2562181055545807, "learning_rate": 4.27498867817048e-05, "loss": 0.1595, "step": 18155 }, { "epoch": 0.3238326258338387, "grad_norm": 0.31041577458381653, "learning_rate": 4.274879064494512e-05, "loss": 0.1778, "step": 18156 }, { "epoch": 0.3238504619555524, "grad_norm": 0.25057414174079895, "learning_rate": 4.2747694439385e-05, "loss": 0.1423, "step": 18157 }, { "epoch": 0.3238682980772661, "grad_norm": 0.2723214328289032, "learning_rate": 4.2746598165028686e-05, "loss": 0.1843, "step": 18158 }, { "epoch": 0.32388613419897977, "grad_norm": 0.29241931438446045, "learning_rate": 4.274550182188042e-05, "loss": 0.1871, "step": 18159 }, { "epoch": 0.32390397032069346, "grad_norm": 0.22207072377204895, "learning_rate": 4.274440540994447e-05, "loss": 0.1774, "step": 18160 }, { "epoch": 0.32392180644240715, "grad_norm": 0.29096317291259766, "learning_rate": 4.274330892922507e-05, "loss": 0.1869, "step": 18161 }, { "epoch": 0.32393964256412083, "grad_norm": 0.2469264566898346, "learning_rate": 4.2742212379726475e-05, "loss": 0.1335, "step": 18162 }, { "epoch": 0.3239574786858346, "grad_norm": 0.3547193109989166, "learning_rate": 4.2741115761452944e-05, "loss": 0.1721, "step": 18163 }, { "epoch": 0.32397531480754826, "grad_norm": 0.2278842329978943, "learning_rate": 4.274001907440871e-05, "loss": 0.1411, "step": 18164 }, { "epoch": 0.32399315092926195, "grad_norm": 0.2389397770166397, "learning_rate": 4.273892231859804e-05, "loss": 0.1863, "step": 18165 }, { "epoch": 0.32401098705097564, "grad_norm": 0.3268055319786072, "learning_rate": 4.273782549402519e-05, "loss": 0.1431, "step": 18166 }, { "epoch": 0.32402882317268933, "grad_norm": 0.24398286640644073, "learning_rate": 4.2736728600694384e-05, "loss": 0.1659, "step": 18167 }, { "epoch": 0.324046659294403, "grad_norm": 0.2801712453365326, "learning_rate": 4.27356316386099e-05, "loss": 0.1773, "step": 18168 }, { "epoch": 0.3240644954161167, "grad_norm": 0.22851204872131348, "learning_rate": 4.273453460777599e-05, "loss": 0.1555, "step": 18169 }, { "epoch": 0.3240823315378304, "grad_norm": 0.25567877292633057, "learning_rate": 4.2733437508196886e-05, "loss": 0.255, "step": 18170 }, { "epoch": 0.32410016765954414, "grad_norm": 0.19158430397510529, "learning_rate": 4.2732340339876856e-05, "loss": 0.142, "step": 18171 }, { "epoch": 0.3241180037812578, "grad_norm": 0.27221187949180603, "learning_rate": 4.2731243102820157e-05, "loss": 0.1619, "step": 18172 }, { "epoch": 0.3241358399029715, "grad_norm": 0.3070240318775177, "learning_rate": 4.273014579703103e-05, "loss": 0.1505, "step": 18173 }, { "epoch": 0.3241536760246852, "grad_norm": 0.30097055435180664, "learning_rate": 4.272904842251374e-05, "loss": 0.1904, "step": 18174 }, { "epoch": 0.3241715121463989, "grad_norm": 0.24105100333690643, "learning_rate": 4.272795097927252e-05, "loss": 0.1902, "step": 18175 }, { "epoch": 0.3241893482681126, "grad_norm": 0.7637127041816711, "learning_rate": 4.272685346731166e-05, "loss": 0.2229, "step": 18176 }, { "epoch": 0.32420718438982626, "grad_norm": 0.26520559191703796, "learning_rate": 4.272575588663538e-05, "loss": 0.1474, "step": 18177 }, { "epoch": 0.32422502051153995, "grad_norm": 0.3035420775413513, "learning_rate": 4.272465823724795e-05, "loss": 0.1418, "step": 18178 }, { "epoch": 0.32424285663325364, "grad_norm": 0.3116084635257721, "learning_rate": 4.2723560519153625e-05, "loss": 0.1664, "step": 18179 }, { "epoch": 0.3242606927549674, "grad_norm": 0.2435222715139389, "learning_rate": 4.272246273235665e-05, "loss": 0.1146, "step": 18180 }, { "epoch": 0.32427852887668107, "grad_norm": 0.2081175148487091, "learning_rate": 4.2721364876861296e-05, "loss": 0.1768, "step": 18181 }, { "epoch": 0.32429636499839476, "grad_norm": 0.3875510096549988, "learning_rate": 4.272026695267181e-05, "loss": 0.257, "step": 18182 }, { "epoch": 0.32431420112010845, "grad_norm": 0.3454514145851135, "learning_rate": 4.2719168959792455e-05, "loss": 0.1889, "step": 18183 }, { "epoch": 0.32433203724182214, "grad_norm": 0.2673277258872986, "learning_rate": 4.271807089822747e-05, "loss": 0.203, "step": 18184 }, { "epoch": 0.3243498733635358, "grad_norm": 0.2972363233566284, "learning_rate": 4.2716972767981125e-05, "loss": 0.2217, "step": 18185 }, { "epoch": 0.3243677094852495, "grad_norm": 0.34821441769599915, "learning_rate": 4.271587456905768e-05, "loss": 0.1567, "step": 18186 }, { "epoch": 0.3243855456069632, "grad_norm": 0.32106438279151917, "learning_rate": 4.271477630146138e-05, "loss": 0.2012, "step": 18187 }, { "epoch": 0.32440338172867694, "grad_norm": 0.3968912661075592, "learning_rate": 4.271367796519649e-05, "loss": 0.1703, "step": 18188 }, { "epoch": 0.32442121785039063, "grad_norm": 0.25592130422592163, "learning_rate": 4.271257956026727e-05, "loss": 0.1942, "step": 18189 }, { "epoch": 0.3244390539721043, "grad_norm": 0.36070919036865234, "learning_rate": 4.271148108667797e-05, "loss": 0.1688, "step": 18190 }, { "epoch": 0.324456890093818, "grad_norm": 0.20616500079631805, "learning_rate": 4.271038254443286e-05, "loss": 0.1653, "step": 18191 }, { "epoch": 0.3244747262155317, "grad_norm": 0.2631708085536957, "learning_rate": 4.270928393353618e-05, "loss": 0.1728, "step": 18192 }, { "epoch": 0.3244925623372454, "grad_norm": 0.24998736381530762, "learning_rate": 4.2708185253992205e-05, "loss": 0.1605, "step": 18193 }, { "epoch": 0.32451039845895907, "grad_norm": 0.24360230565071106, "learning_rate": 4.270708650580518e-05, "loss": 0.1537, "step": 18194 }, { "epoch": 0.32452823458067276, "grad_norm": 0.21705761551856995, "learning_rate": 4.2705987688979376e-05, "loss": 0.1402, "step": 18195 }, { "epoch": 0.32454607070238645, "grad_norm": 0.2770020365715027, "learning_rate": 4.270488880351905e-05, "loss": 0.1531, "step": 18196 }, { "epoch": 0.3245639068241002, "grad_norm": 0.2462465614080429, "learning_rate": 4.270378984942846e-05, "loss": 0.2068, "step": 18197 }, { "epoch": 0.3245817429458139, "grad_norm": 0.22040067613124847, "learning_rate": 4.270269082671187e-05, "loss": 0.1933, "step": 18198 }, { "epoch": 0.32459957906752757, "grad_norm": 0.2138727754354477, "learning_rate": 4.270159173537353e-05, "loss": 0.1719, "step": 18199 }, { "epoch": 0.32461741518924125, "grad_norm": 0.2432202845811844, "learning_rate": 4.2700492575417705e-05, "loss": 0.1705, "step": 18200 }, { "epoch": 0.32463525131095494, "grad_norm": 0.25168612599372864, "learning_rate": 4.269939334684866e-05, "loss": 0.1317, "step": 18201 }, { "epoch": 0.32465308743266863, "grad_norm": 0.21846838295459747, "learning_rate": 4.269829404967065e-05, "loss": 0.1772, "step": 18202 }, { "epoch": 0.3246709235543823, "grad_norm": 0.23773692548274994, "learning_rate": 4.269719468388794e-05, "loss": 0.1469, "step": 18203 }, { "epoch": 0.324688759676096, "grad_norm": 0.2535833716392517, "learning_rate": 4.2696095249504795e-05, "loss": 0.1527, "step": 18204 }, { "epoch": 0.32470659579780975, "grad_norm": 0.20184482634067535, "learning_rate": 4.269499574652548e-05, "loss": 0.1395, "step": 18205 }, { "epoch": 0.32472443191952344, "grad_norm": 0.2776055634021759, "learning_rate": 4.269389617495424e-05, "loss": 0.1265, "step": 18206 }, { "epoch": 0.3247422680412371, "grad_norm": 0.32144761085510254, "learning_rate": 4.269279653479534e-05, "loss": 0.1754, "step": 18207 }, { "epoch": 0.3247601041629508, "grad_norm": 0.2513435184955597, "learning_rate": 4.2691696826053065e-05, "loss": 0.1671, "step": 18208 }, { "epoch": 0.3247779402846645, "grad_norm": 0.3405788540840149, "learning_rate": 4.269059704873165e-05, "loss": 0.16, "step": 18209 }, { "epoch": 0.3247957764063782, "grad_norm": 0.2149336189031601, "learning_rate": 4.2689497202835385e-05, "loss": 0.1479, "step": 18210 }, { "epoch": 0.3248136125280919, "grad_norm": 0.29224058985710144, "learning_rate": 4.2688397288368506e-05, "loss": 0.1558, "step": 18211 }, { "epoch": 0.32483144864980557, "grad_norm": 0.2925548553466797, "learning_rate": 4.268729730533529e-05, "loss": 0.2015, "step": 18212 }, { "epoch": 0.32484928477151925, "grad_norm": 0.26221764087677, "learning_rate": 4.2686197253740005e-05, "loss": 0.1924, "step": 18213 }, { "epoch": 0.324867120893233, "grad_norm": 0.24553368985652924, "learning_rate": 4.2685097133586915e-05, "loss": 0.1996, "step": 18214 }, { "epoch": 0.3248849570149467, "grad_norm": 0.24499189853668213, "learning_rate": 4.268399694488028e-05, "loss": 0.1925, "step": 18215 }, { "epoch": 0.3249027931366604, "grad_norm": 0.2467213124036789, "learning_rate": 4.2682896687624355e-05, "loss": 0.1573, "step": 18216 }, { "epoch": 0.32492062925837406, "grad_norm": 0.4217036962509155, "learning_rate": 4.268179636182342e-05, "loss": 0.1552, "step": 18217 }, { "epoch": 0.32493846538008775, "grad_norm": 0.28804704546928406, "learning_rate": 4.268069596748174e-05, "loss": 0.2181, "step": 18218 }, { "epoch": 0.32495630150180144, "grad_norm": 0.268893837928772, "learning_rate": 4.267959550460357e-05, "loss": 0.1574, "step": 18219 }, { "epoch": 0.3249741376235151, "grad_norm": 0.20434194803237915, "learning_rate": 4.2678494973193184e-05, "loss": 0.0877, "step": 18220 }, { "epoch": 0.3249919737452288, "grad_norm": 0.22750738263130188, "learning_rate": 4.267739437325484e-05, "loss": 0.1663, "step": 18221 }, { "epoch": 0.32500980986694256, "grad_norm": 0.35077619552612305, "learning_rate": 4.2676293704792816e-05, "loss": 0.1855, "step": 18222 }, { "epoch": 0.32502764598865624, "grad_norm": 0.31882402300834656, "learning_rate": 4.2675192967811374e-05, "loss": 0.1793, "step": 18223 }, { "epoch": 0.32504548211036993, "grad_norm": 0.21044772863388062, "learning_rate": 4.267409216231477e-05, "loss": 0.173, "step": 18224 }, { "epoch": 0.3250633182320836, "grad_norm": 0.3163749873638153, "learning_rate": 4.267299128830729e-05, "loss": 0.1535, "step": 18225 }, { "epoch": 0.3250811543537973, "grad_norm": 0.2738315463066101, "learning_rate": 4.267189034579319e-05, "loss": 0.1564, "step": 18226 }, { "epoch": 0.325098990475511, "grad_norm": 0.29425275325775146, "learning_rate": 4.2670789334776736e-05, "loss": 0.1891, "step": 18227 }, { "epoch": 0.3251168265972247, "grad_norm": 0.31764763593673706, "learning_rate": 4.266968825526221e-05, "loss": 0.2118, "step": 18228 }, { "epoch": 0.32513466271893837, "grad_norm": 0.2998555600643158, "learning_rate": 4.266858710725386e-05, "loss": 0.1591, "step": 18229 }, { "epoch": 0.3251524988406521, "grad_norm": 0.3281696140766144, "learning_rate": 4.266748589075596e-05, "loss": 0.1816, "step": 18230 }, { "epoch": 0.3251703349623658, "grad_norm": 0.30124592781066895, "learning_rate": 4.266638460577278e-05, "loss": 0.1929, "step": 18231 }, { "epoch": 0.3251881710840795, "grad_norm": 0.27557745575904846, "learning_rate": 4.266528325230861e-05, "loss": 0.099, "step": 18232 }, { "epoch": 0.3252060072057932, "grad_norm": 0.2500791549682617, "learning_rate": 4.266418183036768e-05, "loss": 0.1781, "step": 18233 }, { "epoch": 0.32522384332750687, "grad_norm": 0.3229110836982727, "learning_rate": 4.2663080339954295e-05, "loss": 0.1729, "step": 18234 }, { "epoch": 0.32524167944922056, "grad_norm": 0.2780783772468567, "learning_rate": 4.2661978781072695e-05, "loss": 0.1751, "step": 18235 }, { "epoch": 0.32525951557093424, "grad_norm": 0.3489958643913269, "learning_rate": 4.2660877153727183e-05, "loss": 0.2497, "step": 18236 }, { "epoch": 0.32527735169264793, "grad_norm": 0.23013868927955627, "learning_rate": 4.2659775457921996e-05, "loss": 0.171, "step": 18237 }, { "epoch": 0.3252951878143616, "grad_norm": 0.30657973885536194, "learning_rate": 4.265867369366143e-05, "loss": 0.1353, "step": 18238 }, { "epoch": 0.32531302393607536, "grad_norm": 0.22024422883987427, "learning_rate": 4.265757186094974e-05, "loss": 0.157, "step": 18239 }, { "epoch": 0.32533086005778905, "grad_norm": 0.39033645391464233, "learning_rate": 4.26564699597912e-05, "loss": 0.2621, "step": 18240 }, { "epoch": 0.32534869617950274, "grad_norm": 0.20797429978847504, "learning_rate": 4.2655367990190095e-05, "loss": 0.1854, "step": 18241 }, { "epoch": 0.3253665323012164, "grad_norm": 0.21181592345237732, "learning_rate": 4.265426595215067e-05, "loss": 0.1897, "step": 18242 }, { "epoch": 0.3253843684229301, "grad_norm": 0.37151506543159485, "learning_rate": 4.265316384567723e-05, "loss": 0.2125, "step": 18243 }, { "epoch": 0.3254022045446438, "grad_norm": 0.23360081017017365, "learning_rate": 4.265206167077402e-05, "loss": 0.1733, "step": 18244 }, { "epoch": 0.3254200406663575, "grad_norm": 0.3226069211959839, "learning_rate": 4.265095942744533e-05, "loss": 0.1609, "step": 18245 }, { "epoch": 0.3254378767880712, "grad_norm": 0.33413782715797424, "learning_rate": 4.264985711569541e-05, "loss": 0.2181, "step": 18246 }, { "epoch": 0.3254557129097849, "grad_norm": 0.3707151710987091, "learning_rate": 4.264875473552856e-05, "loss": 0.2095, "step": 18247 }, { "epoch": 0.3254735490314986, "grad_norm": 0.26449453830718994, "learning_rate": 4.264765228694904e-05, "loss": 0.2147, "step": 18248 }, { "epoch": 0.3254913851532123, "grad_norm": 0.273408442735672, "learning_rate": 4.264654976996112e-05, "loss": 0.1858, "step": 18249 }, { "epoch": 0.325509221274926, "grad_norm": 0.1965082734823227, "learning_rate": 4.2645447184569074e-05, "loss": 0.1376, "step": 18250 }, { "epoch": 0.3255270573966397, "grad_norm": 0.27523037791252136, "learning_rate": 4.264434453077719e-05, "loss": 0.1785, "step": 18251 }, { "epoch": 0.32554489351835336, "grad_norm": 0.19104528427124023, "learning_rate": 4.264324180858973e-05, "loss": 0.1452, "step": 18252 }, { "epoch": 0.32556272964006705, "grad_norm": 0.1954038441181183, "learning_rate": 4.264213901801097e-05, "loss": 0.1323, "step": 18253 }, { "epoch": 0.32558056576178074, "grad_norm": 0.34060055017471313, "learning_rate": 4.264103615904519e-05, "loss": 0.1926, "step": 18254 }, { "epoch": 0.3255984018834944, "grad_norm": 0.24086469411849976, "learning_rate": 4.263993323169665e-05, "loss": 0.1734, "step": 18255 }, { "epoch": 0.32561623800520817, "grad_norm": 0.2462422251701355, "learning_rate": 4.263883023596965e-05, "loss": 0.1457, "step": 18256 }, { "epoch": 0.32563407412692186, "grad_norm": 0.2652982473373413, "learning_rate": 4.2637727171868434e-05, "loss": 0.1584, "step": 18257 }, { "epoch": 0.32565191024863555, "grad_norm": 0.3294752240180969, "learning_rate": 4.263662403939731e-05, "loss": 0.1908, "step": 18258 }, { "epoch": 0.32566974637034923, "grad_norm": 0.259242981672287, "learning_rate": 4.2635520838560534e-05, "loss": 0.137, "step": 18259 }, { "epoch": 0.3256875824920629, "grad_norm": 0.3338519036769867, "learning_rate": 4.2634417569362394e-05, "loss": 0.1787, "step": 18260 }, { "epoch": 0.3257054186137766, "grad_norm": 0.24972964823246002, "learning_rate": 4.2633314231807157e-05, "loss": 0.173, "step": 18261 }, { "epoch": 0.3257232547354903, "grad_norm": 0.2628253996372223, "learning_rate": 4.26322108258991e-05, "loss": 0.173, "step": 18262 }, { "epoch": 0.325741090857204, "grad_norm": 0.31130221486091614, "learning_rate": 4.263110735164251e-05, "loss": 0.1917, "step": 18263 }, { "epoch": 0.32575892697891773, "grad_norm": 0.25815194845199585, "learning_rate": 4.2630003809041654e-05, "loss": 0.1476, "step": 18264 }, { "epoch": 0.3257767631006314, "grad_norm": 0.2450726181268692, "learning_rate": 4.2628900198100814e-05, "loss": 0.1577, "step": 18265 }, { "epoch": 0.3257945992223451, "grad_norm": 0.26551350951194763, "learning_rate": 4.262779651882427e-05, "loss": 0.1579, "step": 18266 }, { "epoch": 0.3258124353440588, "grad_norm": 0.23024778068065643, "learning_rate": 4.2626692771216296e-05, "loss": 0.1336, "step": 18267 }, { "epoch": 0.3258302714657725, "grad_norm": 0.32547813653945923, "learning_rate": 4.262558895528117e-05, "loss": 0.1991, "step": 18268 }, { "epoch": 0.32584810758748617, "grad_norm": 0.20113670825958252, "learning_rate": 4.262448507102318e-05, "loss": 0.15, "step": 18269 }, { "epoch": 0.32586594370919986, "grad_norm": 0.2653674781322479, "learning_rate": 4.262338111844659e-05, "loss": 0.218, "step": 18270 }, { "epoch": 0.32588377983091354, "grad_norm": 0.20280010998249054, "learning_rate": 4.26222770975557e-05, "loss": 0.1307, "step": 18271 }, { "epoch": 0.3259016159526273, "grad_norm": 0.2655740976333618, "learning_rate": 4.262117300835477e-05, "loss": 0.199, "step": 18272 }, { "epoch": 0.325919452074341, "grad_norm": 0.25241681933403015, "learning_rate": 4.262006885084809e-05, "loss": 0.1918, "step": 18273 }, { "epoch": 0.32593728819605466, "grad_norm": 0.28184425830841064, "learning_rate": 4.261896462503994e-05, "loss": 0.1618, "step": 18274 }, { "epoch": 0.32595512431776835, "grad_norm": 0.2922350764274597, "learning_rate": 4.261786033093459e-05, "loss": 0.1428, "step": 18275 }, { "epoch": 0.32597296043948204, "grad_norm": 0.2177983820438385, "learning_rate": 4.261675596853633e-05, "loss": 0.1313, "step": 18276 }, { "epoch": 0.32599079656119573, "grad_norm": 0.26058638095855713, "learning_rate": 4.261565153784945e-05, "loss": 0.1764, "step": 18277 }, { "epoch": 0.3260086326829094, "grad_norm": 0.21988406777381897, "learning_rate": 4.261454703887821e-05, "loss": 0.1052, "step": 18278 }, { "epoch": 0.3260264688046231, "grad_norm": 0.2672010064125061, "learning_rate": 4.26134424716269e-05, "loss": 0.2196, "step": 18279 }, { "epoch": 0.3260443049263368, "grad_norm": 0.2395225465297699, "learning_rate": 4.261233783609981e-05, "loss": 0.1485, "step": 18280 }, { "epoch": 0.32606214104805054, "grad_norm": 0.2792045474052429, "learning_rate": 4.2611233132301206e-05, "loss": 0.1246, "step": 18281 }, { "epoch": 0.3260799771697642, "grad_norm": 0.29213061928749084, "learning_rate": 4.261012836023539e-05, "loss": 0.1554, "step": 18282 }, { "epoch": 0.3260978132914779, "grad_norm": 0.3063087463378906, "learning_rate": 4.2609023519906635e-05, "loss": 0.1446, "step": 18283 }, { "epoch": 0.3261156494131916, "grad_norm": 0.2854937016963959, "learning_rate": 4.260791861131922e-05, "loss": 0.1295, "step": 18284 }, { "epoch": 0.3261334855349053, "grad_norm": 0.22289389371871948, "learning_rate": 4.2606813634477424e-05, "loss": 0.1701, "step": 18285 }, { "epoch": 0.326151321656619, "grad_norm": 0.42393597960472107, "learning_rate": 4.260570858938554e-05, "loss": 0.2563, "step": 18286 }, { "epoch": 0.32616915777833266, "grad_norm": 0.26955538988113403, "learning_rate": 4.2604603476047855e-05, "loss": 0.216, "step": 18287 }, { "epoch": 0.32618699390004635, "grad_norm": 0.3739461302757263, "learning_rate": 4.260349829446864e-05, "loss": 0.155, "step": 18288 }, { "epoch": 0.3262048300217601, "grad_norm": 0.23768596351146698, "learning_rate": 4.260239304465219e-05, "loss": 0.1714, "step": 18289 }, { "epoch": 0.3262226661434738, "grad_norm": 0.2925799489021301, "learning_rate": 4.260128772660278e-05, "loss": 0.208, "step": 18290 }, { "epoch": 0.32624050226518747, "grad_norm": 0.35943129658699036, "learning_rate": 4.260018234032471e-05, "loss": 0.1596, "step": 18291 }, { "epoch": 0.32625833838690116, "grad_norm": 0.5245606899261475, "learning_rate": 4.259907688582224e-05, "loss": 0.1387, "step": 18292 }, { "epoch": 0.32627617450861485, "grad_norm": 0.22465407848358154, "learning_rate": 4.2597971363099675e-05, "loss": 0.1723, "step": 18293 }, { "epoch": 0.32629401063032853, "grad_norm": 0.2072666734457016, "learning_rate": 4.2596865772161296e-05, "loss": 0.1574, "step": 18294 }, { "epoch": 0.3263118467520422, "grad_norm": 0.26960331201553345, "learning_rate": 4.2595760113011394e-05, "loss": 0.1718, "step": 18295 }, { "epoch": 0.3263296828737559, "grad_norm": 0.3110816776752472, "learning_rate": 4.259465438565424e-05, "loss": 0.1904, "step": 18296 }, { "epoch": 0.3263475189954696, "grad_norm": 0.2772740423679352, "learning_rate": 4.259354859009413e-05, "loss": 0.1288, "step": 18297 }, { "epoch": 0.32636535511718334, "grad_norm": 0.27265042066574097, "learning_rate": 4.2592442726335344e-05, "loss": 0.1606, "step": 18298 }, { "epoch": 0.32638319123889703, "grad_norm": 0.19521182775497437, "learning_rate": 4.2591336794382184e-05, "loss": 0.1454, "step": 18299 }, { "epoch": 0.3264010273606107, "grad_norm": 0.42005911469459534, "learning_rate": 4.2590230794238915e-05, "loss": 0.2162, "step": 18300 }, { "epoch": 0.3264188634823244, "grad_norm": 0.28822359442710876, "learning_rate": 4.258912472590985e-05, "loss": 0.2034, "step": 18301 }, { "epoch": 0.3264366996040381, "grad_norm": 0.21401211619377136, "learning_rate": 4.258801858939926e-05, "loss": 0.1937, "step": 18302 }, { "epoch": 0.3264545357257518, "grad_norm": 0.26566237211227417, "learning_rate": 4.258691238471143e-05, "loss": 0.1961, "step": 18303 }, { "epoch": 0.32647237184746547, "grad_norm": 0.22111135721206665, "learning_rate": 4.258580611185066e-05, "loss": 0.1534, "step": 18304 }, { "epoch": 0.32649020796917916, "grad_norm": 0.2629345655441284, "learning_rate": 4.2584699770821215e-05, "loss": 0.1809, "step": 18305 }, { "epoch": 0.3265080440908929, "grad_norm": 0.2634925842285156, "learning_rate": 4.258359336162742e-05, "loss": 0.1408, "step": 18306 }, { "epoch": 0.3265258802126066, "grad_norm": 0.17384250462055206, "learning_rate": 4.2582486884273526e-05, "loss": 0.1372, "step": 18307 }, { "epoch": 0.3265437163343203, "grad_norm": 0.21742819249629974, "learning_rate": 4.258138033876385e-05, "loss": 0.1656, "step": 18308 }, { "epoch": 0.32656155245603397, "grad_norm": 0.2189481258392334, "learning_rate": 4.258027372510267e-05, "loss": 0.1468, "step": 18309 }, { "epoch": 0.32657938857774765, "grad_norm": 0.23182353377342224, "learning_rate": 4.257916704329428e-05, "loss": 0.1578, "step": 18310 }, { "epoch": 0.32659722469946134, "grad_norm": 0.1941414475440979, "learning_rate": 4.257806029334296e-05, "loss": 0.1141, "step": 18311 }, { "epoch": 0.32661506082117503, "grad_norm": 0.20877231657505035, "learning_rate": 4.257695347525301e-05, "loss": 0.164, "step": 18312 }, { "epoch": 0.3266328969428887, "grad_norm": 0.32865646481513977, "learning_rate": 4.257584658902872e-05, "loss": 0.1712, "step": 18313 }, { "epoch": 0.3266507330646024, "grad_norm": 0.26722660660743713, "learning_rate": 4.257473963467438e-05, "loss": 0.1873, "step": 18314 }, { "epoch": 0.32666856918631615, "grad_norm": 0.21177662909030914, "learning_rate": 4.257363261219427e-05, "loss": 0.1351, "step": 18315 }, { "epoch": 0.32668640530802984, "grad_norm": 0.22980764508247375, "learning_rate": 4.25725255215927e-05, "loss": 0.1449, "step": 18316 }, { "epoch": 0.3267042414297435, "grad_norm": 0.2627589702606201, "learning_rate": 4.257141836287395e-05, "loss": 0.1754, "step": 18317 }, { "epoch": 0.3267220775514572, "grad_norm": 0.20913569629192352, "learning_rate": 4.2570311136042305e-05, "loss": 0.1482, "step": 18318 }, { "epoch": 0.3267399136731709, "grad_norm": 0.24080607295036316, "learning_rate": 4.256920384110208e-05, "loss": 0.1406, "step": 18319 }, { "epoch": 0.3267577497948846, "grad_norm": 0.24826756119728088, "learning_rate": 4.256809647805754e-05, "loss": 0.1497, "step": 18320 }, { "epoch": 0.3267755859165983, "grad_norm": 0.19989757239818573, "learning_rate": 4.2566989046913e-05, "loss": 0.1378, "step": 18321 }, { "epoch": 0.32679342203831196, "grad_norm": 0.20871931314468384, "learning_rate": 4.256588154767273e-05, "loss": 0.168, "step": 18322 }, { "epoch": 0.3268112581600257, "grad_norm": 0.23451226949691772, "learning_rate": 4.256477398034104e-05, "loss": 0.1445, "step": 18323 }, { "epoch": 0.3268290942817394, "grad_norm": 0.25865402817726135, "learning_rate": 4.2563666344922225e-05, "loss": 0.1688, "step": 18324 }, { "epoch": 0.3268469304034531, "grad_norm": 0.2108352780342102, "learning_rate": 4.2562558641420575e-05, "loss": 0.17, "step": 18325 }, { "epoch": 0.32686476652516677, "grad_norm": 0.28229469060897827, "learning_rate": 4.256145086984038e-05, "loss": 0.191, "step": 18326 }, { "epoch": 0.32688260264688046, "grad_norm": 0.2260260432958603, "learning_rate": 4.2560343030185934e-05, "loss": 0.1174, "step": 18327 }, { "epoch": 0.32690043876859415, "grad_norm": 0.23733361065387726, "learning_rate": 4.255923512246153e-05, "loss": 0.169, "step": 18328 }, { "epoch": 0.32691827489030784, "grad_norm": 0.36725863814353943, "learning_rate": 4.255812714667147e-05, "loss": 0.1882, "step": 18329 }, { "epoch": 0.3269361110120215, "grad_norm": 0.2420433759689331, "learning_rate": 4.255701910282005e-05, "loss": 0.1788, "step": 18330 }, { "epoch": 0.32695394713373527, "grad_norm": 0.21113458275794983, "learning_rate": 4.255591099091155e-05, "loss": 0.1315, "step": 18331 }, { "epoch": 0.32697178325544896, "grad_norm": 0.22553254663944244, "learning_rate": 4.255480281095028e-05, "loss": 0.1532, "step": 18332 }, { "epoch": 0.32698961937716264, "grad_norm": 0.24502426385879517, "learning_rate": 4.2553694562940525e-05, "loss": 0.1809, "step": 18333 }, { "epoch": 0.32700745549887633, "grad_norm": 0.35403308272361755, "learning_rate": 4.2552586246886595e-05, "loss": 0.1972, "step": 18334 }, { "epoch": 0.32702529162059, "grad_norm": 0.2816767692565918, "learning_rate": 4.255147786279277e-05, "loss": 0.1925, "step": 18335 }, { "epoch": 0.3270431277423037, "grad_norm": 0.25400564074516296, "learning_rate": 4.2550369410663366e-05, "loss": 0.1706, "step": 18336 }, { "epoch": 0.3270609638640174, "grad_norm": 0.19140376150608063, "learning_rate": 4.2549260890502664e-05, "loss": 0.1436, "step": 18337 }, { "epoch": 0.3270787999857311, "grad_norm": 0.30130836367607117, "learning_rate": 4.254815230231496e-05, "loss": 0.2071, "step": 18338 }, { "epoch": 0.32709663610744477, "grad_norm": 0.2753887176513672, "learning_rate": 4.254704364610456e-05, "loss": 0.0837, "step": 18339 }, { "epoch": 0.3271144722291585, "grad_norm": 0.252464234828949, "learning_rate": 4.2545934921875764e-05, "loss": 0.1421, "step": 18340 }, { "epoch": 0.3271323083508722, "grad_norm": 0.37632113695144653, "learning_rate": 4.2544826129632854e-05, "loss": 0.2364, "step": 18341 }, { "epoch": 0.3271501444725859, "grad_norm": 0.24468207359313965, "learning_rate": 4.2543717269380144e-05, "loss": 0.1391, "step": 18342 }, { "epoch": 0.3271679805942996, "grad_norm": 0.2854864299297333, "learning_rate": 4.254260834112192e-05, "loss": 0.1829, "step": 18343 }, { "epoch": 0.32718581671601327, "grad_norm": 0.23584985733032227, "learning_rate": 4.25414993448625e-05, "loss": 0.1593, "step": 18344 }, { "epoch": 0.32720365283772695, "grad_norm": 0.3183591961860657, "learning_rate": 4.254039028060616e-05, "loss": 0.1819, "step": 18345 }, { "epoch": 0.32722148895944064, "grad_norm": 0.22881759703159332, "learning_rate": 4.253928114835721e-05, "loss": 0.17, "step": 18346 }, { "epoch": 0.32723932508115433, "grad_norm": 0.3099662959575653, "learning_rate": 4.253817194811995e-05, "loss": 0.1401, "step": 18347 }, { "epoch": 0.3272571612028681, "grad_norm": 0.19419437646865845, "learning_rate": 4.2537062679898675e-05, "loss": 0.1621, "step": 18348 }, { "epoch": 0.32727499732458176, "grad_norm": 0.2671164274215698, "learning_rate": 4.25359533436977e-05, "loss": 0.1673, "step": 18349 }, { "epoch": 0.32729283344629545, "grad_norm": 0.31209197640419006, "learning_rate": 4.25348439395213e-05, "loss": 0.1858, "step": 18350 }, { "epoch": 0.32731066956800914, "grad_norm": 0.21778671443462372, "learning_rate": 4.2533734467373795e-05, "loss": 0.1357, "step": 18351 }, { "epoch": 0.3273285056897228, "grad_norm": 0.23879684507846832, "learning_rate": 4.2532624927259475e-05, "loss": 0.1764, "step": 18352 }, { "epoch": 0.3273463418114365, "grad_norm": 0.30174243450164795, "learning_rate": 4.253151531918265e-05, "loss": 0.1675, "step": 18353 }, { "epoch": 0.3273641779331502, "grad_norm": 0.24705903232097626, "learning_rate": 4.2530405643147606e-05, "loss": 0.1369, "step": 18354 }, { "epoch": 0.3273820140548639, "grad_norm": 0.25176241993904114, "learning_rate": 4.252929589915867e-05, "loss": 0.1624, "step": 18355 }, { "epoch": 0.3273998501765776, "grad_norm": 0.2652989327907562, "learning_rate": 4.252818608722012e-05, "loss": 0.1542, "step": 18356 }, { "epoch": 0.3274176862982913, "grad_norm": 0.2860439419746399, "learning_rate": 4.2527076207336267e-05, "loss": 0.1492, "step": 18357 }, { "epoch": 0.327435522420005, "grad_norm": 0.28419235348701477, "learning_rate": 4.252596625951141e-05, "loss": 0.1847, "step": 18358 }, { "epoch": 0.3274533585417187, "grad_norm": 0.40242666006088257, "learning_rate": 4.252485624374986e-05, "loss": 0.1407, "step": 18359 }, { "epoch": 0.3274711946634324, "grad_norm": 0.27525705099105835, "learning_rate": 4.2523746160055915e-05, "loss": 0.1656, "step": 18360 }, { "epoch": 0.3274890307851461, "grad_norm": 0.287389874458313, "learning_rate": 4.252263600843387e-05, "loss": 0.225, "step": 18361 }, { "epoch": 0.32750686690685976, "grad_norm": 0.30568593740463257, "learning_rate": 4.252152578888804e-05, "loss": 0.1625, "step": 18362 }, { "epoch": 0.32752470302857345, "grad_norm": 0.32873180508613586, "learning_rate": 4.252041550142273e-05, "loss": 0.1651, "step": 18363 }, { "epoch": 0.32754253915028714, "grad_norm": 0.2423868477344513, "learning_rate": 4.2519305146042234e-05, "loss": 0.189, "step": 18364 }, { "epoch": 0.3275603752720009, "grad_norm": 0.2557133436203003, "learning_rate": 4.251819472275086e-05, "loss": 0.1258, "step": 18365 }, { "epoch": 0.32757821139371457, "grad_norm": 0.32561326026916504, "learning_rate": 4.2517084231552905e-05, "loss": 0.1876, "step": 18366 }, { "epoch": 0.32759604751542826, "grad_norm": 0.3135305941104889, "learning_rate": 4.251597367245269e-05, "loss": 0.1807, "step": 18367 }, { "epoch": 0.32761388363714194, "grad_norm": 0.240975022315979, "learning_rate": 4.251486304545451e-05, "loss": 0.1449, "step": 18368 }, { "epoch": 0.32763171975885563, "grad_norm": 0.22965987026691437, "learning_rate": 4.251375235056267e-05, "loss": 0.1196, "step": 18369 }, { "epoch": 0.3276495558805693, "grad_norm": 0.26841914653778076, "learning_rate": 4.251264158778148e-05, "loss": 0.1807, "step": 18370 }, { "epoch": 0.327667392002283, "grad_norm": 0.23701927065849304, "learning_rate": 4.2511530757115246e-05, "loss": 0.1543, "step": 18371 }, { "epoch": 0.3276852281239967, "grad_norm": 0.25472572445869446, "learning_rate": 4.251041985856826e-05, "loss": 0.116, "step": 18372 }, { "epoch": 0.32770306424571044, "grad_norm": 0.2580103576183319, "learning_rate": 4.250930889214484e-05, "loss": 0.1326, "step": 18373 }, { "epoch": 0.32772090036742413, "grad_norm": 0.24860207736492157, "learning_rate": 4.25081978578493e-05, "loss": 0.197, "step": 18374 }, { "epoch": 0.3277387364891378, "grad_norm": 0.3765740394592285, "learning_rate": 4.250708675568593e-05, "loss": 0.2144, "step": 18375 }, { "epoch": 0.3277565726108515, "grad_norm": 0.28787291049957275, "learning_rate": 4.2505975585659045e-05, "loss": 0.1522, "step": 18376 }, { "epoch": 0.3277744087325652, "grad_norm": 0.291194349527359, "learning_rate": 4.250486434777296e-05, "loss": 0.1529, "step": 18377 }, { "epoch": 0.3277922448542789, "grad_norm": 0.239347442984581, "learning_rate": 4.2503753042031966e-05, "loss": 0.1521, "step": 18378 }, { "epoch": 0.32781008097599257, "grad_norm": 0.20403681695461273, "learning_rate": 4.250264166844039e-05, "loss": 0.144, "step": 18379 }, { "epoch": 0.32782791709770626, "grad_norm": 0.36052584648132324, "learning_rate": 4.2501530227002514e-05, "loss": 0.1433, "step": 18380 }, { "epoch": 0.32784575321941994, "grad_norm": 0.33250558376312256, "learning_rate": 4.250041871772268e-05, "loss": 0.1756, "step": 18381 }, { "epoch": 0.3278635893411337, "grad_norm": 0.2789793312549591, "learning_rate": 4.249930714060517e-05, "loss": 0.1847, "step": 18382 }, { "epoch": 0.3278814254628474, "grad_norm": 0.24241597950458527, "learning_rate": 4.24981954956543e-05, "loss": 0.1454, "step": 18383 }, { "epoch": 0.32789926158456106, "grad_norm": 0.29094740748405457, "learning_rate": 4.249708378287438e-05, "loss": 0.1645, "step": 18384 }, { "epoch": 0.32791709770627475, "grad_norm": 0.31322869658470154, "learning_rate": 4.249597200226972e-05, "loss": 0.1764, "step": 18385 }, { "epoch": 0.32793493382798844, "grad_norm": 0.3057456910610199, "learning_rate": 4.249486015384463e-05, "loss": 0.1757, "step": 18386 }, { "epoch": 0.3279527699497021, "grad_norm": 0.24807824194431305, "learning_rate": 4.249374823760343e-05, "loss": 0.1491, "step": 18387 }, { "epoch": 0.3279706060714158, "grad_norm": 0.3567791283130646, "learning_rate": 4.249263625355041e-05, "loss": 0.2344, "step": 18388 }, { "epoch": 0.3279884421931295, "grad_norm": 0.2898887097835541, "learning_rate": 4.249152420168988e-05, "loss": 0.1698, "step": 18389 }, { "epoch": 0.32800627831484325, "grad_norm": 0.2851754128932953, "learning_rate": 4.249041208202618e-05, "loss": 0.1549, "step": 18390 }, { "epoch": 0.32802411443655694, "grad_norm": 0.3616372048854828, "learning_rate": 4.248929989456359e-05, "loss": 0.198, "step": 18391 }, { "epoch": 0.3280419505582706, "grad_norm": 0.4312104284763336, "learning_rate": 4.248818763930644e-05, "loss": 0.1711, "step": 18392 }, { "epoch": 0.3280597866799843, "grad_norm": 0.30319151282310486, "learning_rate": 4.248707531625903e-05, "loss": 0.157, "step": 18393 }, { "epoch": 0.328077622801698, "grad_norm": 0.27680492401123047, "learning_rate": 4.248596292542567e-05, "loss": 0.1607, "step": 18394 }, { "epoch": 0.3280954589234117, "grad_norm": 0.25055864453315735, "learning_rate": 4.2484850466810686e-05, "loss": 0.1369, "step": 18395 }, { "epoch": 0.3281132950451254, "grad_norm": 0.2178165316581726, "learning_rate": 4.2483737940418386e-05, "loss": 0.18, "step": 18396 }, { "epoch": 0.32813113116683906, "grad_norm": 0.24201984703540802, "learning_rate": 4.2482625346253076e-05, "loss": 0.1453, "step": 18397 }, { "epoch": 0.32814896728855275, "grad_norm": 0.23836806416511536, "learning_rate": 4.2481512684319066e-05, "loss": 0.1803, "step": 18398 }, { "epoch": 0.3281668034102665, "grad_norm": 0.3131429851055145, "learning_rate": 4.248039995462068e-05, "loss": 0.1721, "step": 18399 }, { "epoch": 0.3281846395319802, "grad_norm": 0.21951258182525635, "learning_rate": 4.247928715716223e-05, "loss": 0.1619, "step": 18400 }, { "epoch": 0.32820247565369387, "grad_norm": 0.29232263565063477, "learning_rate": 4.2478174291948016e-05, "loss": 0.182, "step": 18401 }, { "epoch": 0.32822031177540756, "grad_norm": 0.3089362680912018, "learning_rate": 4.2477061358982375e-05, "loss": 0.2145, "step": 18402 }, { "epoch": 0.32823814789712125, "grad_norm": 0.46059197187423706, "learning_rate": 4.247594835826959e-05, "loss": 0.1601, "step": 18403 }, { "epoch": 0.32825598401883493, "grad_norm": 0.29953014850616455, "learning_rate": 4.247483528981401e-05, "loss": 0.1692, "step": 18404 }, { "epoch": 0.3282738201405486, "grad_norm": 0.27145713567733765, "learning_rate": 4.247372215361992e-05, "loss": 0.1869, "step": 18405 }, { "epoch": 0.3282916562622623, "grad_norm": 0.38528114557266235, "learning_rate": 4.247260894969166e-05, "loss": 0.1693, "step": 18406 }, { "epoch": 0.32830949238397605, "grad_norm": 0.2525404095649719, "learning_rate": 4.2471495678033524e-05, "loss": 0.1349, "step": 18407 }, { "epoch": 0.32832732850568974, "grad_norm": 0.27676740288734436, "learning_rate": 4.247038233864984e-05, "loss": 0.1418, "step": 18408 }, { "epoch": 0.32834516462740343, "grad_norm": 0.2454938292503357, "learning_rate": 4.246926893154492e-05, "loss": 0.16, "step": 18409 }, { "epoch": 0.3283630007491171, "grad_norm": 0.2871152460575104, "learning_rate": 4.246815545672308e-05, "loss": 0.177, "step": 18410 }, { "epoch": 0.3283808368708308, "grad_norm": 0.29965710639953613, "learning_rate": 4.246704191418863e-05, "loss": 0.1702, "step": 18411 }, { "epoch": 0.3283986729925445, "grad_norm": 0.225668266415596, "learning_rate": 4.24659283039459e-05, "loss": 0.1379, "step": 18412 }, { "epoch": 0.3284165091142582, "grad_norm": 0.30417150259017944, "learning_rate": 4.24648146259992e-05, "loss": 0.1898, "step": 18413 }, { "epoch": 0.32843434523597187, "grad_norm": 0.2466631531715393, "learning_rate": 4.246370088035284e-05, "loss": 0.1836, "step": 18414 }, { "epoch": 0.32845218135768556, "grad_norm": 0.21825666725635529, "learning_rate": 4.246258706701114e-05, "loss": 0.1564, "step": 18415 }, { "epoch": 0.3284700174793993, "grad_norm": 0.2429237961769104, "learning_rate": 4.246147318597844e-05, "loss": 0.176, "step": 18416 }, { "epoch": 0.328487853601113, "grad_norm": 0.3928097188472748, "learning_rate": 4.2460359237259016e-05, "loss": 0.207, "step": 18417 }, { "epoch": 0.3285056897228267, "grad_norm": 0.32049092650413513, "learning_rate": 4.2459245220857225e-05, "loss": 0.2578, "step": 18418 }, { "epoch": 0.32852352584454036, "grad_norm": 0.2793530225753784, "learning_rate": 4.245813113677736e-05, "loss": 0.1993, "step": 18419 }, { "epoch": 0.32854136196625405, "grad_norm": 0.2271171659231186, "learning_rate": 4.2457016985023756e-05, "loss": 0.1693, "step": 18420 }, { "epoch": 0.32855919808796774, "grad_norm": 0.2803175151348114, "learning_rate": 4.2455902765600724e-05, "loss": 0.2041, "step": 18421 }, { "epoch": 0.32857703420968143, "grad_norm": 0.29815348982810974, "learning_rate": 4.245478847851258e-05, "loss": 0.2603, "step": 18422 }, { "epoch": 0.3285948703313951, "grad_norm": 0.27385538816452026, "learning_rate": 4.2453674123763655e-05, "loss": 0.1344, "step": 18423 }, { "epoch": 0.32861270645310886, "grad_norm": 0.2498927265405655, "learning_rate": 4.245255970135825e-05, "loss": 0.1503, "step": 18424 }, { "epoch": 0.32863054257482255, "grad_norm": 0.2583667039871216, "learning_rate": 4.24514452113007e-05, "loss": 0.1918, "step": 18425 }, { "epoch": 0.32864837869653624, "grad_norm": 0.2544820308685303, "learning_rate": 4.245033065359532e-05, "loss": 0.1437, "step": 18426 }, { "epoch": 0.3286662148182499, "grad_norm": 0.24424763023853302, "learning_rate": 4.244921602824643e-05, "loss": 0.1826, "step": 18427 }, { "epoch": 0.3286840509399636, "grad_norm": 0.335256963968277, "learning_rate": 4.244810133525836e-05, "loss": 0.1749, "step": 18428 }, { "epoch": 0.3287018870616773, "grad_norm": 0.2927291989326477, "learning_rate": 4.2446986574635415e-05, "loss": 0.1631, "step": 18429 }, { "epoch": 0.328719723183391, "grad_norm": 0.37019386887550354, "learning_rate": 4.2445871746381927e-05, "loss": 0.1941, "step": 18430 }, { "epoch": 0.3287375593051047, "grad_norm": 0.3443300724029541, "learning_rate": 4.244475685050221e-05, "loss": 0.1655, "step": 18431 }, { "epoch": 0.3287553954268184, "grad_norm": 0.3643217384815216, "learning_rate": 4.24436418870006e-05, "loss": 0.1953, "step": 18432 }, { "epoch": 0.3287732315485321, "grad_norm": 0.24501550197601318, "learning_rate": 4.24425268558814e-05, "loss": 0.1802, "step": 18433 }, { "epoch": 0.3287910676702458, "grad_norm": 0.292643666267395, "learning_rate": 4.244141175714894e-05, "loss": 0.1916, "step": 18434 }, { "epoch": 0.3288089037919595, "grad_norm": 0.2928709089756012, "learning_rate": 4.244029659080755e-05, "loss": 0.1525, "step": 18435 }, { "epoch": 0.32882673991367317, "grad_norm": 0.26694509387016296, "learning_rate": 4.243918135686155e-05, "loss": 0.1618, "step": 18436 }, { "epoch": 0.32884457603538686, "grad_norm": 0.239248588681221, "learning_rate": 4.243806605531525e-05, "loss": 0.1874, "step": 18437 }, { "epoch": 0.32886241215710055, "grad_norm": 0.23508448898792267, "learning_rate": 4.243695068617299e-05, "loss": 0.1949, "step": 18438 }, { "epoch": 0.32888024827881424, "grad_norm": 0.25928160548210144, "learning_rate": 4.243583524943908e-05, "loss": 0.1392, "step": 18439 }, { "epoch": 0.3288980844005279, "grad_norm": 0.39715102314949036, "learning_rate": 4.243471974511786e-05, "loss": 0.1657, "step": 18440 }, { "epoch": 0.32891592052224167, "grad_norm": 0.2791080176830292, "learning_rate": 4.2433604173213634e-05, "loss": 0.2072, "step": 18441 }, { "epoch": 0.32893375664395536, "grad_norm": 0.20456264913082123, "learning_rate": 4.243248853373075e-05, "loss": 0.1172, "step": 18442 }, { "epoch": 0.32895159276566904, "grad_norm": 0.24758672714233398, "learning_rate": 4.243137282667351e-05, "loss": 0.1514, "step": 18443 }, { "epoch": 0.32896942888738273, "grad_norm": 0.3261067569255829, "learning_rate": 4.243025705204625e-05, "loss": 0.1031, "step": 18444 }, { "epoch": 0.3289872650090964, "grad_norm": 0.2676059901714325, "learning_rate": 4.2429141209853296e-05, "loss": 0.1389, "step": 18445 }, { "epoch": 0.3290051011308101, "grad_norm": 0.28574347496032715, "learning_rate": 4.2428025300098965e-05, "loss": 0.2363, "step": 18446 }, { "epoch": 0.3290229372525238, "grad_norm": 0.3670462369918823, "learning_rate": 4.242690932278759e-05, "loss": 0.1963, "step": 18447 }, { "epoch": 0.3290407733742375, "grad_norm": 0.26557764410972595, "learning_rate": 4.24257932779235e-05, "loss": 0.1884, "step": 18448 }, { "epoch": 0.3290586094959512, "grad_norm": 0.17650456726551056, "learning_rate": 4.2424677165511015e-05, "loss": 0.143, "step": 18449 }, { "epoch": 0.3290764456176649, "grad_norm": 0.3135792016983032, "learning_rate": 4.242356098555446e-05, "loss": 0.1892, "step": 18450 }, { "epoch": 0.3290942817393786, "grad_norm": 0.2925012409687042, "learning_rate": 4.242244473805816e-05, "loss": 0.1663, "step": 18451 }, { "epoch": 0.3291121178610923, "grad_norm": 0.3600432872772217, "learning_rate": 4.2421328423026465e-05, "loss": 0.2134, "step": 18452 }, { "epoch": 0.329129953982806, "grad_norm": 0.23821434378623962, "learning_rate": 4.242021204046367e-05, "loss": 0.1479, "step": 18453 }, { "epoch": 0.32914779010451967, "grad_norm": 0.2633552551269531, "learning_rate": 4.241909559037411e-05, "loss": 0.2214, "step": 18454 }, { "epoch": 0.32916562622623335, "grad_norm": 0.3139183521270752, "learning_rate": 4.241797907276214e-05, "loss": 0.1884, "step": 18455 }, { "epoch": 0.32918346234794704, "grad_norm": 0.2700309455394745, "learning_rate": 4.241686248763205e-05, "loss": 0.209, "step": 18456 }, { "epoch": 0.32920129846966073, "grad_norm": 0.3206835091114044, "learning_rate": 4.241574583498819e-05, "loss": 0.1928, "step": 18457 }, { "epoch": 0.3292191345913745, "grad_norm": 0.26301684975624084, "learning_rate": 4.2414629114834884e-05, "loss": 0.1378, "step": 18458 }, { "epoch": 0.32923697071308816, "grad_norm": 0.2872345447540283, "learning_rate": 4.241351232717647e-05, "loss": 0.1627, "step": 18459 }, { "epoch": 0.32925480683480185, "grad_norm": 0.40082958340644836, "learning_rate": 4.241239547201725e-05, "loss": 0.1309, "step": 18460 }, { "epoch": 0.32927264295651554, "grad_norm": 0.32533612847328186, "learning_rate": 4.241127854936158e-05, "loss": 0.1704, "step": 18461 }, { "epoch": 0.3292904790782292, "grad_norm": 0.29047784209251404, "learning_rate": 4.241016155921378e-05, "loss": 0.2423, "step": 18462 }, { "epoch": 0.3293083151999429, "grad_norm": 0.28228312730789185, "learning_rate": 4.240904450157818e-05, "loss": 0.1639, "step": 18463 }, { "epoch": 0.3293261513216566, "grad_norm": 0.323364794254303, "learning_rate": 4.240792737645911e-05, "loss": 0.1957, "step": 18464 }, { "epoch": 0.3293439874433703, "grad_norm": 0.2613949775695801, "learning_rate": 4.2406810183860904e-05, "loss": 0.1732, "step": 18465 }, { "epoch": 0.32936182356508403, "grad_norm": 0.2527284324169159, "learning_rate": 4.2405692923787886e-05, "loss": 0.1751, "step": 18466 }, { "epoch": 0.3293796596867977, "grad_norm": 0.22423000633716583, "learning_rate": 4.24045755962444e-05, "loss": 0.1856, "step": 18467 }, { "epoch": 0.3293974958085114, "grad_norm": 0.23415596783161163, "learning_rate": 4.240345820123476e-05, "loss": 0.1824, "step": 18468 }, { "epoch": 0.3294153319302251, "grad_norm": 0.23365390300750732, "learning_rate": 4.24023407387633e-05, "loss": 0.1735, "step": 18469 }, { "epoch": 0.3294331680519388, "grad_norm": 0.4614911675453186, "learning_rate": 4.240122320883436e-05, "loss": 0.1293, "step": 18470 }, { "epoch": 0.3294510041736525, "grad_norm": 0.2389400750398636, "learning_rate": 4.2400105611452276e-05, "loss": 0.1366, "step": 18471 }, { "epoch": 0.32946884029536616, "grad_norm": 0.26551803946495056, "learning_rate": 4.239898794662137e-05, "loss": 0.1606, "step": 18472 }, { "epoch": 0.32948667641707985, "grad_norm": 0.21386370062828064, "learning_rate": 4.239787021434597e-05, "loss": 0.1431, "step": 18473 }, { "epoch": 0.3295045125387936, "grad_norm": 0.3108559548854828, "learning_rate": 4.239675241463042e-05, "loss": 0.186, "step": 18474 }, { "epoch": 0.3295223486605073, "grad_norm": 0.3738678991794586, "learning_rate": 4.239563454747906e-05, "loss": 0.1575, "step": 18475 }, { "epoch": 0.32954018478222097, "grad_norm": 0.2810070514678955, "learning_rate": 4.2394516612896194e-05, "loss": 0.1711, "step": 18476 }, { "epoch": 0.32955802090393466, "grad_norm": 0.30859261751174927, "learning_rate": 4.239339861088618e-05, "loss": 0.1779, "step": 18477 }, { "epoch": 0.32957585702564834, "grad_norm": 0.4246044158935547, "learning_rate": 4.239228054145335e-05, "loss": 0.185, "step": 18478 }, { "epoch": 0.32959369314736203, "grad_norm": 0.2715553939342499, "learning_rate": 4.2391162404602036e-05, "loss": 0.1603, "step": 18479 }, { "epoch": 0.3296115292690757, "grad_norm": 0.34074312448501587, "learning_rate": 4.239004420033656e-05, "loss": 0.156, "step": 18480 }, { "epoch": 0.3296293653907894, "grad_norm": 0.2593488395214081, "learning_rate": 4.2388925928661274e-05, "loss": 0.1727, "step": 18481 }, { "epoch": 0.3296472015125031, "grad_norm": 0.4301450252532959, "learning_rate": 4.2387807589580495e-05, "loss": 0.2347, "step": 18482 }, { "epoch": 0.32966503763421684, "grad_norm": 0.27101439237594604, "learning_rate": 4.238668918309858e-05, "loss": 0.1846, "step": 18483 }, { "epoch": 0.32968287375593053, "grad_norm": 0.2908579111099243, "learning_rate": 4.238557070921985e-05, "loss": 0.1393, "step": 18484 }, { "epoch": 0.3297007098776442, "grad_norm": 0.3351699113845825, "learning_rate": 4.238445216794864e-05, "loss": 0.1807, "step": 18485 }, { "epoch": 0.3297185459993579, "grad_norm": 0.2559354901313782, "learning_rate": 4.238333355928929e-05, "loss": 0.1361, "step": 18486 }, { "epoch": 0.3297363821210716, "grad_norm": 0.24197784066200256, "learning_rate": 4.2382214883246134e-05, "loss": 0.1406, "step": 18487 }, { "epoch": 0.3297542182427853, "grad_norm": 0.2392469346523285, "learning_rate": 4.238109613982352e-05, "loss": 0.1364, "step": 18488 }, { "epoch": 0.32977205436449897, "grad_norm": 0.4118051826953888, "learning_rate": 4.2379977329025755e-05, "loss": 0.1935, "step": 18489 }, { "epoch": 0.32978989048621266, "grad_norm": 0.2652823030948639, "learning_rate": 4.2378858450857207e-05, "loss": 0.174, "step": 18490 }, { "epoch": 0.3298077266079264, "grad_norm": 0.27608683705329895, "learning_rate": 4.23777395053222e-05, "loss": 0.1676, "step": 18491 }, { "epoch": 0.3298255627296401, "grad_norm": 0.22772575914859772, "learning_rate": 4.2376620492425075e-05, "loss": 0.1457, "step": 18492 }, { "epoch": 0.3298433988513538, "grad_norm": 0.3159489035606384, "learning_rate": 4.237550141217016e-05, "loss": 0.154, "step": 18493 }, { "epoch": 0.32986123497306746, "grad_norm": 0.27029287815093994, "learning_rate": 4.2374382264561806e-05, "loss": 0.1786, "step": 18494 }, { "epoch": 0.32987907109478115, "grad_norm": 0.21946412324905396, "learning_rate": 4.237326304960434e-05, "loss": 0.1937, "step": 18495 }, { "epoch": 0.32989690721649484, "grad_norm": 0.23954299092292786, "learning_rate": 4.2372143767302113e-05, "loss": 0.1345, "step": 18496 }, { "epoch": 0.3299147433382085, "grad_norm": 0.19765381515026093, "learning_rate": 4.2371024417659455e-05, "loss": 0.1778, "step": 18497 }, { "epoch": 0.3299325794599222, "grad_norm": 0.2995823323726654, "learning_rate": 4.23699050006807e-05, "loss": 0.191, "step": 18498 }, { "epoch": 0.3299504155816359, "grad_norm": 0.30685746669769287, "learning_rate": 4.23687855163702e-05, "loss": 0.1709, "step": 18499 }, { "epoch": 0.32996825170334965, "grad_norm": 0.26829075813293457, "learning_rate": 4.236766596473229e-05, "loss": 0.2112, "step": 18500 }, { "epoch": 0.32998608782506333, "grad_norm": 0.3012070059776306, "learning_rate": 4.2366546345771305e-05, "loss": 0.1842, "step": 18501 }, { "epoch": 0.330003923946777, "grad_norm": 0.5140218734741211, "learning_rate": 4.236542665949158e-05, "loss": 0.243, "step": 18502 }, { "epoch": 0.3300217600684907, "grad_norm": 0.27802202105522156, "learning_rate": 4.2364306905897475e-05, "loss": 0.1633, "step": 18503 }, { "epoch": 0.3300395961902044, "grad_norm": 0.23743318021297455, "learning_rate": 4.236318708499332e-05, "loss": 0.1878, "step": 18504 }, { "epoch": 0.3300574323119181, "grad_norm": 0.2713869512081146, "learning_rate": 4.236206719678345e-05, "loss": 0.166, "step": 18505 }, { "epoch": 0.3300752684336318, "grad_norm": 0.29285669326782227, "learning_rate": 4.236094724127221e-05, "loss": 0.2136, "step": 18506 }, { "epoch": 0.33009310455534546, "grad_norm": 0.3492244482040405, "learning_rate": 4.235982721846394e-05, "loss": 0.1721, "step": 18507 }, { "epoch": 0.3301109406770592, "grad_norm": 0.23571038246154785, "learning_rate": 4.235870712836299e-05, "loss": 0.1601, "step": 18508 }, { "epoch": 0.3301287767987729, "grad_norm": 0.1872876137495041, "learning_rate": 4.235758697097369e-05, "loss": 0.1336, "step": 18509 }, { "epoch": 0.3301466129204866, "grad_norm": 0.2855657637119293, "learning_rate": 4.2356466746300395e-05, "loss": 0.1812, "step": 18510 }, { "epoch": 0.33016444904220027, "grad_norm": 0.2859123647212982, "learning_rate": 4.235534645434743e-05, "loss": 0.1967, "step": 18511 }, { "epoch": 0.33018228516391396, "grad_norm": 0.2978751063346863, "learning_rate": 4.235422609511916e-05, "loss": 0.1979, "step": 18512 }, { "epoch": 0.33020012128562765, "grad_norm": 0.2722541391849518, "learning_rate": 4.235310566861991e-05, "loss": 0.1795, "step": 18513 }, { "epoch": 0.33021795740734133, "grad_norm": 0.24375705420970917, "learning_rate": 4.2351985174854024e-05, "loss": 0.1601, "step": 18514 }, { "epoch": 0.330235793529055, "grad_norm": 0.2665567696094513, "learning_rate": 4.235086461382586e-05, "loss": 0.1969, "step": 18515 }, { "epoch": 0.3302536296507687, "grad_norm": 0.24968458712100983, "learning_rate": 4.2349743985539744e-05, "loss": 0.1772, "step": 18516 }, { "epoch": 0.33027146577248245, "grad_norm": 0.2429434359073639, "learning_rate": 4.234862329000003e-05, "loss": 0.147, "step": 18517 }, { "epoch": 0.33028930189419614, "grad_norm": 0.32692569494247437, "learning_rate": 4.2347502527211066e-05, "loss": 0.2007, "step": 18518 }, { "epoch": 0.33030713801590983, "grad_norm": 0.20782728493213654, "learning_rate": 4.2346381697177186e-05, "loss": 0.1583, "step": 18519 }, { "epoch": 0.3303249741376235, "grad_norm": 0.4226702153682709, "learning_rate": 4.234526079990273e-05, "loss": 0.2149, "step": 18520 }, { "epoch": 0.3303428102593372, "grad_norm": 0.21528585255146027, "learning_rate": 4.2344139835392065e-05, "loss": 0.1671, "step": 18521 }, { "epoch": 0.3303606463810509, "grad_norm": 0.3536587357521057, "learning_rate": 4.234301880364952e-05, "loss": 0.2172, "step": 18522 }, { "epoch": 0.3303784825027646, "grad_norm": 0.2704784870147705, "learning_rate": 4.2341897704679445e-05, "loss": 0.1313, "step": 18523 }, { "epoch": 0.33039631862447827, "grad_norm": 0.231796532869339, "learning_rate": 4.234077653848618e-05, "loss": 0.171, "step": 18524 }, { "epoch": 0.330414154746192, "grad_norm": 0.22569707036018372, "learning_rate": 4.2339655305074075e-05, "loss": 0.2, "step": 18525 }, { "epoch": 0.3304319908679057, "grad_norm": 0.3265334963798523, "learning_rate": 4.2338534004447486e-05, "loss": 0.1638, "step": 18526 }, { "epoch": 0.3304498269896194, "grad_norm": 0.390085369348526, "learning_rate": 4.233741263661075e-05, "loss": 0.195, "step": 18527 }, { "epoch": 0.3304676631113331, "grad_norm": 0.31128254532814026, "learning_rate": 4.23362912015682e-05, "loss": 0.1843, "step": 18528 }, { "epoch": 0.33048549923304676, "grad_norm": 0.4441210627555847, "learning_rate": 4.233516969932422e-05, "loss": 0.2224, "step": 18529 }, { "epoch": 0.33050333535476045, "grad_norm": 0.30675169825553894, "learning_rate": 4.233404812988312e-05, "loss": 0.1153, "step": 18530 }, { "epoch": 0.33052117147647414, "grad_norm": 0.2166266143321991, "learning_rate": 4.233292649324926e-05, "loss": 0.1663, "step": 18531 }, { "epoch": 0.33053900759818783, "grad_norm": 0.3506048619747162, "learning_rate": 4.2331804789427e-05, "loss": 0.2411, "step": 18532 }, { "epoch": 0.33055684371990157, "grad_norm": 0.3138313591480255, "learning_rate": 4.233068301842067e-05, "loss": 0.2076, "step": 18533 }, { "epoch": 0.33057467984161526, "grad_norm": 0.2720378339290619, "learning_rate": 4.2329561180234634e-05, "loss": 0.1463, "step": 18534 }, { "epoch": 0.33059251596332895, "grad_norm": 0.3374897837638855, "learning_rate": 4.232843927487323e-05, "loss": 0.2054, "step": 18535 }, { "epoch": 0.33061035208504264, "grad_norm": 0.24583639204502106, "learning_rate": 4.2327317302340804e-05, "loss": 0.163, "step": 18536 }, { "epoch": 0.3306281882067563, "grad_norm": 0.35280197858810425, "learning_rate": 4.232619526264172e-05, "loss": 0.2484, "step": 18537 }, { "epoch": 0.33064602432847, "grad_norm": 0.2738872468471527, "learning_rate": 4.2325073155780315e-05, "loss": 0.1718, "step": 18538 }, { "epoch": 0.3306638604501837, "grad_norm": 0.35030362010002136, "learning_rate": 4.2323950981760944e-05, "loss": 0.1766, "step": 18539 }, { "epoch": 0.3306816965718974, "grad_norm": 0.2816300392150879, "learning_rate": 4.232282874058796e-05, "loss": 0.2027, "step": 18540 }, { "epoch": 0.3306995326936111, "grad_norm": 0.2774301767349243, "learning_rate": 4.232170643226571e-05, "loss": 0.1856, "step": 18541 }, { "epoch": 0.3307173688153248, "grad_norm": 0.2686683237552643, "learning_rate": 4.232058405679853e-05, "loss": 0.2048, "step": 18542 }, { "epoch": 0.3307352049370385, "grad_norm": 0.2277437299489975, "learning_rate": 4.2319461614190793e-05, "loss": 0.1058, "step": 18543 }, { "epoch": 0.3307530410587522, "grad_norm": 0.21263213455677032, "learning_rate": 4.2318339104446844e-05, "loss": 0.1802, "step": 18544 }, { "epoch": 0.3307708771804659, "grad_norm": 0.24947726726531982, "learning_rate": 4.231721652757102e-05, "loss": 0.1625, "step": 18545 }, { "epoch": 0.33078871330217957, "grad_norm": 0.32679250836372375, "learning_rate": 4.2316093883567695e-05, "loss": 0.17, "step": 18546 }, { "epoch": 0.33080654942389326, "grad_norm": 0.2497401237487793, "learning_rate": 4.2314971172441195e-05, "loss": 0.1582, "step": 18547 }, { "epoch": 0.33082438554560695, "grad_norm": 0.35179224610328674, "learning_rate": 4.23138483941959e-05, "loss": 0.1829, "step": 18548 }, { "epoch": 0.33084222166732064, "grad_norm": 0.2542244791984558, "learning_rate": 4.2312725548836144e-05, "loss": 0.1622, "step": 18549 }, { "epoch": 0.3308600577890344, "grad_norm": 0.3894548714160919, "learning_rate": 4.231160263636629e-05, "loss": 0.1783, "step": 18550 }, { "epoch": 0.33087789391074807, "grad_norm": 0.30261433124542236, "learning_rate": 4.231047965679067e-05, "loss": 0.1755, "step": 18551 }, { "epoch": 0.33089573003246175, "grad_norm": 0.26256614923477173, "learning_rate": 4.230935661011367e-05, "loss": 0.1328, "step": 18552 }, { "epoch": 0.33091356615417544, "grad_norm": 0.24681198596954346, "learning_rate": 4.230823349633961e-05, "loss": 0.1312, "step": 18553 }, { "epoch": 0.33093140227588913, "grad_norm": 0.49699294567108154, "learning_rate": 4.230711031547286e-05, "loss": 0.21, "step": 18554 }, { "epoch": 0.3309492383976028, "grad_norm": 0.4116224944591522, "learning_rate": 4.230598706751779e-05, "loss": 0.1647, "step": 18555 }, { "epoch": 0.3309670745193165, "grad_norm": 0.3963545858860016, "learning_rate": 4.230486375247872e-05, "loss": 0.207, "step": 18556 }, { "epoch": 0.3309849106410302, "grad_norm": 0.22137166559696198, "learning_rate": 4.230374037036003e-05, "loss": 0.1497, "step": 18557 }, { "epoch": 0.3310027467627439, "grad_norm": 0.2381601482629776, "learning_rate": 4.230261692116606e-05, "loss": 0.1583, "step": 18558 }, { "epoch": 0.3310205828844576, "grad_norm": 0.284512996673584, "learning_rate": 4.230149340490117e-05, "loss": 0.1561, "step": 18559 }, { "epoch": 0.3310384190061713, "grad_norm": 0.22340558469295502, "learning_rate": 4.230036982156972e-05, "loss": 0.1429, "step": 18560 }, { "epoch": 0.331056255127885, "grad_norm": 0.2640310525894165, "learning_rate": 4.229924617117606e-05, "loss": 0.1536, "step": 18561 }, { "epoch": 0.3310740912495987, "grad_norm": 0.26734215021133423, "learning_rate": 4.229812245372454e-05, "loss": 0.161, "step": 18562 }, { "epoch": 0.3310919273713124, "grad_norm": 0.3465871214866638, "learning_rate": 4.2296998669219535e-05, "loss": 0.202, "step": 18563 }, { "epoch": 0.33110976349302607, "grad_norm": 0.2486545294523239, "learning_rate": 4.2295874817665385e-05, "loss": 0.1349, "step": 18564 }, { "epoch": 0.33112759961473975, "grad_norm": 0.27449294924736023, "learning_rate": 4.229475089906645e-05, "loss": 0.1151, "step": 18565 }, { "epoch": 0.33114543573645344, "grad_norm": 0.27632424235343933, "learning_rate": 4.2293626913427085e-05, "loss": 0.1692, "step": 18566 }, { "epoch": 0.3311632718581672, "grad_norm": 0.31477048993110657, "learning_rate": 4.229250286075165e-05, "loss": 0.1354, "step": 18567 }, { "epoch": 0.3311811079798809, "grad_norm": 0.23741310834884644, "learning_rate": 4.22913787410445e-05, "loss": 0.1868, "step": 18568 }, { "epoch": 0.33119894410159456, "grad_norm": 0.2532624304294586, "learning_rate": 4.2290254554309994e-05, "loss": 0.1407, "step": 18569 }, { "epoch": 0.33121678022330825, "grad_norm": 0.339575856924057, "learning_rate": 4.2289130300552494e-05, "loss": 0.27, "step": 18570 }, { "epoch": 0.33123461634502194, "grad_norm": 0.3205280303955078, "learning_rate": 4.2288005979776345e-05, "loss": 0.1445, "step": 18571 }, { "epoch": 0.3312524524667356, "grad_norm": 0.2817758321762085, "learning_rate": 4.2286881591985924e-05, "loss": 0.1586, "step": 18572 }, { "epoch": 0.3312702885884493, "grad_norm": 0.3039351999759674, "learning_rate": 4.2285757137185575e-05, "loss": 0.1379, "step": 18573 }, { "epoch": 0.331288124710163, "grad_norm": 0.29977691173553467, "learning_rate": 4.228463261537966e-05, "loss": 0.1962, "step": 18574 }, { "epoch": 0.3313059608318767, "grad_norm": 0.34099602699279785, "learning_rate": 4.228350802657254e-05, "loss": 0.1707, "step": 18575 }, { "epoch": 0.33132379695359043, "grad_norm": 0.3081951141357422, "learning_rate": 4.228238337076857e-05, "loss": 0.1797, "step": 18576 }, { "epoch": 0.3313416330753041, "grad_norm": 0.29143157601356506, "learning_rate": 4.228125864797211e-05, "loss": 0.1891, "step": 18577 }, { "epoch": 0.3313594691970178, "grad_norm": 0.22725003957748413, "learning_rate": 4.228013385818753e-05, "loss": 0.1511, "step": 18578 }, { "epoch": 0.3313773053187315, "grad_norm": 0.310830682516098, "learning_rate": 4.2279009001419184e-05, "loss": 0.1945, "step": 18579 }, { "epoch": 0.3313951414404452, "grad_norm": 0.2794807553291321, "learning_rate": 4.2277884077671424e-05, "loss": 0.1742, "step": 18580 }, { "epoch": 0.3314129775621589, "grad_norm": 0.3090817332267761, "learning_rate": 4.2276759086948626e-05, "loss": 0.2105, "step": 18581 }, { "epoch": 0.33143081368387256, "grad_norm": 0.28938645124435425, "learning_rate": 4.227563402925514e-05, "loss": 0.1995, "step": 18582 }, { "epoch": 0.33144864980558625, "grad_norm": 0.4023700952529907, "learning_rate": 4.227450890459532e-05, "loss": 0.2434, "step": 18583 }, { "epoch": 0.3314664859273, "grad_norm": 0.425690233707428, "learning_rate": 4.2273383712973545e-05, "loss": 0.1813, "step": 18584 }, { "epoch": 0.3314843220490137, "grad_norm": 0.29323694109916687, "learning_rate": 4.2272258454394176e-05, "loss": 0.1479, "step": 18585 }, { "epoch": 0.33150215817072737, "grad_norm": 0.30246469378471375, "learning_rate": 4.2271133128861554e-05, "loss": 0.1413, "step": 18586 }, { "epoch": 0.33151999429244106, "grad_norm": 0.29485321044921875, "learning_rate": 4.2270007736380066e-05, "loss": 0.1248, "step": 18587 }, { "epoch": 0.33153783041415474, "grad_norm": 0.3434136211872101, "learning_rate": 4.226888227695406e-05, "loss": 0.22, "step": 18588 }, { "epoch": 0.33155566653586843, "grad_norm": 0.35784029960632324, "learning_rate": 4.2267756750587894e-05, "loss": 0.1284, "step": 18589 }, { "epoch": 0.3315735026575821, "grad_norm": 0.2941616475582123, "learning_rate": 4.2266631157285945e-05, "loss": 0.1428, "step": 18590 }, { "epoch": 0.3315913387792958, "grad_norm": 0.28665873408317566, "learning_rate": 4.226550549705257e-05, "loss": 0.1511, "step": 18591 }, { "epoch": 0.33160917490100955, "grad_norm": 0.2732289433479309, "learning_rate": 4.2264379769892136e-05, "loss": 0.1591, "step": 18592 }, { "epoch": 0.33162701102272324, "grad_norm": 0.30442890524864197, "learning_rate": 4.2263253975808996e-05, "loss": 0.2504, "step": 18593 }, { "epoch": 0.3316448471444369, "grad_norm": 0.22205771505832672, "learning_rate": 4.226212811480752e-05, "loss": 0.1949, "step": 18594 }, { "epoch": 0.3316626832661506, "grad_norm": 0.22176052629947662, "learning_rate": 4.226100218689209e-05, "loss": 0.1501, "step": 18595 }, { "epoch": 0.3316805193878643, "grad_norm": 0.311583548784256, "learning_rate": 4.225987619206704e-05, "loss": 0.1464, "step": 18596 }, { "epoch": 0.331698355509578, "grad_norm": 0.20730595290660858, "learning_rate": 4.225875013033675e-05, "loss": 0.1782, "step": 18597 }, { "epoch": 0.3317161916312917, "grad_norm": 0.41936686635017395, "learning_rate": 4.225762400170558e-05, "loss": 0.2092, "step": 18598 }, { "epoch": 0.33173402775300537, "grad_norm": 0.24077372252941132, "learning_rate": 4.2256497806177895e-05, "loss": 0.1574, "step": 18599 }, { "epoch": 0.33175186387471906, "grad_norm": 0.22923429310321808, "learning_rate": 4.2255371543758075e-05, "loss": 0.1534, "step": 18600 }, { "epoch": 0.3317696999964328, "grad_norm": 0.24850907921791077, "learning_rate": 4.225424521445047e-05, "loss": 0.1717, "step": 18601 }, { "epoch": 0.3317875361181465, "grad_norm": 0.3402264714241028, "learning_rate": 4.2253118818259454e-05, "loss": 0.1619, "step": 18602 }, { "epoch": 0.3318053722398602, "grad_norm": 0.37563788890838623, "learning_rate": 4.225199235518939e-05, "loss": 0.1812, "step": 18603 }, { "epoch": 0.33182320836157386, "grad_norm": 0.25607120990753174, "learning_rate": 4.225086582524465e-05, "loss": 0.1485, "step": 18604 }, { "epoch": 0.33184104448328755, "grad_norm": 0.3265029489994049, "learning_rate": 4.224973922842958e-05, "loss": 0.1834, "step": 18605 }, { "epoch": 0.33185888060500124, "grad_norm": 0.2571837306022644, "learning_rate": 4.224861256474858e-05, "loss": 0.1837, "step": 18606 }, { "epoch": 0.3318767167267149, "grad_norm": 0.256216436624527, "learning_rate": 4.224748583420599e-05, "loss": 0.1953, "step": 18607 }, { "epoch": 0.3318945528484286, "grad_norm": 0.33437854051589966, "learning_rate": 4.224635903680619e-05, "loss": 0.1653, "step": 18608 }, { "epoch": 0.33191238897014236, "grad_norm": 0.24014635384082794, "learning_rate": 4.224523217255355e-05, "loss": 0.1689, "step": 18609 }, { "epoch": 0.33193022509185605, "grad_norm": 0.2903948724269867, "learning_rate": 4.2244105241452425e-05, "loss": 0.1395, "step": 18610 }, { "epoch": 0.33194806121356973, "grad_norm": 0.2848571240901947, "learning_rate": 4.2242978243507195e-05, "loss": 0.1533, "step": 18611 }, { "epoch": 0.3319658973352834, "grad_norm": 0.2606837749481201, "learning_rate": 4.224185117872223e-05, "loss": 0.1585, "step": 18612 }, { "epoch": 0.3319837334569971, "grad_norm": 0.26199620962142944, "learning_rate": 4.22407240471019e-05, "loss": 0.1296, "step": 18613 }, { "epoch": 0.3320015695787108, "grad_norm": 0.23331594467163086, "learning_rate": 4.2239596848650553e-05, "loss": 0.1614, "step": 18614 }, { "epoch": 0.3320194057004245, "grad_norm": 0.30470937490463257, "learning_rate": 4.2238469583372584e-05, "loss": 0.189, "step": 18615 }, { "epoch": 0.3320372418221382, "grad_norm": 0.2593975365161896, "learning_rate": 4.223734225127235e-05, "loss": 0.1602, "step": 18616 }, { "epoch": 0.33205507794385186, "grad_norm": 0.28764283657073975, "learning_rate": 4.223621485235423e-05, "loss": 0.1441, "step": 18617 }, { "epoch": 0.3320729140655656, "grad_norm": 0.2824632525444031, "learning_rate": 4.223508738662259e-05, "loss": 0.1612, "step": 18618 }, { "epoch": 0.3320907501872793, "grad_norm": 0.5225083827972412, "learning_rate": 4.223395985408178e-05, "loss": 0.1899, "step": 18619 }, { "epoch": 0.332108586308993, "grad_norm": 0.19410806894302368, "learning_rate": 4.223283225473621e-05, "loss": 0.1289, "step": 18620 }, { "epoch": 0.33212642243070667, "grad_norm": 0.23426847159862518, "learning_rate": 4.2231704588590214e-05, "loss": 0.2083, "step": 18621 }, { "epoch": 0.33214425855242036, "grad_norm": 0.2894057631492615, "learning_rate": 4.223057685564819e-05, "loss": 0.1419, "step": 18622 }, { "epoch": 0.33216209467413405, "grad_norm": 0.22195616364479065, "learning_rate": 4.2229449055914495e-05, "loss": 0.1455, "step": 18623 }, { "epoch": 0.33217993079584773, "grad_norm": 0.2332436442375183, "learning_rate": 4.2228321189393505e-05, "loss": 0.1823, "step": 18624 }, { "epoch": 0.3321977669175614, "grad_norm": 0.34190788865089417, "learning_rate": 4.222719325608959e-05, "loss": 0.144, "step": 18625 }, { "epoch": 0.33221560303927516, "grad_norm": 0.22976568341255188, "learning_rate": 4.222606525600713e-05, "loss": 0.1605, "step": 18626 }, { "epoch": 0.33223343916098885, "grad_norm": 0.2539297044277191, "learning_rate": 4.2224937189150484e-05, "loss": 0.1671, "step": 18627 }, { "epoch": 0.33225127528270254, "grad_norm": 0.3443310260772705, "learning_rate": 4.222380905552404e-05, "loss": 0.204, "step": 18628 }, { "epoch": 0.33226911140441623, "grad_norm": 0.40971699357032776, "learning_rate": 4.222268085513216e-05, "loss": 0.1999, "step": 18629 }, { "epoch": 0.3322869475261299, "grad_norm": 0.247114896774292, "learning_rate": 4.222155258797922e-05, "loss": 0.1647, "step": 18630 }, { "epoch": 0.3323047836478436, "grad_norm": 0.2625199854373932, "learning_rate": 4.222042425406959e-05, "loss": 0.1446, "step": 18631 }, { "epoch": 0.3323226197695573, "grad_norm": 0.35182222723960876, "learning_rate": 4.2219295853407647e-05, "loss": 0.1802, "step": 18632 }, { "epoch": 0.332340455891271, "grad_norm": 0.26384562253952026, "learning_rate": 4.221816738599778e-05, "loss": 0.1807, "step": 18633 }, { "epoch": 0.3323582920129847, "grad_norm": 0.3444817364215851, "learning_rate": 4.2217038851844335e-05, "loss": 0.1556, "step": 18634 }, { "epoch": 0.3323761281346984, "grad_norm": 0.4344008266925812, "learning_rate": 4.22159102509517e-05, "loss": 0.2188, "step": 18635 }, { "epoch": 0.3323939642564121, "grad_norm": 0.1910325288772583, "learning_rate": 4.221478158332426e-05, "loss": 0.1672, "step": 18636 }, { "epoch": 0.3324118003781258, "grad_norm": 0.23896032571792603, "learning_rate": 4.221365284896637e-05, "loss": 0.1927, "step": 18637 }, { "epoch": 0.3324296364998395, "grad_norm": 0.28261053562164307, "learning_rate": 4.221252404788242e-05, "loss": 0.1974, "step": 18638 }, { "epoch": 0.33244747262155316, "grad_norm": 0.30259254574775696, "learning_rate": 4.221139518007679e-05, "loss": 0.183, "step": 18639 }, { "epoch": 0.33246530874326685, "grad_norm": 0.246919646859169, "learning_rate": 4.221026624555384e-05, "loss": 0.1836, "step": 18640 }, { "epoch": 0.33248314486498054, "grad_norm": 0.20554570853710175, "learning_rate": 4.2209137244317956e-05, "loss": 0.1848, "step": 18641 }, { "epoch": 0.33250098098669423, "grad_norm": 0.2147049754858017, "learning_rate": 4.220800817637351e-05, "loss": 0.1928, "step": 18642 }, { "epoch": 0.33251881710840797, "grad_norm": 0.2581635117530823, "learning_rate": 4.220687904172489e-05, "loss": 0.1625, "step": 18643 }, { "epoch": 0.33253665323012166, "grad_norm": 0.26008719205856323, "learning_rate": 4.220574984037645e-05, "loss": 0.1725, "step": 18644 }, { "epoch": 0.33255448935183535, "grad_norm": 0.25723814964294434, "learning_rate": 4.220462057233259e-05, "loss": 0.138, "step": 18645 }, { "epoch": 0.33257232547354904, "grad_norm": 0.30129143595695496, "learning_rate": 4.2203491237597674e-05, "loss": 0.1515, "step": 18646 }, { "epoch": 0.3325901615952627, "grad_norm": 0.21004438400268555, "learning_rate": 4.2202361836176087e-05, "loss": 0.1605, "step": 18647 }, { "epoch": 0.3326079977169764, "grad_norm": 0.2537418603897095, "learning_rate": 4.220123236807221e-05, "loss": 0.2125, "step": 18648 }, { "epoch": 0.3326258338386901, "grad_norm": 0.21132661402225494, "learning_rate": 4.22001028332904e-05, "loss": 0.1336, "step": 18649 }, { "epoch": 0.3326436699604038, "grad_norm": 0.300246924161911, "learning_rate": 4.219897323183506e-05, "loss": 0.2098, "step": 18650 }, { "epoch": 0.33266150608211753, "grad_norm": 0.3238065838813782, "learning_rate": 4.219784356371056e-05, "loss": 0.2127, "step": 18651 }, { "epoch": 0.3326793422038312, "grad_norm": 0.23425902426242828, "learning_rate": 4.219671382892127e-05, "loss": 0.1537, "step": 18652 }, { "epoch": 0.3326971783255449, "grad_norm": 0.34821856021881104, "learning_rate": 4.219558402747159e-05, "loss": 0.2092, "step": 18653 }, { "epoch": 0.3327150144472586, "grad_norm": 0.19794493913650513, "learning_rate": 4.219445415936588e-05, "loss": 0.0962, "step": 18654 }, { "epoch": 0.3327328505689723, "grad_norm": 0.3300984799861908, "learning_rate": 4.219332422460853e-05, "loss": 0.2468, "step": 18655 }, { "epoch": 0.33275068669068597, "grad_norm": 0.26174670457839966, "learning_rate": 4.219219422320392e-05, "loss": 0.171, "step": 18656 }, { "epoch": 0.33276852281239966, "grad_norm": 0.23382668197155, "learning_rate": 4.219106415515642e-05, "loss": 0.1685, "step": 18657 }, { "epoch": 0.33278635893411335, "grad_norm": 0.32599666714668274, "learning_rate": 4.2189934020470415e-05, "loss": 0.1452, "step": 18658 }, { "epoch": 0.33280419505582703, "grad_norm": 0.21382218599319458, "learning_rate": 4.21888038191503e-05, "loss": 0.1606, "step": 18659 }, { "epoch": 0.3328220311775408, "grad_norm": 0.34457188844680786, "learning_rate": 4.218767355120044e-05, "loss": 0.1367, "step": 18660 }, { "epoch": 0.33283986729925447, "grad_norm": 0.24509088695049286, "learning_rate": 4.218654321662522e-05, "loss": 0.1743, "step": 18661 }, { "epoch": 0.33285770342096815, "grad_norm": 0.21297286450862885, "learning_rate": 4.2185412815429013e-05, "loss": 0.1497, "step": 18662 }, { "epoch": 0.33287553954268184, "grad_norm": 0.2544233798980713, "learning_rate": 4.218428234761622e-05, "loss": 0.1676, "step": 18663 }, { "epoch": 0.33289337566439553, "grad_norm": 0.2422240823507309, "learning_rate": 4.2183151813191215e-05, "loss": 0.0894, "step": 18664 }, { "epoch": 0.3329112117861092, "grad_norm": 0.28995004296302795, "learning_rate": 4.2182021212158376e-05, "loss": 0.1642, "step": 18665 }, { "epoch": 0.3329290479078229, "grad_norm": 0.2857154607772827, "learning_rate": 4.218089054452209e-05, "loss": 0.1771, "step": 18666 }, { "epoch": 0.3329468840295366, "grad_norm": 0.21182294189929962, "learning_rate": 4.2179759810286734e-05, "loss": 0.139, "step": 18667 }, { "epoch": 0.33296472015125034, "grad_norm": 0.2658856511116028, "learning_rate": 4.217862900945669e-05, "loss": 0.16, "step": 18668 }, { "epoch": 0.332982556272964, "grad_norm": 0.3196842670440674, "learning_rate": 4.217749814203636e-05, "loss": 0.1521, "step": 18669 }, { "epoch": 0.3330003923946777, "grad_norm": 0.3064384162425995, "learning_rate": 4.217636720803011e-05, "loss": 0.1316, "step": 18670 }, { "epoch": 0.3330182285163914, "grad_norm": 0.4255395233631134, "learning_rate": 4.217523620744233e-05, "loss": 0.2148, "step": 18671 }, { "epoch": 0.3330360646381051, "grad_norm": 0.25622081756591797, "learning_rate": 4.21741051402774e-05, "loss": 0.1632, "step": 18672 }, { "epoch": 0.3330539007598188, "grad_norm": 0.2683780789375305, "learning_rate": 4.21729740065397e-05, "loss": 0.1317, "step": 18673 }, { "epoch": 0.33307173688153247, "grad_norm": 0.33805572986602783, "learning_rate": 4.217184280623363e-05, "loss": 0.1889, "step": 18674 }, { "epoch": 0.33308957300324615, "grad_norm": 0.33626481890678406, "learning_rate": 4.217071153936356e-05, "loss": 0.1635, "step": 18675 }, { "epoch": 0.33310740912495984, "grad_norm": 0.19020210206508636, "learning_rate": 4.216958020593389e-05, "loss": 0.1613, "step": 18676 }, { "epoch": 0.3331252452466736, "grad_norm": 0.35197606682777405, "learning_rate": 4.216844880594899e-05, "loss": 0.1529, "step": 18677 }, { "epoch": 0.3331430813683873, "grad_norm": 0.23571282625198364, "learning_rate": 4.2167317339413256e-05, "loss": 0.1734, "step": 18678 }, { "epoch": 0.33316091749010096, "grad_norm": 0.3509941101074219, "learning_rate": 4.216618580633107e-05, "loss": 0.1884, "step": 18679 }, { "epoch": 0.33317875361181465, "grad_norm": 0.20872823894023895, "learning_rate": 4.2165054206706825e-05, "loss": 0.1596, "step": 18680 }, { "epoch": 0.33319658973352834, "grad_norm": 0.273303359746933, "learning_rate": 4.216392254054489e-05, "loss": 0.1932, "step": 18681 }, { "epoch": 0.333214425855242, "grad_norm": 0.2837095558643341, "learning_rate": 4.216279080784966e-05, "loss": 0.2065, "step": 18682 }, { "epoch": 0.3332322619769557, "grad_norm": 0.32746848464012146, "learning_rate": 4.2161659008625534e-05, "loss": 0.1523, "step": 18683 }, { "epoch": 0.3332500980986694, "grad_norm": 0.22128203511238098, "learning_rate": 4.216052714287689e-05, "loss": 0.1334, "step": 18684 }, { "epoch": 0.33326793422038314, "grad_norm": 0.24784326553344727, "learning_rate": 4.2159395210608116e-05, "loss": 0.1961, "step": 18685 }, { "epoch": 0.33328577034209683, "grad_norm": 0.1982993185520172, "learning_rate": 4.21582632118236e-05, "loss": 0.1117, "step": 18686 }, { "epoch": 0.3333036064638105, "grad_norm": 0.33177274465560913, "learning_rate": 4.215713114652773e-05, "loss": 0.1903, "step": 18687 }, { "epoch": 0.3333214425855242, "grad_norm": 0.26933786273002625, "learning_rate": 4.215599901472489e-05, "loss": 0.1512, "step": 18688 }, { "epoch": 0.3333392787072379, "grad_norm": 0.2538033723831177, "learning_rate": 4.215486681641947e-05, "loss": 0.1624, "step": 18689 }, { "epoch": 0.3333571148289516, "grad_norm": 0.21105821430683136, "learning_rate": 4.2153734551615864e-05, "loss": 0.1258, "step": 18690 }, { "epoch": 0.33337495095066527, "grad_norm": 0.33613941073417664, "learning_rate": 4.215260222031846e-05, "loss": 0.1905, "step": 18691 }, { "epoch": 0.33339278707237896, "grad_norm": 0.2952043414115906, "learning_rate": 4.2151469822531645e-05, "loss": 0.1837, "step": 18692 }, { "epoch": 0.3334106231940927, "grad_norm": 0.243205726146698, "learning_rate": 4.2150337358259805e-05, "loss": 0.1457, "step": 18693 }, { "epoch": 0.3334284593158064, "grad_norm": 0.2892273962497711, "learning_rate": 4.214920482750734e-05, "loss": 0.247, "step": 18694 }, { "epoch": 0.3334462954375201, "grad_norm": 0.27271246910095215, "learning_rate": 4.2148072230278626e-05, "loss": 0.1384, "step": 18695 }, { "epoch": 0.33346413155923377, "grad_norm": 0.2792697846889496, "learning_rate": 4.214693956657807e-05, "loss": 0.1509, "step": 18696 }, { "epoch": 0.33348196768094746, "grad_norm": 0.3380482792854309, "learning_rate": 4.214580683641005e-05, "loss": 0.1487, "step": 18697 }, { "epoch": 0.33349980380266114, "grad_norm": 0.22144699096679688, "learning_rate": 4.214467403977896e-05, "loss": 0.1456, "step": 18698 }, { "epoch": 0.33351763992437483, "grad_norm": 0.1926298588514328, "learning_rate": 4.2143541176689195e-05, "loss": 0.1284, "step": 18699 }, { "epoch": 0.3335354760460885, "grad_norm": 0.23470169305801392, "learning_rate": 4.214240824714514e-05, "loss": 0.1506, "step": 18700 }, { "epoch": 0.3335533121678022, "grad_norm": 0.23451177775859833, "learning_rate": 4.214127525115119e-05, "loss": 0.1396, "step": 18701 }, { "epoch": 0.33357114828951595, "grad_norm": 0.2369646281003952, "learning_rate": 4.214014218871174e-05, "loss": 0.1655, "step": 18702 }, { "epoch": 0.33358898441122964, "grad_norm": 0.2560775876045227, "learning_rate": 4.213900905983118e-05, "loss": 0.1932, "step": 18703 }, { "epoch": 0.3336068205329433, "grad_norm": 0.29328837990760803, "learning_rate": 4.213787586451389e-05, "loss": 0.1075, "step": 18704 }, { "epoch": 0.333624656654657, "grad_norm": 0.21619559824466705, "learning_rate": 4.2136742602764286e-05, "loss": 0.1621, "step": 18705 }, { "epoch": 0.3336424927763707, "grad_norm": 0.2599635422229767, "learning_rate": 4.213560927458674e-05, "loss": 0.1808, "step": 18706 }, { "epoch": 0.3336603288980844, "grad_norm": 0.2735520303249359, "learning_rate": 4.213447587998566e-05, "loss": 0.1052, "step": 18707 }, { "epoch": 0.3336781650197981, "grad_norm": 0.304398775100708, "learning_rate": 4.213334241896544e-05, "loss": 0.1698, "step": 18708 }, { "epoch": 0.33369600114151177, "grad_norm": 0.27705931663513184, "learning_rate": 4.213220889153045e-05, "loss": 0.1733, "step": 18709 }, { "epoch": 0.3337138372632255, "grad_norm": 0.23909097909927368, "learning_rate": 4.2131075297685113e-05, "loss": 0.1379, "step": 18710 }, { "epoch": 0.3337316733849392, "grad_norm": 0.2765835225582123, "learning_rate": 4.2129941637433814e-05, "loss": 0.1892, "step": 18711 }, { "epoch": 0.3337495095066529, "grad_norm": 0.22051171958446503, "learning_rate": 4.212880791078093e-05, "loss": 0.1512, "step": 18712 }, { "epoch": 0.3337673456283666, "grad_norm": 0.2813849151134491, "learning_rate": 4.212767411773089e-05, "loss": 0.1595, "step": 18713 }, { "epoch": 0.33378518175008026, "grad_norm": 0.33392274379730225, "learning_rate": 4.212654025828805e-05, "loss": 0.1147, "step": 18714 }, { "epoch": 0.33380301787179395, "grad_norm": 0.2104877531528473, "learning_rate": 4.212540633245683e-05, "loss": 0.1391, "step": 18715 }, { "epoch": 0.33382085399350764, "grad_norm": 0.3532252907752991, "learning_rate": 4.2124272340241625e-05, "loss": 0.1785, "step": 18716 }, { "epoch": 0.3338386901152213, "grad_norm": 0.3038238286972046, "learning_rate": 4.212313828164683e-05, "loss": 0.137, "step": 18717 }, { "epoch": 0.333856526236935, "grad_norm": 0.3160938322544098, "learning_rate": 4.212200415667683e-05, "loss": 0.1779, "step": 18718 }, { "epoch": 0.33387436235864876, "grad_norm": 0.2350011020898819, "learning_rate": 4.212086996533603e-05, "loss": 0.131, "step": 18719 }, { "epoch": 0.33389219848036245, "grad_norm": 0.18767918646335602, "learning_rate": 4.211973570762882e-05, "loss": 0.1438, "step": 18720 }, { "epoch": 0.33391003460207613, "grad_norm": 0.29537296295166016, "learning_rate": 4.211860138355961e-05, "loss": 0.1782, "step": 18721 }, { "epoch": 0.3339278707237898, "grad_norm": 0.2523314356803894, "learning_rate": 4.211746699313278e-05, "loss": 0.1556, "step": 18722 }, { "epoch": 0.3339457068455035, "grad_norm": 0.251301646232605, "learning_rate": 4.2116332536352744e-05, "loss": 0.182, "step": 18723 }, { "epoch": 0.3339635429672172, "grad_norm": 0.2746959924697876, "learning_rate": 4.2115198013223886e-05, "loss": 0.2163, "step": 18724 }, { "epoch": 0.3339813790889309, "grad_norm": 0.37001949548721313, "learning_rate": 4.211406342375061e-05, "loss": 0.1818, "step": 18725 }, { "epoch": 0.3339992152106446, "grad_norm": 0.27162232995033264, "learning_rate": 4.2112928767937313e-05, "loss": 0.1532, "step": 18726 }, { "epoch": 0.3340170513323583, "grad_norm": 0.34194305539131165, "learning_rate": 4.2111794045788395e-05, "loss": 0.0979, "step": 18727 }, { "epoch": 0.334034887454072, "grad_norm": 0.2687188684940338, "learning_rate": 4.211065925730825e-05, "loss": 0.1669, "step": 18728 }, { "epoch": 0.3340527235757857, "grad_norm": 0.41771769523620605, "learning_rate": 4.210952440250128e-05, "loss": 0.1245, "step": 18729 }, { "epoch": 0.3340705596974994, "grad_norm": 0.3138853907585144, "learning_rate": 4.210838948137189e-05, "loss": 0.1755, "step": 18730 }, { "epoch": 0.33408839581921307, "grad_norm": 0.2310846447944641, "learning_rate": 4.2107254493924464e-05, "loss": 0.1689, "step": 18731 }, { "epoch": 0.33410623194092676, "grad_norm": 0.313462495803833, "learning_rate": 4.210611944016342e-05, "loss": 0.1747, "step": 18732 }, { "epoch": 0.33412406806264044, "grad_norm": 0.2913760840892792, "learning_rate": 4.210498432009314e-05, "loss": 0.1801, "step": 18733 }, { "epoch": 0.33414190418435413, "grad_norm": 0.3188319504261017, "learning_rate": 4.2103849133718044e-05, "loss": 0.1551, "step": 18734 }, { "epoch": 0.3341597403060679, "grad_norm": 0.22271277010440826, "learning_rate": 4.210271388104251e-05, "loss": 0.1068, "step": 18735 }, { "epoch": 0.33417757642778156, "grad_norm": 0.19057299196720123, "learning_rate": 4.210157856207096e-05, "loss": 0.1556, "step": 18736 }, { "epoch": 0.33419541254949525, "grad_norm": 0.29498863220214844, "learning_rate": 4.210044317680778e-05, "loss": 0.1463, "step": 18737 }, { "epoch": 0.33421324867120894, "grad_norm": 0.20593951642513275, "learning_rate": 4.2099307725257376e-05, "loss": 0.124, "step": 18738 }, { "epoch": 0.33423108479292263, "grad_norm": 0.2947141230106354, "learning_rate": 4.2098172207424145e-05, "loss": 0.0981, "step": 18739 }, { "epoch": 0.3342489209146363, "grad_norm": 0.374240905046463, "learning_rate": 4.20970366233125e-05, "loss": 0.2417, "step": 18740 }, { "epoch": 0.33426675703635, "grad_norm": 0.27606189250946045, "learning_rate": 4.2095900972926835e-05, "loss": 0.2424, "step": 18741 }, { "epoch": 0.3342845931580637, "grad_norm": 0.23406417667865753, "learning_rate": 4.209476525627155e-05, "loss": 0.1597, "step": 18742 }, { "epoch": 0.3343024292797774, "grad_norm": 0.29940590262413025, "learning_rate": 4.2093629473351046e-05, "loss": 0.2361, "step": 18743 }, { "epoch": 0.3343202654014911, "grad_norm": 0.2661944329738617, "learning_rate": 4.209249362416974e-05, "loss": 0.1585, "step": 18744 }, { "epoch": 0.3343381015232048, "grad_norm": 0.3111472427845001, "learning_rate": 4.209135770873202e-05, "loss": 0.1607, "step": 18745 }, { "epoch": 0.3343559376449185, "grad_norm": 0.32731369137763977, "learning_rate": 4.20902217270423e-05, "loss": 0.1747, "step": 18746 }, { "epoch": 0.3343737737666322, "grad_norm": 0.2322869598865509, "learning_rate": 4.208908567910497e-05, "loss": 0.1709, "step": 18747 }, { "epoch": 0.3343916098883459, "grad_norm": 0.23245777189731598, "learning_rate": 4.2087949564924445e-05, "loss": 0.2006, "step": 18748 }, { "epoch": 0.33440944601005956, "grad_norm": 0.33070069551467896, "learning_rate": 4.2086813384505125e-05, "loss": 0.193, "step": 18749 }, { "epoch": 0.33442728213177325, "grad_norm": 0.3557249903678894, "learning_rate": 4.2085677137851413e-05, "loss": 0.1592, "step": 18750 }, { "epoch": 0.33444511825348694, "grad_norm": 0.1902570277452469, "learning_rate": 4.208454082496772e-05, "loss": 0.146, "step": 18751 }, { "epoch": 0.3344629543752007, "grad_norm": 0.27751973271369934, "learning_rate": 4.208340444585844e-05, "loss": 0.1939, "step": 18752 }, { "epoch": 0.33448079049691437, "grad_norm": 0.20552270114421844, "learning_rate": 4.2082268000527994e-05, "loss": 0.1441, "step": 18753 }, { "epoch": 0.33449862661862806, "grad_norm": 0.45457321405410767, "learning_rate": 4.208113148898076e-05, "loss": 0.1558, "step": 18754 }, { "epoch": 0.33451646274034175, "grad_norm": 0.2982683777809143, "learning_rate": 4.207999491122118e-05, "loss": 0.1841, "step": 18755 }, { "epoch": 0.33453429886205543, "grad_norm": 0.28458109498023987, "learning_rate": 4.2078858267253626e-05, "loss": 0.1539, "step": 18756 }, { "epoch": 0.3345521349837691, "grad_norm": 0.2325965315103531, "learning_rate": 4.207772155708253e-05, "loss": 0.1295, "step": 18757 }, { "epoch": 0.3345699711054828, "grad_norm": 0.21238230168819427, "learning_rate": 4.207658478071228e-05, "loss": 0.1261, "step": 18758 }, { "epoch": 0.3345878072271965, "grad_norm": 0.33513128757476807, "learning_rate": 4.207544793814728e-05, "loss": 0.1901, "step": 18759 }, { "epoch": 0.3346056433489102, "grad_norm": 0.31899237632751465, "learning_rate": 4.2074311029391963e-05, "loss": 0.1952, "step": 18760 }, { "epoch": 0.33462347947062393, "grad_norm": 0.29127180576324463, "learning_rate": 4.207317405445072e-05, "loss": 0.1794, "step": 18761 }, { "epoch": 0.3346413155923376, "grad_norm": 0.2133120447397232, "learning_rate": 4.207203701332794e-05, "loss": 0.1338, "step": 18762 }, { "epoch": 0.3346591517140513, "grad_norm": 0.22214551270008087, "learning_rate": 4.207089990602806e-05, "loss": 0.1584, "step": 18763 }, { "epoch": 0.334676987835765, "grad_norm": 0.25202473998069763, "learning_rate": 4.206976273255547e-05, "loss": 0.1978, "step": 18764 }, { "epoch": 0.3346948239574787, "grad_norm": 0.252835750579834, "learning_rate": 4.2068625492914595e-05, "loss": 0.1542, "step": 18765 }, { "epoch": 0.33471266007919237, "grad_norm": 0.24503786861896515, "learning_rate": 4.206748818710982e-05, "loss": 0.1294, "step": 18766 }, { "epoch": 0.33473049620090606, "grad_norm": 0.27153870463371277, "learning_rate": 4.206635081514557e-05, "loss": 0.1203, "step": 18767 }, { "epoch": 0.33474833232261975, "grad_norm": 0.23417600989341736, "learning_rate": 4.2065213377026244e-05, "loss": 0.1707, "step": 18768 }, { "epoch": 0.3347661684443335, "grad_norm": 0.17935870587825775, "learning_rate": 4.206407587275627e-05, "loss": 0.115, "step": 18769 }, { "epoch": 0.3347840045660472, "grad_norm": 0.36513659358024597, "learning_rate": 4.206293830234004e-05, "loss": 0.1214, "step": 18770 }, { "epoch": 0.33480184068776087, "grad_norm": 0.30545157194137573, "learning_rate": 4.206180066578196e-05, "loss": 0.1475, "step": 18771 }, { "epoch": 0.33481967680947455, "grad_norm": 0.2862299680709839, "learning_rate": 4.2060662963086454e-05, "loss": 0.1745, "step": 18772 }, { "epoch": 0.33483751293118824, "grad_norm": 0.34034794569015503, "learning_rate": 4.2059525194257934e-05, "loss": 0.2029, "step": 18773 }, { "epoch": 0.33485534905290193, "grad_norm": 0.3153921961784363, "learning_rate": 4.2058387359300786e-05, "loss": 0.1925, "step": 18774 }, { "epoch": 0.3348731851746156, "grad_norm": 0.29298266768455505, "learning_rate": 4.205724945821944e-05, "loss": 0.1766, "step": 18775 }, { "epoch": 0.3348910212963293, "grad_norm": 0.27643847465515137, "learning_rate": 4.2056111491018314e-05, "loss": 0.135, "step": 18776 }, { "epoch": 0.334908857418043, "grad_norm": 0.21980465948581696, "learning_rate": 4.2054973457701804e-05, "loss": 0.1806, "step": 18777 }, { "epoch": 0.33492669353975674, "grad_norm": 0.21354219317436218, "learning_rate": 4.205383535827432e-05, "loss": 0.1162, "step": 18778 }, { "epoch": 0.3349445296614704, "grad_norm": 0.28409087657928467, "learning_rate": 4.2052697192740284e-05, "loss": 0.1902, "step": 18779 }, { "epoch": 0.3349623657831841, "grad_norm": 0.3530551493167877, "learning_rate": 4.205155896110411e-05, "loss": 0.1857, "step": 18780 }, { "epoch": 0.3349802019048978, "grad_norm": 0.25170814990997314, "learning_rate": 4.205042066337019e-05, "loss": 0.175, "step": 18781 }, { "epoch": 0.3349980380266115, "grad_norm": 0.2856610417366028, "learning_rate": 4.2049282299542964e-05, "loss": 0.1688, "step": 18782 }, { "epoch": 0.3350158741483252, "grad_norm": 0.2508857846260071, "learning_rate": 4.204814386962682e-05, "loss": 0.1817, "step": 18783 }, { "epoch": 0.33503371027003886, "grad_norm": 0.28833040595054626, "learning_rate": 4.204700537362619e-05, "loss": 0.1799, "step": 18784 }, { "epoch": 0.33505154639175255, "grad_norm": 0.2016073763370514, "learning_rate": 4.204586681154548e-05, "loss": 0.1324, "step": 18785 }, { "epoch": 0.3350693825134663, "grad_norm": 0.22017642855644226, "learning_rate": 4.20447281833891e-05, "loss": 0.1274, "step": 18786 }, { "epoch": 0.33508721863518, "grad_norm": 0.41027387976646423, "learning_rate": 4.204358948916147e-05, "loss": 0.1389, "step": 18787 }, { "epoch": 0.33510505475689367, "grad_norm": 0.28757673501968384, "learning_rate": 4.2042450728867e-05, "loss": 0.1717, "step": 18788 }, { "epoch": 0.33512289087860736, "grad_norm": 0.4009237587451935, "learning_rate": 4.20413119025101e-05, "loss": 0.2333, "step": 18789 }, { "epoch": 0.33514072700032105, "grad_norm": 0.3056463301181793, "learning_rate": 4.2040173010095187e-05, "loss": 0.2015, "step": 18790 }, { "epoch": 0.33515856312203474, "grad_norm": 0.27369338274002075, "learning_rate": 4.203903405162669e-05, "loss": 0.1947, "step": 18791 }, { "epoch": 0.3351763992437484, "grad_norm": 0.3525892496109009, "learning_rate": 4.2037895027109e-05, "loss": 0.1485, "step": 18792 }, { "epoch": 0.3351942353654621, "grad_norm": 0.2792639136314392, "learning_rate": 4.203675593654654e-05, "loss": 0.1653, "step": 18793 }, { "epoch": 0.33521207148717586, "grad_norm": 0.24967066943645477, "learning_rate": 4.203561677994374e-05, "loss": 0.1525, "step": 18794 }, { "epoch": 0.33522990760888954, "grad_norm": 0.2366359680891037, "learning_rate": 4.2034477557305005e-05, "loss": 0.1316, "step": 18795 }, { "epoch": 0.33524774373060323, "grad_norm": 0.27418941259384155, "learning_rate": 4.2033338268634744e-05, "loss": 0.1704, "step": 18796 }, { "epoch": 0.3352655798523169, "grad_norm": 0.22611431777477264, "learning_rate": 4.203219891393739e-05, "loss": 0.1486, "step": 18797 }, { "epoch": 0.3352834159740306, "grad_norm": 0.23127515614032745, "learning_rate": 4.203105949321735e-05, "loss": 0.1639, "step": 18798 }, { "epoch": 0.3353012520957443, "grad_norm": 0.33221226930618286, "learning_rate": 4.202992000647904e-05, "loss": 0.1964, "step": 18799 }, { "epoch": 0.335319088217458, "grad_norm": 0.27608874440193176, "learning_rate": 4.202878045372687e-05, "loss": 0.1633, "step": 18800 }, { "epoch": 0.33533692433917167, "grad_norm": 0.22302059829235077, "learning_rate": 4.2027640834965276e-05, "loss": 0.1322, "step": 18801 }, { "epoch": 0.33535476046088536, "grad_norm": 0.28315451741218567, "learning_rate": 4.202650115019866e-05, "loss": 0.1399, "step": 18802 }, { "epoch": 0.3353725965825991, "grad_norm": 0.269192636013031, "learning_rate": 4.202536139943144e-05, "loss": 0.183, "step": 18803 }, { "epoch": 0.3353904327043128, "grad_norm": 0.27647823095321655, "learning_rate": 4.2024221582668056e-05, "loss": 0.1668, "step": 18804 }, { "epoch": 0.3354082688260265, "grad_norm": 0.26751378178596497, "learning_rate": 4.2023081699912895e-05, "loss": 0.2002, "step": 18805 }, { "epoch": 0.33542610494774017, "grad_norm": 0.23461028933525085, "learning_rate": 4.202194175117039e-05, "loss": 0.1267, "step": 18806 }, { "epoch": 0.33544394106945385, "grad_norm": 0.22237063944339752, "learning_rate": 4.202080173644496e-05, "loss": 0.1425, "step": 18807 }, { "epoch": 0.33546177719116754, "grad_norm": 0.21484938263893127, "learning_rate": 4.2019661655741026e-05, "loss": 0.1057, "step": 18808 }, { "epoch": 0.33547961331288123, "grad_norm": 0.2188551276922226, "learning_rate": 4.2018521509063e-05, "loss": 0.1941, "step": 18809 }, { "epoch": 0.3354974494345949, "grad_norm": 0.23399877548217773, "learning_rate": 4.2017381296415314e-05, "loss": 0.18, "step": 18810 }, { "epoch": 0.33551528555630866, "grad_norm": 0.2748350203037262, "learning_rate": 4.2016241017802374e-05, "loss": 0.1398, "step": 18811 }, { "epoch": 0.33553312167802235, "grad_norm": 0.29247432947158813, "learning_rate": 4.2015100673228614e-05, "loss": 0.1594, "step": 18812 }, { "epoch": 0.33555095779973604, "grad_norm": 0.3833557367324829, "learning_rate": 4.2013960262698444e-05, "loss": 0.2128, "step": 18813 }, { "epoch": 0.3355687939214497, "grad_norm": 0.4648134708404541, "learning_rate": 4.2012819786216284e-05, "loss": 0.1587, "step": 18814 }, { "epoch": 0.3355866300431634, "grad_norm": 0.25184133648872375, "learning_rate": 4.2011679243786564e-05, "loss": 0.1823, "step": 18815 }, { "epoch": 0.3356044661648771, "grad_norm": 0.23803114891052246, "learning_rate": 4.2010538635413696e-05, "loss": 0.1664, "step": 18816 }, { "epoch": 0.3356223022865908, "grad_norm": 0.32847172021865845, "learning_rate": 4.2009397961102105e-05, "loss": 0.1394, "step": 18817 }, { "epoch": 0.3356401384083045, "grad_norm": 0.33813828229904175, "learning_rate": 4.200825722085621e-05, "loss": 0.2061, "step": 18818 }, { "epoch": 0.33565797453001817, "grad_norm": 0.21931812167167664, "learning_rate": 4.200711641468044e-05, "loss": 0.1318, "step": 18819 }, { "epoch": 0.3356758106517319, "grad_norm": 0.287255197763443, "learning_rate": 4.2005975542579215e-05, "loss": 0.1652, "step": 18820 }, { "epoch": 0.3356936467734456, "grad_norm": 0.28176724910736084, "learning_rate": 4.200483460455695e-05, "loss": 0.1197, "step": 18821 }, { "epoch": 0.3357114828951593, "grad_norm": 0.22769640386104584, "learning_rate": 4.200369360061808e-05, "loss": 0.1906, "step": 18822 }, { "epoch": 0.335729319016873, "grad_norm": 0.28410032391548157, "learning_rate": 4.200255253076701e-05, "loss": 0.1662, "step": 18823 }, { "epoch": 0.33574715513858666, "grad_norm": 0.3849674165248871, "learning_rate": 4.2001411395008175e-05, "loss": 0.2569, "step": 18824 }, { "epoch": 0.33576499126030035, "grad_norm": 0.3412947952747345, "learning_rate": 4.2000270193346e-05, "loss": 0.1621, "step": 18825 }, { "epoch": 0.33578282738201404, "grad_norm": 0.24359185993671417, "learning_rate": 4.199912892578491e-05, "loss": 0.1939, "step": 18826 }, { "epoch": 0.3358006635037277, "grad_norm": 0.21283644437789917, "learning_rate": 4.1997987592329325e-05, "loss": 0.1921, "step": 18827 }, { "epoch": 0.33581849962544147, "grad_norm": 0.3339155316352844, "learning_rate": 4.199684619298366e-05, "loss": 0.1767, "step": 18828 }, { "epoch": 0.33583633574715516, "grad_norm": 0.19576819241046906, "learning_rate": 4.199570472775236e-05, "loss": 0.1636, "step": 18829 }, { "epoch": 0.33585417186886884, "grad_norm": 0.3733035624027252, "learning_rate": 4.1994563196639835e-05, "loss": 0.121, "step": 18830 }, { "epoch": 0.33587200799058253, "grad_norm": 0.20671199262142181, "learning_rate": 4.199342159965051e-05, "loss": 0.1389, "step": 18831 }, { "epoch": 0.3358898441122962, "grad_norm": 0.29223716259002686, "learning_rate": 4.199227993678882e-05, "loss": 0.1829, "step": 18832 }, { "epoch": 0.3359076802340099, "grad_norm": 0.2457880973815918, "learning_rate": 4.199113820805918e-05, "loss": 0.1862, "step": 18833 }, { "epoch": 0.3359255163557236, "grad_norm": 0.1931418925523758, "learning_rate": 4.198999641346601e-05, "loss": 0.1103, "step": 18834 }, { "epoch": 0.3359433524774373, "grad_norm": 0.22421900928020477, "learning_rate": 4.198885455301376e-05, "loss": 0.1524, "step": 18835 }, { "epoch": 0.33596118859915103, "grad_norm": 0.1974003165960312, "learning_rate": 4.198771262670684e-05, "loss": 0.1435, "step": 18836 }, { "epoch": 0.3359790247208647, "grad_norm": 0.34305670857429504, "learning_rate": 4.198657063454967e-05, "loss": 0.1236, "step": 18837 }, { "epoch": 0.3359968608425784, "grad_norm": 0.20368368923664093, "learning_rate": 4.1985428576546694e-05, "loss": 0.1999, "step": 18838 }, { "epoch": 0.3360146969642921, "grad_norm": 0.2729019522666931, "learning_rate": 4.198428645270233e-05, "loss": 0.1967, "step": 18839 }, { "epoch": 0.3360325330860058, "grad_norm": 0.28014278411865234, "learning_rate": 4.1983144263021004e-05, "loss": 0.1935, "step": 18840 }, { "epoch": 0.33605036920771947, "grad_norm": 0.2641333043575287, "learning_rate": 4.1982002007507135e-05, "loss": 0.1514, "step": 18841 }, { "epoch": 0.33606820532943316, "grad_norm": 0.40066924691200256, "learning_rate": 4.198085968616517e-05, "loss": 0.1941, "step": 18842 }, { "epoch": 0.33608604145114684, "grad_norm": 0.3447968661785126, "learning_rate": 4.1979717298999534e-05, "loss": 0.1392, "step": 18843 }, { "epoch": 0.33610387757286053, "grad_norm": 0.31529876589775085, "learning_rate": 4.197857484601464e-05, "loss": 0.1946, "step": 18844 }, { "epoch": 0.3361217136945743, "grad_norm": 0.2976031005382538, "learning_rate": 4.197743232721493e-05, "loss": 0.1655, "step": 18845 }, { "epoch": 0.33613954981628796, "grad_norm": 0.25934863090515137, "learning_rate": 4.1976289742604827e-05, "loss": 0.1776, "step": 18846 }, { "epoch": 0.33615738593800165, "grad_norm": 0.2988622486591339, "learning_rate": 4.1975147092188754e-05, "loss": 0.199, "step": 18847 }, { "epoch": 0.33617522205971534, "grad_norm": 0.24079221487045288, "learning_rate": 4.197400437597115e-05, "loss": 0.1401, "step": 18848 }, { "epoch": 0.336193058181429, "grad_norm": 0.25547999143600464, "learning_rate": 4.1972861593956456e-05, "loss": 0.1551, "step": 18849 }, { "epoch": 0.3362108943031427, "grad_norm": 0.20761536061763763, "learning_rate": 4.197171874614908e-05, "loss": 0.1549, "step": 18850 }, { "epoch": 0.3362287304248564, "grad_norm": 0.29660919308662415, "learning_rate": 4.197057583255346e-05, "loss": 0.207, "step": 18851 }, { "epoch": 0.3362465665465701, "grad_norm": 0.2257426679134369, "learning_rate": 4.196943285317402e-05, "loss": 0.1694, "step": 18852 }, { "epoch": 0.33626440266828383, "grad_norm": 0.29529380798339844, "learning_rate": 4.196828980801521e-05, "loss": 0.1898, "step": 18853 }, { "epoch": 0.3362822387899975, "grad_norm": 0.29268166422843933, "learning_rate": 4.196714669708144e-05, "loss": 0.168, "step": 18854 }, { "epoch": 0.3363000749117112, "grad_norm": 0.26879534125328064, "learning_rate": 4.196600352037715e-05, "loss": 0.2051, "step": 18855 }, { "epoch": 0.3363179110334249, "grad_norm": 0.23126782476902008, "learning_rate": 4.196486027790677e-05, "loss": 0.1575, "step": 18856 }, { "epoch": 0.3363357471551386, "grad_norm": 0.2529090344905853, "learning_rate": 4.1963716969674736e-05, "loss": 0.1476, "step": 18857 }, { "epoch": 0.3363535832768523, "grad_norm": 0.26247191429138184, "learning_rate": 4.196257359568547e-05, "loss": 0.1397, "step": 18858 }, { "epoch": 0.33637141939856596, "grad_norm": 0.46113982796669006, "learning_rate": 4.196143015594342e-05, "loss": 0.1593, "step": 18859 }, { "epoch": 0.33638925552027965, "grad_norm": 0.28498685359954834, "learning_rate": 4.196028665045299e-05, "loss": 0.2064, "step": 18860 }, { "epoch": 0.33640709164199334, "grad_norm": 0.24048668146133423, "learning_rate": 4.195914307921865e-05, "loss": 0.1627, "step": 18861 }, { "epoch": 0.3364249277637071, "grad_norm": 0.20519572496414185, "learning_rate": 4.1957999442244803e-05, "loss": 0.1794, "step": 18862 }, { "epoch": 0.33644276388542077, "grad_norm": 0.2637174129486084, "learning_rate": 4.19568557395359e-05, "loss": 0.1464, "step": 18863 }, { "epoch": 0.33646060000713446, "grad_norm": 0.2308375984430313, "learning_rate": 4.1955711971096364e-05, "loss": 0.1759, "step": 18864 }, { "epoch": 0.33647843612884815, "grad_norm": 0.33197465538978577, "learning_rate": 4.1954568136930634e-05, "loss": 0.1432, "step": 18865 }, { "epoch": 0.33649627225056183, "grad_norm": 0.26629436016082764, "learning_rate": 4.1953424237043135e-05, "loss": 0.1714, "step": 18866 }, { "epoch": 0.3365141083722755, "grad_norm": 0.24002783000469208, "learning_rate": 4.1952280271438315e-05, "loss": 0.1623, "step": 18867 }, { "epoch": 0.3365319444939892, "grad_norm": 0.32216477394104004, "learning_rate": 4.1951136240120604e-05, "loss": 0.1486, "step": 18868 }, { "epoch": 0.3365497806157029, "grad_norm": 0.2146845906972885, "learning_rate": 4.194999214309443e-05, "loss": 0.18, "step": 18869 }, { "epoch": 0.33656761673741664, "grad_norm": 0.2908448576927185, "learning_rate": 4.194884798036423e-05, "loss": 0.1619, "step": 18870 }, { "epoch": 0.33658545285913033, "grad_norm": 0.19791792333126068, "learning_rate": 4.1947703751934444e-05, "loss": 0.1374, "step": 18871 }, { "epoch": 0.336603288980844, "grad_norm": 0.33534741401672363, "learning_rate": 4.19465594578095e-05, "loss": 0.1615, "step": 18872 }, { "epoch": 0.3366211251025577, "grad_norm": 0.5692492127418518, "learning_rate": 4.1945415097993846e-05, "loss": 0.1862, "step": 18873 }, { "epoch": 0.3366389612242714, "grad_norm": 0.20844325423240662, "learning_rate": 4.1944270672491904e-05, "loss": 0.1615, "step": 18874 }, { "epoch": 0.3366567973459851, "grad_norm": 0.2584037780761719, "learning_rate": 4.194312618130812e-05, "loss": 0.1539, "step": 18875 }, { "epoch": 0.33667463346769877, "grad_norm": 0.26378270983695984, "learning_rate": 4.1941981624446926e-05, "loss": 0.1824, "step": 18876 }, { "epoch": 0.33669246958941246, "grad_norm": 0.2567320168018341, "learning_rate": 4.194083700191276e-05, "loss": 0.2199, "step": 18877 }, { "epoch": 0.33671030571112615, "grad_norm": 0.2289617657661438, "learning_rate": 4.193969231371006e-05, "loss": 0.1683, "step": 18878 }, { "epoch": 0.3367281418328399, "grad_norm": 0.24370422959327698, "learning_rate": 4.193854755984327e-05, "loss": 0.1779, "step": 18879 }, { "epoch": 0.3367459779545536, "grad_norm": 0.29774782061576843, "learning_rate": 4.19374027403168e-05, "loss": 0.1998, "step": 18880 }, { "epoch": 0.33676381407626726, "grad_norm": 0.2203705757856369, "learning_rate": 4.193625785513512e-05, "loss": 0.1796, "step": 18881 }, { "epoch": 0.33678165019798095, "grad_norm": 0.2693965435028076, "learning_rate": 4.193511290430265e-05, "loss": 0.1321, "step": 18882 }, { "epoch": 0.33679948631969464, "grad_norm": 0.34647831320762634, "learning_rate": 4.193396788782383e-05, "loss": 0.2512, "step": 18883 }, { "epoch": 0.33681732244140833, "grad_norm": 0.30219072103500366, "learning_rate": 4.193282280570311e-05, "loss": 0.1705, "step": 18884 }, { "epoch": 0.336835158563122, "grad_norm": 0.3962326645851135, "learning_rate": 4.1931677657944925e-05, "loss": 0.1679, "step": 18885 }, { "epoch": 0.3368529946848357, "grad_norm": 0.28498876094818115, "learning_rate": 4.193053244455369e-05, "loss": 0.1264, "step": 18886 }, { "epoch": 0.33687083080654945, "grad_norm": 0.24789181351661682, "learning_rate": 4.192938716553388e-05, "loss": 0.1621, "step": 18887 }, { "epoch": 0.33688866692826314, "grad_norm": 0.27573931217193604, "learning_rate": 4.1928241820889914e-05, "loss": 0.1509, "step": 18888 }, { "epoch": 0.3369065030499768, "grad_norm": 0.27217361330986023, "learning_rate": 4.1927096410626234e-05, "loss": 0.2067, "step": 18889 }, { "epoch": 0.3369243391716905, "grad_norm": 0.2628231644630432, "learning_rate": 4.192595093474728e-05, "loss": 0.1161, "step": 18890 }, { "epoch": 0.3369421752934042, "grad_norm": 0.24457767605781555, "learning_rate": 4.19248053932575e-05, "loss": 0.1672, "step": 18891 }, { "epoch": 0.3369600114151179, "grad_norm": 0.24799640476703644, "learning_rate": 4.192365978616133e-05, "loss": 0.1568, "step": 18892 }, { "epoch": 0.3369778475368316, "grad_norm": 0.2729053497314453, "learning_rate": 4.1922514113463196e-05, "loss": 0.1928, "step": 18893 }, { "epoch": 0.33699568365854526, "grad_norm": 0.2368975132703781, "learning_rate": 4.192136837516757e-05, "loss": 0.1425, "step": 18894 }, { "epoch": 0.337013519780259, "grad_norm": 0.2980508804321289, "learning_rate": 4.192022257127887e-05, "loss": 0.1762, "step": 18895 }, { "epoch": 0.3370313559019727, "grad_norm": 0.2621249556541443, "learning_rate": 4.1919076701801536e-05, "loss": 0.1236, "step": 18896 }, { "epoch": 0.3370491920236864, "grad_norm": 0.36102551221847534, "learning_rate": 4.191793076674002e-05, "loss": 0.1448, "step": 18897 }, { "epoch": 0.33706702814540007, "grad_norm": 0.23766344785690308, "learning_rate": 4.191678476609876e-05, "loss": 0.1656, "step": 18898 }, { "epoch": 0.33708486426711376, "grad_norm": 0.2627926468849182, "learning_rate": 4.191563869988221e-05, "loss": 0.1361, "step": 18899 }, { "epoch": 0.33710270038882745, "grad_norm": 0.3469749093055725, "learning_rate": 4.191449256809479e-05, "loss": 0.1371, "step": 18900 }, { "epoch": 0.33712053651054114, "grad_norm": 0.4508765637874603, "learning_rate": 4.191334637074096e-05, "loss": 0.2091, "step": 18901 }, { "epoch": 0.3371383726322548, "grad_norm": 0.33867523074150085, "learning_rate": 4.191220010782515e-05, "loss": 0.182, "step": 18902 }, { "epoch": 0.3371562087539685, "grad_norm": 0.23206554353237152, "learning_rate": 4.191105377935182e-05, "loss": 0.1797, "step": 18903 }, { "epoch": 0.33717404487568226, "grad_norm": 0.2141391634941101, "learning_rate": 4.19099073853254e-05, "loss": 0.1529, "step": 18904 }, { "epoch": 0.33719188099739594, "grad_norm": 0.2958926856517792, "learning_rate": 4.1908760925750346e-05, "loss": 0.1403, "step": 18905 }, { "epoch": 0.33720971711910963, "grad_norm": 0.30149391293525696, "learning_rate": 4.190761440063109e-05, "loss": 0.1474, "step": 18906 }, { "epoch": 0.3372275532408233, "grad_norm": 0.2546897828578949, "learning_rate": 4.190646780997208e-05, "loss": 0.1548, "step": 18907 }, { "epoch": 0.337245389362537, "grad_norm": 0.21468181908130646, "learning_rate": 4.1905321153777765e-05, "loss": 0.1744, "step": 18908 }, { "epoch": 0.3372632254842507, "grad_norm": 0.1941651999950409, "learning_rate": 4.190417443205258e-05, "loss": 0.1395, "step": 18909 }, { "epoch": 0.3372810616059644, "grad_norm": 0.2625371813774109, "learning_rate": 4.1903027644800974e-05, "loss": 0.1615, "step": 18910 }, { "epoch": 0.33729889772767807, "grad_norm": 0.25621822476387024, "learning_rate": 4.1901880792027405e-05, "loss": 0.1241, "step": 18911 }, { "epoch": 0.3373167338493918, "grad_norm": 0.30450040102005005, "learning_rate": 4.1900733873736305e-05, "loss": 0.1912, "step": 18912 }, { "epoch": 0.3373345699711055, "grad_norm": 0.22386007010936737, "learning_rate": 4.189958688993212e-05, "loss": 0.1593, "step": 18913 }, { "epoch": 0.3373524060928192, "grad_norm": 0.24250273406505585, "learning_rate": 4.1898439840619294e-05, "loss": 0.1317, "step": 18914 }, { "epoch": 0.3373702422145329, "grad_norm": 0.27967870235443115, "learning_rate": 4.189729272580229e-05, "loss": 0.1873, "step": 18915 }, { "epoch": 0.33738807833624657, "grad_norm": 0.2978518009185791, "learning_rate": 4.189614554548554e-05, "loss": 0.1945, "step": 18916 }, { "epoch": 0.33740591445796025, "grad_norm": 0.28990477323532104, "learning_rate": 4.189499829967349e-05, "loss": 0.1886, "step": 18917 }, { "epoch": 0.33742375057967394, "grad_norm": 0.27220380306243896, "learning_rate": 4.18938509883706e-05, "loss": 0.1688, "step": 18918 }, { "epoch": 0.33744158670138763, "grad_norm": 0.369583398103714, "learning_rate": 4.1892703611581296e-05, "loss": 0.202, "step": 18919 }, { "epoch": 0.3374594228231013, "grad_norm": 0.21575228869915009, "learning_rate": 4.1891556169310045e-05, "loss": 0.1455, "step": 18920 }, { "epoch": 0.33747725894481506, "grad_norm": 0.25246548652648926, "learning_rate": 4.189040866156129e-05, "loss": 0.1414, "step": 18921 }, { "epoch": 0.33749509506652875, "grad_norm": 0.2783922255039215, "learning_rate": 4.188926108833948e-05, "loss": 0.1981, "step": 18922 }, { "epoch": 0.33751293118824244, "grad_norm": 0.300258070230484, "learning_rate": 4.188811344964905e-05, "loss": 0.179, "step": 18923 }, { "epoch": 0.3375307673099561, "grad_norm": 0.2789432108402252, "learning_rate": 4.1886965745494464e-05, "loss": 0.1912, "step": 18924 }, { "epoch": 0.3375486034316698, "grad_norm": 0.23929810523986816, "learning_rate": 4.188581797588017e-05, "loss": 0.1686, "step": 18925 }, { "epoch": 0.3375664395533835, "grad_norm": 0.41566815972328186, "learning_rate": 4.188467014081061e-05, "loss": 0.1489, "step": 18926 }, { "epoch": 0.3375842756750972, "grad_norm": 0.29327622056007385, "learning_rate": 4.1883522240290243e-05, "loss": 0.2405, "step": 18927 }, { "epoch": 0.3376021117968109, "grad_norm": 0.260224848985672, "learning_rate": 4.18823742743235e-05, "loss": 0.1393, "step": 18928 }, { "epoch": 0.3376199479185246, "grad_norm": 0.2737530767917633, "learning_rate": 4.188122624291485e-05, "loss": 0.1609, "step": 18929 }, { "epoch": 0.3376377840402383, "grad_norm": 0.3170583248138428, "learning_rate": 4.188007814606874e-05, "loss": 0.1808, "step": 18930 }, { "epoch": 0.337655620161952, "grad_norm": 0.3437529504299164, "learning_rate": 4.187892998378962e-05, "loss": 0.2453, "step": 18931 }, { "epoch": 0.3376734562836657, "grad_norm": 0.20769667625427246, "learning_rate": 4.1877781756081926e-05, "loss": 0.1341, "step": 18932 }, { "epoch": 0.3376912924053794, "grad_norm": 0.23483915627002716, "learning_rate": 4.187663346295013e-05, "loss": 0.1553, "step": 18933 }, { "epoch": 0.33770912852709306, "grad_norm": 0.31459829211235046, "learning_rate": 4.187548510439866e-05, "loss": 0.1393, "step": 18934 }, { "epoch": 0.33772696464880675, "grad_norm": 0.23074886202812195, "learning_rate": 4.1874336680431994e-05, "loss": 0.1554, "step": 18935 }, { "epoch": 0.33774480077052044, "grad_norm": 0.33776792883872986, "learning_rate": 4.187318819105457e-05, "loss": 0.1477, "step": 18936 }, { "epoch": 0.3377626368922341, "grad_norm": 0.2515294551849365, "learning_rate": 4.1872039636270836e-05, "loss": 0.1421, "step": 18937 }, { "epoch": 0.33778047301394787, "grad_norm": 0.206778421998024, "learning_rate": 4.187089101608526e-05, "loss": 0.174, "step": 18938 }, { "epoch": 0.33779830913566156, "grad_norm": 0.39762958884239197, "learning_rate": 4.1869742330502266e-05, "loss": 0.1512, "step": 18939 }, { "epoch": 0.33781614525737524, "grad_norm": 0.2478768229484558, "learning_rate": 4.186859357952634e-05, "loss": 0.1413, "step": 18940 }, { "epoch": 0.33783398137908893, "grad_norm": 0.29932278394699097, "learning_rate": 4.1867444763161905e-05, "loss": 0.2039, "step": 18941 }, { "epoch": 0.3378518175008026, "grad_norm": 0.26540690660476685, "learning_rate": 4.1866295881413434e-05, "loss": 0.1806, "step": 18942 }, { "epoch": 0.3378696536225163, "grad_norm": 0.20086121559143066, "learning_rate": 4.186514693428538e-05, "loss": 0.1507, "step": 18943 }, { "epoch": 0.33788748974423, "grad_norm": 0.1826017051935196, "learning_rate": 4.1863997921782185e-05, "loss": 0.1588, "step": 18944 }, { "epoch": 0.3379053258659437, "grad_norm": 0.22912786900997162, "learning_rate": 4.186284884390831e-05, "loss": 0.1602, "step": 18945 }, { "epoch": 0.3379231619876574, "grad_norm": 0.20957501232624054, "learning_rate": 4.186169970066821e-05, "loss": 0.1517, "step": 18946 }, { "epoch": 0.3379409981093711, "grad_norm": 0.2784116268157959, "learning_rate": 4.186055049206634e-05, "loss": 0.181, "step": 18947 }, { "epoch": 0.3379588342310848, "grad_norm": 0.27196410298347473, "learning_rate": 4.185940121810715e-05, "loss": 0.1576, "step": 18948 }, { "epoch": 0.3379766703527985, "grad_norm": 0.2507927417755127, "learning_rate": 4.18582518787951e-05, "loss": 0.2018, "step": 18949 }, { "epoch": 0.3379945064745122, "grad_norm": 0.21486423909664154, "learning_rate": 4.185710247413465e-05, "loss": 0.1532, "step": 18950 }, { "epoch": 0.33801234259622587, "grad_norm": 0.25737878680229187, "learning_rate": 4.185595300413023e-05, "loss": 0.1472, "step": 18951 }, { "epoch": 0.33803017871793956, "grad_norm": 0.23167891800403595, "learning_rate": 4.185480346878633e-05, "loss": 0.1656, "step": 18952 }, { "epoch": 0.33804801483965324, "grad_norm": 0.21269112825393677, "learning_rate": 4.1853653868107385e-05, "loss": 0.1753, "step": 18953 }, { "epoch": 0.338065850961367, "grad_norm": 0.3019649386405945, "learning_rate": 4.185250420209785e-05, "loss": 0.1394, "step": 18954 }, { "epoch": 0.3380836870830807, "grad_norm": 0.23444515466690063, "learning_rate": 4.18513544707622e-05, "loss": 0.1452, "step": 18955 }, { "epoch": 0.33810152320479436, "grad_norm": 0.24718981981277466, "learning_rate": 4.1850204674104875e-05, "loss": 0.1191, "step": 18956 }, { "epoch": 0.33811935932650805, "grad_norm": 0.2582503855228424, "learning_rate": 4.184905481213034e-05, "loss": 0.1952, "step": 18957 }, { "epoch": 0.33813719544822174, "grad_norm": 0.2911236882209778, "learning_rate": 4.184790488484304e-05, "loss": 0.2184, "step": 18958 }, { "epoch": 0.3381550315699354, "grad_norm": 0.28882530331611633, "learning_rate": 4.184675489224745e-05, "loss": 0.19, "step": 18959 }, { "epoch": 0.3381728676916491, "grad_norm": 0.32744723558425903, "learning_rate": 4.1845604834348015e-05, "loss": 0.21, "step": 18960 }, { "epoch": 0.3381907038133628, "grad_norm": 0.29121214151382446, "learning_rate": 4.18444547111492e-05, "loss": 0.1671, "step": 18961 }, { "epoch": 0.3382085399350765, "grad_norm": 0.22942860424518585, "learning_rate": 4.184330452265546e-05, "loss": 0.1702, "step": 18962 }, { "epoch": 0.33822637605679023, "grad_norm": 0.26729515194892883, "learning_rate": 4.1842154268871254e-05, "loss": 0.2024, "step": 18963 }, { "epoch": 0.3382442121785039, "grad_norm": 0.2039777934551239, "learning_rate": 4.184100394980104e-05, "loss": 0.1405, "step": 18964 }, { "epoch": 0.3382620483002176, "grad_norm": 0.26942625641822815, "learning_rate": 4.1839853565449275e-05, "loss": 0.178, "step": 18965 }, { "epoch": 0.3382798844219313, "grad_norm": 0.3534669578075409, "learning_rate": 4.183870311582043e-05, "loss": 0.1928, "step": 18966 }, { "epoch": 0.338297720543645, "grad_norm": 0.2680956721305847, "learning_rate": 4.183755260091895e-05, "loss": 0.1361, "step": 18967 }, { "epoch": 0.3383155566653587, "grad_norm": 0.3817254900932312, "learning_rate": 4.18364020207493e-05, "loss": 0.2145, "step": 18968 }, { "epoch": 0.33833339278707236, "grad_norm": 0.22404317557811737, "learning_rate": 4.1835251375315944e-05, "loss": 0.1644, "step": 18969 }, { "epoch": 0.33835122890878605, "grad_norm": 0.2065775841474533, "learning_rate": 4.183410066462333e-05, "loss": 0.1455, "step": 18970 }, { "epoch": 0.3383690650304998, "grad_norm": 0.22631201148033142, "learning_rate": 4.183294988867594e-05, "loss": 0.1675, "step": 18971 }, { "epoch": 0.3383869011522135, "grad_norm": 0.24366283416748047, "learning_rate": 4.1831799047478215e-05, "loss": 0.1697, "step": 18972 }, { "epoch": 0.33840473727392717, "grad_norm": 0.2905050814151764, "learning_rate": 4.183064814103463e-05, "loss": 0.1773, "step": 18973 }, { "epoch": 0.33842257339564086, "grad_norm": 0.20870763063430786, "learning_rate": 4.182949716934963e-05, "loss": 0.1723, "step": 18974 }, { "epoch": 0.33844040951735455, "grad_norm": 0.25424304604530334, "learning_rate": 4.182834613242769e-05, "loss": 0.1653, "step": 18975 }, { "epoch": 0.33845824563906823, "grad_norm": 0.19270065426826477, "learning_rate": 4.182719503027327e-05, "loss": 0.1929, "step": 18976 }, { "epoch": 0.3384760817607819, "grad_norm": 0.24269334971904755, "learning_rate": 4.182604386289083e-05, "loss": 0.1599, "step": 18977 }, { "epoch": 0.3384939178824956, "grad_norm": 0.38319239020347595, "learning_rate": 4.182489263028484e-05, "loss": 0.2316, "step": 18978 }, { "epoch": 0.3385117540042093, "grad_norm": 0.25000420212745667, "learning_rate": 4.1823741332459744e-05, "loss": 0.1317, "step": 18979 }, { "epoch": 0.33852959012592304, "grad_norm": 0.23778657615184784, "learning_rate": 4.182258996942001e-05, "loss": 0.1782, "step": 18980 }, { "epoch": 0.33854742624763673, "grad_norm": 0.2352931648492813, "learning_rate": 4.182143854117012e-05, "loss": 0.1787, "step": 18981 }, { "epoch": 0.3385652623693504, "grad_norm": 0.25397995114326477, "learning_rate": 4.1820287047714526e-05, "loss": 0.1149, "step": 18982 }, { "epoch": 0.3385830984910641, "grad_norm": 0.258090078830719, "learning_rate": 4.1819135489057684e-05, "loss": 0.1363, "step": 18983 }, { "epoch": 0.3386009346127778, "grad_norm": 0.2313193827867508, "learning_rate": 4.181798386520406e-05, "loss": 0.2097, "step": 18984 }, { "epoch": 0.3386187707344915, "grad_norm": 0.3382202386856079, "learning_rate": 4.181683217615813e-05, "loss": 0.2026, "step": 18985 }, { "epoch": 0.33863660685620517, "grad_norm": 0.31504493951797485, "learning_rate": 4.181568042192434e-05, "loss": 0.2082, "step": 18986 }, { "epoch": 0.33865444297791886, "grad_norm": 0.24363857507705688, "learning_rate": 4.181452860250717e-05, "loss": 0.1844, "step": 18987 }, { "epoch": 0.3386722790996326, "grad_norm": 0.26484745740890503, "learning_rate": 4.181337671791108e-05, "loss": 0.1689, "step": 18988 }, { "epoch": 0.3386901152213463, "grad_norm": 0.21356326341629028, "learning_rate": 4.1812224768140534e-05, "loss": 0.1451, "step": 18989 }, { "epoch": 0.33870795134306, "grad_norm": 0.43112072348594666, "learning_rate": 4.18110727532e-05, "loss": 0.2201, "step": 18990 }, { "epoch": 0.33872578746477366, "grad_norm": 0.22192348539829254, "learning_rate": 4.180992067309394e-05, "loss": 0.1517, "step": 18991 }, { "epoch": 0.33874362358648735, "grad_norm": 0.2938961684703827, "learning_rate": 4.1808768527826824e-05, "loss": 0.1768, "step": 18992 }, { "epoch": 0.33876145970820104, "grad_norm": 0.3255133032798767, "learning_rate": 4.1807616317403106e-05, "loss": 0.1877, "step": 18993 }, { "epoch": 0.33877929582991473, "grad_norm": 0.2388078272342682, "learning_rate": 4.1806464041827275e-05, "loss": 0.1559, "step": 18994 }, { "epoch": 0.3387971319516284, "grad_norm": 0.17218881845474243, "learning_rate": 4.1805311701103774e-05, "loss": 0.153, "step": 18995 }, { "epoch": 0.33881496807334216, "grad_norm": 0.2763029634952545, "learning_rate": 4.180415929523709e-05, "loss": 0.1719, "step": 18996 }, { "epoch": 0.33883280419505585, "grad_norm": 0.35350432991981506, "learning_rate": 4.180300682423167e-05, "loss": 0.1559, "step": 18997 }, { "epoch": 0.33885064031676954, "grad_norm": 0.3385790288448334, "learning_rate": 4.1801854288092004e-05, "loss": 0.1068, "step": 18998 }, { "epoch": 0.3388684764384832, "grad_norm": 0.2966400682926178, "learning_rate": 4.1800701686822544e-05, "loss": 0.1325, "step": 18999 }, { "epoch": 0.3388863125601969, "grad_norm": 0.21844731271266937, "learning_rate": 4.179954902042775e-05, "loss": 0.1666, "step": 19000 }, { "epoch": 0.3388863125601969, "eval_loss": 0.1632959097623825, "eval_runtime": 107.0663, "eval_samples_per_second": 9.564, "eval_steps_per_second": 1.597, "step": 19000 }, { "epoch": 0.3389041486819106, "grad_norm": 0.25229185819625854, "learning_rate": 4.179839628891211e-05, "loss": 0.1547, "step": 19001 }, { "epoch": 0.3389219848036243, "grad_norm": 0.31327369809150696, "learning_rate": 4.179724349228009e-05, "loss": 0.1199, "step": 19002 }, { "epoch": 0.338939820925338, "grad_norm": 0.32854384183883667, "learning_rate": 4.179609063053615e-05, "loss": 0.1869, "step": 19003 }, { "epoch": 0.33895765704705166, "grad_norm": 0.3303373157978058, "learning_rate": 4.1794937703684754e-05, "loss": 0.1761, "step": 19004 }, { "epoch": 0.3389754931687654, "grad_norm": 0.2542996406555176, "learning_rate": 4.179378471173039e-05, "loss": 0.1598, "step": 19005 }, { "epoch": 0.3389933292904791, "grad_norm": 0.3003520965576172, "learning_rate": 4.1792631654677506e-05, "loss": 0.1924, "step": 19006 }, { "epoch": 0.3390111654121928, "grad_norm": 0.27404484152793884, "learning_rate": 4.1791478532530584e-05, "loss": 0.1501, "step": 19007 }, { "epoch": 0.33902900153390647, "grad_norm": 0.22072987258434296, "learning_rate": 4.179032534529409e-05, "loss": 0.1388, "step": 19008 }, { "epoch": 0.33904683765562016, "grad_norm": 0.2703891396522522, "learning_rate": 4.17891720929725e-05, "loss": 0.1694, "step": 19009 }, { "epoch": 0.33906467377733385, "grad_norm": 0.33079418540000916, "learning_rate": 4.178801877557028e-05, "loss": 0.1501, "step": 19010 }, { "epoch": 0.33908250989904754, "grad_norm": 0.40734580159187317, "learning_rate": 4.17868653930919e-05, "loss": 0.2557, "step": 19011 }, { "epoch": 0.3391003460207612, "grad_norm": 0.23386681079864502, "learning_rate": 4.1785711945541834e-05, "loss": 0.1536, "step": 19012 }, { "epoch": 0.33911818214247497, "grad_norm": 0.24409335851669312, "learning_rate": 4.178455843292455e-05, "loss": 0.201, "step": 19013 }, { "epoch": 0.33913601826418865, "grad_norm": 0.2245641052722931, "learning_rate": 4.178340485524451e-05, "loss": 0.1115, "step": 19014 }, { "epoch": 0.33915385438590234, "grad_norm": 0.8578737378120422, "learning_rate": 4.178225121250621e-05, "loss": 0.1742, "step": 19015 }, { "epoch": 0.33917169050761603, "grad_norm": 0.20405761897563934, "learning_rate": 4.1781097504714106e-05, "loss": 0.1538, "step": 19016 }, { "epoch": 0.3391895266293297, "grad_norm": 0.27465230226516724, "learning_rate": 4.177994373187266e-05, "loss": 0.1398, "step": 19017 }, { "epoch": 0.3392073627510434, "grad_norm": 0.24772217869758606, "learning_rate": 4.177878989398637e-05, "loss": 0.1208, "step": 19018 }, { "epoch": 0.3392251988727571, "grad_norm": 0.24304911494255066, "learning_rate": 4.1777635991059686e-05, "loss": 0.1103, "step": 19019 }, { "epoch": 0.3392430349944708, "grad_norm": 0.3815910518169403, "learning_rate": 4.17764820230971e-05, "loss": 0.1725, "step": 19020 }, { "epoch": 0.33926087111618447, "grad_norm": 0.27894261479377747, "learning_rate": 4.177532799010307e-05, "loss": 0.1723, "step": 19021 }, { "epoch": 0.3392787072378982, "grad_norm": 0.25985851883888245, "learning_rate": 4.1774173892082066e-05, "loss": 0.1507, "step": 19022 }, { "epoch": 0.3392965433596119, "grad_norm": 0.2939731180667877, "learning_rate": 4.177301972903858e-05, "loss": 0.1664, "step": 19023 }, { "epoch": 0.3393143794813256, "grad_norm": 0.22582286596298218, "learning_rate": 4.1771865500977084e-05, "loss": 0.2168, "step": 19024 }, { "epoch": 0.3393322156030393, "grad_norm": 0.2266026735305786, "learning_rate": 4.1770711207902034e-05, "loss": 0.1583, "step": 19025 }, { "epoch": 0.33935005172475297, "grad_norm": 0.33099600672721863, "learning_rate": 4.176955684981792e-05, "loss": 0.2193, "step": 19026 }, { "epoch": 0.33936788784646665, "grad_norm": 0.22976182401180267, "learning_rate": 4.1768402426729205e-05, "loss": 0.1169, "step": 19027 }, { "epoch": 0.33938572396818034, "grad_norm": 0.2766157388687134, "learning_rate": 4.176724793864037e-05, "loss": 0.2064, "step": 19028 }, { "epoch": 0.33940356008989403, "grad_norm": 0.22305607795715332, "learning_rate": 4.176609338555589e-05, "loss": 0.1565, "step": 19029 }, { "epoch": 0.3394213962116078, "grad_norm": 0.2595331370830536, "learning_rate": 4.176493876748025e-05, "loss": 0.1679, "step": 19030 }, { "epoch": 0.33943923233332146, "grad_norm": 0.29117804765701294, "learning_rate": 4.176378408441791e-05, "loss": 0.1959, "step": 19031 }, { "epoch": 0.33945706845503515, "grad_norm": 0.31189867854118347, "learning_rate": 4.1762629336373356e-05, "loss": 0.1683, "step": 19032 }, { "epoch": 0.33947490457674884, "grad_norm": 0.293280690908432, "learning_rate": 4.176147452335106e-05, "loss": 0.1445, "step": 19033 }, { "epoch": 0.3394927406984625, "grad_norm": 0.29017212986946106, "learning_rate": 4.17603196453555e-05, "loss": 0.1898, "step": 19034 }, { "epoch": 0.3395105768201762, "grad_norm": 0.2960911989212036, "learning_rate": 4.1759164702391155e-05, "loss": 0.1466, "step": 19035 }, { "epoch": 0.3395284129418899, "grad_norm": 0.28645211458206177, "learning_rate": 4.17580096944625e-05, "loss": 0.1503, "step": 19036 }, { "epoch": 0.3395462490636036, "grad_norm": 0.2455846220254898, "learning_rate": 4.175685462157401e-05, "loss": 0.1521, "step": 19037 }, { "epoch": 0.3395640851853173, "grad_norm": 0.285341739654541, "learning_rate": 4.1755699483730157e-05, "loss": 0.1525, "step": 19038 }, { "epoch": 0.339581921307031, "grad_norm": 0.25104281306266785, "learning_rate": 4.1754544280935426e-05, "loss": 0.1301, "step": 19039 }, { "epoch": 0.3395997574287447, "grad_norm": 0.3304446041584015, "learning_rate": 4.17533890131943e-05, "loss": 0.1836, "step": 19040 }, { "epoch": 0.3396175935504584, "grad_norm": 0.34443068504333496, "learning_rate": 4.175223368051126e-05, "loss": 0.1941, "step": 19041 }, { "epoch": 0.3396354296721721, "grad_norm": 0.3427804410457611, "learning_rate": 4.1751078282890757e-05, "loss": 0.1338, "step": 19042 }, { "epoch": 0.3396532657938858, "grad_norm": 0.2696840763092041, "learning_rate": 4.174992282033729e-05, "loss": 0.1887, "step": 19043 }, { "epoch": 0.33967110191559946, "grad_norm": 0.24931056797504425, "learning_rate": 4.174876729285536e-05, "loss": 0.126, "step": 19044 }, { "epoch": 0.33968893803731315, "grad_norm": 0.28129658102989197, "learning_rate": 4.1747611700449406e-05, "loss": 0.1261, "step": 19045 }, { "epoch": 0.33970677415902684, "grad_norm": 0.23587551712989807, "learning_rate": 4.174645604312393e-05, "loss": 0.1395, "step": 19046 }, { "epoch": 0.3397246102807406, "grad_norm": 0.3077877461910248, "learning_rate": 4.17453003208834e-05, "loss": 0.1046, "step": 19047 }, { "epoch": 0.33974244640245427, "grad_norm": 0.26231181621551514, "learning_rate": 4.174414453373231e-05, "loss": 0.1976, "step": 19048 }, { "epoch": 0.33976028252416796, "grad_norm": 0.2857707142829895, "learning_rate": 4.174298868167512e-05, "loss": 0.1199, "step": 19049 }, { "epoch": 0.33977811864588164, "grad_norm": 0.3958567678928375, "learning_rate": 4.1741832764716335e-05, "loss": 0.2751, "step": 19050 }, { "epoch": 0.33979595476759533, "grad_norm": 0.2019142359495163, "learning_rate": 4.174067678286042e-05, "loss": 0.1544, "step": 19051 }, { "epoch": 0.339813790889309, "grad_norm": 0.2342025339603424, "learning_rate": 4.173952073611186e-05, "loss": 0.1301, "step": 19052 }, { "epoch": 0.3398316270110227, "grad_norm": 0.3273864686489105, "learning_rate": 4.173836462447514e-05, "loss": 0.1369, "step": 19053 }, { "epoch": 0.3398494631327364, "grad_norm": 0.2864091992378235, "learning_rate": 4.173720844795473e-05, "loss": 0.1333, "step": 19054 }, { "epoch": 0.33986729925445014, "grad_norm": 0.3018883466720581, "learning_rate": 4.173605220655512e-05, "loss": 0.132, "step": 19055 }, { "epoch": 0.3398851353761638, "grad_norm": 0.2493281364440918, "learning_rate": 4.17348959002808e-05, "loss": 0.1469, "step": 19056 }, { "epoch": 0.3399029714978775, "grad_norm": 0.3162676990032196, "learning_rate": 4.1733739529136234e-05, "loss": 0.2015, "step": 19057 }, { "epoch": 0.3399208076195912, "grad_norm": 0.3037779629230499, "learning_rate": 4.1732583093125914e-05, "loss": 0.1714, "step": 19058 }, { "epoch": 0.3399386437413049, "grad_norm": 0.34416380524635315, "learning_rate": 4.173142659225433e-05, "loss": 0.1774, "step": 19059 }, { "epoch": 0.3399564798630186, "grad_norm": 0.2441510409116745, "learning_rate": 4.1730270026525955e-05, "loss": 0.165, "step": 19060 }, { "epoch": 0.33997431598473227, "grad_norm": 0.3016694188117981, "learning_rate": 4.1729113395945276e-05, "loss": 0.1479, "step": 19061 }, { "epoch": 0.33999215210644596, "grad_norm": 0.32537052035331726, "learning_rate": 4.172795670051677e-05, "loss": 0.185, "step": 19062 }, { "epoch": 0.34000998822815964, "grad_norm": 0.18432371318340302, "learning_rate": 4.1726799940244924e-05, "loss": 0.122, "step": 19063 }, { "epoch": 0.3400278243498734, "grad_norm": 0.2336127758026123, "learning_rate": 4.172564311513423e-05, "loss": 0.1561, "step": 19064 }, { "epoch": 0.3400456604715871, "grad_norm": 0.23246034979820251, "learning_rate": 4.172448622518917e-05, "loss": 0.1427, "step": 19065 }, { "epoch": 0.34006349659330076, "grad_norm": 0.3123519718647003, "learning_rate": 4.172332927041422e-05, "loss": 0.1826, "step": 19066 }, { "epoch": 0.34008133271501445, "grad_norm": 0.30995211005210876, "learning_rate": 4.172217225081387e-05, "loss": 0.1823, "step": 19067 }, { "epoch": 0.34009916883672814, "grad_norm": 0.3009769022464752, "learning_rate": 4.1721015166392606e-05, "loss": 0.1199, "step": 19068 }, { "epoch": 0.3401170049584418, "grad_norm": 0.3721808195114136, "learning_rate": 4.171985801715491e-05, "loss": 0.169, "step": 19069 }, { "epoch": 0.3401348410801555, "grad_norm": 0.26624995470046997, "learning_rate": 4.171870080310527e-05, "loss": 0.1986, "step": 19070 }, { "epoch": 0.3401526772018692, "grad_norm": 0.24667924642562866, "learning_rate": 4.171754352424817e-05, "loss": 0.1645, "step": 19071 }, { "epoch": 0.34017051332358295, "grad_norm": 0.2952944040298462, "learning_rate": 4.17163861805881e-05, "loss": 0.1941, "step": 19072 }, { "epoch": 0.34018834944529663, "grad_norm": 0.2621082067489624, "learning_rate": 4.1715228772129546e-05, "loss": 0.1345, "step": 19073 }, { "epoch": 0.3402061855670103, "grad_norm": 0.3187590539455414, "learning_rate": 4.1714071298876987e-05, "loss": 0.1682, "step": 19074 }, { "epoch": 0.340224021688724, "grad_norm": 0.1794048696756363, "learning_rate": 4.171291376083492e-05, "loss": 0.138, "step": 19075 }, { "epoch": 0.3402418578104377, "grad_norm": 0.24293185770511627, "learning_rate": 4.171175615800782e-05, "loss": 0.2048, "step": 19076 }, { "epoch": 0.3402596939321514, "grad_norm": 0.28766682744026184, "learning_rate": 4.1710598490400175e-05, "loss": 0.143, "step": 19077 }, { "epoch": 0.3402775300538651, "grad_norm": 0.2681846618652344, "learning_rate": 4.170944075801649e-05, "loss": 0.1567, "step": 19078 }, { "epoch": 0.34029536617557876, "grad_norm": 0.27746152877807617, "learning_rate": 4.1708282960861245e-05, "loss": 0.1827, "step": 19079 }, { "epoch": 0.34031320229729245, "grad_norm": 0.30340448021888733, "learning_rate": 4.170712509893892e-05, "loss": 0.1978, "step": 19080 }, { "epoch": 0.3403310384190062, "grad_norm": 0.24910910427570343, "learning_rate": 4.170596717225401e-05, "loss": 0.1615, "step": 19081 }, { "epoch": 0.3403488745407199, "grad_norm": 0.32467159628868103, "learning_rate": 4.1704809180810986e-05, "loss": 0.1925, "step": 19082 }, { "epoch": 0.34036671066243357, "grad_norm": 0.28001829981803894, "learning_rate": 4.1703651124614374e-05, "loss": 0.1825, "step": 19083 }, { "epoch": 0.34038454678414726, "grad_norm": 0.28363335132598877, "learning_rate": 4.1702493003668625e-05, "loss": 0.0969, "step": 19084 }, { "epoch": 0.34040238290586095, "grad_norm": 0.2827285826206207, "learning_rate": 4.170133481797825e-05, "loss": 0.1875, "step": 19085 }, { "epoch": 0.34042021902757463, "grad_norm": 0.2151196002960205, "learning_rate": 4.170017656754773e-05, "loss": 0.1284, "step": 19086 }, { "epoch": 0.3404380551492883, "grad_norm": 0.3551812171936035, "learning_rate": 4.169901825238156e-05, "loss": 0.1745, "step": 19087 }, { "epoch": 0.340455891271002, "grad_norm": 0.19007790088653564, "learning_rate": 4.169785987248423e-05, "loss": 0.1384, "step": 19088 }, { "epoch": 0.34047372739271575, "grad_norm": 0.2538352906703949, "learning_rate": 4.169670142786023e-05, "loss": 0.1586, "step": 19089 }, { "epoch": 0.34049156351442944, "grad_norm": 0.28654947876930237, "learning_rate": 4.169554291851404e-05, "loss": 0.1656, "step": 19090 }, { "epoch": 0.34050939963614313, "grad_norm": 0.185479074716568, "learning_rate": 4.1694384344450164e-05, "loss": 0.1138, "step": 19091 }, { "epoch": 0.3405272357578568, "grad_norm": 0.25111615657806396, "learning_rate": 4.169322570567309e-05, "loss": 0.1756, "step": 19092 }, { "epoch": 0.3405450718795705, "grad_norm": 0.26057636737823486, "learning_rate": 4.16920670021873e-05, "loss": 0.2197, "step": 19093 }, { "epoch": 0.3405629080012842, "grad_norm": 0.3298758566379547, "learning_rate": 4.16909082339973e-05, "loss": 0.159, "step": 19094 }, { "epoch": 0.3405807441229979, "grad_norm": 0.31102806329727173, "learning_rate": 4.168974940110757e-05, "loss": 0.2075, "step": 19095 }, { "epoch": 0.34059858024471157, "grad_norm": 0.20528429746627808, "learning_rate": 4.168859050352261e-05, "loss": 0.1812, "step": 19096 }, { "epoch": 0.3406164163664253, "grad_norm": 0.2151535302400589, "learning_rate": 4.168743154124691e-05, "loss": 0.1337, "step": 19097 }, { "epoch": 0.340634252488139, "grad_norm": 0.22697141766548157, "learning_rate": 4.168627251428496e-05, "loss": 0.1541, "step": 19098 }, { "epoch": 0.3406520886098527, "grad_norm": 0.29048141837120056, "learning_rate": 4.168511342264125e-05, "loss": 0.1924, "step": 19099 }, { "epoch": 0.3406699247315664, "grad_norm": 0.3127744793891907, "learning_rate": 4.168395426632029e-05, "loss": 0.1863, "step": 19100 }, { "epoch": 0.34068776085328006, "grad_norm": 0.24152883887290955, "learning_rate": 4.168279504532655e-05, "loss": 0.1631, "step": 19101 }, { "epoch": 0.34070559697499375, "grad_norm": 0.2404649704694748, "learning_rate": 4.168163575966454e-05, "loss": 0.1684, "step": 19102 }, { "epoch": 0.34072343309670744, "grad_norm": 0.31002750992774963, "learning_rate": 4.1680476409338744e-05, "loss": 0.1714, "step": 19103 }, { "epoch": 0.34074126921842113, "grad_norm": 0.2534800171852112, "learning_rate": 4.1679316994353664e-05, "loss": 0.177, "step": 19104 }, { "epoch": 0.3407591053401348, "grad_norm": 0.275722473859787, "learning_rate": 4.1678157514713786e-05, "loss": 0.1377, "step": 19105 }, { "epoch": 0.34077694146184856, "grad_norm": 0.21818099915981293, "learning_rate": 4.167699797042362e-05, "loss": 0.1374, "step": 19106 }, { "epoch": 0.34079477758356225, "grad_norm": 0.24218273162841797, "learning_rate": 4.1675838361487637e-05, "loss": 0.1574, "step": 19107 }, { "epoch": 0.34081261370527594, "grad_norm": 0.21971909701824188, "learning_rate": 4.167467868791034e-05, "loss": 0.1436, "step": 19108 }, { "epoch": 0.3408304498269896, "grad_norm": 0.3221576511859894, "learning_rate": 4.167351894969624e-05, "loss": 0.1952, "step": 19109 }, { "epoch": 0.3408482859487033, "grad_norm": 0.34070682525634766, "learning_rate": 4.167235914684982e-05, "loss": 0.2105, "step": 19110 }, { "epoch": 0.340866122070417, "grad_norm": 0.21998755633831024, "learning_rate": 4.167119927937558e-05, "loss": 0.1448, "step": 19111 }, { "epoch": 0.3408839581921307, "grad_norm": 0.34537848830223083, "learning_rate": 4.167003934727801e-05, "loss": 0.2052, "step": 19112 }, { "epoch": 0.3409017943138444, "grad_norm": 0.33918866515159607, "learning_rate": 4.166887935056162e-05, "loss": 0.1506, "step": 19113 }, { "epoch": 0.3409196304355581, "grad_norm": 0.23078452050685883, "learning_rate": 4.166771928923088e-05, "loss": 0.1933, "step": 19114 }, { "epoch": 0.3409374665572718, "grad_norm": 0.31279078125953674, "learning_rate": 4.1666559163290307e-05, "loss": 0.1446, "step": 19115 }, { "epoch": 0.3409553026789855, "grad_norm": 0.2269740104675293, "learning_rate": 4.16653989727444e-05, "loss": 0.1582, "step": 19116 }, { "epoch": 0.3409731388006992, "grad_norm": 0.3424963355064392, "learning_rate": 4.166423871759765e-05, "loss": 0.1955, "step": 19117 }, { "epoch": 0.34099097492241287, "grad_norm": 0.3185013234615326, "learning_rate": 4.166307839785456e-05, "loss": 0.1999, "step": 19118 }, { "epoch": 0.34100881104412656, "grad_norm": 0.2266213446855545, "learning_rate": 4.1661918013519606e-05, "loss": 0.1444, "step": 19119 }, { "epoch": 0.34102664716584025, "grad_norm": 0.2339271605014801, "learning_rate": 4.166075756459732e-05, "loss": 0.1623, "step": 19120 }, { "epoch": 0.34104448328755393, "grad_norm": 0.33711886405944824, "learning_rate": 4.165959705109217e-05, "loss": 0.182, "step": 19121 }, { "epoch": 0.3410623194092676, "grad_norm": 0.1971094012260437, "learning_rate": 4.1658436473008676e-05, "loss": 0.1492, "step": 19122 }, { "epoch": 0.34108015553098137, "grad_norm": 0.21723458170890808, "learning_rate": 4.165727583035133e-05, "loss": 0.1673, "step": 19123 }, { "epoch": 0.34109799165269505, "grad_norm": 0.37158021330833435, "learning_rate": 4.165611512312463e-05, "loss": 0.1622, "step": 19124 }, { "epoch": 0.34111582777440874, "grad_norm": 0.2687208950519562, "learning_rate": 4.1654954351333065e-05, "loss": 0.1682, "step": 19125 }, { "epoch": 0.34113366389612243, "grad_norm": 0.27835360169410706, "learning_rate": 4.1653793514981156e-05, "loss": 0.1718, "step": 19126 }, { "epoch": 0.3411515000178361, "grad_norm": 0.2776113748550415, "learning_rate": 4.1652632614073383e-05, "loss": 0.2274, "step": 19127 }, { "epoch": 0.3411693361395498, "grad_norm": 0.23038287460803986, "learning_rate": 4.165147164861426e-05, "loss": 0.17, "step": 19128 }, { "epoch": 0.3411871722612635, "grad_norm": 0.2833652198314667, "learning_rate": 4.165031061860827e-05, "loss": 0.1785, "step": 19129 }, { "epoch": 0.3412050083829772, "grad_norm": 0.2975352108478546, "learning_rate": 4.1649149524059936e-05, "loss": 0.1668, "step": 19130 }, { "epoch": 0.3412228445046909, "grad_norm": 0.3174719214439392, "learning_rate": 4.1647988364973756e-05, "loss": 0.1964, "step": 19131 }, { "epoch": 0.3412406806264046, "grad_norm": 0.211166113615036, "learning_rate": 4.16468271413542e-05, "loss": 0.1154, "step": 19132 }, { "epoch": 0.3412585167481183, "grad_norm": 0.2222771942615509, "learning_rate": 4.1645665853205816e-05, "loss": 0.1886, "step": 19133 }, { "epoch": 0.341276352869832, "grad_norm": 0.21896757185459137, "learning_rate": 4.164450450053307e-05, "loss": 0.1477, "step": 19134 }, { "epoch": 0.3412941889915457, "grad_norm": 0.2650519013404846, "learning_rate": 4.164334308334048e-05, "loss": 0.1594, "step": 19135 }, { "epoch": 0.34131202511325937, "grad_norm": 0.2698211073875427, "learning_rate": 4.1642181601632534e-05, "loss": 0.1415, "step": 19136 }, { "epoch": 0.34132986123497305, "grad_norm": 0.28691112995147705, "learning_rate": 4.164102005541376e-05, "loss": 0.1493, "step": 19137 }, { "epoch": 0.34134769735668674, "grad_norm": 0.37525513768196106, "learning_rate": 4.163985844468863e-05, "loss": 0.2048, "step": 19138 }, { "epoch": 0.34136553347840043, "grad_norm": 0.18402189016342163, "learning_rate": 4.163869676946167e-05, "loss": 0.1517, "step": 19139 }, { "epoch": 0.3413833696001142, "grad_norm": 0.2589471638202667, "learning_rate": 4.1637535029737367e-05, "loss": 0.2165, "step": 19140 }, { "epoch": 0.34140120572182786, "grad_norm": 0.3775065839290619, "learning_rate": 4.1636373225520245e-05, "loss": 0.2236, "step": 19141 }, { "epoch": 0.34141904184354155, "grad_norm": 0.32576048374176025, "learning_rate": 4.163521135681478e-05, "loss": 0.2056, "step": 19142 }, { "epoch": 0.34143687796525524, "grad_norm": 0.5183864235877991, "learning_rate": 4.16340494236255e-05, "loss": 0.2, "step": 19143 }, { "epoch": 0.3414547140869689, "grad_norm": 0.23035433888435364, "learning_rate": 4.1632887425956894e-05, "loss": 0.1526, "step": 19144 }, { "epoch": 0.3414725502086826, "grad_norm": 0.34884533286094666, "learning_rate": 4.163172536381347e-05, "loss": 0.1531, "step": 19145 }, { "epoch": 0.3414903863303963, "grad_norm": 0.24414996802806854, "learning_rate": 4.163056323719974e-05, "loss": 0.1502, "step": 19146 }, { "epoch": 0.34150822245211, "grad_norm": 0.2550983130931854, "learning_rate": 4.16294010461202e-05, "loss": 0.189, "step": 19147 }, { "epoch": 0.34152605857382373, "grad_norm": 0.22872644662857056, "learning_rate": 4.162823879057935e-05, "loss": 0.177, "step": 19148 }, { "epoch": 0.3415438946955374, "grad_norm": 0.2678053379058838, "learning_rate": 4.162707647058172e-05, "loss": 0.1767, "step": 19149 }, { "epoch": 0.3415617308172511, "grad_norm": 0.26402002573013306, "learning_rate": 4.162591408613179e-05, "loss": 0.1402, "step": 19150 }, { "epoch": 0.3415795669389648, "grad_norm": 0.34151071310043335, "learning_rate": 4.1624751637234075e-05, "loss": 0.1846, "step": 19151 }, { "epoch": 0.3415974030606785, "grad_norm": 0.3045713007450104, "learning_rate": 4.162358912389308e-05, "loss": 0.178, "step": 19152 }, { "epoch": 0.34161523918239217, "grad_norm": 0.3080938756465912, "learning_rate": 4.1622426546113306e-05, "loss": 0.1307, "step": 19153 }, { "epoch": 0.34163307530410586, "grad_norm": 0.34643495082855225, "learning_rate": 4.1621263903899276e-05, "loss": 0.1692, "step": 19154 }, { "epoch": 0.34165091142581955, "grad_norm": 0.2636805772781372, "learning_rate": 4.162010119725548e-05, "loss": 0.1668, "step": 19155 }, { "epoch": 0.3416687475475333, "grad_norm": 0.40771761536598206, "learning_rate": 4.161893842618643e-05, "loss": 0.203, "step": 19156 }, { "epoch": 0.341686583669247, "grad_norm": 0.346594899892807, "learning_rate": 4.1617775590696645e-05, "loss": 0.2061, "step": 19157 }, { "epoch": 0.34170441979096067, "grad_norm": 0.26792019605636597, "learning_rate": 4.1616612690790615e-05, "loss": 0.1563, "step": 19158 }, { "epoch": 0.34172225591267436, "grad_norm": 0.2884537875652313, "learning_rate": 4.161544972647285e-05, "loss": 0.1703, "step": 19159 }, { "epoch": 0.34174009203438804, "grad_norm": 0.2372352033853531, "learning_rate": 4.161428669774787e-05, "loss": 0.1776, "step": 19160 }, { "epoch": 0.34175792815610173, "grad_norm": 0.26953575015068054, "learning_rate": 4.1613123604620174e-05, "loss": 0.1499, "step": 19161 }, { "epoch": 0.3417757642778154, "grad_norm": 0.3464611768722534, "learning_rate": 4.161196044709428e-05, "loss": 0.1914, "step": 19162 }, { "epoch": 0.3417936003995291, "grad_norm": 0.31093481183052063, "learning_rate": 4.161079722517468e-05, "loss": 0.14, "step": 19163 }, { "epoch": 0.3418114365212428, "grad_norm": 0.24490484595298767, "learning_rate": 4.160963393886589e-05, "loss": 0.2012, "step": 19164 }, { "epoch": 0.34182927264295654, "grad_norm": 0.36950263381004333, "learning_rate": 4.160847058817243e-05, "loss": 0.2236, "step": 19165 }, { "epoch": 0.3418471087646702, "grad_norm": 0.39886242151260376, "learning_rate": 4.1607307173098796e-05, "loss": 0.1388, "step": 19166 }, { "epoch": 0.3418649448863839, "grad_norm": 0.25907090306282043, "learning_rate": 4.1606143693649516e-05, "loss": 0.1912, "step": 19167 }, { "epoch": 0.3418827810080976, "grad_norm": 0.2528982162475586, "learning_rate": 4.1604980149829074e-05, "loss": 0.2227, "step": 19168 }, { "epoch": 0.3419006171298113, "grad_norm": 0.18718492984771729, "learning_rate": 4.1603816541642e-05, "loss": 0.124, "step": 19169 }, { "epoch": 0.341918453251525, "grad_norm": 0.2484995424747467, "learning_rate": 4.16026528690928e-05, "loss": 0.2193, "step": 19170 }, { "epoch": 0.34193628937323867, "grad_norm": 0.36052754521369934, "learning_rate": 4.160148913218598e-05, "loss": 0.2217, "step": 19171 }, { "epoch": 0.34195412549495235, "grad_norm": 0.2591293156147003, "learning_rate": 4.1600325330926046e-05, "loss": 0.2093, "step": 19172 }, { "epoch": 0.3419719616166661, "grad_norm": 0.23855255544185638, "learning_rate": 4.159916146531753e-05, "loss": 0.1235, "step": 19173 }, { "epoch": 0.3419897977383798, "grad_norm": 0.2903277277946472, "learning_rate": 4.159799753536493e-05, "loss": 0.1701, "step": 19174 }, { "epoch": 0.3420076338600935, "grad_norm": 0.23712298274040222, "learning_rate": 4.159683354107275e-05, "loss": 0.1645, "step": 19175 }, { "epoch": 0.34202546998180716, "grad_norm": 0.27789393067359924, "learning_rate": 4.159566948244552e-05, "loss": 0.1375, "step": 19176 }, { "epoch": 0.34204330610352085, "grad_norm": 0.3419671356678009, "learning_rate": 4.159450535948773e-05, "loss": 0.1699, "step": 19177 }, { "epoch": 0.34206114222523454, "grad_norm": 0.25967729091644287, "learning_rate": 4.1593341172203925e-05, "loss": 0.1893, "step": 19178 }, { "epoch": 0.3420789783469482, "grad_norm": 0.2332470715045929, "learning_rate": 4.159217692059858e-05, "loss": 0.1734, "step": 19179 }, { "epoch": 0.3420968144686619, "grad_norm": 0.26110801100730896, "learning_rate": 4.1591012604676236e-05, "loss": 0.1873, "step": 19180 }, { "epoch": 0.3421146505903756, "grad_norm": 0.26965662837028503, "learning_rate": 4.1589848224441394e-05, "loss": 0.1876, "step": 19181 }, { "epoch": 0.34213248671208935, "grad_norm": 0.29505789279937744, "learning_rate": 4.158868377989858e-05, "loss": 0.1499, "step": 19182 }, { "epoch": 0.34215032283380303, "grad_norm": 0.2164822220802307, "learning_rate": 4.158751927105229e-05, "loss": 0.1464, "step": 19183 }, { "epoch": 0.3421681589555167, "grad_norm": 0.25503554940223694, "learning_rate": 4.1586354697907043e-05, "loss": 0.1345, "step": 19184 }, { "epoch": 0.3421859950772304, "grad_norm": 0.21500547230243683, "learning_rate": 4.158519006046736e-05, "loss": 0.2017, "step": 19185 }, { "epoch": 0.3422038311989441, "grad_norm": 0.26423928141593933, "learning_rate": 4.1584025358737754e-05, "loss": 0.1562, "step": 19186 }, { "epoch": 0.3422216673206578, "grad_norm": 0.25571408867836, "learning_rate": 4.158286059272273e-05, "loss": 0.1515, "step": 19187 }, { "epoch": 0.3422395034423715, "grad_norm": 0.2532646358013153, "learning_rate": 4.158169576242682e-05, "loss": 0.1189, "step": 19188 }, { "epoch": 0.34225733956408516, "grad_norm": 0.2708805799484253, "learning_rate": 4.158053086785453e-05, "loss": 0.211, "step": 19189 }, { "epoch": 0.3422751756857989, "grad_norm": 0.4668770134449005, "learning_rate": 4.157936590901036e-05, "loss": 0.1475, "step": 19190 }, { "epoch": 0.3422930118075126, "grad_norm": 0.23428738117218018, "learning_rate": 4.157820088589886e-05, "loss": 0.1508, "step": 19191 }, { "epoch": 0.3423108479292263, "grad_norm": 0.26442089676856995, "learning_rate": 4.157703579852452e-05, "loss": 0.1223, "step": 19192 }, { "epoch": 0.34232868405093997, "grad_norm": 0.35886138677597046, "learning_rate": 4.157587064689187e-05, "loss": 0.1032, "step": 19193 }, { "epoch": 0.34234652017265366, "grad_norm": 0.26686227321624756, "learning_rate": 4.157470543100541e-05, "loss": 0.1678, "step": 19194 }, { "epoch": 0.34236435629436734, "grad_norm": 0.21827208995819092, "learning_rate": 4.1573540150869674e-05, "loss": 0.1502, "step": 19195 }, { "epoch": 0.34238219241608103, "grad_norm": 0.22195547819137573, "learning_rate": 4.1572374806489164e-05, "loss": 0.1194, "step": 19196 }, { "epoch": 0.3424000285377947, "grad_norm": 0.2899116575717926, "learning_rate": 4.1571209397868415e-05, "loss": 0.1814, "step": 19197 }, { "epoch": 0.34241786465950846, "grad_norm": 0.31989017128944397, "learning_rate": 4.157004392501193e-05, "loss": 0.1658, "step": 19198 }, { "epoch": 0.34243570078122215, "grad_norm": 0.284021258354187, "learning_rate": 4.156887838792423e-05, "loss": 0.2061, "step": 19199 }, { "epoch": 0.34245353690293584, "grad_norm": 0.3226366639137268, "learning_rate": 4.156771278660984e-05, "loss": 0.1928, "step": 19200 }, { "epoch": 0.34247137302464953, "grad_norm": 0.3172580897808075, "learning_rate": 4.1566547121073274e-05, "loss": 0.1593, "step": 19201 }, { "epoch": 0.3424892091463632, "grad_norm": 0.2661484181880951, "learning_rate": 4.1565381391319045e-05, "loss": 0.1857, "step": 19202 }, { "epoch": 0.3425070452680769, "grad_norm": 0.42309898138046265, "learning_rate": 4.1564215597351675e-05, "loss": 0.1459, "step": 19203 }, { "epoch": 0.3425248813897906, "grad_norm": 0.24071981012821198, "learning_rate": 4.1563049739175684e-05, "loss": 0.1326, "step": 19204 }, { "epoch": 0.3425427175115043, "grad_norm": 0.399253249168396, "learning_rate": 4.156188381679559e-05, "loss": 0.1559, "step": 19205 }, { "epoch": 0.34256055363321797, "grad_norm": 0.28449612855911255, "learning_rate": 4.1560717830215924e-05, "loss": 0.1387, "step": 19206 }, { "epoch": 0.3425783897549317, "grad_norm": 0.2965076267719269, "learning_rate": 4.155955177944119e-05, "loss": 0.147, "step": 19207 }, { "epoch": 0.3425962258766454, "grad_norm": 0.23407036066055298, "learning_rate": 4.155838566447591e-05, "loss": 0.153, "step": 19208 }, { "epoch": 0.3426140619983591, "grad_norm": 0.2760251760482788, "learning_rate": 4.155721948532462e-05, "loss": 0.2395, "step": 19209 }, { "epoch": 0.3426318981200728, "grad_norm": 0.21404099464416504, "learning_rate": 4.155605324199181e-05, "loss": 0.1712, "step": 19210 }, { "epoch": 0.34264973424178646, "grad_norm": 0.292790949344635, "learning_rate": 4.155488693448203e-05, "loss": 0.1192, "step": 19211 }, { "epoch": 0.34266757036350015, "grad_norm": 0.34389957785606384, "learning_rate": 4.155372056279979e-05, "loss": 0.1546, "step": 19212 }, { "epoch": 0.34268540648521384, "grad_norm": 0.3921763598918915, "learning_rate": 4.155255412694962e-05, "loss": 0.1958, "step": 19213 }, { "epoch": 0.3427032426069275, "grad_norm": 0.44704189896583557, "learning_rate": 4.155138762693602e-05, "loss": 0.1292, "step": 19214 }, { "epoch": 0.34272107872864127, "grad_norm": 0.3125969469547272, "learning_rate": 4.155022106276353e-05, "loss": 0.1621, "step": 19215 }, { "epoch": 0.34273891485035496, "grad_norm": 0.23105350136756897, "learning_rate": 4.1549054434436665e-05, "loss": 0.1834, "step": 19216 }, { "epoch": 0.34275675097206865, "grad_norm": 0.24655385315418243, "learning_rate": 4.1547887741959954e-05, "loss": 0.1686, "step": 19217 }, { "epoch": 0.34277458709378233, "grad_norm": 0.3027952015399933, "learning_rate": 4.1546720985337904e-05, "loss": 0.1472, "step": 19218 }, { "epoch": 0.342792423215496, "grad_norm": 0.3027576208114624, "learning_rate": 4.154555416457506e-05, "loss": 0.1678, "step": 19219 }, { "epoch": 0.3428102593372097, "grad_norm": 0.28220006823539734, "learning_rate": 4.154438727967592e-05, "loss": 0.1136, "step": 19220 }, { "epoch": 0.3428280954589234, "grad_norm": 0.3291475176811218, "learning_rate": 4.154322033064504e-05, "loss": 0.178, "step": 19221 }, { "epoch": 0.3428459315806371, "grad_norm": 0.2588566541671753, "learning_rate": 4.154205331748691e-05, "loss": 0.1692, "step": 19222 }, { "epoch": 0.3428637677023508, "grad_norm": 0.24905306100845337, "learning_rate": 4.1540886240206064e-05, "loss": 0.2073, "step": 19223 }, { "epoch": 0.3428816038240645, "grad_norm": 0.30145522952079773, "learning_rate": 4.153971909880705e-05, "loss": 0.2173, "step": 19224 }, { "epoch": 0.3428994399457782, "grad_norm": 0.24704104661941528, "learning_rate": 4.153855189329435e-05, "loss": 0.1644, "step": 19225 }, { "epoch": 0.3429172760674919, "grad_norm": 0.2351163923740387, "learning_rate": 4.1537384623672523e-05, "loss": 0.1173, "step": 19226 }, { "epoch": 0.3429351121892056, "grad_norm": 0.2364279180765152, "learning_rate": 4.1536217289946077e-05, "loss": 0.1555, "step": 19227 }, { "epoch": 0.34295294831091927, "grad_norm": 0.3030035197734833, "learning_rate": 4.153504989211955e-05, "loss": 0.2037, "step": 19228 }, { "epoch": 0.34297078443263296, "grad_norm": 0.22794194519519806, "learning_rate": 4.1533882430197446e-05, "loss": 0.2207, "step": 19229 }, { "epoch": 0.34298862055434665, "grad_norm": 0.31897976994514465, "learning_rate": 4.153271490418431e-05, "loss": 0.2311, "step": 19230 }, { "epoch": 0.34300645667606033, "grad_norm": 0.3582265079021454, "learning_rate": 4.1531547314084664e-05, "loss": 0.1762, "step": 19231 }, { "epoch": 0.3430242927977741, "grad_norm": 0.2571417987346649, "learning_rate": 4.153037965990302e-05, "loss": 0.1894, "step": 19232 }, { "epoch": 0.34304212891948777, "grad_norm": 0.2909238040447235, "learning_rate": 4.152921194164392e-05, "loss": 0.1604, "step": 19233 }, { "epoch": 0.34305996504120145, "grad_norm": 0.28462904691696167, "learning_rate": 4.152804415931189e-05, "loss": 0.1806, "step": 19234 }, { "epoch": 0.34307780116291514, "grad_norm": 0.2746331989765167, "learning_rate": 4.152687631291145e-05, "loss": 0.2101, "step": 19235 }, { "epoch": 0.34309563728462883, "grad_norm": 0.3891318440437317, "learning_rate": 4.152570840244713e-05, "loss": 0.2383, "step": 19236 }, { "epoch": 0.3431134734063425, "grad_norm": 0.32095879316329956, "learning_rate": 4.152454042792345e-05, "loss": 0.1465, "step": 19237 }, { "epoch": 0.3431313095280562, "grad_norm": 0.2463955283164978, "learning_rate": 4.152337238934495e-05, "loss": 0.162, "step": 19238 }, { "epoch": 0.3431491456497699, "grad_norm": 0.3604165315628052, "learning_rate": 4.152220428671615e-05, "loss": 0.2, "step": 19239 }, { "epoch": 0.3431669817714836, "grad_norm": 0.29314178228378296, "learning_rate": 4.152103612004158e-05, "loss": 0.1881, "step": 19240 }, { "epoch": 0.3431848178931973, "grad_norm": 0.30398428440093994, "learning_rate": 4.151986788932577e-05, "loss": 0.1742, "step": 19241 }, { "epoch": 0.343202654014911, "grad_norm": 0.28699666261672974, "learning_rate": 4.151869959457324e-05, "loss": 0.17, "step": 19242 }, { "epoch": 0.3432204901366247, "grad_norm": 0.21540312469005585, "learning_rate": 4.151753123578853e-05, "loss": 0.1988, "step": 19243 }, { "epoch": 0.3432383262583384, "grad_norm": 0.24821065366268158, "learning_rate": 4.151636281297616e-05, "loss": 0.1586, "step": 19244 }, { "epoch": 0.3432561623800521, "grad_norm": 0.32822316884994507, "learning_rate": 4.1515194326140674e-05, "loss": 0.2074, "step": 19245 }, { "epoch": 0.34327399850176576, "grad_norm": 0.2227536141872406, "learning_rate": 4.151402577528658e-05, "loss": 0.1384, "step": 19246 }, { "epoch": 0.34329183462347945, "grad_norm": 0.2184383124113083, "learning_rate": 4.151285716041842e-05, "loss": 0.1654, "step": 19247 }, { "epoch": 0.34330967074519314, "grad_norm": 0.3004356324672699, "learning_rate": 4.151168848154072e-05, "loss": 0.1441, "step": 19248 }, { "epoch": 0.3433275068669069, "grad_norm": 0.2525661587715149, "learning_rate": 4.151051973865802e-05, "loss": 0.1954, "step": 19249 }, { "epoch": 0.34334534298862057, "grad_norm": 0.2357158660888672, "learning_rate": 4.1509350931774835e-05, "loss": 0.152, "step": 19250 }, { "epoch": 0.34336317911033426, "grad_norm": 0.2263195663690567, "learning_rate": 4.1508182060895713e-05, "loss": 0.1229, "step": 19251 }, { "epoch": 0.34338101523204795, "grad_norm": 0.2520339787006378, "learning_rate": 4.150701312602518e-05, "loss": 0.1859, "step": 19252 }, { "epoch": 0.34339885135376164, "grad_norm": 0.2920095920562744, "learning_rate": 4.1505844127167745e-05, "loss": 0.1861, "step": 19253 }, { "epoch": 0.3434166874754753, "grad_norm": 0.28727173805236816, "learning_rate": 4.1504675064327965e-05, "loss": 0.172, "step": 19254 }, { "epoch": 0.343434523597189, "grad_norm": 0.28142687678337097, "learning_rate": 4.150350593751036e-05, "loss": 0.157, "step": 19255 }, { "epoch": 0.3434523597189027, "grad_norm": 0.23413558304309845, "learning_rate": 4.1502336746719475e-05, "loss": 0.1602, "step": 19256 }, { "epoch": 0.34347019584061644, "grad_norm": 0.48061373829841614, "learning_rate": 4.1501167491959824e-05, "loss": 0.1552, "step": 19257 }, { "epoch": 0.34348803196233013, "grad_norm": 0.22398672997951508, "learning_rate": 4.149999817323595e-05, "loss": 0.1409, "step": 19258 }, { "epoch": 0.3435058680840438, "grad_norm": 0.230852872133255, "learning_rate": 4.149882879055239e-05, "loss": 0.1632, "step": 19259 }, { "epoch": 0.3435237042057575, "grad_norm": 0.2472366839647293, "learning_rate": 4.149765934391366e-05, "loss": 0.1625, "step": 19260 }, { "epoch": 0.3435415403274712, "grad_norm": 0.23507563769817352, "learning_rate": 4.149648983332432e-05, "loss": 0.1448, "step": 19261 }, { "epoch": 0.3435593764491849, "grad_norm": 0.3527578115463257, "learning_rate": 4.149532025878888e-05, "loss": 0.1565, "step": 19262 }, { "epoch": 0.34357721257089857, "grad_norm": 0.3142579197883606, "learning_rate": 4.149415062031188e-05, "loss": 0.1599, "step": 19263 }, { "epoch": 0.34359504869261226, "grad_norm": 0.2729453146457672, "learning_rate": 4.149298091789785e-05, "loss": 0.1629, "step": 19264 }, { "epoch": 0.34361288481432595, "grad_norm": 0.29182925820350647, "learning_rate": 4.149181115155134e-05, "loss": 0.1501, "step": 19265 }, { "epoch": 0.3436307209360397, "grad_norm": 0.2653812766075134, "learning_rate": 4.149064132127687e-05, "loss": 0.1825, "step": 19266 }, { "epoch": 0.3436485570577534, "grad_norm": 0.22801761329174042, "learning_rate": 4.1489471427078976e-05, "loss": 0.121, "step": 19267 }, { "epoch": 0.34366639317946707, "grad_norm": 0.340135782957077, "learning_rate": 4.14883014689622e-05, "loss": 0.1662, "step": 19268 }, { "epoch": 0.34368422930118075, "grad_norm": 0.20019252598285675, "learning_rate": 4.148713144693107e-05, "loss": 0.1477, "step": 19269 }, { "epoch": 0.34370206542289444, "grad_norm": 0.40863582491874695, "learning_rate": 4.148596136099012e-05, "loss": 0.1712, "step": 19270 }, { "epoch": 0.34371990154460813, "grad_norm": 0.24625706672668457, "learning_rate": 4.1484791211143896e-05, "loss": 0.1362, "step": 19271 }, { "epoch": 0.3437377376663218, "grad_norm": 0.3434460163116455, "learning_rate": 4.148362099739693e-05, "loss": 0.1471, "step": 19272 }, { "epoch": 0.3437555737880355, "grad_norm": 0.19441038370132446, "learning_rate": 4.148245071975375e-05, "loss": 0.145, "step": 19273 }, { "epoch": 0.34377340990974925, "grad_norm": 0.2680801749229431, "learning_rate": 4.1481280378218904e-05, "loss": 0.1824, "step": 19274 }, { "epoch": 0.34379124603146294, "grad_norm": 0.32224753499031067, "learning_rate": 4.148010997279691e-05, "loss": 0.2753, "step": 19275 }, { "epoch": 0.3438090821531766, "grad_norm": 0.21381701529026031, "learning_rate": 4.147893950349233e-05, "loss": 0.1671, "step": 19276 }, { "epoch": 0.3438269182748903, "grad_norm": 0.21210013329982758, "learning_rate": 4.1477768970309686e-05, "loss": 0.1478, "step": 19277 }, { "epoch": 0.343844754396604, "grad_norm": 0.23080043494701385, "learning_rate": 4.1476598373253515e-05, "loss": 0.1416, "step": 19278 }, { "epoch": 0.3438625905183177, "grad_norm": 0.2671366333961487, "learning_rate": 4.1475427712328356e-05, "loss": 0.1137, "step": 19279 }, { "epoch": 0.3438804266400314, "grad_norm": 0.3107810318470001, "learning_rate": 4.1474256987538756e-05, "loss": 0.1924, "step": 19280 }, { "epoch": 0.34389826276174507, "grad_norm": 0.28378915786743164, "learning_rate": 4.147308619888924e-05, "loss": 0.1628, "step": 19281 }, { "epoch": 0.34391609888345875, "grad_norm": 0.2703215479850769, "learning_rate": 4.147191534638436e-05, "loss": 0.2018, "step": 19282 }, { "epoch": 0.3439339350051725, "grad_norm": 0.16677279770374298, "learning_rate": 4.147074443002864e-05, "loss": 0.138, "step": 19283 }, { "epoch": 0.3439517711268862, "grad_norm": 0.26388025283813477, "learning_rate": 4.1469573449826624e-05, "loss": 0.185, "step": 19284 }, { "epoch": 0.3439696072485999, "grad_norm": 0.4443589448928833, "learning_rate": 4.146840240578286e-05, "loss": 0.206, "step": 19285 }, { "epoch": 0.34398744337031356, "grad_norm": 0.22026492655277252, "learning_rate": 4.1467231297901874e-05, "loss": 0.2038, "step": 19286 }, { "epoch": 0.34400527949202725, "grad_norm": 0.20402655005455017, "learning_rate": 4.1466060126188214e-05, "loss": 0.1777, "step": 19287 }, { "epoch": 0.34402311561374094, "grad_norm": 0.2890566885471344, "learning_rate": 4.146488889064642e-05, "loss": 0.1513, "step": 19288 }, { "epoch": 0.3440409517354546, "grad_norm": 0.3009870648384094, "learning_rate": 4.146371759128103e-05, "loss": 0.1686, "step": 19289 }, { "epoch": 0.3440587878571683, "grad_norm": 0.324984610080719, "learning_rate": 4.1462546228096585e-05, "loss": 0.1814, "step": 19290 }, { "epoch": 0.34407662397888206, "grad_norm": 0.26800447702407837, "learning_rate": 4.146137480109762e-05, "loss": 0.1972, "step": 19291 }, { "epoch": 0.34409446010059574, "grad_norm": 0.3157986104488373, "learning_rate": 4.146020331028868e-05, "loss": 0.1999, "step": 19292 }, { "epoch": 0.34411229622230943, "grad_norm": 0.25662344694137573, "learning_rate": 4.1459031755674314e-05, "loss": 0.1446, "step": 19293 }, { "epoch": 0.3441301323440231, "grad_norm": 0.2624521851539612, "learning_rate": 4.145786013725905e-05, "loss": 0.1176, "step": 19294 }, { "epoch": 0.3441479684657368, "grad_norm": 0.21069125831127167, "learning_rate": 4.145668845504744e-05, "loss": 0.1703, "step": 19295 }, { "epoch": 0.3441658045874505, "grad_norm": 0.3734355866909027, "learning_rate": 4.1455516709044016e-05, "loss": 0.1622, "step": 19296 }, { "epoch": 0.3441836407091642, "grad_norm": 0.27586856484413147, "learning_rate": 4.145434489925333e-05, "loss": 0.1849, "step": 19297 }, { "epoch": 0.3442014768308779, "grad_norm": 0.2133214771747589, "learning_rate": 4.1453173025679916e-05, "loss": 0.1763, "step": 19298 }, { "epoch": 0.3442193129525916, "grad_norm": 0.2775794565677643, "learning_rate": 4.145200108832833e-05, "loss": 0.1528, "step": 19299 }, { "epoch": 0.3442371490743053, "grad_norm": 0.26951631903648376, "learning_rate": 4.145082908720309e-05, "loss": 0.1893, "step": 19300 }, { "epoch": 0.344254985196019, "grad_norm": 0.24548238515853882, "learning_rate": 4.144965702230877e-05, "loss": 0.1619, "step": 19301 }, { "epoch": 0.3442728213177327, "grad_norm": 0.2351071536540985, "learning_rate": 4.144848489364989e-05, "loss": 0.1674, "step": 19302 }, { "epoch": 0.34429065743944637, "grad_norm": 0.2926071882247925, "learning_rate": 4.1447312701231e-05, "loss": 0.1681, "step": 19303 }, { "epoch": 0.34430849356116006, "grad_norm": 0.24202954769134521, "learning_rate": 4.1446140445056656e-05, "loss": 0.1327, "step": 19304 }, { "epoch": 0.34432632968287374, "grad_norm": 0.32067757844924927, "learning_rate": 4.144496812513138e-05, "loss": 0.131, "step": 19305 }, { "epoch": 0.34434416580458743, "grad_norm": 0.2718522548675537, "learning_rate": 4.1443795741459734e-05, "loss": 0.1747, "step": 19306 }, { "epoch": 0.3443620019263011, "grad_norm": 0.27440202236175537, "learning_rate": 4.1442623294046246e-05, "loss": 0.1702, "step": 19307 }, { "epoch": 0.34437983804801486, "grad_norm": 0.2720183730125427, "learning_rate": 4.1441450782895487e-05, "loss": 0.1594, "step": 19308 }, { "epoch": 0.34439767416972855, "grad_norm": 0.2942847013473511, "learning_rate": 4.1440278208011976e-05, "loss": 0.1446, "step": 19309 }, { "epoch": 0.34441551029144224, "grad_norm": 0.2485940158367157, "learning_rate": 4.143910556940027e-05, "loss": 0.1635, "step": 19310 }, { "epoch": 0.3444333464131559, "grad_norm": 0.20638635754585266, "learning_rate": 4.143793286706491e-05, "loss": 0.0965, "step": 19311 }, { "epoch": 0.3444511825348696, "grad_norm": 0.29417091608047485, "learning_rate": 4.143676010101045e-05, "loss": 0.2289, "step": 19312 }, { "epoch": 0.3444690186565833, "grad_norm": 0.25009727478027344, "learning_rate": 4.143558727124142e-05, "loss": 0.1516, "step": 19313 }, { "epoch": 0.344486854778297, "grad_norm": 0.2975854277610779, "learning_rate": 4.1434414377762386e-05, "loss": 0.1704, "step": 19314 }, { "epoch": 0.3445046909000107, "grad_norm": 0.3443279266357422, "learning_rate": 4.1433241420577886e-05, "loss": 0.2154, "step": 19315 }, { "epoch": 0.3445225270217244, "grad_norm": 0.33312395215034485, "learning_rate": 4.143206839969247e-05, "loss": 0.2016, "step": 19316 }, { "epoch": 0.3445403631434381, "grad_norm": 0.2558199167251587, "learning_rate": 4.143089531511067e-05, "loss": 0.1445, "step": 19317 }, { "epoch": 0.3445581992651518, "grad_norm": 0.2809750735759735, "learning_rate": 4.142972216683705e-05, "loss": 0.1312, "step": 19318 }, { "epoch": 0.3445760353868655, "grad_norm": 0.5312844514846802, "learning_rate": 4.1428548954876155e-05, "loss": 0.1531, "step": 19319 }, { "epoch": 0.3445938715085792, "grad_norm": 0.2636854350566864, "learning_rate": 4.142737567923253e-05, "loss": 0.1529, "step": 19320 }, { "epoch": 0.34461170763029286, "grad_norm": 0.2276621013879776, "learning_rate": 4.142620233991072e-05, "loss": 0.1445, "step": 19321 }, { "epoch": 0.34462954375200655, "grad_norm": 0.3009655177593231, "learning_rate": 4.142502893691528e-05, "loss": 0.1354, "step": 19322 }, { "epoch": 0.34464737987372024, "grad_norm": 0.22071021795272827, "learning_rate": 4.1423855470250756e-05, "loss": 0.122, "step": 19323 }, { "epoch": 0.3446652159954339, "grad_norm": 0.24714696407318115, "learning_rate": 4.142268193992169e-05, "loss": 0.1485, "step": 19324 }, { "epoch": 0.34468305211714767, "grad_norm": 0.27075421810150146, "learning_rate": 4.142150834593264e-05, "loss": 0.1582, "step": 19325 }, { "epoch": 0.34470088823886136, "grad_norm": 0.28965651988983154, "learning_rate": 4.142033468828815e-05, "loss": 0.1921, "step": 19326 }, { "epoch": 0.34471872436057505, "grad_norm": 0.36095792055130005, "learning_rate": 4.141916096699277e-05, "loss": 0.1264, "step": 19327 }, { "epoch": 0.34473656048228873, "grad_norm": 0.29419201612472534, "learning_rate": 4.1417987182051057e-05, "loss": 0.1657, "step": 19328 }, { "epoch": 0.3447543966040024, "grad_norm": 0.20871809124946594, "learning_rate": 4.141681333346755e-05, "loss": 0.1623, "step": 19329 }, { "epoch": 0.3447722327257161, "grad_norm": 0.27883103489875793, "learning_rate": 4.141563942124681e-05, "loss": 0.1552, "step": 19330 }, { "epoch": 0.3447900688474298, "grad_norm": 0.561913251876831, "learning_rate": 4.141446544539337e-05, "loss": 0.1955, "step": 19331 }, { "epoch": 0.3448079049691435, "grad_norm": 0.29841604828834534, "learning_rate": 4.14132914059118e-05, "loss": 0.1586, "step": 19332 }, { "epoch": 0.34482574109085723, "grad_norm": 0.22488825023174286, "learning_rate": 4.1412117302806644e-05, "loss": 0.105, "step": 19333 }, { "epoch": 0.3448435772125709, "grad_norm": 0.3224492371082306, "learning_rate": 4.141094313608246e-05, "loss": 0.1544, "step": 19334 }, { "epoch": 0.3448614133342846, "grad_norm": 0.21744197607040405, "learning_rate": 4.140976890574378e-05, "loss": 0.1161, "step": 19335 }, { "epoch": 0.3448792494559983, "grad_norm": 0.2855767011642456, "learning_rate": 4.140859461179517e-05, "loss": 0.1487, "step": 19336 }, { "epoch": 0.344897085577712, "grad_norm": 0.2090531885623932, "learning_rate": 4.140742025424118e-05, "loss": 0.1845, "step": 19337 }, { "epoch": 0.34491492169942567, "grad_norm": 0.2449500858783722, "learning_rate": 4.140624583308637e-05, "loss": 0.1654, "step": 19338 }, { "epoch": 0.34493275782113936, "grad_norm": 0.1828378140926361, "learning_rate": 4.140507134833528e-05, "loss": 0.1117, "step": 19339 }, { "epoch": 0.34495059394285305, "grad_norm": 0.19028717279434204, "learning_rate": 4.1403896799992465e-05, "loss": 0.1424, "step": 19340 }, { "epoch": 0.34496843006456673, "grad_norm": 0.29808419942855835, "learning_rate": 4.140272218806248e-05, "loss": 0.1853, "step": 19341 }, { "epoch": 0.3449862661862805, "grad_norm": 0.3018692135810852, "learning_rate": 4.1401547512549884e-05, "loss": 0.1226, "step": 19342 }, { "epoch": 0.34500410230799416, "grad_norm": 0.32953277230262756, "learning_rate": 4.140037277345922e-05, "loss": 0.2063, "step": 19343 }, { "epoch": 0.34502193842970785, "grad_norm": 0.2793973684310913, "learning_rate": 4.1399197970795045e-05, "loss": 0.219, "step": 19344 }, { "epoch": 0.34503977455142154, "grad_norm": 0.2436210960149765, "learning_rate": 4.139802310456192e-05, "loss": 0.1495, "step": 19345 }, { "epoch": 0.34505761067313523, "grad_norm": 0.22010815143585205, "learning_rate": 4.139684817476439e-05, "loss": 0.1974, "step": 19346 }, { "epoch": 0.3450754467948489, "grad_norm": 0.26123496890068054, "learning_rate": 4.1395673181407014e-05, "loss": 0.1422, "step": 19347 }, { "epoch": 0.3450932829165626, "grad_norm": 0.2590912878513336, "learning_rate": 4.139449812449435e-05, "loss": 0.2137, "step": 19348 }, { "epoch": 0.3451111190382763, "grad_norm": 0.2490239143371582, "learning_rate": 4.139332300403094e-05, "loss": 0.129, "step": 19349 }, { "epoch": 0.34512895515999004, "grad_norm": 0.2790435254573822, "learning_rate": 4.139214782002135e-05, "loss": 0.1489, "step": 19350 }, { "epoch": 0.3451467912817037, "grad_norm": 0.25684571266174316, "learning_rate": 4.1390972572470145e-05, "loss": 0.1373, "step": 19351 }, { "epoch": 0.3451646274034174, "grad_norm": 0.20819757878780365, "learning_rate": 4.138979726138186e-05, "loss": 0.1703, "step": 19352 }, { "epoch": 0.3451824635251311, "grad_norm": 0.3392389714717865, "learning_rate": 4.138862188676106e-05, "loss": 0.1651, "step": 19353 }, { "epoch": 0.3452002996468448, "grad_norm": 0.4044768214225769, "learning_rate": 4.13874464486123e-05, "loss": 0.1924, "step": 19354 }, { "epoch": 0.3452181357685585, "grad_norm": 0.28278350830078125, "learning_rate": 4.138627094694014e-05, "loss": 0.1944, "step": 19355 }, { "epoch": 0.34523597189027216, "grad_norm": 0.2475716918706894, "learning_rate": 4.1385095381749134e-05, "loss": 0.183, "step": 19356 }, { "epoch": 0.34525380801198585, "grad_norm": 0.2546561062335968, "learning_rate": 4.1383919753043834e-05, "loss": 0.1761, "step": 19357 }, { "epoch": 0.3452716441336996, "grad_norm": 0.33256638050079346, "learning_rate": 4.138274406082881e-05, "loss": 0.2232, "step": 19358 }, { "epoch": 0.3452894802554133, "grad_norm": 0.24753150343894958, "learning_rate": 4.138156830510861e-05, "loss": 0.1647, "step": 19359 }, { "epoch": 0.34530731637712697, "grad_norm": 0.2755405604839325, "learning_rate": 4.1380392485887785e-05, "loss": 0.1229, "step": 19360 }, { "epoch": 0.34532515249884066, "grad_norm": 0.2919138967990875, "learning_rate": 4.137921660317091e-05, "loss": 0.1804, "step": 19361 }, { "epoch": 0.34534298862055435, "grad_norm": 0.25106585025787354, "learning_rate": 4.1378040656962534e-05, "loss": 0.1517, "step": 19362 }, { "epoch": 0.34536082474226804, "grad_norm": 0.22420300543308258, "learning_rate": 4.137686464726721e-05, "loss": 0.1721, "step": 19363 }, { "epoch": 0.3453786608639817, "grad_norm": 0.3089165985584259, "learning_rate": 4.1375688574089514e-05, "loss": 0.1381, "step": 19364 }, { "epoch": 0.3453964969856954, "grad_norm": 0.27446988224983215, "learning_rate": 4.137451243743398e-05, "loss": 0.1994, "step": 19365 }, { "epoch": 0.3454143331074091, "grad_norm": 0.231646329164505, "learning_rate": 4.137333623730518e-05, "loss": 0.1655, "step": 19366 }, { "epoch": 0.34543216922912284, "grad_norm": 0.2537112534046173, "learning_rate": 4.137215997370768e-05, "loss": 0.1078, "step": 19367 }, { "epoch": 0.34545000535083653, "grad_norm": 0.25047567486763, "learning_rate": 4.1370983646646034e-05, "loss": 0.1943, "step": 19368 }, { "epoch": 0.3454678414725502, "grad_norm": 0.29900601506233215, "learning_rate": 4.13698072561248e-05, "loss": 0.1746, "step": 19369 }, { "epoch": 0.3454856775942639, "grad_norm": 0.3534618020057678, "learning_rate": 4.1368630802148534e-05, "loss": 0.1446, "step": 19370 }, { "epoch": 0.3455035137159776, "grad_norm": 0.32002121210098267, "learning_rate": 4.1367454284721805e-05, "loss": 0.199, "step": 19371 }, { "epoch": 0.3455213498376913, "grad_norm": 0.3234451115131378, "learning_rate": 4.136627770384917e-05, "loss": 0.177, "step": 19372 }, { "epoch": 0.34553918595940497, "grad_norm": 0.22080664336681366, "learning_rate": 4.136510105953519e-05, "loss": 0.2138, "step": 19373 }, { "epoch": 0.34555702208111866, "grad_norm": 0.330020934343338, "learning_rate": 4.136392435178443e-05, "loss": 0.1813, "step": 19374 }, { "epoch": 0.3455748582028324, "grad_norm": 0.23628132045269012, "learning_rate": 4.136274758060144e-05, "loss": 0.113, "step": 19375 }, { "epoch": 0.3455926943245461, "grad_norm": 0.2844630479812622, "learning_rate": 4.13615707459908e-05, "loss": 0.1711, "step": 19376 }, { "epoch": 0.3456105304462598, "grad_norm": 0.2452024221420288, "learning_rate": 4.1360393847957045e-05, "loss": 0.1421, "step": 19377 }, { "epoch": 0.34562836656797347, "grad_norm": 0.25904181599617004, "learning_rate": 4.1359216886504756e-05, "loss": 0.2128, "step": 19378 }, { "epoch": 0.34564620268968715, "grad_norm": 0.2607460021972656, "learning_rate": 4.1358039861638505e-05, "loss": 0.1727, "step": 19379 }, { "epoch": 0.34566403881140084, "grad_norm": 0.22307580709457397, "learning_rate": 4.135686277336284e-05, "loss": 0.1376, "step": 19380 }, { "epoch": 0.34568187493311453, "grad_norm": 0.292441189289093, "learning_rate": 4.135568562168231e-05, "loss": 0.1604, "step": 19381 }, { "epoch": 0.3456997110548282, "grad_norm": 0.2761858105659485, "learning_rate": 4.1354508406601504e-05, "loss": 0.1722, "step": 19382 }, { "epoch": 0.3457175471765419, "grad_norm": 0.26929807662963867, "learning_rate": 4.135333112812497e-05, "loss": 0.1403, "step": 19383 }, { "epoch": 0.34573538329825565, "grad_norm": 0.2326447069644928, "learning_rate": 4.135215378625729e-05, "loss": 0.1542, "step": 19384 }, { "epoch": 0.34575321941996934, "grad_norm": 0.3189031183719635, "learning_rate": 4.1350976381002994e-05, "loss": 0.1687, "step": 19385 }, { "epoch": 0.345771055541683, "grad_norm": 0.2811721861362457, "learning_rate": 4.1349798912366675e-05, "loss": 0.1506, "step": 19386 }, { "epoch": 0.3457888916633967, "grad_norm": 0.25603535771369934, "learning_rate": 4.134862138035289e-05, "loss": 0.2115, "step": 19387 }, { "epoch": 0.3458067277851104, "grad_norm": 0.3122118413448334, "learning_rate": 4.13474437849662e-05, "loss": 0.1875, "step": 19388 }, { "epoch": 0.3458245639068241, "grad_norm": 0.218451589345932, "learning_rate": 4.1346266126211177e-05, "loss": 0.1754, "step": 19389 }, { "epoch": 0.3458424000285378, "grad_norm": 0.2716617286205292, "learning_rate": 4.1345088404092374e-05, "loss": 0.1164, "step": 19390 }, { "epoch": 0.34586023615025147, "grad_norm": 0.28143441677093506, "learning_rate": 4.134391061861436e-05, "loss": 0.1815, "step": 19391 }, { "epoch": 0.3458780722719652, "grad_norm": 0.39164450764656067, "learning_rate": 4.1342732769781714e-05, "loss": 0.1882, "step": 19392 }, { "epoch": 0.3458959083936789, "grad_norm": 0.34961459040641785, "learning_rate": 4.1341554857598984e-05, "loss": 0.1431, "step": 19393 }, { "epoch": 0.3459137445153926, "grad_norm": 0.3550502359867096, "learning_rate": 4.1340376882070745e-05, "loss": 0.1518, "step": 19394 }, { "epoch": 0.3459315806371063, "grad_norm": 0.23281922936439514, "learning_rate": 4.1339198843201567e-05, "loss": 0.1566, "step": 19395 }, { "epoch": 0.34594941675881996, "grad_norm": 0.3236084580421448, "learning_rate": 4.1338020740996e-05, "loss": 0.1545, "step": 19396 }, { "epoch": 0.34596725288053365, "grad_norm": 0.3396601676940918, "learning_rate": 4.133684257545863e-05, "loss": 0.1691, "step": 19397 }, { "epoch": 0.34598508900224734, "grad_norm": 0.2830646336078644, "learning_rate": 4.133566434659401e-05, "loss": 0.1795, "step": 19398 }, { "epoch": 0.346002925123961, "grad_norm": 0.3032616376876831, "learning_rate": 4.133448605440672e-05, "loss": 0.1938, "step": 19399 }, { "epoch": 0.3460207612456747, "grad_norm": 0.25963565707206726, "learning_rate": 4.133330769890131e-05, "loss": 0.1843, "step": 19400 }, { "epoch": 0.34603859736738846, "grad_norm": 0.22302964329719543, "learning_rate": 4.1332129280082364e-05, "loss": 0.1482, "step": 19401 }, { "epoch": 0.34605643348910214, "grad_norm": 0.1829168200492859, "learning_rate": 4.133095079795444e-05, "loss": 0.1483, "step": 19402 }, { "epoch": 0.34607426961081583, "grad_norm": 0.34463924169540405, "learning_rate": 4.132977225252211e-05, "loss": 0.164, "step": 19403 }, { "epoch": 0.3460921057325295, "grad_norm": 0.2819700837135315, "learning_rate": 4.1328593643789957e-05, "loss": 0.0853, "step": 19404 }, { "epoch": 0.3461099418542432, "grad_norm": 0.2864101529121399, "learning_rate": 4.1327414971762525e-05, "loss": 0.1892, "step": 19405 }, { "epoch": 0.3461277779759569, "grad_norm": 0.24050670862197876, "learning_rate": 4.132623623644439e-05, "loss": 0.1432, "step": 19406 }, { "epoch": 0.3461456140976706, "grad_norm": 0.27650803327560425, "learning_rate": 4.1325057437840123e-05, "loss": 0.2029, "step": 19407 }, { "epoch": 0.34616345021938427, "grad_norm": 0.23626144230365753, "learning_rate": 4.132387857595431e-05, "loss": 0.1899, "step": 19408 }, { "epoch": 0.346181286341098, "grad_norm": 0.22747544944286346, "learning_rate": 4.132269965079149e-05, "loss": 0.1796, "step": 19409 }, { "epoch": 0.3461991224628117, "grad_norm": 0.30286508798599243, "learning_rate": 4.132152066235625e-05, "loss": 0.1795, "step": 19410 }, { "epoch": 0.3462169585845254, "grad_norm": 0.22571417689323425, "learning_rate": 4.132034161065316e-05, "loss": 0.1735, "step": 19411 }, { "epoch": 0.3462347947062391, "grad_norm": 0.18470734357833862, "learning_rate": 4.1319162495686796e-05, "loss": 0.1364, "step": 19412 }, { "epoch": 0.34625263082795277, "grad_norm": 0.26024067401885986, "learning_rate": 4.1317983317461714e-05, "loss": 0.1666, "step": 19413 }, { "epoch": 0.34627046694966646, "grad_norm": 0.23578520119190216, "learning_rate": 4.1316804075982496e-05, "loss": 0.1763, "step": 19414 }, { "epoch": 0.34628830307138014, "grad_norm": 0.37316691875457764, "learning_rate": 4.131562477125371e-05, "loss": 0.1796, "step": 19415 }, { "epoch": 0.34630613919309383, "grad_norm": 0.3864397406578064, "learning_rate": 4.131444540327993e-05, "loss": 0.1544, "step": 19416 }, { "epoch": 0.3463239753148076, "grad_norm": 0.34760424494743347, "learning_rate": 4.131326597206572e-05, "loss": 0.2304, "step": 19417 }, { "epoch": 0.34634181143652126, "grad_norm": 0.18980322778224945, "learning_rate": 4.131208647761565e-05, "loss": 0.1571, "step": 19418 }, { "epoch": 0.34635964755823495, "grad_norm": 0.24487678706645966, "learning_rate": 4.131090691993431e-05, "loss": 0.1522, "step": 19419 }, { "epoch": 0.34637748367994864, "grad_norm": 0.2938240170478821, "learning_rate": 4.1309727299026255e-05, "loss": 0.167, "step": 19420 }, { "epoch": 0.3463953198016623, "grad_norm": 0.375690221786499, "learning_rate": 4.1308547614896075e-05, "loss": 0.1868, "step": 19421 }, { "epoch": 0.346413155923376, "grad_norm": 0.25675103068351746, "learning_rate": 4.1307367867548316e-05, "loss": 0.2172, "step": 19422 }, { "epoch": 0.3464309920450897, "grad_norm": 0.26187554001808167, "learning_rate": 4.130618805698758e-05, "loss": 0.1654, "step": 19423 }, { "epoch": 0.3464488281668034, "grad_norm": 0.21872815489768982, "learning_rate": 4.130500818321842e-05, "loss": 0.1764, "step": 19424 }, { "epoch": 0.3464666642885171, "grad_norm": 0.26111993193626404, "learning_rate": 4.130382824624541e-05, "loss": 0.1372, "step": 19425 }, { "epoch": 0.3464845004102308, "grad_norm": 0.2385575920343399, "learning_rate": 4.1302648246073145e-05, "loss": 0.1715, "step": 19426 }, { "epoch": 0.3465023365319445, "grad_norm": 0.2510746717453003, "learning_rate": 4.1301468182706176e-05, "loss": 0.1295, "step": 19427 }, { "epoch": 0.3465201726536582, "grad_norm": 0.32107454538345337, "learning_rate": 4.1300288056149084e-05, "loss": 0.1804, "step": 19428 }, { "epoch": 0.3465380087753719, "grad_norm": 0.2929087281227112, "learning_rate": 4.129910786640645e-05, "loss": 0.1589, "step": 19429 }, { "epoch": 0.3465558448970856, "grad_norm": 0.28323599696159363, "learning_rate": 4.129792761348285e-05, "loss": 0.1565, "step": 19430 }, { "epoch": 0.34657368101879926, "grad_norm": 0.19271722435951233, "learning_rate": 4.129674729738284e-05, "loss": 0.1382, "step": 19431 }, { "epoch": 0.34659151714051295, "grad_norm": 0.22486954927444458, "learning_rate": 4.129556691811102e-05, "loss": 0.1819, "step": 19432 }, { "epoch": 0.34660935326222664, "grad_norm": 0.3742815852165222, "learning_rate": 4.1294386475671955e-05, "loss": 0.1853, "step": 19433 }, { "epoch": 0.3466271893839404, "grad_norm": 0.19420146942138672, "learning_rate": 4.1293205970070216e-05, "loss": 0.1717, "step": 19434 }, { "epoch": 0.34664502550565407, "grad_norm": 0.2316415160894394, "learning_rate": 4.129202540131037e-05, "loss": 0.1695, "step": 19435 }, { "epoch": 0.34666286162736776, "grad_norm": 0.20413222908973694, "learning_rate": 4.129084476939703e-05, "loss": 0.1538, "step": 19436 }, { "epoch": 0.34668069774908145, "grad_norm": 0.36453738808631897, "learning_rate": 4.128966407433473e-05, "loss": 0.2106, "step": 19437 }, { "epoch": 0.34669853387079513, "grad_norm": 0.33726999163627625, "learning_rate": 4.128848331612808e-05, "loss": 0.2279, "step": 19438 }, { "epoch": 0.3467163699925088, "grad_norm": 0.2563611567020416, "learning_rate": 4.128730249478163e-05, "loss": 0.114, "step": 19439 }, { "epoch": 0.3467342061142225, "grad_norm": 0.2862267792224884, "learning_rate": 4.1286121610299986e-05, "loss": 0.1915, "step": 19440 }, { "epoch": 0.3467520422359362, "grad_norm": 0.2832556962966919, "learning_rate": 4.1284940662687696e-05, "loss": 0.2229, "step": 19441 }, { "epoch": 0.3467698783576499, "grad_norm": 0.31398025155067444, "learning_rate": 4.128375965194936e-05, "loss": 0.1493, "step": 19442 }, { "epoch": 0.34678771447936363, "grad_norm": 0.25471949577331543, "learning_rate": 4.1282578578089534e-05, "loss": 0.2328, "step": 19443 }, { "epoch": 0.3468055506010773, "grad_norm": 0.2671929597854614, "learning_rate": 4.1281397441112823e-05, "loss": 0.1614, "step": 19444 }, { "epoch": 0.346823386722791, "grad_norm": 0.2736206352710724, "learning_rate": 4.128021624102378e-05, "loss": 0.1663, "step": 19445 }, { "epoch": 0.3468412228445047, "grad_norm": 0.4274255335330963, "learning_rate": 4.127903497782701e-05, "loss": 0.1802, "step": 19446 }, { "epoch": 0.3468590589662184, "grad_norm": 0.29369959235191345, "learning_rate": 4.1277853651527075e-05, "loss": 0.1822, "step": 19447 }, { "epoch": 0.34687689508793207, "grad_norm": 0.24593660235404968, "learning_rate": 4.127667226212855e-05, "loss": 0.1353, "step": 19448 }, { "epoch": 0.34689473120964576, "grad_norm": 0.3930591344833374, "learning_rate": 4.127549080963603e-05, "loss": 0.1963, "step": 19449 }, { "epoch": 0.34691256733135944, "grad_norm": 0.3083154857158661, "learning_rate": 4.127430929405408e-05, "loss": 0.2248, "step": 19450 }, { "epoch": 0.3469304034530732, "grad_norm": 0.25477007031440735, "learning_rate": 4.127312771538729e-05, "loss": 0.1788, "step": 19451 }, { "epoch": 0.3469482395747869, "grad_norm": 0.21664482355117798, "learning_rate": 4.127194607364023e-05, "loss": 0.1457, "step": 19452 }, { "epoch": 0.34696607569650056, "grad_norm": 0.2607729136943817, "learning_rate": 4.127076436881749e-05, "loss": 0.172, "step": 19453 }, { "epoch": 0.34698391181821425, "grad_norm": 0.2806100845336914, "learning_rate": 4.126958260092365e-05, "loss": 0.2115, "step": 19454 }, { "epoch": 0.34700174793992794, "grad_norm": 0.31121960282325745, "learning_rate": 4.126840076996329e-05, "loss": 0.1938, "step": 19455 }, { "epoch": 0.34701958406164163, "grad_norm": 0.3361174166202545, "learning_rate": 4.126721887594098e-05, "loss": 0.0998, "step": 19456 }, { "epoch": 0.3470374201833553, "grad_norm": 0.2513003945350647, "learning_rate": 4.126603691886132e-05, "loss": 0.136, "step": 19457 }, { "epoch": 0.347055256305069, "grad_norm": 0.26615363359451294, "learning_rate": 4.126485489872888e-05, "loss": 0.1571, "step": 19458 }, { "epoch": 0.34707309242678275, "grad_norm": 0.26913365721702576, "learning_rate": 4.126367281554825e-05, "loss": 0.1893, "step": 19459 }, { "epoch": 0.34709092854849644, "grad_norm": 0.3098093867301941, "learning_rate": 4.1262490669324005e-05, "loss": 0.199, "step": 19460 }, { "epoch": 0.3471087646702101, "grad_norm": 0.2577618658542633, "learning_rate": 4.126130846006072e-05, "loss": 0.1995, "step": 19461 }, { "epoch": 0.3471266007919238, "grad_norm": 0.24977301061153412, "learning_rate": 4.126012618776299e-05, "loss": 0.1648, "step": 19462 }, { "epoch": 0.3471444369136375, "grad_norm": 0.21640323102474213, "learning_rate": 4.12589438524354e-05, "loss": 0.1713, "step": 19463 }, { "epoch": 0.3471622730353512, "grad_norm": 0.281476765871048, "learning_rate": 4.1257761454082535e-05, "loss": 0.1961, "step": 19464 }, { "epoch": 0.3471801091570649, "grad_norm": 0.21089307963848114, "learning_rate": 4.1256578992708954e-05, "loss": 0.1637, "step": 19465 }, { "epoch": 0.34719794527877856, "grad_norm": 0.28609150648117065, "learning_rate": 4.1255396468319265e-05, "loss": 0.1598, "step": 19466 }, { "epoch": 0.34721578140049225, "grad_norm": 0.2323131412267685, "learning_rate": 4.1254213880918044e-05, "loss": 0.1672, "step": 19467 }, { "epoch": 0.347233617522206, "grad_norm": 0.2568100392818451, "learning_rate": 4.1253031230509884e-05, "loss": 0.1372, "step": 19468 }, { "epoch": 0.3472514536439197, "grad_norm": 0.2932114005088806, "learning_rate": 4.125184851709936e-05, "loss": 0.1747, "step": 19469 }, { "epoch": 0.34726928976563337, "grad_norm": 0.30081841349601746, "learning_rate": 4.125066574069105e-05, "loss": 0.2114, "step": 19470 }, { "epoch": 0.34728712588734706, "grad_norm": 0.3816368877887726, "learning_rate": 4.124948290128955e-05, "loss": 0.1471, "step": 19471 }, { "epoch": 0.34730496200906075, "grad_norm": 0.30220168828964233, "learning_rate": 4.124829999889944e-05, "loss": 0.1502, "step": 19472 }, { "epoch": 0.34732279813077443, "grad_norm": 0.21175815165042877, "learning_rate": 4.124711703352531e-05, "loss": 0.1739, "step": 19473 }, { "epoch": 0.3473406342524881, "grad_norm": 0.3254470229148865, "learning_rate": 4.124593400517174e-05, "loss": 0.1609, "step": 19474 }, { "epoch": 0.3473584703742018, "grad_norm": 0.25704342126846313, "learning_rate": 4.124475091384332e-05, "loss": 0.1735, "step": 19475 }, { "epoch": 0.34737630649591555, "grad_norm": 0.2239055186510086, "learning_rate": 4.124356775954464e-05, "loss": 0.1936, "step": 19476 }, { "epoch": 0.34739414261762924, "grad_norm": 0.29571130871772766, "learning_rate": 4.124238454228028e-05, "loss": 0.1138, "step": 19477 }, { "epoch": 0.34741197873934293, "grad_norm": 0.36035099625587463, "learning_rate": 4.124120126205482e-05, "loss": 0.1758, "step": 19478 }, { "epoch": 0.3474298148610566, "grad_norm": 0.25826746225357056, "learning_rate": 4.124001791887286e-05, "loss": 0.1607, "step": 19479 }, { "epoch": 0.3474476509827703, "grad_norm": 0.3327743709087372, "learning_rate": 4.1238834512738976e-05, "loss": 0.1577, "step": 19480 }, { "epoch": 0.347465487104484, "grad_norm": 0.1871565878391266, "learning_rate": 4.1237651043657765e-05, "loss": 0.1428, "step": 19481 }, { "epoch": 0.3474833232261977, "grad_norm": 0.2558317482471466, "learning_rate": 4.123646751163381e-05, "loss": 0.1567, "step": 19482 }, { "epoch": 0.34750115934791137, "grad_norm": 0.2345658540725708, "learning_rate": 4.1235283916671695e-05, "loss": 0.175, "step": 19483 }, { "epoch": 0.34751899546962506, "grad_norm": 0.29519787430763245, "learning_rate": 4.123410025877602e-05, "loss": 0.1409, "step": 19484 }, { "epoch": 0.3475368315913388, "grad_norm": 0.25815102458000183, "learning_rate": 4.1232916537951363e-05, "loss": 0.1219, "step": 19485 }, { "epoch": 0.3475546677130525, "grad_norm": 0.39939600229263306, "learning_rate": 4.123173275420231e-05, "loss": 0.1523, "step": 19486 }, { "epoch": 0.3475725038347662, "grad_norm": 0.2604312002658844, "learning_rate": 4.1230548907533464e-05, "loss": 0.1718, "step": 19487 }, { "epoch": 0.34759033995647987, "grad_norm": 0.301343709230423, "learning_rate": 4.1229364997949394e-05, "loss": 0.186, "step": 19488 }, { "epoch": 0.34760817607819355, "grad_norm": 0.29630666971206665, "learning_rate": 4.12281810254547e-05, "loss": 0.2056, "step": 19489 }, { "epoch": 0.34762601219990724, "grad_norm": 0.31824612617492676, "learning_rate": 4.122699699005398e-05, "loss": 0.1576, "step": 19490 }, { "epoch": 0.34764384832162093, "grad_norm": 0.3133047819137573, "learning_rate": 4.1225812891751815e-05, "loss": 0.1398, "step": 19491 }, { "epoch": 0.3476616844433346, "grad_norm": 0.31197676062583923, "learning_rate": 4.1224628730552786e-05, "loss": 0.1357, "step": 19492 }, { "epoch": 0.34767952056504836, "grad_norm": 0.35021182894706726, "learning_rate": 4.122344450646149e-05, "loss": 0.2013, "step": 19493 }, { "epoch": 0.34769735668676205, "grad_norm": 0.24721401929855347, "learning_rate": 4.122226021948253e-05, "loss": 0.1196, "step": 19494 }, { "epoch": 0.34771519280847574, "grad_norm": 0.38173720240592957, "learning_rate": 4.1221075869620486e-05, "loss": 0.1861, "step": 19495 }, { "epoch": 0.3477330289301894, "grad_norm": 0.2375587821006775, "learning_rate": 4.1219891456879946e-05, "loss": 0.1624, "step": 19496 }, { "epoch": 0.3477508650519031, "grad_norm": 0.4578511714935303, "learning_rate": 4.1218706981265506e-05, "loss": 0.2162, "step": 19497 }, { "epoch": 0.3477687011736168, "grad_norm": 0.2093442678451538, "learning_rate": 4.121752244278175e-05, "loss": 0.1595, "step": 19498 }, { "epoch": 0.3477865372953305, "grad_norm": 0.22799724340438843, "learning_rate": 4.121633784143328e-05, "loss": 0.1565, "step": 19499 }, { "epoch": 0.3478043734170442, "grad_norm": 0.18609677255153656, "learning_rate": 4.1215153177224686e-05, "loss": 0.1679, "step": 19500 }, { "epoch": 0.34782220953875786, "grad_norm": 0.3047395646572113, "learning_rate": 4.1213968450160554e-05, "loss": 0.1433, "step": 19501 }, { "epoch": 0.3478400456604716, "grad_norm": 0.2760085165500641, "learning_rate": 4.1212783660245485e-05, "loss": 0.1337, "step": 19502 }, { "epoch": 0.3478578817821853, "grad_norm": 0.2929195463657379, "learning_rate": 4.121159880748406e-05, "loss": 0.2277, "step": 19503 }, { "epoch": 0.347875717903899, "grad_norm": 0.41020041704177856, "learning_rate": 4.1210413891880885e-05, "loss": 0.1974, "step": 19504 }, { "epoch": 0.3478935540256127, "grad_norm": 0.25318020582199097, "learning_rate": 4.120922891344054e-05, "loss": 0.1945, "step": 19505 }, { "epoch": 0.34791139014732636, "grad_norm": 0.22687697410583496, "learning_rate": 4.1208043872167634e-05, "loss": 0.1589, "step": 19506 }, { "epoch": 0.34792922626904005, "grad_norm": 0.34781762957572937, "learning_rate": 4.120685876806675e-05, "loss": 0.1606, "step": 19507 }, { "epoch": 0.34794706239075374, "grad_norm": 0.3028854727745056, "learning_rate": 4.120567360114248e-05, "loss": 0.1311, "step": 19508 }, { "epoch": 0.3479648985124674, "grad_norm": 0.2773214876651764, "learning_rate": 4.1204488371399426e-05, "loss": 0.1712, "step": 19509 }, { "epoch": 0.34798273463418117, "grad_norm": 0.2004251629114151, "learning_rate": 4.120330307884217e-05, "loss": 0.17, "step": 19510 }, { "epoch": 0.34800057075589486, "grad_norm": 0.29450973868370056, "learning_rate": 4.120211772347532e-05, "loss": 0.164, "step": 19511 }, { "epoch": 0.34801840687760854, "grad_norm": 0.3179113566875458, "learning_rate": 4.120093230530347e-05, "loss": 0.2042, "step": 19512 }, { "epoch": 0.34803624299932223, "grad_norm": 0.19407083094120026, "learning_rate": 4.119974682433121e-05, "loss": 0.103, "step": 19513 }, { "epoch": 0.3480540791210359, "grad_norm": 0.2156829982995987, "learning_rate": 4.119856128056313e-05, "loss": 0.1394, "step": 19514 }, { "epoch": 0.3480719152427496, "grad_norm": 0.2385161817073822, "learning_rate": 4.119737567400383e-05, "loss": 0.1878, "step": 19515 }, { "epoch": 0.3480897513644633, "grad_norm": 0.2861107885837555, "learning_rate": 4.119619000465791e-05, "loss": 0.2155, "step": 19516 }, { "epoch": 0.348107587486177, "grad_norm": 0.2676670253276825, "learning_rate": 4.119500427252997e-05, "loss": 0.1794, "step": 19517 }, { "epoch": 0.3481254236078907, "grad_norm": 0.2797057628631592, "learning_rate": 4.1193818477624594e-05, "loss": 0.1848, "step": 19518 }, { "epoch": 0.3481432597296044, "grad_norm": 0.2694793939590454, "learning_rate": 4.119263261994638e-05, "loss": 0.1199, "step": 19519 }, { "epoch": 0.3481610958513181, "grad_norm": 0.25778859853744507, "learning_rate": 4.119144669949994e-05, "loss": 0.179, "step": 19520 }, { "epoch": 0.3481789319730318, "grad_norm": 0.34032678604125977, "learning_rate": 4.119026071628985e-05, "loss": 0.1824, "step": 19521 }, { "epoch": 0.3481967680947455, "grad_norm": 0.23092837631702423, "learning_rate": 4.118907467032073e-05, "loss": 0.1531, "step": 19522 }, { "epoch": 0.34821460421645917, "grad_norm": 0.38058170676231384, "learning_rate": 4.1187888561597155e-05, "loss": 0.2384, "step": 19523 }, { "epoch": 0.34823244033817286, "grad_norm": 0.34737053513526917, "learning_rate": 4.118670239012373e-05, "loss": 0.1765, "step": 19524 }, { "epoch": 0.34825027645988654, "grad_norm": 0.2629907727241516, "learning_rate": 4.118551615590507e-05, "loss": 0.1841, "step": 19525 }, { "epoch": 0.34826811258160023, "grad_norm": 0.22977577149868011, "learning_rate": 4.1184329858945745e-05, "loss": 0.1456, "step": 19526 }, { "epoch": 0.348285948703314, "grad_norm": 0.37088480591773987, "learning_rate": 4.1183143499250374e-05, "loss": 0.1409, "step": 19527 }, { "epoch": 0.34830378482502766, "grad_norm": 0.23603902757167816, "learning_rate": 4.118195707682355e-05, "loss": 0.1746, "step": 19528 }, { "epoch": 0.34832162094674135, "grad_norm": 0.23476672172546387, "learning_rate": 4.118077059166987e-05, "loss": 0.1571, "step": 19529 }, { "epoch": 0.34833945706845504, "grad_norm": 0.5951573848724365, "learning_rate": 4.1179584043793925e-05, "loss": 0.1862, "step": 19530 }, { "epoch": 0.3483572931901687, "grad_norm": 0.3055584132671356, "learning_rate": 4.117839743320034e-05, "loss": 0.2119, "step": 19531 }, { "epoch": 0.3483751293118824, "grad_norm": 0.23719768226146698, "learning_rate": 4.117721075989369e-05, "loss": 0.1363, "step": 19532 }, { "epoch": 0.3483929654335961, "grad_norm": 0.32017260789871216, "learning_rate": 4.117602402387859e-05, "loss": 0.1906, "step": 19533 }, { "epoch": 0.3484108015553098, "grad_norm": 0.2891557514667511, "learning_rate": 4.1174837225159625e-05, "loss": 0.2109, "step": 19534 }, { "epoch": 0.34842863767702353, "grad_norm": 0.265910804271698, "learning_rate": 4.117365036374141e-05, "loss": 0.1904, "step": 19535 }, { "epoch": 0.3484464737987372, "grad_norm": 0.19597332179546356, "learning_rate": 4.1172463439628536e-05, "loss": 0.1369, "step": 19536 }, { "epoch": 0.3484643099204509, "grad_norm": 0.2930004894733429, "learning_rate": 4.1171276452825614e-05, "loss": 0.1263, "step": 19537 }, { "epoch": 0.3484821460421646, "grad_norm": 0.32766494154930115, "learning_rate": 4.117008940333724e-05, "loss": 0.107, "step": 19538 }, { "epoch": 0.3484999821638783, "grad_norm": 0.2567526698112488, "learning_rate": 4.1168902291168006e-05, "loss": 0.1937, "step": 19539 }, { "epoch": 0.348517818285592, "grad_norm": 0.21148563921451569, "learning_rate": 4.116771511632252e-05, "loss": 0.1662, "step": 19540 }, { "epoch": 0.34853565440730566, "grad_norm": 0.22094793617725372, "learning_rate": 4.11665278788054e-05, "loss": 0.1636, "step": 19541 }, { "epoch": 0.34855349052901935, "grad_norm": 0.21725133061408997, "learning_rate": 4.116534057862122e-05, "loss": 0.1517, "step": 19542 }, { "epoch": 0.34857132665073304, "grad_norm": 0.29790034890174866, "learning_rate": 4.11641532157746e-05, "loss": 0.2164, "step": 19543 }, { "epoch": 0.3485891627724468, "grad_norm": 0.429353266954422, "learning_rate": 4.1162965790270155e-05, "loss": 0.1336, "step": 19544 }, { "epoch": 0.34860699889416047, "grad_norm": 0.2449774593114853, "learning_rate": 4.116177830211245e-05, "loss": 0.1674, "step": 19545 }, { "epoch": 0.34862483501587416, "grad_norm": 0.37135180830955505, "learning_rate": 4.116059075130613e-05, "loss": 0.2445, "step": 19546 }, { "epoch": 0.34864267113758785, "grad_norm": 0.2782810628414154, "learning_rate": 4.115940313785576e-05, "loss": 0.1724, "step": 19547 }, { "epoch": 0.34866050725930153, "grad_norm": 0.3206256926059723, "learning_rate": 4.115821546176598e-05, "loss": 0.1592, "step": 19548 }, { "epoch": 0.3486783433810152, "grad_norm": 0.3780229389667511, "learning_rate": 4.115702772304136e-05, "loss": 0.185, "step": 19549 }, { "epoch": 0.3486961795027289, "grad_norm": 0.24184750020503998, "learning_rate": 4.1155839921686525e-05, "loss": 0.1649, "step": 19550 }, { "epoch": 0.3487140156244426, "grad_norm": 0.2922285199165344, "learning_rate": 4.115465205770608e-05, "loss": 0.1324, "step": 19551 }, { "epoch": 0.34873185174615634, "grad_norm": 0.2572081685066223, "learning_rate": 4.115346413110461e-05, "loss": 0.1214, "step": 19552 }, { "epoch": 0.34874968786787003, "grad_norm": 0.2920433282852173, "learning_rate": 4.115227614188675e-05, "loss": 0.1615, "step": 19553 }, { "epoch": 0.3487675239895837, "grad_norm": 0.2628594636917114, "learning_rate": 4.1151088090057085e-05, "loss": 0.1218, "step": 19554 }, { "epoch": 0.3487853601112974, "grad_norm": 0.2535925507545471, "learning_rate": 4.114989997562022e-05, "loss": 0.1527, "step": 19555 }, { "epoch": 0.3488031962330111, "grad_norm": 0.29852116107940674, "learning_rate": 4.114871179858076e-05, "loss": 0.1039, "step": 19556 }, { "epoch": 0.3488210323547248, "grad_norm": 0.2597878873348236, "learning_rate": 4.114752355894333e-05, "loss": 0.186, "step": 19557 }, { "epoch": 0.34883886847643847, "grad_norm": 0.41820937395095825, "learning_rate": 4.114633525671251e-05, "loss": 0.1951, "step": 19558 }, { "epoch": 0.34885670459815216, "grad_norm": 0.20156671106815338, "learning_rate": 4.114514689189292e-05, "loss": 0.1423, "step": 19559 }, { "epoch": 0.3488745407198659, "grad_norm": 0.3613940477371216, "learning_rate": 4.114395846448916e-05, "loss": 0.1918, "step": 19560 }, { "epoch": 0.3488923768415796, "grad_norm": 0.23352019488811493, "learning_rate": 4.114276997450586e-05, "loss": 0.1447, "step": 19561 }, { "epoch": 0.3489102129632933, "grad_norm": 0.2274901419878006, "learning_rate": 4.1141581421947586e-05, "loss": 0.1693, "step": 19562 }, { "epoch": 0.34892804908500696, "grad_norm": 0.3260687291622162, "learning_rate": 4.1140392806818975e-05, "loss": 0.1784, "step": 19563 }, { "epoch": 0.34894588520672065, "grad_norm": 0.2123492807149887, "learning_rate": 4.113920412912463e-05, "loss": 0.1327, "step": 19564 }, { "epoch": 0.34896372132843434, "grad_norm": 0.3055228590965271, "learning_rate": 4.113801538886915e-05, "loss": 0.1529, "step": 19565 }, { "epoch": 0.348981557450148, "grad_norm": 0.35890069603919983, "learning_rate": 4.1136826586057156e-05, "loss": 0.1649, "step": 19566 }, { "epoch": 0.3489993935718617, "grad_norm": 0.3369129002094269, "learning_rate": 4.113563772069324e-05, "loss": 0.1677, "step": 19567 }, { "epoch": 0.3490172296935754, "grad_norm": 0.34517356753349304, "learning_rate": 4.1134448792782034e-05, "loss": 0.232, "step": 19568 }, { "epoch": 0.34903506581528915, "grad_norm": 0.22877280414104462, "learning_rate": 4.1133259802328116e-05, "loss": 0.1532, "step": 19569 }, { "epoch": 0.34905290193700284, "grad_norm": 0.27136847376823425, "learning_rate": 4.1132070749336116e-05, "loss": 0.1756, "step": 19570 }, { "epoch": 0.3490707380587165, "grad_norm": 0.2570529580116272, "learning_rate": 4.1130881633810635e-05, "loss": 0.2028, "step": 19571 }, { "epoch": 0.3490885741804302, "grad_norm": 0.311628520488739, "learning_rate": 4.1129692455756294e-05, "loss": 0.1573, "step": 19572 }, { "epoch": 0.3491064103021439, "grad_norm": 0.3021625578403473, "learning_rate": 4.112850321517768e-05, "loss": 0.1662, "step": 19573 }, { "epoch": 0.3491242464238576, "grad_norm": 0.31669682264328003, "learning_rate": 4.112731391207943e-05, "loss": 0.2294, "step": 19574 }, { "epoch": 0.3491420825455713, "grad_norm": 0.28785020112991333, "learning_rate": 4.112612454646613e-05, "loss": 0.1785, "step": 19575 }, { "epoch": 0.34915991866728496, "grad_norm": 0.3462860584259033, "learning_rate": 4.1124935118342414e-05, "loss": 0.2149, "step": 19576 }, { "epoch": 0.3491777547889987, "grad_norm": 0.26719382405281067, "learning_rate": 4.112374562771287e-05, "loss": 0.1384, "step": 19577 }, { "epoch": 0.3491955909107124, "grad_norm": 0.26933640241622925, "learning_rate": 4.112255607458212e-05, "loss": 0.1593, "step": 19578 }, { "epoch": 0.3492134270324261, "grad_norm": 0.2914995849132538, "learning_rate": 4.112136645895478e-05, "loss": 0.1535, "step": 19579 }, { "epoch": 0.34923126315413977, "grad_norm": 0.2068871706724167, "learning_rate": 4.112017678083545e-05, "loss": 0.1312, "step": 19580 }, { "epoch": 0.34924909927585346, "grad_norm": 0.2618831396102905, "learning_rate": 4.1118987040228754e-05, "loss": 0.1925, "step": 19581 }, { "epoch": 0.34926693539756715, "grad_norm": 0.3109544813632965, "learning_rate": 4.111779723713929e-05, "loss": 0.1459, "step": 19582 }, { "epoch": 0.34928477151928083, "grad_norm": 0.2558040916919708, "learning_rate": 4.111660737157168e-05, "loss": 0.2371, "step": 19583 }, { "epoch": 0.3493026076409945, "grad_norm": 0.2118171751499176, "learning_rate": 4.111541744353054e-05, "loss": 0.1606, "step": 19584 }, { "epoch": 0.3493204437627082, "grad_norm": 0.2972525954246521, "learning_rate": 4.111422745302046e-05, "loss": 0.185, "step": 19585 }, { "epoch": 0.34933827988442195, "grad_norm": 0.27432727813720703, "learning_rate": 4.111303740004608e-05, "loss": 0.1481, "step": 19586 }, { "epoch": 0.34935611600613564, "grad_norm": 0.22611775994300842, "learning_rate": 4.1111847284612e-05, "loss": 0.1752, "step": 19587 }, { "epoch": 0.34937395212784933, "grad_norm": 0.26797839999198914, "learning_rate": 4.1110657106722826e-05, "loss": 0.1658, "step": 19588 }, { "epoch": 0.349391788249563, "grad_norm": 0.4832557439804077, "learning_rate": 4.1109466866383195e-05, "loss": 0.2024, "step": 19589 }, { "epoch": 0.3494096243712767, "grad_norm": 0.2917376756668091, "learning_rate": 4.11082765635977e-05, "loss": 0.1771, "step": 19590 }, { "epoch": 0.3494274604929904, "grad_norm": 0.22100360691547394, "learning_rate": 4.1107086198370955e-05, "loss": 0.1692, "step": 19591 }, { "epoch": 0.3494452966147041, "grad_norm": 0.31009167432785034, "learning_rate": 4.1105895770707596e-05, "loss": 0.1777, "step": 19592 }, { "epoch": 0.34946313273641777, "grad_norm": 0.2743508219718933, "learning_rate": 4.110470528061221e-05, "loss": 0.1872, "step": 19593 }, { "epoch": 0.3494809688581315, "grad_norm": 0.2346644550561905, "learning_rate": 4.110351472808943e-05, "loss": 0.1463, "step": 19594 }, { "epoch": 0.3494988049798452, "grad_norm": 0.3018113374710083, "learning_rate": 4.110232411314386e-05, "loss": 0.1966, "step": 19595 }, { "epoch": 0.3495166411015589, "grad_norm": 0.22118301689624786, "learning_rate": 4.1101133435780124e-05, "loss": 0.1073, "step": 19596 }, { "epoch": 0.3495344772232726, "grad_norm": 0.39103150367736816, "learning_rate": 4.109994269600284e-05, "loss": 0.1411, "step": 19597 }, { "epoch": 0.34955231334498627, "grad_norm": 0.3187457323074341, "learning_rate": 4.10987518938166e-05, "loss": 0.1952, "step": 19598 }, { "epoch": 0.34957014946669995, "grad_norm": 0.2902323305606842, "learning_rate": 4.109756102922605e-05, "loss": 0.1908, "step": 19599 }, { "epoch": 0.34958798558841364, "grad_norm": 0.31453147530555725, "learning_rate": 4.109637010223579e-05, "loss": 0.1307, "step": 19600 }, { "epoch": 0.34960582171012733, "grad_norm": 0.30840393900871277, "learning_rate": 4.109517911285043e-05, "loss": 0.2156, "step": 19601 }, { "epoch": 0.349623657831841, "grad_norm": 0.49145349860191345, "learning_rate": 4.1093988061074617e-05, "loss": 0.14, "step": 19602 }, { "epoch": 0.34964149395355476, "grad_norm": 0.19740381836891174, "learning_rate": 4.1092796946912934e-05, "loss": 0.1343, "step": 19603 }, { "epoch": 0.34965933007526845, "grad_norm": 0.28548941016197205, "learning_rate": 4.109160577037002e-05, "loss": 0.1606, "step": 19604 }, { "epoch": 0.34967716619698214, "grad_norm": 0.24940462410449982, "learning_rate": 4.109041453145048e-05, "loss": 0.1771, "step": 19605 }, { "epoch": 0.3496950023186958, "grad_norm": 0.31209197640419006, "learning_rate": 4.108922323015893e-05, "loss": 0.1913, "step": 19606 }, { "epoch": 0.3497128384404095, "grad_norm": 0.36832955479621887, "learning_rate": 4.1088031866499996e-05, "loss": 0.1897, "step": 19607 }, { "epoch": 0.3497306745621232, "grad_norm": 0.34475061297416687, "learning_rate": 4.1086840440478305e-05, "loss": 0.1799, "step": 19608 }, { "epoch": 0.3497485106838369, "grad_norm": 0.24442026019096375, "learning_rate": 4.1085648952098456e-05, "loss": 0.1307, "step": 19609 }, { "epoch": 0.3497663468055506, "grad_norm": 0.21195891499519348, "learning_rate": 4.108445740136507e-05, "loss": 0.1738, "step": 19610 }, { "epoch": 0.3497841829272643, "grad_norm": 0.21677544713020325, "learning_rate": 4.108326578828278e-05, "loss": 0.1351, "step": 19611 }, { "epoch": 0.349802019048978, "grad_norm": 0.28859928250312805, "learning_rate": 4.1082074112856196e-05, "loss": 0.1642, "step": 19612 }, { "epoch": 0.3498198551706917, "grad_norm": 0.22792096436023712, "learning_rate": 4.108088237508993e-05, "loss": 0.1147, "step": 19613 }, { "epoch": 0.3498376912924054, "grad_norm": 0.2346087098121643, "learning_rate": 4.107969057498862e-05, "loss": 0.1976, "step": 19614 }, { "epoch": 0.34985552741411907, "grad_norm": 0.20971044898033142, "learning_rate": 4.1078498712556876e-05, "loss": 0.1455, "step": 19615 }, { "epoch": 0.34987336353583276, "grad_norm": 0.2765769958496094, "learning_rate": 4.107730678779932e-05, "loss": 0.2243, "step": 19616 }, { "epoch": 0.34989119965754645, "grad_norm": 0.26900607347488403, "learning_rate": 4.1076114800720556e-05, "loss": 0.1952, "step": 19617 }, { "epoch": 0.34990903577926014, "grad_norm": 0.21946214139461517, "learning_rate": 4.1074922751325226e-05, "loss": 0.1205, "step": 19618 }, { "epoch": 0.3499268719009739, "grad_norm": 0.24247460067272186, "learning_rate": 4.1073730639617945e-05, "loss": 0.1774, "step": 19619 }, { "epoch": 0.34994470802268757, "grad_norm": 0.28337591886520386, "learning_rate": 4.107253846560333e-05, "loss": 0.1297, "step": 19620 }, { "epoch": 0.34996254414440126, "grad_norm": 0.24794021248817444, "learning_rate": 4.1071346229286005e-05, "loss": 0.1442, "step": 19621 }, { "epoch": 0.34998038026611494, "grad_norm": 0.2338310033082962, "learning_rate": 4.1070153930670597e-05, "loss": 0.173, "step": 19622 }, { "epoch": 0.34999821638782863, "grad_norm": 0.23294194042682648, "learning_rate": 4.106896156976171e-05, "loss": 0.1742, "step": 19623 }, { "epoch": 0.3500160525095423, "grad_norm": 0.3466554582118988, "learning_rate": 4.106776914656399e-05, "loss": 0.1234, "step": 19624 }, { "epoch": 0.350033888631256, "grad_norm": 0.2669873833656311, "learning_rate": 4.106657666108204e-05, "loss": 0.1773, "step": 19625 }, { "epoch": 0.3500517247529697, "grad_norm": 0.34078970551490784, "learning_rate": 4.106538411332049e-05, "loss": 0.184, "step": 19626 }, { "epoch": 0.3500695608746834, "grad_norm": 0.2385529726743698, "learning_rate": 4.1064191503283966e-05, "loss": 0.1375, "step": 19627 }, { "epoch": 0.3500873969963971, "grad_norm": 0.2472548633813858, "learning_rate": 4.106299883097708e-05, "loss": 0.15, "step": 19628 }, { "epoch": 0.3501052331181108, "grad_norm": 0.21401570737361908, "learning_rate": 4.1061806096404476e-05, "loss": 0.1167, "step": 19629 }, { "epoch": 0.3501230692398245, "grad_norm": 0.2543730139732361, "learning_rate": 4.106061329957075e-05, "loss": 0.154, "step": 19630 }, { "epoch": 0.3501409053615382, "grad_norm": 0.22819173336029053, "learning_rate": 4.105942044048055e-05, "loss": 0.1897, "step": 19631 }, { "epoch": 0.3501587414832519, "grad_norm": 0.2190040498971939, "learning_rate": 4.1058227519138484e-05, "loss": 0.1729, "step": 19632 }, { "epoch": 0.35017657760496557, "grad_norm": 0.37843620777130127, "learning_rate": 4.1057034535549174e-05, "loss": 0.2404, "step": 19633 }, { "epoch": 0.35019441372667925, "grad_norm": 0.2770199477672577, "learning_rate": 4.105584148971726e-05, "loss": 0.0757, "step": 19634 }, { "epoch": 0.35021224984839294, "grad_norm": 0.17997154593467712, "learning_rate": 4.105464838164737e-05, "loss": 0.125, "step": 19635 }, { "epoch": 0.3502300859701067, "grad_norm": 0.22524304687976837, "learning_rate": 4.1053455211344105e-05, "loss": 0.1405, "step": 19636 }, { "epoch": 0.3502479220918204, "grad_norm": 0.26091763377189636, "learning_rate": 4.1052261978812104e-05, "loss": 0.1441, "step": 19637 }, { "epoch": 0.35026575821353406, "grad_norm": 0.3988263010978699, "learning_rate": 4.105106868405599e-05, "loss": 0.1399, "step": 19638 }, { "epoch": 0.35028359433524775, "grad_norm": 0.30785071849823, "learning_rate": 4.10498753270804e-05, "loss": 0.1576, "step": 19639 }, { "epoch": 0.35030143045696144, "grad_norm": 0.23383331298828125, "learning_rate": 4.1048681907889934e-05, "loss": 0.1434, "step": 19640 }, { "epoch": 0.3503192665786751, "grad_norm": 0.22138716280460358, "learning_rate": 4.1047488426489244e-05, "loss": 0.1385, "step": 19641 }, { "epoch": 0.3503371027003888, "grad_norm": 0.21598877012729645, "learning_rate": 4.104629488288294e-05, "loss": 0.2074, "step": 19642 }, { "epoch": 0.3503549388221025, "grad_norm": 0.25582030415534973, "learning_rate": 4.1045101277075665e-05, "loss": 0.1636, "step": 19643 }, { "epoch": 0.3503727749438162, "grad_norm": 0.24900773167610168, "learning_rate": 4.104390760907203e-05, "loss": 0.1566, "step": 19644 }, { "epoch": 0.35039061106552993, "grad_norm": 0.3139323890209198, "learning_rate": 4.104271387887667e-05, "loss": 0.1398, "step": 19645 }, { "epoch": 0.3504084471872436, "grad_norm": 0.2185821235179901, "learning_rate": 4.104152008649421e-05, "loss": 0.1529, "step": 19646 }, { "epoch": 0.3504262833089573, "grad_norm": 0.20959828794002533, "learning_rate": 4.1040326231929275e-05, "loss": 0.1191, "step": 19647 }, { "epoch": 0.350444119430671, "grad_norm": 0.24508103728294373, "learning_rate": 4.1039132315186495e-05, "loss": 0.1931, "step": 19648 }, { "epoch": 0.3504619555523847, "grad_norm": 0.2361128032207489, "learning_rate": 4.10379383362705e-05, "loss": 0.1878, "step": 19649 }, { "epoch": 0.3504797916740984, "grad_norm": 0.2508060038089752, "learning_rate": 4.1036744295185913e-05, "loss": 0.1323, "step": 19650 }, { "epoch": 0.35049762779581206, "grad_norm": 0.2504958510398865, "learning_rate": 4.103555019193737e-05, "loss": 0.1508, "step": 19651 }, { "epoch": 0.35051546391752575, "grad_norm": 0.3027316629886627, "learning_rate": 4.10343560265295e-05, "loss": 0.1755, "step": 19652 }, { "epoch": 0.3505333000392395, "grad_norm": 0.21214164793491364, "learning_rate": 4.103316179896691e-05, "loss": 0.1456, "step": 19653 }, { "epoch": 0.3505511361609532, "grad_norm": 0.3208407461643219, "learning_rate": 4.1031967509254266e-05, "loss": 0.1793, "step": 19654 }, { "epoch": 0.35056897228266687, "grad_norm": 0.19025740027427673, "learning_rate": 4.103077315739618e-05, "loss": 0.1516, "step": 19655 }, { "epoch": 0.35058680840438056, "grad_norm": 0.22745627164840698, "learning_rate": 4.102957874339727e-05, "loss": 0.1727, "step": 19656 }, { "epoch": 0.35060464452609424, "grad_norm": 0.3025979697704315, "learning_rate": 4.1028384267262176e-05, "loss": 0.1766, "step": 19657 }, { "epoch": 0.35062248064780793, "grad_norm": 0.24098078906536102, "learning_rate": 4.102718972899553e-05, "loss": 0.1768, "step": 19658 }, { "epoch": 0.3506403167695216, "grad_norm": 0.29610326886177063, "learning_rate": 4.1025995128601966e-05, "loss": 0.1375, "step": 19659 }, { "epoch": 0.3506581528912353, "grad_norm": 0.2935595214366913, "learning_rate": 4.102480046608611e-05, "loss": 0.1333, "step": 19660 }, { "epoch": 0.35067598901294905, "grad_norm": 0.23132145404815674, "learning_rate": 4.1023605741452586e-05, "loss": 0.1525, "step": 19661 }, { "epoch": 0.35069382513466274, "grad_norm": 0.26081085205078125, "learning_rate": 4.102241095470604e-05, "loss": 0.1617, "step": 19662 }, { "epoch": 0.35071166125637643, "grad_norm": 0.38519376516342163, "learning_rate": 4.102121610585109e-05, "loss": 0.1971, "step": 19663 }, { "epoch": 0.3507294973780901, "grad_norm": 0.286912202835083, "learning_rate": 4.102002119489237e-05, "loss": 0.154, "step": 19664 }, { "epoch": 0.3507473334998038, "grad_norm": 0.2111155390739441, "learning_rate": 4.101882622183452e-05, "loss": 0.1563, "step": 19665 }, { "epoch": 0.3507651696215175, "grad_norm": 0.32270610332489014, "learning_rate": 4.101763118668216e-05, "loss": 0.248, "step": 19666 }, { "epoch": 0.3507830057432312, "grad_norm": 0.24495652318000793, "learning_rate": 4.101643608943994e-05, "loss": 0.1455, "step": 19667 }, { "epoch": 0.35080084186494487, "grad_norm": 0.41668838262557983, "learning_rate": 4.101524093011247e-05, "loss": 0.1601, "step": 19668 }, { "epoch": 0.35081867798665856, "grad_norm": 0.26712530851364136, "learning_rate": 4.1014045708704404e-05, "loss": 0.1853, "step": 19669 }, { "epoch": 0.3508365141083723, "grad_norm": 0.23040862381458282, "learning_rate": 4.101285042522036e-05, "loss": 0.1684, "step": 19670 }, { "epoch": 0.350854350230086, "grad_norm": 0.263711154460907, "learning_rate": 4.1011655079664976e-05, "loss": 0.1524, "step": 19671 }, { "epoch": 0.3508721863517997, "grad_norm": 0.22974510490894318, "learning_rate": 4.10104596720429e-05, "loss": 0.1093, "step": 19672 }, { "epoch": 0.35089002247351336, "grad_norm": 0.2576884329319, "learning_rate": 4.1009264202358735e-05, "loss": 0.1515, "step": 19673 }, { "epoch": 0.35090785859522705, "grad_norm": 0.28994515538215637, "learning_rate": 4.100806867061714e-05, "loss": 0.065, "step": 19674 }, { "epoch": 0.35092569471694074, "grad_norm": 0.25028344988822937, "learning_rate": 4.100687307682274e-05, "loss": 0.1496, "step": 19675 }, { "epoch": 0.3509435308386544, "grad_norm": 0.29133617877960205, "learning_rate": 4.100567742098018e-05, "loss": 0.1464, "step": 19676 }, { "epoch": 0.3509613669603681, "grad_norm": 0.27885901927948, "learning_rate": 4.1004481703094075e-05, "loss": 0.1661, "step": 19677 }, { "epoch": 0.35097920308208186, "grad_norm": 0.2527541220188141, "learning_rate": 4.100328592316908e-05, "loss": 0.1534, "step": 19678 }, { "epoch": 0.35099703920379555, "grad_norm": 0.29238662123680115, "learning_rate": 4.100209008120981e-05, "loss": 0.1773, "step": 19679 }, { "epoch": 0.35101487532550923, "grad_norm": 0.2656225562095642, "learning_rate": 4.100089417722092e-05, "loss": 0.1159, "step": 19680 }, { "epoch": 0.3510327114472229, "grad_norm": 0.2631196677684784, "learning_rate": 4.0999698211207036e-05, "loss": 0.1862, "step": 19681 }, { "epoch": 0.3510505475689366, "grad_norm": 0.2563972771167755, "learning_rate": 4.09985021831728e-05, "loss": 0.1464, "step": 19682 }, { "epoch": 0.3510683836906503, "grad_norm": 0.2973809540271759, "learning_rate": 4.099730609312284e-05, "loss": 0.173, "step": 19683 }, { "epoch": 0.351086219812364, "grad_norm": 0.37659335136413574, "learning_rate": 4.099610994106179e-05, "loss": 0.1368, "step": 19684 }, { "epoch": 0.3511040559340777, "grad_norm": 0.25841158628463745, "learning_rate": 4.09949137269943e-05, "loss": 0.1539, "step": 19685 }, { "epoch": 0.35112189205579136, "grad_norm": 0.24447759985923767, "learning_rate": 4.0993717450925e-05, "loss": 0.1729, "step": 19686 }, { "epoch": 0.3511397281775051, "grad_norm": 0.5070403814315796, "learning_rate": 4.099252111285853e-05, "loss": 0.2365, "step": 19687 }, { "epoch": 0.3511575642992188, "grad_norm": 0.29022204875946045, "learning_rate": 4.099132471279952e-05, "loss": 0.1988, "step": 19688 }, { "epoch": 0.3511754004209325, "grad_norm": 0.2991228997707367, "learning_rate": 4.0990128250752614e-05, "loss": 0.1468, "step": 19689 }, { "epoch": 0.35119323654264617, "grad_norm": 0.3976593315601349, "learning_rate": 4.098893172672245e-05, "loss": 0.1826, "step": 19690 }, { "epoch": 0.35121107266435986, "grad_norm": 0.20655082166194916, "learning_rate": 4.0987735140713656e-05, "loss": 0.1809, "step": 19691 }, { "epoch": 0.35122890878607355, "grad_norm": 0.30448174476623535, "learning_rate": 4.098653849273088e-05, "loss": 0.1702, "step": 19692 }, { "epoch": 0.35124674490778723, "grad_norm": 0.33953171968460083, "learning_rate": 4.0985341782778764e-05, "loss": 0.1701, "step": 19693 }, { "epoch": 0.3512645810295009, "grad_norm": 0.19479066133499146, "learning_rate": 4.098414501086194e-05, "loss": 0.1711, "step": 19694 }, { "epoch": 0.35128241715121467, "grad_norm": 0.22470815479755402, "learning_rate": 4.0982948176985056e-05, "loss": 0.1716, "step": 19695 }, { "epoch": 0.35130025327292835, "grad_norm": 0.41108715534210205, "learning_rate": 4.098175128115273e-05, "loss": 0.1797, "step": 19696 }, { "epoch": 0.35131808939464204, "grad_norm": 0.2554748058319092, "learning_rate": 4.098055432336963e-05, "loss": 0.1931, "step": 19697 }, { "epoch": 0.35133592551635573, "grad_norm": 0.2931663393974304, "learning_rate": 4.097935730364038e-05, "loss": 0.1631, "step": 19698 }, { "epoch": 0.3513537616380694, "grad_norm": 0.2234746664762497, "learning_rate": 4.0978160221969616e-05, "loss": 0.1494, "step": 19699 }, { "epoch": 0.3513715977597831, "grad_norm": 0.19326429069042206, "learning_rate": 4.097696307836199e-05, "loss": 0.1759, "step": 19700 }, { "epoch": 0.3513894338814968, "grad_norm": 0.2622203528881073, "learning_rate": 4.097576587282214e-05, "loss": 0.1594, "step": 19701 }, { "epoch": 0.3514072700032105, "grad_norm": 0.2425583451986313, "learning_rate": 4.097456860535469e-05, "loss": 0.1997, "step": 19702 }, { "epoch": 0.35142510612492417, "grad_norm": 0.25813931226730347, "learning_rate": 4.09733712759643e-05, "loss": 0.2243, "step": 19703 }, { "epoch": 0.3514429422466379, "grad_norm": 0.19891194999217987, "learning_rate": 4.097217388465561e-05, "loss": 0.139, "step": 19704 }, { "epoch": 0.3514607783683516, "grad_norm": 0.1712683141231537, "learning_rate": 4.097097643143325e-05, "loss": 0.1432, "step": 19705 }, { "epoch": 0.3514786144900653, "grad_norm": 0.23824673891067505, "learning_rate": 4.096977891630188e-05, "loss": 0.1327, "step": 19706 }, { "epoch": 0.351496450611779, "grad_norm": 0.23117029666900635, "learning_rate": 4.0968581339266125e-05, "loss": 0.1118, "step": 19707 }, { "epoch": 0.35151428673349266, "grad_norm": 0.24423429369926453, "learning_rate": 4.096738370033064e-05, "loss": 0.1837, "step": 19708 }, { "epoch": 0.35153212285520635, "grad_norm": 0.2343129962682724, "learning_rate": 4.096618599950005e-05, "loss": 0.1345, "step": 19709 }, { "epoch": 0.35154995897692004, "grad_norm": 0.36951953172683716, "learning_rate": 4.096498823677901e-05, "loss": 0.1884, "step": 19710 }, { "epoch": 0.35156779509863373, "grad_norm": 0.2731028199195862, "learning_rate": 4.096379041217217e-05, "loss": 0.1782, "step": 19711 }, { "epoch": 0.35158563122034747, "grad_norm": 0.3639281094074249, "learning_rate": 4.096259252568416e-05, "loss": 0.2356, "step": 19712 }, { "epoch": 0.35160346734206116, "grad_norm": 0.23560945689678192, "learning_rate": 4.0961394577319626e-05, "loss": 0.1997, "step": 19713 }, { "epoch": 0.35162130346377485, "grad_norm": 0.25245076417922974, "learning_rate": 4.096019656708322e-05, "loss": 0.1422, "step": 19714 }, { "epoch": 0.35163913958548854, "grad_norm": 0.3055262565612793, "learning_rate": 4.095899849497957e-05, "loss": 0.1948, "step": 19715 }, { "epoch": 0.3516569757072022, "grad_norm": 0.28720512986183167, "learning_rate": 4.095780036101333e-05, "loss": 0.1164, "step": 19716 }, { "epoch": 0.3516748118289159, "grad_norm": 0.32368525862693787, "learning_rate": 4.095660216518916e-05, "loss": 0.2153, "step": 19717 }, { "epoch": 0.3516926479506296, "grad_norm": 0.2521657645702362, "learning_rate": 4.0955403907511675e-05, "loss": 0.1607, "step": 19718 }, { "epoch": 0.3517104840723433, "grad_norm": 0.19980616867542267, "learning_rate": 4.095420558798554e-05, "loss": 0.1521, "step": 19719 }, { "epoch": 0.35172832019405703, "grad_norm": 0.2124643474817276, "learning_rate": 4.095300720661538e-05, "loss": 0.1571, "step": 19720 }, { "epoch": 0.3517461563157707, "grad_norm": 0.33116739988327026, "learning_rate": 4.095180876340588e-05, "loss": 0.1195, "step": 19721 }, { "epoch": 0.3517639924374844, "grad_norm": 0.26021748781204224, "learning_rate": 4.095061025836163e-05, "loss": 0.1726, "step": 19722 }, { "epoch": 0.3517818285591981, "grad_norm": 0.29108476638793945, "learning_rate": 4.094941169148732e-05, "loss": 0.1629, "step": 19723 }, { "epoch": 0.3517996646809118, "grad_norm": 0.2202785611152649, "learning_rate": 4.094821306278759e-05, "loss": 0.1679, "step": 19724 }, { "epoch": 0.35181750080262547, "grad_norm": 0.22796787321567535, "learning_rate": 4.094701437226707e-05, "loss": 0.1349, "step": 19725 }, { "epoch": 0.35183533692433916, "grad_norm": 0.26325109601020813, "learning_rate": 4.094581561993041e-05, "loss": 0.1563, "step": 19726 }, { "epoch": 0.35185317304605285, "grad_norm": 0.2852124273777008, "learning_rate": 4.094461680578226e-05, "loss": 0.2054, "step": 19727 }, { "epoch": 0.35187100916776654, "grad_norm": 0.2270687222480774, "learning_rate": 4.094341792982728e-05, "loss": 0.1512, "step": 19728 }, { "epoch": 0.3518888452894803, "grad_norm": 0.2668840289115906, "learning_rate": 4.094221899207009e-05, "loss": 0.1547, "step": 19729 }, { "epoch": 0.35190668141119397, "grad_norm": 0.27421703934669495, "learning_rate": 4.094101999251536e-05, "loss": 0.1644, "step": 19730 }, { "epoch": 0.35192451753290765, "grad_norm": 0.2846114933490753, "learning_rate": 4.093982093116773e-05, "loss": 0.1211, "step": 19731 }, { "epoch": 0.35194235365462134, "grad_norm": 0.3455972969532013, "learning_rate": 4.093862180803185e-05, "loss": 0.1644, "step": 19732 }, { "epoch": 0.35196018977633503, "grad_norm": 0.2865823209285736, "learning_rate": 4.0937422623112367e-05, "loss": 0.167, "step": 19733 }, { "epoch": 0.3519780258980487, "grad_norm": 0.25498536229133606, "learning_rate": 4.0936223376413926e-05, "loss": 0.1926, "step": 19734 }, { "epoch": 0.3519958620197624, "grad_norm": 0.25609761476516724, "learning_rate": 4.093502406794118e-05, "loss": 0.1512, "step": 19735 }, { "epoch": 0.3520136981414761, "grad_norm": 0.23590657114982605, "learning_rate": 4.0933824697698786e-05, "loss": 0.1502, "step": 19736 }, { "epoch": 0.35203153426318984, "grad_norm": 0.25410187244415283, "learning_rate": 4.093262526569138e-05, "loss": 0.1249, "step": 19737 }, { "epoch": 0.3520493703849035, "grad_norm": 0.2516137659549713, "learning_rate": 4.0931425771923616e-05, "loss": 0.156, "step": 19738 }, { "epoch": 0.3520672065066172, "grad_norm": 0.3644959330558777, "learning_rate": 4.0930226216400134e-05, "loss": 0.2473, "step": 19739 }, { "epoch": 0.3520850426283309, "grad_norm": 0.28251513838768005, "learning_rate": 4.09290265991256e-05, "loss": 0.152, "step": 19740 }, { "epoch": 0.3521028787500446, "grad_norm": 0.30688774585723877, "learning_rate": 4.092782692010466e-05, "loss": 0.237, "step": 19741 }, { "epoch": 0.3521207148717583, "grad_norm": 0.2338908314704895, "learning_rate": 4.0926627179341957e-05, "loss": 0.1657, "step": 19742 }, { "epoch": 0.35213855099347197, "grad_norm": 0.2764683663845062, "learning_rate": 4.0925427376842146e-05, "loss": 0.206, "step": 19743 }, { "epoch": 0.35215638711518565, "grad_norm": 0.3336687982082367, "learning_rate": 4.092422751260988e-05, "loss": 0.1607, "step": 19744 }, { "epoch": 0.35217422323689934, "grad_norm": 0.22442187368869781, "learning_rate": 4.092302758664981e-05, "loss": 0.19, "step": 19745 }, { "epoch": 0.3521920593586131, "grad_norm": 0.2398339807987213, "learning_rate": 4.092182759896658e-05, "loss": 0.1636, "step": 19746 }, { "epoch": 0.3522098954803268, "grad_norm": 0.21279588341712952, "learning_rate": 4.092062754956485e-05, "loss": 0.1641, "step": 19747 }, { "epoch": 0.35222773160204046, "grad_norm": 0.19786347448825836, "learning_rate": 4.0919427438449265e-05, "loss": 0.1436, "step": 19748 }, { "epoch": 0.35224556772375415, "grad_norm": 0.3153798580169678, "learning_rate": 4.091822726562449e-05, "loss": 0.13, "step": 19749 }, { "epoch": 0.35226340384546784, "grad_norm": 0.2867707908153534, "learning_rate": 4.091702703109516e-05, "loss": 0.1635, "step": 19750 }, { "epoch": 0.3522812399671815, "grad_norm": 0.3336258828639984, "learning_rate": 4.0915826734865934e-05, "loss": 0.1836, "step": 19751 }, { "epoch": 0.3522990760888952, "grad_norm": 0.31477659940719604, "learning_rate": 4.091462637694147e-05, "loss": 0.1516, "step": 19752 }, { "epoch": 0.3523169122106089, "grad_norm": 0.2180374413728714, "learning_rate": 4.0913425957326424e-05, "loss": 0.1147, "step": 19753 }, { "epoch": 0.35233474833232264, "grad_norm": 0.2784854471683502, "learning_rate": 4.0912225476025435e-05, "loss": 0.1858, "step": 19754 }, { "epoch": 0.35235258445403633, "grad_norm": 0.34138286113739014, "learning_rate": 4.091102493304316e-05, "loss": 0.1917, "step": 19755 }, { "epoch": 0.35237042057575, "grad_norm": 0.3120967745780945, "learning_rate": 4.0909824328384263e-05, "loss": 0.16, "step": 19756 }, { "epoch": 0.3523882566974637, "grad_norm": 0.2316724956035614, "learning_rate": 4.090862366205339e-05, "loss": 0.1259, "step": 19757 }, { "epoch": 0.3524060928191774, "grad_norm": 0.27696239948272705, "learning_rate": 4.090742293405521e-05, "loss": 0.1774, "step": 19758 }, { "epoch": 0.3524239289408911, "grad_norm": 0.36350706219673157, "learning_rate": 4.090622214439435e-05, "loss": 0.1314, "step": 19759 }, { "epoch": 0.3524417650626048, "grad_norm": 0.2812734842300415, "learning_rate": 4.090502129307547e-05, "loss": 0.1733, "step": 19760 }, { "epoch": 0.35245960118431846, "grad_norm": 0.23775143921375275, "learning_rate": 4.090382038010325e-05, "loss": 0.1698, "step": 19761 }, { "epoch": 0.35247743730603215, "grad_norm": 0.26190581917762756, "learning_rate": 4.090261940548233e-05, "loss": 0.1379, "step": 19762 }, { "epoch": 0.3524952734277459, "grad_norm": 0.39719924330711365, "learning_rate": 4.090141836921736e-05, "loss": 0.1644, "step": 19763 }, { "epoch": 0.3525131095494596, "grad_norm": 0.3248538374900818, "learning_rate": 4.090021727131299e-05, "loss": 0.1423, "step": 19764 }, { "epoch": 0.35253094567117327, "grad_norm": 0.2315378040075302, "learning_rate": 4.0899016111773905e-05, "loss": 0.1686, "step": 19765 }, { "epoch": 0.35254878179288696, "grad_norm": 0.2608569264411926, "learning_rate": 4.0897814890604734e-05, "loss": 0.1671, "step": 19766 }, { "epoch": 0.35256661791460064, "grad_norm": 0.3050379455089569, "learning_rate": 4.089661360781014e-05, "loss": 0.1599, "step": 19767 }, { "epoch": 0.35258445403631433, "grad_norm": 0.3049139082431793, "learning_rate": 4.089541226339478e-05, "loss": 0.1385, "step": 19768 }, { "epoch": 0.352602290158028, "grad_norm": 0.223703995347023, "learning_rate": 4.089421085736332e-05, "loss": 0.1795, "step": 19769 }, { "epoch": 0.3526201262797417, "grad_norm": 0.2346150428056717, "learning_rate": 4.08930093897204e-05, "loss": 0.1319, "step": 19770 }, { "epoch": 0.35263796240145545, "grad_norm": 0.670360267162323, "learning_rate": 4.089180786047069e-05, "loss": 0.2171, "step": 19771 }, { "epoch": 0.35265579852316914, "grad_norm": 0.24883748590946198, "learning_rate": 4.0890606269618835e-05, "loss": 0.1175, "step": 19772 }, { "epoch": 0.3526736346448828, "grad_norm": 0.2740766704082489, "learning_rate": 4.0889404617169516e-05, "loss": 0.1714, "step": 19773 }, { "epoch": 0.3526914707665965, "grad_norm": 0.39677295088768005, "learning_rate": 4.088820290312737e-05, "loss": 0.147, "step": 19774 }, { "epoch": 0.3527093068883102, "grad_norm": 0.24014437198638916, "learning_rate": 4.088700112749706e-05, "loss": 0.1636, "step": 19775 }, { "epoch": 0.3527271430100239, "grad_norm": 0.14541755616664886, "learning_rate": 4.088579929028326e-05, "loss": 0.0994, "step": 19776 }, { "epoch": 0.3527449791317376, "grad_norm": 0.1968134194612503, "learning_rate": 4.0884597391490595e-05, "loss": 0.1088, "step": 19777 }, { "epoch": 0.35276281525345127, "grad_norm": 0.32403284311294556, "learning_rate": 4.088339543112375e-05, "loss": 0.1326, "step": 19778 }, { "epoch": 0.352780651375165, "grad_norm": 0.2569282054901123, "learning_rate": 4.088219340918739e-05, "loss": 0.1512, "step": 19779 }, { "epoch": 0.3527984874968787, "grad_norm": 0.20336899161338806, "learning_rate": 4.088099132568616e-05, "loss": 0.1673, "step": 19780 }, { "epoch": 0.3528163236185924, "grad_norm": 0.2858489155769348, "learning_rate": 4.0879789180624705e-05, "loss": 0.1655, "step": 19781 }, { "epoch": 0.3528341597403061, "grad_norm": 0.29759639501571655, "learning_rate": 4.087858697400771e-05, "loss": 0.236, "step": 19782 }, { "epoch": 0.35285199586201976, "grad_norm": 0.20409435033798218, "learning_rate": 4.087738470583984e-05, "loss": 0.1323, "step": 19783 }, { "epoch": 0.35286983198373345, "grad_norm": 0.26103392243385315, "learning_rate": 4.087618237612574e-05, "loss": 0.174, "step": 19784 }, { "epoch": 0.35288766810544714, "grad_norm": 0.22956660389900208, "learning_rate": 4.087497998487006e-05, "loss": 0.1262, "step": 19785 }, { "epoch": 0.3529055042271608, "grad_norm": 0.22196048498153687, "learning_rate": 4.087377753207749e-05, "loss": 0.1761, "step": 19786 }, { "epoch": 0.3529233403488745, "grad_norm": 0.26203569769859314, "learning_rate": 4.087257501775267e-05, "loss": 0.1635, "step": 19787 }, { "epoch": 0.35294117647058826, "grad_norm": 0.305367112159729, "learning_rate": 4.087137244190026e-05, "loss": 0.2214, "step": 19788 }, { "epoch": 0.35295901259230195, "grad_norm": 0.2782062590122223, "learning_rate": 4.087016980452494e-05, "loss": 0.1567, "step": 19789 }, { "epoch": 0.35297684871401563, "grad_norm": 0.45773640275001526, "learning_rate": 4.086896710563135e-05, "loss": 0.1742, "step": 19790 }, { "epoch": 0.3529946848357293, "grad_norm": 0.253086656332016, "learning_rate": 4.086776434522417e-05, "loss": 0.146, "step": 19791 }, { "epoch": 0.353012520957443, "grad_norm": 0.2581816017627716, "learning_rate": 4.086656152330805e-05, "loss": 0.1804, "step": 19792 }, { "epoch": 0.3530303570791567, "grad_norm": 0.22439709305763245, "learning_rate": 4.086535863988766e-05, "loss": 0.1503, "step": 19793 }, { "epoch": 0.3530481932008704, "grad_norm": 0.2890036404132843, "learning_rate": 4.086415569496767e-05, "loss": 0.1983, "step": 19794 }, { "epoch": 0.3530660293225841, "grad_norm": 0.3186407685279846, "learning_rate": 4.086295268855271e-05, "loss": 0.1844, "step": 19795 }, { "epoch": 0.3530838654442978, "grad_norm": 0.26146066188812256, "learning_rate": 4.0861749620647484e-05, "loss": 0.166, "step": 19796 }, { "epoch": 0.3531017015660115, "grad_norm": 0.21023817360401154, "learning_rate": 4.086054649125664e-05, "loss": 0.0948, "step": 19797 }, { "epoch": 0.3531195376877252, "grad_norm": 0.3218429684638977, "learning_rate": 4.085934330038483e-05, "loss": 0.1614, "step": 19798 }, { "epoch": 0.3531373738094389, "grad_norm": 0.2928532063961029, "learning_rate": 4.085814004803673e-05, "loss": 0.1228, "step": 19799 }, { "epoch": 0.35315520993115257, "grad_norm": 0.2698879539966583, "learning_rate": 4.0856936734217005e-05, "loss": 0.1644, "step": 19800 }, { "epoch": 0.35317304605286626, "grad_norm": 0.43529677391052246, "learning_rate": 4.085573335893031e-05, "loss": 0.1473, "step": 19801 }, { "epoch": 0.35319088217457995, "grad_norm": 0.2933369278907776, "learning_rate": 4.085452992218132e-05, "loss": 0.1496, "step": 19802 }, { "epoch": 0.35320871829629363, "grad_norm": 0.24520447850227356, "learning_rate": 4.08533264239747e-05, "loss": 0.1719, "step": 19803 }, { "epoch": 0.3532265544180073, "grad_norm": 0.2467024177312851, "learning_rate": 4.085212286431511e-05, "loss": 0.1838, "step": 19804 }, { "epoch": 0.35324439053972106, "grad_norm": 0.2627450227737427, "learning_rate": 4.0850919243207206e-05, "loss": 0.1657, "step": 19805 }, { "epoch": 0.35326222666143475, "grad_norm": 0.19614753127098083, "learning_rate": 4.084971556065568e-05, "loss": 0.1135, "step": 19806 }, { "epoch": 0.35328006278314844, "grad_norm": 0.22114554047584534, "learning_rate": 4.0848511816665166e-05, "loss": 0.1076, "step": 19807 }, { "epoch": 0.35329789890486213, "grad_norm": 0.2613510489463806, "learning_rate": 4.0847308011240364e-05, "loss": 0.1874, "step": 19808 }, { "epoch": 0.3533157350265758, "grad_norm": 0.2803811728954315, "learning_rate": 4.084610414438591e-05, "loss": 0.2278, "step": 19809 }, { "epoch": 0.3533335711482895, "grad_norm": 0.22982949018478394, "learning_rate": 4.084490021610649e-05, "loss": 0.1377, "step": 19810 }, { "epoch": 0.3533514072700032, "grad_norm": 0.30077412724494934, "learning_rate": 4.0843696226406756e-05, "loss": 0.1477, "step": 19811 }, { "epoch": 0.3533692433917169, "grad_norm": 0.2277221977710724, "learning_rate": 4.0842492175291394e-05, "loss": 0.1323, "step": 19812 }, { "epoch": 0.3533870795134306, "grad_norm": 0.21202826499938965, "learning_rate": 4.084128806276506e-05, "loss": 0.1613, "step": 19813 }, { "epoch": 0.3534049156351443, "grad_norm": 0.27329447865486145, "learning_rate": 4.084008388883241e-05, "loss": 0.2075, "step": 19814 }, { "epoch": 0.353422751756858, "grad_norm": 0.4323183298110962, "learning_rate": 4.083887965349813e-05, "loss": 0.1704, "step": 19815 }, { "epoch": 0.3534405878785717, "grad_norm": 0.2226802259683609, "learning_rate": 4.083767535676688e-05, "loss": 0.1366, "step": 19816 }, { "epoch": 0.3534584240002854, "grad_norm": 0.2623763978481293, "learning_rate": 4.083647099864334e-05, "loss": 0.1678, "step": 19817 }, { "epoch": 0.35347626012199906, "grad_norm": 0.2511468231678009, "learning_rate": 4.083526657913216e-05, "loss": 0.1646, "step": 19818 }, { "epoch": 0.35349409624371275, "grad_norm": 0.2920827269554138, "learning_rate": 4.083406209823802e-05, "loss": 0.1865, "step": 19819 }, { "epoch": 0.35351193236542644, "grad_norm": 0.26181209087371826, "learning_rate": 4.083285755596559e-05, "loss": 0.1774, "step": 19820 }, { "epoch": 0.3535297684871402, "grad_norm": 0.2334909439086914, "learning_rate": 4.083165295231953e-05, "loss": 0.1844, "step": 19821 }, { "epoch": 0.35354760460885387, "grad_norm": 0.24480712413787842, "learning_rate": 4.0830448287304515e-05, "loss": 0.1888, "step": 19822 }, { "epoch": 0.35356544073056756, "grad_norm": 0.2666739523410797, "learning_rate": 4.082924356092521e-05, "loss": 0.1614, "step": 19823 }, { "epoch": 0.35358327685228125, "grad_norm": 0.27341511845588684, "learning_rate": 4.08280387731863e-05, "loss": 0.1661, "step": 19824 }, { "epoch": 0.35360111297399494, "grad_norm": 0.3112272620201111, "learning_rate": 4.082683392409244e-05, "loss": 0.2085, "step": 19825 }, { "epoch": 0.3536189490957086, "grad_norm": 0.24370110034942627, "learning_rate": 4.0825629013648306e-05, "loss": 0.1475, "step": 19826 }, { "epoch": 0.3536367852174223, "grad_norm": 0.2231035977602005, "learning_rate": 4.0824424041858566e-05, "loss": 0.1643, "step": 19827 }, { "epoch": 0.353654621339136, "grad_norm": 0.23225586116313934, "learning_rate": 4.08232190087279e-05, "loss": 0.1475, "step": 19828 }, { "epoch": 0.3536724574608497, "grad_norm": 0.31278154253959656, "learning_rate": 4.082201391426096e-05, "loss": 0.1531, "step": 19829 }, { "epoch": 0.35369029358256343, "grad_norm": 0.2686065137386322, "learning_rate": 4.0820808758462435e-05, "loss": 0.1829, "step": 19830 }, { "epoch": 0.3537081297042771, "grad_norm": 0.425155907869339, "learning_rate": 4.0819603541336994e-05, "loss": 0.2167, "step": 19831 }, { "epoch": 0.3537259658259908, "grad_norm": 0.3046644628047943, "learning_rate": 4.081839826288931e-05, "loss": 0.1805, "step": 19832 }, { "epoch": 0.3537438019477045, "grad_norm": 0.24657128751277924, "learning_rate": 4.0817192923124035e-05, "loss": 0.1847, "step": 19833 }, { "epoch": 0.3537616380694182, "grad_norm": 0.23382999002933502, "learning_rate": 4.0815987522045875e-05, "loss": 0.145, "step": 19834 }, { "epoch": 0.35377947419113187, "grad_norm": 0.21233895421028137, "learning_rate": 4.0814782059659476e-05, "loss": 0.1615, "step": 19835 }, { "epoch": 0.35379731031284556, "grad_norm": 0.25242379307746887, "learning_rate": 4.0813576535969516e-05, "loss": 0.1538, "step": 19836 }, { "epoch": 0.35381514643455925, "grad_norm": 0.4201928973197937, "learning_rate": 4.0812370950980675e-05, "loss": 0.1676, "step": 19837 }, { "epoch": 0.353832982556273, "grad_norm": 0.2942999005317688, "learning_rate": 4.081116530469762e-05, "loss": 0.1524, "step": 19838 }, { "epoch": 0.3538508186779867, "grad_norm": 0.30660712718963623, "learning_rate": 4.080995959712503e-05, "loss": 0.1477, "step": 19839 }, { "epoch": 0.35386865479970037, "grad_norm": 0.3902914822101593, "learning_rate": 4.0808753828267575e-05, "loss": 0.1491, "step": 19840 }, { "epoch": 0.35388649092141405, "grad_norm": 0.32468390464782715, "learning_rate": 4.080754799812994e-05, "loss": 0.1522, "step": 19841 }, { "epoch": 0.35390432704312774, "grad_norm": 0.2665756642818451, "learning_rate": 4.080634210671678e-05, "loss": 0.1614, "step": 19842 }, { "epoch": 0.35392216316484143, "grad_norm": 0.43207842111587524, "learning_rate": 4.080513615403278e-05, "loss": 0.2466, "step": 19843 }, { "epoch": 0.3539399992865551, "grad_norm": 0.2925707697868347, "learning_rate": 4.08039301400826e-05, "loss": 0.2039, "step": 19844 }, { "epoch": 0.3539578354082688, "grad_norm": 0.3350176513195038, "learning_rate": 4.0802724064870954e-05, "loss": 0.1928, "step": 19845 }, { "epoch": 0.3539756715299825, "grad_norm": 0.23238037526607513, "learning_rate": 4.0801517928402475e-05, "loss": 0.1533, "step": 19846 }, { "epoch": 0.35399350765169624, "grad_norm": 0.2267867624759674, "learning_rate": 4.080031173068186e-05, "loss": 0.1494, "step": 19847 }, { "epoch": 0.3540113437734099, "grad_norm": 0.4750889539718628, "learning_rate": 4.0799105471713774e-05, "loss": 0.1536, "step": 19848 }, { "epoch": 0.3540291798951236, "grad_norm": 0.24716679751873016, "learning_rate": 4.07978991515029e-05, "loss": 0.1779, "step": 19849 }, { "epoch": 0.3540470160168373, "grad_norm": 0.28831833600997925, "learning_rate": 4.0796692770053915e-05, "loss": 0.1563, "step": 19850 }, { "epoch": 0.354064852138551, "grad_norm": 0.21190913021564484, "learning_rate": 4.079548632737149e-05, "loss": 0.1566, "step": 19851 }, { "epoch": 0.3540826882602647, "grad_norm": 0.21971747279167175, "learning_rate": 4.0794279823460304e-05, "loss": 0.1815, "step": 19852 }, { "epoch": 0.35410052438197837, "grad_norm": 0.256743460893631, "learning_rate": 4.0793073258325046e-05, "loss": 0.1283, "step": 19853 }, { "epoch": 0.35411836050369205, "grad_norm": 0.25874578952789307, "learning_rate": 4.0791866631970366e-05, "loss": 0.1951, "step": 19854 }, { "epoch": 0.3541361966254058, "grad_norm": 0.3698365390300751, "learning_rate": 4.079065994440097e-05, "loss": 0.1975, "step": 19855 }, { "epoch": 0.3541540327471195, "grad_norm": 0.19486546516418457, "learning_rate": 4.078945319562151e-05, "loss": 0.1528, "step": 19856 }, { "epoch": 0.3541718688688332, "grad_norm": 0.2912094295024872, "learning_rate": 4.078824638563668e-05, "loss": 0.1658, "step": 19857 }, { "epoch": 0.35418970499054686, "grad_norm": 0.19902034103870392, "learning_rate": 4.0787039514451154e-05, "loss": 0.126, "step": 19858 }, { "epoch": 0.35420754111226055, "grad_norm": 0.29859817028045654, "learning_rate": 4.078583258206961e-05, "loss": 0.1919, "step": 19859 }, { "epoch": 0.35422537723397424, "grad_norm": 0.24333952367305756, "learning_rate": 4.078462558849673e-05, "loss": 0.1798, "step": 19860 }, { "epoch": 0.3542432133556879, "grad_norm": 0.25449657440185547, "learning_rate": 4.078341853373718e-05, "loss": 0.1561, "step": 19861 }, { "epoch": 0.3542610494774016, "grad_norm": 0.2693297266960144, "learning_rate": 4.078221141779566e-05, "loss": 0.1913, "step": 19862 }, { "epoch": 0.3542788855991153, "grad_norm": 0.20248349010944366, "learning_rate": 4.0781004240676835e-05, "loss": 0.1201, "step": 19863 }, { "epoch": 0.35429672172082904, "grad_norm": 0.25756850838661194, "learning_rate": 4.0779797002385384e-05, "loss": 0.1666, "step": 19864 }, { "epoch": 0.35431455784254273, "grad_norm": 0.22621749341487885, "learning_rate": 4.077858970292599e-05, "loss": 0.1278, "step": 19865 }, { "epoch": 0.3543323939642564, "grad_norm": 0.26753324270248413, "learning_rate": 4.077738234230334e-05, "loss": 0.1357, "step": 19866 }, { "epoch": 0.3543502300859701, "grad_norm": 0.30189305543899536, "learning_rate": 4.0776174920522095e-05, "loss": 0.1357, "step": 19867 }, { "epoch": 0.3543680662076838, "grad_norm": 0.28078579902648926, "learning_rate": 4.0774967437586956e-05, "loss": 0.1533, "step": 19868 }, { "epoch": 0.3543859023293975, "grad_norm": 0.24791203439235687, "learning_rate": 4.0773759893502585e-05, "loss": 0.1516, "step": 19869 }, { "epoch": 0.35440373845111117, "grad_norm": 0.24133937060832977, "learning_rate": 4.077255228827368e-05, "loss": 0.2027, "step": 19870 }, { "epoch": 0.35442157457282486, "grad_norm": 0.41438114643096924, "learning_rate": 4.077134462190491e-05, "loss": 0.2251, "step": 19871 }, { "epoch": 0.3544394106945386, "grad_norm": 0.21450918912887573, "learning_rate": 4.077013689440097e-05, "loss": 0.1986, "step": 19872 }, { "epoch": 0.3544572468162523, "grad_norm": 0.2632606625556946, "learning_rate": 4.0768929105766525e-05, "loss": 0.1716, "step": 19873 }, { "epoch": 0.354475082937966, "grad_norm": 0.2613445222377777, "learning_rate": 4.0767721256006266e-05, "loss": 0.1583, "step": 19874 }, { "epoch": 0.35449291905967967, "grad_norm": 0.3841835558414459, "learning_rate": 4.076651334512487e-05, "loss": 0.1103, "step": 19875 }, { "epoch": 0.35451075518139336, "grad_norm": 0.2413518875837326, "learning_rate": 4.076530537312703e-05, "loss": 0.1895, "step": 19876 }, { "epoch": 0.35452859130310704, "grad_norm": 0.26866576075553894, "learning_rate": 4.076409734001741e-05, "loss": 0.1197, "step": 19877 }, { "epoch": 0.35454642742482073, "grad_norm": 0.25959742069244385, "learning_rate": 4.0762889245800713e-05, "loss": 0.1977, "step": 19878 }, { "epoch": 0.3545642635465344, "grad_norm": 0.2680872082710266, "learning_rate": 4.0761681090481606e-05, "loss": 0.1678, "step": 19879 }, { "epoch": 0.35458209966824816, "grad_norm": 0.2539249658584595, "learning_rate": 4.076047287406479e-05, "loss": 0.1935, "step": 19880 }, { "epoch": 0.35459993578996185, "grad_norm": 0.2112245410680771, "learning_rate": 4.075926459655493e-05, "loss": 0.1666, "step": 19881 }, { "epoch": 0.35461777191167554, "grad_norm": 0.18986941874027252, "learning_rate": 4.075805625795672e-05, "loss": 0.1631, "step": 19882 }, { "epoch": 0.3546356080333892, "grad_norm": 0.20954051613807678, "learning_rate": 4.075684785827484e-05, "loss": 0.151, "step": 19883 }, { "epoch": 0.3546534441551029, "grad_norm": 0.25232070684432983, "learning_rate": 4.075563939751398e-05, "loss": 0.1764, "step": 19884 }, { "epoch": 0.3546712802768166, "grad_norm": 0.40389907360076904, "learning_rate": 4.0754430875678815e-05, "loss": 0.2092, "step": 19885 }, { "epoch": 0.3546891163985303, "grad_norm": 0.1998995691537857, "learning_rate": 4.075322229277403e-05, "loss": 0.159, "step": 19886 }, { "epoch": 0.354706952520244, "grad_norm": 0.27081090211868286, "learning_rate": 4.075201364880432e-05, "loss": 0.1611, "step": 19887 }, { "epoch": 0.35472478864195767, "grad_norm": 0.24346719682216644, "learning_rate": 4.075080494377437e-05, "loss": 0.1462, "step": 19888 }, { "epoch": 0.3547426247636714, "grad_norm": 0.3675137758255005, "learning_rate": 4.0749596177688846e-05, "loss": 0.178, "step": 19889 }, { "epoch": 0.3547604608853851, "grad_norm": 0.17535705864429474, "learning_rate": 4.074838735055246e-05, "loss": 0.145, "step": 19890 }, { "epoch": 0.3547782970070988, "grad_norm": 0.20113764703273773, "learning_rate": 4.074717846236988e-05, "loss": 0.1344, "step": 19891 }, { "epoch": 0.3547961331288125, "grad_norm": 0.25401291251182556, "learning_rate": 4.07459695131458e-05, "loss": 0.1752, "step": 19892 }, { "epoch": 0.35481396925052616, "grad_norm": 0.2587297856807709, "learning_rate": 4.0744760502884905e-05, "loss": 0.1545, "step": 19893 }, { "epoch": 0.35483180537223985, "grad_norm": 0.2889137268066406, "learning_rate": 4.0743551431591876e-05, "loss": 0.1113, "step": 19894 }, { "epoch": 0.35484964149395354, "grad_norm": 0.2278619110584259, "learning_rate": 4.074234229927141e-05, "loss": 0.1583, "step": 19895 }, { "epoch": 0.3548674776156672, "grad_norm": 0.27764952182769775, "learning_rate": 4.074113310592818e-05, "loss": 0.1794, "step": 19896 }, { "epoch": 0.35488531373738097, "grad_norm": 0.273314893245697, "learning_rate": 4.0739923851566887e-05, "loss": 0.1686, "step": 19897 }, { "epoch": 0.35490314985909466, "grad_norm": 0.2454201579093933, "learning_rate": 4.07387145361922e-05, "loss": 0.1772, "step": 19898 }, { "epoch": 0.35492098598080835, "grad_norm": 0.2265036702156067, "learning_rate": 4.073750515980883e-05, "loss": 0.1566, "step": 19899 }, { "epoch": 0.35493882210252203, "grad_norm": 0.3226830065250397, "learning_rate": 4.0736295722421456e-05, "loss": 0.196, "step": 19900 }, { "epoch": 0.3549566582242357, "grad_norm": 0.331459641456604, "learning_rate": 4.073508622403477e-05, "loss": 0.1774, "step": 19901 }, { "epoch": 0.3549744943459494, "grad_norm": 0.3452409505844116, "learning_rate": 4.073387666465344e-05, "loss": 0.1581, "step": 19902 }, { "epoch": 0.3549923304676631, "grad_norm": 0.2745140492916107, "learning_rate": 4.073266704428218e-05, "loss": 0.1711, "step": 19903 }, { "epoch": 0.3550101665893768, "grad_norm": 0.25147125124931335, "learning_rate": 4.073145736292566e-05, "loss": 0.2093, "step": 19904 }, { "epoch": 0.3550280027110905, "grad_norm": 0.22041663527488708, "learning_rate": 4.073024762058859e-05, "loss": 0.1345, "step": 19905 }, { "epoch": 0.3550458388328042, "grad_norm": 0.23245128989219666, "learning_rate": 4.072903781727564e-05, "loss": 0.1737, "step": 19906 }, { "epoch": 0.3550636749545179, "grad_norm": 0.3767424523830414, "learning_rate": 4.072782795299151e-05, "loss": 0.1729, "step": 19907 }, { "epoch": 0.3550815110762316, "grad_norm": 0.26995816826820374, "learning_rate": 4.0726618027740885e-05, "loss": 0.1259, "step": 19908 }, { "epoch": 0.3550993471979453, "grad_norm": 0.28562813997268677, "learning_rate": 4.072540804152846e-05, "loss": 0.1351, "step": 19909 }, { "epoch": 0.35511718331965897, "grad_norm": 0.2921011447906494, "learning_rate": 4.0724197994358916e-05, "loss": 0.1426, "step": 19910 }, { "epoch": 0.35513501944137266, "grad_norm": 0.2537221610546112, "learning_rate": 4.072298788623695e-05, "loss": 0.1158, "step": 19911 }, { "epoch": 0.35515285556308634, "grad_norm": 0.3903716206550598, "learning_rate": 4.0721777717167256e-05, "loss": 0.2055, "step": 19912 }, { "epoch": 0.35517069168480003, "grad_norm": 0.2782233655452728, "learning_rate": 4.0720567487154514e-05, "loss": 0.1463, "step": 19913 }, { "epoch": 0.3551885278065138, "grad_norm": 0.24708178639411926, "learning_rate": 4.0719357196203436e-05, "loss": 0.1811, "step": 19914 }, { "epoch": 0.35520636392822746, "grad_norm": 0.2346726506948471, "learning_rate": 4.0718146844318686e-05, "loss": 0.1469, "step": 19915 }, { "epoch": 0.35522420004994115, "grad_norm": 0.28966253995895386, "learning_rate": 4.071693643150498e-05, "loss": 0.1861, "step": 19916 }, { "epoch": 0.35524203617165484, "grad_norm": 0.32770591974258423, "learning_rate": 4.071572595776699e-05, "loss": 0.175, "step": 19917 }, { "epoch": 0.35525987229336853, "grad_norm": 0.21523083746433258, "learning_rate": 4.0714515423109436e-05, "loss": 0.175, "step": 19918 }, { "epoch": 0.3552777084150822, "grad_norm": 0.2968734800815582, "learning_rate": 4.071330482753698e-05, "loss": 0.1277, "step": 19919 }, { "epoch": 0.3552955445367959, "grad_norm": 0.3430662751197815, "learning_rate": 4.071209417105433e-05, "loss": 0.1323, "step": 19920 }, { "epoch": 0.3553133806585096, "grad_norm": 0.28144368529319763, "learning_rate": 4.071088345366617e-05, "loss": 0.1258, "step": 19921 }, { "epoch": 0.35533121678022334, "grad_norm": 0.25554487109184265, "learning_rate": 4.0709672675377205e-05, "loss": 0.1633, "step": 19922 }, { "epoch": 0.355349052901937, "grad_norm": 0.21577578783035278, "learning_rate": 4.070846183619212e-05, "loss": 0.1432, "step": 19923 }, { "epoch": 0.3553668890236507, "grad_norm": 0.30410903692245483, "learning_rate": 4.070725093611562e-05, "loss": 0.171, "step": 19924 }, { "epoch": 0.3553847251453644, "grad_norm": 0.25397956371307373, "learning_rate": 4.0706039975152386e-05, "loss": 0.1538, "step": 19925 }, { "epoch": 0.3554025612670781, "grad_norm": 0.35547491908073425, "learning_rate": 4.070482895330711e-05, "loss": 0.2167, "step": 19926 }, { "epoch": 0.3554203973887918, "grad_norm": 0.2257125973701477, "learning_rate": 4.0703617870584496e-05, "loss": 0.1785, "step": 19927 }, { "epoch": 0.35543823351050546, "grad_norm": 0.22685030102729797, "learning_rate": 4.070240672698924e-05, "loss": 0.1841, "step": 19928 }, { "epoch": 0.35545606963221915, "grad_norm": 0.2880244255065918, "learning_rate": 4.070119552252603e-05, "loss": 0.1503, "step": 19929 }, { "epoch": 0.35547390575393284, "grad_norm": 0.29200857877731323, "learning_rate": 4.069998425719955e-05, "loss": 0.076, "step": 19930 }, { "epoch": 0.3554917418756466, "grad_norm": 0.27487778663635254, "learning_rate": 4.069877293101453e-05, "loss": 0.195, "step": 19931 }, { "epoch": 0.35550957799736027, "grad_norm": 0.21253696084022522, "learning_rate": 4.0697561543975626e-05, "loss": 0.1327, "step": 19932 }, { "epoch": 0.35552741411907396, "grad_norm": 0.2654271125793457, "learning_rate": 4.069635009608757e-05, "loss": 0.1566, "step": 19933 }, { "epoch": 0.35554525024078765, "grad_norm": 0.30429890751838684, "learning_rate": 4.069513858735502e-05, "loss": 0.2393, "step": 19934 }, { "epoch": 0.35556308636250133, "grad_norm": 0.21206673979759216, "learning_rate": 4.06939270177827e-05, "loss": 0.1621, "step": 19935 }, { "epoch": 0.355580922484215, "grad_norm": 0.3072971701622009, "learning_rate": 4.0692715387375304e-05, "loss": 0.1706, "step": 19936 }, { "epoch": 0.3555987586059287, "grad_norm": 0.19217704236507416, "learning_rate": 4.0691503696137514e-05, "loss": 0.1435, "step": 19937 }, { "epoch": 0.3556165947276424, "grad_norm": 0.27029523253440857, "learning_rate": 4.0690291944074044e-05, "loss": 0.1603, "step": 19938 }, { "epoch": 0.35563443084935614, "grad_norm": 0.2937868535518646, "learning_rate": 4.0689080131189576e-05, "loss": 0.1266, "step": 19939 }, { "epoch": 0.35565226697106983, "grad_norm": 0.20645561814308167, "learning_rate": 4.068786825748882e-05, "loss": 0.1669, "step": 19940 }, { "epoch": 0.3556701030927835, "grad_norm": 0.1976676732301712, "learning_rate": 4.0686656322976466e-05, "loss": 0.1657, "step": 19941 }, { "epoch": 0.3556879392144972, "grad_norm": 0.2827216386795044, "learning_rate": 4.0685444327657215e-05, "loss": 0.1525, "step": 19942 }, { "epoch": 0.3557057753362109, "grad_norm": 0.2267533242702484, "learning_rate": 4.068423227153576e-05, "loss": 0.1571, "step": 19943 }, { "epoch": 0.3557236114579246, "grad_norm": 0.3056357800960541, "learning_rate": 4.0683020154616816e-05, "loss": 0.1344, "step": 19944 }, { "epoch": 0.35574144757963827, "grad_norm": 0.2581568658351898, "learning_rate": 4.068180797690506e-05, "loss": 0.1303, "step": 19945 }, { "epoch": 0.35575928370135196, "grad_norm": 0.23742994666099548, "learning_rate": 4.068059573840519e-05, "loss": 0.1454, "step": 19946 }, { "epoch": 0.35577711982306565, "grad_norm": 0.34085613489151, "learning_rate": 4.067938343912193e-05, "loss": 0.1552, "step": 19947 }, { "epoch": 0.3557949559447794, "grad_norm": 0.23910628259181976, "learning_rate": 4.0678171079059955e-05, "loss": 0.156, "step": 19948 }, { "epoch": 0.3558127920664931, "grad_norm": 0.2141461819410324, "learning_rate": 4.0676958658223986e-05, "loss": 0.1299, "step": 19949 }, { "epoch": 0.35583062818820677, "grad_norm": 0.2811725437641144, "learning_rate": 4.06757461766187e-05, "loss": 0.2097, "step": 19950 }, { "epoch": 0.35584846430992045, "grad_norm": 0.25468626618385315, "learning_rate": 4.067453363424881e-05, "loss": 0.2093, "step": 19951 }, { "epoch": 0.35586630043163414, "grad_norm": 0.2904917895793915, "learning_rate": 4.0673321031119015e-05, "loss": 0.1651, "step": 19952 }, { "epoch": 0.35588413655334783, "grad_norm": 0.22564221918582916, "learning_rate": 4.0672108367234016e-05, "loss": 0.1017, "step": 19953 }, { "epoch": 0.3559019726750615, "grad_norm": 0.20709817111492157, "learning_rate": 4.0670895642598506e-05, "loss": 0.1493, "step": 19954 }, { "epoch": 0.3559198087967752, "grad_norm": 0.22773197293281555, "learning_rate": 4.0669682857217196e-05, "loss": 0.1825, "step": 19955 }, { "epoch": 0.35593764491848895, "grad_norm": 0.2977898418903351, "learning_rate": 4.0668470011094786e-05, "loss": 0.1459, "step": 19956 }, { "epoch": 0.35595548104020264, "grad_norm": 0.32465144991874695, "learning_rate": 4.066725710423597e-05, "loss": 0.1972, "step": 19957 }, { "epoch": 0.3559733171619163, "grad_norm": 0.32574278116226196, "learning_rate": 4.0666044136645456e-05, "loss": 0.1948, "step": 19958 }, { "epoch": 0.35599115328363, "grad_norm": 0.2731695771217346, "learning_rate": 4.066483110832794e-05, "loss": 0.1711, "step": 19959 }, { "epoch": 0.3560089894053437, "grad_norm": 0.24134424328804016, "learning_rate": 4.066361801928814e-05, "loss": 0.1473, "step": 19960 }, { "epoch": 0.3560268255270574, "grad_norm": 0.19726938009262085, "learning_rate": 4.0662404869530735e-05, "loss": 0.1283, "step": 19961 }, { "epoch": 0.3560446616487711, "grad_norm": 0.2203519195318222, "learning_rate": 4.066119165906044e-05, "loss": 0.142, "step": 19962 }, { "epoch": 0.35606249777048476, "grad_norm": 0.2862675189971924, "learning_rate": 4.065997838788196e-05, "loss": 0.1402, "step": 19963 }, { "epoch": 0.35608033389219845, "grad_norm": 0.21290041506290436, "learning_rate": 4.065876505599999e-05, "loss": 0.1184, "step": 19964 }, { "epoch": 0.3560981700139122, "grad_norm": 0.2929311692714691, "learning_rate": 4.0657551663419245e-05, "loss": 0.1221, "step": 19965 }, { "epoch": 0.3561160061356259, "grad_norm": 0.24848604202270508, "learning_rate": 4.065633821014442e-05, "loss": 0.1784, "step": 19966 }, { "epoch": 0.3561338422573396, "grad_norm": 0.31184107065200806, "learning_rate": 4.065512469618022e-05, "loss": 0.2016, "step": 19967 }, { "epoch": 0.35615167837905326, "grad_norm": 0.26397597789764404, "learning_rate": 4.065391112153135e-05, "loss": 0.1797, "step": 19968 }, { "epoch": 0.35616951450076695, "grad_norm": 0.28391069173812866, "learning_rate": 4.065269748620251e-05, "loss": 0.1687, "step": 19969 }, { "epoch": 0.35618735062248064, "grad_norm": 0.22017905116081238, "learning_rate": 4.065148379019842e-05, "loss": 0.1272, "step": 19970 }, { "epoch": 0.3562051867441943, "grad_norm": 0.26468491554260254, "learning_rate": 4.0650270033523766e-05, "loss": 0.1723, "step": 19971 }, { "epoch": 0.356223022865908, "grad_norm": 0.30175405740737915, "learning_rate": 4.064905621618325e-05, "loss": 0.1419, "step": 19972 }, { "epoch": 0.35624085898762176, "grad_norm": 0.3725954294204712, "learning_rate": 4.06478423381816e-05, "loss": 0.1907, "step": 19973 }, { "epoch": 0.35625869510933544, "grad_norm": 0.2569026052951813, "learning_rate": 4.06466283995235e-05, "loss": 0.1772, "step": 19974 }, { "epoch": 0.35627653123104913, "grad_norm": 0.33000338077545166, "learning_rate": 4.064541440021367e-05, "loss": 0.122, "step": 19975 }, { "epoch": 0.3562943673527628, "grad_norm": 0.3174501955509186, "learning_rate": 4.064420034025681e-05, "loss": 0.1406, "step": 19976 }, { "epoch": 0.3563122034744765, "grad_norm": 0.2697739601135254, "learning_rate": 4.0642986219657624e-05, "loss": 0.1761, "step": 19977 }, { "epoch": 0.3563300395961902, "grad_norm": 0.21084244549274445, "learning_rate": 4.064177203842082e-05, "loss": 0.1671, "step": 19978 }, { "epoch": 0.3563478757179039, "grad_norm": 0.2385108321905136, "learning_rate": 4.0640557796551106e-05, "loss": 0.0984, "step": 19979 }, { "epoch": 0.35636571183961757, "grad_norm": 0.31695103645324707, "learning_rate": 4.063934349405318e-05, "loss": 0.1611, "step": 19980 }, { "epoch": 0.3563835479613313, "grad_norm": 0.25559332966804504, "learning_rate": 4.063812913093177e-05, "loss": 0.1667, "step": 19981 }, { "epoch": 0.356401384083045, "grad_norm": 0.3464248776435852, "learning_rate": 4.0636914707191564e-05, "loss": 0.2051, "step": 19982 }, { "epoch": 0.3564192202047587, "grad_norm": 0.25592154264450073, "learning_rate": 4.063570022283728e-05, "loss": 0.1724, "step": 19983 }, { "epoch": 0.3564370563264724, "grad_norm": 0.27940690517425537, "learning_rate": 4.063448567787362e-05, "loss": 0.164, "step": 19984 }, { "epoch": 0.35645489244818607, "grad_norm": 0.2590286135673523, "learning_rate": 4.063327107230529e-05, "loss": 0.1617, "step": 19985 }, { "epoch": 0.35647272856989975, "grad_norm": 0.2647278308868408, "learning_rate": 4.063205640613701e-05, "loss": 0.1499, "step": 19986 }, { "epoch": 0.35649056469161344, "grad_norm": 0.2159040868282318, "learning_rate": 4.0630841679373464e-05, "loss": 0.1409, "step": 19987 }, { "epoch": 0.35650840081332713, "grad_norm": 0.2959003150463104, "learning_rate": 4.062962689201939e-05, "loss": 0.1497, "step": 19988 }, { "epoch": 0.3565262369350408, "grad_norm": 0.23684212565422058, "learning_rate": 4.062841204407948e-05, "loss": 0.1457, "step": 19989 }, { "epoch": 0.35654407305675456, "grad_norm": 0.25725317001342773, "learning_rate": 4.062719713555845e-05, "loss": 0.1547, "step": 19990 }, { "epoch": 0.35656190917846825, "grad_norm": 0.22904419898986816, "learning_rate": 4.0625982166461e-05, "loss": 0.1794, "step": 19991 }, { "epoch": 0.35657974530018194, "grad_norm": 0.5962663888931274, "learning_rate": 4.062476713679185e-05, "loss": 0.1599, "step": 19992 }, { "epoch": 0.3565975814218956, "grad_norm": 0.2700563073158264, "learning_rate": 4.0623552046555706e-05, "loss": 0.1769, "step": 19993 }, { "epoch": 0.3566154175436093, "grad_norm": 0.25697433948516846, "learning_rate": 4.062233689575728e-05, "loss": 0.1534, "step": 19994 }, { "epoch": 0.356633253665323, "grad_norm": 0.29047948122024536, "learning_rate": 4.062112168440128e-05, "loss": 0.1517, "step": 19995 }, { "epoch": 0.3566510897870367, "grad_norm": 0.3100607693195343, "learning_rate": 4.061990641249241e-05, "loss": 0.1813, "step": 19996 }, { "epoch": 0.3566689259087504, "grad_norm": 0.2511794865131378, "learning_rate": 4.0618691080035405e-05, "loss": 0.1656, "step": 19997 }, { "epoch": 0.3566867620304641, "grad_norm": 0.3291493356227875, "learning_rate": 4.061747568703494e-05, "loss": 0.1274, "step": 19998 }, { "epoch": 0.3567045981521778, "grad_norm": 0.3066295087337494, "learning_rate": 4.0616260233495755e-05, "loss": 0.167, "step": 19999 }, { "epoch": 0.3567224342738915, "grad_norm": 0.22795794904232025, "learning_rate": 4.0615044719422545e-05, "loss": 0.1347, "step": 20000 }, { "epoch": 0.3567224342738915, "eval_loss": 0.15922844409942627, "eval_runtime": 106.7988, "eval_samples_per_second": 9.588, "eval_steps_per_second": 1.601, "step": 20000 }, { "epoch": 0.3567402703956052, "grad_norm": 0.3071390688419342, "learning_rate": 4.0613829144820035e-05, "loss": 0.1581, "step": 20001 }, { "epoch": 0.3567581065173189, "grad_norm": 0.2813947796821594, "learning_rate": 4.061261350969293e-05, "loss": 0.1343, "step": 20002 }, { "epoch": 0.35677594263903256, "grad_norm": 0.36832886934280396, "learning_rate": 4.0611397814045934e-05, "loss": 0.1832, "step": 20003 }, { "epoch": 0.35679377876074625, "grad_norm": 0.27840691804885864, "learning_rate": 4.061018205788378e-05, "loss": 0.1589, "step": 20004 }, { "epoch": 0.35681161488245994, "grad_norm": 0.31088972091674805, "learning_rate": 4.060896624121117e-05, "loss": 0.2003, "step": 20005 }, { "epoch": 0.3568294510041736, "grad_norm": 0.2268286794424057, "learning_rate": 4.06077503640328e-05, "loss": 0.1719, "step": 20006 }, { "epoch": 0.35684728712588737, "grad_norm": 0.2342464178800583, "learning_rate": 4.0606534426353415e-05, "loss": 0.1701, "step": 20007 }, { "epoch": 0.35686512324760106, "grad_norm": 0.3153845965862274, "learning_rate": 4.0605318428177694e-05, "loss": 0.1449, "step": 20008 }, { "epoch": 0.35688295936931475, "grad_norm": 0.16588109731674194, "learning_rate": 4.060410236951039e-05, "loss": 0.1508, "step": 20009 }, { "epoch": 0.35690079549102843, "grad_norm": 0.23996523022651672, "learning_rate": 4.0602886250356185e-05, "loss": 0.0992, "step": 20010 }, { "epoch": 0.3569186316127421, "grad_norm": 0.2849825620651245, "learning_rate": 4.0601670070719796e-05, "loss": 0.1603, "step": 20011 }, { "epoch": 0.3569364677344558, "grad_norm": 0.2879987955093384, "learning_rate": 4.0600453830605966e-05, "loss": 0.1537, "step": 20012 }, { "epoch": 0.3569543038561695, "grad_norm": 0.24345509707927704, "learning_rate": 4.059923753001937e-05, "loss": 0.1621, "step": 20013 }, { "epoch": 0.3569721399778832, "grad_norm": 0.29284825921058655, "learning_rate": 4.059802116896475e-05, "loss": 0.1751, "step": 20014 }, { "epoch": 0.35698997609959693, "grad_norm": 0.4531479775905609, "learning_rate": 4.059680474744681e-05, "loss": 0.1114, "step": 20015 }, { "epoch": 0.3570078122213106, "grad_norm": 0.24714669585227966, "learning_rate": 4.059558826547027e-05, "loss": 0.1474, "step": 20016 }, { "epoch": 0.3570256483430243, "grad_norm": 0.2343725562095642, "learning_rate": 4.059437172303984e-05, "loss": 0.1725, "step": 20017 }, { "epoch": 0.357043484464738, "grad_norm": 0.36711928248405457, "learning_rate": 4.059315512016024e-05, "loss": 0.1752, "step": 20018 }, { "epoch": 0.3570613205864517, "grad_norm": 0.2591914236545563, "learning_rate": 4.0591938456836186e-05, "loss": 0.167, "step": 20019 }, { "epoch": 0.35707915670816537, "grad_norm": 0.2744218707084656, "learning_rate": 4.05907217330724e-05, "loss": 0.1487, "step": 20020 }, { "epoch": 0.35709699282987906, "grad_norm": 0.26663652062416077, "learning_rate": 4.058950494887358e-05, "loss": 0.1807, "step": 20021 }, { "epoch": 0.35711482895159274, "grad_norm": 0.2801266312599182, "learning_rate": 4.058828810424446e-05, "loss": 0.1885, "step": 20022 }, { "epoch": 0.3571326650733065, "grad_norm": 0.31054872274398804, "learning_rate": 4.0587071199189756e-05, "loss": 0.1532, "step": 20023 }, { "epoch": 0.3571505011950202, "grad_norm": 0.26875048875808716, "learning_rate": 4.058585423371417e-05, "loss": 0.1761, "step": 20024 }, { "epoch": 0.35716833731673386, "grad_norm": 0.23426620662212372, "learning_rate": 4.058463720782243e-05, "loss": 0.1207, "step": 20025 }, { "epoch": 0.35718617343844755, "grad_norm": 0.2521340847015381, "learning_rate": 4.058342012151926e-05, "loss": 0.1967, "step": 20026 }, { "epoch": 0.35720400956016124, "grad_norm": 0.30015817284584045, "learning_rate": 4.058220297480937e-05, "loss": 0.1402, "step": 20027 }, { "epoch": 0.3572218456818749, "grad_norm": 0.3039630949497223, "learning_rate": 4.058098576769748e-05, "loss": 0.1853, "step": 20028 }, { "epoch": 0.3572396818035886, "grad_norm": 0.21330486238002777, "learning_rate": 4.057976850018831e-05, "loss": 0.1423, "step": 20029 }, { "epoch": 0.3572575179253023, "grad_norm": 0.3427489101886749, "learning_rate": 4.057855117228657e-05, "loss": 0.2264, "step": 20030 }, { "epoch": 0.357275354047016, "grad_norm": 0.25500714778900146, "learning_rate": 4.0577333783996985e-05, "loss": 0.1883, "step": 20031 }, { "epoch": 0.35729319016872974, "grad_norm": 0.243867427110672, "learning_rate": 4.0576116335324274e-05, "loss": 0.1644, "step": 20032 }, { "epoch": 0.3573110262904434, "grad_norm": 0.2899976670742035, "learning_rate": 4.0574898826273164e-05, "loss": 0.1629, "step": 20033 }, { "epoch": 0.3573288624121571, "grad_norm": 0.2780800759792328, "learning_rate": 4.0573681256848364e-05, "loss": 0.1271, "step": 20034 }, { "epoch": 0.3573466985338708, "grad_norm": 0.3251967430114746, "learning_rate": 4.057246362705459e-05, "loss": 0.1794, "step": 20035 }, { "epoch": 0.3573645346555845, "grad_norm": 0.2088649868965149, "learning_rate": 4.0571245936896575e-05, "loss": 0.1659, "step": 20036 }, { "epoch": 0.3573823707772982, "grad_norm": 0.21452626585960388, "learning_rate": 4.0570028186379025e-05, "loss": 0.1518, "step": 20037 }, { "epoch": 0.35740020689901186, "grad_norm": 0.29758983850479126, "learning_rate": 4.056881037550668e-05, "loss": 0.1687, "step": 20038 }, { "epoch": 0.35741804302072555, "grad_norm": 0.2507551312446594, "learning_rate": 4.0567592504284236e-05, "loss": 0.1633, "step": 20039 }, { "epoch": 0.3574358791424393, "grad_norm": 0.246376171708107, "learning_rate": 4.0566374572716435e-05, "loss": 0.1494, "step": 20040 }, { "epoch": 0.357453715264153, "grad_norm": 0.25536832213401794, "learning_rate": 4.056515658080799e-05, "loss": 0.163, "step": 20041 }, { "epoch": 0.35747155138586667, "grad_norm": 0.257061243057251, "learning_rate": 4.056393852856362e-05, "loss": 0.1494, "step": 20042 }, { "epoch": 0.35748938750758036, "grad_norm": 0.26048627495765686, "learning_rate": 4.056272041598804e-05, "loss": 0.2195, "step": 20043 }, { "epoch": 0.35750722362929405, "grad_norm": 0.3398483097553253, "learning_rate": 4.0561502243085994e-05, "loss": 0.1723, "step": 20044 }, { "epoch": 0.35752505975100773, "grad_norm": 0.28536954522132874, "learning_rate": 4.056028400986218e-05, "loss": 0.1698, "step": 20045 }, { "epoch": 0.3575428958727214, "grad_norm": 0.2261572778224945, "learning_rate": 4.0559065716321344e-05, "loss": 0.1427, "step": 20046 }, { "epoch": 0.3575607319944351, "grad_norm": 0.2272304743528366, "learning_rate": 4.055784736246818e-05, "loss": 0.1619, "step": 20047 }, { "epoch": 0.3575785681161488, "grad_norm": 0.3480839729309082, "learning_rate": 4.055662894830744e-05, "loss": 0.1973, "step": 20048 }, { "epoch": 0.35759640423786254, "grad_norm": 0.3085460364818573, "learning_rate": 4.0555410473843826e-05, "loss": 0.1777, "step": 20049 }, { "epoch": 0.35761424035957623, "grad_norm": 0.23200970888137817, "learning_rate": 4.0554191939082065e-05, "loss": 0.1804, "step": 20050 }, { "epoch": 0.3576320764812899, "grad_norm": 0.24992962181568146, "learning_rate": 4.055297334402689e-05, "loss": 0.1669, "step": 20051 }, { "epoch": 0.3576499126030036, "grad_norm": 0.27971726655960083, "learning_rate": 4.055175468868301e-05, "loss": 0.1912, "step": 20052 }, { "epoch": 0.3576677487247173, "grad_norm": 0.25915294885635376, "learning_rate": 4.055053597305517e-05, "loss": 0.2176, "step": 20053 }, { "epoch": 0.357685584846431, "grad_norm": 0.24156317114830017, "learning_rate": 4.054931719714807e-05, "loss": 0.128, "step": 20054 }, { "epoch": 0.35770342096814467, "grad_norm": 0.23737764358520508, "learning_rate": 4.054809836096646e-05, "loss": 0.1269, "step": 20055 }, { "epoch": 0.35772125708985836, "grad_norm": 0.2549412250518799, "learning_rate": 4.054687946451503e-05, "loss": 0.1995, "step": 20056 }, { "epoch": 0.3577390932115721, "grad_norm": 0.21873600780963898, "learning_rate": 4.054566050779855e-05, "loss": 0.1353, "step": 20057 }, { "epoch": 0.3577569293332858, "grad_norm": 0.32668742537498474, "learning_rate": 4.05444414908217e-05, "loss": 0.2179, "step": 20058 }, { "epoch": 0.3577747654549995, "grad_norm": 0.2762688994407654, "learning_rate": 4.054322241358923e-05, "loss": 0.1623, "step": 20059 }, { "epoch": 0.35779260157671317, "grad_norm": 0.3811163306236267, "learning_rate": 4.054200327610587e-05, "loss": 0.2306, "step": 20060 }, { "epoch": 0.35781043769842685, "grad_norm": 0.22776132822036743, "learning_rate": 4.054078407837633e-05, "loss": 0.0969, "step": 20061 }, { "epoch": 0.35782827382014054, "grad_norm": 0.2461792230606079, "learning_rate": 4.0539564820405344e-05, "loss": 0.2207, "step": 20062 }, { "epoch": 0.35784610994185423, "grad_norm": 0.2780037224292755, "learning_rate": 4.0538345502197636e-05, "loss": 0.1675, "step": 20063 }, { "epoch": 0.3578639460635679, "grad_norm": 0.2432805299758911, "learning_rate": 4.0537126123757944e-05, "loss": 0.1589, "step": 20064 }, { "epoch": 0.3578817821852816, "grad_norm": 0.27001407742500305, "learning_rate": 4.053590668509098e-05, "loss": 0.1658, "step": 20065 }, { "epoch": 0.35789961830699535, "grad_norm": 0.3425554931163788, "learning_rate": 4.053468718620147e-05, "loss": 0.1818, "step": 20066 }, { "epoch": 0.35791745442870904, "grad_norm": 0.2458711713552475, "learning_rate": 4.053346762709415e-05, "loss": 0.1598, "step": 20067 }, { "epoch": 0.3579352905504227, "grad_norm": 0.2911379635334015, "learning_rate": 4.0532248007773746e-05, "loss": 0.1709, "step": 20068 }, { "epoch": 0.3579531266721364, "grad_norm": 0.28506800532341003, "learning_rate": 4.0531028328244985e-05, "loss": 0.1565, "step": 20069 }, { "epoch": 0.3579709627938501, "grad_norm": 0.2757311761379242, "learning_rate": 4.052980858851259e-05, "loss": 0.2468, "step": 20070 }, { "epoch": 0.3579887989155638, "grad_norm": 0.19196200370788574, "learning_rate": 4.0528588788581295e-05, "loss": 0.0889, "step": 20071 }, { "epoch": 0.3580066350372775, "grad_norm": 0.31583845615386963, "learning_rate": 4.0527368928455826e-05, "loss": 0.2032, "step": 20072 }, { "epoch": 0.35802447115899116, "grad_norm": 0.23932869732379913, "learning_rate": 4.0526149008140914e-05, "loss": 0.138, "step": 20073 }, { "epoch": 0.3580423072807049, "grad_norm": 0.20993123948574066, "learning_rate": 4.052492902764129e-05, "loss": 0.1554, "step": 20074 }, { "epoch": 0.3580601434024186, "grad_norm": 0.1977933794260025, "learning_rate": 4.052370898696167e-05, "loss": 0.1144, "step": 20075 }, { "epoch": 0.3580779795241323, "grad_norm": 0.2511729300022125, "learning_rate": 4.052248888610679e-05, "loss": 0.185, "step": 20076 }, { "epoch": 0.35809581564584597, "grad_norm": 0.28162476420402527, "learning_rate": 4.05212687250814e-05, "loss": 0.1705, "step": 20077 }, { "epoch": 0.35811365176755966, "grad_norm": 0.23591631650924683, "learning_rate": 4.052004850389019e-05, "loss": 0.1693, "step": 20078 }, { "epoch": 0.35813148788927335, "grad_norm": 0.28343600034713745, "learning_rate": 4.0518828222537916e-05, "loss": 0.1794, "step": 20079 }, { "epoch": 0.35814932401098704, "grad_norm": 0.2719506323337555, "learning_rate": 4.051760788102931e-05, "loss": 0.2279, "step": 20080 }, { "epoch": 0.3581671601327007, "grad_norm": 0.23200997710227966, "learning_rate": 4.05163874793691e-05, "loss": 0.1785, "step": 20081 }, { "epoch": 0.35818499625441447, "grad_norm": 0.22298255562782288, "learning_rate": 4.0515167017562006e-05, "loss": 0.1567, "step": 20082 }, { "epoch": 0.35820283237612816, "grad_norm": 0.31438174843788147, "learning_rate": 4.051394649561277e-05, "loss": 0.1597, "step": 20083 }, { "epoch": 0.35822066849784184, "grad_norm": 0.18996065855026245, "learning_rate": 4.0512725913526115e-05, "loss": 0.1371, "step": 20084 }, { "epoch": 0.35823850461955553, "grad_norm": 0.2507956326007843, "learning_rate": 4.051150527130678e-05, "loss": 0.1644, "step": 20085 }, { "epoch": 0.3582563407412692, "grad_norm": 0.25392869114875793, "learning_rate": 4.051028456895949e-05, "loss": 0.186, "step": 20086 }, { "epoch": 0.3582741768629829, "grad_norm": 0.22865475714206696, "learning_rate": 4.050906380648898e-05, "loss": 0.1422, "step": 20087 }, { "epoch": 0.3582920129846966, "grad_norm": 0.3003014624118805, "learning_rate": 4.050784298389998e-05, "loss": 0.1733, "step": 20088 }, { "epoch": 0.3583098491064103, "grad_norm": 0.3805437982082367, "learning_rate": 4.050662210119723e-05, "loss": 0.2735, "step": 20089 }, { "epoch": 0.35832768522812397, "grad_norm": 0.27107667922973633, "learning_rate": 4.050540115838546e-05, "loss": 0.2042, "step": 20090 }, { "epoch": 0.3583455213498377, "grad_norm": 0.3716125786304474, "learning_rate": 4.050418015546939e-05, "loss": 0.1865, "step": 20091 }, { "epoch": 0.3583633574715514, "grad_norm": 0.3723642826080322, "learning_rate": 4.050295909245377e-05, "loss": 0.1819, "step": 20092 }, { "epoch": 0.3583811935932651, "grad_norm": 0.29444295167922974, "learning_rate": 4.0501737969343326e-05, "loss": 0.1629, "step": 20093 }, { "epoch": 0.3583990297149788, "grad_norm": 0.27687492966651917, "learning_rate": 4.0500516786142784e-05, "loss": 0.176, "step": 20094 }, { "epoch": 0.35841686583669247, "grad_norm": 0.2261350452899933, "learning_rate": 4.0499295542856884e-05, "loss": 0.1514, "step": 20095 }, { "epoch": 0.35843470195840615, "grad_norm": 0.29730236530303955, "learning_rate": 4.0498074239490367e-05, "loss": 0.0942, "step": 20096 }, { "epoch": 0.35845253808011984, "grad_norm": 0.2816241681575775, "learning_rate": 4.049685287604796e-05, "loss": 0.1718, "step": 20097 }, { "epoch": 0.35847037420183353, "grad_norm": 0.2102278470993042, "learning_rate": 4.04956314525344e-05, "loss": 0.1874, "step": 20098 }, { "epoch": 0.3584882103235473, "grad_norm": 0.3193916380405426, "learning_rate": 4.0494409968954424e-05, "loss": 0.218, "step": 20099 }, { "epoch": 0.35850604644526096, "grad_norm": 0.2492050975561142, "learning_rate": 4.0493188425312754e-05, "loss": 0.1714, "step": 20100 }, { "epoch": 0.35852388256697465, "grad_norm": 0.23118208348751068, "learning_rate": 4.0491966821614144e-05, "loss": 0.1564, "step": 20101 }, { "epoch": 0.35854171868868834, "grad_norm": 0.28668248653411865, "learning_rate": 4.049074515786332e-05, "loss": 0.1549, "step": 20102 }, { "epoch": 0.358559554810402, "grad_norm": 0.5270244479179382, "learning_rate": 4.048952343406501e-05, "loss": 0.2129, "step": 20103 }, { "epoch": 0.3585773909321157, "grad_norm": 0.2226092368364334, "learning_rate": 4.048830165022396e-05, "loss": 0.1598, "step": 20104 }, { "epoch": 0.3585952270538294, "grad_norm": 0.28536170721054077, "learning_rate": 4.048707980634491e-05, "loss": 0.1779, "step": 20105 }, { "epoch": 0.3586130631755431, "grad_norm": 0.1809590607881546, "learning_rate": 4.0485857902432575e-05, "loss": 0.1054, "step": 20106 }, { "epoch": 0.3586308992972568, "grad_norm": 0.2945133447647095, "learning_rate": 4.048463593849172e-05, "loss": 0.1682, "step": 20107 }, { "epoch": 0.3586487354189705, "grad_norm": 0.4864062964916229, "learning_rate": 4.0483413914527055e-05, "loss": 0.1762, "step": 20108 }, { "epoch": 0.3586665715406842, "grad_norm": 0.31564784049987793, "learning_rate": 4.048219183054335e-05, "loss": 0.1481, "step": 20109 }, { "epoch": 0.3586844076623979, "grad_norm": 0.3170863389968872, "learning_rate": 4.04809696865453e-05, "loss": 0.1562, "step": 20110 }, { "epoch": 0.3587022437841116, "grad_norm": 0.275523841381073, "learning_rate": 4.0479747482537675e-05, "loss": 0.1543, "step": 20111 }, { "epoch": 0.3587200799058253, "grad_norm": 0.20725567638874054, "learning_rate": 4.047852521852521e-05, "loss": 0.1092, "step": 20112 }, { "epoch": 0.35873791602753896, "grad_norm": 0.21188224852085114, "learning_rate": 4.0477302894512625e-05, "loss": 0.1754, "step": 20113 }, { "epoch": 0.35875575214925265, "grad_norm": 0.25122159719467163, "learning_rate": 4.0476080510504666e-05, "loss": 0.228, "step": 20114 }, { "epoch": 0.35877358827096634, "grad_norm": 0.28943324089050293, "learning_rate": 4.047485806650608e-05, "loss": 0.1529, "step": 20115 }, { "epoch": 0.3587914243926801, "grad_norm": 0.2326575517654419, "learning_rate": 4.0473635562521594e-05, "loss": 0.1525, "step": 20116 }, { "epoch": 0.35880926051439377, "grad_norm": 0.2267775684595108, "learning_rate": 4.0472412998555956e-05, "loss": 0.1714, "step": 20117 }, { "epoch": 0.35882709663610746, "grad_norm": 0.2923399806022644, "learning_rate": 4.04711903746139e-05, "loss": 0.2147, "step": 20118 }, { "epoch": 0.35884493275782114, "grad_norm": 0.22854118049144745, "learning_rate": 4.046996769070017e-05, "loss": 0.1105, "step": 20119 }, { "epoch": 0.35886276887953483, "grad_norm": 0.3726526200771332, "learning_rate": 4.0468744946819495e-05, "loss": 0.1624, "step": 20120 }, { "epoch": 0.3588806050012485, "grad_norm": 0.4243506193161011, "learning_rate": 4.0467522142976626e-05, "loss": 0.1917, "step": 20121 }, { "epoch": 0.3588984411229622, "grad_norm": 0.19965283572673798, "learning_rate": 4.04662992791763e-05, "loss": 0.1407, "step": 20122 }, { "epoch": 0.3589162772446759, "grad_norm": 0.23447652161121368, "learning_rate": 4.046507635542325e-05, "loss": 0.1902, "step": 20123 }, { "epoch": 0.35893411336638964, "grad_norm": 0.26635512709617615, "learning_rate": 4.0463853371722234e-05, "loss": 0.1594, "step": 20124 }, { "epoch": 0.35895194948810333, "grad_norm": 0.23608747124671936, "learning_rate": 4.046263032807797e-05, "loss": 0.1385, "step": 20125 }, { "epoch": 0.358969785609817, "grad_norm": 0.19783037900924683, "learning_rate": 4.046140722449522e-05, "loss": 0.1389, "step": 20126 }, { "epoch": 0.3589876217315307, "grad_norm": 0.3190438449382782, "learning_rate": 4.04601840609787e-05, "loss": 0.1859, "step": 20127 }, { "epoch": 0.3590054578532444, "grad_norm": 0.22001811861991882, "learning_rate": 4.0458960837533185e-05, "loss": 0.1702, "step": 20128 }, { "epoch": 0.3590232939749581, "grad_norm": 0.2458259016275406, "learning_rate": 4.045773755416339e-05, "loss": 0.177, "step": 20129 }, { "epoch": 0.35904113009667177, "grad_norm": 0.3692992031574249, "learning_rate": 4.045651421087406e-05, "loss": 0.1903, "step": 20130 }, { "epoch": 0.35905896621838546, "grad_norm": 0.20430196821689606, "learning_rate": 4.0455290807669955e-05, "loss": 0.1625, "step": 20131 }, { "epoch": 0.35907680234009914, "grad_norm": 0.2893421947956085, "learning_rate": 4.045406734455579e-05, "loss": 0.1033, "step": 20132 }, { "epoch": 0.3590946384618129, "grad_norm": 0.2364894151687622, "learning_rate": 4.045284382153633e-05, "loss": 0.1377, "step": 20133 }, { "epoch": 0.3591124745835266, "grad_norm": 0.27595511078834534, "learning_rate": 4.0451620238616315e-05, "loss": 0.1541, "step": 20134 }, { "epoch": 0.35913031070524026, "grad_norm": 0.26502010226249695, "learning_rate": 4.045039659580048e-05, "loss": 0.115, "step": 20135 }, { "epoch": 0.35914814682695395, "grad_norm": 0.29014939069747925, "learning_rate": 4.0449172893093565e-05, "loss": 0.1608, "step": 20136 }, { "epoch": 0.35916598294866764, "grad_norm": 0.26655539870262146, "learning_rate": 4.044794913050033e-05, "loss": 0.1356, "step": 20137 }, { "epoch": 0.3591838190703813, "grad_norm": 0.24565160274505615, "learning_rate": 4.04467253080255e-05, "loss": 0.1849, "step": 20138 }, { "epoch": 0.359201655192095, "grad_norm": 0.3990464210510254, "learning_rate": 4.044550142567383e-05, "loss": 0.1777, "step": 20139 }, { "epoch": 0.3592194913138087, "grad_norm": 0.2843189537525177, "learning_rate": 4.0444277483450064e-05, "loss": 0.1697, "step": 20140 }, { "epoch": 0.35923732743552245, "grad_norm": 0.3496202826499939, "learning_rate": 4.044305348135894e-05, "loss": 0.2031, "step": 20141 }, { "epoch": 0.35925516355723613, "grad_norm": 0.22710567712783813, "learning_rate": 4.0441829419405215e-05, "loss": 0.1681, "step": 20142 }, { "epoch": 0.3592729996789498, "grad_norm": 0.37579599022865295, "learning_rate": 4.0440605297593616e-05, "loss": 0.1999, "step": 20143 }, { "epoch": 0.3592908358006635, "grad_norm": 0.2775934338569641, "learning_rate": 4.0439381115928906e-05, "loss": 0.1689, "step": 20144 }, { "epoch": 0.3593086719223772, "grad_norm": 0.21812483668327332, "learning_rate": 4.0438156874415816e-05, "loss": 0.171, "step": 20145 }, { "epoch": 0.3593265080440909, "grad_norm": 0.25060713291168213, "learning_rate": 4.0436932573059104e-05, "loss": 0.1715, "step": 20146 }, { "epoch": 0.3593443441658046, "grad_norm": 0.23388399183750153, "learning_rate": 4.04357082118635e-05, "loss": 0.1793, "step": 20147 }, { "epoch": 0.35936218028751826, "grad_norm": 0.2578822374343872, "learning_rate": 4.043448379083377e-05, "loss": 0.1115, "step": 20148 }, { "epoch": 0.35938001640923195, "grad_norm": 0.30807995796203613, "learning_rate": 4.043325930997464e-05, "loss": 0.147, "step": 20149 }, { "epoch": 0.3593978525309457, "grad_norm": 0.4385431706905365, "learning_rate": 4.0432034769290876e-05, "loss": 0.1824, "step": 20150 }, { "epoch": 0.3594156886526594, "grad_norm": 0.26442015171051025, "learning_rate": 4.043081016878721e-05, "loss": 0.1565, "step": 20151 }, { "epoch": 0.35943352477437307, "grad_norm": 0.38670381903648376, "learning_rate": 4.042958550846839e-05, "loss": 0.2768, "step": 20152 }, { "epoch": 0.35945136089608676, "grad_norm": 0.28464677929878235, "learning_rate": 4.042836078833917e-05, "loss": 0.1648, "step": 20153 }, { "epoch": 0.35946919701780045, "grad_norm": 0.28415244817733765, "learning_rate": 4.042713600840431e-05, "loss": 0.1377, "step": 20154 }, { "epoch": 0.35948703313951413, "grad_norm": 0.24842941761016846, "learning_rate": 4.042591116866853e-05, "loss": 0.1763, "step": 20155 }, { "epoch": 0.3595048692612278, "grad_norm": 0.30126526951789856, "learning_rate": 4.042468626913659e-05, "loss": 0.2284, "step": 20156 }, { "epoch": 0.3595227053829415, "grad_norm": 0.22162488102912903, "learning_rate": 4.042346130981324e-05, "loss": 0.1644, "step": 20157 }, { "epoch": 0.35954054150465525, "grad_norm": 0.1988789290189743, "learning_rate": 4.042223629070322e-05, "loss": 0.1553, "step": 20158 }, { "epoch": 0.35955837762636894, "grad_norm": 0.2553611993789673, "learning_rate": 4.042101121181129e-05, "loss": 0.1982, "step": 20159 }, { "epoch": 0.35957621374808263, "grad_norm": 0.2910846471786499, "learning_rate": 4.0419786073142193e-05, "loss": 0.1703, "step": 20160 }, { "epoch": 0.3595940498697963, "grad_norm": 0.22156549990177155, "learning_rate": 4.0418560874700686e-05, "loss": 0.1665, "step": 20161 }, { "epoch": 0.35961188599151, "grad_norm": 0.2697817087173462, "learning_rate": 4.04173356164915e-05, "loss": 0.1346, "step": 20162 }, { "epoch": 0.3596297221132237, "grad_norm": 0.23115389049053192, "learning_rate": 4.041611029851941e-05, "loss": 0.1593, "step": 20163 }, { "epoch": 0.3596475582349374, "grad_norm": 0.3282219171524048, "learning_rate": 4.041488492078914e-05, "loss": 0.2124, "step": 20164 }, { "epoch": 0.35966539435665107, "grad_norm": 0.2314584106206894, "learning_rate": 4.041365948330546e-05, "loss": 0.171, "step": 20165 }, { "epoch": 0.35968323047836476, "grad_norm": 0.270245760679245, "learning_rate": 4.041243398607311e-05, "loss": 0.1752, "step": 20166 }, { "epoch": 0.3597010666000785, "grad_norm": 0.22855812311172485, "learning_rate": 4.041120842909685e-05, "loss": 0.1658, "step": 20167 }, { "epoch": 0.3597189027217922, "grad_norm": 0.2193540781736374, "learning_rate": 4.040998281238141e-05, "loss": 0.1729, "step": 20168 }, { "epoch": 0.3597367388435059, "grad_norm": 0.21676138043403625, "learning_rate": 4.0408757135931564e-05, "loss": 0.1599, "step": 20169 }, { "epoch": 0.35975457496521956, "grad_norm": 0.30260127782821655, "learning_rate": 4.040753139975205e-05, "loss": 0.165, "step": 20170 }, { "epoch": 0.35977241108693325, "grad_norm": 0.22339145839214325, "learning_rate": 4.040630560384761e-05, "loss": 0.1815, "step": 20171 }, { "epoch": 0.35979024720864694, "grad_norm": 0.3318864405155182, "learning_rate": 4.040507974822303e-05, "loss": 0.1661, "step": 20172 }, { "epoch": 0.35980808333036063, "grad_norm": 0.21138843894004822, "learning_rate": 4.0403853832883024e-05, "loss": 0.1666, "step": 20173 }, { "epoch": 0.3598259194520743, "grad_norm": 0.24807460606098175, "learning_rate": 4.040262785783237e-05, "loss": 0.2165, "step": 20174 }, { "epoch": 0.35984375557378806, "grad_norm": 0.2565484046936035, "learning_rate": 4.0401401823075805e-05, "loss": 0.181, "step": 20175 }, { "epoch": 0.35986159169550175, "grad_norm": 0.2567596435546875, "learning_rate": 4.040017572861809e-05, "loss": 0.1857, "step": 20176 }, { "epoch": 0.35987942781721544, "grad_norm": 0.23779326677322388, "learning_rate": 4.039894957446398e-05, "loss": 0.1601, "step": 20177 }, { "epoch": 0.3598972639389291, "grad_norm": 0.2920511066913605, "learning_rate": 4.039772336061821e-05, "loss": 0.1678, "step": 20178 }, { "epoch": 0.3599151000606428, "grad_norm": 0.27880293130874634, "learning_rate": 4.039649708708555e-05, "loss": 0.2438, "step": 20179 }, { "epoch": 0.3599329361823565, "grad_norm": 0.3981066346168518, "learning_rate": 4.039527075387075e-05, "loss": 0.1534, "step": 20180 }, { "epoch": 0.3599507723040702, "grad_norm": 0.33346423506736755, "learning_rate": 4.039404436097857e-05, "loss": 0.137, "step": 20181 }, { "epoch": 0.3599686084257839, "grad_norm": 0.2764245569705963, "learning_rate": 4.039281790841375e-05, "loss": 0.1224, "step": 20182 }, { "epoch": 0.3599864445474976, "grad_norm": 0.17735832929611206, "learning_rate": 4.039159139618106e-05, "loss": 0.1388, "step": 20183 }, { "epoch": 0.3600042806692113, "grad_norm": 0.4991561472415924, "learning_rate": 4.0390364824285234e-05, "loss": 0.1564, "step": 20184 }, { "epoch": 0.360022116790925, "grad_norm": 0.3999602198600769, "learning_rate": 4.0389138192731044e-05, "loss": 0.1669, "step": 20185 }, { "epoch": 0.3600399529126387, "grad_norm": 0.20516230165958405, "learning_rate": 4.038791150152324e-05, "loss": 0.1716, "step": 20186 }, { "epoch": 0.36005778903435237, "grad_norm": 0.2424459308385849, "learning_rate": 4.038668475066657e-05, "loss": 0.1364, "step": 20187 }, { "epoch": 0.36007562515606606, "grad_norm": 0.3136383295059204, "learning_rate": 4.03854579401658e-05, "loss": 0.1081, "step": 20188 }, { "epoch": 0.36009346127777975, "grad_norm": 0.2521072328090668, "learning_rate": 4.038423107002569e-05, "loss": 0.1619, "step": 20189 }, { "epoch": 0.36011129739949344, "grad_norm": 0.28016197681427, "learning_rate": 4.038300414025098e-05, "loss": 0.2076, "step": 20190 }, { "epoch": 0.3601291335212071, "grad_norm": 0.31766772270202637, "learning_rate": 4.038177715084642e-05, "loss": 0.1259, "step": 20191 }, { "epoch": 0.36014696964292087, "grad_norm": 0.33110103011131287, "learning_rate": 4.03805501018168e-05, "loss": 0.1397, "step": 20192 }, { "epoch": 0.36016480576463455, "grad_norm": 0.2855415642261505, "learning_rate": 4.037932299316685e-05, "loss": 0.1588, "step": 20193 }, { "epoch": 0.36018264188634824, "grad_norm": 0.20734168589115143, "learning_rate": 4.0378095824901317e-05, "loss": 0.1383, "step": 20194 }, { "epoch": 0.36020047800806193, "grad_norm": 0.22390116751194, "learning_rate": 4.037686859702499e-05, "loss": 0.1511, "step": 20195 }, { "epoch": 0.3602183141297756, "grad_norm": 0.30205607414245605, "learning_rate": 4.03756413095426e-05, "loss": 0.1713, "step": 20196 }, { "epoch": 0.3602361502514893, "grad_norm": 0.30078405141830444, "learning_rate": 4.037441396245892e-05, "loss": 0.2272, "step": 20197 }, { "epoch": 0.360253986373203, "grad_norm": 0.28949621319770813, "learning_rate": 4.03731865557787e-05, "loss": 0.1742, "step": 20198 }, { "epoch": 0.3602718224949167, "grad_norm": 0.2577759325504303, "learning_rate": 4.03719590895067e-05, "loss": 0.1611, "step": 20199 }, { "epoch": 0.3602896586166304, "grad_norm": 0.26126524806022644, "learning_rate": 4.037073156364767e-05, "loss": 0.1569, "step": 20200 }, { "epoch": 0.3603074947383441, "grad_norm": 0.23403382301330566, "learning_rate": 4.036950397820638e-05, "loss": 0.1798, "step": 20201 }, { "epoch": 0.3603253308600578, "grad_norm": 0.2359326332807541, "learning_rate": 4.0368276333187585e-05, "loss": 0.1322, "step": 20202 }, { "epoch": 0.3603431669817715, "grad_norm": 0.236772358417511, "learning_rate": 4.036704862859604e-05, "loss": 0.1693, "step": 20203 }, { "epoch": 0.3603610031034852, "grad_norm": 0.2861442565917969, "learning_rate": 4.036582086443651e-05, "loss": 0.1268, "step": 20204 }, { "epoch": 0.36037883922519887, "grad_norm": 0.3536750376224518, "learning_rate": 4.036459304071375e-05, "loss": 0.2235, "step": 20205 }, { "epoch": 0.36039667534691255, "grad_norm": 0.2823757827281952, "learning_rate": 4.036336515743252e-05, "loss": 0.1731, "step": 20206 }, { "epoch": 0.36041451146862624, "grad_norm": 0.2587491273880005, "learning_rate": 4.0362137214597585e-05, "loss": 0.1733, "step": 20207 }, { "epoch": 0.36043234759033993, "grad_norm": 0.2883797287940979, "learning_rate": 4.0360909212213696e-05, "loss": 0.1425, "step": 20208 }, { "epoch": 0.3604501837120537, "grad_norm": 0.256644070148468, "learning_rate": 4.035968115028562e-05, "loss": 0.1473, "step": 20209 }, { "epoch": 0.36046801983376736, "grad_norm": 0.3776950538158417, "learning_rate": 4.035845302881811e-05, "loss": 0.1503, "step": 20210 }, { "epoch": 0.36048585595548105, "grad_norm": 0.3086390495300293, "learning_rate": 4.035722484781593e-05, "loss": 0.1937, "step": 20211 }, { "epoch": 0.36050369207719474, "grad_norm": 0.2689470052719116, "learning_rate": 4.035599660728385e-05, "loss": 0.1792, "step": 20212 }, { "epoch": 0.3605215281989084, "grad_norm": 0.25602486729621887, "learning_rate": 4.0354768307226623e-05, "loss": 0.1327, "step": 20213 }, { "epoch": 0.3605393643206221, "grad_norm": 0.22623009979724884, "learning_rate": 4.035353994764901e-05, "loss": 0.1178, "step": 20214 }, { "epoch": 0.3605572004423358, "grad_norm": 0.302060604095459, "learning_rate": 4.035231152855576e-05, "loss": 0.1669, "step": 20215 }, { "epoch": 0.3605750365640495, "grad_norm": 0.22856928408145905, "learning_rate": 4.035108304995167e-05, "loss": 0.126, "step": 20216 }, { "epoch": 0.36059287268576323, "grad_norm": 0.23531246185302734, "learning_rate": 4.034985451184147e-05, "loss": 0.1498, "step": 20217 }, { "epoch": 0.3606107088074769, "grad_norm": 0.30605563521385193, "learning_rate": 4.0348625914229925e-05, "loss": 0.2622, "step": 20218 }, { "epoch": 0.3606285449291906, "grad_norm": 0.3773329555988312, "learning_rate": 4.034739725712181e-05, "loss": 0.2051, "step": 20219 }, { "epoch": 0.3606463810509043, "grad_norm": 0.27986735105514526, "learning_rate": 4.034616854052189e-05, "loss": 0.1941, "step": 20220 }, { "epoch": 0.360664217172618, "grad_norm": 0.2917799949645996, "learning_rate": 4.034493976443491e-05, "loss": 0.2118, "step": 20221 }, { "epoch": 0.3606820532943317, "grad_norm": 0.25020676851272583, "learning_rate": 4.034371092886565e-05, "loss": 0.1457, "step": 20222 }, { "epoch": 0.36069988941604536, "grad_norm": 0.29000723361968994, "learning_rate": 4.034248203381886e-05, "loss": 0.1552, "step": 20223 }, { "epoch": 0.36071772553775905, "grad_norm": 0.22609686851501465, "learning_rate": 4.034125307929932e-05, "loss": 0.1588, "step": 20224 }, { "epoch": 0.36073556165947274, "grad_norm": 0.24163925647735596, "learning_rate": 4.034002406531178e-05, "loss": 0.1616, "step": 20225 }, { "epoch": 0.3607533977811865, "grad_norm": 0.7998350858688354, "learning_rate": 4.0338794991861e-05, "loss": 0.2186, "step": 20226 }, { "epoch": 0.36077123390290017, "grad_norm": 0.22135712206363678, "learning_rate": 4.033756585895177e-05, "loss": 0.1455, "step": 20227 }, { "epoch": 0.36078907002461386, "grad_norm": 0.30494847893714905, "learning_rate": 4.033633666658883e-05, "loss": 0.1097, "step": 20228 }, { "epoch": 0.36080690614632754, "grad_norm": 0.27180221676826477, "learning_rate": 4.033510741477694e-05, "loss": 0.1366, "step": 20229 }, { "epoch": 0.36082474226804123, "grad_norm": 0.19971726834774017, "learning_rate": 4.033387810352088e-05, "loss": 0.126, "step": 20230 }, { "epoch": 0.3608425783897549, "grad_norm": 0.24496595561504364, "learning_rate": 4.033264873282542e-05, "loss": 0.173, "step": 20231 }, { "epoch": 0.3608604145114686, "grad_norm": 0.26250410079956055, "learning_rate": 4.033141930269532e-05, "loss": 0.1656, "step": 20232 }, { "epoch": 0.3608782506331823, "grad_norm": 0.26627466082572937, "learning_rate": 4.0330189813135345e-05, "loss": 0.1947, "step": 20233 }, { "epoch": 0.36089608675489604, "grad_norm": 0.30126819014549255, "learning_rate": 4.032896026415025e-05, "loss": 0.1833, "step": 20234 }, { "epoch": 0.3609139228766097, "grad_norm": 0.22844769060611725, "learning_rate": 4.032773065574482e-05, "loss": 0.1791, "step": 20235 }, { "epoch": 0.3609317589983234, "grad_norm": 0.2675713002681732, "learning_rate": 4.03265009879238e-05, "loss": 0.2061, "step": 20236 }, { "epoch": 0.3609495951200371, "grad_norm": 0.2921369671821594, "learning_rate": 4.032527126069198e-05, "loss": 0.206, "step": 20237 }, { "epoch": 0.3609674312417508, "grad_norm": 0.24295756220817566, "learning_rate": 4.0324041474054106e-05, "loss": 0.1495, "step": 20238 }, { "epoch": 0.3609852673634645, "grad_norm": 0.27312779426574707, "learning_rate": 4.032281162801497e-05, "loss": 0.1853, "step": 20239 }, { "epoch": 0.36100310348517817, "grad_norm": 0.3118610680103302, "learning_rate": 4.03215817225793e-05, "loss": 0.1941, "step": 20240 }, { "epoch": 0.36102093960689186, "grad_norm": 0.32590848207473755, "learning_rate": 4.032035175775191e-05, "loss": 0.1419, "step": 20241 }, { "epoch": 0.3610387757286056, "grad_norm": 0.29744282364845276, "learning_rate": 4.0319121733537535e-05, "loss": 0.1357, "step": 20242 }, { "epoch": 0.3610566118503193, "grad_norm": 0.3176305890083313, "learning_rate": 4.0317891649940955e-05, "loss": 0.208, "step": 20243 }, { "epoch": 0.361074447972033, "grad_norm": 0.1861177235841751, "learning_rate": 4.031666150696693e-05, "loss": 0.1158, "step": 20244 }, { "epoch": 0.36109228409374666, "grad_norm": 0.25487369298934937, "learning_rate": 4.031543130462024e-05, "loss": 0.218, "step": 20245 }, { "epoch": 0.36111012021546035, "grad_norm": 0.27307257056236267, "learning_rate": 4.031420104290565e-05, "loss": 0.1661, "step": 20246 }, { "epoch": 0.36112795633717404, "grad_norm": 0.24589209258556366, "learning_rate": 4.031297072182793e-05, "loss": 0.1597, "step": 20247 }, { "epoch": 0.3611457924588877, "grad_norm": 0.31624308228492737, "learning_rate": 4.0311740341391844e-05, "loss": 0.1449, "step": 20248 }, { "epoch": 0.3611636285806014, "grad_norm": 0.40471410751342773, "learning_rate": 4.0310509901602155e-05, "loss": 0.147, "step": 20249 }, { "epoch": 0.3611814647023151, "grad_norm": 0.24861374497413635, "learning_rate": 4.030927940246365e-05, "loss": 0.1714, "step": 20250 }, { "epoch": 0.36119930082402885, "grad_norm": 0.31324025988578796, "learning_rate": 4.030804884398109e-05, "loss": 0.1214, "step": 20251 }, { "epoch": 0.36121713694574253, "grad_norm": 0.36588868498802185, "learning_rate": 4.030681822615925e-05, "loss": 0.2327, "step": 20252 }, { "epoch": 0.3612349730674562, "grad_norm": 0.2740863263607025, "learning_rate": 4.030558754900289e-05, "loss": 0.1548, "step": 20253 }, { "epoch": 0.3612528091891699, "grad_norm": 0.2316063642501831, "learning_rate": 4.030435681251679e-05, "loss": 0.2011, "step": 20254 }, { "epoch": 0.3612706453108836, "grad_norm": 0.2927284836769104, "learning_rate": 4.030312601670571e-05, "loss": 0.1778, "step": 20255 }, { "epoch": 0.3612884814325973, "grad_norm": 0.3773501515388489, "learning_rate": 4.030189516157443e-05, "loss": 0.1714, "step": 20256 }, { "epoch": 0.361306317554311, "grad_norm": 0.26292529702186584, "learning_rate": 4.030066424712772e-05, "loss": 0.1644, "step": 20257 }, { "epoch": 0.36132415367602466, "grad_norm": 0.27771130204200745, "learning_rate": 4.0299433273370356e-05, "loss": 0.1351, "step": 20258 }, { "epoch": 0.3613419897977384, "grad_norm": 0.2810702323913574, "learning_rate": 4.0298202240307095e-05, "loss": 0.1806, "step": 20259 }, { "epoch": 0.3613598259194521, "grad_norm": 0.24073146283626556, "learning_rate": 4.0296971147942725e-05, "loss": 0.1518, "step": 20260 }, { "epoch": 0.3613776620411658, "grad_norm": 0.2514442205429077, "learning_rate": 4.029573999628201e-05, "loss": 0.1801, "step": 20261 }, { "epoch": 0.36139549816287947, "grad_norm": 0.2692815959453583, "learning_rate": 4.029450878532973e-05, "loss": 0.1748, "step": 20262 }, { "epoch": 0.36141333428459316, "grad_norm": 0.29654932022094727, "learning_rate": 4.029327751509064e-05, "loss": 0.2166, "step": 20263 }, { "epoch": 0.36143117040630685, "grad_norm": 0.319212406873703, "learning_rate": 4.029204618556953e-05, "loss": 0.1679, "step": 20264 }, { "epoch": 0.36144900652802053, "grad_norm": 0.30208736658096313, "learning_rate": 4.029081479677117e-05, "loss": 0.1377, "step": 20265 }, { "epoch": 0.3614668426497342, "grad_norm": 0.3648304045200348, "learning_rate": 4.0289583348700325e-05, "loss": 0.1505, "step": 20266 }, { "epoch": 0.3614846787714479, "grad_norm": 0.25528931617736816, "learning_rate": 4.0288351841361775e-05, "loss": 0.1526, "step": 20267 }, { "epoch": 0.36150251489316165, "grad_norm": 0.20291338860988617, "learning_rate": 4.0287120274760294e-05, "loss": 0.1671, "step": 20268 }, { "epoch": 0.36152035101487534, "grad_norm": 0.22874824702739716, "learning_rate": 4.028588864890066e-05, "loss": 0.0885, "step": 20269 }, { "epoch": 0.36153818713658903, "grad_norm": 0.24226036667823792, "learning_rate": 4.0284656963787634e-05, "loss": 0.1432, "step": 20270 }, { "epoch": 0.3615560232583027, "grad_norm": 0.28844985365867615, "learning_rate": 4.0283425219425995e-05, "loss": 0.1908, "step": 20271 }, { "epoch": 0.3615738593800164, "grad_norm": 0.3035147190093994, "learning_rate": 4.028219341582053e-05, "loss": 0.1807, "step": 20272 }, { "epoch": 0.3615916955017301, "grad_norm": 0.3126792013645172, "learning_rate": 4.0280961552976e-05, "loss": 0.1739, "step": 20273 }, { "epoch": 0.3616095316234438, "grad_norm": 0.3192787766456604, "learning_rate": 4.0279729630897196e-05, "loss": 0.1718, "step": 20274 }, { "epoch": 0.36162736774515747, "grad_norm": 0.27367380261421204, "learning_rate": 4.027849764958887e-05, "loss": 0.0961, "step": 20275 }, { "epoch": 0.3616452038668712, "grad_norm": 0.3001756966114044, "learning_rate": 4.0277265609055814e-05, "loss": 0.1957, "step": 20276 }, { "epoch": 0.3616630399885849, "grad_norm": 0.31588807702064514, "learning_rate": 4.02760335093028e-05, "loss": 0.1361, "step": 20277 }, { "epoch": 0.3616808761102986, "grad_norm": 0.3375159502029419, "learning_rate": 4.027480135033461e-05, "loss": 0.1704, "step": 20278 }, { "epoch": 0.3616987122320123, "grad_norm": 0.3191787004470825, "learning_rate": 4.0273569132156e-05, "loss": 0.2551, "step": 20279 }, { "epoch": 0.36171654835372596, "grad_norm": 0.2709929645061493, "learning_rate": 4.0272336854771775e-05, "loss": 0.1642, "step": 20280 }, { "epoch": 0.36173438447543965, "grad_norm": 0.35836878418922424, "learning_rate": 4.027110451818669e-05, "loss": 0.1281, "step": 20281 }, { "epoch": 0.36175222059715334, "grad_norm": 0.2236219197511673, "learning_rate": 4.0269872122405526e-05, "loss": 0.1545, "step": 20282 }, { "epoch": 0.36177005671886703, "grad_norm": 0.22378231585025787, "learning_rate": 4.026863966743307e-05, "loss": 0.163, "step": 20283 }, { "epoch": 0.36178789284058077, "grad_norm": 0.23444709181785583, "learning_rate": 4.0267407153274094e-05, "loss": 0.1854, "step": 20284 }, { "epoch": 0.36180572896229446, "grad_norm": 0.43053171038627625, "learning_rate": 4.026617457993337e-05, "loss": 0.1926, "step": 20285 }, { "epoch": 0.36182356508400815, "grad_norm": 0.3429013192653656, "learning_rate": 4.026494194741568e-05, "loss": 0.1752, "step": 20286 }, { "epoch": 0.36184140120572184, "grad_norm": 0.24252557754516602, "learning_rate": 4.026370925572581e-05, "loss": 0.2251, "step": 20287 }, { "epoch": 0.3618592373274355, "grad_norm": 0.23021471500396729, "learning_rate": 4.026247650486853e-05, "loss": 0.1434, "step": 20288 }, { "epoch": 0.3618770734491492, "grad_norm": 0.281147837638855, "learning_rate": 4.0261243694848616e-05, "loss": 0.173, "step": 20289 }, { "epoch": 0.3618949095708629, "grad_norm": 0.29727640748023987, "learning_rate": 4.026001082567085e-05, "loss": 0.1578, "step": 20290 }, { "epoch": 0.3619127456925766, "grad_norm": 0.253035306930542, "learning_rate": 4.025877789734001e-05, "loss": 0.1645, "step": 20291 }, { "epoch": 0.3619305818142903, "grad_norm": 0.282619446516037, "learning_rate": 4.0257544909860877e-05, "loss": 0.1522, "step": 20292 }, { "epoch": 0.361948417936004, "grad_norm": 0.26037371158599854, "learning_rate": 4.025631186323824e-05, "loss": 0.1163, "step": 20293 }, { "epoch": 0.3619662540577177, "grad_norm": 0.24035592377185822, "learning_rate": 4.025507875747685e-05, "loss": 0.175, "step": 20294 }, { "epoch": 0.3619840901794314, "grad_norm": 0.24814194440841675, "learning_rate": 4.025384559258152e-05, "loss": 0.1837, "step": 20295 }, { "epoch": 0.3620019263011451, "grad_norm": 0.3079022765159607, "learning_rate": 4.025261236855701e-05, "loss": 0.1216, "step": 20296 }, { "epoch": 0.36201976242285877, "grad_norm": 0.24591481685638428, "learning_rate": 4.0251379085408116e-05, "loss": 0.1487, "step": 20297 }, { "epoch": 0.36203759854457246, "grad_norm": 0.35132238268852234, "learning_rate": 4.02501457431396e-05, "loss": 0.1553, "step": 20298 }, { "epoch": 0.36205543466628615, "grad_norm": 0.24435901641845703, "learning_rate": 4.024891234175625e-05, "loss": 0.127, "step": 20299 }, { "epoch": 0.36207327078799983, "grad_norm": 0.24215549230575562, "learning_rate": 4.0247678881262854e-05, "loss": 0.1524, "step": 20300 }, { "epoch": 0.3620911069097136, "grad_norm": 0.29649487137794495, "learning_rate": 4.024644536166419e-05, "loss": 0.1656, "step": 20301 }, { "epoch": 0.36210894303142727, "grad_norm": 0.30124008655548096, "learning_rate": 4.024521178296503e-05, "loss": 0.1796, "step": 20302 }, { "epoch": 0.36212677915314095, "grad_norm": 0.307167649269104, "learning_rate": 4.024397814517017e-05, "loss": 0.155, "step": 20303 }, { "epoch": 0.36214461527485464, "grad_norm": 0.2808796167373657, "learning_rate": 4.024274444828439e-05, "loss": 0.1285, "step": 20304 }, { "epoch": 0.36216245139656833, "grad_norm": 0.43239811062812805, "learning_rate": 4.024151069231246e-05, "loss": 0.1693, "step": 20305 }, { "epoch": 0.362180287518282, "grad_norm": 0.2988102436065674, "learning_rate": 4.024027687725917e-05, "loss": 0.2092, "step": 20306 }, { "epoch": 0.3621981236399957, "grad_norm": 0.2658236622810364, "learning_rate": 4.02390430031293e-05, "loss": 0.1795, "step": 20307 }, { "epoch": 0.3622159597617094, "grad_norm": 0.2768888771533966, "learning_rate": 4.0237809069927646e-05, "loss": 0.1641, "step": 20308 }, { "epoch": 0.3622337958834231, "grad_norm": 0.36782771348953247, "learning_rate": 4.0236575077658974e-05, "loss": 0.2209, "step": 20309 }, { "epoch": 0.3622516320051368, "grad_norm": 0.2993672788143158, "learning_rate": 4.023534102632808e-05, "loss": 0.2264, "step": 20310 }, { "epoch": 0.3622694681268505, "grad_norm": 0.2562003433704376, "learning_rate": 4.023410691593973e-05, "loss": 0.1386, "step": 20311 }, { "epoch": 0.3622873042485642, "grad_norm": 0.23674288392066956, "learning_rate": 4.023287274649873e-05, "loss": 0.1853, "step": 20312 }, { "epoch": 0.3623051403702779, "grad_norm": 0.3505714237689972, "learning_rate": 4.0231638518009857e-05, "loss": 0.2693, "step": 20313 }, { "epoch": 0.3623229764919916, "grad_norm": 0.3001098930835724, "learning_rate": 4.0230404230477886e-05, "loss": 0.1457, "step": 20314 }, { "epoch": 0.36234081261370527, "grad_norm": 0.21215714514255524, "learning_rate": 4.022916988390761e-05, "loss": 0.1778, "step": 20315 }, { "epoch": 0.36235864873541895, "grad_norm": 0.3627565801143646, "learning_rate": 4.0227935478303815e-05, "loss": 0.1737, "step": 20316 }, { "epoch": 0.36237648485713264, "grad_norm": 0.25186824798583984, "learning_rate": 4.0226701013671276e-05, "loss": 0.2377, "step": 20317 }, { "epoch": 0.3623943209788464, "grad_norm": 0.2834342420101166, "learning_rate": 4.0225466490014784e-05, "loss": 0.1475, "step": 20318 }, { "epoch": 0.3624121571005601, "grad_norm": 0.35382670164108276, "learning_rate": 4.022423190733913e-05, "loss": 0.1641, "step": 20319 }, { "epoch": 0.36242999322227376, "grad_norm": 0.2001914381980896, "learning_rate": 4.022299726564909e-05, "loss": 0.137, "step": 20320 }, { "epoch": 0.36244782934398745, "grad_norm": 0.36918172240257263, "learning_rate": 4.022176256494946e-05, "loss": 0.1722, "step": 20321 }, { "epoch": 0.36246566546570114, "grad_norm": 0.22326025366783142, "learning_rate": 4.0220527805245023e-05, "loss": 0.1783, "step": 20322 }, { "epoch": 0.3624835015874148, "grad_norm": 0.24765141308307648, "learning_rate": 4.0219292986540555e-05, "loss": 0.1892, "step": 20323 }, { "epoch": 0.3625013377091285, "grad_norm": 0.3298339247703552, "learning_rate": 4.021805810884085e-05, "loss": 0.1635, "step": 20324 }, { "epoch": 0.3625191738308422, "grad_norm": 0.19307364523410797, "learning_rate": 4.0216823172150706e-05, "loss": 0.1191, "step": 20325 }, { "epoch": 0.3625370099525559, "grad_norm": 0.29228341579437256, "learning_rate": 4.021558817647489e-05, "loss": 0.2044, "step": 20326 }, { "epoch": 0.36255484607426963, "grad_norm": 0.2923954725265503, "learning_rate": 4.02143531218182e-05, "loss": 0.121, "step": 20327 }, { "epoch": 0.3625726821959833, "grad_norm": 0.2576627731323242, "learning_rate": 4.0213118008185434e-05, "loss": 0.1339, "step": 20328 }, { "epoch": 0.362590518317697, "grad_norm": 0.33647146821022034, "learning_rate": 4.021188283558136e-05, "loss": 0.1723, "step": 20329 }, { "epoch": 0.3626083544394107, "grad_norm": 0.2549893856048584, "learning_rate": 4.021064760401078e-05, "loss": 0.1728, "step": 20330 }, { "epoch": 0.3626261905611244, "grad_norm": 0.2981374263763428, "learning_rate": 4.020941231347846e-05, "loss": 0.1769, "step": 20331 }, { "epoch": 0.36264402668283807, "grad_norm": 0.3461923599243164, "learning_rate": 4.020817696398922e-05, "loss": 0.1447, "step": 20332 }, { "epoch": 0.36266186280455176, "grad_norm": 0.395556777715683, "learning_rate": 4.020694155554783e-05, "loss": 0.0792, "step": 20333 }, { "epoch": 0.36267969892626545, "grad_norm": 0.3223714232444763, "learning_rate": 4.020570608815908e-05, "loss": 0.1297, "step": 20334 }, { "epoch": 0.3626975350479792, "grad_norm": 0.30979812145233154, "learning_rate": 4.0204470561827754e-05, "loss": 0.1487, "step": 20335 }, { "epoch": 0.3627153711696929, "grad_norm": 0.2697942554950714, "learning_rate": 4.020323497655866e-05, "loss": 0.142, "step": 20336 }, { "epoch": 0.36273320729140657, "grad_norm": 0.2825428545475006, "learning_rate": 4.020199933235657e-05, "loss": 0.1931, "step": 20337 }, { "epoch": 0.36275104341312026, "grad_norm": 0.3282732665538788, "learning_rate": 4.020076362922629e-05, "loss": 0.1224, "step": 20338 }, { "epoch": 0.36276887953483394, "grad_norm": 0.21660448610782623, "learning_rate": 4.019952786717259e-05, "loss": 0.1391, "step": 20339 }, { "epoch": 0.36278671565654763, "grad_norm": 0.25640738010406494, "learning_rate": 4.019829204620027e-05, "loss": 0.1953, "step": 20340 }, { "epoch": 0.3628045517782613, "grad_norm": 0.2200232595205307, "learning_rate": 4.019705616631413e-05, "loss": 0.1011, "step": 20341 }, { "epoch": 0.362822387899975, "grad_norm": 0.3159371614456177, "learning_rate": 4.0195820227518945e-05, "loss": 0.1801, "step": 20342 }, { "epoch": 0.36284022402168875, "grad_norm": 0.21758824586868286, "learning_rate": 4.019458422981951e-05, "loss": 0.1646, "step": 20343 }, { "epoch": 0.36285806014340244, "grad_norm": 0.3435939848423004, "learning_rate": 4.019334817322062e-05, "loss": 0.1404, "step": 20344 }, { "epoch": 0.3628758962651161, "grad_norm": 0.35938042402267456, "learning_rate": 4.019211205772707e-05, "loss": 0.1781, "step": 20345 }, { "epoch": 0.3628937323868298, "grad_norm": 0.3149055540561676, "learning_rate": 4.019087588334364e-05, "loss": 0.1981, "step": 20346 }, { "epoch": 0.3629115685085435, "grad_norm": 0.31614789366722107, "learning_rate": 4.0189639650075126e-05, "loss": 0.2255, "step": 20347 }, { "epoch": 0.3629294046302572, "grad_norm": 0.20064190030097961, "learning_rate": 4.018840335792633e-05, "loss": 0.1418, "step": 20348 }, { "epoch": 0.3629472407519709, "grad_norm": 0.23846852779388428, "learning_rate": 4.0187167006902035e-05, "loss": 0.1162, "step": 20349 }, { "epoch": 0.36296507687368457, "grad_norm": 0.3052425682544708, "learning_rate": 4.018593059700703e-05, "loss": 0.1585, "step": 20350 }, { "epoch": 0.36298291299539825, "grad_norm": 0.24612314999103546, "learning_rate": 4.018469412824611e-05, "loss": 0.1711, "step": 20351 }, { "epoch": 0.363000749117112, "grad_norm": 0.36478111147880554, "learning_rate": 4.0183457600624085e-05, "loss": 0.1523, "step": 20352 }, { "epoch": 0.3630185852388257, "grad_norm": 0.33922725915908813, "learning_rate": 4.018222101414573e-05, "loss": 0.1839, "step": 20353 }, { "epoch": 0.3630364213605394, "grad_norm": 0.22540529072284698, "learning_rate": 4.0180984368815835e-05, "loss": 0.1638, "step": 20354 }, { "epoch": 0.36305425748225306, "grad_norm": 0.35284557938575745, "learning_rate": 4.017974766463921e-05, "loss": 0.2088, "step": 20355 }, { "epoch": 0.36307209360396675, "grad_norm": 0.25336530804634094, "learning_rate": 4.017851090162064e-05, "loss": 0.1721, "step": 20356 }, { "epoch": 0.36308992972568044, "grad_norm": 0.24890448153018951, "learning_rate": 4.0177274079764904e-05, "loss": 0.1728, "step": 20357 }, { "epoch": 0.3631077658473941, "grad_norm": 0.26287776231765747, "learning_rate": 4.017603719907683e-05, "loss": 0.2118, "step": 20358 }, { "epoch": 0.3631256019691078, "grad_norm": 0.31241151690483093, "learning_rate": 4.0174800259561185e-05, "loss": 0.1209, "step": 20359 }, { "epoch": 0.36314343809082156, "grad_norm": 0.35665857791900635, "learning_rate": 4.017356326122277e-05, "loss": 0.1798, "step": 20360 }, { "epoch": 0.36316127421253525, "grad_norm": 0.24914267659187317, "learning_rate": 4.017232620406639e-05, "loss": 0.1464, "step": 20361 }, { "epoch": 0.36317911033424893, "grad_norm": 0.2625211179256439, "learning_rate": 4.017108908809683e-05, "loss": 0.1495, "step": 20362 }, { "epoch": 0.3631969464559626, "grad_norm": 0.3509328067302704, "learning_rate": 4.016985191331889e-05, "loss": 0.1097, "step": 20363 }, { "epoch": 0.3632147825776763, "grad_norm": 0.357641339302063, "learning_rate": 4.0168614679737366e-05, "loss": 0.1696, "step": 20364 }, { "epoch": 0.36323261869939, "grad_norm": 0.2892305850982666, "learning_rate": 4.016737738735705e-05, "loss": 0.1297, "step": 20365 }, { "epoch": 0.3632504548211037, "grad_norm": 0.31118863821029663, "learning_rate": 4.0166140036182745e-05, "loss": 0.1358, "step": 20366 }, { "epoch": 0.3632682909428174, "grad_norm": 0.29650670289993286, "learning_rate": 4.0164902626219235e-05, "loss": 0.2323, "step": 20367 }, { "epoch": 0.36328612706453106, "grad_norm": 0.3526393473148346, "learning_rate": 4.0163665157471333e-05, "loss": 0.1311, "step": 20368 }, { "epoch": 0.3633039631862448, "grad_norm": 0.3056434094905853, "learning_rate": 4.0162427629943825e-05, "loss": 0.1389, "step": 20369 }, { "epoch": 0.3633217993079585, "grad_norm": 0.26603442430496216, "learning_rate": 4.0161190043641506e-05, "loss": 0.1858, "step": 20370 }, { "epoch": 0.3633396354296722, "grad_norm": 0.2744593322277069, "learning_rate": 4.0159952398569175e-05, "loss": 0.1727, "step": 20371 }, { "epoch": 0.36335747155138587, "grad_norm": 0.2747959792613983, "learning_rate": 4.0158714694731636e-05, "loss": 0.173, "step": 20372 }, { "epoch": 0.36337530767309956, "grad_norm": 0.27869388461112976, "learning_rate": 4.0157476932133694e-05, "loss": 0.1481, "step": 20373 }, { "epoch": 0.36339314379481324, "grad_norm": 0.30759885907173157, "learning_rate": 4.0156239110780126e-05, "loss": 0.1388, "step": 20374 }, { "epoch": 0.36341097991652693, "grad_norm": 0.3121108412742615, "learning_rate": 4.0155001230675735e-05, "loss": 0.1392, "step": 20375 }, { "epoch": 0.3634288160382406, "grad_norm": 0.38811028003692627, "learning_rate": 4.0153763291825334e-05, "loss": 0.1844, "step": 20376 }, { "epoch": 0.36344665215995436, "grad_norm": 0.341267853975296, "learning_rate": 4.0152525294233714e-05, "loss": 0.1326, "step": 20377 }, { "epoch": 0.36346448828166805, "grad_norm": 0.29311496019363403, "learning_rate": 4.015128723790567e-05, "loss": 0.1649, "step": 20378 }, { "epoch": 0.36348232440338174, "grad_norm": 0.22935625910758972, "learning_rate": 4.0150049122846e-05, "loss": 0.1547, "step": 20379 }, { "epoch": 0.36350016052509543, "grad_norm": 0.2277718335390091, "learning_rate": 4.0148810949059514e-05, "loss": 0.1768, "step": 20380 }, { "epoch": 0.3635179966468091, "grad_norm": 0.2998082935810089, "learning_rate": 4.0147572716550996e-05, "loss": 0.1387, "step": 20381 }, { "epoch": 0.3635358327685228, "grad_norm": 0.23907089233398438, "learning_rate": 4.014633442532526e-05, "loss": 0.1668, "step": 20382 }, { "epoch": 0.3635536688902365, "grad_norm": 0.38662102818489075, "learning_rate": 4.01450960753871e-05, "loss": 0.238, "step": 20383 }, { "epoch": 0.3635715050119502, "grad_norm": 0.19294650852680206, "learning_rate": 4.0143857666741316e-05, "loss": 0.1221, "step": 20384 }, { "epoch": 0.3635893411336639, "grad_norm": 0.2246742993593216, "learning_rate": 4.0142619199392704e-05, "loss": 0.1269, "step": 20385 }, { "epoch": 0.3636071772553776, "grad_norm": 0.2402191460132599, "learning_rate": 4.014138067334608e-05, "loss": 0.2034, "step": 20386 }, { "epoch": 0.3636250133770913, "grad_norm": 0.195674329996109, "learning_rate": 4.0140142088606226e-05, "loss": 0.1176, "step": 20387 }, { "epoch": 0.363642849498805, "grad_norm": 0.288394570350647, "learning_rate": 4.0138903445177957e-05, "loss": 0.1603, "step": 20388 }, { "epoch": 0.3636606856205187, "grad_norm": 0.2395041286945343, "learning_rate": 4.013766474306606e-05, "loss": 0.1783, "step": 20389 }, { "epoch": 0.36367852174223236, "grad_norm": 0.32888275384902954, "learning_rate": 4.013642598227536e-05, "loss": 0.1456, "step": 20390 }, { "epoch": 0.36369635786394605, "grad_norm": 0.2046915739774704, "learning_rate": 4.013518716281064e-05, "loss": 0.1478, "step": 20391 }, { "epoch": 0.36371419398565974, "grad_norm": 0.30634281039237976, "learning_rate": 4.0133948284676705e-05, "loss": 0.1485, "step": 20392 }, { "epoch": 0.3637320301073734, "grad_norm": 0.2923586070537567, "learning_rate": 4.0132709347878363e-05, "loss": 0.1428, "step": 20393 }, { "epoch": 0.36374986622908717, "grad_norm": 0.34686318039894104, "learning_rate": 4.013147035242041e-05, "loss": 0.1343, "step": 20394 }, { "epoch": 0.36376770235080086, "grad_norm": 0.26981616020202637, "learning_rate": 4.013023129830765e-05, "loss": 0.1499, "step": 20395 }, { "epoch": 0.36378553847251455, "grad_norm": 0.24971304833889008, "learning_rate": 4.012899218554489e-05, "loss": 0.1444, "step": 20396 }, { "epoch": 0.36380337459422823, "grad_norm": 0.27691808342933655, "learning_rate": 4.012775301413693e-05, "loss": 0.1365, "step": 20397 }, { "epoch": 0.3638212107159419, "grad_norm": 0.20154893398284912, "learning_rate": 4.012651378408857e-05, "loss": 0.1261, "step": 20398 }, { "epoch": 0.3638390468376556, "grad_norm": 0.2605063319206238, "learning_rate": 4.012527449540463e-05, "loss": 0.1802, "step": 20399 }, { "epoch": 0.3638568829593693, "grad_norm": 0.21625597774982452, "learning_rate": 4.012403514808989e-05, "loss": 0.1858, "step": 20400 }, { "epoch": 0.363874719081083, "grad_norm": 0.21393819153308868, "learning_rate": 4.0122795742149175e-05, "loss": 0.1864, "step": 20401 }, { "epoch": 0.36389255520279673, "grad_norm": 0.3559608459472656, "learning_rate": 4.012155627758727e-05, "loss": 0.1713, "step": 20402 }, { "epoch": 0.3639103913245104, "grad_norm": 0.21698346734046936, "learning_rate": 4.0120316754409e-05, "loss": 0.1382, "step": 20403 }, { "epoch": 0.3639282274462241, "grad_norm": 0.304470032453537, "learning_rate": 4.011907717261916e-05, "loss": 0.1355, "step": 20404 }, { "epoch": 0.3639460635679378, "grad_norm": 0.24674120545387268, "learning_rate": 4.0117837532222546e-05, "loss": 0.2016, "step": 20405 }, { "epoch": 0.3639638996896515, "grad_norm": 0.24448737502098083, "learning_rate": 4.011659783322398e-05, "loss": 0.1609, "step": 20406 }, { "epoch": 0.36398173581136517, "grad_norm": 0.3851648271083832, "learning_rate": 4.011535807562825e-05, "loss": 0.155, "step": 20407 }, { "epoch": 0.36399957193307886, "grad_norm": 0.23602813482284546, "learning_rate": 4.011411825944018e-05, "loss": 0.1311, "step": 20408 }, { "epoch": 0.36401740805479255, "grad_norm": 0.2662386894226074, "learning_rate": 4.011287838466456e-05, "loss": 0.1647, "step": 20409 }, { "epoch": 0.36403524417650623, "grad_norm": 0.3345221281051636, "learning_rate": 4.011163845130622e-05, "loss": 0.1744, "step": 20410 }, { "epoch": 0.36405308029822, "grad_norm": 0.4055943787097931, "learning_rate": 4.011039845936994e-05, "loss": 0.1456, "step": 20411 }, { "epoch": 0.36407091641993367, "grad_norm": 0.3128933906555176, "learning_rate": 4.010915840886054e-05, "loss": 0.1529, "step": 20412 }, { "epoch": 0.36408875254164735, "grad_norm": 0.2648877203464508, "learning_rate": 4.010791829978281e-05, "loss": 0.1633, "step": 20413 }, { "epoch": 0.36410658866336104, "grad_norm": 0.2558538019657135, "learning_rate": 4.0106678132141585e-05, "loss": 0.1817, "step": 20414 }, { "epoch": 0.36412442478507473, "grad_norm": 0.30200332403182983, "learning_rate": 4.010543790594165e-05, "loss": 0.1424, "step": 20415 }, { "epoch": 0.3641422609067884, "grad_norm": 0.3777584731578827, "learning_rate": 4.010419762118782e-05, "loss": 0.1664, "step": 20416 }, { "epoch": 0.3641600970285021, "grad_norm": 0.27539873123168945, "learning_rate": 4.0102957277884914e-05, "loss": 0.1584, "step": 20417 }, { "epoch": 0.3641779331502158, "grad_norm": 0.24165771901607513, "learning_rate": 4.010171687603772e-05, "loss": 0.144, "step": 20418 }, { "epoch": 0.36419576927192954, "grad_norm": 0.24389661848545074, "learning_rate": 4.0100476415651055e-05, "loss": 0.174, "step": 20419 }, { "epoch": 0.3642136053936432, "grad_norm": 0.27097684144973755, "learning_rate": 4.0099235896729725e-05, "loss": 0.1907, "step": 20420 }, { "epoch": 0.3642314415153569, "grad_norm": 0.24715308845043182, "learning_rate": 4.0097995319278554e-05, "loss": 0.1253, "step": 20421 }, { "epoch": 0.3642492776370706, "grad_norm": 0.2176176756620407, "learning_rate": 4.009675468330233e-05, "loss": 0.1783, "step": 20422 }, { "epoch": 0.3642671137587843, "grad_norm": 0.29457953572273254, "learning_rate": 4.0095513988805864e-05, "loss": 0.2018, "step": 20423 }, { "epoch": 0.364284949880498, "grad_norm": 0.26480454206466675, "learning_rate": 4.009427323579398e-05, "loss": 0.131, "step": 20424 }, { "epoch": 0.36430278600221166, "grad_norm": 0.3660164773464203, "learning_rate": 4.009303242427148e-05, "loss": 0.165, "step": 20425 }, { "epoch": 0.36432062212392535, "grad_norm": 0.1991516649723053, "learning_rate": 4.009179155424317e-05, "loss": 0.1527, "step": 20426 }, { "epoch": 0.36433845824563904, "grad_norm": 0.221453458070755, "learning_rate": 4.009055062571387e-05, "loss": 0.1286, "step": 20427 }, { "epoch": 0.3643562943673528, "grad_norm": 0.23807577788829803, "learning_rate": 4.0089309638688376e-05, "loss": 0.1592, "step": 20428 }, { "epoch": 0.3643741304890665, "grad_norm": 0.3022037744522095, "learning_rate": 4.0088068593171514e-05, "loss": 0.1593, "step": 20429 }, { "epoch": 0.36439196661078016, "grad_norm": 0.33922526240348816, "learning_rate": 4.008682748916809e-05, "loss": 0.1779, "step": 20430 }, { "epoch": 0.36440980273249385, "grad_norm": 0.1978517770767212, "learning_rate": 4.00855863266829e-05, "loss": 0.1254, "step": 20431 }, { "epoch": 0.36442763885420754, "grad_norm": 0.24193094670772552, "learning_rate": 4.008434510572077e-05, "loss": 0.1537, "step": 20432 }, { "epoch": 0.3644454749759212, "grad_norm": 0.22125987708568573, "learning_rate": 4.0083103826286506e-05, "loss": 0.1665, "step": 20433 }, { "epoch": 0.3644633110976349, "grad_norm": 0.2807343304157257, "learning_rate": 4.008186248838493e-05, "loss": 0.1668, "step": 20434 }, { "epoch": 0.3644811472193486, "grad_norm": 0.23143427073955536, "learning_rate": 4.008062109202084e-05, "loss": 0.152, "step": 20435 }, { "epoch": 0.36449898334106234, "grad_norm": 0.23137961328029633, "learning_rate": 4.007937963719906e-05, "loss": 0.1703, "step": 20436 }, { "epoch": 0.36451681946277603, "grad_norm": 0.3320882022380829, "learning_rate": 4.0078138123924385e-05, "loss": 0.1682, "step": 20437 }, { "epoch": 0.3645346555844897, "grad_norm": 0.2713082730770111, "learning_rate": 4.007689655220165e-05, "loss": 0.1856, "step": 20438 }, { "epoch": 0.3645524917062034, "grad_norm": 0.31148722767829895, "learning_rate": 4.007565492203565e-05, "loss": 0.2232, "step": 20439 }, { "epoch": 0.3645703278279171, "grad_norm": 0.2416645735502243, "learning_rate": 4.007441323343121e-05, "loss": 0.1566, "step": 20440 }, { "epoch": 0.3645881639496308, "grad_norm": 0.26091521978378296, "learning_rate": 4.007317148639313e-05, "loss": 0.1528, "step": 20441 }, { "epoch": 0.36460600007134447, "grad_norm": 0.5213764309883118, "learning_rate": 4.007192968092623e-05, "loss": 0.1449, "step": 20442 }, { "epoch": 0.36462383619305816, "grad_norm": 0.31789395213127136, "learning_rate": 4.0070687817035337e-05, "loss": 0.1753, "step": 20443 }, { "epoch": 0.3646416723147719, "grad_norm": 0.5056061148643494, "learning_rate": 4.006944589472524e-05, "loss": 0.1754, "step": 20444 }, { "epoch": 0.3646595084364856, "grad_norm": 0.2900460660457611, "learning_rate": 4.006820391400078e-05, "loss": 0.1443, "step": 20445 }, { "epoch": 0.3646773445581993, "grad_norm": 0.34964457154273987, "learning_rate": 4.006696187486675e-05, "loss": 0.2074, "step": 20446 }, { "epoch": 0.36469518067991297, "grad_norm": 0.32497259974479675, "learning_rate": 4.006571977732797e-05, "loss": 0.173, "step": 20447 }, { "epoch": 0.36471301680162665, "grad_norm": 0.3151305615901947, "learning_rate": 4.006447762138926e-05, "loss": 0.1569, "step": 20448 }, { "epoch": 0.36473085292334034, "grad_norm": 0.23915310204029083, "learning_rate": 4.0063235407055434e-05, "loss": 0.184, "step": 20449 }, { "epoch": 0.36474868904505403, "grad_norm": 0.29487887024879456, "learning_rate": 4.00619931343313e-05, "loss": 0.147, "step": 20450 }, { "epoch": 0.3647665251667677, "grad_norm": 0.18383778631687164, "learning_rate": 4.006075080322168e-05, "loss": 0.1426, "step": 20451 }, { "epoch": 0.3647843612884814, "grad_norm": 0.4039187431335449, "learning_rate": 4.0059508413731387e-05, "loss": 0.1412, "step": 20452 }, { "epoch": 0.36480219741019515, "grad_norm": 0.3977547287940979, "learning_rate": 4.005826596586523e-05, "loss": 0.2561, "step": 20453 }, { "epoch": 0.36482003353190884, "grad_norm": 0.2797873914241791, "learning_rate": 4.005702345962804e-05, "loss": 0.1562, "step": 20454 }, { "epoch": 0.3648378696536225, "grad_norm": 0.28851714730262756, "learning_rate": 4.005578089502463e-05, "loss": 0.125, "step": 20455 }, { "epoch": 0.3648557057753362, "grad_norm": 0.27626490592956543, "learning_rate": 4.005453827205981e-05, "loss": 0.1477, "step": 20456 }, { "epoch": 0.3648735418970499, "grad_norm": 0.22205011546611786, "learning_rate": 4.005329559073841e-05, "loss": 0.1648, "step": 20457 }, { "epoch": 0.3648913780187636, "grad_norm": 0.1904202550649643, "learning_rate": 4.005205285106522e-05, "loss": 0.1459, "step": 20458 }, { "epoch": 0.3649092141404773, "grad_norm": 0.2292073518037796, "learning_rate": 4.0050810053045086e-05, "loss": 0.1682, "step": 20459 }, { "epoch": 0.36492705026219097, "grad_norm": 0.29050785303115845, "learning_rate": 4.004956719668281e-05, "loss": 0.1577, "step": 20460 }, { "epoch": 0.3649448863839047, "grad_norm": 0.22272981703281403, "learning_rate": 4.004832428198321e-05, "loss": 0.1855, "step": 20461 }, { "epoch": 0.3649627225056184, "grad_norm": 0.2937490940093994, "learning_rate": 4.004708130895111e-05, "loss": 0.1733, "step": 20462 }, { "epoch": 0.3649805586273321, "grad_norm": 0.3048675060272217, "learning_rate": 4.004583827759133e-05, "loss": 0.1445, "step": 20463 }, { "epoch": 0.3649983947490458, "grad_norm": 0.24152550101280212, "learning_rate": 4.004459518790868e-05, "loss": 0.1408, "step": 20464 }, { "epoch": 0.36501623087075946, "grad_norm": 0.3479171693325043, "learning_rate": 4.004335203990798e-05, "loss": 0.1519, "step": 20465 }, { "epoch": 0.36503406699247315, "grad_norm": 0.26340603828430176, "learning_rate": 4.004210883359406e-05, "loss": 0.2036, "step": 20466 }, { "epoch": 0.36505190311418684, "grad_norm": 0.2923718988895416, "learning_rate": 4.0040865568971725e-05, "loss": 0.1447, "step": 20467 }, { "epoch": 0.3650697392359005, "grad_norm": 0.32460954785346985, "learning_rate": 4.003962224604581e-05, "loss": 0.1481, "step": 20468 }, { "epoch": 0.3650875753576142, "grad_norm": 0.22123803198337555, "learning_rate": 4.0038378864821106e-05, "loss": 0.1284, "step": 20469 }, { "epoch": 0.36510541147932796, "grad_norm": 0.3040921986103058, "learning_rate": 4.0037135425302465e-05, "loss": 0.18, "step": 20470 }, { "epoch": 0.36512324760104165, "grad_norm": 0.2259899377822876, "learning_rate": 4.003589192749469e-05, "loss": 0.0989, "step": 20471 }, { "epoch": 0.36514108372275533, "grad_norm": 0.4131251871585846, "learning_rate": 4.0034648371402605e-05, "loss": 0.1749, "step": 20472 }, { "epoch": 0.365158919844469, "grad_norm": 0.19247211515903473, "learning_rate": 4.0033404757031034e-05, "loss": 0.1363, "step": 20473 }, { "epoch": 0.3651767559661827, "grad_norm": 0.21143482625484467, "learning_rate": 4.003216108438478e-05, "loss": 0.1368, "step": 20474 }, { "epoch": 0.3651945920878964, "grad_norm": 0.24007849395275116, "learning_rate": 4.003091735346869e-05, "loss": 0.1372, "step": 20475 }, { "epoch": 0.3652124282096101, "grad_norm": 0.2866557538509369, "learning_rate": 4.0029673564287576e-05, "loss": 0.116, "step": 20476 }, { "epoch": 0.3652302643313238, "grad_norm": 0.3185955286026001, "learning_rate": 4.002842971684625e-05, "loss": 0.1878, "step": 20477 }, { "epoch": 0.3652481004530375, "grad_norm": 0.29474368691444397, "learning_rate": 4.0027185811149536e-05, "loss": 0.2068, "step": 20478 }, { "epoch": 0.3652659365747512, "grad_norm": 0.2426108866930008, "learning_rate": 4.0025941847202264e-05, "loss": 0.1836, "step": 20479 }, { "epoch": 0.3652837726964649, "grad_norm": 0.28309541940689087, "learning_rate": 4.002469782500925e-05, "loss": 0.2011, "step": 20480 }, { "epoch": 0.3653016088181786, "grad_norm": 0.24353143572807312, "learning_rate": 4.0023453744575326e-05, "loss": 0.1866, "step": 20481 }, { "epoch": 0.36531944493989227, "grad_norm": 0.318086177110672, "learning_rate": 4.002220960590529e-05, "loss": 0.1218, "step": 20482 }, { "epoch": 0.36533728106160596, "grad_norm": 0.2994476854801178, "learning_rate": 4.002096540900399e-05, "loss": 0.2045, "step": 20483 }, { "epoch": 0.36535511718331964, "grad_norm": 0.25481754541397095, "learning_rate": 4.0019721153876244e-05, "loss": 0.1588, "step": 20484 }, { "epoch": 0.36537295330503333, "grad_norm": 0.335957407951355, "learning_rate": 4.001847684052687e-05, "loss": 0.1467, "step": 20485 }, { "epoch": 0.3653907894267471, "grad_norm": 0.29596617817878723, "learning_rate": 4.0017232468960694e-05, "loss": 0.188, "step": 20486 }, { "epoch": 0.36540862554846076, "grad_norm": 0.3041633367538452, "learning_rate": 4.001598803918253e-05, "loss": 0.1831, "step": 20487 }, { "epoch": 0.36542646167017445, "grad_norm": 0.3252646327018738, "learning_rate": 4.001474355119722e-05, "loss": 0.1647, "step": 20488 }, { "epoch": 0.36544429779188814, "grad_norm": 0.253260999917984, "learning_rate": 4.0013499005009566e-05, "loss": 0.1099, "step": 20489 }, { "epoch": 0.3654621339136018, "grad_norm": 0.29536011815071106, "learning_rate": 4.0012254400624416e-05, "loss": 0.1472, "step": 20490 }, { "epoch": 0.3654799700353155, "grad_norm": 0.24268674850463867, "learning_rate": 4.001100973804657e-05, "loss": 0.1572, "step": 20491 }, { "epoch": 0.3654978061570292, "grad_norm": 0.25012004375457764, "learning_rate": 4.000976501728088e-05, "loss": 0.159, "step": 20492 }, { "epoch": 0.3655156422787429, "grad_norm": 0.30402180552482605, "learning_rate": 4.000852023833215e-05, "loss": 0.1634, "step": 20493 }, { "epoch": 0.3655334784004566, "grad_norm": 0.27871835231781006, "learning_rate": 4.0007275401205216e-05, "loss": 0.1479, "step": 20494 }, { "epoch": 0.3655513145221703, "grad_norm": 0.25464093685150146, "learning_rate": 4.000603050590489e-05, "loss": 0.2031, "step": 20495 }, { "epoch": 0.365569150643884, "grad_norm": 0.1704392433166504, "learning_rate": 4.0004785552436005e-05, "loss": 0.1398, "step": 20496 }, { "epoch": 0.3655869867655977, "grad_norm": 0.389527827501297, "learning_rate": 4.0003540540803405e-05, "loss": 0.2127, "step": 20497 }, { "epoch": 0.3656048228873114, "grad_norm": 0.23409676551818848, "learning_rate": 4.000229547101189e-05, "loss": 0.1856, "step": 20498 }, { "epoch": 0.3656226590090251, "grad_norm": 0.24304834008216858, "learning_rate": 4.0001050343066296e-05, "loss": 0.1626, "step": 20499 }, { "epoch": 0.36564049513073876, "grad_norm": 0.31907039880752563, "learning_rate": 3.999980515697145e-05, "loss": 0.1489, "step": 20500 }, { "epoch": 0.36565833125245245, "grad_norm": 0.2660103142261505, "learning_rate": 3.999855991273218e-05, "loss": 0.1725, "step": 20501 }, { "epoch": 0.36567616737416614, "grad_norm": 0.3440670669078827, "learning_rate": 3.99973146103533e-05, "loss": 0.1603, "step": 20502 }, { "epoch": 0.3656940034958799, "grad_norm": 0.2671903371810913, "learning_rate": 3.999606924983966e-05, "loss": 0.1788, "step": 20503 }, { "epoch": 0.36571183961759357, "grad_norm": 0.2599971294403076, "learning_rate": 3.9994823831196075e-05, "loss": 0.1896, "step": 20504 }, { "epoch": 0.36572967573930726, "grad_norm": 0.24406467378139496, "learning_rate": 3.999357835442737e-05, "loss": 0.1166, "step": 20505 }, { "epoch": 0.36574751186102095, "grad_norm": 0.2240985482931137, "learning_rate": 3.999233281953839e-05, "loss": 0.1741, "step": 20506 }, { "epoch": 0.36576534798273463, "grad_norm": 0.2051028162240982, "learning_rate": 3.9991087226533936e-05, "loss": 0.1579, "step": 20507 }, { "epoch": 0.3657831841044483, "grad_norm": 0.29228609800338745, "learning_rate": 3.998984157541885e-05, "loss": 0.1165, "step": 20508 }, { "epoch": 0.365801020226162, "grad_norm": 0.24455542862415314, "learning_rate": 3.998859586619797e-05, "loss": 0.1324, "step": 20509 }, { "epoch": 0.3658188563478757, "grad_norm": 0.4160729944705963, "learning_rate": 3.998735009887611e-05, "loss": 0.2102, "step": 20510 }, { "epoch": 0.3658366924695894, "grad_norm": 0.2766497731208801, "learning_rate": 3.99861042734581e-05, "loss": 0.173, "step": 20511 }, { "epoch": 0.36585452859130313, "grad_norm": 0.24490101635456085, "learning_rate": 3.9984858389948784e-05, "loss": 0.1597, "step": 20512 }, { "epoch": 0.3658723647130168, "grad_norm": 0.19108742475509644, "learning_rate": 3.998361244835298e-05, "loss": 0.126, "step": 20513 }, { "epoch": 0.3658902008347305, "grad_norm": 0.25275447964668274, "learning_rate": 3.998236644867551e-05, "loss": 0.1161, "step": 20514 }, { "epoch": 0.3659080369564442, "grad_norm": 0.2700955271720886, "learning_rate": 3.998112039092122e-05, "loss": 0.1323, "step": 20515 }, { "epoch": 0.3659258730781579, "grad_norm": 0.33192551136016846, "learning_rate": 3.997987427509493e-05, "loss": 0.1688, "step": 20516 }, { "epoch": 0.36594370919987157, "grad_norm": 0.24112530052661896, "learning_rate": 3.997862810120148e-05, "loss": 0.1891, "step": 20517 }, { "epoch": 0.36596154532158526, "grad_norm": 0.3015174865722656, "learning_rate": 3.9977381869245684e-05, "loss": 0.2004, "step": 20518 }, { "epoch": 0.36597938144329895, "grad_norm": 0.2796415686607361, "learning_rate": 3.997613557923239e-05, "loss": 0.1413, "step": 20519 }, { "epoch": 0.3659972175650127, "grad_norm": 0.2002744823694229, "learning_rate": 3.997488923116641e-05, "loss": 0.1409, "step": 20520 }, { "epoch": 0.3660150536867264, "grad_norm": 0.1873876005411148, "learning_rate": 3.99736428250526e-05, "loss": 0.1158, "step": 20521 }, { "epoch": 0.36603288980844007, "grad_norm": 0.23740717768669128, "learning_rate": 3.997239636089578e-05, "loss": 0.1765, "step": 20522 }, { "epoch": 0.36605072593015375, "grad_norm": 0.42529770731925964, "learning_rate": 3.9971149838700774e-05, "loss": 0.1524, "step": 20523 }, { "epoch": 0.36606856205186744, "grad_norm": 0.2501377463340759, "learning_rate": 3.9969903258472415e-05, "loss": 0.1734, "step": 20524 }, { "epoch": 0.36608639817358113, "grad_norm": 0.24219156801700592, "learning_rate": 3.996865662021556e-05, "loss": 0.187, "step": 20525 }, { "epoch": 0.3661042342952948, "grad_norm": 0.3499244749546051, "learning_rate": 3.9967409923935e-05, "loss": 0.1378, "step": 20526 }, { "epoch": 0.3661220704170085, "grad_norm": 0.31709209084510803, "learning_rate": 3.99661631696356e-05, "loss": 0.2221, "step": 20527 }, { "epoch": 0.3661399065387222, "grad_norm": 0.2493421882390976, "learning_rate": 3.996491635732218e-05, "loss": 0.208, "step": 20528 }, { "epoch": 0.36615774266043594, "grad_norm": 0.40529438853263855, "learning_rate": 3.996366948699958e-05, "loss": 0.2433, "step": 20529 }, { "epoch": 0.3661755787821496, "grad_norm": 0.2923068404197693, "learning_rate": 3.9962422558672624e-05, "loss": 0.1478, "step": 20530 }, { "epoch": 0.3661934149038633, "grad_norm": 0.23993302881717682, "learning_rate": 3.996117557234616e-05, "loss": 0.194, "step": 20531 }, { "epoch": 0.366211251025577, "grad_norm": 0.24556699395179749, "learning_rate": 3.995992852802499e-05, "loss": 0.1589, "step": 20532 }, { "epoch": 0.3662290871472907, "grad_norm": 0.37080827355384827, "learning_rate": 3.995868142571399e-05, "loss": 0.1466, "step": 20533 }, { "epoch": 0.3662469232690044, "grad_norm": 0.24904875457286835, "learning_rate": 3.995743426541797e-05, "loss": 0.1968, "step": 20534 }, { "epoch": 0.36626475939071806, "grad_norm": 0.31126147508621216, "learning_rate": 3.995618704714177e-05, "loss": 0.1999, "step": 20535 }, { "epoch": 0.36628259551243175, "grad_norm": 0.2658511996269226, "learning_rate": 3.995493977089022e-05, "loss": 0.1666, "step": 20536 }, { "epoch": 0.3663004316341455, "grad_norm": 0.21540863811969757, "learning_rate": 3.995369243666815e-05, "loss": 0.1638, "step": 20537 }, { "epoch": 0.3663182677558592, "grad_norm": 0.20054097473621368, "learning_rate": 3.9952445044480414e-05, "loss": 0.1718, "step": 20538 }, { "epoch": 0.36633610387757287, "grad_norm": 0.21968497335910797, "learning_rate": 3.995119759433184e-05, "loss": 0.1107, "step": 20539 }, { "epoch": 0.36635393999928656, "grad_norm": 0.2569504976272583, "learning_rate": 3.994995008622725e-05, "loss": 0.1751, "step": 20540 }, { "epoch": 0.36637177612100025, "grad_norm": 0.27129343152046204, "learning_rate": 3.9948702520171496e-05, "loss": 0.1411, "step": 20541 }, { "epoch": 0.36638961224271394, "grad_norm": 0.2291068583726883, "learning_rate": 3.99474548961694e-05, "loss": 0.1844, "step": 20542 }, { "epoch": 0.3664074483644276, "grad_norm": 0.2738098204135895, "learning_rate": 3.994620721422582e-05, "loss": 0.1345, "step": 20543 }, { "epoch": 0.3664252844861413, "grad_norm": 0.31332671642303467, "learning_rate": 3.9944959474345565e-05, "loss": 0.1685, "step": 20544 }, { "epoch": 0.36644312060785506, "grad_norm": 0.4531061351299286, "learning_rate": 3.994371167653349e-05, "loss": 0.169, "step": 20545 }, { "epoch": 0.36646095672956874, "grad_norm": 0.2348158061504364, "learning_rate": 3.9942463820794426e-05, "loss": 0.1287, "step": 20546 }, { "epoch": 0.36647879285128243, "grad_norm": 0.23562182486057281, "learning_rate": 3.994121590713322e-05, "loss": 0.1225, "step": 20547 }, { "epoch": 0.3664966289729961, "grad_norm": 0.2010362297296524, "learning_rate": 3.993996793555469e-05, "loss": 0.1835, "step": 20548 }, { "epoch": 0.3665144650947098, "grad_norm": 0.3551895022392273, "learning_rate": 3.993871990606369e-05, "loss": 0.1431, "step": 20549 }, { "epoch": 0.3665323012164235, "grad_norm": 0.22388547658920288, "learning_rate": 3.993747181866505e-05, "loss": 0.1433, "step": 20550 }, { "epoch": 0.3665501373381372, "grad_norm": 0.1926109939813614, "learning_rate": 3.9936223673363616e-05, "loss": 0.1271, "step": 20551 }, { "epoch": 0.36656797345985087, "grad_norm": 0.24497747421264648, "learning_rate": 3.993497547016421e-05, "loss": 0.1274, "step": 20552 }, { "epoch": 0.36658580958156456, "grad_norm": 0.2648605704307556, "learning_rate": 3.9933727209071686e-05, "loss": 0.1663, "step": 20553 }, { "epoch": 0.3666036457032783, "grad_norm": 0.1955610066652298, "learning_rate": 3.9932478890090875e-05, "loss": 0.1843, "step": 20554 }, { "epoch": 0.366621481824992, "grad_norm": 0.32756561040878296, "learning_rate": 3.9931230513226624e-05, "loss": 0.1518, "step": 20555 }, { "epoch": 0.3666393179467057, "grad_norm": 0.2855414152145386, "learning_rate": 3.992998207848376e-05, "loss": 0.1761, "step": 20556 }, { "epoch": 0.36665715406841937, "grad_norm": 0.2493400275707245, "learning_rate": 3.992873358586713e-05, "loss": 0.166, "step": 20557 }, { "epoch": 0.36667499019013305, "grad_norm": 0.3972211480140686, "learning_rate": 3.9927485035381575e-05, "loss": 0.1928, "step": 20558 }, { "epoch": 0.36669282631184674, "grad_norm": 0.24886314570903778, "learning_rate": 3.992623642703193e-05, "loss": 0.1554, "step": 20559 }, { "epoch": 0.36671066243356043, "grad_norm": 0.3397632837295532, "learning_rate": 3.992498776082304e-05, "loss": 0.213, "step": 20560 }, { "epoch": 0.3667284985552741, "grad_norm": 0.18415671586990356, "learning_rate": 3.9923739036759745e-05, "loss": 0.1577, "step": 20561 }, { "epoch": 0.36674633467698786, "grad_norm": 0.24915896356105804, "learning_rate": 3.992249025484688e-05, "loss": 0.1678, "step": 20562 }, { "epoch": 0.36676417079870155, "grad_norm": 0.31253454089164734, "learning_rate": 3.992124141508928e-05, "loss": 0.133, "step": 20563 }, { "epoch": 0.36678200692041524, "grad_norm": 0.32473352551460266, "learning_rate": 3.9919992517491806e-05, "loss": 0.1443, "step": 20564 }, { "epoch": 0.3667998430421289, "grad_norm": 0.2487039566040039, "learning_rate": 3.991874356205928e-05, "loss": 0.1191, "step": 20565 }, { "epoch": 0.3668176791638426, "grad_norm": 0.2990691661834717, "learning_rate": 3.991749454879655e-05, "loss": 0.1594, "step": 20566 }, { "epoch": 0.3668355152855563, "grad_norm": 0.25821274518966675, "learning_rate": 3.991624547770847e-05, "loss": 0.1767, "step": 20567 }, { "epoch": 0.36685335140727, "grad_norm": 0.28561002016067505, "learning_rate": 3.991499634879987e-05, "loss": 0.1738, "step": 20568 }, { "epoch": 0.3668711875289837, "grad_norm": 0.32387417554855347, "learning_rate": 3.991374716207558e-05, "loss": 0.2243, "step": 20569 }, { "epoch": 0.36688902365069737, "grad_norm": 0.2864395081996918, "learning_rate": 3.991249791754046e-05, "loss": 0.1338, "step": 20570 }, { "epoch": 0.3669068597724111, "grad_norm": 0.2525719106197357, "learning_rate": 3.991124861519935e-05, "loss": 0.1545, "step": 20571 }, { "epoch": 0.3669246958941248, "grad_norm": 0.2808087468147278, "learning_rate": 3.990999925505709e-05, "loss": 0.1805, "step": 20572 }, { "epoch": 0.3669425320158385, "grad_norm": 0.24211646616458893, "learning_rate": 3.990874983711852e-05, "loss": 0.1378, "step": 20573 }, { "epoch": 0.3669603681375522, "grad_norm": 0.24511969089508057, "learning_rate": 3.9907500361388494e-05, "loss": 0.1819, "step": 20574 }, { "epoch": 0.36697820425926586, "grad_norm": 0.2839509844779968, "learning_rate": 3.990625082787185e-05, "loss": 0.128, "step": 20575 }, { "epoch": 0.36699604038097955, "grad_norm": 0.26853278279304504, "learning_rate": 3.9905001236573417e-05, "loss": 0.1554, "step": 20576 }, { "epoch": 0.36701387650269324, "grad_norm": 0.3065044581890106, "learning_rate": 3.9903751587498056e-05, "loss": 0.1486, "step": 20577 }, { "epoch": 0.3670317126244069, "grad_norm": 0.29642218351364136, "learning_rate": 3.99025018806506e-05, "loss": 0.1197, "step": 20578 }, { "epoch": 0.36704954874612067, "grad_norm": 0.3315908908843994, "learning_rate": 3.9901252116035917e-05, "loss": 0.2342, "step": 20579 }, { "epoch": 0.36706738486783436, "grad_norm": 0.2164284884929657, "learning_rate": 3.9900002293658814e-05, "loss": 0.1592, "step": 20580 }, { "epoch": 0.36708522098954804, "grad_norm": 0.23315350711345673, "learning_rate": 3.989875241352417e-05, "loss": 0.1002, "step": 20581 }, { "epoch": 0.36710305711126173, "grad_norm": 0.2701401114463806, "learning_rate": 3.9897502475636804e-05, "loss": 0.1475, "step": 20582 }, { "epoch": 0.3671208932329754, "grad_norm": 0.28790125250816345, "learning_rate": 3.9896252480001586e-05, "loss": 0.1451, "step": 20583 }, { "epoch": 0.3671387293546891, "grad_norm": 0.25865453481674194, "learning_rate": 3.989500242662334e-05, "loss": 0.1527, "step": 20584 }, { "epoch": 0.3671565654764028, "grad_norm": 0.23018702864646912, "learning_rate": 3.989375231550693e-05, "loss": 0.1458, "step": 20585 }, { "epoch": 0.3671744015981165, "grad_norm": 0.25528964400291443, "learning_rate": 3.989250214665717e-05, "loss": 0.16, "step": 20586 }, { "epoch": 0.36719223771983023, "grad_norm": 0.2600228190422058, "learning_rate": 3.989125192007895e-05, "loss": 0.1698, "step": 20587 }, { "epoch": 0.3672100738415439, "grad_norm": 0.2692631185054779, "learning_rate": 3.9890001635777084e-05, "loss": 0.2139, "step": 20588 }, { "epoch": 0.3672279099632576, "grad_norm": 0.2748851478099823, "learning_rate": 3.988875129375643e-05, "loss": 0.1304, "step": 20589 }, { "epoch": 0.3672457460849713, "grad_norm": 0.2953891158103943, "learning_rate": 3.9887500894021836e-05, "loss": 0.1869, "step": 20590 }, { "epoch": 0.367263582206685, "grad_norm": 0.33523455262184143, "learning_rate": 3.988625043657814e-05, "loss": 0.2501, "step": 20591 }, { "epoch": 0.36728141832839867, "grad_norm": 0.24251173436641693, "learning_rate": 3.988499992143021e-05, "loss": 0.161, "step": 20592 }, { "epoch": 0.36729925445011236, "grad_norm": 0.19889268279075623, "learning_rate": 3.9883749348582863e-05, "loss": 0.1592, "step": 20593 }, { "epoch": 0.36731709057182604, "grad_norm": 0.22585001587867737, "learning_rate": 3.9882498718040974e-05, "loss": 0.1711, "step": 20594 }, { "epoch": 0.36733492669353973, "grad_norm": 0.26180481910705566, "learning_rate": 3.988124802980938e-05, "loss": 0.1688, "step": 20595 }, { "epoch": 0.3673527628152535, "grad_norm": 0.2743278443813324, "learning_rate": 3.987999728389292e-05, "loss": 0.1156, "step": 20596 }, { "epoch": 0.36737059893696716, "grad_norm": 0.2452520728111267, "learning_rate": 3.987874648029646e-05, "loss": 0.1178, "step": 20597 }, { "epoch": 0.36738843505868085, "grad_norm": 0.24680772423744202, "learning_rate": 3.987749561902483e-05, "loss": 0.1188, "step": 20598 }, { "epoch": 0.36740627118039454, "grad_norm": 0.27082571387290955, "learning_rate": 3.98762447000829e-05, "loss": 0.1597, "step": 20599 }, { "epoch": 0.3674241073021082, "grad_norm": 0.2516172528266907, "learning_rate": 3.9874993723475493e-05, "loss": 0.1784, "step": 20600 }, { "epoch": 0.3674419434238219, "grad_norm": 0.2623244822025299, "learning_rate": 3.9873742689207486e-05, "loss": 0.1522, "step": 20601 }, { "epoch": 0.3674597795455356, "grad_norm": 0.44623157382011414, "learning_rate": 3.9872491597283714e-05, "loss": 0.1731, "step": 20602 }, { "epoch": 0.3674776156672493, "grad_norm": 0.2736106514930725, "learning_rate": 3.9871240447709024e-05, "loss": 0.1539, "step": 20603 }, { "epoch": 0.36749545178896303, "grad_norm": 0.2611555755138397, "learning_rate": 3.9869989240488266e-05, "loss": 0.1309, "step": 20604 }, { "epoch": 0.3675132879106767, "grad_norm": 0.19687646627426147, "learning_rate": 3.9868737975626306e-05, "loss": 0.1252, "step": 20605 }, { "epoch": 0.3675311240323904, "grad_norm": 0.22891318798065186, "learning_rate": 3.986748665312796e-05, "loss": 0.1447, "step": 20606 }, { "epoch": 0.3675489601541041, "grad_norm": 0.3005000948905945, "learning_rate": 3.986623527299812e-05, "loss": 0.132, "step": 20607 }, { "epoch": 0.3675667962758178, "grad_norm": 0.20634649693965912, "learning_rate": 3.986498383524161e-05, "loss": 0.1379, "step": 20608 }, { "epoch": 0.3675846323975315, "grad_norm": 0.26238688826560974, "learning_rate": 3.986373233986329e-05, "loss": 0.1763, "step": 20609 }, { "epoch": 0.36760246851924516, "grad_norm": 0.23865243792533875, "learning_rate": 3.9862480786868006e-05, "loss": 0.1511, "step": 20610 }, { "epoch": 0.36762030464095885, "grad_norm": 0.37497448921203613, "learning_rate": 3.9861229176260614e-05, "loss": 0.1733, "step": 20611 }, { "epoch": 0.36763814076267254, "grad_norm": 0.23728413879871368, "learning_rate": 3.9859977508045976e-05, "loss": 0.1468, "step": 20612 }, { "epoch": 0.3676559768843863, "grad_norm": 0.28528738021850586, "learning_rate": 3.985872578222892e-05, "loss": 0.1984, "step": 20613 }, { "epoch": 0.36767381300609997, "grad_norm": 0.26065874099731445, "learning_rate": 3.985747399881432e-05, "loss": 0.1678, "step": 20614 }, { "epoch": 0.36769164912781366, "grad_norm": 0.2710501253604889, "learning_rate": 3.985622215780701e-05, "loss": 0.2159, "step": 20615 }, { "epoch": 0.36770948524952735, "grad_norm": 0.32494550943374634, "learning_rate": 3.9854970259211863e-05, "loss": 0.2324, "step": 20616 }, { "epoch": 0.36772732137124103, "grad_norm": 0.20609810948371887, "learning_rate": 3.985371830303371e-05, "loss": 0.1306, "step": 20617 }, { "epoch": 0.3677451574929547, "grad_norm": 0.23852016031742096, "learning_rate": 3.985246628927742e-05, "loss": 0.1819, "step": 20618 }, { "epoch": 0.3677629936146684, "grad_norm": 0.25980716943740845, "learning_rate": 3.985121421794783e-05, "loss": 0.1576, "step": 20619 }, { "epoch": 0.3677808297363821, "grad_norm": 0.35172727704048157, "learning_rate": 3.9849962089049816e-05, "loss": 0.1409, "step": 20620 }, { "epoch": 0.36779866585809584, "grad_norm": 0.2904670834541321, "learning_rate": 3.984870990258822e-05, "loss": 0.1581, "step": 20621 }, { "epoch": 0.36781650197980953, "grad_norm": 0.24317657947540283, "learning_rate": 3.9847457658567896e-05, "loss": 0.1544, "step": 20622 }, { "epoch": 0.3678343381015232, "grad_norm": 0.3140999376773834, "learning_rate": 3.9846205356993696e-05, "loss": 0.1589, "step": 20623 }, { "epoch": 0.3678521742232369, "grad_norm": 0.27958840131759644, "learning_rate": 3.984495299787047e-05, "loss": 0.1742, "step": 20624 }, { "epoch": 0.3678700103449506, "grad_norm": 0.2750065326690674, "learning_rate": 3.984370058120308e-05, "loss": 0.1475, "step": 20625 }, { "epoch": 0.3678878464666643, "grad_norm": 0.39659854769706726, "learning_rate": 3.984244810699639e-05, "loss": 0.1407, "step": 20626 }, { "epoch": 0.36790568258837797, "grad_norm": 0.2191317230463028, "learning_rate": 3.9841195575255244e-05, "loss": 0.1262, "step": 20627 }, { "epoch": 0.36792351871009166, "grad_norm": 0.2404310405254364, "learning_rate": 3.9839942985984494e-05, "loss": 0.2014, "step": 20628 }, { "epoch": 0.36794135483180535, "grad_norm": 0.2965210974216461, "learning_rate": 3.983869033918899e-05, "loss": 0.1392, "step": 20629 }, { "epoch": 0.3679591909535191, "grad_norm": 0.25626567006111145, "learning_rate": 3.983743763487361e-05, "loss": 0.1662, "step": 20630 }, { "epoch": 0.3679770270752328, "grad_norm": 0.2228141874074936, "learning_rate": 3.9836184873043194e-05, "loss": 0.1389, "step": 20631 }, { "epoch": 0.36799486319694646, "grad_norm": 0.36724740266799927, "learning_rate": 3.983493205370259e-05, "loss": 0.1317, "step": 20632 }, { "epoch": 0.36801269931866015, "grad_norm": 0.36563339829444885, "learning_rate": 3.983367917685668e-05, "loss": 0.1758, "step": 20633 }, { "epoch": 0.36803053544037384, "grad_norm": 0.2705327868461609, "learning_rate": 3.98324262425103e-05, "loss": 0.1926, "step": 20634 }, { "epoch": 0.36804837156208753, "grad_norm": 0.3557082712650299, "learning_rate": 3.983117325066832e-05, "loss": 0.1927, "step": 20635 }, { "epoch": 0.3680662076838012, "grad_norm": 0.30475980043411255, "learning_rate": 3.982992020133558e-05, "loss": 0.1535, "step": 20636 }, { "epoch": 0.3680840438055149, "grad_norm": 0.29667025804519653, "learning_rate": 3.982866709451695e-05, "loss": 0.1458, "step": 20637 }, { "epoch": 0.36810187992722865, "grad_norm": 0.2111435830593109, "learning_rate": 3.982741393021728e-05, "loss": 0.199, "step": 20638 }, { "epoch": 0.36811971604894234, "grad_norm": 0.21722495555877686, "learning_rate": 3.982616070844144e-05, "loss": 0.11, "step": 20639 }, { "epoch": 0.368137552170656, "grad_norm": 0.22200269997119904, "learning_rate": 3.982490742919428e-05, "loss": 0.1786, "step": 20640 }, { "epoch": 0.3681553882923697, "grad_norm": 0.3229037821292877, "learning_rate": 3.982365409248066e-05, "loss": 0.1806, "step": 20641 }, { "epoch": 0.3681732244140834, "grad_norm": 0.17968812584877014, "learning_rate": 3.9822400698305434e-05, "loss": 0.1348, "step": 20642 }, { "epoch": 0.3681910605357971, "grad_norm": 0.27382978796958923, "learning_rate": 3.982114724667346e-05, "loss": 0.1353, "step": 20643 }, { "epoch": 0.3682088966575108, "grad_norm": 0.2185668796300888, "learning_rate": 3.981989373758961e-05, "loss": 0.1485, "step": 20644 }, { "epoch": 0.36822673277922446, "grad_norm": 0.2252895087003708, "learning_rate": 3.981864017105872e-05, "loss": 0.1391, "step": 20645 }, { "epoch": 0.3682445689009382, "grad_norm": 0.2759244441986084, "learning_rate": 3.981738654708567e-05, "loss": 0.1306, "step": 20646 }, { "epoch": 0.3682624050226519, "grad_norm": 0.3097649812698364, "learning_rate": 3.9816132865675316e-05, "loss": 0.1925, "step": 20647 }, { "epoch": 0.3682802411443656, "grad_norm": 0.24489419162273407, "learning_rate": 3.9814879126832504e-05, "loss": 0.1596, "step": 20648 }, { "epoch": 0.36829807726607927, "grad_norm": 0.4304359555244446, "learning_rate": 3.981362533056211e-05, "loss": 0.1945, "step": 20649 }, { "epoch": 0.36831591338779296, "grad_norm": 0.2890470623970032, "learning_rate": 3.9812371476868984e-05, "loss": 0.1539, "step": 20650 }, { "epoch": 0.36833374950950665, "grad_norm": 0.2140313982963562, "learning_rate": 3.9811117565757994e-05, "loss": 0.1501, "step": 20651 }, { "epoch": 0.36835158563122034, "grad_norm": 0.24240589141845703, "learning_rate": 3.9809863597234e-05, "loss": 0.1745, "step": 20652 }, { "epoch": 0.368369421752934, "grad_norm": 0.2301190048456192, "learning_rate": 3.9808609571301844e-05, "loss": 0.1891, "step": 20653 }, { "epoch": 0.3683872578746477, "grad_norm": 0.2814874053001404, "learning_rate": 3.9807355487966416e-05, "loss": 0.1543, "step": 20654 }, { "epoch": 0.36840509399636145, "grad_norm": 0.18487438559532166, "learning_rate": 3.980610134723256e-05, "loss": 0.1735, "step": 20655 }, { "epoch": 0.36842293011807514, "grad_norm": 0.3395955264568329, "learning_rate": 3.9804847149105145e-05, "loss": 0.1939, "step": 20656 }, { "epoch": 0.36844076623978883, "grad_norm": 0.3092682957649231, "learning_rate": 3.9803592893589027e-05, "loss": 0.2114, "step": 20657 }, { "epoch": 0.3684586023615025, "grad_norm": 0.25286003947257996, "learning_rate": 3.980233858068907e-05, "loss": 0.1414, "step": 20658 }, { "epoch": 0.3684764384832162, "grad_norm": 0.20957231521606445, "learning_rate": 3.980108421041013e-05, "loss": 0.1625, "step": 20659 }, { "epoch": 0.3684942746049299, "grad_norm": 0.35376012325286865, "learning_rate": 3.979982978275708e-05, "loss": 0.2041, "step": 20660 }, { "epoch": 0.3685121107266436, "grad_norm": 0.1887214183807373, "learning_rate": 3.9798575297734785e-05, "loss": 0.1323, "step": 20661 }, { "epoch": 0.36852994684835727, "grad_norm": 0.2860632538795471, "learning_rate": 3.9797320755348096e-05, "loss": 0.1418, "step": 20662 }, { "epoch": 0.368547782970071, "grad_norm": 0.20776663720607758, "learning_rate": 3.9796066155601874e-05, "loss": 0.1347, "step": 20663 }, { "epoch": 0.3685656190917847, "grad_norm": 0.2547638714313507, "learning_rate": 3.9794811498501e-05, "loss": 0.1543, "step": 20664 }, { "epoch": 0.3685834552134984, "grad_norm": 0.3296222984790802, "learning_rate": 3.9793556784050326e-05, "loss": 0.1845, "step": 20665 }, { "epoch": 0.3686012913352121, "grad_norm": 0.3723026514053345, "learning_rate": 3.9792302012254704e-05, "loss": 0.185, "step": 20666 }, { "epoch": 0.36861912745692577, "grad_norm": 0.26877814531326294, "learning_rate": 3.9791047183119024e-05, "loss": 0.1515, "step": 20667 }, { "epoch": 0.36863696357863945, "grad_norm": 0.29769232869148254, "learning_rate": 3.978979229664813e-05, "loss": 0.22, "step": 20668 }, { "epoch": 0.36865479970035314, "grad_norm": 0.21737362444400787, "learning_rate": 3.978853735284689e-05, "loss": 0.1518, "step": 20669 }, { "epoch": 0.36867263582206683, "grad_norm": 0.24438200891017914, "learning_rate": 3.978728235172018e-05, "loss": 0.1187, "step": 20670 }, { "epoch": 0.3686904719437805, "grad_norm": 0.3249605596065521, "learning_rate": 3.978602729327284e-05, "loss": 0.1731, "step": 20671 }, { "epoch": 0.36870830806549426, "grad_norm": 0.31561630964279175, "learning_rate": 3.978477217750977e-05, "loss": 0.1521, "step": 20672 }, { "epoch": 0.36872614418720795, "grad_norm": 0.20443210005760193, "learning_rate": 3.9783517004435806e-05, "loss": 0.1878, "step": 20673 }, { "epoch": 0.36874398030892164, "grad_norm": 0.25708481669425964, "learning_rate": 3.978226177405583e-05, "loss": 0.1637, "step": 20674 }, { "epoch": 0.3687618164306353, "grad_norm": 0.21317251026630402, "learning_rate": 3.9781006486374694e-05, "loss": 0.1262, "step": 20675 }, { "epoch": 0.368779652552349, "grad_norm": 0.3076186180114746, "learning_rate": 3.9779751141397284e-05, "loss": 0.117, "step": 20676 }, { "epoch": 0.3687974886740627, "grad_norm": 0.23719710111618042, "learning_rate": 3.977849573912844e-05, "loss": 0.1509, "step": 20677 }, { "epoch": 0.3688153247957764, "grad_norm": 0.3630293607711792, "learning_rate": 3.977724027957305e-05, "loss": 0.1669, "step": 20678 }, { "epoch": 0.3688331609174901, "grad_norm": 0.263110488653183, "learning_rate": 3.9775984762735974e-05, "loss": 0.2011, "step": 20679 }, { "epoch": 0.3688509970392038, "grad_norm": 0.19805766642093658, "learning_rate": 3.977472918862207e-05, "loss": 0.1589, "step": 20680 }, { "epoch": 0.3688688331609175, "grad_norm": 0.18016962707042694, "learning_rate": 3.977347355723622e-05, "loss": 0.1199, "step": 20681 }, { "epoch": 0.3688866692826312, "grad_norm": 0.31155461072921753, "learning_rate": 3.9772217868583287e-05, "loss": 0.1555, "step": 20682 }, { "epoch": 0.3689045054043449, "grad_norm": 0.24009375274181366, "learning_rate": 3.977096212266812e-05, "loss": 0.1479, "step": 20683 }, { "epoch": 0.3689223415260586, "grad_norm": 0.23271121084690094, "learning_rate": 3.976970631949561e-05, "loss": 0.1589, "step": 20684 }, { "epoch": 0.36894017764777226, "grad_norm": 0.3241308033466339, "learning_rate": 3.976845045907063e-05, "loss": 0.213, "step": 20685 }, { "epoch": 0.36895801376948595, "grad_norm": 0.2038877308368683, "learning_rate": 3.976719454139802e-05, "loss": 0.149, "step": 20686 }, { "epoch": 0.36897584989119964, "grad_norm": 0.33551889657974243, "learning_rate": 3.976593856648266e-05, "loss": 0.1687, "step": 20687 }, { "epoch": 0.3689936860129133, "grad_norm": 0.2658495306968689, "learning_rate": 3.976468253432944e-05, "loss": 0.1767, "step": 20688 }, { "epoch": 0.36901152213462707, "grad_norm": 0.20284298062324524, "learning_rate": 3.97634264449432e-05, "loss": 0.1393, "step": 20689 }, { "epoch": 0.36902935825634076, "grad_norm": 0.2537386417388916, "learning_rate": 3.9762170298328814e-05, "loss": 0.1714, "step": 20690 }, { "epoch": 0.36904719437805444, "grad_norm": 0.22543781995773315, "learning_rate": 3.9760914094491166e-05, "loss": 0.1279, "step": 20691 }, { "epoch": 0.36906503049976813, "grad_norm": 0.2585589587688446, "learning_rate": 3.9759657833435115e-05, "loss": 0.166, "step": 20692 }, { "epoch": 0.3690828666214818, "grad_norm": 0.2211478054523468, "learning_rate": 3.975840151516553e-05, "loss": 0.146, "step": 20693 }, { "epoch": 0.3691007027431955, "grad_norm": 0.3498081862926483, "learning_rate": 3.975714513968729e-05, "loss": 0.1767, "step": 20694 }, { "epoch": 0.3691185388649092, "grad_norm": 0.30740275979042053, "learning_rate": 3.975588870700525e-05, "loss": 0.1838, "step": 20695 }, { "epoch": 0.3691363749866229, "grad_norm": 0.2446300983428955, "learning_rate": 3.97546322171243e-05, "loss": 0.1529, "step": 20696 }, { "epoch": 0.3691542111083366, "grad_norm": 0.22753500938415527, "learning_rate": 3.9753375670049285e-05, "loss": 0.1874, "step": 20697 }, { "epoch": 0.3691720472300503, "grad_norm": 0.19337725639343262, "learning_rate": 3.97521190657851e-05, "loss": 0.1147, "step": 20698 }, { "epoch": 0.369189883351764, "grad_norm": 0.29787009954452515, "learning_rate": 3.97508624043366e-05, "loss": 0.1063, "step": 20699 }, { "epoch": 0.3692077194734777, "grad_norm": 0.294334352016449, "learning_rate": 3.974960568570867e-05, "loss": 0.1619, "step": 20700 }, { "epoch": 0.3692255555951914, "grad_norm": 0.21719473600387573, "learning_rate": 3.974834890990616e-05, "loss": 0.1594, "step": 20701 }, { "epoch": 0.36924339171690507, "grad_norm": 0.27734988927841187, "learning_rate": 3.974709207693397e-05, "loss": 0.1595, "step": 20702 }, { "epoch": 0.36926122783861876, "grad_norm": 0.31374648213386536, "learning_rate": 3.974583518679695e-05, "loss": 0.163, "step": 20703 }, { "epoch": 0.36927906396033244, "grad_norm": 0.18755824863910675, "learning_rate": 3.974457823949999e-05, "loss": 0.139, "step": 20704 }, { "epoch": 0.3692969000820462, "grad_norm": 0.28139257431030273, "learning_rate": 3.974332123504794e-05, "loss": 0.1229, "step": 20705 }, { "epoch": 0.3693147362037599, "grad_norm": 0.28746309876441956, "learning_rate": 3.974206417344569e-05, "loss": 0.1515, "step": 20706 }, { "epoch": 0.36933257232547356, "grad_norm": 0.21319624781608582, "learning_rate": 3.974080705469812e-05, "loss": 0.1646, "step": 20707 }, { "epoch": 0.36935040844718725, "grad_norm": 0.2256128191947937, "learning_rate": 3.973954987881007e-05, "loss": 0.1308, "step": 20708 }, { "epoch": 0.36936824456890094, "grad_norm": 0.38180941343307495, "learning_rate": 3.973829264578645e-05, "loss": 0.1748, "step": 20709 }, { "epoch": 0.3693860806906146, "grad_norm": 0.23181700706481934, "learning_rate": 3.97370353556321e-05, "loss": 0.2155, "step": 20710 }, { "epoch": 0.3694039168123283, "grad_norm": 0.331217497587204, "learning_rate": 3.973577800835192e-05, "loss": 0.2849, "step": 20711 }, { "epoch": 0.369421752934042, "grad_norm": 0.28522709012031555, "learning_rate": 3.973452060395077e-05, "loss": 0.1678, "step": 20712 }, { "epoch": 0.3694395890557557, "grad_norm": 0.21633446216583252, "learning_rate": 3.9733263142433544e-05, "loss": 0.158, "step": 20713 }, { "epoch": 0.36945742517746943, "grad_norm": 0.21896664798259735, "learning_rate": 3.973200562380509e-05, "loss": 0.115, "step": 20714 }, { "epoch": 0.3694752612991831, "grad_norm": 0.22210277616977692, "learning_rate": 3.973074804807029e-05, "loss": 0.2155, "step": 20715 }, { "epoch": 0.3694930974208968, "grad_norm": 0.25365108251571655, "learning_rate": 3.972949041523403e-05, "loss": 0.108, "step": 20716 }, { "epoch": 0.3695109335426105, "grad_norm": 0.24365845322608948, "learning_rate": 3.972823272530118e-05, "loss": 0.1724, "step": 20717 }, { "epoch": 0.3695287696643242, "grad_norm": 0.24406962096691132, "learning_rate": 3.9726974978276606e-05, "loss": 0.1744, "step": 20718 }, { "epoch": 0.3695466057860379, "grad_norm": 0.29617372155189514, "learning_rate": 3.972571717416519e-05, "loss": 0.1755, "step": 20719 }, { "epoch": 0.36956444190775156, "grad_norm": 0.24914050102233887, "learning_rate": 3.972445931297182e-05, "loss": 0.1625, "step": 20720 }, { "epoch": 0.36958227802946525, "grad_norm": 0.298544317483902, "learning_rate": 3.972320139470135e-05, "loss": 0.1517, "step": 20721 }, { "epoch": 0.369600114151179, "grad_norm": 0.25870659947395325, "learning_rate": 3.972194341935867e-05, "loss": 0.1658, "step": 20722 }, { "epoch": 0.3696179502728927, "grad_norm": 0.26940783858299255, "learning_rate": 3.9720685386948645e-05, "loss": 0.1725, "step": 20723 }, { "epoch": 0.36963578639460637, "grad_norm": 0.1911354660987854, "learning_rate": 3.971942729747617e-05, "loss": 0.1491, "step": 20724 }, { "epoch": 0.36965362251632006, "grad_norm": 0.21466852724552155, "learning_rate": 3.971816915094609e-05, "loss": 0.1404, "step": 20725 }, { "epoch": 0.36967145863803375, "grad_norm": 0.3736649751663208, "learning_rate": 3.971691094736333e-05, "loss": 0.2264, "step": 20726 }, { "epoch": 0.36968929475974743, "grad_norm": 0.29381558299064636, "learning_rate": 3.9715652686732726e-05, "loss": 0.1273, "step": 20727 }, { "epoch": 0.3697071308814611, "grad_norm": 0.2900749444961548, "learning_rate": 3.971439436905917e-05, "loss": 0.151, "step": 20728 }, { "epoch": 0.3697249670031748, "grad_norm": 0.2699805200099945, "learning_rate": 3.971313599434754e-05, "loss": 0.1438, "step": 20729 }, { "epoch": 0.3697428031248885, "grad_norm": 0.4069530665874481, "learning_rate": 3.971187756260272e-05, "loss": 0.1943, "step": 20730 }, { "epoch": 0.36976063924660224, "grad_norm": 0.3974965214729309, "learning_rate": 3.971061907382957e-05, "loss": 0.1448, "step": 20731 }, { "epoch": 0.36977847536831593, "grad_norm": 0.21953637897968292, "learning_rate": 3.970936052803298e-05, "loss": 0.1718, "step": 20732 }, { "epoch": 0.3697963114900296, "grad_norm": 0.2323625534772873, "learning_rate": 3.970810192521784e-05, "loss": 0.0813, "step": 20733 }, { "epoch": 0.3698141476117433, "grad_norm": 0.37191709876060486, "learning_rate": 3.9706843265389004e-05, "loss": 0.1205, "step": 20734 }, { "epoch": 0.369831983733457, "grad_norm": 0.2520902156829834, "learning_rate": 3.9705584548551375e-05, "loss": 0.0957, "step": 20735 }, { "epoch": 0.3698498198551707, "grad_norm": 0.2635739743709564, "learning_rate": 3.970432577470981e-05, "loss": 0.1837, "step": 20736 }, { "epoch": 0.36986765597688437, "grad_norm": 0.2611275017261505, "learning_rate": 3.970306694386921e-05, "loss": 0.1733, "step": 20737 }, { "epoch": 0.36988549209859806, "grad_norm": 0.2665553092956543, "learning_rate": 3.9701808056034436e-05, "loss": 0.1567, "step": 20738 }, { "epoch": 0.3699033282203118, "grad_norm": 0.2911405861377716, "learning_rate": 3.970054911121038e-05, "loss": 0.162, "step": 20739 }, { "epoch": 0.3699211643420255, "grad_norm": 0.25105607509613037, "learning_rate": 3.969929010940192e-05, "loss": 0.1511, "step": 20740 }, { "epoch": 0.3699390004637392, "grad_norm": 0.3187588155269623, "learning_rate": 3.9698031050613935e-05, "loss": 0.1879, "step": 20741 }, { "epoch": 0.36995683658545286, "grad_norm": 0.3255598247051239, "learning_rate": 3.96967719348513e-05, "loss": 0.1986, "step": 20742 }, { "epoch": 0.36997467270716655, "grad_norm": 0.18469995260238647, "learning_rate": 3.96955127621189e-05, "loss": 0.1256, "step": 20743 }, { "epoch": 0.36999250882888024, "grad_norm": 0.21111544966697693, "learning_rate": 3.969425353242162e-05, "loss": 0.1369, "step": 20744 }, { "epoch": 0.37001034495059393, "grad_norm": 0.21260325610637665, "learning_rate": 3.969299424576435e-05, "loss": 0.1478, "step": 20745 }, { "epoch": 0.3700281810723076, "grad_norm": 0.29275450110435486, "learning_rate": 3.969173490215195e-05, "loss": 0.1898, "step": 20746 }, { "epoch": 0.37004601719402136, "grad_norm": 0.33503565192222595, "learning_rate": 3.9690475501589297e-05, "loss": 0.1788, "step": 20747 }, { "epoch": 0.37006385331573505, "grad_norm": 0.18387584388256073, "learning_rate": 3.96892160440813e-05, "loss": 0.1343, "step": 20748 }, { "epoch": 0.37008168943744874, "grad_norm": 0.3641549050807953, "learning_rate": 3.968795652963283e-05, "loss": 0.1881, "step": 20749 }, { "epoch": 0.3700995255591624, "grad_norm": 0.2309250831604004, "learning_rate": 3.968669695824877e-05, "loss": 0.1303, "step": 20750 }, { "epoch": 0.3701173616808761, "grad_norm": 0.28715309500694275, "learning_rate": 3.968543732993399e-05, "loss": 0.1468, "step": 20751 }, { "epoch": 0.3701351978025898, "grad_norm": 0.2431654930114746, "learning_rate": 3.968417764469339e-05, "loss": 0.1678, "step": 20752 }, { "epoch": 0.3701530339243035, "grad_norm": 0.2502996027469635, "learning_rate": 3.968291790253183e-05, "loss": 0.1938, "step": 20753 }, { "epoch": 0.3701708700460172, "grad_norm": 0.3158586323261261, "learning_rate": 3.9681658103454234e-05, "loss": 0.1553, "step": 20754 }, { "epoch": 0.37018870616773086, "grad_norm": 0.28493940830230713, "learning_rate": 3.968039824746545e-05, "loss": 0.1345, "step": 20755 }, { "epoch": 0.3702065422894446, "grad_norm": 0.33351123332977295, "learning_rate": 3.9679138334570365e-05, "loss": 0.1955, "step": 20756 }, { "epoch": 0.3702243784111583, "grad_norm": 0.21014797687530518, "learning_rate": 3.967787836477387e-05, "loss": 0.1493, "step": 20757 }, { "epoch": 0.370242214532872, "grad_norm": 0.20715588331222534, "learning_rate": 3.967661833808086e-05, "loss": 0.1505, "step": 20758 }, { "epoch": 0.37026005065458567, "grad_norm": 0.22673384845256805, "learning_rate": 3.967535825449621e-05, "loss": 0.1657, "step": 20759 }, { "epoch": 0.37027788677629936, "grad_norm": 0.24830789864063263, "learning_rate": 3.9674098114024785e-05, "loss": 0.1399, "step": 20760 }, { "epoch": 0.37029572289801305, "grad_norm": 0.33110448718070984, "learning_rate": 3.96728379166715e-05, "loss": 0.1089, "step": 20761 }, { "epoch": 0.37031355901972673, "grad_norm": 0.281533420085907, "learning_rate": 3.967157766244123e-05, "loss": 0.207, "step": 20762 }, { "epoch": 0.3703313951414404, "grad_norm": 0.290824294090271, "learning_rate": 3.967031735133885e-05, "loss": 0.1489, "step": 20763 }, { "epoch": 0.37034923126315417, "grad_norm": 0.22915130853652954, "learning_rate": 3.966905698336925e-05, "loss": 0.1382, "step": 20764 }, { "epoch": 0.37036706738486785, "grad_norm": 0.26040738821029663, "learning_rate": 3.966779655853733e-05, "loss": 0.1796, "step": 20765 }, { "epoch": 0.37038490350658154, "grad_norm": 0.30748674273490906, "learning_rate": 3.9666536076847954e-05, "loss": 0.1534, "step": 20766 }, { "epoch": 0.37040273962829523, "grad_norm": 0.2495889663696289, "learning_rate": 3.9665275538306026e-05, "loss": 0.1643, "step": 20767 }, { "epoch": 0.3704205757500089, "grad_norm": 0.2611350417137146, "learning_rate": 3.9664014942916416e-05, "loss": 0.1957, "step": 20768 }, { "epoch": 0.3704384118717226, "grad_norm": 0.524882435798645, "learning_rate": 3.966275429068403e-05, "loss": 0.1667, "step": 20769 }, { "epoch": 0.3704562479934363, "grad_norm": 0.2503334879875183, "learning_rate": 3.966149358161374e-05, "loss": 0.1336, "step": 20770 }, { "epoch": 0.37047408411515, "grad_norm": 0.2533748149871826, "learning_rate": 3.966023281571043e-05, "loss": 0.1312, "step": 20771 }, { "epoch": 0.37049192023686367, "grad_norm": 0.21128694713115692, "learning_rate": 3.965897199297901e-05, "loss": 0.0763, "step": 20772 }, { "epoch": 0.3705097563585774, "grad_norm": 0.39581942558288574, "learning_rate": 3.9657711113424334e-05, "loss": 0.171, "step": 20773 }, { "epoch": 0.3705275924802911, "grad_norm": 0.3122757077217102, "learning_rate": 3.965645017705132e-05, "loss": 0.1512, "step": 20774 }, { "epoch": 0.3705454286020048, "grad_norm": 0.2682683765888214, "learning_rate": 3.9655189183864835e-05, "loss": 0.1763, "step": 20775 }, { "epoch": 0.3705632647237185, "grad_norm": 0.2530685067176819, "learning_rate": 3.9653928133869776e-05, "loss": 0.1638, "step": 20776 }, { "epoch": 0.37058110084543217, "grad_norm": 0.24746932089328766, "learning_rate": 3.9652667027071034e-05, "loss": 0.2045, "step": 20777 }, { "epoch": 0.37059893696714585, "grad_norm": 0.2642756402492523, "learning_rate": 3.965140586347349e-05, "loss": 0.1711, "step": 20778 }, { "epoch": 0.37061677308885954, "grad_norm": 0.2608848512172699, "learning_rate": 3.965014464308204e-05, "loss": 0.1502, "step": 20779 }, { "epoch": 0.37063460921057323, "grad_norm": 0.30534178018569946, "learning_rate": 3.964888336590157e-05, "loss": 0.1828, "step": 20780 }, { "epoch": 0.370652445332287, "grad_norm": 0.26097339391708374, "learning_rate": 3.964762203193696e-05, "loss": 0.1483, "step": 20781 }, { "epoch": 0.37067028145400066, "grad_norm": 0.19460429251194, "learning_rate": 3.964636064119312e-05, "loss": 0.1464, "step": 20782 }, { "epoch": 0.37068811757571435, "grad_norm": 0.2558959424495697, "learning_rate": 3.964509919367492e-05, "loss": 0.1403, "step": 20783 }, { "epoch": 0.37070595369742804, "grad_norm": 0.2617916762828827, "learning_rate": 3.964383768938725e-05, "loss": 0.1565, "step": 20784 }, { "epoch": 0.3707237898191417, "grad_norm": 0.2780449688434601, "learning_rate": 3.9642576128335026e-05, "loss": 0.1655, "step": 20785 }, { "epoch": 0.3707416259408554, "grad_norm": 0.2817031741142273, "learning_rate": 3.964131451052311e-05, "loss": 0.1484, "step": 20786 }, { "epoch": 0.3707594620625691, "grad_norm": 0.22971881926059723, "learning_rate": 3.96400528359564e-05, "loss": 0.1066, "step": 20787 }, { "epoch": 0.3707772981842828, "grad_norm": 0.35492902994155884, "learning_rate": 3.963879110463978e-05, "loss": 0.1828, "step": 20788 }, { "epoch": 0.3707951343059965, "grad_norm": 0.3692830502986908, "learning_rate": 3.963752931657817e-05, "loss": 0.1475, "step": 20789 }, { "epoch": 0.3708129704277102, "grad_norm": 0.30633455514907837, "learning_rate": 3.963626747177642e-05, "loss": 0.1903, "step": 20790 }, { "epoch": 0.3708308065494239, "grad_norm": 0.3038899898529053, "learning_rate": 3.9635005570239456e-05, "loss": 0.1499, "step": 20791 }, { "epoch": 0.3708486426711376, "grad_norm": 0.26397743821144104, "learning_rate": 3.9633743611972154e-05, "loss": 0.1774, "step": 20792 }, { "epoch": 0.3708664787928513, "grad_norm": 0.3259041905403137, "learning_rate": 3.963248159697941e-05, "loss": 0.1567, "step": 20793 }, { "epoch": 0.37088431491456497, "grad_norm": 0.25907421112060547, "learning_rate": 3.96312195252661e-05, "loss": 0.2146, "step": 20794 }, { "epoch": 0.37090215103627866, "grad_norm": 0.2867237627506256, "learning_rate": 3.9629957396837146e-05, "loss": 0.1442, "step": 20795 }, { "epoch": 0.37091998715799235, "grad_norm": 0.25982075929641724, "learning_rate": 3.9628695211697415e-05, "loss": 0.1878, "step": 20796 }, { "epoch": 0.37093782327970604, "grad_norm": 0.20911921560764313, "learning_rate": 3.962743296985181e-05, "loss": 0.1732, "step": 20797 }, { "epoch": 0.3709556594014198, "grad_norm": 0.2497350573539734, "learning_rate": 3.9626170671305223e-05, "loss": 0.1633, "step": 20798 }, { "epoch": 0.37097349552313347, "grad_norm": 0.24562138319015503, "learning_rate": 3.962490831606255e-05, "loss": 0.1661, "step": 20799 }, { "epoch": 0.37099133164484716, "grad_norm": 0.21879325807094574, "learning_rate": 3.962364590412868e-05, "loss": 0.1458, "step": 20800 }, { "epoch": 0.37100916776656084, "grad_norm": 0.20751112699508667, "learning_rate": 3.9622383435508504e-05, "loss": 0.1191, "step": 20801 }, { "epoch": 0.37102700388827453, "grad_norm": 0.27567774057388306, "learning_rate": 3.962112091020692e-05, "loss": 0.1698, "step": 20802 }, { "epoch": 0.3710448400099882, "grad_norm": 0.20791807770729065, "learning_rate": 3.961985832822882e-05, "loss": 0.1398, "step": 20803 }, { "epoch": 0.3710626761317019, "grad_norm": 0.232282817363739, "learning_rate": 3.961859568957911e-05, "loss": 0.1501, "step": 20804 }, { "epoch": 0.3710805122534156, "grad_norm": 0.257133424282074, "learning_rate": 3.9617332994262654e-05, "loss": 0.1858, "step": 20805 }, { "epoch": 0.37109834837512934, "grad_norm": 0.23405201733112335, "learning_rate": 3.961607024228439e-05, "loss": 0.152, "step": 20806 }, { "epoch": 0.371116184496843, "grad_norm": 0.31592363119125366, "learning_rate": 3.961480743364917e-05, "loss": 0.188, "step": 20807 }, { "epoch": 0.3711340206185567, "grad_norm": 0.23744623363018036, "learning_rate": 3.9613544568361916e-05, "loss": 0.1608, "step": 20808 }, { "epoch": 0.3711518567402704, "grad_norm": 0.2736688256263733, "learning_rate": 3.961228164642752e-05, "loss": 0.1619, "step": 20809 }, { "epoch": 0.3711696928619841, "grad_norm": 0.3179933726787567, "learning_rate": 3.9611018667850866e-05, "loss": 0.2267, "step": 20810 }, { "epoch": 0.3711875289836978, "grad_norm": 0.22657260298728943, "learning_rate": 3.960975563263687e-05, "loss": 0.1358, "step": 20811 }, { "epoch": 0.37120536510541147, "grad_norm": 0.20992985367774963, "learning_rate": 3.96084925407904e-05, "loss": 0.1721, "step": 20812 }, { "epoch": 0.37122320122712515, "grad_norm": 0.33215299248695374, "learning_rate": 3.9607229392316376e-05, "loss": 0.2161, "step": 20813 }, { "epoch": 0.37124103734883884, "grad_norm": 0.31987807154655457, "learning_rate": 3.960596618721968e-05, "loss": 0.1111, "step": 20814 }, { "epoch": 0.3712588734705526, "grad_norm": 0.24876877665519714, "learning_rate": 3.960470292550522e-05, "loss": 0.1653, "step": 20815 }, { "epoch": 0.3712767095922663, "grad_norm": 0.23401497304439545, "learning_rate": 3.960343960717788e-05, "loss": 0.1292, "step": 20816 }, { "epoch": 0.37129454571397996, "grad_norm": 0.4843595027923584, "learning_rate": 3.960217623224257e-05, "loss": 0.2191, "step": 20817 }, { "epoch": 0.37131238183569365, "grad_norm": 0.24649792909622192, "learning_rate": 3.9600912800704184e-05, "loss": 0.1831, "step": 20818 }, { "epoch": 0.37133021795740734, "grad_norm": 0.24797876179218292, "learning_rate": 3.959964931256761e-05, "loss": 0.1444, "step": 20819 }, { "epoch": 0.371348054079121, "grad_norm": 0.20644067227840424, "learning_rate": 3.959838576783776e-05, "loss": 0.1449, "step": 20820 }, { "epoch": 0.3713658902008347, "grad_norm": 0.2838148772716522, "learning_rate": 3.9597122166519516e-05, "loss": 0.159, "step": 20821 }, { "epoch": 0.3713837263225484, "grad_norm": 0.30081966519355774, "learning_rate": 3.9595858508617796e-05, "loss": 0.1271, "step": 20822 }, { "epoch": 0.37140156244426215, "grad_norm": 0.21972356736660004, "learning_rate": 3.959459479413748e-05, "loss": 0.1321, "step": 20823 }, { "epoch": 0.37141939856597583, "grad_norm": 0.2762352526187897, "learning_rate": 3.959333102308348e-05, "loss": 0.1696, "step": 20824 }, { "epoch": 0.3714372346876895, "grad_norm": 0.2654752731323242, "learning_rate": 3.959206719546068e-05, "loss": 0.1403, "step": 20825 }, { "epoch": 0.3714550708094032, "grad_norm": 0.29895031452178955, "learning_rate": 3.959080331127399e-05, "loss": 0.1792, "step": 20826 }, { "epoch": 0.3714729069311169, "grad_norm": 0.2670992314815521, "learning_rate": 3.9589539370528306e-05, "loss": 0.2022, "step": 20827 }, { "epoch": 0.3714907430528306, "grad_norm": 0.2724984884262085, "learning_rate": 3.9588275373228534e-05, "loss": 0.1986, "step": 20828 }, { "epoch": 0.3715085791745443, "grad_norm": 0.35580363869667053, "learning_rate": 3.958701131937956e-05, "loss": 0.1148, "step": 20829 }, { "epoch": 0.37152641529625796, "grad_norm": 0.22566132247447968, "learning_rate": 3.9585747208986296e-05, "loss": 0.153, "step": 20830 }, { "epoch": 0.37154425141797165, "grad_norm": 0.25295549631118774, "learning_rate": 3.9584483042053634e-05, "loss": 0.1817, "step": 20831 }, { "epoch": 0.3715620875396854, "grad_norm": 0.27537551522254944, "learning_rate": 3.9583218818586484e-05, "loss": 0.1565, "step": 20832 }, { "epoch": 0.3715799236613991, "grad_norm": 0.2592831254005432, "learning_rate": 3.958195453858974e-05, "loss": 0.1326, "step": 20833 }, { "epoch": 0.37159775978311277, "grad_norm": 0.4563406705856323, "learning_rate": 3.9580690202068307e-05, "loss": 0.2275, "step": 20834 }, { "epoch": 0.37161559590482646, "grad_norm": 0.2982614040374756, "learning_rate": 3.957942580902708e-05, "loss": 0.1852, "step": 20835 }, { "epoch": 0.37163343202654014, "grad_norm": 0.18832939863204956, "learning_rate": 3.957816135947096e-05, "loss": 0.1435, "step": 20836 }, { "epoch": 0.37165126814825383, "grad_norm": 0.27177080512046814, "learning_rate": 3.957689685340486e-05, "loss": 0.1669, "step": 20837 }, { "epoch": 0.3716691042699675, "grad_norm": 0.27617475390434265, "learning_rate": 3.957563229083366e-05, "loss": 0.1031, "step": 20838 }, { "epoch": 0.3716869403916812, "grad_norm": 0.33756181597709656, "learning_rate": 3.957436767176228e-05, "loss": 0.1378, "step": 20839 }, { "epoch": 0.37170477651339495, "grad_norm": 0.24287867546081543, "learning_rate": 3.957310299619562e-05, "loss": 0.1658, "step": 20840 }, { "epoch": 0.37172261263510864, "grad_norm": 0.3450038731098175, "learning_rate": 3.9571838264138575e-05, "loss": 0.1396, "step": 20841 }, { "epoch": 0.37174044875682233, "grad_norm": 0.27262115478515625, "learning_rate": 3.9570573475596054e-05, "loss": 0.1925, "step": 20842 }, { "epoch": 0.371758284878536, "grad_norm": 0.2437928318977356, "learning_rate": 3.956930863057297e-05, "loss": 0.1875, "step": 20843 }, { "epoch": 0.3717761210002497, "grad_norm": 0.28718969225883484, "learning_rate": 3.9568043729074186e-05, "loss": 0.1645, "step": 20844 }, { "epoch": 0.3717939571219634, "grad_norm": 0.2277885228395462, "learning_rate": 3.956677877110466e-05, "loss": 0.1749, "step": 20845 }, { "epoch": 0.3718117932436771, "grad_norm": 0.25676479935646057, "learning_rate": 3.956551375666925e-05, "loss": 0.1767, "step": 20846 }, { "epoch": 0.37182962936539077, "grad_norm": 0.27865269780158997, "learning_rate": 3.9564248685772885e-05, "loss": 0.1236, "step": 20847 }, { "epoch": 0.3718474654871045, "grad_norm": 0.2554360032081604, "learning_rate": 3.9562983558420464e-05, "loss": 0.1695, "step": 20848 }, { "epoch": 0.3718653016088182, "grad_norm": 0.18411928415298462, "learning_rate": 3.956171837461689e-05, "loss": 0.1228, "step": 20849 }, { "epoch": 0.3718831377305319, "grad_norm": 0.2741954028606415, "learning_rate": 3.956045313436706e-05, "loss": 0.1619, "step": 20850 }, { "epoch": 0.3719009738522456, "grad_norm": 0.319092720746994, "learning_rate": 3.955918783767589e-05, "loss": 0.1531, "step": 20851 }, { "epoch": 0.37191880997395926, "grad_norm": 0.3173488676548004, "learning_rate": 3.9557922484548284e-05, "loss": 0.1107, "step": 20852 }, { "epoch": 0.37193664609567295, "grad_norm": 0.23448435962200165, "learning_rate": 3.955665707498913e-05, "loss": 0.1432, "step": 20853 }, { "epoch": 0.37195448221738664, "grad_norm": 0.2821740508079529, "learning_rate": 3.955539160900335e-05, "loss": 0.1561, "step": 20854 }, { "epoch": 0.3719723183391003, "grad_norm": 0.2684013843536377, "learning_rate": 3.955412608659584e-05, "loss": 0.1376, "step": 20855 }, { "epoch": 0.371990154460814, "grad_norm": 0.361016184091568, "learning_rate": 3.955286050777152e-05, "loss": 0.1574, "step": 20856 }, { "epoch": 0.37200799058252776, "grad_norm": 0.425301194190979, "learning_rate": 3.955159487253528e-05, "loss": 0.1455, "step": 20857 }, { "epoch": 0.37202582670424145, "grad_norm": 0.24172918498516083, "learning_rate": 3.9550329180892044e-05, "loss": 0.1629, "step": 20858 }, { "epoch": 0.37204366282595513, "grad_norm": 0.27436745166778564, "learning_rate": 3.9549063432846687e-05, "loss": 0.1513, "step": 20859 }, { "epoch": 0.3720614989476688, "grad_norm": 0.22665497660636902, "learning_rate": 3.954779762840415e-05, "loss": 0.1297, "step": 20860 }, { "epoch": 0.3720793350693825, "grad_norm": 0.2823479473590851, "learning_rate": 3.954653176756932e-05, "loss": 0.1084, "step": 20861 }, { "epoch": 0.3720971711910962, "grad_norm": 0.2251901477575302, "learning_rate": 3.9545265850347116e-05, "loss": 0.1879, "step": 20862 }, { "epoch": 0.3721150073128099, "grad_norm": 0.234126478433609, "learning_rate": 3.954399987674242e-05, "loss": 0.1461, "step": 20863 }, { "epoch": 0.3721328434345236, "grad_norm": 0.23911389708518982, "learning_rate": 3.954273384676017e-05, "loss": 0.1852, "step": 20864 }, { "epoch": 0.3721506795562373, "grad_norm": 0.2928299605846405, "learning_rate": 3.954146776040526e-05, "loss": 0.1937, "step": 20865 }, { "epoch": 0.372168515677951, "grad_norm": 0.2781590521335602, "learning_rate": 3.9540201617682596e-05, "loss": 0.157, "step": 20866 }, { "epoch": 0.3721863517996647, "grad_norm": 0.21413928270339966, "learning_rate": 3.953893541859709e-05, "loss": 0.1479, "step": 20867 }, { "epoch": 0.3722041879213784, "grad_norm": 0.19536499679088593, "learning_rate": 3.9537669163153644e-05, "loss": 0.1567, "step": 20868 }, { "epoch": 0.37222202404309207, "grad_norm": 0.20709502696990967, "learning_rate": 3.953640285135718e-05, "loss": 0.1458, "step": 20869 }, { "epoch": 0.37223986016480576, "grad_norm": 0.2385265976190567, "learning_rate": 3.953513648321259e-05, "loss": 0.0996, "step": 20870 }, { "epoch": 0.37225769628651945, "grad_norm": 0.258060097694397, "learning_rate": 3.9533870058724797e-05, "loss": 0.1818, "step": 20871 }, { "epoch": 0.37227553240823313, "grad_norm": 0.3334944546222687, "learning_rate": 3.9532603577898694e-05, "loss": 0.1721, "step": 20872 }, { "epoch": 0.3722933685299468, "grad_norm": 0.2653723955154419, "learning_rate": 3.9531337040739215e-05, "loss": 0.146, "step": 20873 }, { "epoch": 0.37231120465166057, "grad_norm": 0.25973907113075256, "learning_rate": 3.9530070447251246e-05, "loss": 0.15, "step": 20874 }, { "epoch": 0.37232904077337425, "grad_norm": 0.25220730900764465, "learning_rate": 3.9528803797439705e-05, "loss": 0.1698, "step": 20875 }, { "epoch": 0.37234687689508794, "grad_norm": 0.25818932056427, "learning_rate": 3.95275370913095e-05, "loss": 0.1699, "step": 20876 }, { "epoch": 0.37236471301680163, "grad_norm": 0.310071736574173, "learning_rate": 3.952627032886555e-05, "loss": 0.2, "step": 20877 }, { "epoch": 0.3723825491385153, "grad_norm": 0.31427833437919617, "learning_rate": 3.952500351011276e-05, "loss": 0.1645, "step": 20878 }, { "epoch": 0.372400385260229, "grad_norm": 0.2230541855096817, "learning_rate": 3.952373663505603e-05, "loss": 0.1492, "step": 20879 }, { "epoch": 0.3724182213819427, "grad_norm": 0.3833317756652832, "learning_rate": 3.952246970370029e-05, "loss": 0.1131, "step": 20880 }, { "epoch": 0.3724360575036564, "grad_norm": 0.20893841981887817, "learning_rate": 3.952120271605043e-05, "loss": 0.1477, "step": 20881 }, { "epoch": 0.3724538936253701, "grad_norm": 0.37659087777137756, "learning_rate": 3.9519935672111384e-05, "loss": 0.1925, "step": 20882 }, { "epoch": 0.3724717297470838, "grad_norm": 0.21617098152637482, "learning_rate": 3.9518668571888054e-05, "loss": 0.1567, "step": 20883 }, { "epoch": 0.3724895658687975, "grad_norm": 0.4688357710838318, "learning_rate": 3.9517401415385335e-05, "loss": 0.2293, "step": 20884 }, { "epoch": 0.3725074019905112, "grad_norm": 0.3353196084499359, "learning_rate": 3.9516134202608166e-05, "loss": 0.1377, "step": 20885 }, { "epoch": 0.3725252381122249, "grad_norm": 0.1893564760684967, "learning_rate": 3.9514866933561446e-05, "loss": 0.1269, "step": 20886 }, { "epoch": 0.37254307423393856, "grad_norm": 0.25817370414733887, "learning_rate": 3.951359960825008e-05, "loss": 0.1893, "step": 20887 }, { "epoch": 0.37256091035565225, "grad_norm": 0.2507270574569702, "learning_rate": 3.9512332226679005e-05, "loss": 0.1531, "step": 20888 }, { "epoch": 0.37257874647736594, "grad_norm": 0.2700137794017792, "learning_rate": 3.9511064788853104e-05, "loss": 0.1798, "step": 20889 }, { "epoch": 0.37259658259907963, "grad_norm": 0.2808361351490021, "learning_rate": 3.950979729477731e-05, "loss": 0.1587, "step": 20890 }, { "epoch": 0.37261441872079337, "grad_norm": 0.27956733107566833, "learning_rate": 3.9508529744456535e-05, "loss": 0.1664, "step": 20891 }, { "epoch": 0.37263225484250706, "grad_norm": 0.2957374155521393, "learning_rate": 3.950726213789568e-05, "loss": 0.1688, "step": 20892 }, { "epoch": 0.37265009096422075, "grad_norm": 0.2731435000896454, "learning_rate": 3.950599447509967e-05, "loss": 0.146, "step": 20893 }, { "epoch": 0.37266792708593444, "grad_norm": 0.38462555408477783, "learning_rate": 3.9504726756073405e-05, "loss": 0.1402, "step": 20894 }, { "epoch": 0.3726857632076481, "grad_norm": 0.3309250473976135, "learning_rate": 3.9503458980821826e-05, "loss": 0.1335, "step": 20895 }, { "epoch": 0.3727035993293618, "grad_norm": 0.17635788023471832, "learning_rate": 3.950219114934982e-05, "loss": 0.1415, "step": 20896 }, { "epoch": 0.3727214354510755, "grad_norm": 0.27848848700523376, "learning_rate": 3.950092326166232e-05, "loss": 0.1813, "step": 20897 }, { "epoch": 0.3727392715727892, "grad_norm": 0.2748837172985077, "learning_rate": 3.949965531776422e-05, "loss": 0.1233, "step": 20898 }, { "epoch": 0.37275710769450293, "grad_norm": 0.24764619767665863, "learning_rate": 3.9498387317660454e-05, "loss": 0.179, "step": 20899 }, { "epoch": 0.3727749438162166, "grad_norm": 0.2562500536441803, "learning_rate": 3.949711926135593e-05, "loss": 0.1662, "step": 20900 }, { "epoch": 0.3727927799379303, "grad_norm": 0.2847629487514496, "learning_rate": 3.949585114885558e-05, "loss": 0.1769, "step": 20901 }, { "epoch": 0.372810616059644, "grad_norm": 0.23247461020946503, "learning_rate": 3.9494582980164284e-05, "loss": 0.1536, "step": 20902 }, { "epoch": 0.3728284521813577, "grad_norm": 0.39158889651298523, "learning_rate": 3.949331475528699e-05, "loss": 0.2121, "step": 20903 }, { "epoch": 0.37284628830307137, "grad_norm": 0.2710724174976349, "learning_rate": 3.9492046474228594e-05, "loss": 0.1508, "step": 20904 }, { "epoch": 0.37286412442478506, "grad_norm": 0.29780441522598267, "learning_rate": 3.9490778136994025e-05, "loss": 0.1785, "step": 20905 }, { "epoch": 0.37288196054649875, "grad_norm": 0.2175154834985733, "learning_rate": 3.9489509743588195e-05, "loss": 0.1534, "step": 20906 }, { "epoch": 0.3728997966682125, "grad_norm": 0.24034057557582855, "learning_rate": 3.9488241294016017e-05, "loss": 0.1109, "step": 20907 }, { "epoch": 0.3729176327899262, "grad_norm": 0.28218695521354675, "learning_rate": 3.948697278828242e-05, "loss": 0.1463, "step": 20908 }, { "epoch": 0.37293546891163987, "grad_norm": 0.32997918128967285, "learning_rate": 3.948570422639231e-05, "loss": 0.1766, "step": 20909 }, { "epoch": 0.37295330503335355, "grad_norm": 0.20871379971504211, "learning_rate": 3.94844356083506e-05, "loss": 0.1579, "step": 20910 }, { "epoch": 0.37297114115506724, "grad_norm": 0.20707568526268005, "learning_rate": 3.948316693416222e-05, "loss": 0.1615, "step": 20911 }, { "epoch": 0.37298897727678093, "grad_norm": 0.23493592441082, "learning_rate": 3.948189820383208e-05, "loss": 0.1697, "step": 20912 }, { "epoch": 0.3730068133984946, "grad_norm": 0.28584012389183044, "learning_rate": 3.94806294173651e-05, "loss": 0.189, "step": 20913 }, { "epoch": 0.3730246495202083, "grad_norm": 0.21202589571475983, "learning_rate": 3.9479360574766204e-05, "loss": 0.1257, "step": 20914 }, { "epoch": 0.373042485641922, "grad_norm": 0.23167893290519714, "learning_rate": 3.9478091676040305e-05, "loss": 0.1819, "step": 20915 }, { "epoch": 0.37306032176363574, "grad_norm": 0.41091087460517883, "learning_rate": 3.947682272119232e-05, "loss": 0.199, "step": 20916 }, { "epoch": 0.3730781578853494, "grad_norm": 0.2537975609302521, "learning_rate": 3.9475553710227175e-05, "loss": 0.1587, "step": 20917 }, { "epoch": 0.3730959940070631, "grad_norm": 0.3241865038871765, "learning_rate": 3.947428464314977e-05, "loss": 0.1349, "step": 20918 }, { "epoch": 0.3731138301287768, "grad_norm": 0.24923311173915863, "learning_rate": 3.947301551996505e-05, "loss": 0.1769, "step": 20919 }, { "epoch": 0.3731316662504905, "grad_norm": 0.31199532747268677, "learning_rate": 3.9471746340677915e-05, "loss": 0.196, "step": 20920 }, { "epoch": 0.3731495023722042, "grad_norm": 0.19318658113479614, "learning_rate": 3.947047710529331e-05, "loss": 0.1514, "step": 20921 }, { "epoch": 0.37316733849391787, "grad_norm": 0.2515678107738495, "learning_rate": 3.946920781381612e-05, "loss": 0.1819, "step": 20922 }, { "epoch": 0.37318517461563155, "grad_norm": 0.24265235662460327, "learning_rate": 3.946793846625129e-05, "loss": 0.1908, "step": 20923 }, { "epoch": 0.3732030107373453, "grad_norm": 0.26168200373649597, "learning_rate": 3.946666906260373e-05, "loss": 0.1075, "step": 20924 }, { "epoch": 0.373220846859059, "grad_norm": 0.2690441310405731, "learning_rate": 3.946539960287837e-05, "loss": 0.1712, "step": 20925 }, { "epoch": 0.3732386829807727, "grad_norm": 0.27775177359580994, "learning_rate": 3.946413008708012e-05, "loss": 0.1554, "step": 20926 }, { "epoch": 0.37325651910248636, "grad_norm": 0.26072096824645996, "learning_rate": 3.946286051521391e-05, "loss": 0.19, "step": 20927 }, { "epoch": 0.37327435522420005, "grad_norm": 0.19600936770439148, "learning_rate": 3.946159088728465e-05, "loss": 0.1571, "step": 20928 }, { "epoch": 0.37329219134591374, "grad_norm": 0.20395301282405853, "learning_rate": 3.946032120329728e-05, "loss": 0.1812, "step": 20929 }, { "epoch": 0.3733100274676274, "grad_norm": 0.22938822209835052, "learning_rate": 3.945905146325671e-05, "loss": 0.1466, "step": 20930 }, { "epoch": 0.3733278635893411, "grad_norm": 0.25904223322868347, "learning_rate": 3.9457781667167854e-05, "loss": 0.1873, "step": 20931 }, { "epoch": 0.3733456997110548, "grad_norm": 0.2521626055240631, "learning_rate": 3.945651181503565e-05, "loss": 0.1341, "step": 20932 }, { "epoch": 0.37336353583276854, "grad_norm": 0.2962108552455902, "learning_rate": 3.9455241906865003e-05, "loss": 0.1872, "step": 20933 }, { "epoch": 0.37338137195448223, "grad_norm": 0.2521093487739563, "learning_rate": 3.945397194266086e-05, "loss": 0.1723, "step": 20934 }, { "epoch": 0.3733992080761959, "grad_norm": 0.27413707971572876, "learning_rate": 3.9452701922428114e-05, "loss": 0.1729, "step": 20935 }, { "epoch": 0.3734170441979096, "grad_norm": 0.34369009733200073, "learning_rate": 3.945143184617171e-05, "loss": 0.1894, "step": 20936 }, { "epoch": 0.3734348803196233, "grad_norm": 0.2235700935125351, "learning_rate": 3.945016171389656e-05, "loss": 0.1803, "step": 20937 }, { "epoch": 0.373452716441337, "grad_norm": 0.21069109439849854, "learning_rate": 3.944889152560761e-05, "loss": 0.1379, "step": 20938 }, { "epoch": 0.3734705525630507, "grad_norm": 0.2789163589477539, "learning_rate": 3.944762128130975e-05, "loss": 0.1669, "step": 20939 }, { "epoch": 0.37348838868476436, "grad_norm": 0.27874982357025146, "learning_rate": 3.9446350981007924e-05, "loss": 0.1284, "step": 20940 }, { "epoch": 0.3735062248064781, "grad_norm": 0.24538666009902954, "learning_rate": 3.944508062470705e-05, "loss": 0.1501, "step": 20941 }, { "epoch": 0.3735240609281918, "grad_norm": 0.18700912594795227, "learning_rate": 3.9443810212412055e-05, "loss": 0.1489, "step": 20942 }, { "epoch": 0.3735418970499055, "grad_norm": 0.30852341651916504, "learning_rate": 3.9442539744127864e-05, "loss": 0.1101, "step": 20943 }, { "epoch": 0.37355973317161917, "grad_norm": 0.3122432231903076, "learning_rate": 3.94412692198594e-05, "loss": 0.1717, "step": 20944 }, { "epoch": 0.37357756929333286, "grad_norm": 0.2327127903699875, "learning_rate": 3.94399986396116e-05, "loss": 0.1706, "step": 20945 }, { "epoch": 0.37359540541504654, "grad_norm": 0.23632730543613434, "learning_rate": 3.943872800338936e-05, "loss": 0.1533, "step": 20946 }, { "epoch": 0.37361324153676023, "grad_norm": 0.23022763431072235, "learning_rate": 3.943745731119763e-05, "loss": 0.1395, "step": 20947 }, { "epoch": 0.3736310776584739, "grad_norm": 0.2568363547325134, "learning_rate": 3.943618656304133e-05, "loss": 0.1735, "step": 20948 }, { "epoch": 0.37364891378018766, "grad_norm": 0.24698111414909363, "learning_rate": 3.9434915758925385e-05, "loss": 0.1097, "step": 20949 }, { "epoch": 0.37366674990190135, "grad_norm": 0.3155350685119629, "learning_rate": 3.9433644898854716e-05, "loss": 0.1764, "step": 20950 }, { "epoch": 0.37368458602361504, "grad_norm": 0.20479997992515564, "learning_rate": 3.943237398283426e-05, "loss": 0.1036, "step": 20951 }, { "epoch": 0.3737024221453287, "grad_norm": 0.30280378460884094, "learning_rate": 3.943110301086893e-05, "loss": 0.2106, "step": 20952 }, { "epoch": 0.3737202582670424, "grad_norm": 0.26288577914237976, "learning_rate": 3.9429831982963674e-05, "loss": 0.1708, "step": 20953 }, { "epoch": 0.3737380943887561, "grad_norm": 0.32733508944511414, "learning_rate": 3.942856089912339e-05, "loss": 0.1926, "step": 20954 }, { "epoch": 0.3737559305104698, "grad_norm": 0.2616784870624542, "learning_rate": 3.9427289759353034e-05, "loss": 0.18, "step": 20955 }, { "epoch": 0.3737737666321835, "grad_norm": 0.18744464218616486, "learning_rate": 3.942601856365752e-05, "loss": 0.131, "step": 20956 }, { "epoch": 0.37379160275389717, "grad_norm": 0.2464095652103424, "learning_rate": 3.9424747312041765e-05, "loss": 0.1691, "step": 20957 }, { "epoch": 0.3738094388756109, "grad_norm": 0.1791401207447052, "learning_rate": 3.942347600451071e-05, "loss": 0.1239, "step": 20958 }, { "epoch": 0.3738272749973246, "grad_norm": 0.2502337694168091, "learning_rate": 3.9422204641069284e-05, "loss": 0.1442, "step": 20959 }, { "epoch": 0.3738451111190383, "grad_norm": 0.31434884667396545, "learning_rate": 3.942093322172241e-05, "loss": 0.1529, "step": 20960 }, { "epoch": 0.373862947240752, "grad_norm": 0.3276646137237549, "learning_rate": 3.941966174647501e-05, "loss": 0.2384, "step": 20961 }, { "epoch": 0.37388078336246566, "grad_norm": 0.19896353781223297, "learning_rate": 3.941839021533203e-05, "loss": 0.1157, "step": 20962 }, { "epoch": 0.37389861948417935, "grad_norm": 0.300504595041275, "learning_rate": 3.9417118628298386e-05, "loss": 0.1526, "step": 20963 }, { "epoch": 0.37391645560589304, "grad_norm": 0.3498363196849823, "learning_rate": 3.941584698537901e-05, "loss": 0.1688, "step": 20964 }, { "epoch": 0.3739342917276067, "grad_norm": 0.6301300525665283, "learning_rate": 3.941457528657884e-05, "loss": 0.1944, "step": 20965 }, { "epoch": 0.37395212784932047, "grad_norm": 0.3406152129173279, "learning_rate": 3.941330353190279e-05, "loss": 0.1562, "step": 20966 }, { "epoch": 0.37396996397103416, "grad_norm": 0.31587889790534973, "learning_rate": 3.94120317213558e-05, "loss": 0.1423, "step": 20967 }, { "epoch": 0.37398780009274785, "grad_norm": 0.3007887005805969, "learning_rate": 3.94107598549428e-05, "loss": 0.1608, "step": 20968 }, { "epoch": 0.37400563621446153, "grad_norm": 0.21104386448860168, "learning_rate": 3.940948793266871e-05, "loss": 0.1422, "step": 20969 }, { "epoch": 0.3740234723361752, "grad_norm": 0.2530542016029358, "learning_rate": 3.940821595453847e-05, "loss": 0.1245, "step": 20970 }, { "epoch": 0.3740413084578889, "grad_norm": 0.22693689167499542, "learning_rate": 3.9406943920557014e-05, "loss": 0.1272, "step": 20971 }, { "epoch": 0.3740591445796026, "grad_norm": 0.23822426795959473, "learning_rate": 3.9405671830729266e-05, "loss": 0.129, "step": 20972 }, { "epoch": 0.3740769807013163, "grad_norm": 0.2835020124912262, "learning_rate": 3.940439968506016e-05, "loss": 0.1759, "step": 20973 }, { "epoch": 0.37409481682303, "grad_norm": 0.35984939336776733, "learning_rate": 3.940312748355461e-05, "loss": 0.2107, "step": 20974 }, { "epoch": 0.3741126529447437, "grad_norm": 0.27591952681541443, "learning_rate": 3.9401855226217574e-05, "loss": 0.1729, "step": 20975 }, { "epoch": 0.3741304890664574, "grad_norm": 0.2780141234397888, "learning_rate": 3.940058291305398e-05, "loss": 0.1758, "step": 20976 }, { "epoch": 0.3741483251881711, "grad_norm": 0.33819320797920227, "learning_rate": 3.9399310544068745e-05, "loss": 0.1705, "step": 20977 }, { "epoch": 0.3741661613098848, "grad_norm": 0.23138880729675293, "learning_rate": 3.939803811926681e-05, "loss": 0.2254, "step": 20978 }, { "epoch": 0.37418399743159847, "grad_norm": 0.19835540652275085, "learning_rate": 3.939676563865311e-05, "loss": 0.1103, "step": 20979 }, { "epoch": 0.37420183355331216, "grad_norm": 0.4091818332672119, "learning_rate": 3.9395493102232574e-05, "loss": 0.1671, "step": 20980 }, { "epoch": 0.37421966967502585, "grad_norm": 0.25975480675697327, "learning_rate": 3.939422051001013e-05, "loss": 0.1202, "step": 20981 }, { "epoch": 0.37423750579673953, "grad_norm": 0.33079445362091064, "learning_rate": 3.939294786199072e-05, "loss": 0.1929, "step": 20982 }, { "epoch": 0.3742553419184533, "grad_norm": 0.21822389960289001, "learning_rate": 3.9391675158179265e-05, "loss": 0.1468, "step": 20983 }, { "epoch": 0.37427317804016697, "grad_norm": 0.2974565625190735, "learning_rate": 3.939040239858072e-05, "loss": 0.1736, "step": 20984 }, { "epoch": 0.37429101416188065, "grad_norm": 0.2653729319572449, "learning_rate": 3.938912958319999e-05, "loss": 0.1455, "step": 20985 }, { "epoch": 0.37430885028359434, "grad_norm": 0.20173591375350952, "learning_rate": 3.9387856712042034e-05, "loss": 0.1437, "step": 20986 }, { "epoch": 0.37432668640530803, "grad_norm": 0.3011341989040375, "learning_rate": 3.938658378511177e-05, "loss": 0.1838, "step": 20987 }, { "epoch": 0.3743445225270217, "grad_norm": 0.3004617989063263, "learning_rate": 3.938531080241414e-05, "loss": 0.1894, "step": 20988 }, { "epoch": 0.3743623586487354, "grad_norm": 0.31858810782432556, "learning_rate": 3.9384037763954074e-05, "loss": 0.1248, "step": 20989 }, { "epoch": 0.3743801947704491, "grad_norm": 0.3206600248813629, "learning_rate": 3.938276466973652e-05, "loss": 0.1528, "step": 20990 }, { "epoch": 0.3743980308921628, "grad_norm": 0.24963733553886414, "learning_rate": 3.938149151976639e-05, "loss": 0.1703, "step": 20991 }, { "epoch": 0.3744158670138765, "grad_norm": 0.34398558735847473, "learning_rate": 3.938021831404864e-05, "loss": 0.1437, "step": 20992 }, { "epoch": 0.3744337031355902, "grad_norm": 0.29185688495635986, "learning_rate": 3.937894505258819e-05, "loss": 0.1519, "step": 20993 }, { "epoch": 0.3744515392573039, "grad_norm": 0.36593085527420044, "learning_rate": 3.9377671735389995e-05, "loss": 0.1943, "step": 20994 }, { "epoch": 0.3744693753790176, "grad_norm": 0.23105423152446747, "learning_rate": 3.937639836245896e-05, "loss": 0.1575, "step": 20995 }, { "epoch": 0.3744872115007313, "grad_norm": 0.2864378094673157, "learning_rate": 3.9375124933800056e-05, "loss": 0.1532, "step": 20996 }, { "epoch": 0.37450504762244496, "grad_norm": 0.2871434688568115, "learning_rate": 3.93738514494182e-05, "loss": 0.1639, "step": 20997 }, { "epoch": 0.37452288374415865, "grad_norm": 0.29691845178604126, "learning_rate": 3.937257790931832e-05, "loss": 0.1348, "step": 20998 }, { "epoch": 0.37454071986587234, "grad_norm": 0.2640552818775177, "learning_rate": 3.937130431350538e-05, "loss": 0.1645, "step": 20999 }, { "epoch": 0.3745585559875861, "grad_norm": 0.20489969849586487, "learning_rate": 3.9370030661984295e-05, "loss": 0.1862, "step": 21000 }, { "epoch": 0.3745585559875861, "eval_loss": 0.15552359819412231, "eval_runtime": 107.8589, "eval_samples_per_second": 9.494, "eval_steps_per_second": 1.585, "step": 21000 }, { "epoch": 0.37457639210929977, "grad_norm": 0.24851778149604797, "learning_rate": 3.936875695476e-05, "loss": 0.205, "step": 21001 }, { "epoch": 0.37459422823101346, "grad_norm": 0.2541540563106537, "learning_rate": 3.9367483191837444e-05, "loss": 0.1742, "step": 21002 }, { "epoch": 0.37461206435272715, "grad_norm": 0.2621119022369385, "learning_rate": 3.936620937322156e-05, "loss": 0.1776, "step": 21003 }, { "epoch": 0.37462990047444084, "grad_norm": 0.20699170231819153, "learning_rate": 3.9364935498917296e-05, "loss": 0.1541, "step": 21004 }, { "epoch": 0.3746477365961545, "grad_norm": 0.26266545057296753, "learning_rate": 3.936366156892958e-05, "loss": 0.1916, "step": 21005 }, { "epoch": 0.3746655727178682, "grad_norm": 0.28171104192733765, "learning_rate": 3.9362387583263336e-05, "loss": 0.2057, "step": 21006 }, { "epoch": 0.3746834088395819, "grad_norm": 0.24466729164123535, "learning_rate": 3.936111354192352e-05, "loss": 0.1487, "step": 21007 }, { "epoch": 0.37470124496129564, "grad_norm": 0.2627304196357727, "learning_rate": 3.935983944491508e-05, "loss": 0.1871, "step": 21008 }, { "epoch": 0.37471908108300933, "grad_norm": 0.22136624157428741, "learning_rate": 3.935856529224293e-05, "loss": 0.1501, "step": 21009 }, { "epoch": 0.374736917204723, "grad_norm": 0.2881280183792114, "learning_rate": 3.9357291083912036e-05, "loss": 0.1421, "step": 21010 }, { "epoch": 0.3747547533264367, "grad_norm": 0.25228843092918396, "learning_rate": 3.935601681992731e-05, "loss": 0.1649, "step": 21011 }, { "epoch": 0.3747725894481504, "grad_norm": 0.2941727340221405, "learning_rate": 3.9354742500293715e-05, "loss": 0.1832, "step": 21012 }, { "epoch": 0.3747904255698641, "grad_norm": 0.2715993821620941, "learning_rate": 3.935346812501617e-05, "loss": 0.133, "step": 21013 }, { "epoch": 0.37480826169157777, "grad_norm": 0.2922652065753937, "learning_rate": 3.9352193694099624e-05, "loss": 0.1326, "step": 21014 }, { "epoch": 0.37482609781329146, "grad_norm": 0.2693033516407013, "learning_rate": 3.935091920754903e-05, "loss": 0.1809, "step": 21015 }, { "epoch": 0.37484393393500515, "grad_norm": 0.2567836046218872, "learning_rate": 3.9349644665369304e-05, "loss": 0.1387, "step": 21016 }, { "epoch": 0.3748617700567189, "grad_norm": 0.212194561958313, "learning_rate": 3.93483700675654e-05, "loss": 0.154, "step": 21017 }, { "epoch": 0.3748796061784326, "grad_norm": 0.282661110162735, "learning_rate": 3.934709541414227e-05, "loss": 0.2028, "step": 21018 }, { "epoch": 0.37489744230014627, "grad_norm": 0.284035325050354, "learning_rate": 3.934582070510483e-05, "loss": 0.1786, "step": 21019 }, { "epoch": 0.37491527842185995, "grad_norm": 0.25869375467300415, "learning_rate": 3.9344545940458044e-05, "loss": 0.1232, "step": 21020 }, { "epoch": 0.37493311454357364, "grad_norm": 0.26101529598236084, "learning_rate": 3.934327112020684e-05, "loss": 0.1454, "step": 21021 }, { "epoch": 0.37495095066528733, "grad_norm": 0.4360544979572296, "learning_rate": 3.9341996244356164e-05, "loss": 0.1473, "step": 21022 }, { "epoch": 0.374968786787001, "grad_norm": 0.2781641185283661, "learning_rate": 3.934072131291096e-05, "loss": 0.1365, "step": 21023 }, { "epoch": 0.3749866229087147, "grad_norm": 0.32837679982185364, "learning_rate": 3.933944632587615e-05, "loss": 0.1905, "step": 21024 }, { "epoch": 0.37500445903042845, "grad_norm": 0.24864588677883148, "learning_rate": 3.933817128325671e-05, "loss": 0.1508, "step": 21025 }, { "epoch": 0.37502229515214214, "grad_norm": 0.19751842319965363, "learning_rate": 3.933689618505756e-05, "loss": 0.1384, "step": 21026 }, { "epoch": 0.3750401312738558, "grad_norm": 0.27384525537490845, "learning_rate": 3.933562103128365e-05, "loss": 0.1231, "step": 21027 }, { "epoch": 0.3750579673955695, "grad_norm": 0.4188830554485321, "learning_rate": 3.9334345821939925e-05, "loss": 0.1691, "step": 21028 }, { "epoch": 0.3750758035172832, "grad_norm": 0.20186211168766022, "learning_rate": 3.933307055703132e-05, "loss": 0.1015, "step": 21029 }, { "epoch": 0.3750936396389969, "grad_norm": 0.25390926003456116, "learning_rate": 3.9331795236562785e-05, "loss": 0.1405, "step": 21030 }, { "epoch": 0.3751114757607106, "grad_norm": 0.25701770186424255, "learning_rate": 3.933051986053926e-05, "loss": 0.1939, "step": 21031 }, { "epoch": 0.37512931188242427, "grad_norm": 0.30005180835723877, "learning_rate": 3.9329244428965684e-05, "loss": 0.1419, "step": 21032 }, { "epoch": 0.37514714800413795, "grad_norm": 0.2700938582420349, "learning_rate": 3.932796894184702e-05, "loss": 0.2098, "step": 21033 }, { "epoch": 0.3751649841258517, "grad_norm": 0.2825222909450531, "learning_rate": 3.9326693399188195e-05, "loss": 0.1904, "step": 21034 }, { "epoch": 0.3751828202475654, "grad_norm": 0.28326770663261414, "learning_rate": 3.932541780099416e-05, "loss": 0.198, "step": 21035 }, { "epoch": 0.3752006563692791, "grad_norm": 0.29205089807510376, "learning_rate": 3.932414214726985e-05, "loss": 0.1544, "step": 21036 }, { "epoch": 0.37521849249099276, "grad_norm": 0.31696927547454834, "learning_rate": 3.932286643802022e-05, "loss": 0.2181, "step": 21037 }, { "epoch": 0.37523632861270645, "grad_norm": 0.24711734056472778, "learning_rate": 3.932159067325022e-05, "loss": 0.1392, "step": 21038 }, { "epoch": 0.37525416473442014, "grad_norm": 0.3203314244747162, "learning_rate": 3.932031485296478e-05, "loss": 0.1857, "step": 21039 }, { "epoch": 0.3752720008561338, "grad_norm": 0.30566754937171936, "learning_rate": 3.9319038977168865e-05, "loss": 0.2117, "step": 21040 }, { "epoch": 0.3752898369778475, "grad_norm": 0.29654553532600403, "learning_rate": 3.9317763045867393e-05, "loss": 0.1825, "step": 21041 }, { "epoch": 0.37530767309956126, "grad_norm": 0.2398095726966858, "learning_rate": 3.9316487059065335e-05, "loss": 0.1418, "step": 21042 }, { "epoch": 0.37532550922127494, "grad_norm": 0.35327228903770447, "learning_rate": 3.931521101676763e-05, "loss": 0.14, "step": 21043 }, { "epoch": 0.37534334534298863, "grad_norm": 0.2769358456134796, "learning_rate": 3.9313934918979224e-05, "loss": 0.1257, "step": 21044 }, { "epoch": 0.3753611814647023, "grad_norm": 0.29926323890686035, "learning_rate": 3.931265876570506e-05, "loss": 0.143, "step": 21045 }, { "epoch": 0.375379017586416, "grad_norm": 0.33838826417922974, "learning_rate": 3.9311382556950084e-05, "loss": 0.2138, "step": 21046 }, { "epoch": 0.3753968537081297, "grad_norm": 0.2684604525566101, "learning_rate": 3.931010629271924e-05, "loss": 0.1789, "step": 21047 }, { "epoch": 0.3754146898298434, "grad_norm": 0.22139166295528412, "learning_rate": 3.9308829973017495e-05, "loss": 0.1723, "step": 21048 }, { "epoch": 0.3754325259515571, "grad_norm": 0.28836438059806824, "learning_rate": 3.930755359784978e-05, "loss": 0.0951, "step": 21049 }, { "epoch": 0.37545036207327076, "grad_norm": 0.38170522451400757, "learning_rate": 3.930627716722104e-05, "loss": 0.1896, "step": 21050 }, { "epoch": 0.3754681981949845, "grad_norm": 0.3336580693721771, "learning_rate": 3.9305000681136236e-05, "loss": 0.1902, "step": 21051 }, { "epoch": 0.3754860343166982, "grad_norm": 0.39005589485168457, "learning_rate": 3.93037241396003e-05, "loss": 0.1897, "step": 21052 }, { "epoch": 0.3755038704384119, "grad_norm": 0.2585568428039551, "learning_rate": 3.93024475426182e-05, "loss": 0.1839, "step": 21053 }, { "epoch": 0.37552170656012557, "grad_norm": 0.22890785336494446, "learning_rate": 3.930117089019486e-05, "loss": 0.1314, "step": 21054 }, { "epoch": 0.37553954268183926, "grad_norm": 0.20664522051811218, "learning_rate": 3.929989418233525e-05, "loss": 0.16, "step": 21055 }, { "epoch": 0.37555737880355294, "grad_norm": 0.41688215732574463, "learning_rate": 3.929861741904431e-05, "loss": 0.183, "step": 21056 }, { "epoch": 0.37557521492526663, "grad_norm": 0.208701953291893, "learning_rate": 3.9297340600326995e-05, "loss": 0.1426, "step": 21057 }, { "epoch": 0.3755930510469803, "grad_norm": 0.2092231810092926, "learning_rate": 3.9296063726188244e-05, "loss": 0.1363, "step": 21058 }, { "epoch": 0.37561088716869406, "grad_norm": 0.3435227870941162, "learning_rate": 3.9294786796633007e-05, "loss": 0.2322, "step": 21059 }, { "epoch": 0.37562872329040775, "grad_norm": 0.20513859391212463, "learning_rate": 3.929350981166625e-05, "loss": 0.1475, "step": 21060 }, { "epoch": 0.37564655941212144, "grad_norm": 0.22249682247638702, "learning_rate": 3.929223277129291e-05, "loss": 0.14, "step": 21061 }, { "epoch": 0.3756643955338351, "grad_norm": 0.267733633518219, "learning_rate": 3.9290955675517934e-05, "loss": 0.1979, "step": 21062 }, { "epoch": 0.3756822316555488, "grad_norm": 0.32979053258895874, "learning_rate": 3.9289678524346284e-05, "loss": 0.1409, "step": 21063 }, { "epoch": 0.3757000677772625, "grad_norm": 0.3158261477947235, "learning_rate": 3.92884013177829e-05, "loss": 0.1691, "step": 21064 }, { "epoch": 0.3757179038989762, "grad_norm": 0.26205506920814514, "learning_rate": 3.928712405583274e-05, "loss": 0.1657, "step": 21065 }, { "epoch": 0.3757357400206899, "grad_norm": 0.27283820509910583, "learning_rate": 3.9285846738500754e-05, "loss": 0.2025, "step": 21066 }, { "epoch": 0.3757535761424036, "grad_norm": 0.2515011429786682, "learning_rate": 3.9284569365791885e-05, "loss": 0.1875, "step": 21067 }, { "epoch": 0.3757714122641173, "grad_norm": 0.2335003763437271, "learning_rate": 3.9283291937711096e-05, "loss": 0.1596, "step": 21068 }, { "epoch": 0.375789248385831, "grad_norm": 0.24934139847755432, "learning_rate": 3.9282014454263335e-05, "loss": 0.2232, "step": 21069 }, { "epoch": 0.3758070845075447, "grad_norm": 0.3563438653945923, "learning_rate": 3.9280736915453555e-05, "loss": 0.2303, "step": 21070 }, { "epoch": 0.3758249206292584, "grad_norm": 0.23537486791610718, "learning_rate": 3.92794593212867e-05, "loss": 0.1473, "step": 21071 }, { "epoch": 0.37584275675097206, "grad_norm": 0.23724907636642456, "learning_rate": 3.927818167176773e-05, "loss": 0.1377, "step": 21072 }, { "epoch": 0.37586059287268575, "grad_norm": 0.22681060433387756, "learning_rate": 3.92769039669016e-05, "loss": 0.1606, "step": 21073 }, { "epoch": 0.37587842899439944, "grad_norm": 0.18544234335422516, "learning_rate": 3.927562620669326e-05, "loss": 0.1211, "step": 21074 }, { "epoch": 0.3758962651161131, "grad_norm": 0.2739073932170868, "learning_rate": 3.927434839114766e-05, "loss": 0.1589, "step": 21075 }, { "epoch": 0.37591410123782687, "grad_norm": 0.31435784697532654, "learning_rate": 3.927307052026975e-05, "loss": 0.1195, "step": 21076 }, { "epoch": 0.37593193735954056, "grad_norm": 0.28558459877967834, "learning_rate": 3.9271792594064495e-05, "loss": 0.2036, "step": 21077 }, { "epoch": 0.37594977348125425, "grad_norm": 0.18557113409042358, "learning_rate": 3.9270514612536844e-05, "loss": 0.1515, "step": 21078 }, { "epoch": 0.37596760960296793, "grad_norm": 0.2325652688741684, "learning_rate": 3.926923657569175e-05, "loss": 0.1551, "step": 21079 }, { "epoch": 0.3759854457246816, "grad_norm": 0.36040163040161133, "learning_rate": 3.926795848353416e-05, "loss": 0.2233, "step": 21080 }, { "epoch": 0.3760032818463953, "grad_norm": 0.21082210540771484, "learning_rate": 3.9266680336069036e-05, "loss": 0.1633, "step": 21081 }, { "epoch": 0.376021117968109, "grad_norm": 0.2819143533706665, "learning_rate": 3.926540213330133e-05, "loss": 0.2116, "step": 21082 }, { "epoch": 0.3760389540898227, "grad_norm": 0.2111992985010147, "learning_rate": 3.9264123875236006e-05, "loss": 0.1362, "step": 21083 }, { "epoch": 0.37605679021153643, "grad_norm": 0.28136852383613586, "learning_rate": 3.926284556187801e-05, "loss": 0.1426, "step": 21084 }, { "epoch": 0.3760746263332501, "grad_norm": 0.31279659271240234, "learning_rate": 3.92615671932323e-05, "loss": 0.1968, "step": 21085 }, { "epoch": 0.3760924624549638, "grad_norm": 0.24259164929389954, "learning_rate": 3.926028876930382e-05, "loss": 0.1567, "step": 21086 }, { "epoch": 0.3761102985766775, "grad_norm": 0.2827032506465912, "learning_rate": 3.925901029009754e-05, "loss": 0.1736, "step": 21087 }, { "epoch": 0.3761281346983912, "grad_norm": 0.2551811635494232, "learning_rate": 3.9257731755618414e-05, "loss": 0.1454, "step": 21088 }, { "epoch": 0.37614597082010487, "grad_norm": 0.42513418197631836, "learning_rate": 3.9256453165871397e-05, "loss": 0.1492, "step": 21089 }, { "epoch": 0.37616380694181856, "grad_norm": 0.18518340587615967, "learning_rate": 3.9255174520861436e-05, "loss": 0.107, "step": 21090 }, { "epoch": 0.37618164306353225, "grad_norm": 0.22969557344913483, "learning_rate": 3.92538958205935e-05, "loss": 0.1651, "step": 21091 }, { "epoch": 0.37619947918524593, "grad_norm": 0.19186931848526, "learning_rate": 3.925261706507254e-05, "loss": 0.1357, "step": 21092 }, { "epoch": 0.3762173153069597, "grad_norm": 0.3463195860385895, "learning_rate": 3.925133825430351e-05, "loss": 0.1844, "step": 21093 }, { "epoch": 0.37623515142867336, "grad_norm": 0.22685328125953674, "learning_rate": 3.9250059388291375e-05, "loss": 0.1736, "step": 21094 }, { "epoch": 0.37625298755038705, "grad_norm": 0.239205002784729, "learning_rate": 3.9248780467041094e-05, "loss": 0.1552, "step": 21095 }, { "epoch": 0.37627082367210074, "grad_norm": 0.24057349562644958, "learning_rate": 3.92475014905576e-05, "loss": 0.1665, "step": 21096 }, { "epoch": 0.37628865979381443, "grad_norm": 0.27467283606529236, "learning_rate": 3.924622245884588e-05, "loss": 0.153, "step": 21097 }, { "epoch": 0.3763064959155281, "grad_norm": 0.23760554194450378, "learning_rate": 3.9244943371910895e-05, "loss": 0.1503, "step": 21098 }, { "epoch": 0.3763243320372418, "grad_norm": 0.28076374530792236, "learning_rate": 3.924366422975757e-05, "loss": 0.1867, "step": 21099 }, { "epoch": 0.3763421681589555, "grad_norm": 0.24499619007110596, "learning_rate": 3.924238503239089e-05, "loss": 0.2002, "step": 21100 }, { "epoch": 0.37636000428066924, "grad_norm": 0.30110907554626465, "learning_rate": 3.924110577981581e-05, "loss": 0.2248, "step": 21101 }, { "epoch": 0.3763778404023829, "grad_norm": 0.30522486567497253, "learning_rate": 3.923982647203728e-05, "loss": 0.1622, "step": 21102 }, { "epoch": 0.3763956765240966, "grad_norm": 0.40530920028686523, "learning_rate": 3.9238547109060265e-05, "loss": 0.2179, "step": 21103 }, { "epoch": 0.3764135126458103, "grad_norm": 0.2657020688056946, "learning_rate": 3.9237267690889716e-05, "loss": 0.1418, "step": 21104 }, { "epoch": 0.376431348767524, "grad_norm": 0.26725947856903076, "learning_rate": 3.923598821753061e-05, "loss": 0.1743, "step": 21105 }, { "epoch": 0.3764491848892377, "grad_norm": 0.3668763041496277, "learning_rate": 3.9234708688987896e-05, "loss": 0.2352, "step": 21106 }, { "epoch": 0.37646702101095136, "grad_norm": 0.2298911064863205, "learning_rate": 3.923342910526653e-05, "loss": 0.168, "step": 21107 }, { "epoch": 0.37648485713266505, "grad_norm": 0.38377413153648376, "learning_rate": 3.923214946637148e-05, "loss": 0.2558, "step": 21108 }, { "epoch": 0.3765026932543788, "grad_norm": 0.2673284113407135, "learning_rate": 3.9230869772307713e-05, "loss": 0.2117, "step": 21109 }, { "epoch": 0.3765205293760925, "grad_norm": 0.21532359719276428, "learning_rate": 3.9229590023080164e-05, "loss": 0.1594, "step": 21110 }, { "epoch": 0.37653836549780617, "grad_norm": 0.2449541538953781, "learning_rate": 3.9228310218693816e-05, "loss": 0.1225, "step": 21111 }, { "epoch": 0.37655620161951986, "grad_norm": 0.37000924348831177, "learning_rate": 3.9227030359153616e-05, "loss": 0.1485, "step": 21112 }, { "epoch": 0.37657403774123355, "grad_norm": 0.31963300704956055, "learning_rate": 3.922575044446454e-05, "loss": 0.1402, "step": 21113 }, { "epoch": 0.37659187386294724, "grad_norm": 0.28932279348373413, "learning_rate": 3.9224470474631546e-05, "loss": 0.2088, "step": 21114 }, { "epoch": 0.3766097099846609, "grad_norm": 0.30094069242477417, "learning_rate": 3.922319044965958e-05, "loss": 0.1553, "step": 21115 }, { "epoch": 0.3766275461063746, "grad_norm": 0.2677748203277588, "learning_rate": 3.922191036955363e-05, "loss": 0.191, "step": 21116 }, { "epoch": 0.3766453822280883, "grad_norm": 0.24286675453186035, "learning_rate": 3.922063023431863e-05, "loss": 0.1299, "step": 21117 }, { "epoch": 0.37666321834980204, "grad_norm": 0.3031335771083832, "learning_rate": 3.9219350043959556e-05, "loss": 0.1389, "step": 21118 }, { "epoch": 0.37668105447151573, "grad_norm": 0.24761976301670074, "learning_rate": 3.921806979848137e-05, "loss": 0.1323, "step": 21119 }, { "epoch": 0.3766988905932294, "grad_norm": 0.28467342257499695, "learning_rate": 3.9216789497889046e-05, "loss": 0.1877, "step": 21120 }, { "epoch": 0.3767167267149431, "grad_norm": 0.2957324683666229, "learning_rate": 3.921550914218752e-05, "loss": 0.2204, "step": 21121 }, { "epoch": 0.3767345628366568, "grad_norm": 0.3219175636768341, "learning_rate": 3.9214228731381784e-05, "loss": 0.1894, "step": 21122 }, { "epoch": 0.3767523989583705, "grad_norm": 0.30537447333335876, "learning_rate": 3.9212948265476785e-05, "loss": 0.1536, "step": 21123 }, { "epoch": 0.37677023508008417, "grad_norm": 0.37514179944992065, "learning_rate": 3.921166774447749e-05, "loss": 0.1865, "step": 21124 }, { "epoch": 0.37678807120179786, "grad_norm": 0.3233698904514313, "learning_rate": 3.921038716838886e-05, "loss": 0.1629, "step": 21125 }, { "epoch": 0.3768059073235116, "grad_norm": 0.23803956806659698, "learning_rate": 3.9209106537215854e-05, "loss": 0.168, "step": 21126 }, { "epoch": 0.3768237434452253, "grad_norm": 0.27758491039276123, "learning_rate": 3.9207825850963454e-05, "loss": 0.1622, "step": 21127 }, { "epoch": 0.376841579566939, "grad_norm": 0.3723032772541046, "learning_rate": 3.920654510963661e-05, "loss": 0.1071, "step": 21128 }, { "epoch": 0.37685941568865267, "grad_norm": 0.2727159857749939, "learning_rate": 3.9205264313240296e-05, "loss": 0.1745, "step": 21129 }, { "epoch": 0.37687725181036635, "grad_norm": 0.31325629353523254, "learning_rate": 3.9203983461779465e-05, "loss": 0.1851, "step": 21130 }, { "epoch": 0.37689508793208004, "grad_norm": 0.25675976276397705, "learning_rate": 3.920270255525909e-05, "loss": 0.1714, "step": 21131 }, { "epoch": 0.37691292405379373, "grad_norm": 0.3355540931224823, "learning_rate": 3.920142159368413e-05, "loss": 0.1971, "step": 21132 }, { "epoch": 0.3769307601755074, "grad_norm": 0.29059579968452454, "learning_rate": 3.9200140577059566e-05, "loss": 0.1527, "step": 21133 }, { "epoch": 0.3769485962972211, "grad_norm": 0.28188809752464294, "learning_rate": 3.919885950539034e-05, "loss": 0.0655, "step": 21134 }, { "epoch": 0.37696643241893485, "grad_norm": 0.2044854313135147, "learning_rate": 3.919757837868143e-05, "loss": 0.1503, "step": 21135 }, { "epoch": 0.37698426854064854, "grad_norm": 0.3224719762802124, "learning_rate": 3.919629719693781e-05, "loss": 0.135, "step": 21136 }, { "epoch": 0.3770021046623622, "grad_norm": 0.15747763216495514, "learning_rate": 3.919501596016444e-05, "loss": 0.111, "step": 21137 }, { "epoch": 0.3770199407840759, "grad_norm": 0.1874404102563858, "learning_rate": 3.919373466836628e-05, "loss": 0.158, "step": 21138 }, { "epoch": 0.3770377769057896, "grad_norm": 0.3001151978969574, "learning_rate": 3.919245332154831e-05, "loss": 0.2188, "step": 21139 }, { "epoch": 0.3770556130275033, "grad_norm": 0.31146538257598877, "learning_rate": 3.919117191971548e-05, "loss": 0.139, "step": 21140 }, { "epoch": 0.377073449149217, "grad_norm": 0.19606070220470428, "learning_rate": 3.918989046287277e-05, "loss": 0.1604, "step": 21141 }, { "epoch": 0.37709128527093067, "grad_norm": 0.2943406105041504, "learning_rate": 3.918860895102514e-05, "loss": 0.1831, "step": 21142 }, { "epoch": 0.3771091213926444, "grad_norm": 0.254599004983902, "learning_rate": 3.9187327384177564e-05, "loss": 0.148, "step": 21143 }, { "epoch": 0.3771269575143581, "grad_norm": 0.2885769307613373, "learning_rate": 3.918604576233501e-05, "loss": 0.1687, "step": 21144 }, { "epoch": 0.3771447936360718, "grad_norm": 0.24128156900405884, "learning_rate": 3.918476408550243e-05, "loss": 0.1578, "step": 21145 }, { "epoch": 0.3771626297577855, "grad_norm": 0.3192015588283539, "learning_rate": 3.918348235368482e-05, "loss": 0.1563, "step": 21146 }, { "epoch": 0.37718046587949916, "grad_norm": 0.2293146252632141, "learning_rate": 3.9182200566887126e-05, "loss": 0.14, "step": 21147 }, { "epoch": 0.37719830200121285, "grad_norm": 0.2671649158000946, "learning_rate": 3.918091872511433e-05, "loss": 0.1686, "step": 21148 }, { "epoch": 0.37721613812292654, "grad_norm": 0.3107260465621948, "learning_rate": 3.9179636828371394e-05, "loss": 0.136, "step": 21149 }, { "epoch": 0.3772339742446402, "grad_norm": 0.22389011085033417, "learning_rate": 3.917835487666328e-05, "loss": 0.1297, "step": 21150 }, { "epoch": 0.3772518103663539, "grad_norm": 0.30652227997779846, "learning_rate": 3.917707286999497e-05, "loss": 0.1561, "step": 21151 }, { "epoch": 0.37726964648806766, "grad_norm": 0.22887754440307617, "learning_rate": 3.917579080837144e-05, "loss": 0.1541, "step": 21152 }, { "epoch": 0.37728748260978134, "grad_norm": 0.3071889877319336, "learning_rate": 3.917450869179764e-05, "loss": 0.1735, "step": 21153 }, { "epoch": 0.37730531873149503, "grad_norm": 0.2959241569042206, "learning_rate": 3.917322652027854e-05, "loss": 0.1717, "step": 21154 }, { "epoch": 0.3773231548532087, "grad_norm": 0.22360247373580933, "learning_rate": 3.917194429381913e-05, "loss": 0.1345, "step": 21155 }, { "epoch": 0.3773409909749224, "grad_norm": 0.24642226099967957, "learning_rate": 3.9170662012424364e-05, "loss": 0.1548, "step": 21156 }, { "epoch": 0.3773588270966361, "grad_norm": 0.2418583184480667, "learning_rate": 3.916937967609922e-05, "loss": 0.1735, "step": 21157 }, { "epoch": 0.3773766632183498, "grad_norm": 0.3053596317768097, "learning_rate": 3.916809728484866e-05, "loss": 0.1867, "step": 21158 }, { "epoch": 0.37739449934006347, "grad_norm": 0.25629428029060364, "learning_rate": 3.9166814838677676e-05, "loss": 0.1911, "step": 21159 }, { "epoch": 0.3774123354617772, "grad_norm": 0.26770350337028503, "learning_rate": 3.916553233759121e-05, "loss": 0.151, "step": 21160 }, { "epoch": 0.3774301715834909, "grad_norm": 0.22661587595939636, "learning_rate": 3.916424978159425e-05, "loss": 0.1535, "step": 21161 }, { "epoch": 0.3774480077052046, "grad_norm": 0.3032943606376648, "learning_rate": 3.9162967170691776e-05, "loss": 0.2057, "step": 21162 }, { "epoch": 0.3774658438269183, "grad_norm": 0.26406124234199524, "learning_rate": 3.916168450488874e-05, "loss": 0.1682, "step": 21163 }, { "epoch": 0.37748367994863197, "grad_norm": 0.27670973539352417, "learning_rate": 3.9160401784190124e-05, "loss": 0.1538, "step": 21164 }, { "epoch": 0.37750151607034566, "grad_norm": 0.24491195380687714, "learning_rate": 3.915911900860091e-05, "loss": 0.1317, "step": 21165 }, { "epoch": 0.37751935219205934, "grad_norm": 0.22537662088871002, "learning_rate": 3.915783617812605e-05, "loss": 0.1394, "step": 21166 }, { "epoch": 0.37753718831377303, "grad_norm": 0.38055962324142456, "learning_rate": 3.915655329277052e-05, "loss": 0.1363, "step": 21167 }, { "epoch": 0.3775550244354868, "grad_norm": 0.27480241656303406, "learning_rate": 3.915527035253932e-05, "loss": 0.1405, "step": 21168 }, { "epoch": 0.37757286055720046, "grad_norm": 0.17853978276252747, "learning_rate": 3.9153987357437396e-05, "loss": 0.1156, "step": 21169 }, { "epoch": 0.37759069667891415, "grad_norm": 0.2834928333759308, "learning_rate": 3.915270430746972e-05, "loss": 0.1674, "step": 21170 }, { "epoch": 0.37760853280062784, "grad_norm": 0.32998737692832947, "learning_rate": 3.915142120264128e-05, "loss": 0.1654, "step": 21171 }, { "epoch": 0.3776263689223415, "grad_norm": 0.32447749376296997, "learning_rate": 3.915013804295704e-05, "loss": 0.1997, "step": 21172 }, { "epoch": 0.3776442050440552, "grad_norm": 0.2537434995174408, "learning_rate": 3.9148854828421975e-05, "loss": 0.1497, "step": 21173 }, { "epoch": 0.3776620411657689, "grad_norm": 0.23773936927318573, "learning_rate": 3.914757155904107e-05, "loss": 0.1195, "step": 21174 }, { "epoch": 0.3776798772874826, "grad_norm": 0.26881924271583557, "learning_rate": 3.914628823481929e-05, "loss": 0.1611, "step": 21175 }, { "epoch": 0.3776977134091963, "grad_norm": 0.2859360873699188, "learning_rate": 3.9145004855761605e-05, "loss": 0.2267, "step": 21176 }, { "epoch": 0.37771554953091, "grad_norm": 0.24947260320186615, "learning_rate": 3.9143721421873006e-05, "loss": 0.1907, "step": 21177 }, { "epoch": 0.3777333856526237, "grad_norm": 0.20801378786563873, "learning_rate": 3.914243793315845e-05, "loss": 0.1439, "step": 21178 }, { "epoch": 0.3777512217743374, "grad_norm": 0.2590799331665039, "learning_rate": 3.914115438962292e-05, "loss": 0.19, "step": 21179 }, { "epoch": 0.3777690578960511, "grad_norm": 0.2983662486076355, "learning_rate": 3.913987079127139e-05, "loss": 0.1356, "step": 21180 }, { "epoch": 0.3777868940177648, "grad_norm": 0.25143465399742126, "learning_rate": 3.913858713810885e-05, "loss": 0.1909, "step": 21181 }, { "epoch": 0.37780473013947846, "grad_norm": 0.2417428195476532, "learning_rate": 3.913730343014025e-05, "loss": 0.1447, "step": 21182 }, { "epoch": 0.37782256626119215, "grad_norm": 0.21844208240509033, "learning_rate": 3.9136019667370576e-05, "loss": 0.1193, "step": 21183 }, { "epoch": 0.37784040238290584, "grad_norm": 0.28542360663414, "learning_rate": 3.913473584980482e-05, "loss": 0.1791, "step": 21184 }, { "epoch": 0.3778582385046196, "grad_norm": 0.2789161205291748, "learning_rate": 3.9133451977447933e-05, "loss": 0.1743, "step": 21185 }, { "epoch": 0.37787607462633327, "grad_norm": 0.25786641240119934, "learning_rate": 3.9132168050304904e-05, "loss": 0.1184, "step": 21186 }, { "epoch": 0.37789391074804696, "grad_norm": 0.3577745854854584, "learning_rate": 3.9130884068380724e-05, "loss": 0.1474, "step": 21187 }, { "epoch": 0.37791174686976065, "grad_norm": 0.2462194263935089, "learning_rate": 3.9129600031680346e-05, "loss": 0.1736, "step": 21188 }, { "epoch": 0.37792958299147433, "grad_norm": 0.2648969888687134, "learning_rate": 3.912831594020877e-05, "loss": 0.1365, "step": 21189 }, { "epoch": 0.377947419113188, "grad_norm": 0.22943319380283356, "learning_rate": 3.9127031793970946e-05, "loss": 0.1215, "step": 21190 }, { "epoch": 0.3779652552349017, "grad_norm": 0.45379889011383057, "learning_rate": 3.912574759297188e-05, "loss": 0.157, "step": 21191 }, { "epoch": 0.3779830913566154, "grad_norm": 0.2934167683124542, "learning_rate": 3.9124463337216535e-05, "loss": 0.1353, "step": 21192 }, { "epoch": 0.3780009274783291, "grad_norm": 0.2566176950931549, "learning_rate": 3.912317902670989e-05, "loss": 0.1437, "step": 21193 }, { "epoch": 0.37801876360004283, "grad_norm": 0.35906800627708435, "learning_rate": 3.912189466145692e-05, "loss": 0.2763, "step": 21194 }, { "epoch": 0.3780365997217565, "grad_norm": 0.2270839810371399, "learning_rate": 3.9120610241462605e-05, "loss": 0.1636, "step": 21195 }, { "epoch": 0.3780544358434702, "grad_norm": 0.23638534545898438, "learning_rate": 3.9119325766731945e-05, "loss": 0.1472, "step": 21196 }, { "epoch": 0.3780722719651839, "grad_norm": 0.2532123327255249, "learning_rate": 3.9118041237269886e-05, "loss": 0.1549, "step": 21197 }, { "epoch": 0.3780901080868976, "grad_norm": 0.21310847997665405, "learning_rate": 3.9116756653081434e-05, "loss": 0.1577, "step": 21198 }, { "epoch": 0.37810794420861127, "grad_norm": 0.33060526847839355, "learning_rate": 3.911547201417155e-05, "loss": 0.1887, "step": 21199 }, { "epoch": 0.37812578033032496, "grad_norm": 0.3737643361091614, "learning_rate": 3.911418732054522e-05, "loss": 0.1514, "step": 21200 }, { "epoch": 0.37814361645203864, "grad_norm": 0.2087615728378296, "learning_rate": 3.911290257220743e-05, "loss": 0.1471, "step": 21201 }, { "epoch": 0.3781614525737524, "grad_norm": 0.38428646326065063, "learning_rate": 3.9111617769163155e-05, "loss": 0.133, "step": 21202 }, { "epoch": 0.3781792886954661, "grad_norm": 0.3405749499797821, "learning_rate": 3.911033291141738e-05, "loss": 0.126, "step": 21203 }, { "epoch": 0.37819712481717976, "grad_norm": 0.19987717270851135, "learning_rate": 3.9109047998975076e-05, "loss": 0.1207, "step": 21204 }, { "epoch": 0.37821496093889345, "grad_norm": 0.2265322059392929, "learning_rate": 3.9107763031841226e-05, "loss": 0.1377, "step": 21205 }, { "epoch": 0.37823279706060714, "grad_norm": 0.4929906725883484, "learning_rate": 3.910647801002082e-05, "loss": 0.1679, "step": 21206 }, { "epoch": 0.37825063318232083, "grad_norm": 0.25322386622428894, "learning_rate": 3.9105192933518824e-05, "loss": 0.1363, "step": 21207 }, { "epoch": 0.3782684693040345, "grad_norm": 0.19747935235500336, "learning_rate": 3.910390780234023e-05, "loss": 0.1324, "step": 21208 }, { "epoch": 0.3782863054257482, "grad_norm": 0.22482000291347504, "learning_rate": 3.910262261649003e-05, "loss": 0.142, "step": 21209 }, { "epoch": 0.37830414154746195, "grad_norm": 0.2056960016489029, "learning_rate": 3.910133737597318e-05, "loss": 0.1479, "step": 21210 }, { "epoch": 0.37832197766917564, "grad_norm": 0.2445351928472519, "learning_rate": 3.910005208079468e-05, "loss": 0.1177, "step": 21211 }, { "epoch": 0.3783398137908893, "grad_norm": 0.3166850805282593, "learning_rate": 3.9098766730959516e-05, "loss": 0.1354, "step": 21212 }, { "epoch": 0.378357649912603, "grad_norm": 0.3053915798664093, "learning_rate": 3.909748132647265e-05, "loss": 0.1398, "step": 21213 }, { "epoch": 0.3783754860343167, "grad_norm": 0.2880917191505432, "learning_rate": 3.9096195867339085e-05, "loss": 0.1758, "step": 21214 }, { "epoch": 0.3783933221560304, "grad_norm": 0.2977958917617798, "learning_rate": 3.9094910353563795e-05, "loss": 0.1254, "step": 21215 }, { "epoch": 0.3784111582777441, "grad_norm": 0.24911907315254211, "learning_rate": 3.909362478515176e-05, "loss": 0.1166, "step": 21216 }, { "epoch": 0.37842899439945776, "grad_norm": 0.29075637459754944, "learning_rate": 3.9092339162107976e-05, "loss": 0.1511, "step": 21217 }, { "epoch": 0.37844683052117145, "grad_norm": 0.2191636562347412, "learning_rate": 3.9091053484437415e-05, "loss": 0.1333, "step": 21218 }, { "epoch": 0.3784646666428852, "grad_norm": 0.25263407826423645, "learning_rate": 3.908976775214506e-05, "loss": 0.1088, "step": 21219 }, { "epoch": 0.3784825027645989, "grad_norm": 0.18928050994873047, "learning_rate": 3.90884819652359e-05, "loss": 0.1152, "step": 21220 }, { "epoch": 0.37850033888631257, "grad_norm": 0.3013874888420105, "learning_rate": 3.908719612371492e-05, "loss": 0.154, "step": 21221 }, { "epoch": 0.37851817500802626, "grad_norm": 0.21188804507255554, "learning_rate": 3.90859102275871e-05, "loss": 0.1385, "step": 21222 }, { "epoch": 0.37853601112973995, "grad_norm": 0.3053354322910309, "learning_rate": 3.908462427685743e-05, "loss": 0.1564, "step": 21223 }, { "epoch": 0.37855384725145363, "grad_norm": 0.2779475748538971, "learning_rate": 3.908333827153089e-05, "loss": 0.1343, "step": 21224 }, { "epoch": 0.3785716833731673, "grad_norm": 0.37374675273895264, "learning_rate": 3.9082052211612464e-05, "loss": 0.1169, "step": 21225 }, { "epoch": 0.378589519494881, "grad_norm": 0.6503673195838928, "learning_rate": 3.9080766097107144e-05, "loss": 0.3509, "step": 21226 }, { "epoch": 0.37860735561659475, "grad_norm": 0.19449803233146667, "learning_rate": 3.907947992801991e-05, "loss": 0.1154, "step": 21227 }, { "epoch": 0.37862519173830844, "grad_norm": 0.2848609983921051, "learning_rate": 3.9078193704355745e-05, "loss": 0.1827, "step": 21228 }, { "epoch": 0.37864302786002213, "grad_norm": 0.2472040057182312, "learning_rate": 3.907690742611964e-05, "loss": 0.1787, "step": 21229 }, { "epoch": 0.3786608639817358, "grad_norm": 0.26020362973213196, "learning_rate": 3.907562109331658e-05, "loss": 0.1735, "step": 21230 }, { "epoch": 0.3786787001034495, "grad_norm": 0.3421153426170349, "learning_rate": 3.907433470595156e-05, "loss": 0.1924, "step": 21231 }, { "epoch": 0.3786965362251632, "grad_norm": 0.24508266150951385, "learning_rate": 3.907304826402955e-05, "loss": 0.1411, "step": 21232 }, { "epoch": 0.3787143723468769, "grad_norm": 0.2621370553970337, "learning_rate": 3.907176176755555e-05, "loss": 0.1592, "step": 21233 }, { "epoch": 0.37873220846859057, "grad_norm": 0.278695672750473, "learning_rate": 3.907047521653453e-05, "loss": 0.1405, "step": 21234 }, { "epoch": 0.37875004459030426, "grad_norm": 0.40978237986564636, "learning_rate": 3.9069188610971495e-05, "loss": 0.1605, "step": 21235 }, { "epoch": 0.378767880712018, "grad_norm": 0.3006298243999481, "learning_rate": 3.906790195087142e-05, "loss": 0.197, "step": 21236 }, { "epoch": 0.3787857168337317, "grad_norm": 0.3026232421398163, "learning_rate": 3.906661523623931e-05, "loss": 0.1473, "step": 21237 }, { "epoch": 0.3788035529554454, "grad_norm": 0.360795795917511, "learning_rate": 3.9065328467080134e-05, "loss": 0.1819, "step": 21238 }, { "epoch": 0.37882138907715907, "grad_norm": 0.29126667976379395, "learning_rate": 3.9064041643398884e-05, "loss": 0.1964, "step": 21239 }, { "epoch": 0.37883922519887275, "grad_norm": 0.26633015275001526, "learning_rate": 3.906275476520055e-05, "loss": 0.1807, "step": 21240 }, { "epoch": 0.37885706132058644, "grad_norm": 0.20632028579711914, "learning_rate": 3.9061467832490125e-05, "loss": 0.1247, "step": 21241 }, { "epoch": 0.37887489744230013, "grad_norm": 0.31353750824928284, "learning_rate": 3.90601808452726e-05, "loss": 0.1945, "step": 21242 }, { "epoch": 0.3788927335640138, "grad_norm": 0.3836959898471832, "learning_rate": 3.905889380355295e-05, "loss": 0.1782, "step": 21243 }, { "epoch": 0.37891056968572756, "grad_norm": 0.32514551281929016, "learning_rate": 3.9057606707336174e-05, "loss": 0.1609, "step": 21244 }, { "epoch": 0.37892840580744125, "grad_norm": 0.17793434858322144, "learning_rate": 3.905631955662726e-05, "loss": 0.1334, "step": 21245 }, { "epoch": 0.37894624192915494, "grad_norm": 0.22976845502853394, "learning_rate": 3.90550323514312e-05, "loss": 0.168, "step": 21246 }, { "epoch": 0.3789640780508686, "grad_norm": 0.2741779088973999, "learning_rate": 3.905374509175297e-05, "loss": 0.1788, "step": 21247 }, { "epoch": 0.3789819141725823, "grad_norm": 0.3179311752319336, "learning_rate": 3.905245777759757e-05, "loss": 0.1563, "step": 21248 }, { "epoch": 0.378999750294296, "grad_norm": 0.1751677691936493, "learning_rate": 3.905117040896999e-05, "loss": 0.1265, "step": 21249 }, { "epoch": 0.3790175864160097, "grad_norm": 0.27600058913230896, "learning_rate": 3.904988298587524e-05, "loss": 0.1717, "step": 21250 }, { "epoch": 0.3790354225377234, "grad_norm": 0.2622409760951996, "learning_rate": 3.904859550831827e-05, "loss": 0.1359, "step": 21251 }, { "epoch": 0.37905325865943706, "grad_norm": 0.2521669268608093, "learning_rate": 3.90473079763041e-05, "loss": 0.1846, "step": 21252 }, { "epoch": 0.3790710947811508, "grad_norm": 0.2424364686012268, "learning_rate": 3.904602038983771e-05, "loss": 0.1084, "step": 21253 }, { "epoch": 0.3790889309028645, "grad_norm": 0.2561042904853821, "learning_rate": 3.904473274892409e-05, "loss": 0.153, "step": 21254 }, { "epoch": 0.3791067670245782, "grad_norm": 0.3898017704486847, "learning_rate": 3.904344505356824e-05, "loss": 0.1666, "step": 21255 }, { "epoch": 0.37912460314629187, "grad_norm": 0.3199654221534729, "learning_rate": 3.904215730377515e-05, "loss": 0.1522, "step": 21256 }, { "epoch": 0.37914243926800556, "grad_norm": 0.2556706666946411, "learning_rate": 3.9040869499549806e-05, "loss": 0.1583, "step": 21257 }, { "epoch": 0.37916027538971925, "grad_norm": 0.2571445107460022, "learning_rate": 3.9039581640897206e-05, "loss": 0.1755, "step": 21258 }, { "epoch": 0.37917811151143294, "grad_norm": 0.28556835651397705, "learning_rate": 3.9038293727822326e-05, "loss": 0.1232, "step": 21259 }, { "epoch": 0.3791959476331466, "grad_norm": 0.28444764018058777, "learning_rate": 3.903700576033018e-05, "loss": 0.1542, "step": 21260 }, { "epoch": 0.37921378375486037, "grad_norm": 0.35084104537963867, "learning_rate": 3.903571773842575e-05, "loss": 0.2259, "step": 21261 }, { "epoch": 0.37923161987657406, "grad_norm": 0.2079666405916214, "learning_rate": 3.9034429662114026e-05, "loss": 0.0906, "step": 21262 }, { "epoch": 0.37924945599828774, "grad_norm": 0.2521560490131378, "learning_rate": 3.903314153140001e-05, "loss": 0.1741, "step": 21263 }, { "epoch": 0.37926729212000143, "grad_norm": 0.2655588984489441, "learning_rate": 3.903185334628869e-05, "loss": 0.1366, "step": 21264 }, { "epoch": 0.3792851282417151, "grad_norm": 0.263223797082901, "learning_rate": 3.903056510678506e-05, "loss": 0.2033, "step": 21265 }, { "epoch": 0.3793029643634288, "grad_norm": 0.2996566593647003, "learning_rate": 3.902927681289411e-05, "loss": 0.199, "step": 21266 }, { "epoch": 0.3793208004851425, "grad_norm": 0.34763407707214355, "learning_rate": 3.902798846462085e-05, "loss": 0.2062, "step": 21267 }, { "epoch": 0.3793386366068562, "grad_norm": 0.2197709083557129, "learning_rate": 3.902670006197024e-05, "loss": 0.1169, "step": 21268 }, { "epoch": 0.3793564727285699, "grad_norm": 0.23021401464939117, "learning_rate": 3.902541160494732e-05, "loss": 0.1165, "step": 21269 }, { "epoch": 0.3793743088502836, "grad_norm": 0.36089032888412476, "learning_rate": 3.902412309355704e-05, "loss": 0.1859, "step": 21270 }, { "epoch": 0.3793921449719973, "grad_norm": 0.17536687850952148, "learning_rate": 3.9022834527804425e-05, "loss": 0.1293, "step": 21271 }, { "epoch": 0.379409981093711, "grad_norm": 0.22288085520267487, "learning_rate": 3.902154590769446e-05, "loss": 0.2072, "step": 21272 }, { "epoch": 0.3794278172154247, "grad_norm": 0.32936733961105347, "learning_rate": 3.902025723323214e-05, "loss": 0.1509, "step": 21273 }, { "epoch": 0.37944565333713837, "grad_norm": 0.22451190650463104, "learning_rate": 3.901896850442246e-05, "loss": 0.1763, "step": 21274 }, { "epoch": 0.37946348945885205, "grad_norm": 0.309291809797287, "learning_rate": 3.9017679721270415e-05, "loss": 0.1725, "step": 21275 }, { "epoch": 0.37948132558056574, "grad_norm": 0.3289521038532257, "learning_rate": 3.9016390883781e-05, "loss": 0.1937, "step": 21276 }, { "epoch": 0.37949916170227943, "grad_norm": 0.26594680547714233, "learning_rate": 3.9015101991959215e-05, "loss": 0.1087, "step": 21277 }, { "epoch": 0.3795169978239932, "grad_norm": 0.35846877098083496, "learning_rate": 3.9013813045810054e-05, "loss": 0.1338, "step": 21278 }, { "epoch": 0.37953483394570686, "grad_norm": 0.23709867894649506, "learning_rate": 3.901252404533851e-05, "loss": 0.1589, "step": 21279 }, { "epoch": 0.37955267006742055, "grad_norm": 0.37287458777427673, "learning_rate": 3.901123499054959e-05, "loss": 0.1838, "step": 21280 }, { "epoch": 0.37957050618913424, "grad_norm": 0.21567387878894806, "learning_rate": 3.900994588144828e-05, "loss": 0.1388, "step": 21281 }, { "epoch": 0.3795883423108479, "grad_norm": 0.2750032842159271, "learning_rate": 3.9008656718039585e-05, "loss": 0.1665, "step": 21282 }, { "epoch": 0.3796061784325616, "grad_norm": 0.23288658261299133, "learning_rate": 3.900736750032849e-05, "loss": 0.14, "step": 21283 }, { "epoch": 0.3796240145542753, "grad_norm": 0.21842624247074127, "learning_rate": 3.900607822832001e-05, "loss": 0.1446, "step": 21284 }, { "epoch": 0.379641850675989, "grad_norm": 0.25891900062561035, "learning_rate": 3.900478890201913e-05, "loss": 0.1166, "step": 21285 }, { "epoch": 0.37965968679770273, "grad_norm": 0.28322353959083557, "learning_rate": 3.9003499521430844e-05, "loss": 0.1617, "step": 21286 }, { "epoch": 0.3796775229194164, "grad_norm": 0.24988368153572083, "learning_rate": 3.9002210086560165e-05, "loss": 0.1246, "step": 21287 }, { "epoch": 0.3796953590411301, "grad_norm": 0.2221831977367401, "learning_rate": 3.9000920597412076e-05, "loss": 0.139, "step": 21288 }, { "epoch": 0.3797131951628438, "grad_norm": 0.24445988237857819, "learning_rate": 3.899963105399159e-05, "loss": 0.1336, "step": 21289 }, { "epoch": 0.3797310312845575, "grad_norm": 0.2632139325141907, "learning_rate": 3.89983414563037e-05, "loss": 0.1645, "step": 21290 }, { "epoch": 0.3797488674062712, "grad_norm": 0.3205166757106781, "learning_rate": 3.8997051804353395e-05, "loss": 0.0936, "step": 21291 }, { "epoch": 0.37976670352798486, "grad_norm": 0.29358381032943726, "learning_rate": 3.899576209814569e-05, "loss": 0.1812, "step": 21292 }, { "epoch": 0.37978453964969855, "grad_norm": 0.2555774748325348, "learning_rate": 3.899447233768557e-05, "loss": 0.1811, "step": 21293 }, { "epoch": 0.37980237577141224, "grad_norm": 0.32090839743614197, "learning_rate": 3.899318252297805e-05, "loss": 0.2228, "step": 21294 }, { "epoch": 0.379820211893126, "grad_norm": 0.2669735848903656, "learning_rate": 3.8991892654028115e-05, "loss": 0.1243, "step": 21295 }, { "epoch": 0.37983804801483967, "grad_norm": 0.2927315831184387, "learning_rate": 3.8990602730840774e-05, "loss": 0.155, "step": 21296 }, { "epoch": 0.37985588413655336, "grad_norm": 0.2219819873571396, "learning_rate": 3.898931275342104e-05, "loss": 0.17, "step": 21297 }, { "epoch": 0.37987372025826704, "grad_norm": 0.23766762018203735, "learning_rate": 3.898802272177388e-05, "loss": 0.1575, "step": 21298 }, { "epoch": 0.37989155637998073, "grad_norm": 0.24942241609096527, "learning_rate": 3.898673263590431e-05, "loss": 0.1357, "step": 21299 }, { "epoch": 0.3799093925016944, "grad_norm": 0.30427879095077515, "learning_rate": 3.8985442495817345e-05, "loss": 0.1702, "step": 21300 }, { "epoch": 0.3799272286234081, "grad_norm": 0.2935236692428589, "learning_rate": 3.898415230151796e-05, "loss": 0.1042, "step": 21301 }, { "epoch": 0.3799450647451218, "grad_norm": 0.2680317163467407, "learning_rate": 3.898286205301118e-05, "loss": 0.1427, "step": 21302 }, { "epoch": 0.37996290086683554, "grad_norm": 0.3771883249282837, "learning_rate": 3.8981571750302e-05, "loss": 0.1681, "step": 21303 }, { "epoch": 0.37998073698854923, "grad_norm": 0.20892265439033508, "learning_rate": 3.898028139339542e-05, "loss": 0.1501, "step": 21304 }, { "epoch": 0.3799985731102629, "grad_norm": 0.23447923362255096, "learning_rate": 3.897899098229643e-05, "loss": 0.1736, "step": 21305 }, { "epoch": 0.3800164092319766, "grad_norm": 0.28292062878608704, "learning_rate": 3.897770051701005e-05, "loss": 0.1891, "step": 21306 }, { "epoch": 0.3800342453536903, "grad_norm": 0.20241063833236694, "learning_rate": 3.8976409997541276e-05, "loss": 0.1402, "step": 21307 }, { "epoch": 0.380052081475404, "grad_norm": 0.24780890345573425, "learning_rate": 3.897511942389511e-05, "loss": 0.151, "step": 21308 }, { "epoch": 0.38006991759711767, "grad_norm": 0.2711176574230194, "learning_rate": 3.897382879607655e-05, "loss": 0.1622, "step": 21309 }, { "epoch": 0.38008775371883136, "grad_norm": 0.2536943554878235, "learning_rate": 3.8972538114090605e-05, "loss": 0.1352, "step": 21310 }, { "epoch": 0.3801055898405451, "grad_norm": 0.2562722861766815, "learning_rate": 3.897124737794228e-05, "loss": 0.181, "step": 21311 }, { "epoch": 0.3801234259622588, "grad_norm": 0.3270852565765381, "learning_rate": 3.896995658763657e-05, "loss": 0.1412, "step": 21312 }, { "epoch": 0.3801412620839725, "grad_norm": 0.22758400440216064, "learning_rate": 3.8968665743178484e-05, "loss": 0.1631, "step": 21313 }, { "epoch": 0.38015909820568616, "grad_norm": 0.23497611284255981, "learning_rate": 3.8967374844573026e-05, "loss": 0.2003, "step": 21314 }, { "epoch": 0.38017693432739985, "grad_norm": 0.3024590313434601, "learning_rate": 3.89660838918252e-05, "loss": 0.1772, "step": 21315 }, { "epoch": 0.38019477044911354, "grad_norm": 0.29801487922668457, "learning_rate": 3.8964792884940004e-05, "loss": 0.1364, "step": 21316 }, { "epoch": 0.3802126065708272, "grad_norm": 0.3958665430545807, "learning_rate": 3.8963501823922456e-05, "loss": 0.2566, "step": 21317 }, { "epoch": 0.3802304426925409, "grad_norm": 0.24767561256885529, "learning_rate": 3.896221070877754e-05, "loss": 0.1649, "step": 21318 }, { "epoch": 0.3802482788142546, "grad_norm": 0.2352449893951416, "learning_rate": 3.8960919539510284e-05, "loss": 0.1362, "step": 21319 }, { "epoch": 0.38026611493596835, "grad_norm": 0.3384075164794922, "learning_rate": 3.8959628316125675e-05, "loss": 0.1882, "step": 21320 }, { "epoch": 0.38028395105768203, "grad_norm": 0.43732476234436035, "learning_rate": 3.895833703862873e-05, "loss": 0.1852, "step": 21321 }, { "epoch": 0.3803017871793957, "grad_norm": 0.2435319423675537, "learning_rate": 3.895704570702444e-05, "loss": 0.1486, "step": 21322 }, { "epoch": 0.3803196233011094, "grad_norm": 0.23220938444137573, "learning_rate": 3.8955754321317833e-05, "loss": 0.1757, "step": 21323 }, { "epoch": 0.3803374594228231, "grad_norm": 0.3116278350353241, "learning_rate": 3.89544628815139e-05, "loss": 0.1468, "step": 21324 }, { "epoch": 0.3803552955445368, "grad_norm": 0.2871105670928955, "learning_rate": 3.8953171387617644e-05, "loss": 0.1331, "step": 21325 }, { "epoch": 0.3803731316662505, "grad_norm": 0.3474808931350708, "learning_rate": 3.895187983963408e-05, "loss": 0.1122, "step": 21326 }, { "epoch": 0.38039096778796416, "grad_norm": 0.2743847370147705, "learning_rate": 3.895058823756821e-05, "loss": 0.134, "step": 21327 }, { "epoch": 0.3804088039096779, "grad_norm": 0.23974527418613434, "learning_rate": 3.8949296581425044e-05, "loss": 0.1466, "step": 21328 }, { "epoch": 0.3804266400313916, "grad_norm": 0.2747625708580017, "learning_rate": 3.8948004871209576e-05, "loss": 0.2196, "step": 21329 }, { "epoch": 0.3804444761531053, "grad_norm": 0.1899239718914032, "learning_rate": 3.894671310692684e-05, "loss": 0.0982, "step": 21330 }, { "epoch": 0.38046231227481897, "grad_norm": 0.3362436294555664, "learning_rate": 3.8945421288581807e-05, "loss": 0.1282, "step": 21331 }, { "epoch": 0.38048014839653266, "grad_norm": 0.3614468276500702, "learning_rate": 3.894412941617952e-05, "loss": 0.176, "step": 21332 }, { "epoch": 0.38049798451824635, "grad_norm": 0.2632768452167511, "learning_rate": 3.894283748972496e-05, "loss": 0.1586, "step": 21333 }, { "epoch": 0.38051582063996003, "grad_norm": 0.26188868284225464, "learning_rate": 3.894154550922315e-05, "loss": 0.1101, "step": 21334 }, { "epoch": 0.3805336567616737, "grad_norm": 0.30740755796432495, "learning_rate": 3.89402534746791e-05, "loss": 0.2337, "step": 21335 }, { "epoch": 0.3805514928833874, "grad_norm": 0.3170974552631378, "learning_rate": 3.893896138609782e-05, "loss": 0.1768, "step": 21336 }, { "epoch": 0.38056932900510115, "grad_norm": 0.3016303777694702, "learning_rate": 3.8937669243484296e-05, "loss": 0.173, "step": 21337 }, { "epoch": 0.38058716512681484, "grad_norm": 0.2653926908969879, "learning_rate": 3.893637704684356e-05, "loss": 0.1472, "step": 21338 }, { "epoch": 0.38060500124852853, "grad_norm": 0.19086246192455292, "learning_rate": 3.893508479618061e-05, "loss": 0.1832, "step": 21339 }, { "epoch": 0.3806228373702422, "grad_norm": 0.2814791798591614, "learning_rate": 3.893379249150045e-05, "loss": 0.1481, "step": 21340 }, { "epoch": 0.3806406734919559, "grad_norm": 0.24880683422088623, "learning_rate": 3.893250013280811e-05, "loss": 0.1455, "step": 21341 }, { "epoch": 0.3806585096136696, "grad_norm": 0.2275916486978531, "learning_rate": 3.893120772010859e-05, "loss": 0.1565, "step": 21342 }, { "epoch": 0.3806763457353833, "grad_norm": 0.1871194988489151, "learning_rate": 3.892991525340689e-05, "loss": 0.1073, "step": 21343 }, { "epoch": 0.38069418185709697, "grad_norm": 0.24523408710956573, "learning_rate": 3.892862273270803e-05, "loss": 0.1133, "step": 21344 }, { "epoch": 0.3807120179788107, "grad_norm": 0.1998000293970108, "learning_rate": 3.8927330158017016e-05, "loss": 0.1473, "step": 21345 }, { "epoch": 0.3807298541005244, "grad_norm": 0.23183801770210266, "learning_rate": 3.8926037529338855e-05, "loss": 0.1417, "step": 21346 }, { "epoch": 0.3807476902222381, "grad_norm": 0.3325054347515106, "learning_rate": 3.8924744846678566e-05, "loss": 0.1371, "step": 21347 }, { "epoch": 0.3807655263439518, "grad_norm": 0.23030337691307068, "learning_rate": 3.892345211004116e-05, "loss": 0.13, "step": 21348 }, { "epoch": 0.38078336246566546, "grad_norm": 0.28740453720092773, "learning_rate": 3.892215931943164e-05, "loss": 0.1826, "step": 21349 }, { "epoch": 0.38080119858737915, "grad_norm": 0.2430138736963272, "learning_rate": 3.892086647485503e-05, "loss": 0.1241, "step": 21350 }, { "epoch": 0.38081903470909284, "grad_norm": 0.22843456268310547, "learning_rate": 3.8919573576316323e-05, "loss": 0.1501, "step": 21351 }, { "epoch": 0.38083687083080653, "grad_norm": 0.3002528250217438, "learning_rate": 3.891828062382055e-05, "loss": 0.2014, "step": 21352 }, { "epoch": 0.3808547069525202, "grad_norm": 0.2832220494747162, "learning_rate": 3.891698761737271e-05, "loss": 0.2018, "step": 21353 }, { "epoch": 0.38087254307423396, "grad_norm": 0.22214335203170776, "learning_rate": 3.8915694556977825e-05, "loss": 0.1408, "step": 21354 }, { "epoch": 0.38089037919594765, "grad_norm": 0.21637408435344696, "learning_rate": 3.891440144264089e-05, "loss": 0.1366, "step": 21355 }, { "epoch": 0.38090821531766134, "grad_norm": 0.2187330573797226, "learning_rate": 3.8913108274366935e-05, "loss": 0.1432, "step": 21356 }, { "epoch": 0.380926051439375, "grad_norm": 0.42244794964790344, "learning_rate": 3.891181505216096e-05, "loss": 0.1338, "step": 21357 }, { "epoch": 0.3809438875610887, "grad_norm": 0.22175978124141693, "learning_rate": 3.8910521776027995e-05, "loss": 0.1336, "step": 21358 }, { "epoch": 0.3809617236828024, "grad_norm": 0.3732893466949463, "learning_rate": 3.8909228445973045e-05, "loss": 0.1501, "step": 21359 }, { "epoch": 0.3809795598045161, "grad_norm": 0.24530957639217377, "learning_rate": 3.8907935062001114e-05, "loss": 0.15, "step": 21360 }, { "epoch": 0.3809973959262298, "grad_norm": 0.26988792419433594, "learning_rate": 3.890664162411722e-05, "loss": 0.1167, "step": 21361 }, { "epoch": 0.3810152320479435, "grad_norm": 0.2606840431690216, "learning_rate": 3.8905348132326394e-05, "loss": 0.1496, "step": 21362 }, { "epoch": 0.3810330681696572, "grad_norm": 0.3573759198188782, "learning_rate": 3.8904054586633627e-05, "loss": 0.1985, "step": 21363 }, { "epoch": 0.3810509042913709, "grad_norm": 0.25282537937164307, "learning_rate": 3.890276098704394e-05, "loss": 0.1786, "step": 21364 }, { "epoch": 0.3810687404130846, "grad_norm": 0.24276134371757507, "learning_rate": 3.890146733356235e-05, "loss": 0.1014, "step": 21365 }, { "epoch": 0.38108657653479827, "grad_norm": 0.2656461298465729, "learning_rate": 3.890017362619387e-05, "loss": 0.167, "step": 21366 }, { "epoch": 0.38110441265651196, "grad_norm": 0.2616214454174042, "learning_rate": 3.8898879864943524e-05, "loss": 0.1631, "step": 21367 }, { "epoch": 0.38112224877822565, "grad_norm": 0.253395140171051, "learning_rate": 3.889758604981631e-05, "loss": 0.1479, "step": 21368 }, { "epoch": 0.38114008489993934, "grad_norm": 0.26738929748535156, "learning_rate": 3.889629218081726e-05, "loss": 0.1797, "step": 21369 }, { "epoch": 0.3811579210216531, "grad_norm": 0.2278267741203308, "learning_rate": 3.8894998257951376e-05, "loss": 0.1458, "step": 21370 }, { "epoch": 0.38117575714336677, "grad_norm": 0.2141774445772171, "learning_rate": 3.889370428122369e-05, "loss": 0.1265, "step": 21371 }, { "epoch": 0.38119359326508045, "grad_norm": 0.23725609481334686, "learning_rate": 3.889241025063919e-05, "loss": 0.1922, "step": 21372 }, { "epoch": 0.38121142938679414, "grad_norm": 0.2640169858932495, "learning_rate": 3.889111616620292e-05, "loss": 0.1573, "step": 21373 }, { "epoch": 0.38122926550850783, "grad_norm": 0.29436805844306946, "learning_rate": 3.888982202791989e-05, "loss": 0.1488, "step": 21374 }, { "epoch": 0.3812471016302215, "grad_norm": 0.35168540477752686, "learning_rate": 3.888852783579511e-05, "loss": 0.1973, "step": 21375 }, { "epoch": 0.3812649377519352, "grad_norm": 0.2316911369562149, "learning_rate": 3.8887233589833595e-05, "loss": 0.1076, "step": 21376 }, { "epoch": 0.3812827738736489, "grad_norm": 0.2865270972251892, "learning_rate": 3.8885939290040364e-05, "loss": 0.1776, "step": 21377 }, { "epoch": 0.3813006099953626, "grad_norm": 0.22280453145503998, "learning_rate": 3.888464493642045e-05, "loss": 0.1348, "step": 21378 }, { "epoch": 0.3813184461170763, "grad_norm": 0.3431093990802765, "learning_rate": 3.8883350528978836e-05, "loss": 0.2177, "step": 21379 }, { "epoch": 0.38133628223879, "grad_norm": 0.24846325814723969, "learning_rate": 3.8882056067720573e-05, "loss": 0.1498, "step": 21380 }, { "epoch": 0.3813541183605037, "grad_norm": 0.29790768027305603, "learning_rate": 3.888076155265066e-05, "loss": 0.1658, "step": 21381 }, { "epoch": 0.3813719544822174, "grad_norm": 0.3084394931793213, "learning_rate": 3.8879466983774124e-05, "loss": 0.1668, "step": 21382 }, { "epoch": 0.3813897906039311, "grad_norm": 0.3174588978290558, "learning_rate": 3.887817236109598e-05, "loss": 0.1738, "step": 21383 }, { "epoch": 0.38140762672564477, "grad_norm": 0.24115942418575287, "learning_rate": 3.887687768462125e-05, "loss": 0.1534, "step": 21384 }, { "epoch": 0.38142546284735845, "grad_norm": 0.2526243031024933, "learning_rate": 3.887558295435495e-05, "loss": 0.127, "step": 21385 }, { "epoch": 0.38144329896907214, "grad_norm": 0.3192991316318512, "learning_rate": 3.8874288170302095e-05, "loss": 0.1741, "step": 21386 }, { "epoch": 0.3814611350907859, "grad_norm": 0.27901574969291687, "learning_rate": 3.88729933324677e-05, "loss": 0.1616, "step": 21387 }, { "epoch": 0.3814789712124996, "grad_norm": 0.2452971637248993, "learning_rate": 3.88716984408568e-05, "loss": 0.1622, "step": 21388 }, { "epoch": 0.38149680733421326, "grad_norm": 0.422721803188324, "learning_rate": 3.8870403495474404e-05, "loss": 0.2254, "step": 21389 }, { "epoch": 0.38151464345592695, "grad_norm": 0.31347596645355225, "learning_rate": 3.8869108496325534e-05, "loss": 0.2171, "step": 21390 }, { "epoch": 0.38153247957764064, "grad_norm": 0.23665069043636322, "learning_rate": 3.886781344341521e-05, "loss": 0.1526, "step": 21391 }, { "epoch": 0.3815503156993543, "grad_norm": 0.3179226219654083, "learning_rate": 3.8866518336748445e-05, "loss": 0.1996, "step": 21392 }, { "epoch": 0.381568151821068, "grad_norm": 0.2677600383758545, "learning_rate": 3.8865223176330275e-05, "loss": 0.161, "step": 21393 }, { "epoch": 0.3815859879427817, "grad_norm": 0.253722220659256, "learning_rate": 3.8863927962165704e-05, "loss": 0.1984, "step": 21394 }, { "epoch": 0.3816038240644954, "grad_norm": 0.2372373342514038, "learning_rate": 3.886263269425976e-05, "loss": 0.1393, "step": 21395 }, { "epoch": 0.38162166018620913, "grad_norm": 0.24138881266117096, "learning_rate": 3.8861337372617466e-05, "loss": 0.181, "step": 21396 }, { "epoch": 0.3816394963079228, "grad_norm": 0.24450714886188507, "learning_rate": 3.886004199724385e-05, "loss": 0.1359, "step": 21397 }, { "epoch": 0.3816573324296365, "grad_norm": 0.20878930389881134, "learning_rate": 3.8858746568143914e-05, "loss": 0.1351, "step": 21398 }, { "epoch": 0.3816751685513502, "grad_norm": 0.31570184230804443, "learning_rate": 3.8857451085322684e-05, "loss": 0.2223, "step": 21399 }, { "epoch": 0.3816930046730639, "grad_norm": 0.3362729549407959, "learning_rate": 3.885615554878519e-05, "loss": 0.2215, "step": 21400 }, { "epoch": 0.3817108407947776, "grad_norm": 0.23103202879428864, "learning_rate": 3.885485995853646e-05, "loss": 0.1711, "step": 21401 }, { "epoch": 0.38172867691649126, "grad_norm": 0.20432880520820618, "learning_rate": 3.88535643145815e-05, "loss": 0.1724, "step": 21402 }, { "epoch": 0.38174651303820495, "grad_norm": 0.23820751905441284, "learning_rate": 3.885226861692534e-05, "loss": 0.1335, "step": 21403 }, { "epoch": 0.3817643491599187, "grad_norm": 0.2773251235485077, "learning_rate": 3.885097286557301e-05, "loss": 0.1557, "step": 21404 }, { "epoch": 0.3817821852816324, "grad_norm": 0.28145670890808105, "learning_rate": 3.884967706052952e-05, "loss": 0.1891, "step": 21405 }, { "epoch": 0.38180002140334607, "grad_norm": 0.2892676591873169, "learning_rate": 3.88483812017999e-05, "loss": 0.1466, "step": 21406 }, { "epoch": 0.38181785752505976, "grad_norm": 0.20915253460407257, "learning_rate": 3.884708528938916e-05, "loss": 0.1048, "step": 21407 }, { "epoch": 0.38183569364677344, "grad_norm": 0.27298206090927124, "learning_rate": 3.884578932330235e-05, "loss": 0.1788, "step": 21408 }, { "epoch": 0.38185352976848713, "grad_norm": 0.21241484582424164, "learning_rate": 3.884449330354447e-05, "loss": 0.1282, "step": 21409 }, { "epoch": 0.3818713658902008, "grad_norm": 0.23862679302692413, "learning_rate": 3.8843197230120555e-05, "loss": 0.1351, "step": 21410 }, { "epoch": 0.3818892020119145, "grad_norm": 0.33557847142219543, "learning_rate": 3.884190110303563e-05, "loss": 0.1403, "step": 21411 }, { "epoch": 0.38190703813362825, "grad_norm": 0.26551353931427, "learning_rate": 3.884060492229471e-05, "loss": 0.1894, "step": 21412 }, { "epoch": 0.38192487425534194, "grad_norm": 0.39905598759651184, "learning_rate": 3.883930868790282e-05, "loss": 0.2243, "step": 21413 }, { "epoch": 0.3819427103770556, "grad_norm": 0.3208553194999695, "learning_rate": 3.8838012399865006e-05, "loss": 0.1497, "step": 21414 }, { "epoch": 0.3819605464987693, "grad_norm": 0.2847328782081604, "learning_rate": 3.883671605818626e-05, "loss": 0.1947, "step": 21415 }, { "epoch": 0.381978382620483, "grad_norm": 0.3341546654701233, "learning_rate": 3.883541966287163e-05, "loss": 0.1793, "step": 21416 }, { "epoch": 0.3819962187421967, "grad_norm": 0.2877810597419739, "learning_rate": 3.883412321392614e-05, "loss": 0.1468, "step": 21417 }, { "epoch": 0.3820140548639104, "grad_norm": 0.26836660504341125, "learning_rate": 3.88328267113548e-05, "loss": 0.1618, "step": 21418 }, { "epoch": 0.38203189098562407, "grad_norm": 0.287304550409317, "learning_rate": 3.883153015516266e-05, "loss": 0.1516, "step": 21419 }, { "epoch": 0.38204972710733776, "grad_norm": 0.23285901546478271, "learning_rate": 3.883023354535472e-05, "loss": 0.1471, "step": 21420 }, { "epoch": 0.3820675632290515, "grad_norm": 0.2514244019985199, "learning_rate": 3.882893688193602e-05, "loss": 0.1369, "step": 21421 }, { "epoch": 0.3820853993507652, "grad_norm": 0.2865428328514099, "learning_rate": 3.8827640164911586e-05, "loss": 0.2377, "step": 21422 }, { "epoch": 0.3821032354724789, "grad_norm": 0.2535489499568939, "learning_rate": 3.882634339428643e-05, "loss": 0.1507, "step": 21423 }, { "epoch": 0.38212107159419256, "grad_norm": 0.26763004064559937, "learning_rate": 3.882504657006561e-05, "loss": 0.1703, "step": 21424 }, { "epoch": 0.38213890771590625, "grad_norm": 0.3215639889240265, "learning_rate": 3.882374969225413e-05, "loss": 0.1775, "step": 21425 }, { "epoch": 0.38215674383761994, "grad_norm": 0.23614788055419922, "learning_rate": 3.882245276085702e-05, "loss": 0.1262, "step": 21426 }, { "epoch": 0.3821745799593336, "grad_norm": 0.2856839895248413, "learning_rate": 3.882115577587931e-05, "loss": 0.142, "step": 21427 }, { "epoch": 0.3821924160810473, "grad_norm": 0.228141650557518, "learning_rate": 3.881985873732603e-05, "loss": 0.1919, "step": 21428 }, { "epoch": 0.38221025220276106, "grad_norm": 0.198617085814476, "learning_rate": 3.881856164520219e-05, "loss": 0.123, "step": 21429 }, { "epoch": 0.38222808832447475, "grad_norm": 0.3066501319408417, "learning_rate": 3.8817264499512846e-05, "loss": 0.1867, "step": 21430 }, { "epoch": 0.38224592444618843, "grad_norm": 0.24149169027805328, "learning_rate": 3.881596730026301e-05, "loss": 0.1022, "step": 21431 }, { "epoch": 0.3822637605679021, "grad_norm": 0.22939139604568481, "learning_rate": 3.8814670047457715e-05, "loss": 0.1746, "step": 21432 }, { "epoch": 0.3822815966896158, "grad_norm": 0.26302871108055115, "learning_rate": 3.881337274110197e-05, "loss": 0.1833, "step": 21433 }, { "epoch": 0.3822994328113295, "grad_norm": 0.2806277275085449, "learning_rate": 3.881207538120084e-05, "loss": 0.2147, "step": 21434 }, { "epoch": 0.3823172689330432, "grad_norm": 0.24292179942131042, "learning_rate": 3.881077796775933e-05, "loss": 0.1512, "step": 21435 }, { "epoch": 0.3823351050547569, "grad_norm": 0.4341593384742737, "learning_rate": 3.8809480500782474e-05, "loss": 0.1599, "step": 21436 }, { "epoch": 0.38235294117647056, "grad_norm": 0.24315306544303894, "learning_rate": 3.88081829802753e-05, "loss": 0.0894, "step": 21437 }, { "epoch": 0.3823707772981843, "grad_norm": 0.1908676028251648, "learning_rate": 3.8806885406242844e-05, "loss": 0.1528, "step": 21438 }, { "epoch": 0.382388613419898, "grad_norm": 0.30401498079299927, "learning_rate": 3.880558777869013e-05, "loss": 0.1994, "step": 21439 }, { "epoch": 0.3824064495416117, "grad_norm": 0.2861475646495819, "learning_rate": 3.880429009762219e-05, "loss": 0.1818, "step": 21440 }, { "epoch": 0.38242428566332537, "grad_norm": 0.25684410333633423, "learning_rate": 3.880299236304405e-05, "loss": 0.0947, "step": 21441 }, { "epoch": 0.38244212178503906, "grad_norm": 0.33284851908683777, "learning_rate": 3.880169457496075e-05, "loss": 0.1738, "step": 21442 }, { "epoch": 0.38245995790675275, "grad_norm": 0.3143170177936554, "learning_rate": 3.880039673337731e-05, "loss": 0.2008, "step": 21443 }, { "epoch": 0.38247779402846643, "grad_norm": 0.29654011130332947, "learning_rate": 3.879909883829877e-05, "loss": 0.1546, "step": 21444 }, { "epoch": 0.3824956301501801, "grad_norm": 0.1805470585823059, "learning_rate": 3.879780088973016e-05, "loss": 0.14, "step": 21445 }, { "epoch": 0.38251346627189386, "grad_norm": 0.2544291317462921, "learning_rate": 3.87965028876765e-05, "loss": 0.1599, "step": 21446 }, { "epoch": 0.38253130239360755, "grad_norm": 0.24353724718093872, "learning_rate": 3.879520483214283e-05, "loss": 0.1858, "step": 21447 }, { "epoch": 0.38254913851532124, "grad_norm": 0.23016461730003357, "learning_rate": 3.879390672313418e-05, "loss": 0.1382, "step": 21448 }, { "epoch": 0.38256697463703493, "grad_norm": 0.26850953698158264, "learning_rate": 3.8792608560655594e-05, "loss": 0.1737, "step": 21449 }, { "epoch": 0.3825848107587486, "grad_norm": 0.3238796889781952, "learning_rate": 3.879131034471208e-05, "loss": 0.1482, "step": 21450 }, { "epoch": 0.3826026468804623, "grad_norm": 0.3209700584411621, "learning_rate": 3.879001207530869e-05, "loss": 0.1852, "step": 21451 }, { "epoch": 0.382620483002176, "grad_norm": 0.25144535303115845, "learning_rate": 3.878871375245045e-05, "loss": 0.1725, "step": 21452 }, { "epoch": 0.3826383191238897, "grad_norm": 0.21510393917560577, "learning_rate": 3.87874153761424e-05, "loss": 0.1507, "step": 21453 }, { "epoch": 0.38265615524560337, "grad_norm": 0.258568674325943, "learning_rate": 3.878611694638955e-05, "loss": 0.1819, "step": 21454 }, { "epoch": 0.3826739913673171, "grad_norm": 0.20808269083499908, "learning_rate": 3.8784818463196956e-05, "loss": 0.1074, "step": 21455 }, { "epoch": 0.3826918274890308, "grad_norm": 0.2588508427143097, "learning_rate": 3.878351992656966e-05, "loss": 0.1693, "step": 21456 }, { "epoch": 0.3827096636107445, "grad_norm": 0.22035464644432068, "learning_rate": 3.878222133651266e-05, "loss": 0.1918, "step": 21457 }, { "epoch": 0.3827274997324582, "grad_norm": 0.2975381016731262, "learning_rate": 3.878092269303102e-05, "loss": 0.1943, "step": 21458 }, { "epoch": 0.38274533585417186, "grad_norm": 0.26722148060798645, "learning_rate": 3.8779623996129753e-05, "loss": 0.1313, "step": 21459 }, { "epoch": 0.38276317197588555, "grad_norm": 0.2953987121582031, "learning_rate": 3.877832524581392e-05, "loss": 0.1735, "step": 21460 }, { "epoch": 0.38278100809759924, "grad_norm": 0.3316297233104706, "learning_rate": 3.877702644208853e-05, "loss": 0.2455, "step": 21461 }, { "epoch": 0.38279884421931293, "grad_norm": 0.32206907868385315, "learning_rate": 3.8775727584958625e-05, "loss": 0.1248, "step": 21462 }, { "epoch": 0.38281668034102667, "grad_norm": 0.25685063004493713, "learning_rate": 3.8774428674429245e-05, "loss": 0.1143, "step": 21463 }, { "epoch": 0.38283451646274036, "grad_norm": 0.2138339728116989, "learning_rate": 3.877312971050542e-05, "loss": 0.1517, "step": 21464 }, { "epoch": 0.38285235258445405, "grad_norm": 0.2990202009677887, "learning_rate": 3.877183069319219e-05, "loss": 0.1569, "step": 21465 }, { "epoch": 0.38287018870616774, "grad_norm": 0.23563086986541748, "learning_rate": 3.8770531622494585e-05, "loss": 0.1687, "step": 21466 }, { "epoch": 0.3828880248278814, "grad_norm": 0.2641627788543701, "learning_rate": 3.8769232498417655e-05, "loss": 0.1356, "step": 21467 }, { "epoch": 0.3829058609495951, "grad_norm": 0.30582690238952637, "learning_rate": 3.876793332096641e-05, "loss": 0.1541, "step": 21468 }, { "epoch": 0.3829236970713088, "grad_norm": 0.3575577437877655, "learning_rate": 3.8766634090145904e-05, "loss": 0.1483, "step": 21469 }, { "epoch": 0.3829415331930225, "grad_norm": 0.44699031114578247, "learning_rate": 3.876533480596117e-05, "loss": 0.1528, "step": 21470 }, { "epoch": 0.38295936931473623, "grad_norm": 0.36194881796836853, "learning_rate": 3.876403546841725e-05, "loss": 0.1608, "step": 21471 }, { "epoch": 0.3829772054364499, "grad_norm": 0.24675357341766357, "learning_rate": 3.876273607751916e-05, "loss": 0.1608, "step": 21472 }, { "epoch": 0.3829950415581636, "grad_norm": 0.3369715213775635, "learning_rate": 3.876143663327196e-05, "loss": 0.217, "step": 21473 }, { "epoch": 0.3830128776798773, "grad_norm": 0.355873167514801, "learning_rate": 3.876013713568068e-05, "loss": 0.2866, "step": 21474 }, { "epoch": 0.383030713801591, "grad_norm": 0.21274901926517487, "learning_rate": 3.8758837584750354e-05, "loss": 0.1613, "step": 21475 }, { "epoch": 0.38304854992330467, "grad_norm": 0.19465862214565277, "learning_rate": 3.875753798048603e-05, "loss": 0.1117, "step": 21476 }, { "epoch": 0.38306638604501836, "grad_norm": 0.1701757162809372, "learning_rate": 3.8756238322892724e-05, "loss": 0.1219, "step": 21477 }, { "epoch": 0.38308422216673205, "grad_norm": 0.3683616816997528, "learning_rate": 3.875493861197549e-05, "loss": 0.1381, "step": 21478 }, { "epoch": 0.38310205828844573, "grad_norm": 0.2715027332305908, "learning_rate": 3.875363884773936e-05, "loss": 0.1118, "step": 21479 }, { "epoch": 0.3831198944101595, "grad_norm": 0.20807315409183502, "learning_rate": 3.8752339030189384e-05, "loss": 0.1491, "step": 21480 }, { "epoch": 0.38313773053187317, "grad_norm": 0.23150336742401123, "learning_rate": 3.875103915933059e-05, "loss": 0.1436, "step": 21481 }, { "epoch": 0.38315556665358685, "grad_norm": 0.2969799041748047, "learning_rate": 3.874973923516802e-05, "loss": 0.2224, "step": 21482 }, { "epoch": 0.38317340277530054, "grad_norm": 0.3532843589782715, "learning_rate": 3.87484392577067e-05, "loss": 0.168, "step": 21483 }, { "epoch": 0.38319123889701423, "grad_norm": 0.29249686002731323, "learning_rate": 3.87471392269517e-05, "loss": 0.1467, "step": 21484 }, { "epoch": 0.3832090750187279, "grad_norm": 0.2062867432832718, "learning_rate": 3.874583914290802e-05, "loss": 0.1426, "step": 21485 }, { "epoch": 0.3832269111404416, "grad_norm": 0.23950551450252533, "learning_rate": 3.8744539005580736e-05, "loss": 0.1638, "step": 21486 }, { "epoch": 0.3832447472621553, "grad_norm": 0.292850524187088, "learning_rate": 3.874323881497487e-05, "loss": 0.1975, "step": 21487 }, { "epoch": 0.38326258338386904, "grad_norm": 0.26524338126182556, "learning_rate": 3.874193857109545e-05, "loss": 0.1329, "step": 21488 }, { "epoch": 0.3832804195055827, "grad_norm": 0.2546485364437103, "learning_rate": 3.8740638273947535e-05, "loss": 0.1324, "step": 21489 }, { "epoch": 0.3832982556272964, "grad_norm": 0.28564849495887756, "learning_rate": 3.873933792353617e-05, "loss": 0.1533, "step": 21490 }, { "epoch": 0.3833160917490101, "grad_norm": 0.3343300521373749, "learning_rate": 3.873803751986638e-05, "loss": 0.1144, "step": 21491 }, { "epoch": 0.3833339278707238, "grad_norm": 0.19869981706142426, "learning_rate": 3.873673706294321e-05, "loss": 0.1297, "step": 21492 }, { "epoch": 0.3833517639924375, "grad_norm": 0.24130672216415405, "learning_rate": 3.87354365527717e-05, "loss": 0.1467, "step": 21493 }, { "epoch": 0.38336960011415117, "grad_norm": 0.28504040837287903, "learning_rate": 3.87341359893569e-05, "loss": 0.1615, "step": 21494 }, { "epoch": 0.38338743623586485, "grad_norm": 0.2641395926475525, "learning_rate": 3.873283537270385e-05, "loss": 0.1396, "step": 21495 }, { "epoch": 0.38340527235757854, "grad_norm": 0.2860143482685089, "learning_rate": 3.873153470281757e-05, "loss": 0.12, "step": 21496 }, { "epoch": 0.3834231084792923, "grad_norm": 0.2566232681274414, "learning_rate": 3.8730233979703136e-05, "loss": 0.1532, "step": 21497 }, { "epoch": 0.383440944601006, "grad_norm": 0.23894478380680084, "learning_rate": 3.872893320336556e-05, "loss": 0.1692, "step": 21498 }, { "epoch": 0.38345878072271966, "grad_norm": 0.2790544033050537, "learning_rate": 3.872763237380991e-05, "loss": 0.156, "step": 21499 }, { "epoch": 0.38347661684443335, "grad_norm": 0.31465715169906616, "learning_rate": 3.87263314910412e-05, "loss": 0.1715, "step": 21500 }, { "epoch": 0.38349445296614704, "grad_norm": 0.25573471188545227, "learning_rate": 3.87250305550645e-05, "loss": 0.149, "step": 21501 }, { "epoch": 0.3835122890878607, "grad_norm": 0.38926461338996887, "learning_rate": 3.872372956588484e-05, "loss": 0.1401, "step": 21502 }, { "epoch": 0.3835301252095744, "grad_norm": 0.23719745874404907, "learning_rate": 3.872242852350726e-05, "loss": 0.1439, "step": 21503 }, { "epoch": 0.3835479613312881, "grad_norm": 0.2531653940677643, "learning_rate": 3.872112742793681e-05, "loss": 0.1583, "step": 21504 }, { "epoch": 0.38356579745300184, "grad_norm": 0.22133518755435944, "learning_rate": 3.871982627917853e-05, "loss": 0.1297, "step": 21505 }, { "epoch": 0.38358363357471553, "grad_norm": 0.2650386393070221, "learning_rate": 3.8718525077237465e-05, "loss": 0.1733, "step": 21506 }, { "epoch": 0.3836014696964292, "grad_norm": 0.30449217557907104, "learning_rate": 3.871722382211866e-05, "loss": 0.11, "step": 21507 }, { "epoch": 0.3836193058181429, "grad_norm": 0.20377209782600403, "learning_rate": 3.871592251382716e-05, "loss": 0.1272, "step": 21508 }, { "epoch": 0.3836371419398566, "grad_norm": 0.2618069052696228, "learning_rate": 3.8714621152367994e-05, "loss": 0.1201, "step": 21509 }, { "epoch": 0.3836549780615703, "grad_norm": 0.3051649034023285, "learning_rate": 3.8713319737746235e-05, "loss": 0.1256, "step": 21510 }, { "epoch": 0.383672814183284, "grad_norm": 0.2645023465156555, "learning_rate": 3.87120182699669e-05, "loss": 0.1302, "step": 21511 }, { "epoch": 0.38369065030499766, "grad_norm": 0.24106624722480774, "learning_rate": 3.8710716749035056e-05, "loss": 0.1158, "step": 21512 }, { "epoch": 0.38370848642671135, "grad_norm": 0.368408739566803, "learning_rate": 3.870941517495573e-05, "loss": 0.1925, "step": 21513 }, { "epoch": 0.3837263225484251, "grad_norm": 0.2175321727991104, "learning_rate": 3.870811354773398e-05, "loss": 0.1249, "step": 21514 }, { "epoch": 0.3837441586701388, "grad_norm": 0.2704111337661743, "learning_rate": 3.870681186737485e-05, "loss": 0.177, "step": 21515 }, { "epoch": 0.38376199479185247, "grad_norm": 0.27574092149734497, "learning_rate": 3.870551013388338e-05, "loss": 0.1898, "step": 21516 }, { "epoch": 0.38377983091356616, "grad_norm": 0.29014134407043457, "learning_rate": 3.870420834726462e-05, "loss": 0.1653, "step": 21517 }, { "epoch": 0.38379766703527984, "grad_norm": 0.34705883264541626, "learning_rate": 3.870290650752362e-05, "loss": 0.212, "step": 21518 }, { "epoch": 0.38381550315699353, "grad_norm": 0.2782190442085266, "learning_rate": 3.870160461466541e-05, "loss": 0.1575, "step": 21519 }, { "epoch": 0.3838333392787072, "grad_norm": 0.25739315152168274, "learning_rate": 3.870030266869505e-05, "loss": 0.1315, "step": 21520 }, { "epoch": 0.3838511754004209, "grad_norm": 0.24090169370174408, "learning_rate": 3.86990006696176e-05, "loss": 0.1468, "step": 21521 }, { "epoch": 0.38386901152213465, "grad_norm": 0.23150403797626495, "learning_rate": 3.8697698617438075e-05, "loss": 0.1095, "step": 21522 }, { "epoch": 0.38388684764384834, "grad_norm": 0.36557114124298096, "learning_rate": 3.869639651216155e-05, "loss": 0.1641, "step": 21523 }, { "epoch": 0.383904683765562, "grad_norm": 0.30151206254959106, "learning_rate": 3.869509435379305e-05, "loss": 0.1717, "step": 21524 }, { "epoch": 0.3839225198872757, "grad_norm": 0.27847880125045776, "learning_rate": 3.869379214233765e-05, "loss": 0.1725, "step": 21525 }, { "epoch": 0.3839403560089894, "grad_norm": 0.23101796209812164, "learning_rate": 3.869248987780036e-05, "loss": 0.1308, "step": 21526 }, { "epoch": 0.3839581921307031, "grad_norm": 0.32623910903930664, "learning_rate": 3.869118756018627e-05, "loss": 0.1522, "step": 21527 }, { "epoch": 0.3839760282524168, "grad_norm": 0.2518160045146942, "learning_rate": 3.8689885189500396e-05, "loss": 0.185, "step": 21528 }, { "epoch": 0.38399386437413047, "grad_norm": 0.30759429931640625, "learning_rate": 3.868858276574781e-05, "loss": 0.1314, "step": 21529 }, { "epoch": 0.3840117004958442, "grad_norm": 0.2791420519351959, "learning_rate": 3.868728028893354e-05, "loss": 0.183, "step": 21530 }, { "epoch": 0.3840295366175579, "grad_norm": 0.29207777976989746, "learning_rate": 3.868597775906265e-05, "loss": 0.1437, "step": 21531 }, { "epoch": 0.3840473727392716, "grad_norm": 0.25608715415000916, "learning_rate": 3.868467517614018e-05, "loss": 0.1787, "step": 21532 }, { "epoch": 0.3840652088609853, "grad_norm": 0.2718445956707001, "learning_rate": 3.868337254017118e-05, "loss": 0.1427, "step": 21533 }, { "epoch": 0.38408304498269896, "grad_norm": 0.2942187190055847, "learning_rate": 3.868206985116071e-05, "loss": 0.1639, "step": 21534 }, { "epoch": 0.38410088110441265, "grad_norm": 0.2436235398054123, "learning_rate": 3.86807671091138e-05, "loss": 0.1575, "step": 21535 }, { "epoch": 0.38411871722612634, "grad_norm": 0.29770779609680176, "learning_rate": 3.867946431403552e-05, "loss": 0.1282, "step": 21536 }, { "epoch": 0.38413655334784, "grad_norm": 0.2541572153568268, "learning_rate": 3.867816146593091e-05, "loss": 0.1549, "step": 21537 }, { "epoch": 0.3841543894695537, "grad_norm": 0.22383952140808105, "learning_rate": 3.8676858564805026e-05, "loss": 0.0999, "step": 21538 }, { "epoch": 0.38417222559126746, "grad_norm": 0.23770824074745178, "learning_rate": 3.8675555610662904e-05, "loss": 0.1405, "step": 21539 }, { "epoch": 0.38419006171298115, "grad_norm": 0.2964611053466797, "learning_rate": 3.867425260350961e-05, "loss": 0.1803, "step": 21540 }, { "epoch": 0.38420789783469483, "grad_norm": 0.26499688625335693, "learning_rate": 3.867294954335019e-05, "loss": 0.1859, "step": 21541 }, { "epoch": 0.3842257339564085, "grad_norm": 0.3101528584957123, "learning_rate": 3.86716464301897e-05, "loss": 0.2197, "step": 21542 }, { "epoch": 0.3842435700781222, "grad_norm": 0.445478618144989, "learning_rate": 3.867034326403318e-05, "loss": 0.1536, "step": 21543 }, { "epoch": 0.3842614061998359, "grad_norm": 0.2813752293586731, "learning_rate": 3.8669040044885693e-05, "loss": 0.1539, "step": 21544 }, { "epoch": 0.3842792423215496, "grad_norm": 0.31148186326026917, "learning_rate": 3.8667736772752285e-05, "loss": 0.167, "step": 21545 }, { "epoch": 0.3842970784432633, "grad_norm": 0.31324324011802673, "learning_rate": 3.8666433447638e-05, "loss": 0.1531, "step": 21546 }, { "epoch": 0.384314914564977, "grad_norm": 0.27325940132141113, "learning_rate": 3.866513006954791e-05, "loss": 0.169, "step": 21547 }, { "epoch": 0.3843327506866907, "grad_norm": 0.29829758405685425, "learning_rate": 3.866382663848706e-05, "loss": 0.2055, "step": 21548 }, { "epoch": 0.3843505868084044, "grad_norm": 0.20920826494693756, "learning_rate": 3.8662523154460484e-05, "loss": 0.1349, "step": 21549 }, { "epoch": 0.3843684229301181, "grad_norm": 0.24438165128231049, "learning_rate": 3.8661219617473256e-05, "loss": 0.1426, "step": 21550 }, { "epoch": 0.38438625905183177, "grad_norm": 0.24366679787635803, "learning_rate": 3.865991602753042e-05, "loss": 0.1926, "step": 21551 }, { "epoch": 0.38440409517354546, "grad_norm": 0.3072656989097595, "learning_rate": 3.8658612384637034e-05, "loss": 0.2118, "step": 21552 }, { "epoch": 0.38442193129525914, "grad_norm": 0.22273799777030945, "learning_rate": 3.865730868879815e-05, "loss": 0.1756, "step": 21553 }, { "epoch": 0.38443976741697283, "grad_norm": 0.2541927993297577, "learning_rate": 3.8656004940018816e-05, "loss": 0.1543, "step": 21554 }, { "epoch": 0.3844576035386865, "grad_norm": 0.20942744612693787, "learning_rate": 3.865470113830409e-05, "loss": 0.1138, "step": 21555 }, { "epoch": 0.38447543966040026, "grad_norm": 0.29884278774261475, "learning_rate": 3.865339728365903e-05, "loss": 0.1976, "step": 21556 }, { "epoch": 0.38449327578211395, "grad_norm": 0.24784955382347107, "learning_rate": 3.865209337608869e-05, "loss": 0.1919, "step": 21557 }, { "epoch": 0.38451111190382764, "grad_norm": 0.26318109035491943, "learning_rate": 3.865078941559811e-05, "loss": 0.186, "step": 21558 }, { "epoch": 0.38452894802554133, "grad_norm": 0.309365451335907, "learning_rate": 3.864948540219237e-05, "loss": 0.2005, "step": 21559 }, { "epoch": 0.384546784147255, "grad_norm": 0.21352458000183105, "learning_rate": 3.86481813358765e-05, "loss": 0.1083, "step": 21560 }, { "epoch": 0.3845646202689687, "grad_norm": 0.2547975778579712, "learning_rate": 3.8646877216655566e-05, "loss": 0.1931, "step": 21561 }, { "epoch": 0.3845824563906824, "grad_norm": 0.2607114911079407, "learning_rate": 3.864557304453462e-05, "loss": 0.1353, "step": 21562 }, { "epoch": 0.3846002925123961, "grad_norm": 0.30372127890586853, "learning_rate": 3.8644268819518726e-05, "loss": 0.1907, "step": 21563 }, { "epoch": 0.3846181286341098, "grad_norm": 0.261281818151474, "learning_rate": 3.864296454161292e-05, "loss": 0.1633, "step": 21564 }, { "epoch": 0.3846359647558235, "grad_norm": 0.2646227180957794, "learning_rate": 3.864166021082229e-05, "loss": 0.1713, "step": 21565 }, { "epoch": 0.3846538008775372, "grad_norm": 0.2874150276184082, "learning_rate": 3.8640355827151865e-05, "loss": 0.2352, "step": 21566 }, { "epoch": 0.3846716369992509, "grad_norm": 0.30703097581863403, "learning_rate": 3.863905139060671e-05, "loss": 0.1595, "step": 21567 }, { "epoch": 0.3846894731209646, "grad_norm": 0.31142300367355347, "learning_rate": 3.8637746901191885e-05, "loss": 0.1671, "step": 21568 }, { "epoch": 0.38470730924267826, "grad_norm": 0.41892385482788086, "learning_rate": 3.8636442358912434e-05, "loss": 0.2343, "step": 21569 }, { "epoch": 0.38472514536439195, "grad_norm": 0.2691972851753235, "learning_rate": 3.863513776377343e-05, "loss": 0.1467, "step": 21570 }, { "epoch": 0.38474298148610564, "grad_norm": 0.22149133682250977, "learning_rate": 3.863383311577992e-05, "loss": 0.1427, "step": 21571 }, { "epoch": 0.3847608176078194, "grad_norm": 0.35529500246047974, "learning_rate": 3.863252841493696e-05, "loss": 0.0988, "step": 21572 }, { "epoch": 0.38477865372953307, "grad_norm": 0.2516630291938782, "learning_rate": 3.863122366124961e-05, "loss": 0.1523, "step": 21573 }, { "epoch": 0.38479648985124676, "grad_norm": 0.3124215602874756, "learning_rate": 3.862991885472294e-05, "loss": 0.1271, "step": 21574 }, { "epoch": 0.38481432597296045, "grad_norm": 0.33533337712287903, "learning_rate": 3.8628613995361996e-05, "loss": 0.1131, "step": 21575 }, { "epoch": 0.38483216209467414, "grad_norm": 0.2423637956380844, "learning_rate": 3.8627309083171825e-05, "loss": 0.1941, "step": 21576 }, { "epoch": 0.3848499982163878, "grad_norm": 0.24964472651481628, "learning_rate": 3.862600411815751e-05, "loss": 0.2078, "step": 21577 }, { "epoch": 0.3848678343381015, "grad_norm": 0.3183000981807709, "learning_rate": 3.862469910032409e-05, "loss": 0.1426, "step": 21578 }, { "epoch": 0.3848856704598152, "grad_norm": 0.2533881962299347, "learning_rate": 3.862339402967663e-05, "loss": 0.1065, "step": 21579 }, { "epoch": 0.3849035065815289, "grad_norm": 0.3077002763748169, "learning_rate": 3.8622088906220185e-05, "loss": 0.1535, "step": 21580 }, { "epoch": 0.38492134270324263, "grad_norm": 0.24204425513744354, "learning_rate": 3.862078372995983e-05, "loss": 0.132, "step": 21581 }, { "epoch": 0.3849391788249563, "grad_norm": 0.3703136146068573, "learning_rate": 3.861947850090061e-05, "loss": 0.1863, "step": 21582 }, { "epoch": 0.38495701494667, "grad_norm": 0.20650333166122437, "learning_rate": 3.861817321904758e-05, "loss": 0.1334, "step": 21583 }, { "epoch": 0.3849748510683837, "grad_norm": 0.22031338512897491, "learning_rate": 3.8616867884405805e-05, "loss": 0.1385, "step": 21584 }, { "epoch": 0.3849926871900974, "grad_norm": 0.20216822624206543, "learning_rate": 3.861556249698036e-05, "loss": 0.1254, "step": 21585 }, { "epoch": 0.38501052331181107, "grad_norm": 0.25917112827301025, "learning_rate": 3.861425705677629e-05, "loss": 0.1412, "step": 21586 }, { "epoch": 0.38502835943352476, "grad_norm": 0.21913515031337738, "learning_rate": 3.861295156379865e-05, "loss": 0.1074, "step": 21587 }, { "epoch": 0.38504619555523845, "grad_norm": 0.28667446970939636, "learning_rate": 3.861164601805251e-05, "loss": 0.1526, "step": 21588 }, { "epoch": 0.3850640316769522, "grad_norm": 0.31146547198295593, "learning_rate": 3.861034041954292e-05, "loss": 0.1507, "step": 21589 }, { "epoch": 0.3850818677986659, "grad_norm": 0.311732679605484, "learning_rate": 3.8609034768274965e-05, "loss": 0.1117, "step": 21590 }, { "epoch": 0.38509970392037957, "grad_norm": 0.3943082094192505, "learning_rate": 3.860772906425368e-05, "loss": 0.2095, "step": 21591 }, { "epoch": 0.38511754004209325, "grad_norm": 0.3782370388507843, "learning_rate": 3.8606423307484154e-05, "loss": 0.1876, "step": 21592 }, { "epoch": 0.38513537616380694, "grad_norm": 0.29106682538986206, "learning_rate": 3.860511749797141e-05, "loss": 0.1268, "step": 21593 }, { "epoch": 0.38515321228552063, "grad_norm": 0.2340225726366043, "learning_rate": 3.860381163572055e-05, "loss": 0.1491, "step": 21594 }, { "epoch": 0.3851710484072343, "grad_norm": 0.32674309611320496, "learning_rate": 3.86025057207366e-05, "loss": 0.1392, "step": 21595 }, { "epoch": 0.385188884528948, "grad_norm": 0.32957813143730164, "learning_rate": 3.860119975302465e-05, "loss": 0.186, "step": 21596 }, { "epoch": 0.3852067206506617, "grad_norm": 0.2759378254413605, "learning_rate": 3.8599893732589754e-05, "loss": 0.1881, "step": 21597 }, { "epoch": 0.38522455677237544, "grad_norm": 0.28699544072151184, "learning_rate": 3.859858765943697e-05, "loss": 0.1252, "step": 21598 }, { "epoch": 0.3852423928940891, "grad_norm": 0.2482403814792633, "learning_rate": 3.859728153357136e-05, "loss": 0.1755, "step": 21599 }, { "epoch": 0.3852602290158028, "grad_norm": 0.2203858345746994, "learning_rate": 3.859597535499799e-05, "loss": 0.172, "step": 21600 }, { "epoch": 0.3852780651375165, "grad_norm": 0.22645880281925201, "learning_rate": 3.8594669123721935e-05, "loss": 0.0836, "step": 21601 }, { "epoch": 0.3852959012592302, "grad_norm": 0.24230335652828217, "learning_rate": 3.859336283974824e-05, "loss": 0.1522, "step": 21602 }, { "epoch": 0.3853137373809439, "grad_norm": 0.4262154698371887, "learning_rate": 3.859205650308198e-05, "loss": 0.2049, "step": 21603 }, { "epoch": 0.38533157350265757, "grad_norm": 0.34571588039398193, "learning_rate": 3.85907501137282e-05, "loss": 0.1275, "step": 21604 }, { "epoch": 0.38534940962437125, "grad_norm": 0.27967000007629395, "learning_rate": 3.8589443671691995e-05, "loss": 0.1223, "step": 21605 }, { "epoch": 0.385367245746085, "grad_norm": 0.25240615010261536, "learning_rate": 3.85881371769784e-05, "loss": 0.1564, "step": 21606 }, { "epoch": 0.3853850818677987, "grad_norm": 0.2631133496761322, "learning_rate": 3.85868306295925e-05, "loss": 0.1977, "step": 21607 }, { "epoch": 0.3854029179895124, "grad_norm": 0.3707141876220703, "learning_rate": 3.858552402953934e-05, "loss": 0.1582, "step": 21608 }, { "epoch": 0.38542075411122606, "grad_norm": 0.24326975643634796, "learning_rate": 3.858421737682401e-05, "loss": 0.1087, "step": 21609 }, { "epoch": 0.38543859023293975, "grad_norm": 0.33008456230163574, "learning_rate": 3.8582910671451556e-05, "loss": 0.104, "step": 21610 }, { "epoch": 0.38545642635465344, "grad_norm": 0.2847519814968109, "learning_rate": 3.8581603913427054e-05, "loss": 0.1589, "step": 21611 }, { "epoch": 0.3854742624763671, "grad_norm": 0.36371302604675293, "learning_rate": 3.858029710275556e-05, "loss": 0.1704, "step": 21612 }, { "epoch": 0.3854920985980808, "grad_norm": 0.40692663192749023, "learning_rate": 3.857899023944215e-05, "loss": 0.2057, "step": 21613 }, { "epoch": 0.3855099347197945, "grad_norm": 0.34015026688575745, "learning_rate": 3.857768332349187e-05, "loss": 0.1696, "step": 21614 }, { "epoch": 0.38552777084150824, "grad_norm": 0.24905699491500854, "learning_rate": 3.857637635490981e-05, "loss": 0.1762, "step": 21615 }, { "epoch": 0.38554560696322193, "grad_norm": 0.23990298807621002, "learning_rate": 3.857506933370102e-05, "loss": 0.1239, "step": 21616 }, { "epoch": 0.3855634430849356, "grad_norm": 0.27261462807655334, "learning_rate": 3.857376225987058e-05, "loss": 0.1556, "step": 21617 }, { "epoch": 0.3855812792066493, "grad_norm": 0.2500361204147339, "learning_rate": 3.8572455133423546e-05, "loss": 0.155, "step": 21618 }, { "epoch": 0.385599115328363, "grad_norm": 0.38261857628822327, "learning_rate": 3.857114795436498e-05, "loss": 0.2161, "step": 21619 }, { "epoch": 0.3856169514500767, "grad_norm": 0.24765770137310028, "learning_rate": 3.856984072269997e-05, "loss": 0.1387, "step": 21620 }, { "epoch": 0.38563478757179037, "grad_norm": 0.29271161556243896, "learning_rate": 3.856853343843356e-05, "loss": 0.1553, "step": 21621 }, { "epoch": 0.38565262369350406, "grad_norm": 0.2441394329071045, "learning_rate": 3.856722610157084e-05, "loss": 0.1897, "step": 21622 }, { "epoch": 0.3856704598152178, "grad_norm": 0.23362718522548676, "learning_rate": 3.856591871211686e-05, "loss": 0.0922, "step": 21623 }, { "epoch": 0.3856882959369315, "grad_norm": 0.40063345432281494, "learning_rate": 3.856461127007669e-05, "loss": 0.1127, "step": 21624 }, { "epoch": 0.3857061320586452, "grad_norm": 0.36613729596138, "learning_rate": 3.856330377545541e-05, "loss": 0.1274, "step": 21625 }, { "epoch": 0.38572396818035887, "grad_norm": 0.2747527062892914, "learning_rate": 3.8561996228258076e-05, "loss": 0.1613, "step": 21626 }, { "epoch": 0.38574180430207256, "grad_norm": 0.20412592589855194, "learning_rate": 3.856068862848976e-05, "loss": 0.1519, "step": 21627 }, { "epoch": 0.38575964042378624, "grad_norm": 0.23202116787433624, "learning_rate": 3.8559380976155525e-05, "loss": 0.1891, "step": 21628 }, { "epoch": 0.38577747654549993, "grad_norm": 0.22255411744117737, "learning_rate": 3.855807327126045e-05, "loss": 0.1417, "step": 21629 }, { "epoch": 0.3857953126672136, "grad_norm": 0.3256995379924774, "learning_rate": 3.8556765513809604e-05, "loss": 0.1934, "step": 21630 }, { "epoch": 0.38581314878892736, "grad_norm": 0.2698051929473877, "learning_rate": 3.8555457703808054e-05, "loss": 0.1605, "step": 21631 }, { "epoch": 0.38583098491064105, "grad_norm": 0.21564166247844696, "learning_rate": 3.8554149841260856e-05, "loss": 0.1331, "step": 21632 }, { "epoch": 0.38584882103235474, "grad_norm": 0.28024598956108093, "learning_rate": 3.8552841926173106e-05, "loss": 0.2173, "step": 21633 }, { "epoch": 0.3858666571540684, "grad_norm": 0.5236746072769165, "learning_rate": 3.855153395854985e-05, "loss": 0.2136, "step": 21634 }, { "epoch": 0.3858844932757821, "grad_norm": 0.27546462416648865, "learning_rate": 3.8550225938396175e-05, "loss": 0.1847, "step": 21635 }, { "epoch": 0.3859023293974958, "grad_norm": 0.23911882936954498, "learning_rate": 3.854891786571714e-05, "loss": 0.1428, "step": 21636 }, { "epoch": 0.3859201655192095, "grad_norm": 0.24938642978668213, "learning_rate": 3.8547609740517824e-05, "loss": 0.1288, "step": 21637 }, { "epoch": 0.3859380016409232, "grad_norm": 0.34109604358673096, "learning_rate": 3.8546301562803286e-05, "loss": 0.1251, "step": 21638 }, { "epoch": 0.38595583776263687, "grad_norm": 0.3316737115383148, "learning_rate": 3.85449933325786e-05, "loss": 0.17, "step": 21639 }, { "epoch": 0.3859736738843506, "grad_norm": 0.31739625334739685, "learning_rate": 3.854368504984885e-05, "loss": 0.1609, "step": 21640 }, { "epoch": 0.3859915100060643, "grad_norm": 0.2375878244638443, "learning_rate": 3.85423767146191e-05, "loss": 0.1234, "step": 21641 }, { "epoch": 0.386009346127778, "grad_norm": 0.277337908744812, "learning_rate": 3.8541068326894424e-05, "loss": 0.158, "step": 21642 }, { "epoch": 0.3860271822494917, "grad_norm": 0.3270178735256195, "learning_rate": 3.8539759886679884e-05, "loss": 0.173, "step": 21643 }, { "epoch": 0.38604501837120536, "grad_norm": 0.3947116434574127, "learning_rate": 3.853845139398056e-05, "loss": 0.1369, "step": 21644 }, { "epoch": 0.38606285449291905, "grad_norm": 0.19846835732460022, "learning_rate": 3.8537142848801514e-05, "loss": 0.1601, "step": 21645 }, { "epoch": 0.38608069061463274, "grad_norm": 0.260405570268631, "learning_rate": 3.853583425114784e-05, "loss": 0.1925, "step": 21646 }, { "epoch": 0.3860985267363464, "grad_norm": 0.29649707674980164, "learning_rate": 3.853452560102459e-05, "loss": 0.1478, "step": 21647 }, { "epoch": 0.38611636285806017, "grad_norm": 0.3264923095703125, "learning_rate": 3.8533216898436845e-05, "loss": 0.1844, "step": 21648 }, { "epoch": 0.38613419897977386, "grad_norm": 0.259240984916687, "learning_rate": 3.853190814338968e-05, "loss": 0.1611, "step": 21649 }, { "epoch": 0.38615203510148755, "grad_norm": 0.25992581248283386, "learning_rate": 3.853059933588816e-05, "loss": 0.1476, "step": 21650 }, { "epoch": 0.38616987122320123, "grad_norm": 0.25727441906929016, "learning_rate": 3.8529290475937374e-05, "loss": 0.1611, "step": 21651 }, { "epoch": 0.3861877073449149, "grad_norm": 0.22839511930942535, "learning_rate": 3.852798156354237e-05, "loss": 0.1678, "step": 21652 }, { "epoch": 0.3862055434666286, "grad_norm": 0.18959090113639832, "learning_rate": 3.852667259870825e-05, "loss": 0.1034, "step": 21653 }, { "epoch": 0.3862233795883423, "grad_norm": 0.30014801025390625, "learning_rate": 3.852536358144007e-05, "loss": 0.1277, "step": 21654 }, { "epoch": 0.386241215710056, "grad_norm": 0.19943666458129883, "learning_rate": 3.852405451174291e-05, "loss": 0.1307, "step": 21655 }, { "epoch": 0.3862590518317697, "grad_norm": 0.28613272309303284, "learning_rate": 3.852274538962184e-05, "loss": 0.1565, "step": 21656 }, { "epoch": 0.3862768879534834, "grad_norm": 0.2960610091686249, "learning_rate": 3.8521436215081945e-05, "loss": 0.1585, "step": 21657 }, { "epoch": 0.3862947240751971, "grad_norm": 0.2918239235877991, "learning_rate": 3.852012698812829e-05, "loss": 0.2052, "step": 21658 }, { "epoch": 0.3863125601969108, "grad_norm": 0.38167905807495117, "learning_rate": 3.851881770876595e-05, "loss": 0.2402, "step": 21659 }, { "epoch": 0.3863303963186245, "grad_norm": 0.21273306012153625, "learning_rate": 3.8517508377000006e-05, "loss": 0.166, "step": 21660 }, { "epoch": 0.38634823244033817, "grad_norm": 0.22714021801948547, "learning_rate": 3.851619899283553e-05, "loss": 0.141, "step": 21661 }, { "epoch": 0.38636606856205186, "grad_norm": 0.232809379696846, "learning_rate": 3.85148895562776e-05, "loss": 0.1739, "step": 21662 }, { "epoch": 0.38638390468376554, "grad_norm": 0.24009500443935394, "learning_rate": 3.851358006733129e-05, "loss": 0.1675, "step": 21663 }, { "epoch": 0.38640174080547923, "grad_norm": 0.28202730417251587, "learning_rate": 3.851227052600167e-05, "loss": 0.1919, "step": 21664 }, { "epoch": 0.386419576927193, "grad_norm": 0.2808999717235565, "learning_rate": 3.8510960932293835e-05, "loss": 0.2019, "step": 21665 }, { "epoch": 0.38643741304890666, "grad_norm": 0.18273112177848816, "learning_rate": 3.850965128621284e-05, "loss": 0.1042, "step": 21666 }, { "epoch": 0.38645524917062035, "grad_norm": 0.24341915547847748, "learning_rate": 3.850834158776377e-05, "loss": 0.135, "step": 21667 }, { "epoch": 0.38647308529233404, "grad_norm": 0.30897343158721924, "learning_rate": 3.8507031836951704e-05, "loss": 0.189, "step": 21668 }, { "epoch": 0.38649092141404773, "grad_norm": 0.23671004176139832, "learning_rate": 3.850572203378172e-05, "loss": 0.1754, "step": 21669 }, { "epoch": 0.3865087575357614, "grad_norm": 0.3675598204135895, "learning_rate": 3.8504412178258886e-05, "loss": 0.1918, "step": 21670 }, { "epoch": 0.3865265936574751, "grad_norm": 0.18170593678951263, "learning_rate": 3.850310227038829e-05, "loss": 0.133, "step": 21671 }, { "epoch": 0.3865444297791888, "grad_norm": 0.21059879660606384, "learning_rate": 3.850179231017501e-05, "loss": 0.1692, "step": 21672 }, { "epoch": 0.38656226590090254, "grad_norm": 0.23930495977401733, "learning_rate": 3.850048229762412e-05, "loss": 0.1628, "step": 21673 }, { "epoch": 0.3865801020226162, "grad_norm": 0.3416757583618164, "learning_rate": 3.849917223274069e-05, "loss": 0.1497, "step": 21674 }, { "epoch": 0.3865979381443299, "grad_norm": 0.2754180431365967, "learning_rate": 3.849786211552981e-05, "loss": 0.1016, "step": 21675 }, { "epoch": 0.3866157742660436, "grad_norm": 0.24283252656459808, "learning_rate": 3.8496551945996556e-05, "loss": 0.1746, "step": 21676 }, { "epoch": 0.3866336103877573, "grad_norm": 0.3330623209476471, "learning_rate": 3.8495241724146006e-05, "loss": 0.1303, "step": 21677 }, { "epoch": 0.386651446509471, "grad_norm": 0.33630359172821045, "learning_rate": 3.849393144998324e-05, "loss": 0.1668, "step": 21678 }, { "epoch": 0.38666928263118466, "grad_norm": 0.32251837849617004, "learning_rate": 3.849262112351332e-05, "loss": 0.1187, "step": 21679 }, { "epoch": 0.38668711875289835, "grad_norm": 0.32943660020828247, "learning_rate": 3.849131074474135e-05, "loss": 0.1358, "step": 21680 }, { "epoch": 0.38670495487461204, "grad_norm": 0.24475125968456268, "learning_rate": 3.84900003136724e-05, "loss": 0.1217, "step": 21681 }, { "epoch": 0.3867227909963258, "grad_norm": 0.36403796076774597, "learning_rate": 3.8488689830311554e-05, "loss": 0.1751, "step": 21682 }, { "epoch": 0.38674062711803947, "grad_norm": 0.26338905096054077, "learning_rate": 3.8487379294663886e-05, "loss": 0.2033, "step": 21683 }, { "epoch": 0.38675846323975316, "grad_norm": 0.22861157357692719, "learning_rate": 3.8486068706734465e-05, "loss": 0.1232, "step": 21684 }, { "epoch": 0.38677629936146685, "grad_norm": 0.25246092677116394, "learning_rate": 3.84847580665284e-05, "loss": 0.1377, "step": 21685 }, { "epoch": 0.38679413548318053, "grad_norm": 0.23795294761657715, "learning_rate": 3.8483447374050746e-05, "loss": 0.1212, "step": 21686 }, { "epoch": 0.3868119716048942, "grad_norm": 0.2064659744501114, "learning_rate": 3.84821366293066e-05, "loss": 0.1455, "step": 21687 }, { "epoch": 0.3868298077266079, "grad_norm": 0.2659085690975189, "learning_rate": 3.8480825832301026e-05, "loss": 0.1492, "step": 21688 }, { "epoch": 0.3868476438483216, "grad_norm": 0.22061419486999512, "learning_rate": 3.8479514983039125e-05, "loss": 0.1379, "step": 21689 }, { "epoch": 0.38686547997003534, "grad_norm": 0.2448718398809433, "learning_rate": 3.847820408152596e-05, "loss": 0.1889, "step": 21690 }, { "epoch": 0.38688331609174903, "grad_norm": 0.5124644637107849, "learning_rate": 3.847689312776663e-05, "loss": 0.1861, "step": 21691 }, { "epoch": 0.3869011522134627, "grad_norm": 0.27622994780540466, "learning_rate": 3.84755821217662e-05, "loss": 0.1889, "step": 21692 }, { "epoch": 0.3869189883351764, "grad_norm": 0.31315430998802185, "learning_rate": 3.847427106352976e-05, "loss": 0.1445, "step": 21693 }, { "epoch": 0.3869368244568901, "grad_norm": 0.21457968652248383, "learning_rate": 3.8472959953062394e-05, "loss": 0.1384, "step": 21694 }, { "epoch": 0.3869546605786038, "grad_norm": 0.23343250155448914, "learning_rate": 3.847164879036918e-05, "loss": 0.1887, "step": 21695 }, { "epoch": 0.38697249670031747, "grad_norm": 0.33423900604248047, "learning_rate": 3.847033757545521e-05, "loss": 0.1818, "step": 21696 }, { "epoch": 0.38699033282203116, "grad_norm": 0.35718485713005066, "learning_rate": 3.846902630832555e-05, "loss": 0.1553, "step": 21697 }, { "epoch": 0.38700816894374485, "grad_norm": 0.26283907890319824, "learning_rate": 3.846771498898529e-05, "loss": 0.1542, "step": 21698 }, { "epoch": 0.3870260050654586, "grad_norm": 0.3402228057384491, "learning_rate": 3.846640361743952e-05, "loss": 0.1389, "step": 21699 }, { "epoch": 0.3870438411871723, "grad_norm": 0.26243266463279724, "learning_rate": 3.846509219369332e-05, "loss": 0.1499, "step": 21700 }, { "epoch": 0.38706167730888597, "grad_norm": 0.18809480965137482, "learning_rate": 3.846378071775176e-05, "loss": 0.1384, "step": 21701 }, { "epoch": 0.38707951343059965, "grad_norm": 0.29910749197006226, "learning_rate": 3.8462469189619955e-05, "loss": 0.1778, "step": 21702 }, { "epoch": 0.38709734955231334, "grad_norm": 0.26675617694854736, "learning_rate": 3.846115760930296e-05, "loss": 0.1842, "step": 21703 }, { "epoch": 0.38711518567402703, "grad_norm": 0.19826537370681763, "learning_rate": 3.8459845976805866e-05, "loss": 0.12, "step": 21704 }, { "epoch": 0.3871330217957407, "grad_norm": 0.23616188764572144, "learning_rate": 3.845853429213377e-05, "loss": 0.1357, "step": 21705 }, { "epoch": 0.3871508579174544, "grad_norm": 0.33836686611175537, "learning_rate": 3.845722255529173e-05, "loss": 0.1604, "step": 21706 }, { "epoch": 0.38716869403916815, "grad_norm": 0.21005497872829437, "learning_rate": 3.845591076628486e-05, "loss": 0.1221, "step": 21707 }, { "epoch": 0.38718653016088184, "grad_norm": 0.4515397548675537, "learning_rate": 3.845459892511822e-05, "loss": 0.1368, "step": 21708 }, { "epoch": 0.3872043662825955, "grad_norm": 0.320442795753479, "learning_rate": 3.845328703179692e-05, "loss": 0.1526, "step": 21709 }, { "epoch": 0.3872222024043092, "grad_norm": 0.2485407143831253, "learning_rate": 3.845197508632603e-05, "loss": 0.1396, "step": 21710 }, { "epoch": 0.3872400385260229, "grad_norm": 0.2517538368701935, "learning_rate": 3.845066308871065e-05, "loss": 0.213, "step": 21711 }, { "epoch": 0.3872578746477366, "grad_norm": 0.21894724667072296, "learning_rate": 3.8449351038955836e-05, "loss": 0.0998, "step": 21712 }, { "epoch": 0.3872757107694503, "grad_norm": 0.1558244526386261, "learning_rate": 3.84480389370667e-05, "loss": 0.1074, "step": 21713 }, { "epoch": 0.38729354689116396, "grad_norm": 0.2754209637641907, "learning_rate": 3.844672678304831e-05, "loss": 0.142, "step": 21714 }, { "epoch": 0.38731138301287765, "grad_norm": 0.24693749845027924, "learning_rate": 3.844541457690578e-05, "loss": 0.1736, "step": 21715 }, { "epoch": 0.3873292191345914, "grad_norm": 0.2691107392311096, "learning_rate": 3.8444102318644165e-05, "loss": 0.1575, "step": 21716 }, { "epoch": 0.3873470552563051, "grad_norm": 0.25201287865638733, "learning_rate": 3.8442790008268576e-05, "loss": 0.1509, "step": 21717 }, { "epoch": 0.38736489137801877, "grad_norm": 0.28409329056739807, "learning_rate": 3.8441477645784084e-05, "loss": 0.1729, "step": 21718 }, { "epoch": 0.38738272749973246, "grad_norm": 0.23766516149044037, "learning_rate": 3.844016523119578e-05, "loss": 0.1601, "step": 21719 }, { "epoch": 0.38740056362144615, "grad_norm": 0.25733527541160583, "learning_rate": 3.843885276450876e-05, "loss": 0.1327, "step": 21720 }, { "epoch": 0.38741839974315984, "grad_norm": 0.3048568069934845, "learning_rate": 3.8437540245728095e-05, "loss": 0.1091, "step": 21721 }, { "epoch": 0.3874362358648735, "grad_norm": 0.2690945863723755, "learning_rate": 3.8436227674858895e-05, "loss": 0.1946, "step": 21722 }, { "epoch": 0.3874540719865872, "grad_norm": 0.23019066452980042, "learning_rate": 3.843491505190623e-05, "loss": 0.1736, "step": 21723 }, { "epoch": 0.38747190810830096, "grad_norm": 0.18285711109638214, "learning_rate": 3.84336023768752e-05, "loss": 0.169, "step": 21724 }, { "epoch": 0.38748974423001464, "grad_norm": 0.28343191742897034, "learning_rate": 3.843228964977088e-05, "loss": 0.1573, "step": 21725 }, { "epoch": 0.38750758035172833, "grad_norm": 0.25772616267204285, "learning_rate": 3.8430976870598366e-05, "loss": 0.2121, "step": 21726 }, { "epoch": 0.387525416473442, "grad_norm": 0.24936121702194214, "learning_rate": 3.842966403936274e-05, "loss": 0.1319, "step": 21727 }, { "epoch": 0.3875432525951557, "grad_norm": 0.281258225440979, "learning_rate": 3.842835115606911e-05, "loss": 0.1184, "step": 21728 }, { "epoch": 0.3875610887168694, "grad_norm": 0.38405823707580566, "learning_rate": 3.842703822072255e-05, "loss": 0.1969, "step": 21729 }, { "epoch": 0.3875789248385831, "grad_norm": 0.3724100589752197, "learning_rate": 3.8425725233328157e-05, "loss": 0.1761, "step": 21730 }, { "epoch": 0.38759676096029677, "grad_norm": 0.2624076306819916, "learning_rate": 3.8424412193891016e-05, "loss": 0.1827, "step": 21731 }, { "epoch": 0.3876145970820105, "grad_norm": 0.19694197177886963, "learning_rate": 3.84230991024162e-05, "loss": 0.1873, "step": 21732 }, { "epoch": 0.3876324332037242, "grad_norm": 0.21732911467552185, "learning_rate": 3.8421785958908826e-05, "loss": 0.1376, "step": 21733 }, { "epoch": 0.3876502693254379, "grad_norm": 0.320049911737442, "learning_rate": 3.8420472763373976e-05, "loss": 0.1449, "step": 21734 }, { "epoch": 0.3876681054471516, "grad_norm": 0.38718122243881226, "learning_rate": 3.841915951581674e-05, "loss": 0.1507, "step": 21735 }, { "epoch": 0.38768594156886527, "grad_norm": 0.3303813934326172, "learning_rate": 3.84178462162422e-05, "loss": 0.1582, "step": 21736 }, { "epoch": 0.38770377769057895, "grad_norm": 0.3199482560157776, "learning_rate": 3.841653286465546e-05, "loss": 0.1461, "step": 21737 }, { "epoch": 0.38772161381229264, "grad_norm": 0.2577473819255829, "learning_rate": 3.8415219461061605e-05, "loss": 0.1477, "step": 21738 }, { "epoch": 0.38773944993400633, "grad_norm": 0.22839266061782837, "learning_rate": 3.8413906005465725e-05, "loss": 0.1219, "step": 21739 }, { "epoch": 0.38775728605572, "grad_norm": 0.3764893710613251, "learning_rate": 3.8412592497872905e-05, "loss": 0.174, "step": 21740 }, { "epoch": 0.38777512217743376, "grad_norm": 0.23945572972297668, "learning_rate": 3.8411278938288254e-05, "loss": 0.1756, "step": 21741 }, { "epoch": 0.38779295829914745, "grad_norm": 0.26251915097236633, "learning_rate": 3.840996532671685e-05, "loss": 0.1487, "step": 21742 }, { "epoch": 0.38781079442086114, "grad_norm": 0.2576858103275299, "learning_rate": 3.840865166316379e-05, "loss": 0.1665, "step": 21743 }, { "epoch": 0.3878286305425748, "grad_norm": 0.3027690052986145, "learning_rate": 3.840733794763416e-05, "loss": 0.1587, "step": 21744 }, { "epoch": 0.3878464666642885, "grad_norm": 0.21462975442409515, "learning_rate": 3.840602418013306e-05, "loss": 0.156, "step": 21745 }, { "epoch": 0.3878643027860022, "grad_norm": 0.19485387206077576, "learning_rate": 3.840471036066559e-05, "loss": 0.1318, "step": 21746 }, { "epoch": 0.3878821389077159, "grad_norm": 0.2827766239643097, "learning_rate": 3.8403396489236806e-05, "loss": 0.1589, "step": 21747 }, { "epoch": 0.3878999750294296, "grad_norm": 0.4065455496311188, "learning_rate": 3.840208256585185e-05, "loss": 0.166, "step": 21748 }, { "epoch": 0.3879178111511433, "grad_norm": 0.23038853704929352, "learning_rate": 3.840076859051578e-05, "loss": 0.1877, "step": 21749 }, { "epoch": 0.387935647272857, "grad_norm": 0.27545079588890076, "learning_rate": 3.8399454563233716e-05, "loss": 0.1404, "step": 21750 }, { "epoch": 0.3879534833945707, "grad_norm": 0.2805190980434418, "learning_rate": 3.839814048401074e-05, "loss": 0.13, "step": 21751 }, { "epoch": 0.3879713195162844, "grad_norm": 0.329283744096756, "learning_rate": 3.839682635285193e-05, "loss": 0.1753, "step": 21752 }, { "epoch": 0.3879891556379981, "grad_norm": 0.29797378182411194, "learning_rate": 3.8395512169762406e-05, "loss": 0.1533, "step": 21753 }, { "epoch": 0.38800699175971176, "grad_norm": 0.2928731441497803, "learning_rate": 3.839419793474723e-05, "loss": 0.2148, "step": 21754 }, { "epoch": 0.38802482788142545, "grad_norm": 0.2856592833995819, "learning_rate": 3.839288364781154e-05, "loss": 0.1616, "step": 21755 }, { "epoch": 0.38804266400313914, "grad_norm": 0.26209142804145813, "learning_rate": 3.839156930896039e-05, "loss": 0.168, "step": 21756 }, { "epoch": 0.3880605001248528, "grad_norm": 0.2429530918598175, "learning_rate": 3.839025491819891e-05, "loss": 0.2252, "step": 21757 }, { "epoch": 0.38807833624656657, "grad_norm": 0.46106863021850586, "learning_rate": 3.838894047553217e-05, "loss": 0.1843, "step": 21758 }, { "epoch": 0.38809617236828026, "grad_norm": 0.2597132623195648, "learning_rate": 3.838762598096527e-05, "loss": 0.1671, "step": 21759 }, { "epoch": 0.38811400848999394, "grad_norm": 0.2598365247249603, "learning_rate": 3.838631143450331e-05, "loss": 0.1248, "step": 21760 }, { "epoch": 0.38813184461170763, "grad_norm": 0.23313425481319427, "learning_rate": 3.8384996836151374e-05, "loss": 0.1692, "step": 21761 }, { "epoch": 0.3881496807334213, "grad_norm": 0.2174071967601776, "learning_rate": 3.838368218591457e-05, "loss": 0.1385, "step": 21762 }, { "epoch": 0.388167516855135, "grad_norm": 0.3024972975254059, "learning_rate": 3.8382367483797996e-05, "loss": 0.2051, "step": 21763 }, { "epoch": 0.3881853529768487, "grad_norm": 0.286173552274704, "learning_rate": 3.8381052729806745e-05, "loss": 0.168, "step": 21764 }, { "epoch": 0.3882031890985624, "grad_norm": 0.4113507866859436, "learning_rate": 3.837973792394591e-05, "loss": 0.1419, "step": 21765 }, { "epoch": 0.38822102522027613, "grad_norm": 0.2892780303955078, "learning_rate": 3.837842306622059e-05, "loss": 0.1706, "step": 21766 }, { "epoch": 0.3882388613419898, "grad_norm": 0.27912452816963196, "learning_rate": 3.837710815663589e-05, "loss": 0.2151, "step": 21767 }, { "epoch": 0.3882566974637035, "grad_norm": 0.3342384696006775, "learning_rate": 3.8375793195196886e-05, "loss": 0.1705, "step": 21768 }, { "epoch": 0.3882745335854172, "grad_norm": 0.2256297618150711, "learning_rate": 3.8374478181908684e-05, "loss": 0.1586, "step": 21769 }, { "epoch": 0.3882923697071309, "grad_norm": 0.1997583657503128, "learning_rate": 3.837316311677639e-05, "loss": 0.171, "step": 21770 }, { "epoch": 0.38831020582884457, "grad_norm": 0.2296144664287567, "learning_rate": 3.8371847999805096e-05, "loss": 0.159, "step": 21771 }, { "epoch": 0.38832804195055826, "grad_norm": 0.30388113856315613, "learning_rate": 3.837053283099992e-05, "loss": 0.1758, "step": 21772 }, { "epoch": 0.38834587807227194, "grad_norm": 0.2751940190792084, "learning_rate": 3.8369217610365916e-05, "loss": 0.1995, "step": 21773 }, { "epoch": 0.3883637141939857, "grad_norm": 0.27325740456581116, "learning_rate": 3.836790233790821e-05, "loss": 0.1748, "step": 21774 }, { "epoch": 0.3883815503156994, "grad_norm": 0.2639085054397583, "learning_rate": 3.836658701363191e-05, "loss": 0.189, "step": 21775 }, { "epoch": 0.38839938643741306, "grad_norm": 0.2682328224182129, "learning_rate": 3.836527163754209e-05, "loss": 0.1438, "step": 21776 }, { "epoch": 0.38841722255912675, "grad_norm": 0.2557844817638397, "learning_rate": 3.836395620964387e-05, "loss": 0.1246, "step": 21777 }, { "epoch": 0.38843505868084044, "grad_norm": 0.3033906817436218, "learning_rate": 3.836264072994233e-05, "loss": 0.1671, "step": 21778 }, { "epoch": 0.3884528948025541, "grad_norm": 0.25656500458717346, "learning_rate": 3.836132519844259e-05, "loss": 0.1269, "step": 21779 }, { "epoch": 0.3884707309242678, "grad_norm": 0.22285985946655273, "learning_rate": 3.836000961514974e-05, "loss": 0.126, "step": 21780 }, { "epoch": 0.3884885670459815, "grad_norm": 0.3256186842918396, "learning_rate": 3.835869398006887e-05, "loss": 0.201, "step": 21781 }, { "epoch": 0.3885064031676952, "grad_norm": 0.21302203834056854, "learning_rate": 3.835737829320508e-05, "loss": 0.116, "step": 21782 }, { "epoch": 0.38852423928940893, "grad_norm": 0.24312090873718262, "learning_rate": 3.83560625545635e-05, "loss": 0.1744, "step": 21783 }, { "epoch": 0.3885420754111226, "grad_norm": 0.31743359565734863, "learning_rate": 3.8354746764149194e-05, "loss": 0.1298, "step": 21784 }, { "epoch": 0.3885599115328363, "grad_norm": 0.23558802902698517, "learning_rate": 3.835343092196728e-05, "loss": 0.1825, "step": 21785 }, { "epoch": 0.38857774765455, "grad_norm": 0.21505975723266602, "learning_rate": 3.835211502802285e-05, "loss": 0.1673, "step": 21786 }, { "epoch": 0.3885955837762637, "grad_norm": 0.3972247540950775, "learning_rate": 3.835079908232102e-05, "loss": 0.1849, "step": 21787 }, { "epoch": 0.3886134198979774, "grad_norm": 0.26634714007377625, "learning_rate": 3.834948308486688e-05, "loss": 0.1688, "step": 21788 }, { "epoch": 0.38863125601969106, "grad_norm": 0.2278829962015152, "learning_rate": 3.8348167035665525e-05, "loss": 0.1511, "step": 21789 }, { "epoch": 0.38864909214140475, "grad_norm": 0.32002341747283936, "learning_rate": 3.834685093472207e-05, "loss": 0.1361, "step": 21790 }, { "epoch": 0.3886669282631185, "grad_norm": 0.2517262399196625, "learning_rate": 3.8345534782041614e-05, "loss": 0.148, "step": 21791 }, { "epoch": 0.3886847643848322, "grad_norm": 0.24844186007976532, "learning_rate": 3.834421857762925e-05, "loss": 0.168, "step": 21792 }, { "epoch": 0.38870260050654587, "grad_norm": 0.3359774947166443, "learning_rate": 3.8342902321490095e-05, "loss": 0.2133, "step": 21793 }, { "epoch": 0.38872043662825956, "grad_norm": 0.24227365851402283, "learning_rate": 3.834158601362923e-05, "loss": 0.1768, "step": 21794 }, { "epoch": 0.38873827274997325, "grad_norm": 0.23671619594097137, "learning_rate": 3.8340269654051775e-05, "loss": 0.1648, "step": 21795 }, { "epoch": 0.38875610887168693, "grad_norm": 0.3522777855396271, "learning_rate": 3.8338953242762826e-05, "loss": 0.2152, "step": 21796 }, { "epoch": 0.3887739449934006, "grad_norm": 0.21119537949562073, "learning_rate": 3.833763677976748e-05, "loss": 0.1568, "step": 21797 }, { "epoch": 0.3887917811151143, "grad_norm": 0.28719526529312134, "learning_rate": 3.8336320265070865e-05, "loss": 0.1569, "step": 21798 }, { "epoch": 0.388809617236828, "grad_norm": 0.20979127287864685, "learning_rate": 3.8335003698678053e-05, "loss": 0.1177, "step": 21799 }, { "epoch": 0.38882745335854174, "grad_norm": 0.2139885425567627, "learning_rate": 3.8333687080594175e-05, "loss": 0.1463, "step": 21800 }, { "epoch": 0.38884528948025543, "grad_norm": 0.42440083622932434, "learning_rate": 3.8332370410824305e-05, "loss": 0.1749, "step": 21801 }, { "epoch": 0.3888631256019691, "grad_norm": 0.2534075081348419, "learning_rate": 3.833105368937356e-05, "loss": 0.1405, "step": 21802 }, { "epoch": 0.3888809617236828, "grad_norm": 0.3661023676395416, "learning_rate": 3.832973691624706e-05, "loss": 0.1899, "step": 21803 }, { "epoch": 0.3888987978453965, "grad_norm": 0.3053882420063019, "learning_rate": 3.832842009144989e-05, "loss": 0.1715, "step": 21804 }, { "epoch": 0.3889166339671102, "grad_norm": 0.32161349058151245, "learning_rate": 3.8327103214987156e-05, "loss": 0.1542, "step": 21805 }, { "epoch": 0.38893447008882387, "grad_norm": 0.23700089752674103, "learning_rate": 3.832578628686397e-05, "loss": 0.1774, "step": 21806 }, { "epoch": 0.38895230621053756, "grad_norm": 0.3484228849411011, "learning_rate": 3.832446930708544e-05, "loss": 0.1781, "step": 21807 }, { "epoch": 0.3889701423322513, "grad_norm": 0.3152025640010834, "learning_rate": 3.832315227565666e-05, "loss": 0.1742, "step": 21808 }, { "epoch": 0.388987978453965, "grad_norm": 0.18587124347686768, "learning_rate": 3.832183519258274e-05, "loss": 0.1241, "step": 21809 }, { "epoch": 0.3890058145756787, "grad_norm": 0.2782955765724182, "learning_rate": 3.832051805786878e-05, "loss": 0.1613, "step": 21810 }, { "epoch": 0.38902365069739236, "grad_norm": 0.30954328179359436, "learning_rate": 3.83192008715199e-05, "loss": 0.2096, "step": 21811 }, { "epoch": 0.38904148681910605, "grad_norm": 0.2265925109386444, "learning_rate": 3.8317883633541195e-05, "loss": 0.1692, "step": 21812 }, { "epoch": 0.38905932294081974, "grad_norm": 0.22565065324306488, "learning_rate": 3.8316566343937774e-05, "loss": 0.1624, "step": 21813 }, { "epoch": 0.38907715906253343, "grad_norm": 0.35718339681625366, "learning_rate": 3.8315249002714737e-05, "loss": 0.1792, "step": 21814 }, { "epoch": 0.3890949951842471, "grad_norm": 0.2791139781475067, "learning_rate": 3.8313931609877204e-05, "loss": 0.1936, "step": 21815 }, { "epoch": 0.3891128313059608, "grad_norm": 0.2908702790737152, "learning_rate": 3.8312614165430266e-05, "loss": 0.1762, "step": 21816 }, { "epoch": 0.38913066742767455, "grad_norm": 0.3196234703063965, "learning_rate": 3.831129666937904e-05, "loss": 0.1217, "step": 21817 }, { "epoch": 0.38914850354938824, "grad_norm": 0.22735252976417542, "learning_rate": 3.830997912172863e-05, "loss": 0.1548, "step": 21818 }, { "epoch": 0.3891663396711019, "grad_norm": 0.29175421595573425, "learning_rate": 3.830866152248414e-05, "loss": 0.1526, "step": 21819 }, { "epoch": 0.3891841757928156, "grad_norm": 0.330176442861557, "learning_rate": 3.830734387165069e-05, "loss": 0.2183, "step": 21820 }, { "epoch": 0.3892020119145293, "grad_norm": 0.27319836616516113, "learning_rate": 3.8306026169233375e-05, "loss": 0.1433, "step": 21821 }, { "epoch": 0.389219848036243, "grad_norm": 0.3326437175273895, "learning_rate": 3.830470841523731e-05, "loss": 0.2051, "step": 21822 }, { "epoch": 0.3892376841579567, "grad_norm": 0.2857581079006195, "learning_rate": 3.83033906096676e-05, "loss": 0.1825, "step": 21823 }, { "epoch": 0.38925552027967036, "grad_norm": 0.22990186512470245, "learning_rate": 3.830207275252934e-05, "loss": 0.1832, "step": 21824 }, { "epoch": 0.3892733564013841, "grad_norm": 0.2547556161880493, "learning_rate": 3.830075484382767e-05, "loss": 0.2202, "step": 21825 }, { "epoch": 0.3892911925230978, "grad_norm": 0.2691248059272766, "learning_rate": 3.829943688356767e-05, "loss": 0.1813, "step": 21826 }, { "epoch": 0.3893090286448115, "grad_norm": 0.2959471046924591, "learning_rate": 3.8298118871754465e-05, "loss": 0.1736, "step": 21827 }, { "epoch": 0.38932686476652517, "grad_norm": 0.29780328273773193, "learning_rate": 3.829680080839315e-05, "loss": 0.1457, "step": 21828 }, { "epoch": 0.38934470088823886, "grad_norm": 0.26026979088783264, "learning_rate": 3.829548269348885e-05, "loss": 0.1616, "step": 21829 }, { "epoch": 0.38936253700995255, "grad_norm": 0.2784067690372467, "learning_rate": 3.829416452704666e-05, "loss": 0.214, "step": 21830 }, { "epoch": 0.38938037313166624, "grad_norm": 0.23501050472259521, "learning_rate": 3.82928463090717e-05, "loss": 0.1798, "step": 21831 }, { "epoch": 0.3893982092533799, "grad_norm": 0.2049226462841034, "learning_rate": 3.829152803956908e-05, "loss": 0.1342, "step": 21832 }, { "epoch": 0.38941604537509367, "grad_norm": 0.3493288457393646, "learning_rate": 3.8290209718543896e-05, "loss": 0.1866, "step": 21833 }, { "epoch": 0.38943388149680735, "grad_norm": 0.3064088225364685, "learning_rate": 3.828889134600128e-05, "loss": 0.1812, "step": 21834 }, { "epoch": 0.38945171761852104, "grad_norm": 0.3190014660358429, "learning_rate": 3.828757292194633e-05, "loss": 0.1919, "step": 21835 }, { "epoch": 0.38946955374023473, "grad_norm": 0.23864136636257172, "learning_rate": 3.828625444638415e-05, "loss": 0.1391, "step": 21836 }, { "epoch": 0.3894873898619484, "grad_norm": 0.34682396054267883, "learning_rate": 3.828493591931986e-05, "loss": 0.1363, "step": 21837 }, { "epoch": 0.3895052259836621, "grad_norm": 0.21826475858688354, "learning_rate": 3.8283617340758584e-05, "loss": 0.1496, "step": 21838 }, { "epoch": 0.3895230621053758, "grad_norm": 0.2667967677116394, "learning_rate": 3.828229871070541e-05, "loss": 0.1047, "step": 21839 }, { "epoch": 0.3895408982270895, "grad_norm": 0.30081990361213684, "learning_rate": 3.828098002916545e-05, "loss": 0.1719, "step": 21840 }, { "epoch": 0.38955873434880317, "grad_norm": 0.26464831829071045, "learning_rate": 3.827966129614384e-05, "loss": 0.1065, "step": 21841 }, { "epoch": 0.3895765704705169, "grad_norm": 0.2265709638595581, "learning_rate": 3.827834251164567e-05, "loss": 0.1594, "step": 21842 }, { "epoch": 0.3895944065922306, "grad_norm": 0.2632579803466797, "learning_rate": 3.827702367567606e-05, "loss": 0.1339, "step": 21843 }, { "epoch": 0.3896122427139443, "grad_norm": 0.2761947810649872, "learning_rate": 3.827570478824011e-05, "loss": 0.1955, "step": 21844 }, { "epoch": 0.389630078835658, "grad_norm": 0.27302491664886475, "learning_rate": 3.827438584934295e-05, "loss": 0.1887, "step": 21845 }, { "epoch": 0.38964791495737167, "grad_norm": 0.26912567019462585, "learning_rate": 3.827306685898968e-05, "loss": 0.1366, "step": 21846 }, { "epoch": 0.38966575107908535, "grad_norm": 0.2460840344429016, "learning_rate": 3.8271747817185434e-05, "loss": 0.0945, "step": 21847 }, { "epoch": 0.38968358720079904, "grad_norm": 0.28369140625, "learning_rate": 3.82704287239353e-05, "loss": 0.1759, "step": 21848 }, { "epoch": 0.38970142332251273, "grad_norm": 0.2833975553512573, "learning_rate": 3.82691095792444e-05, "loss": 0.1409, "step": 21849 }, { "epoch": 0.3897192594442265, "grad_norm": 0.24005232751369476, "learning_rate": 3.826779038311785e-05, "loss": 0.1578, "step": 21850 }, { "epoch": 0.38973709556594016, "grad_norm": 0.35843420028686523, "learning_rate": 3.8266471135560756e-05, "loss": 0.149, "step": 21851 }, { "epoch": 0.38975493168765385, "grad_norm": 0.2971751093864441, "learning_rate": 3.8265151836578237e-05, "loss": 0.1273, "step": 21852 }, { "epoch": 0.38977276780936754, "grad_norm": 0.3116188049316406, "learning_rate": 3.8263832486175416e-05, "loss": 0.22, "step": 21853 }, { "epoch": 0.3897906039310812, "grad_norm": 0.28640657663345337, "learning_rate": 3.82625130843574e-05, "loss": 0.15, "step": 21854 }, { "epoch": 0.3898084400527949, "grad_norm": 0.2245357185602188, "learning_rate": 3.82611936311293e-05, "loss": 0.1454, "step": 21855 }, { "epoch": 0.3898262761745086, "grad_norm": 0.23760348558425903, "learning_rate": 3.825987412649623e-05, "loss": 0.1515, "step": 21856 }, { "epoch": 0.3898441122962223, "grad_norm": 0.21389377117156982, "learning_rate": 3.8258554570463314e-05, "loss": 0.1222, "step": 21857 }, { "epoch": 0.389861948417936, "grad_norm": 0.30927756428718567, "learning_rate": 3.825723496303565e-05, "loss": 0.1904, "step": 21858 }, { "epoch": 0.3898797845396497, "grad_norm": 0.31562522053718567, "learning_rate": 3.825591530421837e-05, "loss": 0.1818, "step": 21859 }, { "epoch": 0.3898976206613634, "grad_norm": 0.20763133466243744, "learning_rate": 3.8254595594016594e-05, "loss": 0.1643, "step": 21860 }, { "epoch": 0.3899154567830771, "grad_norm": 0.27545467019081116, "learning_rate": 3.825327583243541e-05, "loss": 0.1748, "step": 21861 }, { "epoch": 0.3899332929047908, "grad_norm": 0.2428862750530243, "learning_rate": 3.825195601947997e-05, "loss": 0.1759, "step": 21862 }, { "epoch": 0.3899511290265045, "grad_norm": 0.2666897475719452, "learning_rate": 3.825063615515536e-05, "loss": 0.1404, "step": 21863 }, { "epoch": 0.38996896514821816, "grad_norm": 0.27640974521636963, "learning_rate": 3.8249316239466714e-05, "loss": 0.2102, "step": 21864 }, { "epoch": 0.38998680126993185, "grad_norm": 0.3013707995414734, "learning_rate": 3.824799627241913e-05, "loss": 0.1749, "step": 21865 }, { "epoch": 0.39000463739164554, "grad_norm": 0.21664096415042877, "learning_rate": 3.824667625401774e-05, "loss": 0.1292, "step": 21866 }, { "epoch": 0.3900224735133593, "grad_norm": 0.48859432339668274, "learning_rate": 3.8245356184267665e-05, "loss": 0.214, "step": 21867 }, { "epoch": 0.39004030963507297, "grad_norm": 0.24816346168518066, "learning_rate": 3.8244036063174015e-05, "loss": 0.1271, "step": 21868 }, { "epoch": 0.39005814575678666, "grad_norm": 0.2757461369037628, "learning_rate": 3.824271589074191e-05, "loss": 0.1222, "step": 21869 }, { "epoch": 0.39007598187850034, "grad_norm": 0.30328255891799927, "learning_rate": 3.824139566697645e-05, "loss": 0.131, "step": 21870 }, { "epoch": 0.39009381800021403, "grad_norm": 0.22096651792526245, "learning_rate": 3.824007539188278e-05, "loss": 0.1842, "step": 21871 }, { "epoch": 0.3901116541219277, "grad_norm": 0.3081246018409729, "learning_rate": 3.8238755065466e-05, "loss": 0.1937, "step": 21872 }, { "epoch": 0.3901294902436414, "grad_norm": 0.363783061504364, "learning_rate": 3.823743468773123e-05, "loss": 0.1664, "step": 21873 }, { "epoch": 0.3901473263653551, "grad_norm": 0.3148186206817627, "learning_rate": 3.8236114258683594e-05, "loss": 0.1795, "step": 21874 }, { "epoch": 0.3901651624870688, "grad_norm": 0.21325372159481049, "learning_rate": 3.8234793778328204e-05, "loss": 0.121, "step": 21875 }, { "epoch": 0.3901829986087825, "grad_norm": 0.2629788815975189, "learning_rate": 3.8233473246670196e-05, "loss": 0.1563, "step": 21876 }, { "epoch": 0.3902008347304962, "grad_norm": 0.29258981347084045, "learning_rate": 3.823215266371466e-05, "loss": 0.1693, "step": 21877 }, { "epoch": 0.3902186708522099, "grad_norm": 0.2947407066822052, "learning_rate": 3.823083202946673e-05, "loss": 0.1726, "step": 21878 }, { "epoch": 0.3902365069739236, "grad_norm": 0.25252288579940796, "learning_rate": 3.822951134393152e-05, "loss": 0.1148, "step": 21879 }, { "epoch": 0.3902543430956373, "grad_norm": 0.28613191843032837, "learning_rate": 3.822819060711418e-05, "loss": 0.1277, "step": 21880 }, { "epoch": 0.39027217921735097, "grad_norm": 0.32311660051345825, "learning_rate": 3.8226869819019786e-05, "loss": 0.2133, "step": 21881 }, { "epoch": 0.39029001533906466, "grad_norm": 0.2721821963787079, "learning_rate": 3.822554897965348e-05, "loss": 0.1831, "step": 21882 }, { "epoch": 0.39030785146077834, "grad_norm": 0.35092875361442566, "learning_rate": 3.822422808902037e-05, "loss": 0.2193, "step": 21883 }, { "epoch": 0.3903256875824921, "grad_norm": 0.2275260090827942, "learning_rate": 3.82229071471256e-05, "loss": 0.1633, "step": 21884 }, { "epoch": 0.3903435237042058, "grad_norm": 0.2982957065105438, "learning_rate": 3.822158615397426e-05, "loss": 0.1646, "step": 21885 }, { "epoch": 0.39036135982591946, "grad_norm": 0.2884788513183594, "learning_rate": 3.822026510957149e-05, "loss": 0.1416, "step": 21886 }, { "epoch": 0.39037919594763315, "grad_norm": 0.38194820284843445, "learning_rate": 3.821894401392241e-05, "loss": 0.1224, "step": 21887 }, { "epoch": 0.39039703206934684, "grad_norm": 0.2699371576309204, "learning_rate": 3.821762286703213e-05, "loss": 0.1196, "step": 21888 }, { "epoch": 0.3904148681910605, "grad_norm": 0.20406337082386017, "learning_rate": 3.821630166890579e-05, "loss": 0.1091, "step": 21889 }, { "epoch": 0.3904327043127742, "grad_norm": 0.30036500096321106, "learning_rate": 3.8214980419548495e-05, "loss": 0.1745, "step": 21890 }, { "epoch": 0.3904505404344879, "grad_norm": 0.2711452543735504, "learning_rate": 3.8213659118965375e-05, "loss": 0.1545, "step": 21891 }, { "epoch": 0.39046837655620165, "grad_norm": 0.23234771192073822, "learning_rate": 3.8212337767161536e-05, "loss": 0.1368, "step": 21892 }, { "epoch": 0.39048621267791533, "grad_norm": 0.22849218547344208, "learning_rate": 3.821101636414212e-05, "loss": 0.1523, "step": 21893 }, { "epoch": 0.390504048799629, "grad_norm": 0.3615040183067322, "learning_rate": 3.820969490991224e-05, "loss": 0.1711, "step": 21894 }, { "epoch": 0.3905218849213427, "grad_norm": 0.24644432961940765, "learning_rate": 3.820837340447703e-05, "loss": 0.1279, "step": 21895 }, { "epoch": 0.3905397210430564, "grad_norm": 0.25518178939819336, "learning_rate": 3.820705184784159e-05, "loss": 0.1138, "step": 21896 }, { "epoch": 0.3905575571647701, "grad_norm": 0.25514426827430725, "learning_rate": 3.820573024001106e-05, "loss": 0.1536, "step": 21897 }, { "epoch": 0.3905753932864838, "grad_norm": 0.2216884195804596, "learning_rate": 3.8204408580990556e-05, "loss": 0.1854, "step": 21898 }, { "epoch": 0.39059322940819746, "grad_norm": 0.2821498215198517, "learning_rate": 3.820308687078521e-05, "loss": 0.1779, "step": 21899 }, { "epoch": 0.39061106552991115, "grad_norm": 0.3352297246456146, "learning_rate": 3.8201765109400134e-05, "loss": 0.1788, "step": 21900 }, { "epoch": 0.3906289016516249, "grad_norm": 0.39840319752693176, "learning_rate": 3.820044329684046e-05, "loss": 0.206, "step": 21901 }, { "epoch": 0.3906467377733386, "grad_norm": 0.29875972867012024, "learning_rate": 3.8199121433111306e-05, "loss": 0.1897, "step": 21902 }, { "epoch": 0.39066457389505227, "grad_norm": 0.26019251346588135, "learning_rate": 3.81977995182178e-05, "loss": 0.1171, "step": 21903 }, { "epoch": 0.39068241001676596, "grad_norm": 0.3058810830116272, "learning_rate": 3.819647755216507e-05, "loss": 0.138, "step": 21904 }, { "epoch": 0.39070024613847965, "grad_norm": 0.3068974018096924, "learning_rate": 3.819515553495822e-05, "loss": 0.1239, "step": 21905 }, { "epoch": 0.39071808226019333, "grad_norm": 0.35325440764427185, "learning_rate": 3.81938334666024e-05, "loss": 0.1634, "step": 21906 }, { "epoch": 0.390735918381907, "grad_norm": 0.2938697040081024, "learning_rate": 3.8192511347102725e-05, "loss": 0.176, "step": 21907 }, { "epoch": 0.3907537545036207, "grad_norm": 0.29065683484077454, "learning_rate": 3.8191189176464316e-05, "loss": 0.1425, "step": 21908 }, { "epoch": 0.39077159062533445, "grad_norm": 0.4246031939983368, "learning_rate": 3.81898669546923e-05, "loss": 0.1649, "step": 21909 }, { "epoch": 0.39078942674704814, "grad_norm": 0.22863398492336273, "learning_rate": 3.818854468179181e-05, "loss": 0.1612, "step": 21910 }, { "epoch": 0.39080726286876183, "grad_norm": 0.27201688289642334, "learning_rate": 3.818722235776796e-05, "loss": 0.1632, "step": 21911 }, { "epoch": 0.3908250989904755, "grad_norm": 0.19823288917541504, "learning_rate": 3.818589998262589e-05, "loss": 0.152, "step": 21912 }, { "epoch": 0.3908429351121892, "grad_norm": 0.35975563526153564, "learning_rate": 3.81845775563707e-05, "loss": 0.1282, "step": 21913 }, { "epoch": 0.3908607712339029, "grad_norm": 0.22304262220859528, "learning_rate": 3.8183255079007555e-05, "loss": 0.1151, "step": 21914 }, { "epoch": 0.3908786073556166, "grad_norm": 0.3620227575302124, "learning_rate": 3.818193255054155e-05, "loss": 0.1705, "step": 21915 }, { "epoch": 0.39089644347733027, "grad_norm": 0.2189723700284958, "learning_rate": 3.818060997097782e-05, "loss": 0.1804, "step": 21916 }, { "epoch": 0.39091427959904396, "grad_norm": 0.2931801974773407, "learning_rate": 3.8179287340321494e-05, "loss": 0.1589, "step": 21917 }, { "epoch": 0.3909321157207577, "grad_norm": 0.2061789631843567, "learning_rate": 3.8177964658577706e-05, "loss": 0.1486, "step": 21918 }, { "epoch": 0.3909499518424714, "grad_norm": 0.41693368554115295, "learning_rate": 3.817664192575156e-05, "loss": 0.1699, "step": 21919 }, { "epoch": 0.3909677879641851, "grad_norm": 0.25839945673942566, "learning_rate": 3.817531914184821e-05, "loss": 0.131, "step": 21920 }, { "epoch": 0.39098562408589876, "grad_norm": 0.28907933831214905, "learning_rate": 3.817399630687277e-05, "loss": 0.1826, "step": 21921 }, { "epoch": 0.39100346020761245, "grad_norm": 0.2138252705335617, "learning_rate": 3.817267342083037e-05, "loss": 0.1296, "step": 21922 }, { "epoch": 0.39102129632932614, "grad_norm": 0.35851821303367615, "learning_rate": 3.8171350483726145e-05, "loss": 0.1647, "step": 21923 }, { "epoch": 0.39103913245103983, "grad_norm": 0.26591426134109497, "learning_rate": 3.81700274955652e-05, "loss": 0.1395, "step": 21924 }, { "epoch": 0.3910569685727535, "grad_norm": 0.37865716218948364, "learning_rate": 3.816870445635269e-05, "loss": 0.2024, "step": 21925 }, { "epoch": 0.39107480469446726, "grad_norm": 0.25254198908805847, "learning_rate": 3.8167381366093736e-05, "loss": 0.1739, "step": 21926 }, { "epoch": 0.39109264081618095, "grad_norm": 0.29420819878578186, "learning_rate": 3.816605822479346e-05, "loss": 0.1693, "step": 21927 }, { "epoch": 0.39111047693789464, "grad_norm": 0.23101702332496643, "learning_rate": 3.8164735032457e-05, "loss": 0.1448, "step": 21928 }, { "epoch": 0.3911283130596083, "grad_norm": 0.2782047688961029, "learning_rate": 3.816341178908947e-05, "loss": 0.1411, "step": 21929 }, { "epoch": 0.391146149181322, "grad_norm": 0.27685534954071045, "learning_rate": 3.816208849469601e-05, "loss": 0.1306, "step": 21930 }, { "epoch": 0.3911639853030357, "grad_norm": 0.3027855455875397, "learning_rate": 3.816076514928176e-05, "loss": 0.1756, "step": 21931 }, { "epoch": 0.3911818214247494, "grad_norm": 0.34002652764320374, "learning_rate": 3.815944175285183e-05, "loss": 0.1714, "step": 21932 }, { "epoch": 0.3911996575464631, "grad_norm": 0.24387463927268982, "learning_rate": 3.815811830541136e-05, "loss": 0.1842, "step": 21933 }, { "epoch": 0.3912174936681768, "grad_norm": 0.28653308749198914, "learning_rate": 3.815679480696548e-05, "loss": 0.1682, "step": 21934 }, { "epoch": 0.3912353297898905, "grad_norm": 0.18379320204257965, "learning_rate": 3.815547125751931e-05, "loss": 0.1411, "step": 21935 }, { "epoch": 0.3912531659116042, "grad_norm": 0.29890039563179016, "learning_rate": 3.815414765707801e-05, "loss": 0.1422, "step": 21936 }, { "epoch": 0.3912710020333179, "grad_norm": 0.2728569507598877, "learning_rate": 3.815282400564668e-05, "loss": 0.1703, "step": 21937 }, { "epoch": 0.39128883815503157, "grad_norm": 0.2689141631126404, "learning_rate": 3.815150030323046e-05, "loss": 0.1852, "step": 21938 }, { "epoch": 0.39130667427674526, "grad_norm": 0.24566493928432465, "learning_rate": 3.8150176549834484e-05, "loss": 0.1185, "step": 21939 }, { "epoch": 0.39132451039845895, "grad_norm": 0.2373753786087036, "learning_rate": 3.8148852745463883e-05, "loss": 0.1134, "step": 21940 }, { "epoch": 0.39134234652017263, "grad_norm": 0.27469125390052795, "learning_rate": 3.814752889012378e-05, "loss": 0.1914, "step": 21941 }, { "epoch": 0.3913601826418863, "grad_norm": 0.26927450299263, "learning_rate": 3.814620498381932e-05, "loss": 0.1366, "step": 21942 }, { "epoch": 0.39137801876360007, "grad_norm": 0.26489993929862976, "learning_rate": 3.814488102655563e-05, "loss": 0.1604, "step": 21943 }, { "epoch": 0.39139585488531375, "grad_norm": 0.23299403488636017, "learning_rate": 3.814355701833784e-05, "loss": 0.1635, "step": 21944 }, { "epoch": 0.39141369100702744, "grad_norm": 0.22611603140830994, "learning_rate": 3.814223295917107e-05, "loss": 0.1619, "step": 21945 }, { "epoch": 0.39143152712874113, "grad_norm": 0.2590923309326172, "learning_rate": 3.814090884906049e-05, "loss": 0.1148, "step": 21946 }, { "epoch": 0.3914493632504548, "grad_norm": 0.2542882561683655, "learning_rate": 3.81395846880112e-05, "loss": 0.1659, "step": 21947 }, { "epoch": 0.3914671993721685, "grad_norm": 0.3152708411216736, "learning_rate": 3.813826047602833e-05, "loss": 0.1694, "step": 21948 }, { "epoch": 0.3914850354938822, "grad_norm": 0.26109474897384644, "learning_rate": 3.8136936213117036e-05, "loss": 0.1467, "step": 21949 }, { "epoch": 0.3915028716155959, "grad_norm": 0.26293495297431946, "learning_rate": 3.813561189928243e-05, "loss": 0.1767, "step": 21950 }, { "epoch": 0.3915207077373096, "grad_norm": 0.3232541084289551, "learning_rate": 3.8134287534529665e-05, "loss": 0.1565, "step": 21951 }, { "epoch": 0.3915385438590233, "grad_norm": 0.28313371539115906, "learning_rate": 3.8132963118863864e-05, "loss": 0.197, "step": 21952 }, { "epoch": 0.391556379980737, "grad_norm": 0.3670051693916321, "learning_rate": 3.8131638652290156e-05, "loss": 0.1344, "step": 21953 }, { "epoch": 0.3915742161024507, "grad_norm": 0.20290377736091614, "learning_rate": 3.813031413481369e-05, "loss": 0.1458, "step": 21954 }, { "epoch": 0.3915920522241644, "grad_norm": 0.31401917338371277, "learning_rate": 3.8128989566439586e-05, "loss": 0.2025, "step": 21955 }, { "epoch": 0.39160988834587807, "grad_norm": 0.31459882855415344, "learning_rate": 3.812766494717298e-05, "loss": 0.1763, "step": 21956 }, { "epoch": 0.39162772446759175, "grad_norm": 0.3275716006755829, "learning_rate": 3.812634027701901e-05, "loss": 0.1369, "step": 21957 }, { "epoch": 0.39164556058930544, "grad_norm": 0.23377977311611176, "learning_rate": 3.8125015555982824e-05, "loss": 0.179, "step": 21958 }, { "epoch": 0.39166339671101913, "grad_norm": 0.18560905754566193, "learning_rate": 3.8123690784069534e-05, "loss": 0.1205, "step": 21959 }, { "epoch": 0.3916812328327329, "grad_norm": 0.2274252325296402, "learning_rate": 3.812236596128429e-05, "loss": 0.1436, "step": 21960 }, { "epoch": 0.39169906895444656, "grad_norm": 0.21028538048267365, "learning_rate": 3.812104108763223e-05, "loss": 0.1562, "step": 21961 }, { "epoch": 0.39171690507616025, "grad_norm": 0.24540400505065918, "learning_rate": 3.811971616311847e-05, "loss": 0.1006, "step": 21962 }, { "epoch": 0.39173474119787394, "grad_norm": 0.25537940859794617, "learning_rate": 3.811839118774816e-05, "loss": 0.1304, "step": 21963 }, { "epoch": 0.3917525773195876, "grad_norm": 0.20558589696884155, "learning_rate": 3.811706616152644e-05, "loss": 0.1664, "step": 21964 }, { "epoch": 0.3917704134413013, "grad_norm": 0.3093239367008209, "learning_rate": 3.811574108445843e-05, "loss": 0.1098, "step": 21965 }, { "epoch": 0.391788249563015, "grad_norm": 0.3342975974082947, "learning_rate": 3.8114415956549296e-05, "loss": 0.1729, "step": 21966 }, { "epoch": 0.3918060856847287, "grad_norm": 0.2315700501203537, "learning_rate": 3.811309077780415e-05, "loss": 0.1863, "step": 21967 }, { "epoch": 0.39182392180644243, "grad_norm": 0.2535211741924286, "learning_rate": 3.811176554822813e-05, "loss": 0.1636, "step": 21968 }, { "epoch": 0.3918417579281561, "grad_norm": 0.2755582928657532, "learning_rate": 3.811044026782637e-05, "loss": 0.192, "step": 21969 }, { "epoch": 0.3918595940498698, "grad_norm": 0.2801467478275299, "learning_rate": 3.810911493660403e-05, "loss": 0.1459, "step": 21970 }, { "epoch": 0.3918774301715835, "grad_norm": 0.2801015079021454, "learning_rate": 3.810778955456623e-05, "loss": 0.1518, "step": 21971 }, { "epoch": 0.3918952662932972, "grad_norm": 0.29074493050575256, "learning_rate": 3.8106464121718106e-05, "loss": 0.1525, "step": 21972 }, { "epoch": 0.39191310241501087, "grad_norm": 0.25924018025398254, "learning_rate": 3.810513863806481e-05, "loss": 0.1272, "step": 21973 }, { "epoch": 0.39193093853672456, "grad_norm": 0.2612329423427582, "learning_rate": 3.810381310361146e-05, "loss": 0.1723, "step": 21974 }, { "epoch": 0.39194877465843825, "grad_norm": 0.335252046585083, "learning_rate": 3.8102487518363206e-05, "loss": 0.1788, "step": 21975 }, { "epoch": 0.39196661078015194, "grad_norm": 0.20958372950553894, "learning_rate": 3.8101161882325185e-05, "loss": 0.1775, "step": 21976 }, { "epoch": 0.3919844469018657, "grad_norm": 0.23732659220695496, "learning_rate": 3.809983619550254e-05, "loss": 0.1528, "step": 21977 }, { "epoch": 0.39200228302357937, "grad_norm": 0.38146233558654785, "learning_rate": 3.8098510457900396e-05, "loss": 0.1397, "step": 21978 }, { "epoch": 0.39202011914529306, "grad_norm": 0.27583402395248413, "learning_rate": 3.809718466952391e-05, "loss": 0.1749, "step": 21979 }, { "epoch": 0.39203795526700674, "grad_norm": 0.37987610697746277, "learning_rate": 3.809585883037822e-05, "loss": 0.1471, "step": 21980 }, { "epoch": 0.39205579138872043, "grad_norm": 0.21410000324249268, "learning_rate": 3.809453294046844e-05, "loss": 0.1316, "step": 21981 }, { "epoch": 0.3920736275104341, "grad_norm": 0.3638959527015686, "learning_rate": 3.8093206999799737e-05, "loss": 0.1574, "step": 21982 }, { "epoch": 0.3920914636321478, "grad_norm": 0.29879966378211975, "learning_rate": 3.809188100837724e-05, "loss": 0.1504, "step": 21983 }, { "epoch": 0.3921092997538615, "grad_norm": 0.2999878227710724, "learning_rate": 3.809055496620609e-05, "loss": 0.1055, "step": 21984 }, { "epoch": 0.39212713587557524, "grad_norm": 0.2939455807209015, "learning_rate": 3.808922887329143e-05, "loss": 0.1417, "step": 21985 }, { "epoch": 0.3921449719972889, "grad_norm": 0.2802325487136841, "learning_rate": 3.808790272963839e-05, "loss": 0.1563, "step": 21986 }, { "epoch": 0.3921628081190026, "grad_norm": 0.2000342160463333, "learning_rate": 3.808657653525213e-05, "loss": 0.1626, "step": 21987 }, { "epoch": 0.3921806442407163, "grad_norm": 0.22181005775928497, "learning_rate": 3.8085250290137776e-05, "loss": 0.1525, "step": 21988 }, { "epoch": 0.39219848036243, "grad_norm": 0.29537370800971985, "learning_rate": 3.8083923994300466e-05, "loss": 0.2184, "step": 21989 }, { "epoch": 0.3922163164841437, "grad_norm": 0.2631196677684784, "learning_rate": 3.808259764774536e-05, "loss": 0.1687, "step": 21990 }, { "epoch": 0.39223415260585737, "grad_norm": 0.25447559356689453, "learning_rate": 3.808127125047757e-05, "loss": 0.134, "step": 21991 }, { "epoch": 0.39225198872757105, "grad_norm": 0.2882976233959198, "learning_rate": 3.8079944802502265e-05, "loss": 0.157, "step": 21992 }, { "epoch": 0.3922698248492848, "grad_norm": 0.3343904912471771, "learning_rate": 3.807861830382457e-05, "loss": 0.1216, "step": 21993 }, { "epoch": 0.3922876609709985, "grad_norm": 0.31246238946914673, "learning_rate": 3.807729175444965e-05, "loss": 0.1805, "step": 21994 }, { "epoch": 0.3923054970927122, "grad_norm": 0.23825860023498535, "learning_rate": 3.807596515438262e-05, "loss": 0.1401, "step": 21995 }, { "epoch": 0.39232333321442586, "grad_norm": 0.2660251259803772, "learning_rate": 3.8074638503628626e-05, "loss": 0.1385, "step": 21996 }, { "epoch": 0.39234116933613955, "grad_norm": 0.2345874309539795, "learning_rate": 3.807331180219282e-05, "loss": 0.1474, "step": 21997 }, { "epoch": 0.39235900545785324, "grad_norm": 0.25704678893089294, "learning_rate": 3.8071985050080345e-05, "loss": 0.1405, "step": 21998 }, { "epoch": 0.3923768415795669, "grad_norm": 0.313232421875, "learning_rate": 3.8070658247296344e-05, "loss": 0.1126, "step": 21999 }, { "epoch": 0.3923946777012806, "grad_norm": 0.27740153670310974, "learning_rate": 3.8069331393845956e-05, "loss": 0.1568, "step": 22000 }, { "epoch": 0.3923946777012806, "eval_loss": 0.15289585292339325, "eval_runtime": 107.1293, "eval_samples_per_second": 9.559, "eval_steps_per_second": 1.596, "step": 22000 }, { "epoch": 0.3924125138229943, "grad_norm": 0.2480067014694214, "learning_rate": 3.8068004489734324e-05, "loss": 0.161, "step": 22001 }, { "epoch": 0.39243034994470805, "grad_norm": 0.31506189703941345, "learning_rate": 3.8066677534966585e-05, "loss": 0.2246, "step": 22002 }, { "epoch": 0.39244818606642173, "grad_norm": 0.19158689677715302, "learning_rate": 3.80653505295479e-05, "loss": 0.1396, "step": 22003 }, { "epoch": 0.3924660221881354, "grad_norm": 0.3690859079360962, "learning_rate": 3.8064023473483404e-05, "loss": 0.1614, "step": 22004 }, { "epoch": 0.3924838583098491, "grad_norm": 0.21442316472530365, "learning_rate": 3.8062696366778236e-05, "loss": 0.1873, "step": 22005 }, { "epoch": 0.3925016944315628, "grad_norm": 0.2317836433649063, "learning_rate": 3.806136920943755e-05, "loss": 0.164, "step": 22006 }, { "epoch": 0.3925195305532765, "grad_norm": 0.200971320271492, "learning_rate": 3.8060042001466485e-05, "loss": 0.1537, "step": 22007 }, { "epoch": 0.3925373666749902, "grad_norm": 0.2796572148799896, "learning_rate": 3.805871474287018e-05, "loss": 0.0724, "step": 22008 }, { "epoch": 0.39255520279670386, "grad_norm": 0.247846782207489, "learning_rate": 3.8057387433653795e-05, "loss": 0.1694, "step": 22009 }, { "epoch": 0.3925730389184176, "grad_norm": 0.33010581135749817, "learning_rate": 3.8056060073822466e-05, "loss": 0.1605, "step": 22010 }, { "epoch": 0.3925908750401313, "grad_norm": 0.26217830181121826, "learning_rate": 3.805473266338133e-05, "loss": 0.1904, "step": 22011 }, { "epoch": 0.392608711161845, "grad_norm": 0.303500235080719, "learning_rate": 3.805340520233555e-05, "loss": 0.174, "step": 22012 }, { "epoch": 0.39262654728355867, "grad_norm": 0.27895060181617737, "learning_rate": 3.805207769069026e-05, "loss": 0.1717, "step": 22013 }, { "epoch": 0.39264438340527236, "grad_norm": 0.2857508659362793, "learning_rate": 3.805075012845061e-05, "loss": 0.1142, "step": 22014 }, { "epoch": 0.39266221952698604, "grad_norm": 0.33048808574676514, "learning_rate": 3.804942251562174e-05, "loss": 0.1298, "step": 22015 }, { "epoch": 0.39268005564869973, "grad_norm": 0.20053231716156006, "learning_rate": 3.804809485220881e-05, "loss": 0.1246, "step": 22016 }, { "epoch": 0.3926978917704134, "grad_norm": 0.2536463737487793, "learning_rate": 3.804676713821695e-05, "loss": 0.1166, "step": 22017 }, { "epoch": 0.3927157278921271, "grad_norm": 0.24449673295021057, "learning_rate": 3.804543937365131e-05, "loss": 0.1449, "step": 22018 }, { "epoch": 0.39273356401384085, "grad_norm": 0.25147151947021484, "learning_rate": 3.804411155851706e-05, "loss": 0.1417, "step": 22019 }, { "epoch": 0.39275140013555454, "grad_norm": 0.22726039588451385, "learning_rate": 3.804278369281931e-05, "loss": 0.128, "step": 22020 }, { "epoch": 0.39276923625726823, "grad_norm": 0.23316025733947754, "learning_rate": 3.804145577656324e-05, "loss": 0.1225, "step": 22021 }, { "epoch": 0.3927870723789819, "grad_norm": 0.2329714596271515, "learning_rate": 3.8040127809753966e-05, "loss": 0.1555, "step": 22022 }, { "epoch": 0.3928049085006956, "grad_norm": 0.26174619793891907, "learning_rate": 3.803879979239667e-05, "loss": 0.1559, "step": 22023 }, { "epoch": 0.3928227446224093, "grad_norm": 0.31746765971183777, "learning_rate": 3.803747172449647e-05, "loss": 0.2248, "step": 22024 }, { "epoch": 0.392840580744123, "grad_norm": 0.2919473648071289, "learning_rate": 3.803614360605853e-05, "loss": 0.195, "step": 22025 }, { "epoch": 0.39285841686583667, "grad_norm": 0.2725481688976288, "learning_rate": 3.803481543708799e-05, "loss": 0.1591, "step": 22026 }, { "epoch": 0.3928762529875504, "grad_norm": 0.2553013563156128, "learning_rate": 3.803348721759002e-05, "loss": 0.1602, "step": 22027 }, { "epoch": 0.3928940891092641, "grad_norm": 0.32993438839912415, "learning_rate": 3.803215894756973e-05, "loss": 0.1637, "step": 22028 }, { "epoch": 0.3929119252309778, "grad_norm": 0.2857172191143036, "learning_rate": 3.803083062703231e-05, "loss": 0.1502, "step": 22029 }, { "epoch": 0.3929297613526915, "grad_norm": 0.2713981568813324, "learning_rate": 3.8029502255982875e-05, "loss": 0.16, "step": 22030 }, { "epoch": 0.39294759747440516, "grad_norm": 0.24887074530124664, "learning_rate": 3.80281738344266e-05, "loss": 0.2073, "step": 22031 }, { "epoch": 0.39296543359611885, "grad_norm": 0.34048402309417725, "learning_rate": 3.802684536236862e-05, "loss": 0.1989, "step": 22032 }, { "epoch": 0.39298326971783254, "grad_norm": 0.22031594812870026, "learning_rate": 3.802551683981408e-05, "loss": 0.1838, "step": 22033 }, { "epoch": 0.3930011058395462, "grad_norm": 0.2398686408996582, "learning_rate": 3.802418826676815e-05, "loss": 0.1797, "step": 22034 }, { "epoch": 0.39301894196125997, "grad_norm": 0.32442519068717957, "learning_rate": 3.8022859643235966e-05, "loss": 0.1654, "step": 22035 }, { "epoch": 0.39303677808297366, "grad_norm": 0.23995022475719452, "learning_rate": 3.802153096922267e-05, "loss": 0.1665, "step": 22036 }, { "epoch": 0.39305461420468735, "grad_norm": 0.21244728565216064, "learning_rate": 3.802020224473343e-05, "loss": 0.1599, "step": 22037 }, { "epoch": 0.39307245032640103, "grad_norm": 0.3076680302619934, "learning_rate": 3.8018873469773386e-05, "loss": 0.1303, "step": 22038 }, { "epoch": 0.3930902864481147, "grad_norm": 0.2760683596134186, "learning_rate": 3.8017544644347694e-05, "loss": 0.1435, "step": 22039 }, { "epoch": 0.3931081225698284, "grad_norm": 0.28017204999923706, "learning_rate": 3.8016215768461505e-05, "loss": 0.2137, "step": 22040 }, { "epoch": 0.3931259586915421, "grad_norm": 0.32485297322273254, "learning_rate": 3.801488684211997e-05, "loss": 0.1693, "step": 22041 }, { "epoch": 0.3931437948132558, "grad_norm": 0.22792267799377441, "learning_rate": 3.801355786532823e-05, "loss": 0.1126, "step": 22042 }, { "epoch": 0.3931616309349695, "grad_norm": 0.3081381916999817, "learning_rate": 3.801222883809145e-05, "loss": 0.1653, "step": 22043 }, { "epoch": 0.3931794670566832, "grad_norm": 0.28138798475265503, "learning_rate": 3.801089976041478e-05, "loss": 0.1563, "step": 22044 }, { "epoch": 0.3931973031783969, "grad_norm": 0.38139185309410095, "learning_rate": 3.800957063230336e-05, "loss": 0.1471, "step": 22045 }, { "epoch": 0.3932151393001106, "grad_norm": 0.19801881909370422, "learning_rate": 3.800824145376236e-05, "loss": 0.1639, "step": 22046 }, { "epoch": 0.3932329754218243, "grad_norm": 0.23384051024913788, "learning_rate": 3.800691222479692e-05, "loss": 0.1333, "step": 22047 }, { "epoch": 0.39325081154353797, "grad_norm": 0.28682368993759155, "learning_rate": 3.800558294541219e-05, "loss": 0.1937, "step": 22048 }, { "epoch": 0.39326864766525166, "grad_norm": 0.2321326583623886, "learning_rate": 3.800425361561334e-05, "loss": 0.1601, "step": 22049 }, { "epoch": 0.39328648378696535, "grad_norm": 0.26319414377212524, "learning_rate": 3.80029242354055e-05, "loss": 0.1716, "step": 22050 }, { "epoch": 0.39330431990867903, "grad_norm": 0.3451308608055115, "learning_rate": 3.8001594804793836e-05, "loss": 0.2004, "step": 22051 }, { "epoch": 0.3933221560303928, "grad_norm": 0.34930896759033203, "learning_rate": 3.800026532378351e-05, "loss": 0.1627, "step": 22052 }, { "epoch": 0.39333999215210647, "grad_norm": 0.17884886264801025, "learning_rate": 3.799893579237965e-05, "loss": 0.1142, "step": 22053 }, { "epoch": 0.39335782827382015, "grad_norm": 0.27442896366119385, "learning_rate": 3.7997606210587434e-05, "loss": 0.169, "step": 22054 }, { "epoch": 0.39337566439553384, "grad_norm": 0.2611648440361023, "learning_rate": 3.7996276578412015e-05, "loss": 0.1393, "step": 22055 }, { "epoch": 0.39339350051724753, "grad_norm": 0.26651236414909363, "learning_rate": 3.799494689585853e-05, "loss": 0.1643, "step": 22056 }, { "epoch": 0.3934113366389612, "grad_norm": 0.270926296710968, "learning_rate": 3.799361716293214e-05, "loss": 0.1672, "step": 22057 }, { "epoch": 0.3934291727606749, "grad_norm": 0.24135589599609375, "learning_rate": 3.7992287379638e-05, "loss": 0.1981, "step": 22058 }, { "epoch": 0.3934470088823886, "grad_norm": 0.29821598529815674, "learning_rate": 3.799095754598128e-05, "loss": 0.2353, "step": 22059 }, { "epoch": 0.3934648450041023, "grad_norm": 0.24282781779766083, "learning_rate": 3.798962766196712e-05, "loss": 0.1557, "step": 22060 }, { "epoch": 0.393482681125816, "grad_norm": 0.22573618590831757, "learning_rate": 3.798829772760067e-05, "loss": 0.1527, "step": 22061 }, { "epoch": 0.3935005172475297, "grad_norm": 0.1954156756401062, "learning_rate": 3.798696774288709e-05, "loss": 0.1307, "step": 22062 }, { "epoch": 0.3935183533692434, "grad_norm": 0.3251855671405792, "learning_rate": 3.798563770783153e-05, "loss": 0.1541, "step": 22063 }, { "epoch": 0.3935361894909571, "grad_norm": 0.2626895010471344, "learning_rate": 3.7984307622439174e-05, "loss": 0.1764, "step": 22064 }, { "epoch": 0.3935540256126708, "grad_norm": 0.22068209946155548, "learning_rate": 3.798297748671514e-05, "loss": 0.1235, "step": 22065 }, { "epoch": 0.39357186173438446, "grad_norm": 0.2717522978782654, "learning_rate": 3.798164730066461e-05, "loss": 0.111, "step": 22066 }, { "epoch": 0.39358969785609815, "grad_norm": 0.1987258791923523, "learning_rate": 3.798031706429273e-05, "loss": 0.1106, "step": 22067 }, { "epoch": 0.39360753397781184, "grad_norm": 0.23701933026313782, "learning_rate": 3.797898677760465e-05, "loss": 0.1199, "step": 22068 }, { "epoch": 0.3936253700995256, "grad_norm": 0.32771843671798706, "learning_rate": 3.7977656440605547e-05, "loss": 0.1536, "step": 22069 }, { "epoch": 0.3936432062212393, "grad_norm": 0.2665507197380066, "learning_rate": 3.797632605330056e-05, "loss": 0.153, "step": 22070 }, { "epoch": 0.39366104234295296, "grad_norm": 0.2856513559818268, "learning_rate": 3.797499561569485e-05, "loss": 0.141, "step": 22071 }, { "epoch": 0.39367887846466665, "grad_norm": 0.2678152322769165, "learning_rate": 3.797366512779358e-05, "loss": 0.1661, "step": 22072 }, { "epoch": 0.39369671458638034, "grad_norm": 0.26610809564590454, "learning_rate": 3.7972334589601896e-05, "loss": 0.1872, "step": 22073 }, { "epoch": 0.393714550708094, "grad_norm": 0.22589686512947083, "learning_rate": 3.7971004001124965e-05, "loss": 0.1947, "step": 22074 }, { "epoch": 0.3937323868298077, "grad_norm": 0.23755650222301483, "learning_rate": 3.796967336236794e-05, "loss": 0.1542, "step": 22075 }, { "epoch": 0.3937502229515214, "grad_norm": 0.25805798172950745, "learning_rate": 3.796834267333599e-05, "loss": 0.1975, "step": 22076 }, { "epoch": 0.3937680590732351, "grad_norm": 0.21485163271427155, "learning_rate": 3.796701193403426e-05, "loss": 0.149, "step": 22077 }, { "epoch": 0.39378589519494883, "grad_norm": 0.27399903535842896, "learning_rate": 3.7965681144467916e-05, "loss": 0.1433, "step": 22078 }, { "epoch": 0.3938037313166625, "grad_norm": 0.3664385974407196, "learning_rate": 3.79643503046421e-05, "loss": 0.2024, "step": 22079 }, { "epoch": 0.3938215674383762, "grad_norm": 0.33587291836738586, "learning_rate": 3.7963019414562e-05, "loss": 0.1486, "step": 22080 }, { "epoch": 0.3938394035600899, "grad_norm": 0.5331902503967285, "learning_rate": 3.7961688474232754e-05, "loss": 0.2395, "step": 22081 }, { "epoch": 0.3938572396818036, "grad_norm": 0.2669908404350281, "learning_rate": 3.7960357483659525e-05, "loss": 0.1929, "step": 22082 }, { "epoch": 0.39387507580351727, "grad_norm": 0.26761671900749207, "learning_rate": 3.795902644284748e-05, "loss": 0.1697, "step": 22083 }, { "epoch": 0.39389291192523096, "grad_norm": 0.27993664145469666, "learning_rate": 3.795769535180176e-05, "loss": 0.1688, "step": 22084 }, { "epoch": 0.39391074804694465, "grad_norm": 0.25549808144569397, "learning_rate": 3.795636421052755e-05, "loss": 0.1345, "step": 22085 }, { "epoch": 0.3939285841686584, "grad_norm": 0.26063039898872375, "learning_rate": 3.795503301902999e-05, "loss": 0.1487, "step": 22086 }, { "epoch": 0.3939464202903721, "grad_norm": 0.21613682806491852, "learning_rate": 3.795370177731425e-05, "loss": 0.1573, "step": 22087 }, { "epoch": 0.39396425641208577, "grad_norm": 0.24348105490207672, "learning_rate": 3.795237048538549e-05, "loss": 0.1602, "step": 22088 }, { "epoch": 0.39398209253379946, "grad_norm": 0.25034475326538086, "learning_rate": 3.795103914324887e-05, "loss": 0.1727, "step": 22089 }, { "epoch": 0.39399992865551314, "grad_norm": 0.22405272722244263, "learning_rate": 3.794970775090955e-05, "loss": 0.15, "step": 22090 }, { "epoch": 0.39401776477722683, "grad_norm": 0.1801377832889557, "learning_rate": 3.794837630837268e-05, "loss": 0.1121, "step": 22091 }, { "epoch": 0.3940356008989405, "grad_norm": 0.36839425563812256, "learning_rate": 3.794704481564344e-05, "loss": 0.2228, "step": 22092 }, { "epoch": 0.3940534370206542, "grad_norm": 0.2218928039073944, "learning_rate": 3.794571327272698e-05, "loss": 0.1595, "step": 22093 }, { "epoch": 0.39407127314236795, "grad_norm": 0.2231508046388626, "learning_rate": 3.794438167962846e-05, "loss": 0.1874, "step": 22094 }, { "epoch": 0.39408910926408164, "grad_norm": 0.2118954062461853, "learning_rate": 3.794305003635305e-05, "loss": 0.1369, "step": 22095 }, { "epoch": 0.3941069453857953, "grad_norm": 0.2247815579175949, "learning_rate": 3.7941718342905905e-05, "loss": 0.1593, "step": 22096 }, { "epoch": 0.394124781507509, "grad_norm": 0.3307328224182129, "learning_rate": 3.79403865992922e-05, "loss": 0.1784, "step": 22097 }, { "epoch": 0.3941426176292227, "grad_norm": 0.2795007526874542, "learning_rate": 3.793905480551708e-05, "loss": 0.1423, "step": 22098 }, { "epoch": 0.3941604537509364, "grad_norm": 0.22355566918849945, "learning_rate": 3.793772296158571e-05, "loss": 0.1173, "step": 22099 }, { "epoch": 0.3941782898726501, "grad_norm": 0.3083060383796692, "learning_rate": 3.793639106750326e-05, "loss": 0.1703, "step": 22100 }, { "epoch": 0.39419612599436377, "grad_norm": 0.2806645631790161, "learning_rate": 3.7935059123274895e-05, "loss": 0.1689, "step": 22101 }, { "epoch": 0.39421396211607745, "grad_norm": 0.34339267015457153, "learning_rate": 3.793372712890576e-05, "loss": 0.133, "step": 22102 }, { "epoch": 0.3942317982377912, "grad_norm": 0.1788356453180313, "learning_rate": 3.793239508440105e-05, "loss": 0.1014, "step": 22103 }, { "epoch": 0.3942496343595049, "grad_norm": 0.25159740447998047, "learning_rate": 3.7931062989765896e-05, "loss": 0.1134, "step": 22104 }, { "epoch": 0.3942674704812186, "grad_norm": 0.2123439460992813, "learning_rate": 3.792973084500548e-05, "loss": 0.1578, "step": 22105 }, { "epoch": 0.39428530660293226, "grad_norm": 0.3405452370643616, "learning_rate": 3.792839865012496e-05, "loss": 0.2322, "step": 22106 }, { "epoch": 0.39430314272464595, "grad_norm": 0.19305455684661865, "learning_rate": 3.7927066405129515e-05, "loss": 0.1152, "step": 22107 }, { "epoch": 0.39432097884635964, "grad_norm": 0.3004809319972992, "learning_rate": 3.792573411002428e-05, "loss": 0.1843, "step": 22108 }, { "epoch": 0.3943388149680733, "grad_norm": 0.29001888632774353, "learning_rate": 3.7924401764814436e-05, "loss": 0.1585, "step": 22109 }, { "epoch": 0.394356651089787, "grad_norm": 0.24814005196094513, "learning_rate": 3.792306936950515e-05, "loss": 0.1451, "step": 22110 }, { "epoch": 0.39437448721150076, "grad_norm": 0.2911388576030731, "learning_rate": 3.792173692410159e-05, "loss": 0.1994, "step": 22111 }, { "epoch": 0.39439232333321445, "grad_norm": 1.0766066312789917, "learning_rate": 3.7920404428608905e-05, "loss": 0.2033, "step": 22112 }, { "epoch": 0.39441015945492813, "grad_norm": 0.21739526093006134, "learning_rate": 3.7919071883032276e-05, "loss": 0.2058, "step": 22113 }, { "epoch": 0.3944279955766418, "grad_norm": 0.23893605172634125, "learning_rate": 3.791773928737685e-05, "loss": 0.1465, "step": 22114 }, { "epoch": 0.3944458316983555, "grad_norm": 0.27785786986351013, "learning_rate": 3.791640664164782e-05, "loss": 0.1663, "step": 22115 }, { "epoch": 0.3944636678200692, "grad_norm": 0.19211722910404205, "learning_rate": 3.791507394585033e-05, "loss": 0.0984, "step": 22116 }, { "epoch": 0.3944815039417829, "grad_norm": 0.2102123647928238, "learning_rate": 3.7913741199989556e-05, "loss": 0.1285, "step": 22117 }, { "epoch": 0.3944993400634966, "grad_norm": 0.3376919627189636, "learning_rate": 3.791240840407066e-05, "loss": 0.1439, "step": 22118 }, { "epoch": 0.39451717618521026, "grad_norm": 0.2521054446697235, "learning_rate": 3.791107555809881e-05, "loss": 0.1594, "step": 22119 }, { "epoch": 0.394535012306924, "grad_norm": 0.26240217685699463, "learning_rate": 3.7909742662079165e-05, "loss": 0.1593, "step": 22120 }, { "epoch": 0.3945528484286377, "grad_norm": 0.3223896324634552, "learning_rate": 3.79084097160169e-05, "loss": 0.1718, "step": 22121 }, { "epoch": 0.3945706845503514, "grad_norm": 0.24559205770492554, "learning_rate": 3.790707671991719e-05, "loss": 0.1484, "step": 22122 }, { "epoch": 0.39458852067206507, "grad_norm": 0.2500097155570984, "learning_rate": 3.790574367378518e-05, "loss": 0.1589, "step": 22123 }, { "epoch": 0.39460635679377876, "grad_norm": 0.2428676187992096, "learning_rate": 3.790441057762606e-05, "loss": 0.1567, "step": 22124 }, { "epoch": 0.39462419291549244, "grad_norm": 0.3735401928424835, "learning_rate": 3.790307743144499e-05, "loss": 0.1904, "step": 22125 }, { "epoch": 0.39464202903720613, "grad_norm": 0.2904157340526581, "learning_rate": 3.790174423524713e-05, "loss": 0.1492, "step": 22126 }, { "epoch": 0.3946598651589198, "grad_norm": 0.21411994099617004, "learning_rate": 3.790041098903765e-05, "loss": 0.1638, "step": 22127 }, { "epoch": 0.39467770128063356, "grad_norm": 0.29817232489585876, "learning_rate": 3.7899077692821724e-05, "loss": 0.1892, "step": 22128 }, { "epoch": 0.39469553740234725, "grad_norm": 0.2873034179210663, "learning_rate": 3.7897744346604515e-05, "loss": 0.1507, "step": 22129 }, { "epoch": 0.39471337352406094, "grad_norm": 0.29415106773376465, "learning_rate": 3.789641095039119e-05, "loss": 0.1887, "step": 22130 }, { "epoch": 0.39473120964577463, "grad_norm": 0.22156280279159546, "learning_rate": 3.7895077504186936e-05, "loss": 0.1575, "step": 22131 }, { "epoch": 0.3947490457674883, "grad_norm": 0.3194652795791626, "learning_rate": 3.78937440079969e-05, "loss": 0.2154, "step": 22132 }, { "epoch": 0.394766881889202, "grad_norm": 0.3019934296607971, "learning_rate": 3.789241046182626e-05, "loss": 0.1236, "step": 22133 }, { "epoch": 0.3947847180109157, "grad_norm": 0.2365180402994156, "learning_rate": 3.789107686568018e-05, "loss": 0.1471, "step": 22134 }, { "epoch": 0.3948025541326294, "grad_norm": 0.25868576765060425, "learning_rate": 3.7889743219563844e-05, "loss": 0.147, "step": 22135 }, { "epoch": 0.3948203902543431, "grad_norm": 0.22758358716964722, "learning_rate": 3.78884095234824e-05, "loss": 0.1384, "step": 22136 }, { "epoch": 0.3948382263760568, "grad_norm": 0.2360512614250183, "learning_rate": 3.788707577744103e-05, "loss": 0.1284, "step": 22137 }, { "epoch": 0.3948560624977705, "grad_norm": 0.19539006054401398, "learning_rate": 3.7885741981444914e-05, "loss": 0.1389, "step": 22138 }, { "epoch": 0.3948738986194842, "grad_norm": 0.34173741936683655, "learning_rate": 3.78844081354992e-05, "loss": 0.1732, "step": 22139 }, { "epoch": 0.3948917347411979, "grad_norm": 0.1790994256734848, "learning_rate": 3.788307423960907e-05, "loss": 0.1578, "step": 22140 }, { "epoch": 0.39490957086291156, "grad_norm": 0.19829240441322327, "learning_rate": 3.78817402937797e-05, "loss": 0.1373, "step": 22141 }, { "epoch": 0.39492740698462525, "grad_norm": 0.21088539063930511, "learning_rate": 3.788040629801626e-05, "loss": 0.159, "step": 22142 }, { "epoch": 0.39494524310633894, "grad_norm": 0.26099902391433716, "learning_rate": 3.78790722523239e-05, "loss": 0.0941, "step": 22143 }, { "epoch": 0.3949630792280526, "grad_norm": 0.28984883427619934, "learning_rate": 3.7877738156707826e-05, "loss": 0.1461, "step": 22144 }, { "epoch": 0.39498091534976637, "grad_norm": 0.2620595097541809, "learning_rate": 3.7876404011173184e-05, "loss": 0.2001, "step": 22145 }, { "epoch": 0.39499875147148006, "grad_norm": 0.25255337357521057, "learning_rate": 3.7875069815725154e-05, "loss": 0.1438, "step": 22146 }, { "epoch": 0.39501658759319375, "grad_norm": 0.21990849077701569, "learning_rate": 3.78737355703689e-05, "loss": 0.1665, "step": 22147 }, { "epoch": 0.39503442371490743, "grad_norm": 0.24035438895225525, "learning_rate": 3.787240127510961e-05, "loss": 0.1826, "step": 22148 }, { "epoch": 0.3950522598366211, "grad_norm": 0.3047240674495697, "learning_rate": 3.7871066929952436e-05, "loss": 0.1259, "step": 22149 }, { "epoch": 0.3950700959583348, "grad_norm": 0.3384449779987335, "learning_rate": 3.786973253490257e-05, "loss": 0.1539, "step": 22150 }, { "epoch": 0.3950879320800485, "grad_norm": 0.3169119954109192, "learning_rate": 3.786839808996517e-05, "loss": 0.1561, "step": 22151 }, { "epoch": 0.3951057682017622, "grad_norm": 0.3256834149360657, "learning_rate": 3.7867063595145414e-05, "loss": 0.1343, "step": 22152 }, { "epoch": 0.39512360432347593, "grad_norm": 0.28872379660606384, "learning_rate": 3.786572905044848e-05, "loss": 0.1954, "step": 22153 }, { "epoch": 0.3951414404451896, "grad_norm": 0.3238251507282257, "learning_rate": 3.7864394455879536e-05, "loss": 0.1151, "step": 22154 }, { "epoch": 0.3951592765669033, "grad_norm": 0.27694183588027954, "learning_rate": 3.7863059811443755e-05, "loss": 0.1521, "step": 22155 }, { "epoch": 0.395177112688617, "grad_norm": 0.24887990951538086, "learning_rate": 3.7861725117146316e-05, "loss": 0.1931, "step": 22156 }, { "epoch": 0.3951949488103307, "grad_norm": 0.21174845099449158, "learning_rate": 3.7860390372992375e-05, "loss": 0.1691, "step": 22157 }, { "epoch": 0.39521278493204437, "grad_norm": 0.30390486121177673, "learning_rate": 3.785905557898712e-05, "loss": 0.1876, "step": 22158 }, { "epoch": 0.39523062105375806, "grad_norm": 0.2500879168510437, "learning_rate": 3.785772073513574e-05, "loss": 0.1433, "step": 22159 }, { "epoch": 0.39524845717547175, "grad_norm": 0.33439457416534424, "learning_rate": 3.785638584144339e-05, "loss": 0.1475, "step": 22160 }, { "epoch": 0.39526629329718543, "grad_norm": 0.32102084159851074, "learning_rate": 3.785505089791524e-05, "loss": 0.1603, "step": 22161 }, { "epoch": 0.3952841294188992, "grad_norm": 0.26970216631889343, "learning_rate": 3.7853715904556473e-05, "loss": 0.1781, "step": 22162 }, { "epoch": 0.39530196554061287, "grad_norm": 0.3270612359046936, "learning_rate": 3.785238086137227e-05, "loss": 0.1474, "step": 22163 }, { "epoch": 0.39531980166232655, "grad_norm": 0.2136305421590805, "learning_rate": 3.7851045768367795e-05, "loss": 0.1329, "step": 22164 }, { "epoch": 0.39533763778404024, "grad_norm": 0.2370368391275406, "learning_rate": 3.784971062554823e-05, "loss": 0.1869, "step": 22165 }, { "epoch": 0.39535547390575393, "grad_norm": 0.2910391390323639, "learning_rate": 3.784837543291875e-05, "loss": 0.1436, "step": 22166 }, { "epoch": 0.3953733100274676, "grad_norm": 0.37344807386398315, "learning_rate": 3.784704019048452e-05, "loss": 0.1599, "step": 22167 }, { "epoch": 0.3953911461491813, "grad_norm": 0.21428127586841583, "learning_rate": 3.784570489825073e-05, "loss": 0.117, "step": 22168 }, { "epoch": 0.395408982270895, "grad_norm": 0.22344574332237244, "learning_rate": 3.784436955622256e-05, "loss": 0.1478, "step": 22169 }, { "epoch": 0.39542681839260874, "grad_norm": 0.22434456646442413, "learning_rate": 3.784303416440517e-05, "loss": 0.1709, "step": 22170 }, { "epoch": 0.3954446545143224, "grad_norm": 0.3279266953468323, "learning_rate": 3.7841698722803736e-05, "loss": 0.2596, "step": 22171 }, { "epoch": 0.3954624906360361, "grad_norm": 0.23844437301158905, "learning_rate": 3.784036323142345e-05, "loss": 0.1516, "step": 22172 }, { "epoch": 0.3954803267577498, "grad_norm": 0.18370580673217773, "learning_rate": 3.783902769026948e-05, "loss": 0.1142, "step": 22173 }, { "epoch": 0.3954981628794635, "grad_norm": 0.37334781885147095, "learning_rate": 3.7837692099347014e-05, "loss": 0.2122, "step": 22174 }, { "epoch": 0.3955159990011772, "grad_norm": 0.32715171575546265, "learning_rate": 3.78363564586612e-05, "loss": 0.1851, "step": 22175 }, { "epoch": 0.39553383512289086, "grad_norm": 0.2590390145778656, "learning_rate": 3.783502076821726e-05, "loss": 0.1726, "step": 22176 }, { "epoch": 0.39555167124460455, "grad_norm": 0.36881256103515625, "learning_rate": 3.7833685028020327e-05, "loss": 0.2004, "step": 22177 }, { "epoch": 0.39556950736631824, "grad_norm": 0.38517460227012634, "learning_rate": 3.78323492380756e-05, "loss": 0.2125, "step": 22178 }, { "epoch": 0.395587343488032, "grad_norm": 0.18701235949993134, "learning_rate": 3.783101339838826e-05, "loss": 0.1699, "step": 22179 }, { "epoch": 0.39560517960974567, "grad_norm": 0.43048009276390076, "learning_rate": 3.782967750896348e-05, "loss": 0.1691, "step": 22180 }, { "epoch": 0.39562301573145936, "grad_norm": 0.26353105902671814, "learning_rate": 3.782834156980643e-05, "loss": 0.1758, "step": 22181 }, { "epoch": 0.39564085185317305, "grad_norm": 0.3236018419265747, "learning_rate": 3.7827005580922316e-05, "loss": 0.1492, "step": 22182 }, { "epoch": 0.39565868797488674, "grad_norm": 0.22929665446281433, "learning_rate": 3.782566954231629e-05, "loss": 0.1317, "step": 22183 }, { "epoch": 0.3956765240966004, "grad_norm": 0.23387475311756134, "learning_rate": 3.782433345399353e-05, "loss": 0.1433, "step": 22184 }, { "epoch": 0.3956943602183141, "grad_norm": 0.3089311420917511, "learning_rate": 3.782299731595923e-05, "loss": 0.1411, "step": 22185 }, { "epoch": 0.3957121963400278, "grad_norm": 0.4106653332710266, "learning_rate": 3.782166112821855e-05, "loss": 0.1611, "step": 22186 }, { "epoch": 0.39573003246174154, "grad_norm": 0.242791548371315, "learning_rate": 3.782032489077671e-05, "loss": 0.1436, "step": 22187 }, { "epoch": 0.39574786858345523, "grad_norm": 0.2590719759464264, "learning_rate": 3.781898860363885e-05, "loss": 0.1918, "step": 22188 }, { "epoch": 0.3957657047051689, "grad_norm": 0.25568678975105286, "learning_rate": 3.781765226681016e-05, "loss": 0.1872, "step": 22189 }, { "epoch": 0.3957835408268826, "grad_norm": 0.28192266821861267, "learning_rate": 3.781631588029583e-05, "loss": 0.1826, "step": 22190 }, { "epoch": 0.3958013769485963, "grad_norm": 0.30621835589408875, "learning_rate": 3.781497944410102e-05, "loss": 0.142, "step": 22191 }, { "epoch": 0.39581921307031, "grad_norm": 0.389258474111557, "learning_rate": 3.781364295823093e-05, "loss": 0.189, "step": 22192 }, { "epoch": 0.39583704919202367, "grad_norm": 0.21447554230690002, "learning_rate": 3.781230642269073e-05, "loss": 0.1682, "step": 22193 }, { "epoch": 0.39585488531373736, "grad_norm": 0.29244545102119446, "learning_rate": 3.781096983748562e-05, "loss": 0.1636, "step": 22194 }, { "epoch": 0.3958727214354511, "grad_norm": 0.30486592650413513, "learning_rate": 3.780963320262075e-05, "loss": 0.1675, "step": 22195 }, { "epoch": 0.3958905575571648, "grad_norm": 0.24616453051567078, "learning_rate": 3.7808296518101334e-05, "loss": 0.1936, "step": 22196 }, { "epoch": 0.3959083936788785, "grad_norm": 0.2412756085395813, "learning_rate": 3.780695978393253e-05, "loss": 0.1565, "step": 22197 }, { "epoch": 0.39592622980059217, "grad_norm": 0.25053858757019043, "learning_rate": 3.780562300011952e-05, "loss": 0.1249, "step": 22198 }, { "epoch": 0.39594406592230585, "grad_norm": 0.26132482290267944, "learning_rate": 3.780428616666749e-05, "loss": 0.2577, "step": 22199 }, { "epoch": 0.39596190204401954, "grad_norm": 0.23156170547008514, "learning_rate": 3.7802949283581634e-05, "loss": 0.1324, "step": 22200 }, { "epoch": 0.39597973816573323, "grad_norm": 0.309425413608551, "learning_rate": 3.780161235086712e-05, "loss": 0.1115, "step": 22201 }, { "epoch": 0.3959975742874469, "grad_norm": 0.2567881643772125, "learning_rate": 3.780027536852914e-05, "loss": 0.1254, "step": 22202 }, { "epoch": 0.3960154104091606, "grad_norm": 0.5447714924812317, "learning_rate": 3.7798938336572864e-05, "loss": 0.1988, "step": 22203 }, { "epoch": 0.39603324653087435, "grad_norm": 0.26598888635635376, "learning_rate": 3.779760125500349e-05, "loss": 0.1727, "step": 22204 }, { "epoch": 0.39605108265258804, "grad_norm": 0.34892353415489197, "learning_rate": 3.7796264123826185e-05, "loss": 0.1434, "step": 22205 }, { "epoch": 0.3960689187743017, "grad_norm": 0.27633213996887207, "learning_rate": 3.779492694304614e-05, "loss": 0.1531, "step": 22206 }, { "epoch": 0.3960867548960154, "grad_norm": 0.23184406757354736, "learning_rate": 3.7793589712668545e-05, "loss": 0.1598, "step": 22207 }, { "epoch": 0.3961045910177291, "grad_norm": 0.20744064450263977, "learning_rate": 3.779225243269858e-05, "loss": 0.1175, "step": 22208 }, { "epoch": 0.3961224271394428, "grad_norm": 0.4271492063999176, "learning_rate": 3.7790915103141425e-05, "loss": 0.1515, "step": 22209 }, { "epoch": 0.3961402632611565, "grad_norm": 0.27226418256759644, "learning_rate": 3.778957772400226e-05, "loss": 0.1531, "step": 22210 }, { "epoch": 0.39615809938287017, "grad_norm": 0.24618516862392426, "learning_rate": 3.778824029528628e-05, "loss": 0.1661, "step": 22211 }, { "epoch": 0.3961759355045839, "grad_norm": 0.26164987683296204, "learning_rate": 3.778690281699866e-05, "loss": 0.1401, "step": 22212 }, { "epoch": 0.3961937716262976, "grad_norm": 0.22632980346679688, "learning_rate": 3.778556528914459e-05, "loss": 0.1127, "step": 22213 }, { "epoch": 0.3962116077480113, "grad_norm": 0.30319634079933167, "learning_rate": 3.778422771172925e-05, "loss": 0.1264, "step": 22214 }, { "epoch": 0.396229443869725, "grad_norm": 0.2946068346500397, "learning_rate": 3.778289008475783e-05, "loss": 0.1342, "step": 22215 }, { "epoch": 0.39624727999143866, "grad_norm": 0.25682687759399414, "learning_rate": 3.778155240823551e-05, "loss": 0.1679, "step": 22216 }, { "epoch": 0.39626511611315235, "grad_norm": 0.29321160912513733, "learning_rate": 3.7780214682167484e-05, "loss": 0.1814, "step": 22217 }, { "epoch": 0.39628295223486604, "grad_norm": 0.3286098539829254, "learning_rate": 3.7778876906558926e-05, "loss": 0.136, "step": 22218 }, { "epoch": 0.3963007883565797, "grad_norm": 0.1981721818447113, "learning_rate": 3.777753908141503e-05, "loss": 0.1721, "step": 22219 }, { "epoch": 0.3963186244782934, "grad_norm": 0.16597972810268402, "learning_rate": 3.777620120674098e-05, "loss": 0.1545, "step": 22220 }, { "epoch": 0.39633646060000716, "grad_norm": 0.3351482152938843, "learning_rate": 3.777486328254196e-05, "loss": 0.1784, "step": 22221 }, { "epoch": 0.39635429672172084, "grad_norm": 0.37906208634376526, "learning_rate": 3.777352530882316e-05, "loss": 0.1893, "step": 22222 }, { "epoch": 0.39637213284343453, "grad_norm": 0.2731074094772339, "learning_rate": 3.7772187285589764e-05, "loss": 0.1375, "step": 22223 }, { "epoch": 0.3963899689651482, "grad_norm": 0.2367071658372879, "learning_rate": 3.777084921284696e-05, "loss": 0.1333, "step": 22224 }, { "epoch": 0.3964078050868619, "grad_norm": 0.2556086778640747, "learning_rate": 3.7769511090599926e-05, "loss": 0.1646, "step": 22225 }, { "epoch": 0.3964256412085756, "grad_norm": 0.2598172426223755, "learning_rate": 3.776817291885386e-05, "loss": 0.1692, "step": 22226 }, { "epoch": 0.3964434773302893, "grad_norm": 0.2346215397119522, "learning_rate": 3.776683469761394e-05, "loss": 0.1794, "step": 22227 }, { "epoch": 0.396461313452003, "grad_norm": 0.39767736196517944, "learning_rate": 3.776549642688537e-05, "loss": 0.2489, "step": 22228 }, { "epoch": 0.3964791495737167, "grad_norm": 0.2696288824081421, "learning_rate": 3.776415810667333e-05, "loss": 0.1633, "step": 22229 }, { "epoch": 0.3964969856954304, "grad_norm": 0.3055456280708313, "learning_rate": 3.7762819736982994e-05, "loss": 0.1834, "step": 22230 }, { "epoch": 0.3965148218171441, "grad_norm": 0.30127641558647156, "learning_rate": 3.776148131781957e-05, "loss": 0.1647, "step": 22231 }, { "epoch": 0.3965326579388578, "grad_norm": 0.3112037777900696, "learning_rate": 3.776014284918823e-05, "loss": 0.2043, "step": 22232 }, { "epoch": 0.39655049406057147, "grad_norm": 0.2905730903148651, "learning_rate": 3.775880433109417e-05, "loss": 0.1893, "step": 22233 }, { "epoch": 0.39656833018228516, "grad_norm": 0.23809996247291565, "learning_rate": 3.775746576354257e-05, "loss": 0.1828, "step": 22234 }, { "epoch": 0.39658616630399884, "grad_norm": 0.3379803001880646, "learning_rate": 3.775612714653864e-05, "loss": 0.1538, "step": 22235 }, { "epoch": 0.39660400242571253, "grad_norm": 0.25696924328804016, "learning_rate": 3.775478848008754e-05, "loss": 0.13, "step": 22236 }, { "epoch": 0.3966218385474263, "grad_norm": 0.2714236378669739, "learning_rate": 3.775344976419449e-05, "loss": 0.1624, "step": 22237 }, { "epoch": 0.39663967466913996, "grad_norm": 0.26460570096969604, "learning_rate": 3.775211099886466e-05, "loss": 0.134, "step": 22238 }, { "epoch": 0.39665751079085365, "grad_norm": 0.30905723571777344, "learning_rate": 3.775077218410324e-05, "loss": 0.1707, "step": 22239 }, { "epoch": 0.39667534691256734, "grad_norm": 0.34891965985298157, "learning_rate": 3.7749433319915425e-05, "loss": 0.1807, "step": 22240 }, { "epoch": 0.396693183034281, "grad_norm": 0.216149240732193, "learning_rate": 3.77480944063064e-05, "loss": 0.1588, "step": 22241 }, { "epoch": 0.3967110191559947, "grad_norm": 0.26740992069244385, "learning_rate": 3.7746755443281363e-05, "loss": 0.1853, "step": 22242 }, { "epoch": 0.3967288552777084, "grad_norm": 0.23010098934173584, "learning_rate": 3.7745416430845494e-05, "loss": 0.1816, "step": 22243 }, { "epoch": 0.3967466913994221, "grad_norm": 0.22635534405708313, "learning_rate": 3.7744077369003995e-05, "loss": 0.1484, "step": 22244 }, { "epoch": 0.3967645275211358, "grad_norm": 0.31512561440467834, "learning_rate": 3.7742738257762044e-05, "loss": 0.1606, "step": 22245 }, { "epoch": 0.3967823636428495, "grad_norm": 0.35845622420310974, "learning_rate": 3.774139909712484e-05, "loss": 0.1722, "step": 22246 }, { "epoch": 0.3968001997645632, "grad_norm": 0.2786567807197571, "learning_rate": 3.774005988709757e-05, "loss": 0.1694, "step": 22247 }, { "epoch": 0.3968180358862769, "grad_norm": 0.22597715258598328, "learning_rate": 3.773872062768543e-05, "loss": 0.1437, "step": 22248 }, { "epoch": 0.3968358720079906, "grad_norm": 0.3453173041343689, "learning_rate": 3.773738131889362e-05, "loss": 0.2041, "step": 22249 }, { "epoch": 0.3968537081297043, "grad_norm": 0.3514271080493927, "learning_rate": 3.773604196072731e-05, "loss": 0.186, "step": 22250 }, { "epoch": 0.39687154425141796, "grad_norm": 0.2693471908569336, "learning_rate": 3.77347025531917e-05, "loss": 0.1178, "step": 22251 }, { "epoch": 0.39688938037313165, "grad_norm": 0.26904910802841187, "learning_rate": 3.7733363096291985e-05, "loss": 0.118, "step": 22252 }, { "epoch": 0.39690721649484534, "grad_norm": 0.23082782328128815, "learning_rate": 3.773202359003336e-05, "loss": 0.1706, "step": 22253 }, { "epoch": 0.3969250526165591, "grad_norm": 0.34383949637413025, "learning_rate": 3.773068403442102e-05, "loss": 0.1695, "step": 22254 }, { "epoch": 0.39694288873827277, "grad_norm": 0.21252372860908508, "learning_rate": 3.7729344429460136e-05, "loss": 0.1475, "step": 22255 }, { "epoch": 0.39696072485998646, "grad_norm": 0.2579759359359741, "learning_rate": 3.7728004775155926e-05, "loss": 0.1334, "step": 22256 }, { "epoch": 0.39697856098170015, "grad_norm": 0.22919298708438873, "learning_rate": 3.772666507151358e-05, "loss": 0.1211, "step": 22257 }, { "epoch": 0.39699639710341383, "grad_norm": 0.2603635787963867, "learning_rate": 3.772532531853827e-05, "loss": 0.2079, "step": 22258 }, { "epoch": 0.3970142332251275, "grad_norm": 0.32817214727401733, "learning_rate": 3.772398551623521e-05, "loss": 0.1618, "step": 22259 }, { "epoch": 0.3970320693468412, "grad_norm": 0.1930309534072876, "learning_rate": 3.772264566460959e-05, "loss": 0.1442, "step": 22260 }, { "epoch": 0.3970499054685549, "grad_norm": 0.27403295040130615, "learning_rate": 3.7721305763666604e-05, "loss": 0.1389, "step": 22261 }, { "epoch": 0.3970677415902686, "grad_norm": 0.31754204630851746, "learning_rate": 3.7719965813411434e-05, "loss": 0.2156, "step": 22262 }, { "epoch": 0.39708557771198233, "grad_norm": 0.2536564767360687, "learning_rate": 3.7718625813849297e-05, "loss": 0.2135, "step": 22263 }, { "epoch": 0.397103413833696, "grad_norm": 0.2569412589073181, "learning_rate": 3.7717285764985356e-05, "loss": 0.2338, "step": 22264 }, { "epoch": 0.3971212499554097, "grad_norm": 0.2939034700393677, "learning_rate": 3.771594566682484e-05, "loss": 0.2066, "step": 22265 }, { "epoch": 0.3971390860771234, "grad_norm": 0.19327019155025482, "learning_rate": 3.7714605519372916e-05, "loss": 0.1411, "step": 22266 }, { "epoch": 0.3971569221988371, "grad_norm": 0.33009007573127747, "learning_rate": 3.7713265322634797e-05, "loss": 0.127, "step": 22267 }, { "epoch": 0.39717475832055077, "grad_norm": 0.180852010846138, "learning_rate": 3.771192507661565e-05, "loss": 0.1446, "step": 22268 }, { "epoch": 0.39719259444226446, "grad_norm": 0.2605664134025574, "learning_rate": 3.7710584781320715e-05, "loss": 0.1308, "step": 22269 }, { "epoch": 0.39721043056397815, "grad_norm": 0.5936512351036072, "learning_rate": 3.770924443675515e-05, "loss": 0.3512, "step": 22270 }, { "epoch": 0.3972282666856919, "grad_norm": 0.32129839062690735, "learning_rate": 3.770790404292417e-05, "loss": 0.1741, "step": 22271 }, { "epoch": 0.3972461028074056, "grad_norm": 0.2187911570072174, "learning_rate": 3.770656359983297e-05, "loss": 0.1535, "step": 22272 }, { "epoch": 0.39726393892911926, "grad_norm": 0.28206637501716614, "learning_rate": 3.770522310748673e-05, "loss": 0.1475, "step": 22273 }, { "epoch": 0.39728177505083295, "grad_norm": 0.23699229955673218, "learning_rate": 3.770388256589066e-05, "loss": 0.1526, "step": 22274 }, { "epoch": 0.39729961117254664, "grad_norm": 0.2758311629295349, "learning_rate": 3.770254197504995e-05, "loss": 0.1657, "step": 22275 }, { "epoch": 0.39731744729426033, "grad_norm": 0.36731085181236267, "learning_rate": 3.7701201334969804e-05, "loss": 0.175, "step": 22276 }, { "epoch": 0.397335283415974, "grad_norm": 0.2826375961303711, "learning_rate": 3.7699860645655416e-05, "loss": 0.1211, "step": 22277 }, { "epoch": 0.3973531195376877, "grad_norm": 0.28067928552627563, "learning_rate": 3.769851990711198e-05, "loss": 0.11, "step": 22278 }, { "epoch": 0.3973709556594014, "grad_norm": 0.26242756843566895, "learning_rate": 3.76971791193447e-05, "loss": 0.1722, "step": 22279 }, { "epoch": 0.39738879178111514, "grad_norm": 0.26454052329063416, "learning_rate": 3.769583828235876e-05, "loss": 0.1722, "step": 22280 }, { "epoch": 0.3974066279028288, "grad_norm": 0.2344706952571869, "learning_rate": 3.7694497396159364e-05, "loss": 0.1143, "step": 22281 }, { "epoch": 0.3974244640245425, "grad_norm": 0.3165126144886017, "learning_rate": 3.769315646075172e-05, "loss": 0.1744, "step": 22282 }, { "epoch": 0.3974423001462562, "grad_norm": 0.23090432584285736, "learning_rate": 3.769181547614102e-05, "loss": 0.1397, "step": 22283 }, { "epoch": 0.3974601362679699, "grad_norm": 0.2685081660747528, "learning_rate": 3.769047444233245e-05, "loss": 0.1917, "step": 22284 }, { "epoch": 0.3974779723896836, "grad_norm": 0.3602457642555237, "learning_rate": 3.768913335933123e-05, "loss": 0.152, "step": 22285 }, { "epoch": 0.39749580851139726, "grad_norm": 0.2178330272436142, "learning_rate": 3.768779222714254e-05, "loss": 0.1505, "step": 22286 }, { "epoch": 0.39751364463311095, "grad_norm": 0.2397204488515854, "learning_rate": 3.768645104577158e-05, "loss": 0.1106, "step": 22287 }, { "epoch": 0.3975314807548247, "grad_norm": 0.21508941054344177, "learning_rate": 3.7685109815223554e-05, "loss": 0.1281, "step": 22288 }, { "epoch": 0.3975493168765384, "grad_norm": 0.246878981590271, "learning_rate": 3.768376853550367e-05, "loss": 0.1827, "step": 22289 }, { "epoch": 0.39756715299825207, "grad_norm": 0.2514778673648834, "learning_rate": 3.768242720661711e-05, "loss": 0.1654, "step": 22290 }, { "epoch": 0.39758498911996576, "grad_norm": 0.27585023641586304, "learning_rate": 3.7681085828569086e-05, "loss": 0.117, "step": 22291 }, { "epoch": 0.39760282524167945, "grad_norm": 0.2424265444278717, "learning_rate": 3.767974440136479e-05, "loss": 0.174, "step": 22292 }, { "epoch": 0.39762066136339314, "grad_norm": 0.366639107465744, "learning_rate": 3.7678402925009425e-05, "loss": 0.1621, "step": 22293 }, { "epoch": 0.3976384974851068, "grad_norm": 0.2671668827533722, "learning_rate": 3.76770613995082e-05, "loss": 0.1739, "step": 22294 }, { "epoch": 0.3976563336068205, "grad_norm": 0.21506819128990173, "learning_rate": 3.767571982486629e-05, "loss": 0.1608, "step": 22295 }, { "epoch": 0.39767416972853425, "grad_norm": 0.2388947308063507, "learning_rate": 3.7674378201088935e-05, "loss": 0.1498, "step": 22296 }, { "epoch": 0.39769200585024794, "grad_norm": 0.23440612852573395, "learning_rate": 3.7673036528181294e-05, "loss": 0.1819, "step": 22297 }, { "epoch": 0.39770984197196163, "grad_norm": 0.3379155993461609, "learning_rate": 3.7671694806148596e-05, "loss": 0.1383, "step": 22298 }, { "epoch": 0.3977276780936753, "grad_norm": 0.26498284935951233, "learning_rate": 3.767035303499602e-05, "loss": 0.1522, "step": 22299 }, { "epoch": 0.397745514215389, "grad_norm": 0.3507102131843567, "learning_rate": 3.76690112147288e-05, "loss": 0.1182, "step": 22300 }, { "epoch": 0.3977633503371027, "grad_norm": 0.2222672402858734, "learning_rate": 3.76676693453521e-05, "loss": 0.0872, "step": 22301 }, { "epoch": 0.3977811864588164, "grad_norm": 0.2475961446762085, "learning_rate": 3.766632742687114e-05, "loss": 0.1414, "step": 22302 }, { "epoch": 0.39779902258053007, "grad_norm": 0.3509806990623474, "learning_rate": 3.7664985459291125e-05, "loss": 0.1876, "step": 22303 }, { "epoch": 0.39781685870224376, "grad_norm": 0.27201980352401733, "learning_rate": 3.766364344261725e-05, "loss": 0.138, "step": 22304 }, { "epoch": 0.3978346948239575, "grad_norm": 0.25919872522354126, "learning_rate": 3.766230137685471e-05, "loss": 0.1495, "step": 22305 }, { "epoch": 0.3978525309456712, "grad_norm": 0.3091748356819153, "learning_rate": 3.766095926200873e-05, "loss": 0.1728, "step": 22306 }, { "epoch": 0.3978703670673849, "grad_norm": 0.22791191935539246, "learning_rate": 3.76596170980845e-05, "loss": 0.1409, "step": 22307 }, { "epoch": 0.39788820318909857, "grad_norm": 0.2801245152950287, "learning_rate": 3.765827488508722e-05, "loss": 0.1685, "step": 22308 }, { "epoch": 0.39790603931081225, "grad_norm": 0.29067695140838623, "learning_rate": 3.7656932623022084e-05, "loss": 0.165, "step": 22309 }, { "epoch": 0.39792387543252594, "grad_norm": 0.28457409143447876, "learning_rate": 3.7655590311894314e-05, "loss": 0.1358, "step": 22310 }, { "epoch": 0.39794171155423963, "grad_norm": 0.24204015731811523, "learning_rate": 3.76542479517091e-05, "loss": 0.0948, "step": 22311 }, { "epoch": 0.3979595476759533, "grad_norm": 0.27102288603782654, "learning_rate": 3.765290554247165e-05, "loss": 0.1943, "step": 22312 }, { "epoch": 0.39797738379766706, "grad_norm": 0.2584947943687439, "learning_rate": 3.765156308418718e-05, "loss": 0.191, "step": 22313 }, { "epoch": 0.39799521991938075, "grad_norm": 0.21742399036884308, "learning_rate": 3.7650220576860874e-05, "loss": 0.1574, "step": 22314 }, { "epoch": 0.39801305604109444, "grad_norm": 0.4372897148132324, "learning_rate": 3.7648878020497944e-05, "loss": 0.1629, "step": 22315 }, { "epoch": 0.3980308921628081, "grad_norm": 0.23204554617404938, "learning_rate": 3.764753541510359e-05, "loss": 0.1393, "step": 22316 }, { "epoch": 0.3980487282845218, "grad_norm": 0.23232850432395935, "learning_rate": 3.764619276068302e-05, "loss": 0.162, "step": 22317 }, { "epoch": 0.3980665644062355, "grad_norm": 0.38425225019454956, "learning_rate": 3.764485005724144e-05, "loss": 0.1872, "step": 22318 }, { "epoch": 0.3980844005279492, "grad_norm": 0.2631813585758209, "learning_rate": 3.764350730478406e-05, "loss": 0.1142, "step": 22319 }, { "epoch": 0.3981022366496629, "grad_norm": 0.24448172748088837, "learning_rate": 3.7642164503316076e-05, "loss": 0.1155, "step": 22320 }, { "epoch": 0.39812007277137657, "grad_norm": 0.30166247487068176, "learning_rate": 3.7640821652842693e-05, "loss": 0.174, "step": 22321 }, { "epoch": 0.3981379088930903, "grad_norm": 0.2300938218832016, "learning_rate": 3.7639478753369126e-05, "loss": 0.1878, "step": 22322 }, { "epoch": 0.398155745014804, "grad_norm": 0.32995182275772095, "learning_rate": 3.763813580490056e-05, "loss": 0.1745, "step": 22323 }, { "epoch": 0.3981735811365177, "grad_norm": 0.22452934086322784, "learning_rate": 3.763679280744223e-05, "loss": 0.1085, "step": 22324 }, { "epoch": 0.3981914172582314, "grad_norm": 0.4042935073375702, "learning_rate": 3.763544976099932e-05, "loss": 0.256, "step": 22325 }, { "epoch": 0.39820925337994506, "grad_norm": 0.20228131115436554, "learning_rate": 3.763410666557704e-05, "loss": 0.0926, "step": 22326 }, { "epoch": 0.39822708950165875, "grad_norm": 0.25248879194259644, "learning_rate": 3.76327635211806e-05, "loss": 0.1934, "step": 22327 }, { "epoch": 0.39824492562337244, "grad_norm": 0.20059946179389954, "learning_rate": 3.763142032781522e-05, "loss": 0.1151, "step": 22328 }, { "epoch": 0.3982627617450861, "grad_norm": 0.33941495418548584, "learning_rate": 3.763007708548607e-05, "loss": 0.1386, "step": 22329 }, { "epoch": 0.39828059786679987, "grad_norm": 0.20216259360313416, "learning_rate": 3.762873379419839e-05, "loss": 0.1175, "step": 22330 }, { "epoch": 0.39829843398851356, "grad_norm": 0.42369261384010315, "learning_rate": 3.7627390453957365e-05, "loss": 0.1556, "step": 22331 }, { "epoch": 0.39831627011022724, "grad_norm": 0.2654978334903717, "learning_rate": 3.762604706476823e-05, "loss": 0.145, "step": 22332 }, { "epoch": 0.39833410623194093, "grad_norm": 0.23754015564918518, "learning_rate": 3.762470362663616e-05, "loss": 0.1397, "step": 22333 }, { "epoch": 0.3983519423536546, "grad_norm": 0.43318185210227966, "learning_rate": 3.762336013956639e-05, "loss": 0.2091, "step": 22334 }, { "epoch": 0.3983697784753683, "grad_norm": 0.26307061314582825, "learning_rate": 3.762201660356411e-05, "loss": 0.1639, "step": 22335 }, { "epoch": 0.398387614597082, "grad_norm": 0.34982284903526306, "learning_rate": 3.7620673018634545e-05, "loss": 0.1432, "step": 22336 }, { "epoch": 0.3984054507187957, "grad_norm": 0.2314211130142212, "learning_rate": 3.761932938478288e-05, "loss": 0.1615, "step": 22337 }, { "epoch": 0.39842328684050937, "grad_norm": 0.3286464512348175, "learning_rate": 3.761798570201434e-05, "loss": 0.1348, "step": 22338 }, { "epoch": 0.3984411229622231, "grad_norm": 0.3589918613433838, "learning_rate": 3.7616641970334124e-05, "loss": 0.1657, "step": 22339 }, { "epoch": 0.3984589590839368, "grad_norm": 0.4123172461986542, "learning_rate": 3.761529818974745e-05, "loss": 0.1621, "step": 22340 }, { "epoch": 0.3984767952056505, "grad_norm": 0.29044750332832336, "learning_rate": 3.761395436025953e-05, "loss": 0.1466, "step": 22341 }, { "epoch": 0.3984946313273642, "grad_norm": 0.24817374348640442, "learning_rate": 3.761261048187555e-05, "loss": 0.2051, "step": 22342 }, { "epoch": 0.39851246744907787, "grad_norm": 0.18409378826618195, "learning_rate": 3.7611266554600755e-05, "loss": 0.1436, "step": 22343 }, { "epoch": 0.39853030357079156, "grad_norm": 0.35922834277153015, "learning_rate": 3.7609922578440325e-05, "loss": 0.187, "step": 22344 }, { "epoch": 0.39854813969250524, "grad_norm": 0.28755804896354675, "learning_rate": 3.760857855339947e-05, "loss": 0.2006, "step": 22345 }, { "epoch": 0.39856597581421893, "grad_norm": 0.2868260443210602, "learning_rate": 3.7607234479483425e-05, "loss": 0.1871, "step": 22346 }, { "epoch": 0.3985838119359327, "grad_norm": 0.21651361882686615, "learning_rate": 3.760589035669738e-05, "loss": 0.114, "step": 22347 }, { "epoch": 0.39860164805764636, "grad_norm": 0.2022954225540161, "learning_rate": 3.760454618504656e-05, "loss": 0.1369, "step": 22348 }, { "epoch": 0.39861948417936005, "grad_norm": 0.3257977366447449, "learning_rate": 3.760320196453615e-05, "loss": 0.193, "step": 22349 }, { "epoch": 0.39863732030107374, "grad_norm": 0.31494295597076416, "learning_rate": 3.7601857695171384e-05, "loss": 0.149, "step": 22350 }, { "epoch": 0.3986551564227874, "grad_norm": 0.3476201295852661, "learning_rate": 3.760051337695746e-05, "loss": 0.2422, "step": 22351 }, { "epoch": 0.3986729925445011, "grad_norm": 0.20647187530994415, "learning_rate": 3.759916900989959e-05, "loss": 0.1268, "step": 22352 }, { "epoch": 0.3986908286662148, "grad_norm": 0.2499152272939682, "learning_rate": 3.7597824594003e-05, "loss": 0.1741, "step": 22353 }, { "epoch": 0.3987086647879285, "grad_norm": 0.24941423535346985, "learning_rate": 3.7596480129272885e-05, "loss": 0.1428, "step": 22354 }, { "epoch": 0.39872650090964223, "grad_norm": 0.3463935852050781, "learning_rate": 3.759513561571447e-05, "loss": 0.2242, "step": 22355 }, { "epoch": 0.3987443370313559, "grad_norm": 0.2725744843482971, "learning_rate": 3.759379105333295e-05, "loss": 0.2093, "step": 22356 }, { "epoch": 0.3987621731530696, "grad_norm": 0.23158814013004303, "learning_rate": 3.759244644213355e-05, "loss": 0.1716, "step": 22357 }, { "epoch": 0.3987800092747833, "grad_norm": 0.22862771153450012, "learning_rate": 3.7591101782121475e-05, "loss": 0.1302, "step": 22358 }, { "epoch": 0.398797845396497, "grad_norm": 0.29475483298301697, "learning_rate": 3.758975707330194e-05, "loss": 0.1675, "step": 22359 }, { "epoch": 0.3988156815182107, "grad_norm": 0.22144821286201477, "learning_rate": 3.758841231568017e-05, "loss": 0.1693, "step": 22360 }, { "epoch": 0.39883351763992436, "grad_norm": 0.334911972284317, "learning_rate": 3.7587067509261356e-05, "loss": 0.2412, "step": 22361 }, { "epoch": 0.39885135376163805, "grad_norm": 0.21591593325138092, "learning_rate": 3.758572265405072e-05, "loss": 0.1336, "step": 22362 }, { "epoch": 0.39886918988335174, "grad_norm": 0.24001966416835785, "learning_rate": 3.758437775005348e-05, "loss": 0.1418, "step": 22363 }, { "epoch": 0.3988870260050655, "grad_norm": 0.24681399762630463, "learning_rate": 3.758303279727484e-05, "loss": 0.1795, "step": 22364 }, { "epoch": 0.39890486212677917, "grad_norm": 0.24292513728141785, "learning_rate": 3.758168779572002e-05, "loss": 0.1782, "step": 22365 }, { "epoch": 0.39892269824849286, "grad_norm": 0.24503962695598602, "learning_rate": 3.7580342745394237e-05, "loss": 0.1608, "step": 22366 }, { "epoch": 0.39894053437020655, "grad_norm": 0.35116299986839294, "learning_rate": 3.75789976463027e-05, "loss": 0.1883, "step": 22367 }, { "epoch": 0.39895837049192023, "grad_norm": 0.32512491941452026, "learning_rate": 3.757765249845062e-05, "loss": 0.1835, "step": 22368 }, { "epoch": 0.3989762066136339, "grad_norm": 0.21189890801906586, "learning_rate": 3.7576307301843213e-05, "loss": 0.132, "step": 22369 }, { "epoch": 0.3989940427353476, "grad_norm": 0.22695215046405792, "learning_rate": 3.757496205648571e-05, "loss": 0.1457, "step": 22370 }, { "epoch": 0.3990118788570613, "grad_norm": 0.4169876277446747, "learning_rate": 3.757361676238329e-05, "loss": 0.1461, "step": 22371 }, { "epoch": 0.39902971497877504, "grad_norm": 0.2240995466709137, "learning_rate": 3.757227141954119e-05, "loss": 0.1418, "step": 22372 }, { "epoch": 0.39904755110048873, "grad_norm": 0.25536563992500305, "learning_rate": 3.7570926027964645e-05, "loss": 0.1612, "step": 22373 }, { "epoch": 0.3990653872222024, "grad_norm": 0.2658531665802002, "learning_rate": 3.756958058765884e-05, "loss": 0.1654, "step": 22374 }, { "epoch": 0.3990832233439161, "grad_norm": 0.2901322841644287, "learning_rate": 3.756823509862899e-05, "loss": 0.1316, "step": 22375 }, { "epoch": 0.3991010594656298, "grad_norm": 0.4107397496700287, "learning_rate": 3.7566889560880326e-05, "loss": 0.1926, "step": 22376 }, { "epoch": 0.3991188955873435, "grad_norm": 0.35893312096595764, "learning_rate": 3.756554397441805e-05, "loss": 0.152, "step": 22377 }, { "epoch": 0.39913673170905717, "grad_norm": 0.17993880808353424, "learning_rate": 3.756419833924739e-05, "loss": 0.1697, "step": 22378 }, { "epoch": 0.39915456783077086, "grad_norm": 0.3351760506629944, "learning_rate": 3.756285265537356e-05, "loss": 0.1804, "step": 22379 }, { "epoch": 0.39917240395248454, "grad_norm": 0.25972780585289, "learning_rate": 3.756150692280178e-05, "loss": 0.1709, "step": 22380 }, { "epoch": 0.3991902400741983, "grad_norm": 0.2831493318080902, "learning_rate": 3.7560161141537254e-05, "loss": 0.1665, "step": 22381 }, { "epoch": 0.399208076195912, "grad_norm": 0.24255450069904327, "learning_rate": 3.755881531158521e-05, "loss": 0.1601, "step": 22382 }, { "epoch": 0.39922591231762566, "grad_norm": 0.2736910283565521, "learning_rate": 3.755746943295085e-05, "loss": 0.1347, "step": 22383 }, { "epoch": 0.39924374843933935, "grad_norm": 0.29703789949417114, "learning_rate": 3.755612350563941e-05, "loss": 0.1865, "step": 22384 }, { "epoch": 0.39926158456105304, "grad_norm": 0.4246012568473816, "learning_rate": 3.755477752965609e-05, "loss": 0.1653, "step": 22385 }, { "epoch": 0.39927942068276673, "grad_norm": 0.26222506165504456, "learning_rate": 3.755343150500612e-05, "loss": 0.1294, "step": 22386 }, { "epoch": 0.3992972568044804, "grad_norm": 0.33491799235343933, "learning_rate": 3.755208543169472e-05, "loss": 0.1318, "step": 22387 }, { "epoch": 0.3993150929261941, "grad_norm": 0.21842913329601288, "learning_rate": 3.7550739309727104e-05, "loss": 0.14, "step": 22388 }, { "epoch": 0.39933292904790785, "grad_norm": 0.2426721751689911, "learning_rate": 3.754939313910848e-05, "loss": 0.1433, "step": 22389 }, { "epoch": 0.39935076516962154, "grad_norm": 0.23122094571590424, "learning_rate": 3.754804691984407e-05, "loss": 0.1739, "step": 22390 }, { "epoch": 0.3993686012913352, "grad_norm": 0.26716575026512146, "learning_rate": 3.7546700651939105e-05, "loss": 0.1494, "step": 22391 }, { "epoch": 0.3993864374130489, "grad_norm": 0.23491953313350677, "learning_rate": 3.7545354335398785e-05, "loss": 0.2122, "step": 22392 }, { "epoch": 0.3994042735347626, "grad_norm": 0.2516017258167267, "learning_rate": 3.7544007970228344e-05, "loss": 0.1327, "step": 22393 }, { "epoch": 0.3994221096564763, "grad_norm": 0.29688936471939087, "learning_rate": 3.754266155643299e-05, "loss": 0.1667, "step": 22394 }, { "epoch": 0.39943994577819, "grad_norm": 0.2807506024837494, "learning_rate": 3.754131509401796e-05, "loss": 0.1744, "step": 22395 }, { "epoch": 0.39945778189990366, "grad_norm": 0.30966708064079285, "learning_rate": 3.7539968582988446e-05, "loss": 0.1635, "step": 22396 }, { "epoch": 0.3994756180216174, "grad_norm": 0.2621917426586151, "learning_rate": 3.75386220233497e-05, "loss": 0.1816, "step": 22397 }, { "epoch": 0.3994934541433311, "grad_norm": 0.2556757628917694, "learning_rate": 3.753727541510691e-05, "loss": 0.1406, "step": 22398 }, { "epoch": 0.3995112902650448, "grad_norm": 0.3581705689430237, "learning_rate": 3.7535928758265315e-05, "loss": 0.2429, "step": 22399 }, { "epoch": 0.39952912638675847, "grad_norm": 0.24466313421726227, "learning_rate": 3.753458205283013e-05, "loss": 0.1612, "step": 22400 }, { "epoch": 0.39954696250847216, "grad_norm": 0.23017403483390808, "learning_rate": 3.753323529880658e-05, "loss": 0.1384, "step": 22401 }, { "epoch": 0.39956479863018585, "grad_norm": 0.2083551585674286, "learning_rate": 3.7531888496199876e-05, "loss": 0.1505, "step": 22402 }, { "epoch": 0.39958263475189953, "grad_norm": 0.2748313248157501, "learning_rate": 3.753054164501524e-05, "loss": 0.146, "step": 22403 }, { "epoch": 0.3996004708736132, "grad_norm": 0.2720942795276642, "learning_rate": 3.7529194745257903e-05, "loss": 0.1649, "step": 22404 }, { "epoch": 0.3996183069953269, "grad_norm": 0.28043875098228455, "learning_rate": 3.752784779693308e-05, "loss": 0.1312, "step": 22405 }, { "epoch": 0.39963614311704065, "grad_norm": 0.3267343044281006, "learning_rate": 3.752650080004599e-05, "loss": 0.202, "step": 22406 }, { "epoch": 0.39965397923875434, "grad_norm": 0.1945066750049591, "learning_rate": 3.7525153754601855e-05, "loss": 0.1299, "step": 22407 }, { "epoch": 0.39967181536046803, "grad_norm": 0.2010776400566101, "learning_rate": 3.75238066606059e-05, "loss": 0.1421, "step": 22408 }, { "epoch": 0.3996896514821817, "grad_norm": 0.275698184967041, "learning_rate": 3.752245951806335e-05, "loss": 0.1972, "step": 22409 }, { "epoch": 0.3997074876038954, "grad_norm": 0.18570183217525482, "learning_rate": 3.752111232697941e-05, "loss": 0.1568, "step": 22410 }, { "epoch": 0.3997253237256091, "grad_norm": 0.24819031357765198, "learning_rate": 3.751976508735932e-05, "loss": 0.1236, "step": 22411 }, { "epoch": 0.3997431598473228, "grad_norm": 0.28810280561447144, "learning_rate": 3.7518417799208305e-05, "loss": 0.1431, "step": 22412 }, { "epoch": 0.39976099596903647, "grad_norm": 0.26540902256965637, "learning_rate": 3.751707046253157e-05, "loss": 0.1358, "step": 22413 }, { "epoch": 0.3997788320907502, "grad_norm": 0.21171307563781738, "learning_rate": 3.751572307733434e-05, "loss": 0.0911, "step": 22414 }, { "epoch": 0.3997966682124639, "grad_norm": 0.2565472722053528, "learning_rate": 3.751437564362186e-05, "loss": 0.1399, "step": 22415 }, { "epoch": 0.3998145043341776, "grad_norm": 0.34243470430374146, "learning_rate": 3.7513028161399324e-05, "loss": 0.1419, "step": 22416 }, { "epoch": 0.3998323404558913, "grad_norm": 0.2572193741798401, "learning_rate": 3.7511680630671975e-05, "loss": 0.1157, "step": 22417 }, { "epoch": 0.39985017657760497, "grad_norm": 0.2543811798095703, "learning_rate": 3.751033305144503e-05, "loss": 0.1481, "step": 22418 }, { "epoch": 0.39986801269931865, "grad_norm": 0.3897438645362854, "learning_rate": 3.750898542372372e-05, "loss": 0.211, "step": 22419 }, { "epoch": 0.39988584882103234, "grad_norm": 0.19551517069339752, "learning_rate": 3.7507637747513245e-05, "loss": 0.1355, "step": 22420 }, { "epoch": 0.39990368494274603, "grad_norm": 0.264609158039093, "learning_rate": 3.750629002281886e-05, "loss": 0.1883, "step": 22421 }, { "epoch": 0.3999215210644597, "grad_norm": 0.2650822699069977, "learning_rate": 3.750494224964577e-05, "loss": 0.1666, "step": 22422 }, { "epoch": 0.39993935718617346, "grad_norm": 0.2808986008167267, "learning_rate": 3.7503594427999204e-05, "loss": 0.1012, "step": 22423 }, { "epoch": 0.39995719330788715, "grad_norm": 0.26200565695762634, "learning_rate": 3.7502246557884394e-05, "loss": 0.142, "step": 22424 }, { "epoch": 0.39997502942960084, "grad_norm": 0.2342466562986374, "learning_rate": 3.750089863930655e-05, "loss": 0.1828, "step": 22425 }, { "epoch": 0.3999928655513145, "grad_norm": 0.24660594761371613, "learning_rate": 3.749955067227092e-05, "loss": 0.159, "step": 22426 }, { "epoch": 0.4000107016730282, "grad_norm": 0.28356727957725525, "learning_rate": 3.749820265678269e-05, "loss": 0.1524, "step": 22427 }, { "epoch": 0.4000285377947419, "grad_norm": 0.21599942445755005, "learning_rate": 3.749685459284713e-05, "loss": 0.1481, "step": 22428 }, { "epoch": 0.4000463739164556, "grad_norm": 0.2328146994113922, "learning_rate": 3.7495506480469434e-05, "loss": 0.1099, "step": 22429 }, { "epoch": 0.4000642100381693, "grad_norm": 0.25440514087677, "learning_rate": 3.749415831965485e-05, "loss": 0.1719, "step": 22430 }, { "epoch": 0.400082046159883, "grad_norm": 0.27376434206962585, "learning_rate": 3.7492810110408585e-05, "loss": 0.173, "step": 22431 }, { "epoch": 0.4000998822815967, "grad_norm": 0.2598424255847931, "learning_rate": 3.7491461852735876e-05, "loss": 0.1774, "step": 22432 }, { "epoch": 0.4001177184033104, "grad_norm": 0.25894680619239807, "learning_rate": 3.749011354664194e-05, "loss": 0.1671, "step": 22433 }, { "epoch": 0.4001355545250241, "grad_norm": 0.343810111284256, "learning_rate": 3.748876519213201e-05, "loss": 0.2451, "step": 22434 }, { "epoch": 0.40015339064673777, "grad_norm": 0.24313119053840637, "learning_rate": 3.748741678921132e-05, "loss": 0.1408, "step": 22435 }, { "epoch": 0.40017122676845146, "grad_norm": 0.25919029116630554, "learning_rate": 3.7486068337885094e-05, "loss": 0.1193, "step": 22436 }, { "epoch": 0.40018906289016515, "grad_norm": 0.27186962962150574, "learning_rate": 3.7484719838158545e-05, "loss": 0.1393, "step": 22437 }, { "epoch": 0.40020689901187884, "grad_norm": 0.2500014007091522, "learning_rate": 3.748337129003692e-05, "loss": 0.1567, "step": 22438 }, { "epoch": 0.4002247351335925, "grad_norm": 0.27956870198249817, "learning_rate": 3.748202269352543e-05, "loss": 0.1355, "step": 22439 }, { "epoch": 0.40024257125530627, "grad_norm": 0.2722923755645752, "learning_rate": 3.7480674048629304e-05, "loss": 0.1397, "step": 22440 }, { "epoch": 0.40026040737701996, "grad_norm": 0.2864592373371124, "learning_rate": 3.747932535535378e-05, "loss": 0.1884, "step": 22441 }, { "epoch": 0.40027824349873364, "grad_norm": 0.23527361452579498, "learning_rate": 3.747797661370407e-05, "loss": 0.1418, "step": 22442 }, { "epoch": 0.40029607962044733, "grad_norm": 0.2685128450393677, "learning_rate": 3.7476627823685426e-05, "loss": 0.1688, "step": 22443 }, { "epoch": 0.400313915742161, "grad_norm": 0.300820916891098, "learning_rate": 3.7475278985303056e-05, "loss": 0.1574, "step": 22444 }, { "epoch": 0.4003317518638747, "grad_norm": 0.23595267534255981, "learning_rate": 3.7473930098562206e-05, "loss": 0.1591, "step": 22445 }, { "epoch": 0.4003495879855884, "grad_norm": 0.295297771692276, "learning_rate": 3.747258116346809e-05, "loss": 0.2227, "step": 22446 }, { "epoch": 0.4003674241073021, "grad_norm": 0.4549243748188019, "learning_rate": 3.747123218002594e-05, "loss": 0.1571, "step": 22447 }, { "epoch": 0.4003852602290158, "grad_norm": 0.26432284712791443, "learning_rate": 3.746988314824098e-05, "loss": 0.1795, "step": 22448 }, { "epoch": 0.4004030963507295, "grad_norm": 0.26903557777404785, "learning_rate": 3.746853406811845e-05, "loss": 0.1442, "step": 22449 }, { "epoch": 0.4004209324724432, "grad_norm": 0.30301082134246826, "learning_rate": 3.746718493966358e-05, "loss": 0.1428, "step": 22450 }, { "epoch": 0.4004387685941569, "grad_norm": 0.29643484950065613, "learning_rate": 3.74658357628816e-05, "loss": 0.1769, "step": 22451 }, { "epoch": 0.4004566047158706, "grad_norm": 0.2608742117881775, "learning_rate": 3.7464486537777724e-05, "loss": 0.1171, "step": 22452 }, { "epoch": 0.40047444083758427, "grad_norm": 0.24533872306346893, "learning_rate": 3.746313726435719e-05, "loss": 0.1313, "step": 22453 }, { "epoch": 0.40049227695929795, "grad_norm": 0.21685953438282013, "learning_rate": 3.746178794262524e-05, "loss": 0.1431, "step": 22454 }, { "epoch": 0.40051011308101164, "grad_norm": 0.5362133979797363, "learning_rate": 3.746043857258709e-05, "loss": 0.1813, "step": 22455 }, { "epoch": 0.4005279492027254, "grad_norm": 0.23753641545772552, "learning_rate": 3.7459089154247985e-05, "loss": 0.1307, "step": 22456 }, { "epoch": 0.4005457853244391, "grad_norm": 0.34868675470352173, "learning_rate": 3.745773968761314e-05, "loss": 0.1958, "step": 22457 }, { "epoch": 0.40056362144615276, "grad_norm": 0.2925041615962982, "learning_rate": 3.74563901726878e-05, "loss": 0.1693, "step": 22458 }, { "epoch": 0.40058145756786645, "grad_norm": 0.206575408577919, "learning_rate": 3.745504060947718e-05, "loss": 0.1662, "step": 22459 }, { "epoch": 0.40059929368958014, "grad_norm": 0.26016613841056824, "learning_rate": 3.7453690997986534e-05, "loss": 0.1226, "step": 22460 }, { "epoch": 0.4006171298112938, "grad_norm": 0.2690242826938629, "learning_rate": 3.745234133822107e-05, "loss": 0.1679, "step": 22461 }, { "epoch": 0.4006349659330075, "grad_norm": 0.3547515869140625, "learning_rate": 3.745099163018603e-05, "loss": 0.1546, "step": 22462 }, { "epoch": 0.4006528020547212, "grad_norm": 0.19431684911251068, "learning_rate": 3.744964187388664e-05, "loss": 0.1331, "step": 22463 }, { "epoch": 0.4006706381764349, "grad_norm": 0.2520959973335266, "learning_rate": 3.744829206932815e-05, "loss": 0.1287, "step": 22464 }, { "epoch": 0.40068847429814863, "grad_norm": 0.22487646341323853, "learning_rate": 3.744694221651578e-05, "loss": 0.1466, "step": 22465 }, { "epoch": 0.4007063104198623, "grad_norm": 0.24782566726207733, "learning_rate": 3.7445592315454766e-05, "loss": 0.1773, "step": 22466 }, { "epoch": 0.400724146541576, "grad_norm": 0.2625008225440979, "learning_rate": 3.744424236615033e-05, "loss": 0.1482, "step": 22467 }, { "epoch": 0.4007419826632897, "grad_norm": 0.26926976442337036, "learning_rate": 3.744289236860771e-05, "loss": 0.1434, "step": 22468 }, { "epoch": 0.4007598187850034, "grad_norm": 0.2567867040634155, "learning_rate": 3.7441542322832146e-05, "loss": 0.1418, "step": 22469 }, { "epoch": 0.4007776549067171, "grad_norm": 0.3140268921852112, "learning_rate": 3.7440192228828864e-05, "loss": 0.1816, "step": 22470 }, { "epoch": 0.40079549102843076, "grad_norm": 0.20431353151798248, "learning_rate": 3.74388420866031e-05, "loss": 0.1524, "step": 22471 }, { "epoch": 0.40081332715014445, "grad_norm": 0.22864453494548798, "learning_rate": 3.743749189616009e-05, "loss": 0.1151, "step": 22472 }, { "epoch": 0.4008311632718582, "grad_norm": 0.2789619565010071, "learning_rate": 3.7436141657505074e-05, "loss": 0.1537, "step": 22473 }, { "epoch": 0.4008489993935719, "grad_norm": 0.2794075012207031, "learning_rate": 3.7434791370643266e-05, "loss": 0.1838, "step": 22474 }, { "epoch": 0.40086683551528557, "grad_norm": 0.3180589973926544, "learning_rate": 3.743344103557992e-05, "loss": 0.1656, "step": 22475 }, { "epoch": 0.40088467163699926, "grad_norm": 0.24583542346954346, "learning_rate": 3.743209065232025e-05, "loss": 0.1523, "step": 22476 }, { "epoch": 0.40090250775871294, "grad_norm": 0.3447679877281189, "learning_rate": 3.743074022086951e-05, "loss": 0.1374, "step": 22477 }, { "epoch": 0.40092034388042663, "grad_norm": 0.38320985436439514, "learning_rate": 3.742938974123293e-05, "loss": 0.2163, "step": 22478 }, { "epoch": 0.4009381800021403, "grad_norm": 0.30444759130477905, "learning_rate": 3.742803921341574e-05, "loss": 0.1371, "step": 22479 }, { "epoch": 0.400956016123854, "grad_norm": 0.21661725640296936, "learning_rate": 3.742668863742318e-05, "loss": 0.116, "step": 22480 }, { "epoch": 0.4009738522455677, "grad_norm": 0.24259746074676514, "learning_rate": 3.742533801326048e-05, "loss": 0.1726, "step": 22481 }, { "epoch": 0.40099168836728144, "grad_norm": 0.3091650605201721, "learning_rate": 3.742398734093287e-05, "loss": 0.1417, "step": 22482 }, { "epoch": 0.40100952448899513, "grad_norm": 0.2781609892845154, "learning_rate": 3.7422636620445605e-05, "loss": 0.1696, "step": 22483 }, { "epoch": 0.4010273606107088, "grad_norm": 0.3832854628562927, "learning_rate": 3.742128585180391e-05, "loss": 0.109, "step": 22484 }, { "epoch": 0.4010451967324225, "grad_norm": 0.23148325085639954, "learning_rate": 3.741993503501302e-05, "loss": 0.1636, "step": 22485 }, { "epoch": 0.4010630328541362, "grad_norm": 0.28823745250701904, "learning_rate": 3.7418584170078176e-05, "loss": 0.1409, "step": 22486 }, { "epoch": 0.4010808689758499, "grad_norm": 0.29489466547966003, "learning_rate": 3.74172332570046e-05, "loss": 0.1864, "step": 22487 }, { "epoch": 0.40109870509756357, "grad_norm": 0.28868696093559265, "learning_rate": 3.7415882295797545e-05, "loss": 0.1668, "step": 22488 }, { "epoch": 0.40111654121927726, "grad_norm": 0.25830361247062683, "learning_rate": 3.741453128646224e-05, "loss": 0.2186, "step": 22489 }, { "epoch": 0.401134377340991, "grad_norm": 0.374521404504776, "learning_rate": 3.741318022900392e-05, "loss": 0.118, "step": 22490 }, { "epoch": 0.4011522134627047, "grad_norm": 0.3246745467185974, "learning_rate": 3.741182912342783e-05, "loss": 0.1641, "step": 22491 }, { "epoch": 0.4011700495844184, "grad_norm": 0.23623211681842804, "learning_rate": 3.741047796973921e-05, "loss": 0.1634, "step": 22492 }, { "epoch": 0.40118788570613206, "grad_norm": 0.2502305507659912, "learning_rate": 3.7409126767943284e-05, "loss": 0.1851, "step": 22493 }, { "epoch": 0.40120572182784575, "grad_norm": 0.30829817056655884, "learning_rate": 3.74077755180453e-05, "loss": 0.182, "step": 22494 }, { "epoch": 0.40122355794955944, "grad_norm": 0.22984784841537476, "learning_rate": 3.7406424220050487e-05, "loss": 0.12, "step": 22495 }, { "epoch": 0.4012413940712731, "grad_norm": 0.23265908658504486, "learning_rate": 3.74050728739641e-05, "loss": 0.1443, "step": 22496 }, { "epoch": 0.4012592301929868, "grad_norm": 0.23170119524002075, "learning_rate": 3.740372147979136e-05, "loss": 0.1551, "step": 22497 }, { "epoch": 0.40127706631470056, "grad_norm": 0.26299262046813965, "learning_rate": 3.7402370037537496e-05, "loss": 0.1849, "step": 22498 }, { "epoch": 0.40129490243641425, "grad_norm": 0.24848175048828125, "learning_rate": 3.7401018547207786e-05, "loss": 0.1973, "step": 22499 }, { "epoch": 0.40131273855812793, "grad_norm": 0.2625092566013336, "learning_rate": 3.7399667008807425e-05, "loss": 0.1472, "step": 22500 }, { "epoch": 0.4013305746798416, "grad_norm": 0.2969052195549011, "learning_rate": 3.739831542234169e-05, "loss": 0.1521, "step": 22501 }, { "epoch": 0.4013484108015553, "grad_norm": 0.23838455975055695, "learning_rate": 3.739696378781579e-05, "loss": 0.1396, "step": 22502 }, { "epoch": 0.401366246923269, "grad_norm": 0.23480050265789032, "learning_rate": 3.7395612105234985e-05, "loss": 0.1125, "step": 22503 }, { "epoch": 0.4013840830449827, "grad_norm": 0.2930973768234253, "learning_rate": 3.7394260374604494e-05, "loss": 0.1123, "step": 22504 }, { "epoch": 0.4014019191666964, "grad_norm": 0.2859051823616028, "learning_rate": 3.739290859592958e-05, "loss": 0.1428, "step": 22505 }, { "epoch": 0.40141975528841006, "grad_norm": 0.2250789999961853, "learning_rate": 3.739155676921547e-05, "loss": 0.1484, "step": 22506 }, { "epoch": 0.4014375914101238, "grad_norm": 0.25904330611228943, "learning_rate": 3.73902048944674e-05, "loss": 0.1769, "step": 22507 }, { "epoch": 0.4014554275318375, "grad_norm": 0.21639803051948547, "learning_rate": 3.738885297169062e-05, "loss": 0.153, "step": 22508 }, { "epoch": 0.4014732636535512, "grad_norm": 0.3896249532699585, "learning_rate": 3.738750100089037e-05, "loss": 0.1749, "step": 22509 }, { "epoch": 0.40149109977526487, "grad_norm": 0.23915569484233856, "learning_rate": 3.738614898207188e-05, "loss": 0.1728, "step": 22510 }, { "epoch": 0.40150893589697856, "grad_norm": 0.27284306287765503, "learning_rate": 3.738479691524041e-05, "loss": 0.2205, "step": 22511 }, { "epoch": 0.40152677201869225, "grad_norm": 0.22742249071598053, "learning_rate": 3.738344480040118e-05, "loss": 0.1464, "step": 22512 }, { "epoch": 0.40154460814040593, "grad_norm": 0.28184935450553894, "learning_rate": 3.7382092637559443e-05, "loss": 0.0968, "step": 22513 }, { "epoch": 0.4015624442621196, "grad_norm": 0.3212919235229492, "learning_rate": 3.738074042672044e-05, "loss": 0.2036, "step": 22514 }, { "epoch": 0.40158028038383337, "grad_norm": 0.3239867091178894, "learning_rate": 3.7379388167889415e-05, "loss": 0.1872, "step": 22515 }, { "epoch": 0.40159811650554705, "grad_norm": 0.280558705329895, "learning_rate": 3.7378035861071606e-05, "loss": 0.1648, "step": 22516 }, { "epoch": 0.40161595262726074, "grad_norm": 0.3539159595966339, "learning_rate": 3.737668350627225e-05, "loss": 0.1791, "step": 22517 }, { "epoch": 0.40163378874897443, "grad_norm": 0.3493812382221222, "learning_rate": 3.737533110349658e-05, "loss": 0.1909, "step": 22518 }, { "epoch": 0.4016516248706881, "grad_norm": 0.2634727358818054, "learning_rate": 3.737397865274987e-05, "loss": 0.1573, "step": 22519 }, { "epoch": 0.4016694609924018, "grad_norm": 0.23298679292201996, "learning_rate": 3.7372626154037346e-05, "loss": 0.1116, "step": 22520 }, { "epoch": 0.4016872971141155, "grad_norm": 0.2536587417125702, "learning_rate": 3.737127360736424e-05, "loss": 0.1411, "step": 22521 }, { "epoch": 0.4017051332358292, "grad_norm": 0.30334731936454773, "learning_rate": 3.736992101273581e-05, "loss": 0.1472, "step": 22522 }, { "epoch": 0.40172296935754287, "grad_norm": 0.44926324486732483, "learning_rate": 3.73685683701573e-05, "loss": 0.1944, "step": 22523 }, { "epoch": 0.4017408054792566, "grad_norm": 0.3534368872642517, "learning_rate": 3.736721567963394e-05, "loss": 0.1675, "step": 22524 }, { "epoch": 0.4017586416009703, "grad_norm": 0.2229832410812378, "learning_rate": 3.736586294117097e-05, "loss": 0.1322, "step": 22525 }, { "epoch": 0.401776477722684, "grad_norm": 0.3615153133869171, "learning_rate": 3.736451015477366e-05, "loss": 0.1822, "step": 22526 }, { "epoch": 0.4017943138443977, "grad_norm": 0.3857594430446625, "learning_rate": 3.736315732044724e-05, "loss": 0.2246, "step": 22527 }, { "epoch": 0.40181214996611136, "grad_norm": 0.18655601143836975, "learning_rate": 3.736180443819694e-05, "loss": 0.1073, "step": 22528 }, { "epoch": 0.40182998608782505, "grad_norm": 0.21997660398483276, "learning_rate": 3.736045150802803e-05, "loss": 0.1579, "step": 22529 }, { "epoch": 0.40184782220953874, "grad_norm": 0.3747881352901459, "learning_rate": 3.7359098529945724e-05, "loss": 0.1611, "step": 22530 }, { "epoch": 0.40186565833125243, "grad_norm": 0.28655287623405457, "learning_rate": 3.735774550395529e-05, "loss": 0.1782, "step": 22531 }, { "epoch": 0.4018834944529662, "grad_norm": 0.2734461724758148, "learning_rate": 3.735639243006197e-05, "loss": 0.139, "step": 22532 }, { "epoch": 0.40190133057467986, "grad_norm": 0.2696603536605835, "learning_rate": 3.7355039308271e-05, "loss": 0.1562, "step": 22533 }, { "epoch": 0.40191916669639355, "grad_norm": 0.2373989075422287, "learning_rate": 3.735368613858764e-05, "loss": 0.1511, "step": 22534 }, { "epoch": 0.40193700281810724, "grad_norm": 0.28286632895469666, "learning_rate": 3.735233292101712e-05, "loss": 0.1372, "step": 22535 }, { "epoch": 0.4019548389398209, "grad_norm": 0.35591399669647217, "learning_rate": 3.735097965556469e-05, "loss": 0.193, "step": 22536 }, { "epoch": 0.4019726750615346, "grad_norm": 0.21864116191864014, "learning_rate": 3.7349626342235595e-05, "loss": 0.1847, "step": 22537 }, { "epoch": 0.4019905111832483, "grad_norm": 0.30557364225387573, "learning_rate": 3.7348272981035084e-05, "loss": 0.2072, "step": 22538 }, { "epoch": 0.402008347304962, "grad_norm": 0.22314032912254333, "learning_rate": 3.7346919571968395e-05, "loss": 0.1918, "step": 22539 }, { "epoch": 0.4020261834266757, "grad_norm": 0.3513238728046417, "learning_rate": 3.73455661150408e-05, "loss": 0.1352, "step": 22540 }, { "epoch": 0.4020440195483894, "grad_norm": 0.32878682017326355, "learning_rate": 3.734421261025751e-05, "loss": 0.1154, "step": 22541 }, { "epoch": 0.4020618556701031, "grad_norm": 0.22667662799358368, "learning_rate": 3.73428590576238e-05, "loss": 0.1244, "step": 22542 }, { "epoch": 0.4020796917918168, "grad_norm": 0.2991669774055481, "learning_rate": 3.7341505457144896e-05, "loss": 0.1466, "step": 22543 }, { "epoch": 0.4020975279135305, "grad_norm": 0.2183166742324829, "learning_rate": 3.734015180882606e-05, "loss": 0.1495, "step": 22544 }, { "epoch": 0.40211536403524417, "grad_norm": 0.21264046430587769, "learning_rate": 3.7338798112672536e-05, "loss": 0.1551, "step": 22545 }, { "epoch": 0.40213320015695786, "grad_norm": 0.23337967693805695, "learning_rate": 3.7337444368689555e-05, "loss": 0.1816, "step": 22546 }, { "epoch": 0.40215103627867155, "grad_norm": 0.2925909757614136, "learning_rate": 3.7336090576882396e-05, "loss": 0.1779, "step": 22547 }, { "epoch": 0.40216887240038524, "grad_norm": 0.33091145753860474, "learning_rate": 3.7334736737256277e-05, "loss": 0.149, "step": 22548 }, { "epoch": 0.402186708522099, "grad_norm": 0.3367851972579956, "learning_rate": 3.733338284981647e-05, "loss": 0.1423, "step": 22549 }, { "epoch": 0.40220454464381267, "grad_norm": 0.32828691601753235, "learning_rate": 3.7332028914568196e-05, "loss": 0.1703, "step": 22550 }, { "epoch": 0.40222238076552635, "grad_norm": 0.23279611766338348, "learning_rate": 3.733067493151673e-05, "loss": 0.1536, "step": 22551 }, { "epoch": 0.40224021688724004, "grad_norm": 0.22553859651088715, "learning_rate": 3.73293209006673e-05, "loss": 0.1781, "step": 22552 }, { "epoch": 0.40225805300895373, "grad_norm": 0.2534477412700653, "learning_rate": 3.7327966822025166e-05, "loss": 0.1919, "step": 22553 }, { "epoch": 0.4022758891306674, "grad_norm": 0.22570395469665527, "learning_rate": 3.7326612695595574e-05, "loss": 0.0963, "step": 22554 }, { "epoch": 0.4022937252523811, "grad_norm": 0.21601274609565735, "learning_rate": 3.7325258521383784e-05, "loss": 0.1699, "step": 22555 }, { "epoch": 0.4023115613740948, "grad_norm": 0.1645299345254898, "learning_rate": 3.732390429939503e-05, "loss": 0.1288, "step": 22556 }, { "epoch": 0.40232939749580854, "grad_norm": 0.22468598186969757, "learning_rate": 3.732255002963456e-05, "loss": 0.1558, "step": 22557 }, { "epoch": 0.4023472336175222, "grad_norm": 0.22805725038051605, "learning_rate": 3.7321195712107643e-05, "loss": 0.1496, "step": 22558 }, { "epoch": 0.4023650697392359, "grad_norm": 0.376115620136261, "learning_rate": 3.73198413468195e-05, "loss": 0.2402, "step": 22559 }, { "epoch": 0.4023829058609496, "grad_norm": 0.26651424169540405, "learning_rate": 3.73184869337754e-05, "loss": 0.1837, "step": 22560 }, { "epoch": 0.4024007419826633, "grad_norm": 0.3118203282356262, "learning_rate": 3.73171324729806e-05, "loss": 0.1609, "step": 22561 }, { "epoch": 0.402418578104377, "grad_norm": 0.33430716395378113, "learning_rate": 3.7315777964440336e-05, "loss": 0.1453, "step": 22562 }, { "epoch": 0.40243641422609067, "grad_norm": 0.2300124615430832, "learning_rate": 3.731442340815986e-05, "loss": 0.1758, "step": 22563 }, { "epoch": 0.40245425034780435, "grad_norm": 0.235808864235878, "learning_rate": 3.731306880414442e-05, "loss": 0.1344, "step": 22564 }, { "epoch": 0.40247208646951804, "grad_norm": 0.3269568979740143, "learning_rate": 3.731171415239929e-05, "loss": 0.1482, "step": 22565 }, { "epoch": 0.4024899225912318, "grad_norm": 0.2756299078464508, "learning_rate": 3.731035945292969e-05, "loss": 0.1876, "step": 22566 }, { "epoch": 0.4025077587129455, "grad_norm": 0.26264750957489014, "learning_rate": 3.730900470574088e-05, "loss": 0.2085, "step": 22567 }, { "epoch": 0.40252559483465916, "grad_norm": 0.2224518060684204, "learning_rate": 3.7307649910838126e-05, "loss": 0.1287, "step": 22568 }, { "epoch": 0.40254343095637285, "grad_norm": 0.2809959650039673, "learning_rate": 3.730629506822667e-05, "loss": 0.1602, "step": 22569 }, { "epoch": 0.40256126707808654, "grad_norm": 0.24181436002254486, "learning_rate": 3.730494017791176e-05, "loss": 0.1537, "step": 22570 }, { "epoch": 0.4025791031998002, "grad_norm": 0.21231873333454132, "learning_rate": 3.730358523989866e-05, "loss": 0.1964, "step": 22571 }, { "epoch": 0.4025969393215139, "grad_norm": 0.21915362775325775, "learning_rate": 3.730223025419261e-05, "loss": 0.1508, "step": 22572 }, { "epoch": 0.4026147754432276, "grad_norm": 0.3757163882255554, "learning_rate": 3.7300875220798855e-05, "loss": 0.1826, "step": 22573 }, { "epoch": 0.40263261156494135, "grad_norm": 0.254212349653244, "learning_rate": 3.729952013972267e-05, "loss": 0.1804, "step": 22574 }, { "epoch": 0.40265044768665503, "grad_norm": 0.3039003908634186, "learning_rate": 3.72981650109693e-05, "loss": 0.1597, "step": 22575 }, { "epoch": 0.4026682838083687, "grad_norm": 0.22406205534934998, "learning_rate": 3.729680983454399e-05, "loss": 0.1297, "step": 22576 }, { "epoch": 0.4026861199300824, "grad_norm": 0.2974556088447571, "learning_rate": 3.7295454610452e-05, "loss": 0.184, "step": 22577 }, { "epoch": 0.4027039560517961, "grad_norm": 0.25405627489089966, "learning_rate": 3.7294099338698575e-05, "loss": 0.1275, "step": 22578 }, { "epoch": 0.4027217921735098, "grad_norm": 0.27596405148506165, "learning_rate": 3.729274401928898e-05, "loss": 0.1329, "step": 22579 }, { "epoch": 0.4027396282952235, "grad_norm": 0.508490264415741, "learning_rate": 3.729138865222846e-05, "loss": 0.1714, "step": 22580 }, { "epoch": 0.40275746441693716, "grad_norm": 0.28058138489723206, "learning_rate": 3.7290033237522276e-05, "loss": 0.1695, "step": 22581 }, { "epoch": 0.40277530053865085, "grad_norm": 0.21250146627426147, "learning_rate": 3.728867777517567e-05, "loss": 0.1697, "step": 22582 }, { "epoch": 0.4027931366603646, "grad_norm": 0.20889438688755035, "learning_rate": 3.728732226519392e-05, "loss": 0.1658, "step": 22583 }, { "epoch": 0.4028109727820783, "grad_norm": 0.2138649970293045, "learning_rate": 3.7285966707582256e-05, "loss": 0.154, "step": 22584 }, { "epoch": 0.40282880890379197, "grad_norm": 0.23668667674064636, "learning_rate": 3.728461110234594e-05, "loss": 0.1757, "step": 22585 }, { "epoch": 0.40284664502550566, "grad_norm": 0.2773319184780121, "learning_rate": 3.728325544949024e-05, "loss": 0.1433, "step": 22586 }, { "epoch": 0.40286448114721934, "grad_norm": 0.22297628223896027, "learning_rate": 3.728189974902038e-05, "loss": 0.186, "step": 22587 }, { "epoch": 0.40288231726893303, "grad_norm": 0.22563689947128296, "learning_rate": 3.728054400094165e-05, "loss": 0.1151, "step": 22588 }, { "epoch": 0.4029001533906467, "grad_norm": 0.25474026799201965, "learning_rate": 3.727918820525928e-05, "loss": 0.1793, "step": 22589 }, { "epoch": 0.4029179895123604, "grad_norm": 0.412511944770813, "learning_rate": 3.7277832361978546e-05, "loss": 0.1667, "step": 22590 }, { "epoch": 0.40293582563407415, "grad_norm": 0.20552769303321838, "learning_rate": 3.7276476471104684e-05, "loss": 0.107, "step": 22591 }, { "epoch": 0.40295366175578784, "grad_norm": 0.22179926931858063, "learning_rate": 3.727512053264297e-05, "loss": 0.1073, "step": 22592 }, { "epoch": 0.4029714978775015, "grad_norm": 0.3287063539028168, "learning_rate": 3.727376454659863e-05, "loss": 0.1518, "step": 22593 }, { "epoch": 0.4029893339992152, "grad_norm": 0.2026943862438202, "learning_rate": 3.727240851297695e-05, "loss": 0.1408, "step": 22594 }, { "epoch": 0.4030071701209289, "grad_norm": 0.28636741638183594, "learning_rate": 3.7271052431783175e-05, "loss": 0.1427, "step": 22595 }, { "epoch": 0.4030250062426426, "grad_norm": 0.24123352766036987, "learning_rate": 3.726969630302257e-05, "loss": 0.1148, "step": 22596 }, { "epoch": 0.4030428423643563, "grad_norm": 0.24426031112670898, "learning_rate": 3.726834012670038e-05, "loss": 0.1018, "step": 22597 }, { "epoch": 0.40306067848606997, "grad_norm": 0.27445563673973083, "learning_rate": 3.726698390282186e-05, "loss": 0.1468, "step": 22598 }, { "epoch": 0.4030785146077837, "grad_norm": 0.23138689994812012, "learning_rate": 3.726562763139229e-05, "loss": 0.1022, "step": 22599 }, { "epoch": 0.4030963507294974, "grad_norm": 0.2385842204093933, "learning_rate": 3.726427131241689e-05, "loss": 0.163, "step": 22600 }, { "epoch": 0.4031141868512111, "grad_norm": 0.24416375160217285, "learning_rate": 3.726291494590095e-05, "loss": 0.1906, "step": 22601 }, { "epoch": 0.4031320229729248, "grad_norm": 0.33560794591903687, "learning_rate": 3.7261558531849705e-05, "loss": 0.161, "step": 22602 }, { "epoch": 0.40314985909463846, "grad_norm": 0.2666366398334503, "learning_rate": 3.7260202070268435e-05, "loss": 0.1432, "step": 22603 }, { "epoch": 0.40316769521635215, "grad_norm": 0.24450832605361938, "learning_rate": 3.725884556116238e-05, "loss": 0.1532, "step": 22604 }, { "epoch": 0.40318553133806584, "grad_norm": 0.23879143595695496, "learning_rate": 3.7257489004536814e-05, "loss": 0.1187, "step": 22605 }, { "epoch": 0.4032033674597795, "grad_norm": 0.3145284056663513, "learning_rate": 3.7256132400396985e-05, "loss": 0.2259, "step": 22606 }, { "epoch": 0.4032212035814932, "grad_norm": 0.25536713004112244, "learning_rate": 3.7254775748748156e-05, "loss": 0.0876, "step": 22607 }, { "epoch": 0.40323903970320696, "grad_norm": 0.3537450432777405, "learning_rate": 3.725341904959557e-05, "loss": 0.19, "step": 22608 }, { "epoch": 0.40325687582492065, "grad_norm": 0.261639803647995, "learning_rate": 3.7252062302944516e-05, "loss": 0.15, "step": 22609 }, { "epoch": 0.40327471194663433, "grad_norm": 0.36763739585876465, "learning_rate": 3.725070550880023e-05, "loss": 0.1491, "step": 22610 }, { "epoch": 0.403292548068348, "grad_norm": 0.2335500717163086, "learning_rate": 3.724934866716798e-05, "loss": 0.1406, "step": 22611 }, { "epoch": 0.4033103841900617, "grad_norm": 0.28220289945602417, "learning_rate": 3.7247991778053025e-05, "loss": 0.1744, "step": 22612 }, { "epoch": 0.4033282203117754, "grad_norm": 0.2794845700263977, "learning_rate": 3.724663484146061e-05, "loss": 0.1517, "step": 22613 }, { "epoch": 0.4033460564334891, "grad_norm": 0.20992477238178253, "learning_rate": 3.724527785739603e-05, "loss": 0.1519, "step": 22614 }, { "epoch": 0.4033638925552028, "grad_norm": 0.30859726667404175, "learning_rate": 3.724392082586451e-05, "loss": 0.1206, "step": 22615 }, { "epoch": 0.4033817286769165, "grad_norm": 0.24820156395435333, "learning_rate": 3.724256374687133e-05, "loss": 0.1687, "step": 22616 }, { "epoch": 0.4033995647986302, "grad_norm": 0.26261571049690247, "learning_rate": 3.724120662042174e-05, "loss": 0.1665, "step": 22617 }, { "epoch": 0.4034174009203439, "grad_norm": 0.22059425711631775, "learning_rate": 3.7239849446521004e-05, "loss": 0.1407, "step": 22618 }, { "epoch": 0.4034352370420576, "grad_norm": 0.3970620632171631, "learning_rate": 3.723849222517438e-05, "loss": 0.2214, "step": 22619 }, { "epoch": 0.40345307316377127, "grad_norm": 0.21623550355434418, "learning_rate": 3.7237134956387146e-05, "loss": 0.16, "step": 22620 }, { "epoch": 0.40347090928548496, "grad_norm": 0.2906252145767212, "learning_rate": 3.723577764016454e-05, "loss": 0.1559, "step": 22621 }, { "epoch": 0.40348874540719865, "grad_norm": 0.22615738213062286, "learning_rate": 3.723442027651184e-05, "loss": 0.1496, "step": 22622 }, { "epoch": 0.40350658152891233, "grad_norm": 0.2039412260055542, "learning_rate": 3.72330628654343e-05, "loss": 0.1219, "step": 22623 }, { "epoch": 0.403524417650626, "grad_norm": 0.2280758023262024, "learning_rate": 3.723170540693718e-05, "loss": 0.1588, "step": 22624 }, { "epoch": 0.40354225377233977, "grad_norm": 0.2778186798095703, "learning_rate": 3.723034790102575e-05, "loss": 0.2097, "step": 22625 }, { "epoch": 0.40356008989405345, "grad_norm": 0.17122596502304077, "learning_rate": 3.722899034770526e-05, "loss": 0.1232, "step": 22626 }, { "epoch": 0.40357792601576714, "grad_norm": 0.21911244094371796, "learning_rate": 3.722763274698099e-05, "loss": 0.1322, "step": 22627 }, { "epoch": 0.40359576213748083, "grad_norm": 0.20474018156528473, "learning_rate": 3.722627509885819e-05, "loss": 0.1141, "step": 22628 }, { "epoch": 0.4036135982591945, "grad_norm": 0.275081604719162, "learning_rate": 3.722491740334211e-05, "loss": 0.1423, "step": 22629 }, { "epoch": 0.4036314343809082, "grad_norm": 0.19890952110290527, "learning_rate": 3.722355966043804e-05, "loss": 0.1377, "step": 22630 }, { "epoch": 0.4036492705026219, "grad_norm": 0.23106753826141357, "learning_rate": 3.7222201870151235e-05, "loss": 0.1582, "step": 22631 }, { "epoch": 0.4036671066243356, "grad_norm": 0.2641909122467041, "learning_rate": 3.722084403248695e-05, "loss": 0.1225, "step": 22632 }, { "epoch": 0.4036849427460493, "grad_norm": 0.2284470647573471, "learning_rate": 3.721948614745045e-05, "loss": 0.1446, "step": 22633 }, { "epoch": 0.403702778867763, "grad_norm": 0.2533705234527588, "learning_rate": 3.7218128215047e-05, "loss": 0.1513, "step": 22634 }, { "epoch": 0.4037206149894767, "grad_norm": 0.23808880150318146, "learning_rate": 3.721677023528187e-05, "loss": 0.1489, "step": 22635 }, { "epoch": 0.4037384511111904, "grad_norm": 0.30811694264411926, "learning_rate": 3.7215412208160314e-05, "loss": 0.1707, "step": 22636 }, { "epoch": 0.4037562872329041, "grad_norm": 0.26869192719459534, "learning_rate": 3.7214054133687604e-05, "loss": 0.178, "step": 22637 }, { "epoch": 0.40377412335461776, "grad_norm": 0.24078595638275146, "learning_rate": 3.721269601186901e-05, "loss": 0.1204, "step": 22638 }, { "epoch": 0.40379195947633145, "grad_norm": 0.193939208984375, "learning_rate": 3.7211337842709774e-05, "loss": 0.1013, "step": 22639 }, { "epoch": 0.40380979559804514, "grad_norm": 0.2960437834262848, "learning_rate": 3.7209979626215185e-05, "loss": 0.1445, "step": 22640 }, { "epoch": 0.40382763171975883, "grad_norm": 0.3177621364593506, "learning_rate": 3.7208621362390496e-05, "loss": 0.1796, "step": 22641 }, { "epoch": 0.40384546784147257, "grad_norm": 0.3184565007686615, "learning_rate": 3.7207263051240964e-05, "loss": 0.1858, "step": 22642 }, { "epoch": 0.40386330396318626, "grad_norm": 0.23536665737628937, "learning_rate": 3.720590469277187e-05, "loss": 0.1544, "step": 22643 }, { "epoch": 0.40388114008489995, "grad_norm": 0.23937270045280457, "learning_rate": 3.7204546286988476e-05, "loss": 0.0994, "step": 22644 }, { "epoch": 0.40389897620661364, "grad_norm": 0.3052597641944885, "learning_rate": 3.720318783389605e-05, "loss": 0.1803, "step": 22645 }, { "epoch": 0.4039168123283273, "grad_norm": 0.21923767030239105, "learning_rate": 3.720182933349984e-05, "loss": 0.1392, "step": 22646 }, { "epoch": 0.403934648450041, "grad_norm": 0.23469729721546173, "learning_rate": 3.720047078580514e-05, "loss": 0.1406, "step": 22647 }, { "epoch": 0.4039524845717547, "grad_norm": 0.3268749713897705, "learning_rate": 3.719911219081719e-05, "loss": 0.1485, "step": 22648 }, { "epoch": 0.4039703206934684, "grad_norm": 0.22655069828033447, "learning_rate": 3.719775354854127e-05, "loss": 0.154, "step": 22649 }, { "epoch": 0.40398815681518213, "grad_norm": 0.3410642445087433, "learning_rate": 3.719639485898265e-05, "loss": 0.1521, "step": 22650 }, { "epoch": 0.4040059929368958, "grad_norm": 0.3202931582927704, "learning_rate": 3.719503612214659e-05, "loss": 0.1568, "step": 22651 }, { "epoch": 0.4040238290586095, "grad_norm": 0.5891566872596741, "learning_rate": 3.7193677338038354e-05, "loss": 0.2045, "step": 22652 }, { "epoch": 0.4040416651803232, "grad_norm": 0.2292635589838028, "learning_rate": 3.7192318506663215e-05, "loss": 0.1256, "step": 22653 }, { "epoch": 0.4040595013020369, "grad_norm": 0.27007508277893066, "learning_rate": 3.719095962802643e-05, "loss": 0.1797, "step": 22654 }, { "epoch": 0.40407733742375057, "grad_norm": 0.23769807815551758, "learning_rate": 3.7189600702133285e-05, "loss": 0.1794, "step": 22655 }, { "epoch": 0.40409517354546426, "grad_norm": 0.2937954366207123, "learning_rate": 3.7188241728989036e-05, "loss": 0.1988, "step": 22656 }, { "epoch": 0.40411300966717795, "grad_norm": 0.3008861839771271, "learning_rate": 3.718688270859895e-05, "loss": 0.1036, "step": 22657 }, { "epoch": 0.4041308457888917, "grad_norm": 0.22435985505580902, "learning_rate": 3.71855236409683e-05, "loss": 0.156, "step": 22658 }, { "epoch": 0.4041486819106054, "grad_norm": 0.2193506509065628, "learning_rate": 3.718416452610235e-05, "loss": 0.1555, "step": 22659 }, { "epoch": 0.40416651803231907, "grad_norm": 0.2615002989768982, "learning_rate": 3.718280536400637e-05, "loss": 0.1678, "step": 22660 }, { "epoch": 0.40418435415403275, "grad_norm": 0.30031007528305054, "learning_rate": 3.7181446154685626e-05, "loss": 0.1244, "step": 22661 }, { "epoch": 0.40420219027574644, "grad_norm": 0.274593323469162, "learning_rate": 3.71800868981454e-05, "loss": 0.1272, "step": 22662 }, { "epoch": 0.40422002639746013, "grad_norm": 0.2646281123161316, "learning_rate": 3.717872759439094e-05, "loss": 0.1586, "step": 22663 }, { "epoch": 0.4042378625191738, "grad_norm": 0.289614737033844, "learning_rate": 3.7177368243427525e-05, "loss": 0.2013, "step": 22664 }, { "epoch": 0.4042556986408875, "grad_norm": 0.2174532264471054, "learning_rate": 3.7176008845260425e-05, "loss": 0.1637, "step": 22665 }, { "epoch": 0.4042735347626012, "grad_norm": 0.28268003463745117, "learning_rate": 3.7174649399894916e-05, "loss": 0.1544, "step": 22666 }, { "epoch": 0.40429137088431494, "grad_norm": 0.25396421551704407, "learning_rate": 3.7173289907336254e-05, "loss": 0.138, "step": 22667 }, { "epoch": 0.4043092070060286, "grad_norm": 0.37442055344581604, "learning_rate": 3.7171930367589725e-05, "loss": 0.2028, "step": 22668 }, { "epoch": 0.4043270431277423, "grad_norm": 0.3185790181159973, "learning_rate": 3.717057078066058e-05, "loss": 0.1605, "step": 22669 }, { "epoch": 0.404344879249456, "grad_norm": 0.21763397753238678, "learning_rate": 3.71692111465541e-05, "loss": 0.1617, "step": 22670 }, { "epoch": 0.4043627153711697, "grad_norm": 0.19118084013462067, "learning_rate": 3.716785146527556e-05, "loss": 0.1259, "step": 22671 }, { "epoch": 0.4043805514928834, "grad_norm": 0.27597668766975403, "learning_rate": 3.716649173683022e-05, "loss": 0.2024, "step": 22672 }, { "epoch": 0.40439838761459707, "grad_norm": 0.4114522635936737, "learning_rate": 3.716513196122336e-05, "loss": 0.203, "step": 22673 }, { "epoch": 0.40441622373631075, "grad_norm": 0.19684277474880219, "learning_rate": 3.716377213846024e-05, "loss": 0.168, "step": 22674 }, { "epoch": 0.4044340598580245, "grad_norm": 0.19954712688922882, "learning_rate": 3.7162412268546146e-05, "loss": 0.1436, "step": 22675 }, { "epoch": 0.4044518959797382, "grad_norm": 0.2597973048686981, "learning_rate": 3.7161052351486345e-05, "loss": 0.1382, "step": 22676 }, { "epoch": 0.4044697321014519, "grad_norm": 0.2797156572341919, "learning_rate": 3.715969238728609e-05, "loss": 0.1903, "step": 22677 }, { "epoch": 0.40448756822316556, "grad_norm": 0.26486700773239136, "learning_rate": 3.7158332375950675e-05, "loss": 0.1179, "step": 22678 }, { "epoch": 0.40450540434487925, "grad_norm": 0.26257798075675964, "learning_rate": 3.715697231748537e-05, "loss": 0.1942, "step": 22679 }, { "epoch": 0.40452324046659294, "grad_norm": 0.36095571517944336, "learning_rate": 3.7155612211895436e-05, "loss": 0.173, "step": 22680 }, { "epoch": 0.4045410765883066, "grad_norm": 0.2301870882511139, "learning_rate": 3.715425205918615e-05, "loss": 0.1472, "step": 22681 }, { "epoch": 0.4045589127100203, "grad_norm": 0.2968779504299164, "learning_rate": 3.715289185936278e-05, "loss": 0.1955, "step": 22682 }, { "epoch": 0.404576748831734, "grad_norm": 0.24576152861118317, "learning_rate": 3.7151531612430614e-05, "loss": 0.1133, "step": 22683 }, { "epoch": 0.40459458495344774, "grad_norm": 0.23309847712516785, "learning_rate": 3.715017131839491e-05, "loss": 0.1597, "step": 22684 }, { "epoch": 0.40461242107516143, "grad_norm": 0.25851136445999146, "learning_rate": 3.714881097726094e-05, "loss": 0.1135, "step": 22685 }, { "epoch": 0.4046302571968751, "grad_norm": 0.2883867025375366, "learning_rate": 3.7147450589033996e-05, "loss": 0.1262, "step": 22686 }, { "epoch": 0.4046480933185888, "grad_norm": 0.18903659284114838, "learning_rate": 3.714609015371933e-05, "loss": 0.1369, "step": 22687 }, { "epoch": 0.4046659294403025, "grad_norm": 0.3064503073692322, "learning_rate": 3.714472967132222e-05, "loss": 0.2039, "step": 22688 }, { "epoch": 0.4046837655620162, "grad_norm": 0.3582994341850281, "learning_rate": 3.714336914184795e-05, "loss": 0.1629, "step": 22689 }, { "epoch": 0.4047016016837299, "grad_norm": 0.2574089765548706, "learning_rate": 3.714200856530178e-05, "loss": 0.189, "step": 22690 }, { "epoch": 0.40471943780544356, "grad_norm": 0.24698716402053833, "learning_rate": 3.714064794168899e-05, "loss": 0.2003, "step": 22691 }, { "epoch": 0.4047372739271573, "grad_norm": 0.2423950731754303, "learning_rate": 3.713928727101487e-05, "loss": 0.1076, "step": 22692 }, { "epoch": 0.404755110048871, "grad_norm": 0.25401636958122253, "learning_rate": 3.7137926553284666e-05, "loss": 0.1938, "step": 22693 }, { "epoch": 0.4047729461705847, "grad_norm": 0.2981540262699127, "learning_rate": 3.713656578850367e-05, "loss": 0.1992, "step": 22694 }, { "epoch": 0.40479078229229837, "grad_norm": 0.4166143238544464, "learning_rate": 3.7135204976677155e-05, "loss": 0.1668, "step": 22695 }, { "epoch": 0.40480861841401206, "grad_norm": 0.3178115785121918, "learning_rate": 3.71338441178104e-05, "loss": 0.2333, "step": 22696 }, { "epoch": 0.40482645453572574, "grad_norm": 0.33400195837020874, "learning_rate": 3.713248321190866e-05, "loss": 0.1333, "step": 22697 }, { "epoch": 0.40484429065743943, "grad_norm": 0.2740047574043274, "learning_rate": 3.713112225897723e-05, "loss": 0.1735, "step": 22698 }, { "epoch": 0.4048621267791531, "grad_norm": 0.45984068512916565, "learning_rate": 3.712976125902138e-05, "loss": 0.1703, "step": 22699 }, { "epoch": 0.40487996290086686, "grad_norm": 0.2941805124282837, "learning_rate": 3.7128400212046386e-05, "loss": 0.1409, "step": 22700 }, { "epoch": 0.40489779902258055, "grad_norm": 0.23208025097846985, "learning_rate": 3.7127039118057527e-05, "loss": 0.124, "step": 22701 }, { "epoch": 0.40491563514429424, "grad_norm": 0.3174727261066437, "learning_rate": 3.7125677977060073e-05, "loss": 0.1275, "step": 22702 }, { "epoch": 0.4049334712660079, "grad_norm": 0.22294315695762634, "learning_rate": 3.712431678905931e-05, "loss": 0.1823, "step": 22703 }, { "epoch": 0.4049513073877216, "grad_norm": 0.2801859974861145, "learning_rate": 3.71229555540605e-05, "loss": 0.1846, "step": 22704 }, { "epoch": 0.4049691435094353, "grad_norm": 0.28641200065612793, "learning_rate": 3.7121594272068925e-05, "loss": 0.2046, "step": 22705 }, { "epoch": 0.404986979631149, "grad_norm": 0.2667654752731323, "learning_rate": 3.712023294308987e-05, "loss": 0.1561, "step": 22706 }, { "epoch": 0.4050048157528627, "grad_norm": 0.2095882147550583, "learning_rate": 3.711887156712861e-05, "loss": 0.1581, "step": 22707 }, { "epoch": 0.40502265187457637, "grad_norm": 0.2592107951641083, "learning_rate": 3.7117510144190404e-05, "loss": 0.2215, "step": 22708 }, { "epoch": 0.4050404879962901, "grad_norm": 0.23236940801143646, "learning_rate": 3.711614867428056e-05, "loss": 0.1473, "step": 22709 }, { "epoch": 0.4050583241180038, "grad_norm": 0.33154410123825073, "learning_rate": 3.7114787157404326e-05, "loss": 0.1784, "step": 22710 }, { "epoch": 0.4050761602397175, "grad_norm": 0.3170108199119568, "learning_rate": 3.7113425593566996e-05, "loss": 0.1955, "step": 22711 }, { "epoch": 0.4050939963614312, "grad_norm": 0.2649974822998047, "learning_rate": 3.711206398277384e-05, "loss": 0.2132, "step": 22712 }, { "epoch": 0.40511183248314486, "grad_norm": 0.2959611117839813, "learning_rate": 3.7110702325030146e-05, "loss": 0.1804, "step": 22713 }, { "epoch": 0.40512966860485855, "grad_norm": 0.23980401456356049, "learning_rate": 3.7109340620341184e-05, "loss": 0.1217, "step": 22714 }, { "epoch": 0.40514750472657224, "grad_norm": 0.28079959750175476, "learning_rate": 3.7107978868712245e-05, "loss": 0.1303, "step": 22715 }, { "epoch": 0.4051653408482859, "grad_norm": 0.2960323393344879, "learning_rate": 3.710661707014859e-05, "loss": 0.1852, "step": 22716 }, { "epoch": 0.40518317696999967, "grad_norm": 0.3154200315475464, "learning_rate": 3.710525522465551e-05, "loss": 0.1264, "step": 22717 }, { "epoch": 0.40520101309171336, "grad_norm": 0.339114785194397, "learning_rate": 3.710389333223827e-05, "loss": 0.1971, "step": 22718 }, { "epoch": 0.40521884921342705, "grad_norm": 0.19106505811214447, "learning_rate": 3.7102531392902166e-05, "loss": 0.1039, "step": 22719 }, { "epoch": 0.40523668533514073, "grad_norm": 0.22526821494102478, "learning_rate": 3.710116940665247e-05, "loss": 0.1519, "step": 22720 }, { "epoch": 0.4052545214568544, "grad_norm": 0.31064149737358093, "learning_rate": 3.709980737349447e-05, "loss": 0.1642, "step": 22721 }, { "epoch": 0.4052723575785681, "grad_norm": 0.267220139503479, "learning_rate": 3.709844529343342e-05, "loss": 0.1386, "step": 22722 }, { "epoch": 0.4052901937002818, "grad_norm": 0.308649480342865, "learning_rate": 3.709708316647463e-05, "loss": 0.1125, "step": 22723 }, { "epoch": 0.4053080298219955, "grad_norm": 0.31479600071907043, "learning_rate": 3.709572099262337e-05, "loss": 0.2352, "step": 22724 }, { "epoch": 0.4053258659437092, "grad_norm": 0.22942212224006653, "learning_rate": 3.709435877188491e-05, "loss": 0.1374, "step": 22725 }, { "epoch": 0.4053437020654229, "grad_norm": 0.2632072865962982, "learning_rate": 3.709299650426453e-05, "loss": 0.1767, "step": 22726 }, { "epoch": 0.4053615381871366, "grad_norm": 0.2541314959526062, "learning_rate": 3.7091634189767536e-05, "loss": 0.1598, "step": 22727 }, { "epoch": 0.4053793743088503, "grad_norm": 0.26471981406211853, "learning_rate": 3.709027182839918e-05, "loss": 0.1741, "step": 22728 }, { "epoch": 0.405397210430564, "grad_norm": 0.41050460934638977, "learning_rate": 3.7088909420164765e-05, "loss": 0.1866, "step": 22729 }, { "epoch": 0.40541504655227767, "grad_norm": 0.2278328686952591, "learning_rate": 3.7087546965069556e-05, "loss": 0.1522, "step": 22730 }, { "epoch": 0.40543288267399136, "grad_norm": 0.33635637164115906, "learning_rate": 3.7086184463118835e-05, "loss": 0.1241, "step": 22731 }, { "epoch": 0.40545071879570505, "grad_norm": 0.24367345869541168, "learning_rate": 3.708482191431789e-05, "loss": 0.1341, "step": 22732 }, { "epoch": 0.40546855491741873, "grad_norm": 0.25286567211151123, "learning_rate": 3.7083459318672e-05, "loss": 0.1526, "step": 22733 }, { "epoch": 0.4054863910391325, "grad_norm": 0.38565927743911743, "learning_rate": 3.7082096676186454e-05, "loss": 0.1896, "step": 22734 }, { "epoch": 0.40550422716084616, "grad_norm": 0.2287045568227768, "learning_rate": 3.7080733986866536e-05, "loss": 0.164, "step": 22735 }, { "epoch": 0.40552206328255985, "grad_norm": 0.28129565715789795, "learning_rate": 3.707937125071751e-05, "loss": 0.1483, "step": 22736 }, { "epoch": 0.40553989940427354, "grad_norm": 0.32759547233581543, "learning_rate": 3.7078008467744667e-05, "loss": 0.1556, "step": 22737 }, { "epoch": 0.40555773552598723, "grad_norm": 0.2574281692504883, "learning_rate": 3.707664563795329e-05, "loss": 0.1828, "step": 22738 }, { "epoch": 0.4055755716477009, "grad_norm": 0.31409069895744324, "learning_rate": 3.707528276134867e-05, "loss": 0.1447, "step": 22739 }, { "epoch": 0.4055934077694146, "grad_norm": 0.22808100283145905, "learning_rate": 3.707391983793608e-05, "loss": 0.1805, "step": 22740 }, { "epoch": 0.4056112438911283, "grad_norm": 0.270231157541275, "learning_rate": 3.7072556867720807e-05, "loss": 0.0756, "step": 22741 }, { "epoch": 0.405629080012842, "grad_norm": 0.2477722018957138, "learning_rate": 3.707119385070814e-05, "loss": 0.1491, "step": 22742 }, { "epoch": 0.4056469161345557, "grad_norm": 0.32718339562416077, "learning_rate": 3.706983078690335e-05, "loss": 0.1341, "step": 22743 }, { "epoch": 0.4056647522562694, "grad_norm": 0.284368634223938, "learning_rate": 3.706846767631173e-05, "loss": 0.1365, "step": 22744 }, { "epoch": 0.4056825883779831, "grad_norm": 0.3289968967437744, "learning_rate": 3.706710451893856e-05, "loss": 0.1584, "step": 22745 }, { "epoch": 0.4057004244996968, "grad_norm": 0.21433857083320618, "learning_rate": 3.706574131478912e-05, "loss": 0.1298, "step": 22746 }, { "epoch": 0.4057182606214105, "grad_norm": 0.2692579925060272, "learning_rate": 3.706437806386871e-05, "loss": 0.1765, "step": 22747 }, { "epoch": 0.40573609674312416, "grad_norm": 0.30741509795188904, "learning_rate": 3.7063014766182594e-05, "loss": 0.1785, "step": 22748 }, { "epoch": 0.40575393286483785, "grad_norm": 0.2657647728919983, "learning_rate": 3.706165142173607e-05, "loss": 0.1793, "step": 22749 }, { "epoch": 0.40577176898655154, "grad_norm": 0.24137602746486664, "learning_rate": 3.7060288030534415e-05, "loss": 0.1785, "step": 22750 }, { "epoch": 0.4057896051082653, "grad_norm": 0.3676193654537201, "learning_rate": 3.705892459258292e-05, "loss": 0.1525, "step": 22751 }, { "epoch": 0.40580744122997897, "grad_norm": 0.27030256390571594, "learning_rate": 3.7057561107886874e-05, "loss": 0.0998, "step": 22752 }, { "epoch": 0.40582527735169266, "grad_norm": 0.23731249570846558, "learning_rate": 3.705619757645155e-05, "loss": 0.152, "step": 22753 }, { "epoch": 0.40584311347340635, "grad_norm": 0.24279236793518066, "learning_rate": 3.705483399828225e-05, "loss": 0.166, "step": 22754 }, { "epoch": 0.40586094959512004, "grad_norm": 0.30513203144073486, "learning_rate": 3.7053470373384244e-05, "loss": 0.1613, "step": 22755 }, { "epoch": 0.4058787857168337, "grad_norm": 0.29539796710014343, "learning_rate": 3.705210670176282e-05, "loss": 0.2068, "step": 22756 }, { "epoch": 0.4058966218385474, "grad_norm": 0.41609761118888855, "learning_rate": 3.705074298342327e-05, "loss": 0.1339, "step": 22757 }, { "epoch": 0.4059144579602611, "grad_norm": 0.2749961018562317, "learning_rate": 3.7049379218370875e-05, "loss": 0.1435, "step": 22758 }, { "epoch": 0.40593229408197484, "grad_norm": 0.3201879858970642, "learning_rate": 3.704801540661093e-05, "loss": 0.1655, "step": 22759 }, { "epoch": 0.40595013020368853, "grad_norm": 0.3555706739425659, "learning_rate": 3.704665154814872e-05, "loss": 0.152, "step": 22760 }, { "epoch": 0.4059679663254022, "grad_norm": 0.2815662622451782, "learning_rate": 3.7045287642989514e-05, "loss": 0.1365, "step": 22761 }, { "epoch": 0.4059858024471159, "grad_norm": 0.21779082715511322, "learning_rate": 3.7043923691138616e-05, "loss": 0.1426, "step": 22762 }, { "epoch": 0.4060036385688296, "grad_norm": 0.3736729919910431, "learning_rate": 3.704255969260132e-05, "loss": 0.1295, "step": 22763 }, { "epoch": 0.4060214746905433, "grad_norm": 0.2549952268600464, "learning_rate": 3.704119564738289e-05, "loss": 0.1632, "step": 22764 }, { "epoch": 0.40603931081225697, "grad_norm": 0.24689269065856934, "learning_rate": 3.703983155548864e-05, "loss": 0.1586, "step": 22765 }, { "epoch": 0.40605714693397066, "grad_norm": 0.26894739270210266, "learning_rate": 3.703846741692384e-05, "loss": 0.1448, "step": 22766 }, { "epoch": 0.40607498305568435, "grad_norm": 0.3928578495979309, "learning_rate": 3.7037103231693774e-05, "loss": 0.1562, "step": 22767 }, { "epoch": 0.4060928191773981, "grad_norm": 0.2933739721775055, "learning_rate": 3.703573899980375e-05, "loss": 0.1183, "step": 22768 }, { "epoch": 0.4061106552991118, "grad_norm": 0.3074215352535248, "learning_rate": 3.703437472125903e-05, "loss": 0.2131, "step": 22769 }, { "epoch": 0.40612849142082547, "grad_norm": 0.3377894163131714, "learning_rate": 3.703301039606494e-05, "loss": 0.1517, "step": 22770 }, { "epoch": 0.40614632754253915, "grad_norm": 0.2866095006465912, "learning_rate": 3.703164602422673e-05, "loss": 0.1279, "step": 22771 }, { "epoch": 0.40616416366425284, "grad_norm": 0.21049438416957855, "learning_rate": 3.703028160574971e-05, "loss": 0.1233, "step": 22772 }, { "epoch": 0.40618199978596653, "grad_norm": 0.291842520236969, "learning_rate": 3.7028917140639155e-05, "loss": 0.1708, "step": 22773 }, { "epoch": 0.4061998359076802, "grad_norm": 0.25131165981292725, "learning_rate": 3.702755262890037e-05, "loss": 0.1282, "step": 22774 }, { "epoch": 0.4062176720293939, "grad_norm": 0.3336566984653473, "learning_rate": 3.702618807053863e-05, "loss": 0.2221, "step": 22775 }, { "epoch": 0.40623550815110765, "grad_norm": 0.28126558661460876, "learning_rate": 3.702482346555924e-05, "loss": 0.1793, "step": 22776 }, { "epoch": 0.40625334427282134, "grad_norm": 0.2520708441734314, "learning_rate": 3.702345881396748e-05, "loss": 0.1435, "step": 22777 }, { "epoch": 0.406271180394535, "grad_norm": 0.25496798753738403, "learning_rate": 3.702209411576864e-05, "loss": 0.1467, "step": 22778 }, { "epoch": 0.4062890165162487, "grad_norm": 0.2044905573129654, "learning_rate": 3.7020729370968e-05, "loss": 0.1474, "step": 22779 }, { "epoch": 0.4063068526379624, "grad_norm": 0.2566578984260559, "learning_rate": 3.701936457957088e-05, "loss": 0.1363, "step": 22780 }, { "epoch": 0.4063246887596761, "grad_norm": 0.32850053906440735, "learning_rate": 3.701799974158254e-05, "loss": 0.1741, "step": 22781 }, { "epoch": 0.4063425248813898, "grad_norm": 0.2941726744174957, "learning_rate": 3.701663485700828e-05, "loss": 0.1643, "step": 22782 }, { "epoch": 0.40636036100310347, "grad_norm": 0.24518142640590668, "learning_rate": 3.7015269925853395e-05, "loss": 0.158, "step": 22783 }, { "epoch": 0.40637819712481715, "grad_norm": 0.29638490080833435, "learning_rate": 3.701390494812317e-05, "loss": 0.1633, "step": 22784 }, { "epoch": 0.4063960332465309, "grad_norm": 0.3184903860092163, "learning_rate": 3.701253992382291e-05, "loss": 0.1989, "step": 22785 }, { "epoch": 0.4064138693682446, "grad_norm": 0.3627992868423462, "learning_rate": 3.701117485295789e-05, "loss": 0.199, "step": 22786 }, { "epoch": 0.4064317054899583, "grad_norm": 0.22520382702350616, "learning_rate": 3.700980973553342e-05, "loss": 0.1781, "step": 22787 }, { "epoch": 0.40644954161167196, "grad_norm": 0.229277566075325, "learning_rate": 3.700844457155476e-05, "loss": 0.1458, "step": 22788 }, { "epoch": 0.40646737773338565, "grad_norm": 0.3314931094646454, "learning_rate": 3.700707936102723e-05, "loss": 0.2539, "step": 22789 }, { "epoch": 0.40648521385509934, "grad_norm": 0.26296812295913696, "learning_rate": 3.700571410395611e-05, "loss": 0.1387, "step": 22790 }, { "epoch": 0.406503049976813, "grad_norm": 0.22750885784626007, "learning_rate": 3.7004348800346706e-05, "loss": 0.1406, "step": 22791 }, { "epoch": 0.4065208860985267, "grad_norm": 0.23684147000312805, "learning_rate": 3.7002983450204284e-05, "loss": 0.1776, "step": 22792 }, { "epoch": 0.40653872222024046, "grad_norm": 0.3230094015598297, "learning_rate": 3.7001618053534174e-05, "loss": 0.2182, "step": 22793 }, { "epoch": 0.40655655834195414, "grad_norm": 0.22826208174228668, "learning_rate": 3.7000252610341626e-05, "loss": 0.157, "step": 22794 }, { "epoch": 0.40657439446366783, "grad_norm": 0.33501726388931274, "learning_rate": 3.6998887120631966e-05, "loss": 0.2059, "step": 22795 }, { "epoch": 0.4065922305853815, "grad_norm": 0.3803720474243164, "learning_rate": 3.6997521584410474e-05, "loss": 0.1252, "step": 22796 }, { "epoch": 0.4066100667070952, "grad_norm": 0.2851491868495941, "learning_rate": 3.6996156001682434e-05, "loss": 0.1252, "step": 22797 }, { "epoch": 0.4066279028288089, "grad_norm": 0.2280944436788559, "learning_rate": 3.6994790372453167e-05, "loss": 0.079, "step": 22798 }, { "epoch": 0.4066457389505226, "grad_norm": 0.29659906029701233, "learning_rate": 3.6993424696727936e-05, "loss": 0.1364, "step": 22799 }, { "epoch": 0.40666357507223627, "grad_norm": 0.332767128944397, "learning_rate": 3.6992058974512056e-05, "loss": 0.1704, "step": 22800 }, { "epoch": 0.40668141119394996, "grad_norm": 0.20667962729930878, "learning_rate": 3.699069320581081e-05, "loss": 0.1619, "step": 22801 }, { "epoch": 0.4066992473156637, "grad_norm": 0.22946684062480927, "learning_rate": 3.6989327390629493e-05, "loss": 0.1786, "step": 22802 }, { "epoch": 0.4067170834373774, "grad_norm": 0.5766844153404236, "learning_rate": 3.69879615289734e-05, "loss": 0.1444, "step": 22803 }, { "epoch": 0.4067349195590911, "grad_norm": 0.34237435460090637, "learning_rate": 3.6986595620847844e-05, "loss": 0.2386, "step": 22804 }, { "epoch": 0.40675275568080477, "grad_norm": 0.31693196296691895, "learning_rate": 3.6985229666258084e-05, "loss": 0.1838, "step": 22805 }, { "epoch": 0.40677059180251846, "grad_norm": 0.2959299683570862, "learning_rate": 3.698386366520945e-05, "loss": 0.1283, "step": 22806 }, { "epoch": 0.40678842792423214, "grad_norm": 0.25631704926490784, "learning_rate": 3.698249761770721e-05, "loss": 0.1265, "step": 22807 }, { "epoch": 0.40680626404594583, "grad_norm": 0.2766932547092438, "learning_rate": 3.698113152375668e-05, "loss": 0.1467, "step": 22808 }, { "epoch": 0.4068241001676595, "grad_norm": 0.20347727835178375, "learning_rate": 3.697976538336313e-05, "loss": 0.1529, "step": 22809 }, { "epoch": 0.40684193628937326, "grad_norm": 0.25040000677108765, "learning_rate": 3.697839919653189e-05, "loss": 0.1548, "step": 22810 }, { "epoch": 0.40685977241108695, "grad_norm": 0.29970845580101013, "learning_rate": 3.697703296326823e-05, "loss": 0.1867, "step": 22811 }, { "epoch": 0.40687760853280064, "grad_norm": 0.27633410692214966, "learning_rate": 3.6975666683577455e-05, "loss": 0.1319, "step": 22812 }, { "epoch": 0.4068954446545143, "grad_norm": 0.2531062066555023, "learning_rate": 3.697430035746486e-05, "loss": 0.159, "step": 22813 }, { "epoch": 0.406913280776228, "grad_norm": 0.25076374411582947, "learning_rate": 3.697293398493573e-05, "loss": 0.112, "step": 22814 }, { "epoch": 0.4069311168979417, "grad_norm": 0.3027176260948181, "learning_rate": 3.697156756599539e-05, "loss": 0.1585, "step": 22815 }, { "epoch": 0.4069489530196554, "grad_norm": 0.24157872796058655, "learning_rate": 3.6970201100649113e-05, "loss": 0.1431, "step": 22816 }, { "epoch": 0.4069667891413691, "grad_norm": 0.2958551347255707, "learning_rate": 3.696883458890219e-05, "loss": 0.1662, "step": 22817 }, { "epoch": 0.4069846252630828, "grad_norm": 0.22539092600345612, "learning_rate": 3.696746803075994e-05, "loss": 0.1386, "step": 22818 }, { "epoch": 0.4070024613847965, "grad_norm": 0.30554768443107605, "learning_rate": 3.696610142622766e-05, "loss": 0.1777, "step": 22819 }, { "epoch": 0.4070202975065102, "grad_norm": 0.3243509531021118, "learning_rate": 3.696473477531063e-05, "loss": 0.1447, "step": 22820 }, { "epoch": 0.4070381336282239, "grad_norm": 0.3667590618133545, "learning_rate": 3.696336807801415e-05, "loss": 0.1316, "step": 22821 }, { "epoch": 0.4070559697499376, "grad_norm": 0.2549302577972412, "learning_rate": 3.696200133434353e-05, "loss": 0.1701, "step": 22822 }, { "epoch": 0.40707380587165126, "grad_norm": 0.30883529782295227, "learning_rate": 3.696063454430405e-05, "loss": 0.2499, "step": 22823 }, { "epoch": 0.40709164199336495, "grad_norm": 0.21412034332752228, "learning_rate": 3.6959267707901037e-05, "loss": 0.1952, "step": 22824 }, { "epoch": 0.40710947811507864, "grad_norm": 0.34255868196487427, "learning_rate": 3.6957900825139755e-05, "loss": 0.1154, "step": 22825 }, { "epoch": 0.4071273142367923, "grad_norm": 0.2662195563316345, "learning_rate": 3.695653389602554e-05, "loss": 0.1909, "step": 22826 }, { "epoch": 0.40714515035850607, "grad_norm": 0.24291059374809265, "learning_rate": 3.695516692056365e-05, "loss": 0.1453, "step": 22827 }, { "epoch": 0.40716298648021976, "grad_norm": 0.34842705726623535, "learning_rate": 3.695379989875941e-05, "loss": 0.114, "step": 22828 }, { "epoch": 0.40718082260193345, "grad_norm": 0.2803455591201782, "learning_rate": 3.6952432830618116e-05, "loss": 0.175, "step": 22829 }, { "epoch": 0.40719865872364713, "grad_norm": 0.21294955909252167, "learning_rate": 3.695106571614506e-05, "loss": 0.1777, "step": 22830 }, { "epoch": 0.4072164948453608, "grad_norm": 0.2907697558403015, "learning_rate": 3.694969855534555e-05, "loss": 0.1726, "step": 22831 }, { "epoch": 0.4072343309670745, "grad_norm": 0.19814598560333252, "learning_rate": 3.694833134822487e-05, "loss": 0.1285, "step": 22832 }, { "epoch": 0.4072521670887882, "grad_norm": 0.3283410966396332, "learning_rate": 3.694696409478835e-05, "loss": 0.1104, "step": 22833 }, { "epoch": 0.4072700032105019, "grad_norm": 0.2641567289829254, "learning_rate": 3.694559679504126e-05, "loss": 0.1668, "step": 22834 }, { "epoch": 0.40728783933221563, "grad_norm": 0.2554190754890442, "learning_rate": 3.6944229448988906e-05, "loss": 0.1376, "step": 22835 }, { "epoch": 0.4073056754539293, "grad_norm": 0.2924638092517853, "learning_rate": 3.69428620566366e-05, "loss": 0.1946, "step": 22836 }, { "epoch": 0.407323511575643, "grad_norm": 0.3419770300388336, "learning_rate": 3.6941494617989637e-05, "loss": 0.1287, "step": 22837 }, { "epoch": 0.4073413476973567, "grad_norm": 0.25817233324050903, "learning_rate": 3.694012713305331e-05, "loss": 0.1644, "step": 22838 }, { "epoch": 0.4073591838190704, "grad_norm": 0.247476264834404, "learning_rate": 3.6938759601832936e-05, "loss": 0.1568, "step": 22839 }, { "epoch": 0.40737701994078407, "grad_norm": 0.23786133527755737, "learning_rate": 3.6937392024333794e-05, "loss": 0.1393, "step": 22840 }, { "epoch": 0.40739485606249776, "grad_norm": 0.2678735554218292, "learning_rate": 3.693602440056121e-05, "loss": 0.1358, "step": 22841 }, { "epoch": 0.40741269218421144, "grad_norm": 0.2773587703704834, "learning_rate": 3.693465673052046e-05, "loss": 0.154, "step": 22842 }, { "epoch": 0.40743052830592513, "grad_norm": 0.24629567563533783, "learning_rate": 3.693328901421687e-05, "loss": 0.1371, "step": 22843 }, { "epoch": 0.4074483644276389, "grad_norm": 0.3306807577610016, "learning_rate": 3.693192125165572e-05, "loss": 0.2294, "step": 22844 }, { "epoch": 0.40746620054935256, "grad_norm": 0.23849107325077057, "learning_rate": 3.693055344284233e-05, "loss": 0.1161, "step": 22845 }, { "epoch": 0.40748403667106625, "grad_norm": 0.24857784807682037, "learning_rate": 3.6929185587782e-05, "loss": 0.1906, "step": 22846 }, { "epoch": 0.40750187279277994, "grad_norm": 0.26840338110923767, "learning_rate": 3.692781768648001e-05, "loss": 0.1723, "step": 22847 }, { "epoch": 0.40751970891449363, "grad_norm": 0.2885158360004425, "learning_rate": 3.692644973894169e-05, "loss": 0.1466, "step": 22848 }, { "epoch": 0.4075375450362073, "grad_norm": 0.21937188506126404, "learning_rate": 3.692508174517233e-05, "loss": 0.1374, "step": 22849 }, { "epoch": 0.407555381157921, "grad_norm": 0.25022321939468384, "learning_rate": 3.6923713705177225e-05, "loss": 0.1894, "step": 22850 }, { "epoch": 0.4075732172796347, "grad_norm": 0.3140817880630493, "learning_rate": 3.6922345618961696e-05, "loss": 0.1306, "step": 22851 }, { "epoch": 0.40759105340134844, "grad_norm": 0.26757583022117615, "learning_rate": 3.692097748653104e-05, "loss": 0.1612, "step": 22852 }, { "epoch": 0.4076088895230621, "grad_norm": 0.2838507890701294, "learning_rate": 3.691960930789055e-05, "loss": 0.1888, "step": 22853 }, { "epoch": 0.4076267256447758, "grad_norm": 0.3510291278362274, "learning_rate": 3.691824108304554e-05, "loss": 0.2161, "step": 22854 }, { "epoch": 0.4076445617664895, "grad_norm": 0.2649875581264496, "learning_rate": 3.691687281200132e-05, "loss": 0.1486, "step": 22855 }, { "epoch": 0.4076623978882032, "grad_norm": 0.28027260303497314, "learning_rate": 3.691550449476318e-05, "loss": 0.1737, "step": 22856 }, { "epoch": 0.4076802340099169, "grad_norm": 0.2549683749675751, "learning_rate": 3.691413613133643e-05, "loss": 0.142, "step": 22857 }, { "epoch": 0.40769807013163056, "grad_norm": 0.24638888239860535, "learning_rate": 3.691276772172636e-05, "loss": 0.1233, "step": 22858 }, { "epoch": 0.40771590625334425, "grad_norm": 0.27769047021865845, "learning_rate": 3.6911399265938304e-05, "loss": 0.1757, "step": 22859 }, { "epoch": 0.407733742375058, "grad_norm": 0.3439522087574005, "learning_rate": 3.6910030763977544e-05, "loss": 0.1086, "step": 22860 }, { "epoch": 0.4077515784967717, "grad_norm": 0.27945300936698914, "learning_rate": 3.6908662215849396e-05, "loss": 0.1605, "step": 22861 }, { "epoch": 0.40776941461848537, "grad_norm": 0.32378441095352173, "learning_rate": 3.690729362155915e-05, "loss": 0.1786, "step": 22862 }, { "epoch": 0.40778725074019906, "grad_norm": 0.3368411362171173, "learning_rate": 3.690592498111213e-05, "loss": 0.176, "step": 22863 }, { "epoch": 0.40780508686191275, "grad_norm": 0.32447549700737, "learning_rate": 3.690455629451363e-05, "loss": 0.2008, "step": 22864 }, { "epoch": 0.40782292298362643, "grad_norm": 0.203013613820076, "learning_rate": 3.690318756176896e-05, "loss": 0.1404, "step": 22865 }, { "epoch": 0.4078407591053401, "grad_norm": 0.21278530359268188, "learning_rate": 3.690181878288342e-05, "loss": 0.1353, "step": 22866 }, { "epoch": 0.4078585952270538, "grad_norm": 0.2698383927345276, "learning_rate": 3.690044995786232e-05, "loss": 0.1536, "step": 22867 }, { "epoch": 0.4078764313487675, "grad_norm": 0.2509118914604187, "learning_rate": 3.689908108671096e-05, "loss": 0.1366, "step": 22868 }, { "epoch": 0.40789426747048124, "grad_norm": 0.2102491706609726, "learning_rate": 3.689771216943467e-05, "loss": 0.125, "step": 22869 }, { "epoch": 0.40791210359219493, "grad_norm": 0.20959524810314178, "learning_rate": 3.6896343206038723e-05, "loss": 0.1241, "step": 22870 }, { "epoch": 0.4079299397139086, "grad_norm": 0.3055448830127716, "learning_rate": 3.689497419652844e-05, "loss": 0.1598, "step": 22871 }, { "epoch": 0.4079477758356223, "grad_norm": 0.21058553457260132, "learning_rate": 3.689360514090914e-05, "loss": 0.1333, "step": 22872 }, { "epoch": 0.407965611957336, "grad_norm": 0.2746939957141876, "learning_rate": 3.689223603918611e-05, "loss": 0.1359, "step": 22873 }, { "epoch": 0.4079834480790497, "grad_norm": 0.30057674646377563, "learning_rate": 3.689086689136467e-05, "loss": 0.1467, "step": 22874 }, { "epoch": 0.40800128420076337, "grad_norm": 0.22446024417877197, "learning_rate": 3.688949769745012e-05, "loss": 0.1608, "step": 22875 }, { "epoch": 0.40801912032247706, "grad_norm": 0.2605692446231842, "learning_rate": 3.688812845744777e-05, "loss": 0.1865, "step": 22876 }, { "epoch": 0.4080369564441908, "grad_norm": 0.27201569080352783, "learning_rate": 3.688675917136293e-05, "loss": 0.1935, "step": 22877 }, { "epoch": 0.4080547925659045, "grad_norm": 0.3429532051086426, "learning_rate": 3.688538983920091e-05, "loss": 0.1557, "step": 22878 }, { "epoch": 0.4080726286876182, "grad_norm": 0.2994801998138428, "learning_rate": 3.688402046096701e-05, "loss": 0.1154, "step": 22879 }, { "epoch": 0.40809046480933187, "grad_norm": 0.19963397085666656, "learning_rate": 3.6882651036666536e-05, "loss": 0.1519, "step": 22880 }, { "epoch": 0.40810830093104555, "grad_norm": 0.20837277173995972, "learning_rate": 3.6881281566304805e-05, "loss": 0.1615, "step": 22881 }, { "epoch": 0.40812613705275924, "grad_norm": 0.2550000846385956, "learning_rate": 3.6879912049887136e-05, "loss": 0.1547, "step": 22882 }, { "epoch": 0.40814397317447293, "grad_norm": 0.25186896324157715, "learning_rate": 3.687854248741881e-05, "loss": 0.1807, "step": 22883 }, { "epoch": 0.4081618092961866, "grad_norm": 0.2719725966453552, "learning_rate": 3.6877172878905154e-05, "loss": 0.1886, "step": 22884 }, { "epoch": 0.4081796454179003, "grad_norm": 0.18179181218147278, "learning_rate": 3.6875803224351474e-05, "loss": 0.1265, "step": 22885 }, { "epoch": 0.40819748153961405, "grad_norm": 0.2512252628803253, "learning_rate": 3.687443352376308e-05, "loss": 0.1527, "step": 22886 }, { "epoch": 0.40821531766132774, "grad_norm": 0.2676926255226135, "learning_rate": 3.687306377714528e-05, "loss": 0.1621, "step": 22887 }, { "epoch": 0.4082331537830414, "grad_norm": 0.2575880289077759, "learning_rate": 3.687169398450339e-05, "loss": 0.1568, "step": 22888 }, { "epoch": 0.4082509899047551, "grad_norm": 0.24720615148544312, "learning_rate": 3.6870324145842706e-05, "loss": 0.1283, "step": 22889 }, { "epoch": 0.4082688260264688, "grad_norm": 0.2541947364807129, "learning_rate": 3.6868954261168545e-05, "loss": 0.1459, "step": 22890 }, { "epoch": 0.4082866621481825, "grad_norm": 0.3343372046947479, "learning_rate": 3.6867584330486224e-05, "loss": 0.1461, "step": 22891 }, { "epoch": 0.4083044982698962, "grad_norm": 0.20460598170757294, "learning_rate": 3.6866214353801035e-05, "loss": 0.1391, "step": 22892 }, { "epoch": 0.40832233439160986, "grad_norm": 0.2759687304496765, "learning_rate": 3.686484433111831e-05, "loss": 0.1548, "step": 22893 }, { "epoch": 0.4083401705133236, "grad_norm": 0.2569333612918854, "learning_rate": 3.6863474262443346e-05, "loss": 0.1292, "step": 22894 }, { "epoch": 0.4083580066350373, "grad_norm": 0.3087644577026367, "learning_rate": 3.686210414778147e-05, "loss": 0.1503, "step": 22895 }, { "epoch": 0.408375842756751, "grad_norm": 0.3194817006587982, "learning_rate": 3.686073398713797e-05, "loss": 0.108, "step": 22896 }, { "epoch": 0.40839367887846467, "grad_norm": 0.27774205803871155, "learning_rate": 3.6859363780518174e-05, "loss": 0.1881, "step": 22897 }, { "epoch": 0.40841151500017836, "grad_norm": 0.31939876079559326, "learning_rate": 3.685799352792738e-05, "loss": 0.1594, "step": 22898 }, { "epoch": 0.40842935112189205, "grad_norm": 0.22911010682582855, "learning_rate": 3.685662322937091e-05, "loss": 0.1313, "step": 22899 }, { "epoch": 0.40844718724360574, "grad_norm": 0.3676689565181732, "learning_rate": 3.685525288485409e-05, "loss": 0.2567, "step": 22900 }, { "epoch": 0.4084650233653194, "grad_norm": 0.2931276559829712, "learning_rate": 3.68538824943822e-05, "loss": 0.1375, "step": 22901 }, { "epoch": 0.4084828594870331, "grad_norm": 0.29382312297821045, "learning_rate": 3.685251205796057e-05, "loss": 0.1802, "step": 22902 }, { "epoch": 0.40850069560874686, "grad_norm": 0.2815816104412079, "learning_rate": 3.68511415755945e-05, "loss": 0.1887, "step": 22903 }, { "epoch": 0.40851853173046054, "grad_norm": 0.3132527470588684, "learning_rate": 3.684977104728933e-05, "loss": 0.136, "step": 22904 }, { "epoch": 0.40853636785217423, "grad_norm": 0.2781032919883728, "learning_rate": 3.6848400473050335e-05, "loss": 0.1566, "step": 22905 }, { "epoch": 0.4085542039738879, "grad_norm": 0.28257012367248535, "learning_rate": 3.6847029852882856e-05, "loss": 0.1563, "step": 22906 }, { "epoch": 0.4085720400956016, "grad_norm": 0.26520785689353943, "learning_rate": 3.68456591867922e-05, "loss": 0.1533, "step": 22907 }, { "epoch": 0.4085898762173153, "grad_norm": 0.25135770440101624, "learning_rate": 3.684428847478368e-05, "loss": 0.1309, "step": 22908 }, { "epoch": 0.408607712339029, "grad_norm": 0.2853555381298065, "learning_rate": 3.6842917716862603e-05, "loss": 0.1605, "step": 22909 }, { "epoch": 0.40862554846074267, "grad_norm": 0.2451864629983902, "learning_rate": 3.6841546913034285e-05, "loss": 0.1475, "step": 22910 }, { "epoch": 0.4086433845824564, "grad_norm": 0.21090559661388397, "learning_rate": 3.6840176063304045e-05, "loss": 0.1332, "step": 22911 }, { "epoch": 0.4086612207041701, "grad_norm": 0.24229754507541656, "learning_rate": 3.683880516767719e-05, "loss": 0.162, "step": 22912 }, { "epoch": 0.4086790568258838, "grad_norm": 0.2401350885629654, "learning_rate": 3.683743422615904e-05, "loss": 0.1528, "step": 22913 }, { "epoch": 0.4086968929475975, "grad_norm": 0.3371230959892273, "learning_rate": 3.68360632387549e-05, "loss": 0.0936, "step": 22914 }, { "epoch": 0.40871472906931117, "grad_norm": 0.26828733086586, "learning_rate": 3.68346922054701e-05, "loss": 0.1132, "step": 22915 }, { "epoch": 0.40873256519102485, "grad_norm": 0.22380150854587555, "learning_rate": 3.683332112630994e-05, "loss": 0.1305, "step": 22916 }, { "epoch": 0.40875040131273854, "grad_norm": 0.246516615152359, "learning_rate": 3.6831950001279744e-05, "loss": 0.1678, "step": 22917 }, { "epoch": 0.40876823743445223, "grad_norm": 0.30184081196784973, "learning_rate": 3.683057883038482e-05, "loss": 0.1148, "step": 22918 }, { "epoch": 0.408786073556166, "grad_norm": 0.313192754983902, "learning_rate": 3.6829207613630487e-05, "loss": 0.2113, "step": 22919 }, { "epoch": 0.40880390967787966, "grad_norm": 0.18925026059150696, "learning_rate": 3.682783635102206e-05, "loss": 0.1216, "step": 22920 }, { "epoch": 0.40882174579959335, "grad_norm": 0.2137240469455719, "learning_rate": 3.682646504256485e-05, "loss": 0.1111, "step": 22921 }, { "epoch": 0.40883958192130704, "grad_norm": 0.24453464150428772, "learning_rate": 3.682509368826418e-05, "loss": 0.1571, "step": 22922 }, { "epoch": 0.4088574180430207, "grad_norm": 0.24433757364749908, "learning_rate": 3.682372228812536e-05, "loss": 0.1246, "step": 22923 }, { "epoch": 0.4088752541647344, "grad_norm": 0.31137651205062866, "learning_rate": 3.682235084215371e-05, "loss": 0.19, "step": 22924 }, { "epoch": 0.4088930902864481, "grad_norm": 0.2959708571434021, "learning_rate": 3.682097935035456e-05, "loss": 0.1315, "step": 22925 }, { "epoch": 0.4089109264081618, "grad_norm": 0.2607278823852539, "learning_rate": 3.681960781273319e-05, "loss": 0.1382, "step": 22926 }, { "epoch": 0.4089287625298755, "grad_norm": 0.2750498950481415, "learning_rate": 3.681823622929495e-05, "loss": 0.1502, "step": 22927 }, { "epoch": 0.4089465986515892, "grad_norm": 0.23453126847743988, "learning_rate": 3.681686460004514e-05, "loss": 0.1275, "step": 22928 }, { "epoch": 0.4089644347733029, "grad_norm": 0.2705915868282318, "learning_rate": 3.6815492924989074e-05, "loss": 0.1703, "step": 22929 }, { "epoch": 0.4089822708950166, "grad_norm": 0.24737876653671265, "learning_rate": 3.681412120413209e-05, "loss": 0.1157, "step": 22930 }, { "epoch": 0.4090001070167303, "grad_norm": 0.22455322742462158, "learning_rate": 3.681274943747948e-05, "loss": 0.1527, "step": 22931 }, { "epoch": 0.409017943138444, "grad_norm": 0.27392327785491943, "learning_rate": 3.681137762503658e-05, "loss": 0.2148, "step": 22932 }, { "epoch": 0.40903577926015766, "grad_norm": 0.215500608086586, "learning_rate": 3.68100057668087e-05, "loss": 0.1086, "step": 22933 }, { "epoch": 0.40905361538187135, "grad_norm": 0.2355826497077942, "learning_rate": 3.680863386280116e-05, "loss": 0.1362, "step": 22934 }, { "epoch": 0.40907145150358504, "grad_norm": 0.2417983114719391, "learning_rate": 3.680726191301927e-05, "loss": 0.131, "step": 22935 }, { "epoch": 0.4090892876252988, "grad_norm": 0.4128175675868988, "learning_rate": 3.680588991746836e-05, "loss": 0.1872, "step": 22936 }, { "epoch": 0.40910712374701247, "grad_norm": 0.2485012263059616, "learning_rate": 3.6804517876153746e-05, "loss": 0.1843, "step": 22937 }, { "epoch": 0.40912495986872616, "grad_norm": 0.3083796799182892, "learning_rate": 3.680314578908074e-05, "loss": 0.2065, "step": 22938 }, { "epoch": 0.40914279599043984, "grad_norm": 0.2656121850013733, "learning_rate": 3.680177365625467e-05, "loss": 0.1171, "step": 22939 }, { "epoch": 0.40916063211215353, "grad_norm": 0.2742651104927063, "learning_rate": 3.680040147768083e-05, "loss": 0.1892, "step": 22940 }, { "epoch": 0.4091784682338672, "grad_norm": 0.2656703591346741, "learning_rate": 3.6799029253364574e-05, "loss": 0.1681, "step": 22941 }, { "epoch": 0.4091963043555809, "grad_norm": 0.2989206314086914, "learning_rate": 3.67976569833112e-05, "loss": 0.1628, "step": 22942 }, { "epoch": 0.4092141404772946, "grad_norm": 0.3289611339569092, "learning_rate": 3.679628466752604e-05, "loss": 0.1893, "step": 22943 }, { "epoch": 0.4092319765990083, "grad_norm": 0.2592921555042267, "learning_rate": 3.67949123060144e-05, "loss": 0.1531, "step": 22944 }, { "epoch": 0.40924981272072203, "grad_norm": 0.2873404920101166, "learning_rate": 3.679353989878162e-05, "loss": 0.1485, "step": 22945 }, { "epoch": 0.4092676488424357, "grad_norm": 0.3295323848724365, "learning_rate": 3.6792167445832986e-05, "loss": 0.1779, "step": 22946 }, { "epoch": 0.4092854849641494, "grad_norm": 0.2260526716709137, "learning_rate": 3.679079494717385e-05, "loss": 0.1482, "step": 22947 }, { "epoch": 0.4093033210858631, "grad_norm": 0.31729191541671753, "learning_rate": 3.678942240280951e-05, "loss": 0.1624, "step": 22948 }, { "epoch": 0.4093211572075768, "grad_norm": 0.2952575981616974, "learning_rate": 3.6788049812745306e-05, "loss": 0.1089, "step": 22949 }, { "epoch": 0.40933899332929047, "grad_norm": 0.3142538070678711, "learning_rate": 3.678667717698655e-05, "loss": 0.1406, "step": 22950 }, { "epoch": 0.40935682945100416, "grad_norm": 0.2574765086174011, "learning_rate": 3.678530449553855e-05, "loss": 0.1495, "step": 22951 }, { "epoch": 0.40937466557271784, "grad_norm": 0.4022500514984131, "learning_rate": 3.678393176840666e-05, "loss": 0.1521, "step": 22952 }, { "epoch": 0.4093925016944316, "grad_norm": 0.36074599623680115, "learning_rate": 3.6782558995596164e-05, "loss": 0.125, "step": 22953 }, { "epoch": 0.4094103378161453, "grad_norm": 0.2353619486093521, "learning_rate": 3.67811861771124e-05, "loss": 0.1912, "step": 22954 }, { "epoch": 0.40942817393785896, "grad_norm": 0.2922440469264984, "learning_rate": 3.6779813312960695e-05, "loss": 0.135, "step": 22955 }, { "epoch": 0.40944601005957265, "grad_norm": 0.2769284248352051, "learning_rate": 3.6778440403146364e-05, "loss": 0.161, "step": 22956 }, { "epoch": 0.40946384618128634, "grad_norm": 0.32437753677368164, "learning_rate": 3.677706744767473e-05, "loss": 0.179, "step": 22957 }, { "epoch": 0.409481682303, "grad_norm": 0.2687344551086426, "learning_rate": 3.677569444655112e-05, "loss": 0.1093, "step": 22958 }, { "epoch": 0.4094995184247137, "grad_norm": 0.21496063470840454, "learning_rate": 3.6774321399780844e-05, "loss": 0.0968, "step": 22959 }, { "epoch": 0.4095173545464274, "grad_norm": 0.3070719838142395, "learning_rate": 3.677294830736923e-05, "loss": 0.1865, "step": 22960 }, { "epoch": 0.40953519066814115, "grad_norm": 0.22498443722724915, "learning_rate": 3.6771575169321605e-05, "loss": 0.1586, "step": 22961 }, { "epoch": 0.40955302678985483, "grad_norm": 0.28707748651504517, "learning_rate": 3.677020198564329e-05, "loss": 0.1418, "step": 22962 }, { "epoch": 0.4095708629115685, "grad_norm": 0.2691110670566559, "learning_rate": 3.67688287563396e-05, "loss": 0.1701, "step": 22963 }, { "epoch": 0.4095886990332822, "grad_norm": 0.3171987235546112, "learning_rate": 3.676745548141587e-05, "loss": 0.1601, "step": 22964 }, { "epoch": 0.4096065351549959, "grad_norm": 0.2266453355550766, "learning_rate": 3.676608216087743e-05, "loss": 0.1482, "step": 22965 }, { "epoch": 0.4096243712767096, "grad_norm": 0.2855393886566162, "learning_rate": 3.676470879472958e-05, "loss": 0.2021, "step": 22966 }, { "epoch": 0.4096422073984233, "grad_norm": 0.25436756014823914, "learning_rate": 3.6763335382977656e-05, "loss": 0.1806, "step": 22967 }, { "epoch": 0.40966004352013696, "grad_norm": 0.2488749623298645, "learning_rate": 3.676196192562698e-05, "loss": 0.1786, "step": 22968 }, { "epoch": 0.40967787964185065, "grad_norm": 0.41944360733032227, "learning_rate": 3.6760588422682873e-05, "loss": 0.1389, "step": 22969 }, { "epoch": 0.4096957157635644, "grad_norm": 0.35119929909706116, "learning_rate": 3.675921487415067e-05, "loss": 0.1583, "step": 22970 }, { "epoch": 0.4097135518852781, "grad_norm": 0.2227325141429901, "learning_rate": 3.675784128003569e-05, "loss": 0.1045, "step": 22971 }, { "epoch": 0.40973138800699177, "grad_norm": 0.18357279896736145, "learning_rate": 3.675646764034325e-05, "loss": 0.1106, "step": 22972 }, { "epoch": 0.40974922412870546, "grad_norm": 0.2682918906211853, "learning_rate": 3.675509395507869e-05, "loss": 0.1603, "step": 22973 }, { "epoch": 0.40976706025041915, "grad_norm": 0.27669504284858704, "learning_rate": 3.675372022424732e-05, "loss": 0.1426, "step": 22974 }, { "epoch": 0.40978489637213283, "grad_norm": 0.4094705879688263, "learning_rate": 3.675234644785447e-05, "loss": 0.1884, "step": 22975 }, { "epoch": 0.4098027324938465, "grad_norm": 0.26464247703552246, "learning_rate": 3.675097262590546e-05, "loss": 0.1872, "step": 22976 }, { "epoch": 0.4098205686155602, "grad_norm": 0.3582855463027954, "learning_rate": 3.674959875840563e-05, "loss": 0.171, "step": 22977 }, { "epoch": 0.40983840473727395, "grad_norm": 0.23134541511535645, "learning_rate": 3.67482248453603e-05, "loss": 0.1355, "step": 22978 }, { "epoch": 0.40985624085898764, "grad_norm": 0.2304838001728058, "learning_rate": 3.6746850886774787e-05, "loss": 0.1363, "step": 22979 }, { "epoch": 0.40987407698070133, "grad_norm": 0.3144586682319641, "learning_rate": 3.674547688265443e-05, "loss": 0.1209, "step": 22980 }, { "epoch": 0.409891913102415, "grad_norm": 0.3483356833457947, "learning_rate": 3.6744102833004535e-05, "loss": 0.209, "step": 22981 }, { "epoch": 0.4099097492241287, "grad_norm": 0.22373300790786743, "learning_rate": 3.6742728737830444e-05, "loss": 0.1677, "step": 22982 }, { "epoch": 0.4099275853458424, "grad_norm": 0.4133744239807129, "learning_rate": 3.674135459713749e-05, "loss": 0.1615, "step": 22983 }, { "epoch": 0.4099454214675561, "grad_norm": 0.23916204273700714, "learning_rate": 3.673998041093098e-05, "loss": 0.177, "step": 22984 }, { "epoch": 0.40996325758926977, "grad_norm": 0.2945150136947632, "learning_rate": 3.673860617921626e-05, "loss": 0.1786, "step": 22985 }, { "epoch": 0.40998109371098346, "grad_norm": 0.38855665922164917, "learning_rate": 3.673723190199864e-05, "loss": 0.1564, "step": 22986 }, { "epoch": 0.4099989298326972, "grad_norm": 0.2788127064704895, "learning_rate": 3.673585757928346e-05, "loss": 0.1819, "step": 22987 }, { "epoch": 0.4100167659544109, "grad_norm": 0.29421499371528625, "learning_rate": 3.673448321107604e-05, "loss": 0.1944, "step": 22988 }, { "epoch": 0.4100346020761246, "grad_norm": 0.2578313946723938, "learning_rate": 3.6733108797381717e-05, "loss": 0.1665, "step": 22989 }, { "epoch": 0.41005243819783826, "grad_norm": 0.3013758361339569, "learning_rate": 3.67317343382058e-05, "loss": 0.204, "step": 22990 }, { "epoch": 0.41007027431955195, "grad_norm": 0.26153460144996643, "learning_rate": 3.673035983355364e-05, "loss": 0.148, "step": 22991 }, { "epoch": 0.41008811044126564, "grad_norm": 0.24209193885326385, "learning_rate": 3.672898528343055e-05, "loss": 0.1643, "step": 22992 }, { "epoch": 0.41010594656297933, "grad_norm": 0.26765844225883484, "learning_rate": 3.6727610687841864e-05, "loss": 0.1333, "step": 22993 }, { "epoch": 0.410123782684693, "grad_norm": 0.2752847671508789, "learning_rate": 3.6726236046792905e-05, "loss": 0.1427, "step": 22994 }, { "epoch": 0.41014161880640676, "grad_norm": 0.38417741656303406, "learning_rate": 3.6724861360289006e-05, "loss": 0.184, "step": 22995 }, { "epoch": 0.41015945492812045, "grad_norm": 0.2892318069934845, "learning_rate": 3.672348662833549e-05, "loss": 0.1804, "step": 22996 }, { "epoch": 0.41017729104983414, "grad_norm": 0.3207838833332062, "learning_rate": 3.67221118509377e-05, "loss": 0.1366, "step": 22997 }, { "epoch": 0.4101951271715478, "grad_norm": 0.3486321270465851, "learning_rate": 3.6720737028100945e-05, "loss": 0.1211, "step": 22998 }, { "epoch": 0.4102129632932615, "grad_norm": 0.24128076434135437, "learning_rate": 3.671936215983057e-05, "loss": 0.1517, "step": 22999 }, { "epoch": 0.4102307994149752, "grad_norm": 0.28812161087989807, "learning_rate": 3.67179872461319e-05, "loss": 0.1395, "step": 23000 }, { "epoch": 0.4102307994149752, "eval_loss": 0.15161660313606262, "eval_runtime": 106.9424, "eval_samples_per_second": 9.575, "eval_steps_per_second": 1.599, "step": 23000 }, { "epoch": 0.4102486355366889, "grad_norm": 0.32620468735694885, "learning_rate": 3.671661228701026e-05, "loss": 0.1579, "step": 23001 }, { "epoch": 0.4102664716584026, "grad_norm": 0.2858385145664215, "learning_rate": 3.671523728247099e-05, "loss": 0.1558, "step": 23002 }, { "epoch": 0.41028430778011626, "grad_norm": 0.3071691393852234, "learning_rate": 3.671386223251941e-05, "loss": 0.1659, "step": 23003 }, { "epoch": 0.41030214390183, "grad_norm": 0.2689734995365143, "learning_rate": 3.671248713716087e-05, "loss": 0.1537, "step": 23004 }, { "epoch": 0.4103199800235437, "grad_norm": 0.3195585310459137, "learning_rate": 3.671111199640066e-05, "loss": 0.1728, "step": 23005 }, { "epoch": 0.4103378161452574, "grad_norm": 0.30270084738731384, "learning_rate": 3.6709736810244155e-05, "loss": 0.2003, "step": 23006 }, { "epoch": 0.41035565226697107, "grad_norm": 0.2977372109889984, "learning_rate": 3.6708361578696646e-05, "loss": 0.1528, "step": 23007 }, { "epoch": 0.41037348838868476, "grad_norm": 0.3400401771068573, "learning_rate": 3.6706986301763505e-05, "loss": 0.1747, "step": 23008 }, { "epoch": 0.41039132451039845, "grad_norm": 0.22327066957950592, "learning_rate": 3.6705610979450025e-05, "loss": 0.133, "step": 23009 }, { "epoch": 0.41040916063211214, "grad_norm": 0.26148056983947754, "learning_rate": 3.670423561176156e-05, "loss": 0.2047, "step": 23010 }, { "epoch": 0.4104269967538258, "grad_norm": 0.28364816308021545, "learning_rate": 3.6702860198703434e-05, "loss": 0.1444, "step": 23011 }, { "epoch": 0.41044483287553957, "grad_norm": 0.3011294901371002, "learning_rate": 3.670148474028098e-05, "loss": 0.1727, "step": 23012 }, { "epoch": 0.41046266899725325, "grad_norm": 0.3662911057472229, "learning_rate": 3.670010923649953e-05, "loss": 0.1682, "step": 23013 }, { "epoch": 0.41048050511896694, "grad_norm": 0.2082054167985916, "learning_rate": 3.6698733687364416e-05, "loss": 0.174, "step": 23014 }, { "epoch": 0.41049834124068063, "grad_norm": 0.21559834480285645, "learning_rate": 3.6697358092880975e-05, "loss": 0.1026, "step": 23015 }, { "epoch": 0.4105161773623943, "grad_norm": 0.2405346781015396, "learning_rate": 3.6695982453054525e-05, "loss": 0.1623, "step": 23016 }, { "epoch": 0.410534013484108, "grad_norm": 0.27425476908683777, "learning_rate": 3.669460676789041e-05, "loss": 0.1723, "step": 23017 }, { "epoch": 0.4105518496058217, "grad_norm": 0.40821516513824463, "learning_rate": 3.669323103739396e-05, "loss": 0.1977, "step": 23018 }, { "epoch": 0.4105696857275354, "grad_norm": 0.22954507172107697, "learning_rate": 3.6691855261570514e-05, "loss": 0.0947, "step": 23019 }, { "epoch": 0.4105875218492491, "grad_norm": 0.16959792375564575, "learning_rate": 3.669047944042539e-05, "loss": 0.1385, "step": 23020 }, { "epoch": 0.4106053579709628, "grad_norm": 0.232644721865654, "learning_rate": 3.6689103573963934e-05, "loss": 0.1362, "step": 23021 }, { "epoch": 0.4106231940926765, "grad_norm": 0.22902779281139374, "learning_rate": 3.668772766219147e-05, "loss": 0.1593, "step": 23022 }, { "epoch": 0.4106410302143902, "grad_norm": 0.2516598403453827, "learning_rate": 3.668635170511335e-05, "loss": 0.1457, "step": 23023 }, { "epoch": 0.4106588663361039, "grad_norm": 0.25940564274787903, "learning_rate": 3.668497570273488e-05, "loss": 0.1651, "step": 23024 }, { "epoch": 0.41067670245781757, "grad_norm": 0.2819032669067383, "learning_rate": 3.668359965506142e-05, "loss": 0.0931, "step": 23025 }, { "epoch": 0.41069453857953125, "grad_norm": 0.3397025465965271, "learning_rate": 3.6682223562098276e-05, "loss": 0.1775, "step": 23026 }, { "epoch": 0.41071237470124494, "grad_norm": 0.3517206609249115, "learning_rate": 3.668084742385082e-05, "loss": 0.1667, "step": 23027 }, { "epoch": 0.41073021082295863, "grad_norm": 0.2156389206647873, "learning_rate": 3.667947124032435e-05, "loss": 0.1696, "step": 23028 }, { "epoch": 0.4107480469446724, "grad_norm": 0.22551634907722473, "learning_rate": 3.667809501152422e-05, "loss": 0.1408, "step": 23029 }, { "epoch": 0.41076588306638606, "grad_norm": 0.18486660718917847, "learning_rate": 3.6676718737455754e-05, "loss": 0.1278, "step": 23030 }, { "epoch": 0.41078371918809975, "grad_norm": 0.21407462656497955, "learning_rate": 3.66753424181243e-05, "loss": 0.1371, "step": 23031 }, { "epoch": 0.41080155530981344, "grad_norm": 0.22909492254257202, "learning_rate": 3.6673966053535195e-05, "loss": 0.109, "step": 23032 }, { "epoch": 0.4108193914315271, "grad_norm": 0.22414657473564148, "learning_rate": 3.6672589643693745e-05, "loss": 0.1692, "step": 23033 }, { "epoch": 0.4108372275532408, "grad_norm": 0.2653462588787079, "learning_rate": 3.667121318860533e-05, "loss": 0.1522, "step": 23034 }, { "epoch": 0.4108550636749545, "grad_norm": 0.21764078736305237, "learning_rate": 3.666983668827524e-05, "loss": 0.1827, "step": 23035 }, { "epoch": 0.4108728997966682, "grad_norm": 0.23941759765148163, "learning_rate": 3.6668460142708836e-05, "loss": 0.152, "step": 23036 }, { "epoch": 0.41089073591838193, "grad_norm": 0.23800028860569, "learning_rate": 3.6667083551911456e-05, "loss": 0.1532, "step": 23037 }, { "epoch": 0.4109085720400956, "grad_norm": 0.27145278453826904, "learning_rate": 3.666570691588843e-05, "loss": 0.2035, "step": 23038 }, { "epoch": 0.4109264081618093, "grad_norm": 0.26834502816200256, "learning_rate": 3.66643302346451e-05, "loss": 0.1562, "step": 23039 }, { "epoch": 0.410944244283523, "grad_norm": 0.19137997925281525, "learning_rate": 3.6662953508186784e-05, "loss": 0.1506, "step": 23040 }, { "epoch": 0.4109620804052367, "grad_norm": 0.2708612084388733, "learning_rate": 3.666157673651884e-05, "loss": 0.1828, "step": 23041 }, { "epoch": 0.4109799165269504, "grad_norm": 0.23833531141281128, "learning_rate": 3.66601999196466e-05, "loss": 0.171, "step": 23042 }, { "epoch": 0.41099775264866406, "grad_norm": 0.26716694235801697, "learning_rate": 3.6658823057575396e-05, "loss": 0.1659, "step": 23043 }, { "epoch": 0.41101558877037775, "grad_norm": 0.36645907163619995, "learning_rate": 3.665744615031056e-05, "loss": 0.1967, "step": 23044 }, { "epoch": 0.41103342489209144, "grad_norm": 0.27841904759407043, "learning_rate": 3.665606919785745e-05, "loss": 0.1551, "step": 23045 }, { "epoch": 0.4110512610138052, "grad_norm": 0.30871209502220154, "learning_rate": 3.6654692200221375e-05, "loss": 0.1321, "step": 23046 }, { "epoch": 0.41106909713551887, "grad_norm": 0.24763981997966766, "learning_rate": 3.6653315157407695e-05, "loss": 0.1544, "step": 23047 }, { "epoch": 0.41108693325723256, "grad_norm": 0.30146706104278564, "learning_rate": 3.665193806942174e-05, "loss": 0.1308, "step": 23048 }, { "epoch": 0.41110476937894624, "grad_norm": 0.21637827157974243, "learning_rate": 3.6650560936268846e-05, "loss": 0.1042, "step": 23049 }, { "epoch": 0.41112260550065993, "grad_norm": 0.31351467967033386, "learning_rate": 3.6649183757954356e-05, "loss": 0.1394, "step": 23050 }, { "epoch": 0.4111404416223736, "grad_norm": 0.4078234136104584, "learning_rate": 3.6647806534483605e-05, "loss": 0.178, "step": 23051 }, { "epoch": 0.4111582777440873, "grad_norm": 0.2947288751602173, "learning_rate": 3.664642926586194e-05, "loss": 0.0667, "step": 23052 }, { "epoch": 0.411176113865801, "grad_norm": 0.23695625364780426, "learning_rate": 3.664505195209468e-05, "loss": 0.1649, "step": 23053 }, { "epoch": 0.41119394998751474, "grad_norm": 0.4286949932575226, "learning_rate": 3.664367459318718e-05, "loss": 0.2361, "step": 23054 }, { "epoch": 0.4112117861092284, "grad_norm": 0.22863931953907013, "learning_rate": 3.664229718914478e-05, "loss": 0.1325, "step": 23055 }, { "epoch": 0.4112296222309421, "grad_norm": 0.25124645233154297, "learning_rate": 3.6640919739972816e-05, "loss": 0.1665, "step": 23056 }, { "epoch": 0.4112474583526558, "grad_norm": 0.3131903111934662, "learning_rate": 3.663954224567663e-05, "loss": 0.1398, "step": 23057 }, { "epoch": 0.4112652944743695, "grad_norm": 0.20030651986598969, "learning_rate": 3.6638164706261546e-05, "loss": 0.1409, "step": 23058 }, { "epoch": 0.4112831305960832, "grad_norm": 0.20581752061843872, "learning_rate": 3.663678712173292e-05, "loss": 0.1607, "step": 23059 }, { "epoch": 0.41130096671779687, "grad_norm": 0.29224833846092224, "learning_rate": 3.663540949209609e-05, "loss": 0.1544, "step": 23060 }, { "epoch": 0.41131880283951056, "grad_norm": 0.27331843972206116, "learning_rate": 3.6634031817356396e-05, "loss": 0.1381, "step": 23061 }, { "epoch": 0.4113366389612243, "grad_norm": 0.2171776443719864, "learning_rate": 3.6632654097519174e-05, "loss": 0.1022, "step": 23062 }, { "epoch": 0.411354475082938, "grad_norm": 0.32310447096824646, "learning_rate": 3.663127633258977e-05, "loss": 0.1817, "step": 23063 }, { "epoch": 0.4113723112046517, "grad_norm": 0.37830615043640137, "learning_rate": 3.662989852257352e-05, "loss": 0.2586, "step": 23064 }, { "epoch": 0.41139014732636536, "grad_norm": 0.21193359792232513, "learning_rate": 3.6628520667475764e-05, "loss": 0.1735, "step": 23065 }, { "epoch": 0.41140798344807905, "grad_norm": 0.2421891689300537, "learning_rate": 3.6627142767301846e-05, "loss": 0.1211, "step": 23066 }, { "epoch": 0.41142581956979274, "grad_norm": 0.22349536418914795, "learning_rate": 3.662576482205711e-05, "loss": 0.1645, "step": 23067 }, { "epoch": 0.4114436556915064, "grad_norm": 0.28665441274642944, "learning_rate": 3.662438683174689e-05, "loss": 0.1604, "step": 23068 }, { "epoch": 0.4114614918132201, "grad_norm": 0.28855791687965393, "learning_rate": 3.6623008796376535e-05, "loss": 0.1947, "step": 23069 }, { "epoch": 0.4114793279349338, "grad_norm": 0.22060483694076538, "learning_rate": 3.6621630715951384e-05, "loss": 0.1095, "step": 23070 }, { "epoch": 0.41149716405664755, "grad_norm": 0.2542547881603241, "learning_rate": 3.662025259047678e-05, "loss": 0.1516, "step": 23071 }, { "epoch": 0.41151500017836123, "grad_norm": 0.23093442618846893, "learning_rate": 3.661887441995805e-05, "loss": 0.1256, "step": 23072 }, { "epoch": 0.4115328363000749, "grad_norm": 0.2967522144317627, "learning_rate": 3.6617496204400565e-05, "loss": 0.1652, "step": 23073 }, { "epoch": 0.4115506724217886, "grad_norm": 0.3545559048652649, "learning_rate": 3.661611794380965e-05, "loss": 0.1559, "step": 23074 }, { "epoch": 0.4115685085435023, "grad_norm": 0.3534194529056549, "learning_rate": 3.6614739638190645e-05, "loss": 0.1678, "step": 23075 }, { "epoch": 0.411586344665216, "grad_norm": 0.26916420459747314, "learning_rate": 3.66133612875489e-05, "loss": 0.1224, "step": 23076 }, { "epoch": 0.4116041807869297, "grad_norm": 0.27614539861679077, "learning_rate": 3.661198289188975e-05, "loss": 0.19, "step": 23077 }, { "epoch": 0.41162201690864336, "grad_norm": 0.284241646528244, "learning_rate": 3.661060445121855e-05, "loss": 0.2069, "step": 23078 }, { "epoch": 0.4116398530303571, "grad_norm": 0.30562517046928406, "learning_rate": 3.660922596554064e-05, "loss": 0.093, "step": 23079 }, { "epoch": 0.4116576891520708, "grad_norm": 0.22723637521266937, "learning_rate": 3.660784743486135e-05, "loss": 0.1213, "step": 23080 }, { "epoch": 0.4116755252737845, "grad_norm": 0.3314789831638336, "learning_rate": 3.6606468859186035e-05, "loss": 0.1536, "step": 23081 }, { "epoch": 0.41169336139549817, "grad_norm": 0.20812579989433289, "learning_rate": 3.6605090238520045e-05, "loss": 0.1718, "step": 23082 }, { "epoch": 0.41171119751721186, "grad_norm": 0.3055664598941803, "learning_rate": 3.660371157286872e-05, "loss": 0.1303, "step": 23083 }, { "epoch": 0.41172903363892555, "grad_norm": 0.24226170778274536, "learning_rate": 3.66023328622374e-05, "loss": 0.1091, "step": 23084 }, { "epoch": 0.41174686976063923, "grad_norm": 0.38672134280204773, "learning_rate": 3.6600954106631424e-05, "loss": 0.1637, "step": 23085 }, { "epoch": 0.4117647058823529, "grad_norm": 0.20913872122764587, "learning_rate": 3.6599575306056144e-05, "loss": 0.121, "step": 23086 }, { "epoch": 0.4117825420040666, "grad_norm": 0.3569769561290741, "learning_rate": 3.65981964605169e-05, "loss": 0.1184, "step": 23087 }, { "epoch": 0.41180037812578035, "grad_norm": 0.23577100038528442, "learning_rate": 3.659681757001905e-05, "loss": 0.1646, "step": 23088 }, { "epoch": 0.41181821424749404, "grad_norm": 0.27256104350090027, "learning_rate": 3.659543863456792e-05, "loss": 0.1639, "step": 23089 }, { "epoch": 0.41183605036920773, "grad_norm": 0.38175252079963684, "learning_rate": 3.6594059654168875e-05, "loss": 0.1361, "step": 23090 }, { "epoch": 0.4118538864909214, "grad_norm": 0.2671360969543457, "learning_rate": 3.659268062882725e-05, "loss": 0.1818, "step": 23091 }, { "epoch": 0.4118717226126351, "grad_norm": 0.3260008990764618, "learning_rate": 3.6591301558548385e-05, "loss": 0.1362, "step": 23092 }, { "epoch": 0.4118895587343488, "grad_norm": 0.244276225566864, "learning_rate": 3.6589922443337634e-05, "loss": 0.1144, "step": 23093 }, { "epoch": 0.4119073948560625, "grad_norm": 0.2617519795894623, "learning_rate": 3.658854328320034e-05, "loss": 0.1569, "step": 23094 }, { "epoch": 0.41192523097777617, "grad_norm": 0.3298129439353943, "learning_rate": 3.658716407814184e-05, "loss": 0.1951, "step": 23095 }, { "epoch": 0.4119430670994899, "grad_norm": 0.2517102062702179, "learning_rate": 3.6585784828167505e-05, "loss": 0.2047, "step": 23096 }, { "epoch": 0.4119609032212036, "grad_norm": 0.27042925357818604, "learning_rate": 3.658440553328267e-05, "loss": 0.1797, "step": 23097 }, { "epoch": 0.4119787393429173, "grad_norm": 0.2952825129032135, "learning_rate": 3.6583026193492666e-05, "loss": 0.1819, "step": 23098 }, { "epoch": 0.411996575464631, "grad_norm": 0.2329859882593155, "learning_rate": 3.6581646808802855e-05, "loss": 0.1427, "step": 23099 }, { "epoch": 0.41201441158634466, "grad_norm": 0.23942644894123077, "learning_rate": 3.658026737921858e-05, "loss": 0.147, "step": 23100 }, { "epoch": 0.41203224770805835, "grad_norm": 0.4394679367542267, "learning_rate": 3.6578887904745186e-05, "loss": 0.1458, "step": 23101 }, { "epoch": 0.41205008382977204, "grad_norm": 0.23011663556098938, "learning_rate": 3.6577508385388026e-05, "loss": 0.1706, "step": 23102 }, { "epoch": 0.41206791995148573, "grad_norm": 0.3207777738571167, "learning_rate": 3.657612882115245e-05, "loss": 0.2025, "step": 23103 }, { "epoch": 0.4120857560731994, "grad_norm": 0.2791122496128082, "learning_rate": 3.657474921204379e-05, "loss": 0.1487, "step": 23104 }, { "epoch": 0.41210359219491316, "grad_norm": 0.17093636095523834, "learning_rate": 3.657336955806741e-05, "loss": 0.1508, "step": 23105 }, { "epoch": 0.41212142831662685, "grad_norm": 0.1648503988981247, "learning_rate": 3.6571989859228654e-05, "loss": 0.1081, "step": 23106 }, { "epoch": 0.41213926443834054, "grad_norm": 0.23249873518943787, "learning_rate": 3.657061011553287e-05, "loss": 0.1283, "step": 23107 }, { "epoch": 0.4121571005600542, "grad_norm": 0.2866901755332947, "learning_rate": 3.65692303269854e-05, "loss": 0.1557, "step": 23108 }, { "epoch": 0.4121749366817679, "grad_norm": 0.2189938873052597, "learning_rate": 3.65678504935916e-05, "loss": 0.1571, "step": 23109 }, { "epoch": 0.4121927728034816, "grad_norm": 0.23248472809791565, "learning_rate": 3.656647061535682e-05, "loss": 0.1425, "step": 23110 }, { "epoch": 0.4122106089251953, "grad_norm": 0.2784644067287445, "learning_rate": 3.65650906922864e-05, "loss": 0.1874, "step": 23111 }, { "epoch": 0.412228445046909, "grad_norm": 0.23064307868480682, "learning_rate": 3.656371072438569e-05, "loss": 0.1495, "step": 23112 }, { "epoch": 0.4122462811686227, "grad_norm": 0.30065059661865234, "learning_rate": 3.656233071166005e-05, "loss": 0.1217, "step": 23113 }, { "epoch": 0.4122641172903364, "grad_norm": 0.22854925692081451, "learning_rate": 3.656095065411482e-05, "loss": 0.1876, "step": 23114 }, { "epoch": 0.4122819534120501, "grad_norm": 0.2647433578968048, "learning_rate": 3.655957055175535e-05, "loss": 0.1379, "step": 23115 }, { "epoch": 0.4122997895337638, "grad_norm": 0.2703079283237457, "learning_rate": 3.655819040458699e-05, "loss": 0.1605, "step": 23116 }, { "epoch": 0.41231762565547747, "grad_norm": 0.29445329308509827, "learning_rate": 3.65568102126151e-05, "loss": 0.1338, "step": 23117 }, { "epoch": 0.41233546177719116, "grad_norm": 0.2516380846500397, "learning_rate": 3.655542997584502e-05, "loss": 0.1534, "step": 23118 }, { "epoch": 0.41235329789890485, "grad_norm": 0.32217973470687866, "learning_rate": 3.65540496942821e-05, "loss": 0.1623, "step": 23119 }, { "epoch": 0.41237113402061853, "grad_norm": 0.25049564242362976, "learning_rate": 3.65526693679317e-05, "loss": 0.1355, "step": 23120 }, { "epoch": 0.4123889701423323, "grad_norm": 0.3135298788547516, "learning_rate": 3.655128899679915e-05, "loss": 0.1535, "step": 23121 }, { "epoch": 0.41240680626404597, "grad_norm": 0.2812309265136719, "learning_rate": 3.654990858088982e-05, "loss": 0.1177, "step": 23122 }, { "epoch": 0.41242464238575965, "grad_norm": 0.29339414834976196, "learning_rate": 3.654852812020906e-05, "loss": 0.197, "step": 23123 }, { "epoch": 0.41244247850747334, "grad_norm": 0.17289677262306213, "learning_rate": 3.654714761476221e-05, "loss": 0.129, "step": 23124 }, { "epoch": 0.41246031462918703, "grad_norm": 0.2023874968290329, "learning_rate": 3.654576706455464e-05, "loss": 0.1381, "step": 23125 }, { "epoch": 0.4124781507509007, "grad_norm": 0.22508110105991364, "learning_rate": 3.6544386469591675e-05, "loss": 0.1326, "step": 23126 }, { "epoch": 0.4124959868726144, "grad_norm": 0.33962857723236084, "learning_rate": 3.6543005829878686e-05, "loss": 0.1858, "step": 23127 }, { "epoch": 0.4125138229943281, "grad_norm": 0.2923021912574768, "learning_rate": 3.654162514542101e-05, "loss": 0.1453, "step": 23128 }, { "epoch": 0.4125316591160418, "grad_norm": 0.36241307854652405, "learning_rate": 3.6540244416224015e-05, "loss": 0.2042, "step": 23129 }, { "epoch": 0.4125494952377555, "grad_norm": 0.23861025273799896, "learning_rate": 3.653886364229305e-05, "loss": 0.1368, "step": 23130 }, { "epoch": 0.4125673313594692, "grad_norm": 0.26289698481559753, "learning_rate": 3.653748282363347e-05, "loss": 0.1866, "step": 23131 }, { "epoch": 0.4125851674811829, "grad_norm": 0.3463841676712036, "learning_rate": 3.653610196025061e-05, "loss": 0.1816, "step": 23132 }, { "epoch": 0.4126030036028966, "grad_norm": 0.23657816648483276, "learning_rate": 3.653472105214984e-05, "loss": 0.1577, "step": 23133 }, { "epoch": 0.4126208397246103, "grad_norm": 0.3227595388889313, "learning_rate": 3.653334009933651e-05, "loss": 0.0959, "step": 23134 }, { "epoch": 0.41263867584632397, "grad_norm": 0.3086320161819458, "learning_rate": 3.653195910181596e-05, "loss": 0.1929, "step": 23135 }, { "epoch": 0.41265651196803765, "grad_norm": 0.443154513835907, "learning_rate": 3.6530578059593564e-05, "loss": 0.1811, "step": 23136 }, { "epoch": 0.41267434808975134, "grad_norm": 0.24609248340129852, "learning_rate": 3.652919697267466e-05, "loss": 0.2163, "step": 23137 }, { "epoch": 0.4126921842114651, "grad_norm": 0.20582136511802673, "learning_rate": 3.6527815841064605e-05, "loss": 0.1665, "step": 23138 }, { "epoch": 0.4127100203331788, "grad_norm": 0.2355787456035614, "learning_rate": 3.652643466476876e-05, "loss": 0.1226, "step": 23139 }, { "epoch": 0.41272785645489246, "grad_norm": 0.2802969515323639, "learning_rate": 3.652505344379247e-05, "loss": 0.1003, "step": 23140 }, { "epoch": 0.41274569257660615, "grad_norm": 0.24644528329372406, "learning_rate": 3.6523672178141086e-05, "loss": 0.1944, "step": 23141 }, { "epoch": 0.41276352869831984, "grad_norm": 0.2273740917444229, "learning_rate": 3.6522290867819976e-05, "loss": 0.1357, "step": 23142 }, { "epoch": 0.4127813648200335, "grad_norm": 0.3127565383911133, "learning_rate": 3.652090951283448e-05, "loss": 0.1942, "step": 23143 }, { "epoch": 0.4127992009417472, "grad_norm": 0.21294841170310974, "learning_rate": 3.651952811318997e-05, "loss": 0.12, "step": 23144 }, { "epoch": 0.4128170370634609, "grad_norm": 0.22707761824131012, "learning_rate": 3.6518146668891786e-05, "loss": 0.1521, "step": 23145 }, { "epoch": 0.4128348731851746, "grad_norm": 0.20713070034980774, "learning_rate": 3.651676517994529e-05, "loss": 0.1221, "step": 23146 }, { "epoch": 0.41285270930688833, "grad_norm": 0.49638211727142334, "learning_rate": 3.6515383646355825e-05, "loss": 0.1586, "step": 23147 }, { "epoch": 0.412870545428602, "grad_norm": 0.1768767386674881, "learning_rate": 3.6514002068128766e-05, "loss": 0.1191, "step": 23148 }, { "epoch": 0.4128883815503157, "grad_norm": 0.33315321803092957, "learning_rate": 3.6512620445269453e-05, "loss": 0.1519, "step": 23149 }, { "epoch": 0.4129062176720294, "grad_norm": 0.24592268466949463, "learning_rate": 3.651123877778325e-05, "loss": 0.1523, "step": 23150 }, { "epoch": 0.4129240537937431, "grad_norm": 0.28065225481987, "learning_rate": 3.650985706567551e-05, "loss": 0.1856, "step": 23151 }, { "epoch": 0.4129418899154568, "grad_norm": 0.2765066921710968, "learning_rate": 3.650847530895158e-05, "loss": 0.141, "step": 23152 }, { "epoch": 0.41295972603717046, "grad_norm": 0.23578819632530212, "learning_rate": 3.650709350761683e-05, "loss": 0.1254, "step": 23153 }, { "epoch": 0.41297756215888415, "grad_norm": 0.2761731743812561, "learning_rate": 3.6505711661676614e-05, "loss": 0.1498, "step": 23154 }, { "epoch": 0.4129953982805979, "grad_norm": 0.3300350606441498, "learning_rate": 3.650432977113629e-05, "loss": 0.1429, "step": 23155 }, { "epoch": 0.4130132344023116, "grad_norm": 0.32916247844696045, "learning_rate": 3.65029478360012e-05, "loss": 0.1331, "step": 23156 }, { "epoch": 0.41303107052402527, "grad_norm": 0.19779275357723236, "learning_rate": 3.6501565856276706e-05, "loss": 0.1359, "step": 23157 }, { "epoch": 0.41304890664573896, "grad_norm": 0.22703024744987488, "learning_rate": 3.650018383196818e-05, "loss": 0.1427, "step": 23158 }, { "epoch": 0.41306674276745264, "grad_norm": 0.22725622355937958, "learning_rate": 3.649880176308098e-05, "loss": 0.1467, "step": 23159 }, { "epoch": 0.41308457888916633, "grad_norm": 0.2867787182331085, "learning_rate": 3.649741964962043e-05, "loss": 0.1324, "step": 23160 }, { "epoch": 0.41310241501088, "grad_norm": 0.3186010718345642, "learning_rate": 3.649603749159193e-05, "loss": 0.1978, "step": 23161 }, { "epoch": 0.4131202511325937, "grad_norm": 0.31810036301612854, "learning_rate": 3.649465528900081e-05, "loss": 0.1778, "step": 23162 }, { "epoch": 0.4131380872543074, "grad_norm": 0.3615397810935974, "learning_rate": 3.6493273041852424e-05, "loss": 0.2253, "step": 23163 }, { "epoch": 0.41315592337602114, "grad_norm": 0.2043425738811493, "learning_rate": 3.6491890750152166e-05, "loss": 0.1582, "step": 23164 }, { "epoch": 0.4131737594977348, "grad_norm": 0.27091988921165466, "learning_rate": 3.6490508413905354e-05, "loss": 0.1778, "step": 23165 }, { "epoch": 0.4131915956194485, "grad_norm": 0.30199170112609863, "learning_rate": 3.648912603311737e-05, "loss": 0.1688, "step": 23166 }, { "epoch": 0.4132094317411622, "grad_norm": 0.3116376996040344, "learning_rate": 3.648774360779356e-05, "loss": 0.1677, "step": 23167 }, { "epoch": 0.4132272678628759, "grad_norm": 0.24620358645915985, "learning_rate": 3.648636113793929e-05, "loss": 0.1552, "step": 23168 }, { "epoch": 0.4132451039845896, "grad_norm": 0.2617967426776886, "learning_rate": 3.648497862355992e-05, "loss": 0.2027, "step": 23169 }, { "epoch": 0.41326294010630327, "grad_norm": 0.2816195487976074, "learning_rate": 3.6483596064660794e-05, "loss": 0.1446, "step": 23170 }, { "epoch": 0.41328077622801696, "grad_norm": 0.25955280661582947, "learning_rate": 3.6482213461247295e-05, "loss": 0.1755, "step": 23171 }, { "epoch": 0.4132986123497307, "grad_norm": 0.25968292355537415, "learning_rate": 3.648083081332478e-05, "loss": 0.1245, "step": 23172 }, { "epoch": 0.4133164484714444, "grad_norm": 0.22797012329101562, "learning_rate": 3.647944812089857e-05, "loss": 0.1159, "step": 23173 }, { "epoch": 0.4133342845931581, "grad_norm": 0.35979729890823364, "learning_rate": 3.647806538397408e-05, "loss": 0.1626, "step": 23174 }, { "epoch": 0.41335212071487176, "grad_norm": 0.22722698748111725, "learning_rate": 3.647668260255665e-05, "loss": 0.1227, "step": 23175 }, { "epoch": 0.41336995683658545, "grad_norm": 0.3491485118865967, "learning_rate": 3.6475299776651614e-05, "loss": 0.1771, "step": 23176 }, { "epoch": 0.41338779295829914, "grad_norm": 0.3642544150352478, "learning_rate": 3.647391690626435e-05, "loss": 0.2034, "step": 23177 }, { "epoch": 0.4134056290800128, "grad_norm": 0.2418254017829895, "learning_rate": 3.647253399140023e-05, "loss": 0.0914, "step": 23178 }, { "epoch": 0.4134234652017265, "grad_norm": 0.25657105445861816, "learning_rate": 3.647115103206461e-05, "loss": 0.1414, "step": 23179 }, { "epoch": 0.41344130132344026, "grad_norm": 0.2985260486602783, "learning_rate": 3.646976802826284e-05, "loss": 0.1974, "step": 23180 }, { "epoch": 0.41345913744515395, "grad_norm": 0.22879236936569214, "learning_rate": 3.646838498000029e-05, "loss": 0.1617, "step": 23181 }, { "epoch": 0.41347697356686763, "grad_norm": 0.2395196557044983, "learning_rate": 3.646700188728232e-05, "loss": 0.1317, "step": 23182 }, { "epoch": 0.4134948096885813, "grad_norm": 0.32290178537368774, "learning_rate": 3.6465618750114293e-05, "loss": 0.1669, "step": 23183 }, { "epoch": 0.413512645810295, "grad_norm": 0.29915207624435425, "learning_rate": 3.6464235568501556e-05, "loss": 0.2162, "step": 23184 }, { "epoch": 0.4135304819320087, "grad_norm": 0.3112390339374542, "learning_rate": 3.646285234244949e-05, "loss": 0.1712, "step": 23185 }, { "epoch": 0.4135483180537224, "grad_norm": 0.2557258605957031, "learning_rate": 3.646146907196345e-05, "loss": 0.1545, "step": 23186 }, { "epoch": 0.4135661541754361, "grad_norm": 0.24216027557849884, "learning_rate": 3.64600857570488e-05, "loss": 0.1339, "step": 23187 }, { "epoch": 0.41358399029714976, "grad_norm": 0.268718957901001, "learning_rate": 3.64587023977109e-05, "loss": 0.1914, "step": 23188 }, { "epoch": 0.4136018264188635, "grad_norm": 0.2952747642993927, "learning_rate": 3.6457318993955105e-05, "loss": 0.1851, "step": 23189 }, { "epoch": 0.4136196625405772, "grad_norm": 0.279373437166214, "learning_rate": 3.6455935545786784e-05, "loss": 0.1109, "step": 23190 }, { "epoch": 0.4136374986622909, "grad_norm": 0.2655762732028961, "learning_rate": 3.64545520532113e-05, "loss": 0.121, "step": 23191 }, { "epoch": 0.41365533478400457, "grad_norm": 0.24142512679100037, "learning_rate": 3.6453168516234026e-05, "loss": 0.1701, "step": 23192 }, { "epoch": 0.41367317090571826, "grad_norm": 0.18149729073047638, "learning_rate": 3.64517849348603e-05, "loss": 0.1396, "step": 23193 }, { "epoch": 0.41369100702743195, "grad_norm": 0.24246717989444733, "learning_rate": 3.645040130909552e-05, "loss": 0.129, "step": 23194 }, { "epoch": 0.41370884314914563, "grad_norm": 0.28347063064575195, "learning_rate": 3.644901763894501e-05, "loss": 0.1856, "step": 23195 }, { "epoch": 0.4137266792708593, "grad_norm": 0.3277598023414612, "learning_rate": 3.644763392441417e-05, "loss": 0.2089, "step": 23196 }, { "epoch": 0.41374451539257306, "grad_norm": 0.22673563659191132, "learning_rate": 3.6446250165508334e-05, "loss": 0.1482, "step": 23197 }, { "epoch": 0.41376235151428675, "grad_norm": 0.33759182691574097, "learning_rate": 3.6444866362232875e-05, "loss": 0.0903, "step": 23198 }, { "epoch": 0.41378018763600044, "grad_norm": 0.26132363080978394, "learning_rate": 3.644348251459317e-05, "loss": 0.137, "step": 23199 }, { "epoch": 0.41379802375771413, "grad_norm": 0.3460666835308075, "learning_rate": 3.6442098622594576e-05, "loss": 0.1874, "step": 23200 }, { "epoch": 0.4138158598794278, "grad_norm": 0.2770192325115204, "learning_rate": 3.644071468624246e-05, "loss": 0.1858, "step": 23201 }, { "epoch": 0.4138336960011415, "grad_norm": 0.2089637666940689, "learning_rate": 3.6439330705542176e-05, "loss": 0.1377, "step": 23202 }, { "epoch": 0.4138515321228552, "grad_norm": 0.23270075023174286, "learning_rate": 3.643794668049909e-05, "loss": 0.1567, "step": 23203 }, { "epoch": 0.4138693682445689, "grad_norm": 0.3162238597869873, "learning_rate": 3.643656261111858e-05, "loss": 0.1178, "step": 23204 }, { "epoch": 0.41388720436628257, "grad_norm": 0.3702440559864044, "learning_rate": 3.6435178497405996e-05, "loss": 0.2463, "step": 23205 }, { "epoch": 0.4139050404879963, "grad_norm": 0.22339370846748352, "learning_rate": 3.643379433936671e-05, "loss": 0.1383, "step": 23206 }, { "epoch": 0.41392287660971, "grad_norm": 0.20407943427562714, "learning_rate": 3.64324101370061e-05, "loss": 0.1209, "step": 23207 }, { "epoch": 0.4139407127314237, "grad_norm": 0.23986142873764038, "learning_rate": 3.643102589032951e-05, "loss": 0.1452, "step": 23208 }, { "epoch": 0.4139585488531374, "grad_norm": 0.2907426059246063, "learning_rate": 3.6429641599342326e-05, "loss": 0.1201, "step": 23209 }, { "epoch": 0.41397638497485106, "grad_norm": 0.2096967250108719, "learning_rate": 3.642825726404989e-05, "loss": 0.0943, "step": 23210 }, { "epoch": 0.41399422109656475, "grad_norm": 0.3130171298980713, "learning_rate": 3.6426872884457585e-05, "loss": 0.1909, "step": 23211 }, { "epoch": 0.41401205721827844, "grad_norm": 0.2926204800605774, "learning_rate": 3.642548846057077e-05, "loss": 0.2103, "step": 23212 }, { "epoch": 0.4140298933399921, "grad_norm": 0.2604474425315857, "learning_rate": 3.642410399239482e-05, "loss": 0.1646, "step": 23213 }, { "epoch": 0.41404772946170587, "grad_norm": 0.28436794877052307, "learning_rate": 3.642271947993511e-05, "loss": 0.1107, "step": 23214 }, { "epoch": 0.41406556558341956, "grad_norm": 0.3702876567840576, "learning_rate": 3.642133492319698e-05, "loss": 0.1483, "step": 23215 }, { "epoch": 0.41408340170513325, "grad_norm": 0.27169784903526306, "learning_rate": 3.641995032218582e-05, "loss": 0.2042, "step": 23216 }, { "epoch": 0.41410123782684694, "grad_norm": 0.28211212158203125, "learning_rate": 3.641856567690698e-05, "loss": 0.1465, "step": 23217 }, { "epoch": 0.4141190739485606, "grad_norm": 0.24151967465877533, "learning_rate": 3.6417180987365835e-05, "loss": 0.1849, "step": 23218 }, { "epoch": 0.4141369100702743, "grad_norm": 0.28115060925483704, "learning_rate": 3.641579625356775e-05, "loss": 0.165, "step": 23219 }, { "epoch": 0.414154746191988, "grad_norm": 0.25493156909942627, "learning_rate": 3.641441147551811e-05, "loss": 0.1739, "step": 23220 }, { "epoch": 0.4141725823137017, "grad_norm": 0.45502808690071106, "learning_rate": 3.641302665322226e-05, "loss": 0.1765, "step": 23221 }, { "epoch": 0.41419041843541543, "grad_norm": 0.3129476308822632, "learning_rate": 3.641164178668557e-05, "loss": 0.0975, "step": 23222 }, { "epoch": 0.4142082545571291, "grad_norm": 0.3463571071624756, "learning_rate": 3.641025687591343e-05, "loss": 0.1273, "step": 23223 }, { "epoch": 0.4142260906788428, "grad_norm": 0.2650723159313202, "learning_rate": 3.640887192091118e-05, "loss": 0.1523, "step": 23224 }, { "epoch": 0.4142439268005565, "grad_norm": 0.25148633122444153, "learning_rate": 3.6407486921684206e-05, "loss": 0.1904, "step": 23225 }, { "epoch": 0.4142617629222702, "grad_norm": 0.3528432548046112, "learning_rate": 3.640610187823788e-05, "loss": 0.1326, "step": 23226 }, { "epoch": 0.41427959904398387, "grad_norm": 0.3362433910369873, "learning_rate": 3.6404716790577555e-05, "loss": 0.1647, "step": 23227 }, { "epoch": 0.41429743516569756, "grad_norm": 0.25073257088661194, "learning_rate": 3.640333165870861e-05, "loss": 0.124, "step": 23228 }, { "epoch": 0.41431527128741125, "grad_norm": 0.29544728994369507, "learning_rate": 3.640194648263642e-05, "loss": 0.0908, "step": 23229 }, { "epoch": 0.41433310740912493, "grad_norm": 0.33751586079597473, "learning_rate": 3.640056126236634e-05, "loss": 0.1477, "step": 23230 }, { "epoch": 0.4143509435308387, "grad_norm": 0.3116064667701721, "learning_rate": 3.639917599790375e-05, "loss": 0.1474, "step": 23231 }, { "epoch": 0.41436877965255237, "grad_norm": 0.31631600856781006, "learning_rate": 3.639779068925401e-05, "loss": 0.1889, "step": 23232 }, { "epoch": 0.41438661577426605, "grad_norm": 0.2231280654668808, "learning_rate": 3.63964053364225e-05, "loss": 0.1669, "step": 23233 }, { "epoch": 0.41440445189597974, "grad_norm": 0.19502250850200653, "learning_rate": 3.639501993941459e-05, "loss": 0.165, "step": 23234 }, { "epoch": 0.41442228801769343, "grad_norm": 0.3572506010532379, "learning_rate": 3.6393634498235645e-05, "loss": 0.2092, "step": 23235 }, { "epoch": 0.4144401241394071, "grad_norm": 0.338662713766098, "learning_rate": 3.6392249012891036e-05, "loss": 0.155, "step": 23236 }, { "epoch": 0.4144579602611208, "grad_norm": 0.2813863158226013, "learning_rate": 3.639086348338614e-05, "loss": 0.1587, "step": 23237 }, { "epoch": 0.4144757963828345, "grad_norm": 0.2780759036540985, "learning_rate": 3.638947790972632e-05, "loss": 0.1493, "step": 23238 }, { "epoch": 0.41449363250454824, "grad_norm": 0.3177599310874939, "learning_rate": 3.6388092291916945e-05, "loss": 0.0855, "step": 23239 }, { "epoch": 0.4145114686262619, "grad_norm": 0.3616902232170105, "learning_rate": 3.63867066299634e-05, "loss": 0.1613, "step": 23240 }, { "epoch": 0.4145293047479756, "grad_norm": 0.27790239453315735, "learning_rate": 3.638532092387104e-05, "loss": 0.2083, "step": 23241 }, { "epoch": 0.4145471408696893, "grad_norm": 0.23762886226177216, "learning_rate": 3.638393517364525e-05, "loss": 0.1081, "step": 23242 }, { "epoch": 0.414564976991403, "grad_norm": 0.24658772349357605, "learning_rate": 3.638254937929139e-05, "loss": 0.1537, "step": 23243 }, { "epoch": 0.4145828131131167, "grad_norm": 0.27946385741233826, "learning_rate": 3.6381163540814845e-05, "loss": 0.1309, "step": 23244 }, { "epoch": 0.41460064923483037, "grad_norm": 0.29800471663475037, "learning_rate": 3.637977765822097e-05, "loss": 0.1609, "step": 23245 }, { "epoch": 0.41461848535654405, "grad_norm": 0.2592264413833618, "learning_rate": 3.637839173151515e-05, "loss": 0.1504, "step": 23246 }, { "epoch": 0.41463632147825774, "grad_norm": 0.24479345977306366, "learning_rate": 3.6377005760702754e-05, "loss": 0.1419, "step": 23247 }, { "epoch": 0.4146541575999715, "grad_norm": 0.257204532623291, "learning_rate": 3.6375619745789155e-05, "loss": 0.1599, "step": 23248 }, { "epoch": 0.4146719937216852, "grad_norm": 0.26670849323272705, "learning_rate": 3.637423368677972e-05, "loss": 0.1637, "step": 23249 }, { "epoch": 0.41468982984339886, "grad_norm": 0.23996742069721222, "learning_rate": 3.637284758367983e-05, "loss": 0.1514, "step": 23250 }, { "epoch": 0.41470766596511255, "grad_norm": 0.27667713165283203, "learning_rate": 3.637146143649486e-05, "loss": 0.1002, "step": 23251 }, { "epoch": 0.41472550208682624, "grad_norm": 0.23895084857940674, "learning_rate": 3.637007524523017e-05, "loss": 0.1472, "step": 23252 }, { "epoch": 0.4147433382085399, "grad_norm": 0.2592090368270874, "learning_rate": 3.636868900989114e-05, "loss": 0.105, "step": 23253 }, { "epoch": 0.4147611743302536, "grad_norm": 0.2676096260547638, "learning_rate": 3.636730273048315e-05, "loss": 0.174, "step": 23254 }, { "epoch": 0.4147790104519673, "grad_norm": 0.20837606489658356, "learning_rate": 3.636591640701157e-05, "loss": 0.1605, "step": 23255 }, { "epoch": 0.41479684657368104, "grad_norm": 0.2332581877708435, "learning_rate": 3.636453003948177e-05, "loss": 0.1287, "step": 23256 }, { "epoch": 0.41481468269539473, "grad_norm": 0.4502076804637909, "learning_rate": 3.636314362789913e-05, "loss": 0.1805, "step": 23257 }, { "epoch": 0.4148325188171084, "grad_norm": 0.24375800788402557, "learning_rate": 3.636175717226901e-05, "loss": 0.1454, "step": 23258 }, { "epoch": 0.4148503549388221, "grad_norm": 0.33940210938453674, "learning_rate": 3.636037067259681e-05, "loss": 0.1664, "step": 23259 }, { "epoch": 0.4148681910605358, "grad_norm": 0.2320115566253662, "learning_rate": 3.635898412888787e-05, "loss": 0.1501, "step": 23260 }, { "epoch": 0.4148860271822495, "grad_norm": 0.2881716191768646, "learning_rate": 3.6357597541147596e-05, "loss": 0.1331, "step": 23261 }, { "epoch": 0.41490386330396317, "grad_norm": 0.2525869309902191, "learning_rate": 3.635621090938135e-05, "loss": 0.1101, "step": 23262 }, { "epoch": 0.41492169942567686, "grad_norm": 0.2740098237991333, "learning_rate": 3.6354824233594514e-05, "loss": 0.1243, "step": 23263 }, { "epoch": 0.41493953554739055, "grad_norm": 0.255045622587204, "learning_rate": 3.635343751379245e-05, "loss": 0.1738, "step": 23264 }, { "epoch": 0.4149573716691043, "grad_norm": 0.22921952605247498, "learning_rate": 3.6352050749980546e-05, "loss": 0.1843, "step": 23265 }, { "epoch": 0.414975207790818, "grad_norm": 0.24881695210933685, "learning_rate": 3.635066394216416e-05, "loss": 0.1641, "step": 23266 }, { "epoch": 0.41499304391253167, "grad_norm": 0.32158026099205017, "learning_rate": 3.634927709034869e-05, "loss": 0.1828, "step": 23267 }, { "epoch": 0.41501088003424536, "grad_norm": 0.2720593214035034, "learning_rate": 3.6347890194539504e-05, "loss": 0.173, "step": 23268 }, { "epoch": 0.41502871615595904, "grad_norm": 0.23455075919628143, "learning_rate": 3.634650325474198e-05, "loss": 0.155, "step": 23269 }, { "epoch": 0.41504655227767273, "grad_norm": 0.3205150365829468, "learning_rate": 3.6345116270961485e-05, "loss": 0.1891, "step": 23270 }, { "epoch": 0.4150643883993864, "grad_norm": 0.27757516503334045, "learning_rate": 3.6343729243203395e-05, "loss": 0.1526, "step": 23271 }, { "epoch": 0.4150822245211001, "grad_norm": 0.307907372713089, "learning_rate": 3.634234217147311e-05, "loss": 0.1554, "step": 23272 }, { "epoch": 0.41510006064281385, "grad_norm": 0.32490867376327515, "learning_rate": 3.6340955055775974e-05, "loss": 0.1409, "step": 23273 }, { "epoch": 0.41511789676452754, "grad_norm": 0.31072351336479187, "learning_rate": 3.633956789611738e-05, "loss": 0.201, "step": 23274 }, { "epoch": 0.4151357328862412, "grad_norm": 0.1969567835330963, "learning_rate": 3.63381806925027e-05, "loss": 0.1672, "step": 23275 }, { "epoch": 0.4151535690079549, "grad_norm": 0.24947373569011688, "learning_rate": 3.633679344493732e-05, "loss": 0.1627, "step": 23276 }, { "epoch": 0.4151714051296686, "grad_norm": 0.3956585228443146, "learning_rate": 3.6335406153426616e-05, "loss": 0.152, "step": 23277 }, { "epoch": 0.4151892412513823, "grad_norm": 0.2326977699995041, "learning_rate": 3.633401881797597e-05, "loss": 0.1603, "step": 23278 }, { "epoch": 0.415207077373096, "grad_norm": 0.25640445947647095, "learning_rate": 3.6332631438590736e-05, "loss": 0.1278, "step": 23279 }, { "epoch": 0.41522491349480967, "grad_norm": 0.27067849040031433, "learning_rate": 3.633124401527632e-05, "loss": 0.1044, "step": 23280 }, { "epoch": 0.4152427496165234, "grad_norm": 0.26866787672042847, "learning_rate": 3.632985654803808e-05, "loss": 0.2111, "step": 23281 }, { "epoch": 0.4152605857382371, "grad_norm": 0.28165364265441895, "learning_rate": 3.6328469036881405e-05, "loss": 0.1863, "step": 23282 }, { "epoch": 0.4152784218599508, "grad_norm": 0.2928689420223236, "learning_rate": 3.632708148181168e-05, "loss": 0.1302, "step": 23283 }, { "epoch": 0.4152962579816645, "grad_norm": 0.2067536860704422, "learning_rate": 3.632569388283427e-05, "loss": 0.1619, "step": 23284 }, { "epoch": 0.41531409410337816, "grad_norm": 0.4143860936164856, "learning_rate": 3.632430623995456e-05, "loss": 0.19, "step": 23285 }, { "epoch": 0.41533193022509185, "grad_norm": 0.1750083714723587, "learning_rate": 3.632291855317792e-05, "loss": 0.1237, "step": 23286 }, { "epoch": 0.41534976634680554, "grad_norm": 0.37058934569358826, "learning_rate": 3.632153082250975e-05, "loss": 0.1655, "step": 23287 }, { "epoch": 0.4153676024685192, "grad_norm": 0.27337387204170227, "learning_rate": 3.6320143047955396e-05, "loss": 0.1627, "step": 23288 }, { "epoch": 0.4153854385902329, "grad_norm": 0.21912992000579834, "learning_rate": 3.631875522952027e-05, "loss": 0.1835, "step": 23289 }, { "epoch": 0.41540327471194666, "grad_norm": 0.32558396458625793, "learning_rate": 3.6317367367209744e-05, "loss": 0.1802, "step": 23290 }, { "epoch": 0.41542111083366035, "grad_norm": 0.24679528176784515, "learning_rate": 3.631597946102919e-05, "loss": 0.162, "step": 23291 }, { "epoch": 0.41543894695537403, "grad_norm": 0.24788397550582886, "learning_rate": 3.6314591510983984e-05, "loss": 0.1647, "step": 23292 }, { "epoch": 0.4154567830770877, "grad_norm": 0.29844745993614197, "learning_rate": 3.631320351707953e-05, "loss": 0.2673, "step": 23293 }, { "epoch": 0.4154746191988014, "grad_norm": 0.28938814997673035, "learning_rate": 3.6311815479321174e-05, "loss": 0.1621, "step": 23294 }, { "epoch": 0.4154924553205151, "grad_norm": 0.24037320911884308, "learning_rate": 3.6310427397714316e-05, "loss": 0.1757, "step": 23295 }, { "epoch": 0.4155102914422288, "grad_norm": 0.21771900355815887, "learning_rate": 3.630903927226434e-05, "loss": 0.146, "step": 23296 }, { "epoch": 0.4155281275639425, "grad_norm": 0.24293573200702667, "learning_rate": 3.6307651102976625e-05, "loss": 0.1259, "step": 23297 }, { "epoch": 0.4155459636856562, "grad_norm": 0.25553926825523376, "learning_rate": 3.630626288985655e-05, "loss": 0.1635, "step": 23298 }, { "epoch": 0.4155637998073699, "grad_norm": 0.3220132291316986, "learning_rate": 3.630487463290949e-05, "loss": 0.151, "step": 23299 }, { "epoch": 0.4155816359290836, "grad_norm": 0.2131488025188446, "learning_rate": 3.630348633214083e-05, "loss": 0.1354, "step": 23300 }, { "epoch": 0.4155994720507973, "grad_norm": 0.23164942860603333, "learning_rate": 3.6302097987555955e-05, "loss": 0.1536, "step": 23301 }, { "epoch": 0.41561730817251097, "grad_norm": 0.2612023651599884, "learning_rate": 3.630070959916024e-05, "loss": 0.1465, "step": 23302 }, { "epoch": 0.41563514429422466, "grad_norm": 0.30307537317276, "learning_rate": 3.6299321166959075e-05, "loss": 0.1627, "step": 23303 }, { "epoch": 0.41565298041593834, "grad_norm": 0.23780179023742676, "learning_rate": 3.629793269095785e-05, "loss": 0.1186, "step": 23304 }, { "epoch": 0.41567081653765203, "grad_norm": 0.3202025592327118, "learning_rate": 3.6296544171161914e-05, "loss": 0.1933, "step": 23305 }, { "epoch": 0.4156886526593657, "grad_norm": 0.18145042657852173, "learning_rate": 3.629515560757669e-05, "loss": 0.1366, "step": 23306 }, { "epoch": 0.41570648878107946, "grad_norm": 0.2425667941570282, "learning_rate": 3.6293767000207534e-05, "loss": 0.1874, "step": 23307 }, { "epoch": 0.41572432490279315, "grad_norm": 0.29195621609687805, "learning_rate": 3.6292378349059836e-05, "loss": 0.1598, "step": 23308 }, { "epoch": 0.41574216102450684, "grad_norm": 0.34642493724823, "learning_rate": 3.629098965413897e-05, "loss": 0.1953, "step": 23309 }, { "epoch": 0.41575999714622053, "grad_norm": 0.3469013571739197, "learning_rate": 3.628960091545034e-05, "loss": 0.169, "step": 23310 }, { "epoch": 0.4157778332679342, "grad_norm": 0.2694319784641266, "learning_rate": 3.628821213299932e-05, "loss": 0.1811, "step": 23311 }, { "epoch": 0.4157956693896479, "grad_norm": 0.3065076172351837, "learning_rate": 3.6286823306791284e-05, "loss": 0.2084, "step": 23312 }, { "epoch": 0.4158135055113616, "grad_norm": 0.3911265730857849, "learning_rate": 3.628543443683163e-05, "loss": 0.1043, "step": 23313 }, { "epoch": 0.4158313416330753, "grad_norm": 0.3049227297306061, "learning_rate": 3.628404552312573e-05, "loss": 0.1705, "step": 23314 }, { "epoch": 0.415849177754789, "grad_norm": 0.2835361957550049, "learning_rate": 3.628265656567897e-05, "loss": 0.1157, "step": 23315 }, { "epoch": 0.4158670138765027, "grad_norm": 0.20075726509094238, "learning_rate": 3.628126756449673e-05, "loss": 0.1496, "step": 23316 }, { "epoch": 0.4158848499982164, "grad_norm": 0.2600798010826111, "learning_rate": 3.627987851958441e-05, "loss": 0.1416, "step": 23317 }, { "epoch": 0.4159026861199301, "grad_norm": 0.3323518931865692, "learning_rate": 3.6278489430947383e-05, "loss": 0.1661, "step": 23318 }, { "epoch": 0.4159205222416438, "grad_norm": 0.23960067331790924, "learning_rate": 3.627710029859104e-05, "loss": 0.1484, "step": 23319 }, { "epoch": 0.41593835836335746, "grad_norm": 0.2297036498785019, "learning_rate": 3.6275711122520753e-05, "loss": 0.1504, "step": 23320 }, { "epoch": 0.41595619448507115, "grad_norm": 0.33420923352241516, "learning_rate": 3.627432190274192e-05, "loss": 0.1116, "step": 23321 }, { "epoch": 0.41597403060678484, "grad_norm": 0.20574922859668732, "learning_rate": 3.6272932639259916e-05, "loss": 0.105, "step": 23322 }, { "epoch": 0.4159918667284986, "grad_norm": 0.26272812485694885, "learning_rate": 3.627154333208014e-05, "loss": 0.1394, "step": 23323 }, { "epoch": 0.41600970285021227, "grad_norm": 0.31237339973449707, "learning_rate": 3.627015398120797e-05, "loss": 0.1336, "step": 23324 }, { "epoch": 0.41602753897192596, "grad_norm": 0.3028266131877899, "learning_rate": 3.6268764586648774e-05, "loss": 0.1923, "step": 23325 }, { "epoch": 0.41604537509363965, "grad_norm": 0.2907858192920685, "learning_rate": 3.6267375148407975e-05, "loss": 0.1719, "step": 23326 }, { "epoch": 0.41606321121535333, "grad_norm": 0.24408207833766937, "learning_rate": 3.626598566649092e-05, "loss": 0.1542, "step": 23327 }, { "epoch": 0.416081047337067, "grad_norm": 0.1730697900056839, "learning_rate": 3.626459614090303e-05, "loss": 0.1309, "step": 23328 }, { "epoch": 0.4160988834587807, "grad_norm": 0.22531509399414062, "learning_rate": 3.626320657164966e-05, "loss": 0.0978, "step": 23329 }, { "epoch": 0.4161167195804944, "grad_norm": 0.3676927387714386, "learning_rate": 3.626181695873622e-05, "loss": 0.1725, "step": 23330 }, { "epoch": 0.4161345557022081, "grad_norm": 0.23085030913352966, "learning_rate": 3.6260427302168084e-05, "loss": 0.1621, "step": 23331 }, { "epoch": 0.41615239182392183, "grad_norm": 0.18741777539253235, "learning_rate": 3.6259037601950646e-05, "loss": 0.1296, "step": 23332 }, { "epoch": 0.4161702279456355, "grad_norm": 0.2550046145915985, "learning_rate": 3.625764785808929e-05, "loss": 0.1748, "step": 23333 }, { "epoch": 0.4161880640673492, "grad_norm": 0.20631776750087738, "learning_rate": 3.62562580705894e-05, "loss": 0.1421, "step": 23334 }, { "epoch": 0.4162059001890629, "grad_norm": 0.2726269066333771, "learning_rate": 3.6254868239456367e-05, "loss": 0.1342, "step": 23335 }, { "epoch": 0.4162237363107766, "grad_norm": 0.21395710110664368, "learning_rate": 3.625347836469557e-05, "loss": 0.1643, "step": 23336 }, { "epoch": 0.41624157243249027, "grad_norm": 0.4118204712867737, "learning_rate": 3.625208844631241e-05, "loss": 0.1633, "step": 23337 }, { "epoch": 0.41625940855420396, "grad_norm": 0.35637524724006653, "learning_rate": 3.625069848431227e-05, "loss": 0.1537, "step": 23338 }, { "epoch": 0.41627724467591765, "grad_norm": 0.25940996408462524, "learning_rate": 3.624930847870054e-05, "loss": 0.1503, "step": 23339 }, { "epoch": 0.4162950807976314, "grad_norm": 0.21032650768756866, "learning_rate": 3.62479184294826e-05, "loss": 0.1193, "step": 23340 }, { "epoch": 0.4163129169193451, "grad_norm": 0.27171435952186584, "learning_rate": 3.6246528336663846e-05, "loss": 0.1905, "step": 23341 }, { "epoch": 0.41633075304105877, "grad_norm": 0.20779871940612793, "learning_rate": 3.624513820024966e-05, "loss": 0.1689, "step": 23342 }, { "epoch": 0.41634858916277245, "grad_norm": 0.26859521865844727, "learning_rate": 3.624374802024544e-05, "loss": 0.1591, "step": 23343 }, { "epoch": 0.41636642528448614, "grad_norm": 0.36573532223701477, "learning_rate": 3.6242357796656563e-05, "loss": 0.19, "step": 23344 }, { "epoch": 0.41638426140619983, "grad_norm": 0.24918632209300995, "learning_rate": 3.624096752948843e-05, "loss": 0.0949, "step": 23345 }, { "epoch": 0.4164020975279135, "grad_norm": 0.3105931878089905, "learning_rate": 3.623957721874642e-05, "loss": 0.1422, "step": 23346 }, { "epoch": 0.4164199336496272, "grad_norm": 0.2787606716156006, "learning_rate": 3.6238186864435934e-05, "loss": 0.1414, "step": 23347 }, { "epoch": 0.4164377697713409, "grad_norm": 0.24953365325927734, "learning_rate": 3.623679646656235e-05, "loss": 0.163, "step": 23348 }, { "epoch": 0.41645560589305464, "grad_norm": 0.27132412791252136, "learning_rate": 3.623540602513106e-05, "loss": 0.1515, "step": 23349 }, { "epoch": 0.4164734420147683, "grad_norm": 0.2446313053369522, "learning_rate": 3.623401554014745e-05, "loss": 0.1056, "step": 23350 }, { "epoch": 0.416491278136482, "grad_norm": 0.26457521319389343, "learning_rate": 3.623262501161692e-05, "loss": 0.141, "step": 23351 }, { "epoch": 0.4165091142581957, "grad_norm": 0.2918124198913574, "learning_rate": 3.623123443954486e-05, "loss": 0.2161, "step": 23352 }, { "epoch": 0.4165269503799094, "grad_norm": 0.2150900661945343, "learning_rate": 3.622984382393665e-05, "loss": 0.1156, "step": 23353 }, { "epoch": 0.4165447865016231, "grad_norm": 0.24866187572479248, "learning_rate": 3.622845316479769e-05, "loss": 0.1425, "step": 23354 }, { "epoch": 0.41656262262333676, "grad_norm": 0.3092412054538727, "learning_rate": 3.622706246213337e-05, "loss": 0.1814, "step": 23355 }, { "epoch": 0.41658045874505045, "grad_norm": 0.28487467765808105, "learning_rate": 3.622567171594908e-05, "loss": 0.2087, "step": 23356 }, { "epoch": 0.4165982948667642, "grad_norm": 0.2582945227622986, "learning_rate": 3.62242809262502e-05, "loss": 0.1168, "step": 23357 }, { "epoch": 0.4166161309884779, "grad_norm": 0.28591442108154297, "learning_rate": 3.622289009304214e-05, "loss": 0.1497, "step": 23358 }, { "epoch": 0.41663396711019157, "grad_norm": 0.298454612493515, "learning_rate": 3.622149921633027e-05, "loss": 0.1519, "step": 23359 }, { "epoch": 0.41665180323190526, "grad_norm": 0.2814613878726959, "learning_rate": 3.6220108296120005e-05, "loss": 0.1306, "step": 23360 }, { "epoch": 0.41666963935361895, "grad_norm": 0.29728448390960693, "learning_rate": 3.6218717332416724e-05, "loss": 0.2144, "step": 23361 }, { "epoch": 0.41668747547533264, "grad_norm": 0.48023849725723267, "learning_rate": 3.6217326325225816e-05, "loss": 0.1878, "step": 23362 }, { "epoch": 0.4167053115970463, "grad_norm": 0.21834638714790344, "learning_rate": 3.6215935274552674e-05, "loss": 0.1552, "step": 23363 }, { "epoch": 0.41672314771876, "grad_norm": 0.371028333902359, "learning_rate": 3.62145441804027e-05, "loss": 0.2564, "step": 23364 }, { "epoch": 0.4167409838404737, "grad_norm": 0.3147681951522827, "learning_rate": 3.621315304278127e-05, "loss": 0.1312, "step": 23365 }, { "epoch": 0.41675881996218744, "grad_norm": 0.3369613587856293, "learning_rate": 3.621176186169379e-05, "loss": 0.1762, "step": 23366 }, { "epoch": 0.41677665608390113, "grad_norm": 0.2935454845428467, "learning_rate": 3.621037063714565e-05, "loss": 0.2089, "step": 23367 }, { "epoch": 0.4167944922056148, "grad_norm": 0.28261691331863403, "learning_rate": 3.6208979369142245e-05, "loss": 0.1785, "step": 23368 }, { "epoch": 0.4168123283273285, "grad_norm": 0.3761289119720459, "learning_rate": 3.620758805768896e-05, "loss": 0.1494, "step": 23369 }, { "epoch": 0.4168301644490422, "grad_norm": 0.16350127756595612, "learning_rate": 3.6206196702791186e-05, "loss": 0.1181, "step": 23370 }, { "epoch": 0.4168480005707559, "grad_norm": 0.22421710193157196, "learning_rate": 3.6204805304454334e-05, "loss": 0.1521, "step": 23371 }, { "epoch": 0.41686583669246957, "grad_norm": 0.20705775916576385, "learning_rate": 3.620341386268379e-05, "loss": 0.1168, "step": 23372 }, { "epoch": 0.41688367281418326, "grad_norm": 0.2572283446788788, "learning_rate": 3.620202237748493e-05, "loss": 0.1374, "step": 23373 }, { "epoch": 0.416901508935897, "grad_norm": 0.3022323548793793, "learning_rate": 3.620063084886318e-05, "loss": 0.1563, "step": 23374 }, { "epoch": 0.4169193450576107, "grad_norm": 0.19538500905036926, "learning_rate": 3.61992392768239e-05, "loss": 0.124, "step": 23375 }, { "epoch": 0.4169371811793244, "grad_norm": 0.26758912205696106, "learning_rate": 3.619784766137251e-05, "loss": 0.2, "step": 23376 }, { "epoch": 0.41695501730103807, "grad_norm": 0.2323193997144699, "learning_rate": 3.619645600251439e-05, "loss": 0.1597, "step": 23377 }, { "epoch": 0.41697285342275175, "grad_norm": 0.24129992723464966, "learning_rate": 3.619506430025494e-05, "loss": 0.1635, "step": 23378 }, { "epoch": 0.41699068954446544, "grad_norm": 0.2884138524532318, "learning_rate": 3.619367255459955e-05, "loss": 0.1376, "step": 23379 }, { "epoch": 0.41700852566617913, "grad_norm": 0.2532219886779785, "learning_rate": 3.6192280765553624e-05, "loss": 0.1283, "step": 23380 }, { "epoch": 0.4170263617878928, "grad_norm": 0.2978946566581726, "learning_rate": 3.619088893312255e-05, "loss": 0.1756, "step": 23381 }, { "epoch": 0.41704419790960656, "grad_norm": 0.22412151098251343, "learning_rate": 3.6189497057311735e-05, "loss": 0.1388, "step": 23382 }, { "epoch": 0.41706203403132025, "grad_norm": 0.31035101413726807, "learning_rate": 3.618810513812655e-05, "loss": 0.2271, "step": 23383 }, { "epoch": 0.41707987015303394, "grad_norm": 0.25164157152175903, "learning_rate": 3.618671317557242e-05, "loss": 0.167, "step": 23384 }, { "epoch": 0.4170977062747476, "grad_norm": 0.23634856939315796, "learning_rate": 3.6185321169654714e-05, "loss": 0.1385, "step": 23385 }, { "epoch": 0.4171155423964613, "grad_norm": 0.2758852243423462, "learning_rate": 3.618392912037884e-05, "loss": 0.174, "step": 23386 }, { "epoch": 0.417133378518175, "grad_norm": 0.3032567799091339, "learning_rate": 3.6182537027750205e-05, "loss": 0.1714, "step": 23387 }, { "epoch": 0.4171512146398887, "grad_norm": 0.25116029381752014, "learning_rate": 3.618114489177418e-05, "loss": 0.1335, "step": 23388 }, { "epoch": 0.4171690507616024, "grad_norm": 0.2649308741092682, "learning_rate": 3.617975271245619e-05, "loss": 0.143, "step": 23389 }, { "epoch": 0.41718688688331607, "grad_norm": 0.35784125328063965, "learning_rate": 3.61783604898016e-05, "loss": 0.2568, "step": 23390 }, { "epoch": 0.4172047230050298, "grad_norm": 0.23808801174163818, "learning_rate": 3.617696822381584e-05, "loss": 0.1657, "step": 23391 }, { "epoch": 0.4172225591267435, "grad_norm": 0.24368254840373993, "learning_rate": 3.617557591450428e-05, "loss": 0.1325, "step": 23392 }, { "epoch": 0.4172403952484572, "grad_norm": 0.32355621457099915, "learning_rate": 3.617418356187233e-05, "loss": 0.22, "step": 23393 }, { "epoch": 0.4172582313701709, "grad_norm": 0.2619912028312683, "learning_rate": 3.617279116592539e-05, "loss": 0.1348, "step": 23394 }, { "epoch": 0.41727606749188456, "grad_norm": 0.2434076964855194, "learning_rate": 3.617139872666885e-05, "loss": 0.1283, "step": 23395 }, { "epoch": 0.41729390361359825, "grad_norm": 0.28391337394714355, "learning_rate": 3.617000624410811e-05, "loss": 0.1334, "step": 23396 }, { "epoch": 0.41731173973531194, "grad_norm": 0.25604772567749023, "learning_rate": 3.6168613718248574e-05, "loss": 0.1102, "step": 23397 }, { "epoch": 0.4173295758570256, "grad_norm": 0.3729875981807709, "learning_rate": 3.616722114909562e-05, "loss": 0.1808, "step": 23398 }, { "epoch": 0.41734741197873937, "grad_norm": 0.28842636942863464, "learning_rate": 3.6165828536654666e-05, "loss": 0.136, "step": 23399 }, { "epoch": 0.41736524810045306, "grad_norm": 0.2618440091609955, "learning_rate": 3.6164435880931116e-05, "loss": 0.1738, "step": 23400 }, { "epoch": 0.41738308422216674, "grad_norm": 0.30283886194229126, "learning_rate": 3.616304318193034e-05, "loss": 0.1184, "step": 23401 }, { "epoch": 0.41740092034388043, "grad_norm": 0.24745404720306396, "learning_rate": 3.616165043965776e-05, "loss": 0.1694, "step": 23402 }, { "epoch": 0.4174187564655941, "grad_norm": 0.18638542294502258, "learning_rate": 3.616025765411876e-05, "loss": 0.1493, "step": 23403 }, { "epoch": 0.4174365925873078, "grad_norm": 0.3083762228488922, "learning_rate": 3.615886482531876e-05, "loss": 0.1582, "step": 23404 }, { "epoch": 0.4174544287090215, "grad_norm": 0.25041407346725464, "learning_rate": 3.615747195326314e-05, "loss": 0.1361, "step": 23405 }, { "epoch": 0.4174722648307352, "grad_norm": 0.2558700144290924, "learning_rate": 3.61560790379573e-05, "loss": 0.1673, "step": 23406 }, { "epoch": 0.4174901009524489, "grad_norm": 0.22794656455516815, "learning_rate": 3.6154686079406645e-05, "loss": 0.1656, "step": 23407 }, { "epoch": 0.4175079370741626, "grad_norm": 0.367709219455719, "learning_rate": 3.6153293077616576e-05, "loss": 0.1676, "step": 23408 }, { "epoch": 0.4175257731958763, "grad_norm": 0.4466867446899414, "learning_rate": 3.6151900032592495e-05, "loss": 0.1031, "step": 23409 }, { "epoch": 0.41754360931759, "grad_norm": 0.2913598120212555, "learning_rate": 3.61505069443398e-05, "loss": 0.2015, "step": 23410 }, { "epoch": 0.4175614454393037, "grad_norm": 0.23446781933307648, "learning_rate": 3.614911381286389e-05, "loss": 0.1509, "step": 23411 }, { "epoch": 0.41757928156101737, "grad_norm": 0.233299121260643, "learning_rate": 3.6147720638170155e-05, "loss": 0.1509, "step": 23412 }, { "epoch": 0.41759711768273106, "grad_norm": 0.26786768436431885, "learning_rate": 3.6146327420264006e-05, "loss": 0.1379, "step": 23413 }, { "epoch": 0.41761495380444474, "grad_norm": 0.25657570362091064, "learning_rate": 3.6144934159150836e-05, "loss": 0.1488, "step": 23414 }, { "epoch": 0.41763278992615843, "grad_norm": 0.196097269654274, "learning_rate": 3.6143540854836065e-05, "loss": 0.1324, "step": 23415 }, { "epoch": 0.4176506260478722, "grad_norm": 0.29617664217948914, "learning_rate": 3.6142147507325074e-05, "loss": 0.1524, "step": 23416 }, { "epoch": 0.41766846216958586, "grad_norm": 0.281398743391037, "learning_rate": 3.614075411662327e-05, "loss": 0.1505, "step": 23417 }, { "epoch": 0.41768629829129955, "grad_norm": 0.2473234385251999, "learning_rate": 3.613936068273606e-05, "loss": 0.11, "step": 23418 }, { "epoch": 0.41770413441301324, "grad_norm": 0.33206626772880554, "learning_rate": 3.613796720566884e-05, "loss": 0.1342, "step": 23419 }, { "epoch": 0.4177219705347269, "grad_norm": 0.2577970027923584, "learning_rate": 3.6136573685427e-05, "loss": 0.1637, "step": 23420 }, { "epoch": 0.4177398066564406, "grad_norm": 0.2510599195957184, "learning_rate": 3.613518012201597e-05, "loss": 0.1521, "step": 23421 }, { "epoch": 0.4177576427781543, "grad_norm": 0.30506131052970886, "learning_rate": 3.613378651544113e-05, "loss": 0.2153, "step": 23422 }, { "epoch": 0.417775478899868, "grad_norm": 0.29952675104141235, "learning_rate": 3.613239286570789e-05, "loss": 0.1853, "step": 23423 }, { "epoch": 0.41779331502158173, "grad_norm": 0.32651522755622864, "learning_rate": 3.613099917282165e-05, "loss": 0.1619, "step": 23424 }, { "epoch": 0.4178111511432954, "grad_norm": 0.4050840437412262, "learning_rate": 3.612960543678781e-05, "loss": 0.1205, "step": 23425 }, { "epoch": 0.4178289872650091, "grad_norm": 0.27712371945381165, "learning_rate": 3.612821165761177e-05, "loss": 0.146, "step": 23426 }, { "epoch": 0.4178468233867228, "grad_norm": 0.23165202140808105, "learning_rate": 3.612681783529894e-05, "loss": 0.1785, "step": 23427 }, { "epoch": 0.4178646595084365, "grad_norm": 0.4011196792125702, "learning_rate": 3.612542396985473e-05, "loss": 0.1583, "step": 23428 }, { "epoch": 0.4178824956301502, "grad_norm": 0.24495315551757812, "learning_rate": 3.612403006128453e-05, "loss": 0.1607, "step": 23429 }, { "epoch": 0.41790033175186386, "grad_norm": 0.2975074350833893, "learning_rate": 3.612263610959375e-05, "loss": 0.1624, "step": 23430 }, { "epoch": 0.41791816787357755, "grad_norm": 0.3701333701610565, "learning_rate": 3.612124211478778e-05, "loss": 0.1314, "step": 23431 }, { "epoch": 0.41793600399529124, "grad_norm": 0.24394141137599945, "learning_rate": 3.6119848076872045e-05, "loss": 0.1496, "step": 23432 }, { "epoch": 0.417953840117005, "grad_norm": 0.280828058719635, "learning_rate": 3.6118453995851935e-05, "loss": 0.1753, "step": 23433 }, { "epoch": 0.41797167623871867, "grad_norm": 0.2302258461713791, "learning_rate": 3.6117059871732856e-05, "loss": 0.1523, "step": 23434 }, { "epoch": 0.41798951236043236, "grad_norm": 0.23049025237560272, "learning_rate": 3.611566570452021e-05, "loss": 0.1209, "step": 23435 }, { "epoch": 0.41800734848214605, "grad_norm": 0.279909610748291, "learning_rate": 3.611427149421941e-05, "loss": 0.2142, "step": 23436 }, { "epoch": 0.41802518460385973, "grad_norm": 0.27000412344932556, "learning_rate": 3.611287724083586e-05, "loss": 0.1513, "step": 23437 }, { "epoch": 0.4180430207255734, "grad_norm": 0.3522058129310608, "learning_rate": 3.6111482944374955e-05, "loss": 0.1016, "step": 23438 }, { "epoch": 0.4180608568472871, "grad_norm": 0.28997260332107544, "learning_rate": 3.61100886048421e-05, "loss": 0.1559, "step": 23439 }, { "epoch": 0.4180786929690008, "grad_norm": 0.2866147756576538, "learning_rate": 3.610869422224271e-05, "loss": 0.1297, "step": 23440 }, { "epoch": 0.41809652909071454, "grad_norm": 0.24868327379226685, "learning_rate": 3.610729979658218e-05, "loss": 0.1465, "step": 23441 }, { "epoch": 0.41811436521242823, "grad_norm": 0.2247244119644165, "learning_rate": 3.610590532786592e-05, "loss": 0.1161, "step": 23442 }, { "epoch": 0.4181322013341419, "grad_norm": 0.3591804802417755, "learning_rate": 3.610451081609934e-05, "loss": 0.225, "step": 23443 }, { "epoch": 0.4181500374558556, "grad_norm": 0.4007553160190582, "learning_rate": 3.610311626128783e-05, "loss": 0.216, "step": 23444 }, { "epoch": 0.4181678735775693, "grad_norm": 0.22225411236286163, "learning_rate": 3.610172166343682e-05, "loss": 0.1122, "step": 23445 }, { "epoch": 0.418185709699283, "grad_norm": 0.1966107189655304, "learning_rate": 3.610032702255169e-05, "loss": 0.1285, "step": 23446 }, { "epoch": 0.41820354582099667, "grad_norm": 0.4930487275123596, "learning_rate": 3.609893233863786e-05, "loss": 0.1185, "step": 23447 }, { "epoch": 0.41822138194271036, "grad_norm": 0.22558413445949554, "learning_rate": 3.609753761170074e-05, "loss": 0.1351, "step": 23448 }, { "epoch": 0.41823921806442405, "grad_norm": 0.22826021909713745, "learning_rate": 3.609614284174574e-05, "loss": 0.1203, "step": 23449 }, { "epoch": 0.4182570541861378, "grad_norm": 0.21137797832489014, "learning_rate": 3.609474802877824e-05, "loss": 0.1116, "step": 23450 }, { "epoch": 0.4182748903078515, "grad_norm": 0.17108070850372314, "learning_rate": 3.609335317280367e-05, "loss": 0.1431, "step": 23451 }, { "epoch": 0.41829272642956516, "grad_norm": 0.2823784649372101, "learning_rate": 3.609195827382744e-05, "loss": 0.1902, "step": 23452 }, { "epoch": 0.41831056255127885, "grad_norm": 0.3045092821121216, "learning_rate": 3.609056333185494e-05, "loss": 0.1488, "step": 23453 }, { "epoch": 0.41832839867299254, "grad_norm": 0.24341613054275513, "learning_rate": 3.608916834689159e-05, "loss": 0.1433, "step": 23454 }, { "epoch": 0.41834623479470623, "grad_norm": 0.30423226952552795, "learning_rate": 3.6087773318942785e-05, "loss": 0.201, "step": 23455 }, { "epoch": 0.4183640709164199, "grad_norm": 0.38588663935661316, "learning_rate": 3.608637824801395e-05, "loss": 0.1285, "step": 23456 }, { "epoch": 0.4183819070381336, "grad_norm": 0.37726718187332153, "learning_rate": 3.608498313411049e-05, "loss": 0.141, "step": 23457 }, { "epoch": 0.41839974315984735, "grad_norm": 0.35581764578819275, "learning_rate": 3.60835879772378e-05, "loss": 0.1408, "step": 23458 }, { "epoch": 0.41841757928156104, "grad_norm": 0.23475539684295654, "learning_rate": 3.608219277740129e-05, "loss": 0.1477, "step": 23459 }, { "epoch": 0.4184354154032747, "grad_norm": 0.3895374834537506, "learning_rate": 3.608079753460638e-05, "loss": 0.2008, "step": 23460 }, { "epoch": 0.4184532515249884, "grad_norm": 0.24002403020858765, "learning_rate": 3.607940224885846e-05, "loss": 0.1717, "step": 23461 }, { "epoch": 0.4184710876467021, "grad_norm": 0.2423631250858307, "learning_rate": 3.6078006920162965e-05, "loss": 0.1195, "step": 23462 }, { "epoch": 0.4184889237684158, "grad_norm": 0.2357056587934494, "learning_rate": 3.6076611548525285e-05, "loss": 0.094, "step": 23463 }, { "epoch": 0.4185067598901295, "grad_norm": 0.2701158821582794, "learning_rate": 3.607521613395083e-05, "loss": 0.1552, "step": 23464 }, { "epoch": 0.41852459601184316, "grad_norm": 0.26690196990966797, "learning_rate": 3.607382067644501e-05, "loss": 0.127, "step": 23465 }, { "epoch": 0.41854243213355685, "grad_norm": 0.33754757046699524, "learning_rate": 3.6072425176013235e-05, "loss": 0.1489, "step": 23466 }, { "epoch": 0.4185602682552706, "grad_norm": 0.24351945519447327, "learning_rate": 3.607102963266092e-05, "loss": 0.1569, "step": 23467 }, { "epoch": 0.4185781043769843, "grad_norm": 0.3732454776763916, "learning_rate": 3.6069634046393476e-05, "loss": 0.1813, "step": 23468 }, { "epoch": 0.41859594049869797, "grad_norm": 0.21357886493206024, "learning_rate": 3.6068238417216295e-05, "loss": 0.1636, "step": 23469 }, { "epoch": 0.41861377662041166, "grad_norm": 0.39409980177879333, "learning_rate": 3.60668427451348e-05, "loss": 0.143, "step": 23470 }, { "epoch": 0.41863161274212535, "grad_norm": 0.34588822722435, "learning_rate": 3.606544703015442e-05, "loss": 0.1199, "step": 23471 }, { "epoch": 0.41864944886383904, "grad_norm": 0.27032163739204407, "learning_rate": 3.606405127228052e-05, "loss": 0.1499, "step": 23472 }, { "epoch": 0.4186672849855527, "grad_norm": 0.44959843158721924, "learning_rate": 3.6062655471518556e-05, "loss": 0.2312, "step": 23473 }, { "epoch": 0.4186851211072664, "grad_norm": 0.2795168459415436, "learning_rate": 3.60612596278739e-05, "loss": 0.1339, "step": 23474 }, { "epoch": 0.41870295722898015, "grad_norm": 0.334330290555954, "learning_rate": 3.605986374135199e-05, "loss": 0.1939, "step": 23475 }, { "epoch": 0.41872079335069384, "grad_norm": 0.2417813390493393, "learning_rate": 3.605846781195823e-05, "loss": 0.1389, "step": 23476 }, { "epoch": 0.41873862947240753, "grad_norm": 0.23533926904201508, "learning_rate": 3.6057071839698026e-05, "loss": 0.1479, "step": 23477 }, { "epoch": 0.4187564655941212, "grad_norm": 0.2055702954530716, "learning_rate": 3.6055675824576795e-05, "loss": 0.1068, "step": 23478 }, { "epoch": 0.4187743017158349, "grad_norm": 0.21435564756393433, "learning_rate": 3.6054279766599946e-05, "loss": 0.1435, "step": 23479 }, { "epoch": 0.4187921378375486, "grad_norm": 0.2539478540420532, "learning_rate": 3.60528836657729e-05, "loss": 0.117, "step": 23480 }, { "epoch": 0.4188099739592623, "grad_norm": 0.326172798871994, "learning_rate": 3.605148752210104e-05, "loss": 0.1344, "step": 23481 }, { "epoch": 0.41882781008097597, "grad_norm": 0.29429468512535095, "learning_rate": 3.60500913355898e-05, "loss": 0.1833, "step": 23482 }, { "epoch": 0.4188456462026897, "grad_norm": 0.2629934549331665, "learning_rate": 3.60486951062446e-05, "loss": 0.1215, "step": 23483 }, { "epoch": 0.4188634823244034, "grad_norm": 0.22771377861499786, "learning_rate": 3.604729883407084e-05, "loss": 0.1554, "step": 23484 }, { "epoch": 0.4188813184461171, "grad_norm": 0.26041877269744873, "learning_rate": 3.6045902519073925e-05, "loss": 0.1431, "step": 23485 }, { "epoch": 0.4188991545678308, "grad_norm": 0.2677501440048218, "learning_rate": 3.604450616125929e-05, "loss": 0.1812, "step": 23486 }, { "epoch": 0.41891699068954447, "grad_norm": 0.2599920630455017, "learning_rate": 3.6043109760632326e-05, "loss": 0.1287, "step": 23487 }, { "epoch": 0.41893482681125815, "grad_norm": 0.2790832817554474, "learning_rate": 3.604171331719846e-05, "loss": 0.1486, "step": 23488 }, { "epoch": 0.41895266293297184, "grad_norm": 0.23356810212135315, "learning_rate": 3.60403168309631e-05, "loss": 0.1565, "step": 23489 }, { "epoch": 0.41897049905468553, "grad_norm": 0.2652481496334076, "learning_rate": 3.603892030193164e-05, "loss": 0.0968, "step": 23490 }, { "epoch": 0.4189883351763992, "grad_norm": 0.22331734001636505, "learning_rate": 3.6037523730109534e-05, "loss": 0.1675, "step": 23491 }, { "epoch": 0.41900617129811296, "grad_norm": 0.237565815448761, "learning_rate": 3.6036127115502164e-05, "loss": 0.1264, "step": 23492 }, { "epoch": 0.41902400741982665, "grad_norm": 0.1968141794204712, "learning_rate": 3.603473045811496e-05, "loss": 0.0989, "step": 23493 }, { "epoch": 0.41904184354154034, "grad_norm": 0.2734472453594208, "learning_rate": 3.603333375795333e-05, "loss": 0.2273, "step": 23494 }, { "epoch": 0.419059679663254, "grad_norm": 0.23806653916835785, "learning_rate": 3.6031937015022676e-05, "loss": 0.1582, "step": 23495 }, { "epoch": 0.4190775157849677, "grad_norm": 0.244029238820076, "learning_rate": 3.6030540229328434e-05, "loss": 0.1248, "step": 23496 }, { "epoch": 0.4190953519066814, "grad_norm": 0.2946047782897949, "learning_rate": 3.6029143400876e-05, "loss": 0.0984, "step": 23497 }, { "epoch": 0.4191131880283951, "grad_norm": 0.23965643346309662, "learning_rate": 3.60277465296708e-05, "loss": 0.1635, "step": 23498 }, { "epoch": 0.4191310241501088, "grad_norm": 0.21011687815189362, "learning_rate": 3.602634961571825e-05, "loss": 0.1697, "step": 23499 }, { "epoch": 0.4191488602718225, "grad_norm": 0.37227901816368103, "learning_rate": 3.6024952659023756e-05, "loss": 0.162, "step": 23500 }, { "epoch": 0.4191666963935362, "grad_norm": 0.22852960228919983, "learning_rate": 3.6023555659592744e-05, "loss": 0.1501, "step": 23501 }, { "epoch": 0.4191845325152499, "grad_norm": 0.23039503395557404, "learning_rate": 3.602215861743062e-05, "loss": 0.1374, "step": 23502 }, { "epoch": 0.4192023686369636, "grad_norm": 0.20760031044483185, "learning_rate": 3.602076153254279e-05, "loss": 0.1252, "step": 23503 }, { "epoch": 0.4192202047586773, "grad_norm": 0.22538182139396667, "learning_rate": 3.60193644049347e-05, "loss": 0.1374, "step": 23504 }, { "epoch": 0.41923804088039096, "grad_norm": 0.2839062213897705, "learning_rate": 3.601796723461174e-05, "loss": 0.1305, "step": 23505 }, { "epoch": 0.41925587700210465, "grad_norm": 0.37018483877182007, "learning_rate": 3.601657002157934e-05, "loss": 0.1736, "step": 23506 }, { "epoch": 0.41927371312381834, "grad_norm": 0.22349640727043152, "learning_rate": 3.60151727658429e-05, "loss": 0.1743, "step": 23507 }, { "epoch": 0.419291549245532, "grad_norm": 0.28234776854515076, "learning_rate": 3.601377546740785e-05, "loss": 0.1752, "step": 23508 }, { "epoch": 0.41930938536724577, "grad_norm": 0.26952725648880005, "learning_rate": 3.60123781262796e-05, "loss": 0.1789, "step": 23509 }, { "epoch": 0.41932722148895946, "grad_norm": 0.2512272298336029, "learning_rate": 3.601098074246357e-05, "loss": 0.103, "step": 23510 }, { "epoch": 0.41934505761067314, "grad_norm": 0.2429272085428238, "learning_rate": 3.600958331596517e-05, "loss": 0.184, "step": 23511 }, { "epoch": 0.41936289373238683, "grad_norm": 0.2635399103164673, "learning_rate": 3.600818584678983e-05, "loss": 0.1704, "step": 23512 }, { "epoch": 0.4193807298541005, "grad_norm": 0.2201332449913025, "learning_rate": 3.600678833494296e-05, "loss": 0.1292, "step": 23513 }, { "epoch": 0.4193985659758142, "grad_norm": 0.27403369545936584, "learning_rate": 3.600539078042998e-05, "loss": 0.1675, "step": 23514 }, { "epoch": 0.4194164020975279, "grad_norm": 0.34280261397361755, "learning_rate": 3.6003993183256293e-05, "loss": 0.1432, "step": 23515 }, { "epoch": 0.4194342382192416, "grad_norm": 0.3418848514556885, "learning_rate": 3.6002595543427336e-05, "loss": 0.185, "step": 23516 }, { "epoch": 0.4194520743409553, "grad_norm": 0.287748783826828, "learning_rate": 3.6001197860948515e-05, "loss": 0.1434, "step": 23517 }, { "epoch": 0.419469910462669, "grad_norm": 0.26222291588783264, "learning_rate": 3.599980013582525e-05, "loss": 0.1789, "step": 23518 }, { "epoch": 0.4194877465843827, "grad_norm": 0.20043300092220306, "learning_rate": 3.5998402368062964e-05, "loss": 0.1676, "step": 23519 }, { "epoch": 0.4195055827060964, "grad_norm": 0.38191646337509155, "learning_rate": 3.599700455766707e-05, "loss": 0.1887, "step": 23520 }, { "epoch": 0.4195234188278101, "grad_norm": 0.3139938414096832, "learning_rate": 3.599560670464299e-05, "loss": 0.1453, "step": 23521 }, { "epoch": 0.41954125494952377, "grad_norm": 0.31105825304985046, "learning_rate": 3.599420880899614e-05, "loss": 0.1366, "step": 23522 }, { "epoch": 0.41955909107123746, "grad_norm": 0.19444000720977783, "learning_rate": 3.599281087073194e-05, "loss": 0.1115, "step": 23523 }, { "epoch": 0.41957692719295114, "grad_norm": 0.2868615984916687, "learning_rate": 3.5991412889855804e-05, "loss": 0.2068, "step": 23524 }, { "epoch": 0.4195947633146649, "grad_norm": 0.24259425699710846, "learning_rate": 3.599001486637315e-05, "loss": 0.1991, "step": 23525 }, { "epoch": 0.4196125994363786, "grad_norm": 0.19913876056671143, "learning_rate": 3.598861680028942e-05, "loss": 0.1354, "step": 23526 }, { "epoch": 0.41963043555809226, "grad_norm": 0.24840469658374786, "learning_rate": 3.598721869161001e-05, "loss": 0.1858, "step": 23527 }, { "epoch": 0.41964827167980595, "grad_norm": 0.3437330424785614, "learning_rate": 3.598582054034034e-05, "loss": 0.1271, "step": 23528 }, { "epoch": 0.41966610780151964, "grad_norm": 0.25164932012557983, "learning_rate": 3.5984422346485835e-05, "loss": 0.14, "step": 23529 }, { "epoch": 0.4196839439232333, "grad_norm": 0.2910948395729065, "learning_rate": 3.5983024110051924e-05, "loss": 0.1735, "step": 23530 }, { "epoch": 0.419701780044947, "grad_norm": 0.22296029329299927, "learning_rate": 3.598162583104401e-05, "loss": 0.1106, "step": 23531 }, { "epoch": 0.4197196161666607, "grad_norm": 0.29615527391433716, "learning_rate": 3.598022750946752e-05, "loss": 0.1809, "step": 23532 }, { "epoch": 0.4197374522883744, "grad_norm": 0.29448026418685913, "learning_rate": 3.597882914532788e-05, "loss": 0.1659, "step": 23533 }, { "epoch": 0.41975528841008813, "grad_norm": 0.26123934984207153, "learning_rate": 3.5977430738630505e-05, "loss": 0.1791, "step": 23534 }, { "epoch": 0.4197731245318018, "grad_norm": 0.3136507570743561, "learning_rate": 3.597603228938082e-05, "loss": 0.1743, "step": 23535 }, { "epoch": 0.4197909606535155, "grad_norm": 0.24377232789993286, "learning_rate": 3.597463379758424e-05, "loss": 0.1107, "step": 23536 }, { "epoch": 0.4198087967752292, "grad_norm": 0.2058449238538742, "learning_rate": 3.5973235263246184e-05, "loss": 0.1008, "step": 23537 }, { "epoch": 0.4198266328969429, "grad_norm": 0.4670116901397705, "learning_rate": 3.597183668637209e-05, "loss": 0.1907, "step": 23538 }, { "epoch": 0.4198444690186566, "grad_norm": 0.26883623003959656, "learning_rate": 3.597043806696735e-05, "loss": 0.1781, "step": 23539 }, { "epoch": 0.41986230514037026, "grad_norm": 0.27269601821899414, "learning_rate": 3.596903940503742e-05, "loss": 0.1546, "step": 23540 }, { "epoch": 0.41988014126208395, "grad_norm": 0.2307538241147995, "learning_rate": 3.5967640700587693e-05, "loss": 0.1669, "step": 23541 }, { "epoch": 0.4198979773837977, "grad_norm": 0.26575759053230286, "learning_rate": 3.5966241953623614e-05, "loss": 0.0946, "step": 23542 }, { "epoch": 0.4199158135055114, "grad_norm": 0.2895721197128296, "learning_rate": 3.596484316415058e-05, "loss": 0.179, "step": 23543 }, { "epoch": 0.41993364962722507, "grad_norm": 0.27788469195365906, "learning_rate": 3.596344433217404e-05, "loss": 0.2152, "step": 23544 }, { "epoch": 0.41995148574893876, "grad_norm": 0.42665326595306396, "learning_rate": 3.596204545769939e-05, "loss": 0.2037, "step": 23545 }, { "epoch": 0.41996932187065245, "grad_norm": 0.21723249554634094, "learning_rate": 3.596064654073207e-05, "loss": 0.0994, "step": 23546 }, { "epoch": 0.41998715799236613, "grad_norm": 0.21885307133197784, "learning_rate": 3.5959247581277513e-05, "loss": 0.1357, "step": 23547 }, { "epoch": 0.4200049941140798, "grad_norm": 0.30911707878112793, "learning_rate": 3.595784857934111e-05, "loss": 0.1402, "step": 23548 }, { "epoch": 0.4200228302357935, "grad_norm": 0.23402488231658936, "learning_rate": 3.5956449534928304e-05, "loss": 0.1537, "step": 23549 }, { "epoch": 0.4200406663575072, "grad_norm": 0.18036618828773499, "learning_rate": 3.595505044804452e-05, "loss": 0.1235, "step": 23550 }, { "epoch": 0.42005850247922094, "grad_norm": 0.488210529088974, "learning_rate": 3.595365131869518e-05, "loss": 0.1756, "step": 23551 }, { "epoch": 0.42007633860093463, "grad_norm": 0.29397720098495483, "learning_rate": 3.595225214688569e-05, "loss": 0.1681, "step": 23552 }, { "epoch": 0.4200941747226483, "grad_norm": 0.453151136636734, "learning_rate": 3.59508529326215e-05, "loss": 0.1668, "step": 23553 }, { "epoch": 0.420112010844362, "grad_norm": 0.3504691421985626, "learning_rate": 3.5949453675908016e-05, "loss": 0.2063, "step": 23554 }, { "epoch": 0.4201298469660757, "grad_norm": 0.3381533920764923, "learning_rate": 3.594805437675067e-05, "loss": 0.178, "step": 23555 }, { "epoch": 0.4201476830877894, "grad_norm": 0.2532208561897278, "learning_rate": 3.5946655035154886e-05, "loss": 0.1979, "step": 23556 }, { "epoch": 0.42016551920950307, "grad_norm": 0.308187335729599, "learning_rate": 3.5945255651126085e-05, "loss": 0.1998, "step": 23557 }, { "epoch": 0.42018335533121676, "grad_norm": 0.2154446244239807, "learning_rate": 3.5943856224669695e-05, "loss": 0.0835, "step": 23558 }, { "epoch": 0.4202011914529305, "grad_norm": 0.3717007040977478, "learning_rate": 3.5942456755791124e-05, "loss": 0.2221, "step": 23559 }, { "epoch": 0.4202190275746442, "grad_norm": 0.2461913526058197, "learning_rate": 3.5941057244495826e-05, "loss": 0.1213, "step": 23560 }, { "epoch": 0.4202368636963579, "grad_norm": 0.27049311995506287, "learning_rate": 3.593965769078921e-05, "loss": 0.1733, "step": 23561 }, { "epoch": 0.42025469981807156, "grad_norm": 0.22452659904956818, "learning_rate": 3.59382580946767e-05, "loss": 0.1562, "step": 23562 }, { "epoch": 0.42027253593978525, "grad_norm": 0.314168781042099, "learning_rate": 3.593685845616372e-05, "loss": 0.1276, "step": 23563 }, { "epoch": 0.42029037206149894, "grad_norm": 0.26814785599708557, "learning_rate": 3.593545877525571e-05, "loss": 0.1752, "step": 23564 }, { "epoch": 0.42030820818321263, "grad_norm": 0.24847298860549927, "learning_rate": 3.593405905195807e-05, "loss": 0.1444, "step": 23565 }, { "epoch": 0.4203260443049263, "grad_norm": 0.33460527658462524, "learning_rate": 3.593265928627625e-05, "loss": 0.1459, "step": 23566 }, { "epoch": 0.42034388042664, "grad_norm": 0.2722640931606293, "learning_rate": 3.593125947821566e-05, "loss": 0.1548, "step": 23567 }, { "epoch": 0.42036171654835375, "grad_norm": 0.26249417662620544, "learning_rate": 3.592985962778174e-05, "loss": 0.1591, "step": 23568 }, { "epoch": 0.42037955267006744, "grad_norm": 0.24926874041557312, "learning_rate": 3.5928459734979915e-05, "loss": 0.1344, "step": 23569 }, { "epoch": 0.4203973887917811, "grad_norm": 0.26802873611450195, "learning_rate": 3.5927059799815595e-05, "loss": 0.1956, "step": 23570 }, { "epoch": 0.4204152249134948, "grad_norm": 0.5149135589599609, "learning_rate": 3.592565982229422e-05, "loss": 0.1341, "step": 23571 }, { "epoch": 0.4204330610352085, "grad_norm": 0.20482110977172852, "learning_rate": 3.592425980242121e-05, "loss": 0.1312, "step": 23572 }, { "epoch": 0.4204508971569222, "grad_norm": 0.28916677832603455, "learning_rate": 3.5922859740202e-05, "loss": 0.136, "step": 23573 }, { "epoch": 0.4204687332786359, "grad_norm": 0.2726137936115265, "learning_rate": 3.592145963564201e-05, "loss": 0.1812, "step": 23574 }, { "epoch": 0.42048656940034956, "grad_norm": 0.21922151744365692, "learning_rate": 3.5920059488746674e-05, "loss": 0.1384, "step": 23575 }, { "epoch": 0.4205044055220633, "grad_norm": 0.26206812262535095, "learning_rate": 3.5918659299521414e-05, "loss": 0.1503, "step": 23576 }, { "epoch": 0.420522241643777, "grad_norm": 0.24848762154579163, "learning_rate": 3.591725906797166e-05, "loss": 0.1702, "step": 23577 }, { "epoch": 0.4205400777654907, "grad_norm": 0.25790759921073914, "learning_rate": 3.591585879410284e-05, "loss": 0.1202, "step": 23578 }, { "epoch": 0.42055791388720437, "grad_norm": 0.2853497564792633, "learning_rate": 3.591445847792038e-05, "loss": 0.1392, "step": 23579 }, { "epoch": 0.42057575000891806, "grad_norm": 0.29755979776382446, "learning_rate": 3.5913058119429706e-05, "loss": 0.1564, "step": 23580 }, { "epoch": 0.42059358613063175, "grad_norm": 0.25221002101898193, "learning_rate": 3.591165771863625e-05, "loss": 0.1818, "step": 23581 }, { "epoch": 0.42061142225234543, "grad_norm": 0.2708946168422699, "learning_rate": 3.5910257275545445e-05, "loss": 0.1515, "step": 23582 }, { "epoch": 0.4206292583740591, "grad_norm": 0.24300266802310944, "learning_rate": 3.590885679016271e-05, "loss": 0.1553, "step": 23583 }, { "epoch": 0.42064709449577287, "grad_norm": 0.2931252121925354, "learning_rate": 3.5907456262493485e-05, "loss": 0.1711, "step": 23584 }, { "epoch": 0.42066493061748655, "grad_norm": 0.3231823742389679, "learning_rate": 3.5906055692543186e-05, "loss": 0.2082, "step": 23585 }, { "epoch": 0.42068276673920024, "grad_norm": 0.25302112102508545, "learning_rate": 3.590465508031725e-05, "loss": 0.1334, "step": 23586 }, { "epoch": 0.42070060286091393, "grad_norm": 0.25858011841773987, "learning_rate": 3.59032544258211e-05, "loss": 0.1754, "step": 23587 }, { "epoch": 0.4207184389826276, "grad_norm": 0.30044376850128174, "learning_rate": 3.590185372906018e-05, "loss": 0.1398, "step": 23588 }, { "epoch": 0.4207362751043413, "grad_norm": 0.2664319574832916, "learning_rate": 3.59004529900399e-05, "loss": 0.1316, "step": 23589 }, { "epoch": 0.420754111226055, "grad_norm": 0.23234841227531433, "learning_rate": 3.58990522087657e-05, "loss": 0.1166, "step": 23590 }, { "epoch": 0.4207719473477687, "grad_norm": 0.3134883642196655, "learning_rate": 3.589765138524301e-05, "loss": 0.2265, "step": 23591 }, { "epoch": 0.42078978346948237, "grad_norm": 0.21505266427993774, "learning_rate": 3.589625051947727e-05, "loss": 0.1607, "step": 23592 }, { "epoch": 0.4208076195911961, "grad_norm": 0.238050639629364, "learning_rate": 3.589484961147389e-05, "loss": 0.1752, "step": 23593 }, { "epoch": 0.4208254557129098, "grad_norm": 0.2429279386997223, "learning_rate": 3.5893448661238305e-05, "loss": 0.1541, "step": 23594 }, { "epoch": 0.4208432918346235, "grad_norm": 0.3417379856109619, "learning_rate": 3.5892047668775964e-05, "loss": 0.1447, "step": 23595 }, { "epoch": 0.4208611279563372, "grad_norm": 0.31697213649749756, "learning_rate": 3.589064663409227e-05, "loss": 0.1602, "step": 23596 }, { "epoch": 0.42087896407805087, "grad_norm": 0.2814241051673889, "learning_rate": 3.588924555719268e-05, "loss": 0.1461, "step": 23597 }, { "epoch": 0.42089680019976455, "grad_norm": 0.4176954925060272, "learning_rate": 3.588784443808261e-05, "loss": 0.191, "step": 23598 }, { "epoch": 0.42091463632147824, "grad_norm": 0.23985715210437775, "learning_rate": 3.588644327676749e-05, "loss": 0.1549, "step": 23599 }, { "epoch": 0.42093247244319193, "grad_norm": 0.32432979345321655, "learning_rate": 3.588504207325276e-05, "loss": 0.1343, "step": 23600 }, { "epoch": 0.4209503085649057, "grad_norm": 0.22184689342975616, "learning_rate": 3.588364082754384e-05, "loss": 0.1881, "step": 23601 }, { "epoch": 0.42096814468661936, "grad_norm": 0.24987797439098358, "learning_rate": 3.588223953964618e-05, "loss": 0.1482, "step": 23602 }, { "epoch": 0.42098598080833305, "grad_norm": 0.3044281601905823, "learning_rate": 3.5880838209565195e-05, "loss": 0.1386, "step": 23603 }, { "epoch": 0.42100381693004674, "grad_norm": 0.2577959895133972, "learning_rate": 3.5879436837306325e-05, "loss": 0.1497, "step": 23604 }, { "epoch": 0.4210216530517604, "grad_norm": 0.2630673050880432, "learning_rate": 3.5878035422875e-05, "loss": 0.1295, "step": 23605 }, { "epoch": 0.4210394891734741, "grad_norm": 0.22514985501766205, "learning_rate": 3.5876633966276645e-05, "loss": 0.1306, "step": 23606 }, { "epoch": 0.4210573252951878, "grad_norm": 0.32045015692710876, "learning_rate": 3.5875232467516704e-05, "loss": 0.1343, "step": 23607 }, { "epoch": 0.4210751614169015, "grad_norm": 0.234904944896698, "learning_rate": 3.587383092660062e-05, "loss": 0.1422, "step": 23608 }, { "epoch": 0.4210929975386152, "grad_norm": 0.20119354128837585, "learning_rate": 3.5872429343533793e-05, "loss": 0.1428, "step": 23609 }, { "epoch": 0.4211108336603289, "grad_norm": 0.23028001189231873, "learning_rate": 3.587102771832168e-05, "loss": 0.1737, "step": 23610 }, { "epoch": 0.4211286697820426, "grad_norm": 0.18881520628929138, "learning_rate": 3.586962605096971e-05, "loss": 0.1355, "step": 23611 }, { "epoch": 0.4211465059037563, "grad_norm": 0.2693285048007965, "learning_rate": 3.586822434148332e-05, "loss": 0.1229, "step": 23612 }, { "epoch": 0.42116434202547, "grad_norm": 0.29025503993034363, "learning_rate": 3.586682258986793e-05, "loss": 0.166, "step": 23613 }, { "epoch": 0.4211821781471837, "grad_norm": 0.271465539932251, "learning_rate": 3.586542079612899e-05, "loss": 0.126, "step": 23614 }, { "epoch": 0.42120001426889736, "grad_norm": 0.2229209691286087, "learning_rate": 3.586401896027192e-05, "loss": 0.1378, "step": 23615 }, { "epoch": 0.42121785039061105, "grad_norm": 0.24449051916599274, "learning_rate": 3.5862617082302164e-05, "loss": 0.1703, "step": 23616 }, { "epoch": 0.42123568651232474, "grad_norm": 0.25472691655158997, "learning_rate": 3.586121516222515e-05, "loss": 0.1469, "step": 23617 }, { "epoch": 0.4212535226340385, "grad_norm": 0.20706047117710114, "learning_rate": 3.585981320004632e-05, "loss": 0.1583, "step": 23618 }, { "epoch": 0.42127135875575217, "grad_norm": 0.34037670493125916, "learning_rate": 3.58584111957711e-05, "loss": 0.1485, "step": 23619 }, { "epoch": 0.42128919487746586, "grad_norm": 0.32346442341804504, "learning_rate": 3.5857009149404927e-05, "loss": 0.1611, "step": 23620 }, { "epoch": 0.42130703099917954, "grad_norm": 0.25114670395851135, "learning_rate": 3.585560706095323e-05, "loss": 0.1636, "step": 23621 }, { "epoch": 0.42132486712089323, "grad_norm": 0.2218189686536789, "learning_rate": 3.585420493042146e-05, "loss": 0.1573, "step": 23622 }, { "epoch": 0.4213427032426069, "grad_norm": 0.2421419471502304, "learning_rate": 3.5852802757815044e-05, "loss": 0.1961, "step": 23623 }, { "epoch": 0.4213605393643206, "grad_norm": 0.3725896179676056, "learning_rate": 3.585140054313941e-05, "loss": 0.1619, "step": 23624 }, { "epoch": 0.4213783754860343, "grad_norm": 0.24416519701480865, "learning_rate": 3.5849998286400005e-05, "loss": 0.174, "step": 23625 }, { "epoch": 0.421396211607748, "grad_norm": 0.26666975021362305, "learning_rate": 3.584859598760225e-05, "loss": 0.156, "step": 23626 }, { "epoch": 0.4214140477294617, "grad_norm": 0.1893339902162552, "learning_rate": 3.58471936467516e-05, "loss": 0.1152, "step": 23627 }, { "epoch": 0.4214318838511754, "grad_norm": 0.29018595814704895, "learning_rate": 3.584579126385347e-05, "loss": 0.1275, "step": 23628 }, { "epoch": 0.4214497199728891, "grad_norm": 0.30612584948539734, "learning_rate": 3.5844388838913316e-05, "loss": 0.1721, "step": 23629 }, { "epoch": 0.4214675560946028, "grad_norm": 0.2823601961135864, "learning_rate": 3.584298637193656e-05, "loss": 0.1406, "step": 23630 }, { "epoch": 0.4214853922163165, "grad_norm": 0.2962504029273987, "learning_rate": 3.584158386292865e-05, "loss": 0.1744, "step": 23631 }, { "epoch": 0.42150322833803017, "grad_norm": 0.24475839734077454, "learning_rate": 3.584018131189502e-05, "loss": 0.1654, "step": 23632 }, { "epoch": 0.42152106445974385, "grad_norm": 0.45434901118278503, "learning_rate": 3.583877871884109e-05, "loss": 0.191, "step": 23633 }, { "epoch": 0.42153890058145754, "grad_norm": 0.2057182639837265, "learning_rate": 3.5837376083772315e-05, "loss": 0.1663, "step": 23634 }, { "epoch": 0.4215567367031713, "grad_norm": 0.2028048038482666, "learning_rate": 3.583597340669413e-05, "loss": 0.1277, "step": 23635 }, { "epoch": 0.421574572824885, "grad_norm": 0.45485812425613403, "learning_rate": 3.583457068761197e-05, "loss": 0.1454, "step": 23636 }, { "epoch": 0.42159240894659866, "grad_norm": 0.253262996673584, "learning_rate": 3.583316792653127e-05, "loss": 0.1633, "step": 23637 }, { "epoch": 0.42161024506831235, "grad_norm": 0.2653898000717163, "learning_rate": 3.5831765123457474e-05, "loss": 0.1603, "step": 23638 }, { "epoch": 0.42162808119002604, "grad_norm": 0.326353520154953, "learning_rate": 3.5830362278396004e-05, "loss": 0.1441, "step": 23639 }, { "epoch": 0.4216459173117397, "grad_norm": 0.34983694553375244, "learning_rate": 3.582895939135232e-05, "loss": 0.1871, "step": 23640 }, { "epoch": 0.4216637534334534, "grad_norm": 0.3122880458831787, "learning_rate": 3.582755646233185e-05, "loss": 0.1882, "step": 23641 }, { "epoch": 0.4216815895551671, "grad_norm": 0.2588161528110504, "learning_rate": 3.582615349134002e-05, "loss": 0.1258, "step": 23642 }, { "epoch": 0.42169942567688085, "grad_norm": 0.2501724660396576, "learning_rate": 3.582475047838229e-05, "loss": 0.117, "step": 23643 }, { "epoch": 0.42171726179859453, "grad_norm": 0.19072869420051575, "learning_rate": 3.582334742346408e-05, "loss": 0.1248, "step": 23644 }, { "epoch": 0.4217350979203082, "grad_norm": 0.29543107748031616, "learning_rate": 3.5821944326590836e-05, "loss": 0.1902, "step": 23645 }, { "epoch": 0.4217529340420219, "grad_norm": 0.30165714025497437, "learning_rate": 3.5820541187768006e-05, "loss": 0.1254, "step": 23646 }, { "epoch": 0.4217707701637356, "grad_norm": 0.29045385122299194, "learning_rate": 3.581913800700103e-05, "loss": 0.0816, "step": 23647 }, { "epoch": 0.4217886062854493, "grad_norm": 0.24130268394947052, "learning_rate": 3.581773478429532e-05, "loss": 0.1064, "step": 23648 }, { "epoch": 0.421806442407163, "grad_norm": 0.24183712899684906, "learning_rate": 3.581633151965634e-05, "loss": 0.2029, "step": 23649 }, { "epoch": 0.42182427852887666, "grad_norm": 0.32043445110321045, "learning_rate": 3.581492821308953e-05, "loss": 0.1578, "step": 23650 }, { "epoch": 0.42184211465059035, "grad_norm": 0.22469887137413025, "learning_rate": 3.581352486460031e-05, "loss": 0.1023, "step": 23651 }, { "epoch": 0.4218599507723041, "grad_norm": 0.23213882744312286, "learning_rate": 3.581212147419414e-05, "loss": 0.1526, "step": 23652 }, { "epoch": 0.4218777868940178, "grad_norm": 0.3670051097869873, "learning_rate": 3.581071804187646e-05, "loss": 0.1764, "step": 23653 }, { "epoch": 0.42189562301573147, "grad_norm": 0.262602835893631, "learning_rate": 3.580931456765269e-05, "loss": 0.1891, "step": 23654 }, { "epoch": 0.42191345913744516, "grad_norm": 0.22695784270763397, "learning_rate": 3.580791105152829e-05, "loss": 0.1318, "step": 23655 }, { "epoch": 0.42193129525915885, "grad_norm": 0.2450912594795227, "learning_rate": 3.580650749350869e-05, "loss": 0.1625, "step": 23656 }, { "epoch": 0.42194913138087253, "grad_norm": 0.21248646080493927, "learning_rate": 3.580510389359934e-05, "loss": 0.1585, "step": 23657 }, { "epoch": 0.4219669675025862, "grad_norm": 0.41088610887527466, "learning_rate": 3.5803700251805674e-05, "loss": 0.1674, "step": 23658 }, { "epoch": 0.4219848036242999, "grad_norm": 0.2377018928527832, "learning_rate": 3.580229656813313e-05, "loss": 0.1573, "step": 23659 }, { "epoch": 0.42200263974601365, "grad_norm": 0.26952099800109863, "learning_rate": 3.580089284258716e-05, "loss": 0.1353, "step": 23660 }, { "epoch": 0.42202047586772734, "grad_norm": 0.28703954815864563, "learning_rate": 3.579948907517319e-05, "loss": 0.144, "step": 23661 }, { "epoch": 0.42203831198944103, "grad_norm": 0.22947105765342712, "learning_rate": 3.579808526589668e-05, "loss": 0.1239, "step": 23662 }, { "epoch": 0.4220561481111547, "grad_norm": 0.2878875434398651, "learning_rate": 3.579668141476305e-05, "loss": 0.2091, "step": 23663 }, { "epoch": 0.4220739842328684, "grad_norm": 0.19706681370735168, "learning_rate": 3.579527752177777e-05, "loss": 0.1599, "step": 23664 }, { "epoch": 0.4220918203545821, "grad_norm": 0.2394402027130127, "learning_rate": 3.579387358694625e-05, "loss": 0.1409, "step": 23665 }, { "epoch": 0.4221096564762958, "grad_norm": 0.2105996161699295, "learning_rate": 3.579246961027396e-05, "loss": 0.2027, "step": 23666 }, { "epoch": 0.42212749259800947, "grad_norm": 0.18609221279621124, "learning_rate": 3.579106559176632e-05, "loss": 0.131, "step": 23667 }, { "epoch": 0.42214532871972316, "grad_norm": 0.24411694705486298, "learning_rate": 3.578966153142879e-05, "loss": 0.1485, "step": 23668 }, { "epoch": 0.4221631648414369, "grad_norm": 0.2051008641719818, "learning_rate": 3.5788257429266804e-05, "loss": 0.1254, "step": 23669 }, { "epoch": 0.4221810009631506, "grad_norm": 0.27412158250808716, "learning_rate": 3.5786853285285805e-05, "loss": 0.1747, "step": 23670 }, { "epoch": 0.4221988370848643, "grad_norm": 0.2188235968351364, "learning_rate": 3.578544909949123e-05, "loss": 0.1357, "step": 23671 }, { "epoch": 0.42221667320657796, "grad_norm": 0.226033017039299, "learning_rate": 3.578404487188854e-05, "loss": 0.1606, "step": 23672 }, { "epoch": 0.42223450932829165, "grad_norm": 0.2508758306503296, "learning_rate": 3.5782640602483166e-05, "loss": 0.1499, "step": 23673 }, { "epoch": 0.42225234545000534, "grad_norm": 0.31331875920295715, "learning_rate": 3.578123629128055e-05, "loss": 0.1627, "step": 23674 }, { "epoch": 0.422270181571719, "grad_norm": 0.3454189598560333, "learning_rate": 3.577983193828615e-05, "loss": 0.1701, "step": 23675 }, { "epoch": 0.4222880176934327, "grad_norm": 0.28553205728530884, "learning_rate": 3.5778427543505375e-05, "loss": 0.1489, "step": 23676 }, { "epoch": 0.42230585381514646, "grad_norm": 0.22232919931411743, "learning_rate": 3.5777023106943706e-05, "loss": 0.1596, "step": 23677 }, { "epoch": 0.42232368993686015, "grad_norm": 0.2846347987651825, "learning_rate": 3.577561862860657e-05, "loss": 0.1272, "step": 23678 }, { "epoch": 0.42234152605857384, "grad_norm": 0.3221072554588318, "learning_rate": 3.577421410849942e-05, "loss": 0.1931, "step": 23679 }, { "epoch": 0.4223593621802875, "grad_norm": 0.21035178005695343, "learning_rate": 3.577280954662769e-05, "loss": 0.167, "step": 23680 }, { "epoch": 0.4223771983020012, "grad_norm": 0.2143920660018921, "learning_rate": 3.5771404942996825e-05, "loss": 0.126, "step": 23681 }, { "epoch": 0.4223950344237149, "grad_norm": 0.5835981369018555, "learning_rate": 3.577000029761228e-05, "loss": 0.151, "step": 23682 }, { "epoch": 0.4224128705454286, "grad_norm": 0.38990381360054016, "learning_rate": 3.5768595610479496e-05, "loss": 0.1893, "step": 23683 }, { "epoch": 0.4224307066671423, "grad_norm": 0.28467774391174316, "learning_rate": 3.5767190881603904e-05, "loss": 0.1568, "step": 23684 }, { "epoch": 0.422448542788856, "grad_norm": 0.27227291464805603, "learning_rate": 3.576578611099097e-05, "loss": 0.127, "step": 23685 }, { "epoch": 0.4224663789105697, "grad_norm": 0.2288699597120285, "learning_rate": 3.576438129864613e-05, "loss": 0.1356, "step": 23686 }, { "epoch": 0.4224842150322834, "grad_norm": 0.2638051211833954, "learning_rate": 3.5762976444574835e-05, "loss": 0.1392, "step": 23687 }, { "epoch": 0.4225020511539971, "grad_norm": 0.30850347876548767, "learning_rate": 3.576157154878253e-05, "loss": 0.1803, "step": 23688 }, { "epoch": 0.42251988727571077, "grad_norm": 0.2594379782676697, "learning_rate": 3.5760166611274646e-05, "loss": 0.1249, "step": 23689 }, { "epoch": 0.42253772339742446, "grad_norm": 0.27456754446029663, "learning_rate": 3.575876163205664e-05, "loss": 0.2294, "step": 23690 }, { "epoch": 0.42255555951913815, "grad_norm": 0.2205992043018341, "learning_rate": 3.575735661113396e-05, "loss": 0.1522, "step": 23691 }, { "epoch": 0.42257339564085183, "grad_norm": 0.4007713496685028, "learning_rate": 3.575595154851205e-05, "loss": 0.1421, "step": 23692 }, { "epoch": 0.4225912317625655, "grad_norm": 0.2664162516593933, "learning_rate": 3.575454644419636e-05, "loss": 0.1366, "step": 23693 }, { "epoch": 0.42260906788427927, "grad_norm": 0.2566847503185272, "learning_rate": 3.575314129819233e-05, "loss": 0.1533, "step": 23694 }, { "epoch": 0.42262690400599295, "grad_norm": 0.30679431557655334, "learning_rate": 3.575173611050541e-05, "loss": 0.1864, "step": 23695 }, { "epoch": 0.42264474012770664, "grad_norm": 0.21846045553684235, "learning_rate": 3.575033088114105e-05, "loss": 0.143, "step": 23696 }, { "epoch": 0.42266257624942033, "grad_norm": 0.23655980825424194, "learning_rate": 3.5748925610104694e-05, "loss": 0.1118, "step": 23697 }, { "epoch": 0.422680412371134, "grad_norm": 0.21267226338386536, "learning_rate": 3.574752029740179e-05, "loss": 0.1122, "step": 23698 }, { "epoch": 0.4226982484928477, "grad_norm": 0.28920501470565796, "learning_rate": 3.574611494303778e-05, "loss": 0.148, "step": 23699 }, { "epoch": 0.4227160846145614, "grad_norm": 0.20284847915172577, "learning_rate": 3.574470954701812e-05, "loss": 0.1458, "step": 23700 }, { "epoch": 0.4227339207362751, "grad_norm": 0.3121538758277893, "learning_rate": 3.5743304109348265e-05, "loss": 0.1543, "step": 23701 }, { "epoch": 0.4227517568579888, "grad_norm": 0.2441626340150833, "learning_rate": 3.5741898630033635e-05, "loss": 0.1919, "step": 23702 }, { "epoch": 0.4227695929797025, "grad_norm": 0.3428318202495575, "learning_rate": 3.574049310907971e-05, "loss": 0.1634, "step": 23703 }, { "epoch": 0.4227874291014162, "grad_norm": 0.23114174604415894, "learning_rate": 3.573908754649192e-05, "loss": 0.1234, "step": 23704 }, { "epoch": 0.4228052652231299, "grad_norm": 0.2092934101819992, "learning_rate": 3.5737681942275713e-05, "loss": 0.1422, "step": 23705 }, { "epoch": 0.4228231013448436, "grad_norm": 0.35993555188179016, "learning_rate": 3.573627629643655e-05, "loss": 0.1261, "step": 23706 }, { "epoch": 0.42284093746655727, "grad_norm": 0.24289120733737946, "learning_rate": 3.573487060897987e-05, "loss": 0.1643, "step": 23707 }, { "epoch": 0.42285877358827095, "grad_norm": 0.28490716218948364, "learning_rate": 3.573346487991111e-05, "loss": 0.1943, "step": 23708 }, { "epoch": 0.42287660970998464, "grad_norm": 0.25983208417892456, "learning_rate": 3.573205910923575e-05, "loss": 0.1496, "step": 23709 }, { "epoch": 0.42289444583169833, "grad_norm": 0.4670659899711609, "learning_rate": 3.573065329695921e-05, "loss": 0.1323, "step": 23710 }, { "epoch": 0.4229122819534121, "grad_norm": 0.24420194327831268, "learning_rate": 3.572924744308696e-05, "loss": 0.1622, "step": 23711 }, { "epoch": 0.42293011807512576, "grad_norm": 0.314409464597702, "learning_rate": 3.572784154762443e-05, "loss": 0.1883, "step": 23712 }, { "epoch": 0.42294795419683945, "grad_norm": 0.27840864658355713, "learning_rate": 3.572643561057709e-05, "loss": 0.1836, "step": 23713 }, { "epoch": 0.42296579031855314, "grad_norm": 0.31840789318084717, "learning_rate": 3.572502963195039e-05, "loss": 0.1551, "step": 23714 }, { "epoch": 0.4229836264402668, "grad_norm": 0.3444361984729767, "learning_rate": 3.5723623611749754e-05, "loss": 0.1482, "step": 23715 }, { "epoch": 0.4230014625619805, "grad_norm": 0.39516517519950867, "learning_rate": 3.572221754998066e-05, "loss": 0.2096, "step": 23716 }, { "epoch": 0.4230192986836942, "grad_norm": 0.20691877603530884, "learning_rate": 3.5720811446648546e-05, "loss": 0.1584, "step": 23717 }, { "epoch": 0.4230371348054079, "grad_norm": 0.18600843846797943, "learning_rate": 3.571940530175886e-05, "loss": 0.1406, "step": 23718 }, { "epoch": 0.42305497092712163, "grad_norm": 0.26819100975990295, "learning_rate": 3.5717999115317054e-05, "loss": 0.1171, "step": 23719 }, { "epoch": 0.4230728070488353, "grad_norm": 0.27075424790382385, "learning_rate": 3.571659288732859e-05, "loss": 0.1539, "step": 23720 }, { "epoch": 0.423090643170549, "grad_norm": 0.32275882363319397, "learning_rate": 3.5715186617798904e-05, "loss": 0.2049, "step": 23721 }, { "epoch": 0.4231084792922627, "grad_norm": 0.26822638511657715, "learning_rate": 3.5713780306733455e-05, "loss": 0.1772, "step": 23722 }, { "epoch": 0.4231263154139764, "grad_norm": 0.2827689051628113, "learning_rate": 3.571237395413769e-05, "loss": 0.2033, "step": 23723 }, { "epoch": 0.42314415153569007, "grad_norm": 0.2942645847797394, "learning_rate": 3.5710967560017074e-05, "loss": 0.1807, "step": 23724 }, { "epoch": 0.42316198765740376, "grad_norm": 0.21800316870212555, "learning_rate": 3.570956112437704e-05, "loss": 0.0937, "step": 23725 }, { "epoch": 0.42317982377911745, "grad_norm": 0.24841605126857758, "learning_rate": 3.5708154647223044e-05, "loss": 0.1602, "step": 23726 }, { "epoch": 0.42319765990083114, "grad_norm": 0.4378014802932739, "learning_rate": 3.570674812856056e-05, "loss": 0.1805, "step": 23727 }, { "epoch": 0.4232154960225449, "grad_norm": 0.3208266794681549, "learning_rate": 3.5705341568395e-05, "loss": 0.1482, "step": 23728 }, { "epoch": 0.42323333214425857, "grad_norm": 0.31566476821899414, "learning_rate": 3.570393496673186e-05, "loss": 0.1053, "step": 23729 }, { "epoch": 0.42325116826597226, "grad_norm": 0.42010727524757385, "learning_rate": 3.5702528323576556e-05, "loss": 0.1665, "step": 23730 }, { "epoch": 0.42326900438768594, "grad_norm": 0.2503792345523834, "learning_rate": 3.570112163893456e-05, "loss": 0.1813, "step": 23731 }, { "epoch": 0.42328684050939963, "grad_norm": 0.22296959161758423, "learning_rate": 3.569971491281132e-05, "loss": 0.1226, "step": 23732 }, { "epoch": 0.4233046766311133, "grad_norm": 0.22855298221111298, "learning_rate": 3.569830814521229e-05, "loss": 0.1499, "step": 23733 }, { "epoch": 0.423322512752827, "grad_norm": 0.23642213642597198, "learning_rate": 3.569690133614292e-05, "loss": 0.1453, "step": 23734 }, { "epoch": 0.4233403488745407, "grad_norm": 0.3246593177318573, "learning_rate": 3.569549448560867e-05, "loss": 0.1409, "step": 23735 }, { "epoch": 0.42335818499625444, "grad_norm": 0.24724628031253815, "learning_rate": 3.5694087593614986e-05, "loss": 0.1924, "step": 23736 }, { "epoch": 0.4233760211179681, "grad_norm": 0.3473714590072632, "learning_rate": 3.5692680660167325e-05, "loss": 0.157, "step": 23737 }, { "epoch": 0.4233938572396818, "grad_norm": 0.31693729758262634, "learning_rate": 3.569127368527114e-05, "loss": 0.1904, "step": 23738 }, { "epoch": 0.4234116933613955, "grad_norm": 0.28705716133117676, "learning_rate": 3.568986666893189e-05, "loss": 0.1326, "step": 23739 }, { "epoch": 0.4234295294831092, "grad_norm": 0.1985987275838852, "learning_rate": 3.5688459611155024e-05, "loss": 0.1121, "step": 23740 }, { "epoch": 0.4234473656048229, "grad_norm": 0.26479002833366394, "learning_rate": 3.568705251194599e-05, "loss": 0.1916, "step": 23741 }, { "epoch": 0.42346520172653657, "grad_norm": 0.35913845896720886, "learning_rate": 3.568564537131026e-05, "loss": 0.1757, "step": 23742 }, { "epoch": 0.42348303784825025, "grad_norm": 0.27993494272232056, "learning_rate": 3.568423818925327e-05, "loss": 0.1926, "step": 23743 }, { "epoch": 0.423500873969964, "grad_norm": 0.2530037760734558, "learning_rate": 3.568283096578049e-05, "loss": 0.1525, "step": 23744 }, { "epoch": 0.4235187100916777, "grad_norm": 0.319872111082077, "learning_rate": 3.568142370089735e-05, "loss": 0.1669, "step": 23745 }, { "epoch": 0.4235365462133914, "grad_norm": 0.258839875459671, "learning_rate": 3.568001639460934e-05, "loss": 0.1584, "step": 23746 }, { "epoch": 0.42355438233510506, "grad_norm": 0.31688395142555237, "learning_rate": 3.567860904692189e-05, "loss": 0.1569, "step": 23747 }, { "epoch": 0.42357221845681875, "grad_norm": 0.3236488997936249, "learning_rate": 3.567720165784046e-05, "loss": 0.1374, "step": 23748 }, { "epoch": 0.42359005457853244, "grad_norm": 0.31253373622894287, "learning_rate": 3.5675794227370516e-05, "loss": 0.1728, "step": 23749 }, { "epoch": 0.4236078907002461, "grad_norm": 0.3201337158679962, "learning_rate": 3.567438675551751e-05, "loss": 0.2051, "step": 23750 }, { "epoch": 0.4236257268219598, "grad_norm": 0.2224111407995224, "learning_rate": 3.567297924228689e-05, "loss": 0.1139, "step": 23751 }, { "epoch": 0.4236435629436735, "grad_norm": 0.24392402172088623, "learning_rate": 3.5671571687684115e-05, "loss": 0.1709, "step": 23752 }, { "epoch": 0.42366139906538725, "grad_norm": 0.2275601178407669, "learning_rate": 3.5670164091714645e-05, "loss": 0.1803, "step": 23753 }, { "epoch": 0.42367923518710093, "grad_norm": 0.21494191884994507, "learning_rate": 3.5668756454383926e-05, "loss": 0.0959, "step": 23754 }, { "epoch": 0.4236970713088146, "grad_norm": 0.34057918190956116, "learning_rate": 3.5667348775697426e-05, "loss": 0.2145, "step": 23755 }, { "epoch": 0.4237149074305283, "grad_norm": 0.2655284106731415, "learning_rate": 3.5665941055660594e-05, "loss": 0.1364, "step": 23756 }, { "epoch": 0.423732743552242, "grad_norm": 0.2598637342453003, "learning_rate": 3.5664533294278905e-05, "loss": 0.1486, "step": 23757 }, { "epoch": 0.4237505796739557, "grad_norm": 0.19027076661586761, "learning_rate": 3.566312549155778e-05, "loss": 0.1248, "step": 23758 }, { "epoch": 0.4237684157956694, "grad_norm": 0.19882620871067047, "learning_rate": 3.566171764750271e-05, "loss": 0.1455, "step": 23759 }, { "epoch": 0.42378625191738306, "grad_norm": 0.2997380793094635, "learning_rate": 3.566030976211914e-05, "loss": 0.1742, "step": 23760 }, { "epoch": 0.4238040880390968, "grad_norm": 0.22737203538417816, "learning_rate": 3.565890183541253e-05, "loss": 0.1456, "step": 23761 }, { "epoch": 0.4238219241608105, "grad_norm": 0.2445087879896164, "learning_rate": 3.5657493867388324e-05, "loss": 0.1699, "step": 23762 }, { "epoch": 0.4238397602825242, "grad_norm": 0.35739731788635254, "learning_rate": 3.5656085858052004e-05, "loss": 0.1144, "step": 23763 }, { "epoch": 0.42385759640423787, "grad_norm": 0.25468719005584717, "learning_rate": 3.565467780740901e-05, "loss": 0.1143, "step": 23764 }, { "epoch": 0.42387543252595156, "grad_norm": 0.201274573802948, "learning_rate": 3.5653269715464805e-05, "loss": 0.1523, "step": 23765 }, { "epoch": 0.42389326864766524, "grad_norm": 0.25827720761299133, "learning_rate": 3.5651861582224844e-05, "loss": 0.1229, "step": 23766 }, { "epoch": 0.42391110476937893, "grad_norm": 0.3260040879249573, "learning_rate": 3.565045340769458e-05, "loss": 0.1454, "step": 23767 }, { "epoch": 0.4239289408910926, "grad_norm": 0.25070345401763916, "learning_rate": 3.56490451918795e-05, "loss": 0.2157, "step": 23768 }, { "epoch": 0.4239467770128063, "grad_norm": 0.29251378774642944, "learning_rate": 3.564763693478503e-05, "loss": 0.2199, "step": 23769 }, { "epoch": 0.42396461313452005, "grad_norm": 0.2800651788711548, "learning_rate": 3.564622863641665e-05, "loss": 0.1551, "step": 23770 }, { "epoch": 0.42398244925623374, "grad_norm": 0.1974204033613205, "learning_rate": 3.56448202967798e-05, "loss": 0.1439, "step": 23771 }, { "epoch": 0.42400028537794743, "grad_norm": 0.27609798312187195, "learning_rate": 3.5643411915879956e-05, "loss": 0.1334, "step": 23772 }, { "epoch": 0.4240181214996611, "grad_norm": 0.32134515047073364, "learning_rate": 3.564200349372257e-05, "loss": 0.1648, "step": 23773 }, { "epoch": 0.4240359576213748, "grad_norm": 0.23624029755592346, "learning_rate": 3.5640595030313105e-05, "loss": 0.1753, "step": 23774 }, { "epoch": 0.4240537937430885, "grad_norm": 0.4308352768421173, "learning_rate": 3.5639186525657017e-05, "loss": 0.2437, "step": 23775 }, { "epoch": 0.4240716298648022, "grad_norm": 0.2978517711162567, "learning_rate": 3.563777797975977e-05, "loss": 0.2309, "step": 23776 }, { "epoch": 0.42408946598651587, "grad_norm": 0.28222936391830444, "learning_rate": 3.5636369392626813e-05, "loss": 0.1818, "step": 23777 }, { "epoch": 0.4241073021082296, "grad_norm": 0.26422178745269775, "learning_rate": 3.563496076426362e-05, "loss": 0.1565, "step": 23778 }, { "epoch": 0.4241251382299433, "grad_norm": 0.22211819887161255, "learning_rate": 3.563355209467566e-05, "loss": 0.127, "step": 23779 }, { "epoch": 0.424142974351657, "grad_norm": 0.27805814146995544, "learning_rate": 3.563214338386836e-05, "loss": 0.1514, "step": 23780 }, { "epoch": 0.4241608104733707, "grad_norm": 0.17706941068172455, "learning_rate": 3.56307346318472e-05, "loss": 0.1408, "step": 23781 }, { "epoch": 0.42417864659508436, "grad_norm": 0.2802286744117737, "learning_rate": 3.5629325838617644e-05, "loss": 0.1678, "step": 23782 }, { "epoch": 0.42419648271679805, "grad_norm": 0.2613270580768585, "learning_rate": 3.562791700418516e-05, "loss": 0.1435, "step": 23783 }, { "epoch": 0.42421431883851174, "grad_norm": 0.21706587076187134, "learning_rate": 3.5626508128555184e-05, "loss": 0.1868, "step": 23784 }, { "epoch": 0.4242321549602254, "grad_norm": 0.25618359446525574, "learning_rate": 3.56250992117332e-05, "loss": 0.1549, "step": 23785 }, { "epoch": 0.42424999108193917, "grad_norm": 0.34990188479423523, "learning_rate": 3.562369025372466e-05, "loss": 0.2101, "step": 23786 }, { "epoch": 0.42426782720365286, "grad_norm": 0.26322513818740845, "learning_rate": 3.562228125453503e-05, "loss": 0.1763, "step": 23787 }, { "epoch": 0.42428566332536655, "grad_norm": 0.23602735996246338, "learning_rate": 3.5620872214169767e-05, "loss": 0.1426, "step": 23788 }, { "epoch": 0.42430349944708023, "grad_norm": 0.25292080640792847, "learning_rate": 3.5619463132634333e-05, "loss": 0.1676, "step": 23789 }, { "epoch": 0.4243213355687939, "grad_norm": 0.3345840871334076, "learning_rate": 3.561805400993419e-05, "loss": 0.2286, "step": 23790 }, { "epoch": 0.4243391716905076, "grad_norm": 0.2635435163974762, "learning_rate": 3.561664484607481e-05, "loss": 0.1482, "step": 23791 }, { "epoch": 0.4243570078122213, "grad_norm": 0.24531997740268707, "learning_rate": 3.561523564106165e-05, "loss": 0.1939, "step": 23792 }, { "epoch": 0.424374843933935, "grad_norm": 0.27716177701950073, "learning_rate": 3.561382639490016e-05, "loss": 0.146, "step": 23793 }, { "epoch": 0.4243926800556487, "grad_norm": 0.26818716526031494, "learning_rate": 3.561241710759582e-05, "loss": 0.1625, "step": 23794 }, { "epoch": 0.4244105161773624, "grad_norm": 0.31768810749053955, "learning_rate": 3.561100777915408e-05, "loss": 0.1805, "step": 23795 }, { "epoch": 0.4244283522990761, "grad_norm": 0.28001144528388977, "learning_rate": 3.560959840958042e-05, "loss": 0.1585, "step": 23796 }, { "epoch": 0.4244461884207898, "grad_norm": 0.24928061664104462, "learning_rate": 3.5608188998880276e-05, "loss": 0.1407, "step": 23797 }, { "epoch": 0.4244640245425035, "grad_norm": 0.26309409737586975, "learning_rate": 3.5606779547059145e-05, "loss": 0.1488, "step": 23798 }, { "epoch": 0.42448186066421717, "grad_norm": 0.21805571019649506, "learning_rate": 3.560537005412246e-05, "loss": 0.1552, "step": 23799 }, { "epoch": 0.42449969678593086, "grad_norm": 0.3703272342681885, "learning_rate": 3.5603960520075706e-05, "loss": 0.1727, "step": 23800 }, { "epoch": 0.42451753290764455, "grad_norm": 0.216144397854805, "learning_rate": 3.560255094492433e-05, "loss": 0.1724, "step": 23801 }, { "epoch": 0.42453536902935823, "grad_norm": 0.27654367685317993, "learning_rate": 3.56011413286738e-05, "loss": 0.1085, "step": 23802 }, { "epoch": 0.424553205151072, "grad_norm": 0.33142679929733276, "learning_rate": 3.55997316713296e-05, "loss": 0.204, "step": 23803 }, { "epoch": 0.42457104127278567, "grad_norm": 0.3179510831832886, "learning_rate": 3.5598321972897176e-05, "loss": 0.1499, "step": 23804 }, { "epoch": 0.42458887739449935, "grad_norm": 0.23971857130527496, "learning_rate": 3.5596912233381996e-05, "loss": 0.1581, "step": 23805 }, { "epoch": 0.42460671351621304, "grad_norm": 0.2246101051568985, "learning_rate": 3.559550245278951e-05, "loss": 0.1426, "step": 23806 }, { "epoch": 0.42462454963792673, "grad_norm": 0.21489155292510986, "learning_rate": 3.5594092631125215e-05, "loss": 0.1379, "step": 23807 }, { "epoch": 0.4246423857596404, "grad_norm": 0.24619744718074799, "learning_rate": 3.559268276839455e-05, "loss": 0.107, "step": 23808 }, { "epoch": 0.4246602218813541, "grad_norm": 0.2678712010383606, "learning_rate": 3.559127286460299e-05, "loss": 0.1912, "step": 23809 }, { "epoch": 0.4246780580030678, "grad_norm": 0.2584265172481537, "learning_rate": 3.558986291975599e-05, "loss": 0.1306, "step": 23810 }, { "epoch": 0.4246958941247815, "grad_norm": 0.28185492753982544, "learning_rate": 3.558845293385903e-05, "loss": 0.1847, "step": 23811 }, { "epoch": 0.4247137302464952, "grad_norm": 0.23856894671916962, "learning_rate": 3.5587042906917565e-05, "loss": 0.1548, "step": 23812 }, { "epoch": 0.4247315663682089, "grad_norm": 0.3829350173473358, "learning_rate": 3.5585632838937075e-05, "loss": 0.1763, "step": 23813 }, { "epoch": 0.4247494024899226, "grad_norm": 0.2294577807188034, "learning_rate": 3.558422272992301e-05, "loss": 0.1632, "step": 23814 }, { "epoch": 0.4247672386116363, "grad_norm": 0.3405967652797699, "learning_rate": 3.558281257988084e-05, "loss": 0.1153, "step": 23815 }, { "epoch": 0.42478507473335, "grad_norm": 0.30611565709114075, "learning_rate": 3.558140238881603e-05, "loss": 0.1688, "step": 23816 }, { "epoch": 0.42480291085506366, "grad_norm": 0.22104357182979584, "learning_rate": 3.557999215673406e-05, "loss": 0.1259, "step": 23817 }, { "epoch": 0.42482074697677735, "grad_norm": 0.2542276680469513, "learning_rate": 3.557858188364038e-05, "loss": 0.1378, "step": 23818 }, { "epoch": 0.42483858309849104, "grad_norm": 0.28939351439476013, "learning_rate": 3.557717156954047e-05, "loss": 0.2234, "step": 23819 }, { "epoch": 0.4248564192202048, "grad_norm": 0.2534879148006439, "learning_rate": 3.5575761214439786e-05, "loss": 0.1917, "step": 23820 }, { "epoch": 0.42487425534191847, "grad_norm": 0.23282602429389954, "learning_rate": 3.557435081834379e-05, "loss": 0.1524, "step": 23821 }, { "epoch": 0.42489209146363216, "grad_norm": 0.27611321210861206, "learning_rate": 3.557294038125797e-05, "loss": 0.1334, "step": 23822 }, { "epoch": 0.42490992758534585, "grad_norm": 0.26024171710014343, "learning_rate": 3.557152990318777e-05, "loss": 0.1659, "step": 23823 }, { "epoch": 0.42492776370705954, "grad_norm": 0.39405298233032227, "learning_rate": 3.5570119384138676e-05, "loss": 0.166, "step": 23824 }, { "epoch": 0.4249455998287732, "grad_norm": 0.24254606664180756, "learning_rate": 3.556870882411615e-05, "loss": 0.1236, "step": 23825 }, { "epoch": 0.4249634359504869, "grad_norm": 0.23225122690200806, "learning_rate": 3.556729822312566e-05, "loss": 0.1492, "step": 23826 }, { "epoch": 0.4249812720722006, "grad_norm": 0.18533039093017578, "learning_rate": 3.5565887581172665e-05, "loss": 0.1842, "step": 23827 }, { "epoch": 0.4249991081939143, "grad_norm": 0.30577361583709717, "learning_rate": 3.556447689826264e-05, "loss": 0.1483, "step": 23828 }, { "epoch": 0.42501694431562803, "grad_norm": 0.2784716486930847, "learning_rate": 3.5563066174401054e-05, "loss": 0.1702, "step": 23829 }, { "epoch": 0.4250347804373417, "grad_norm": 0.21199016273021698, "learning_rate": 3.556165540959338e-05, "loss": 0.1059, "step": 23830 }, { "epoch": 0.4250526165590554, "grad_norm": 0.2019483745098114, "learning_rate": 3.5560244603845085e-05, "loss": 0.1309, "step": 23831 }, { "epoch": 0.4250704526807691, "grad_norm": 0.23276233673095703, "learning_rate": 3.5558833757161626e-05, "loss": 0.1326, "step": 23832 }, { "epoch": 0.4250882888024828, "grad_norm": 0.26270052790641785, "learning_rate": 3.5557422869548485e-05, "loss": 0.1346, "step": 23833 }, { "epoch": 0.42510612492419647, "grad_norm": 0.35761696100234985, "learning_rate": 3.5556011941011124e-05, "loss": 0.1777, "step": 23834 }, { "epoch": 0.42512396104591016, "grad_norm": 0.18861781060695648, "learning_rate": 3.555460097155502e-05, "loss": 0.1515, "step": 23835 }, { "epoch": 0.42514179716762385, "grad_norm": 0.2612263560295105, "learning_rate": 3.5553189961185626e-05, "loss": 0.2049, "step": 23836 }, { "epoch": 0.4251596332893376, "grad_norm": 0.28499284386634827, "learning_rate": 3.555177890990843e-05, "loss": 0.1972, "step": 23837 }, { "epoch": 0.4251774694110513, "grad_norm": 0.232050359249115, "learning_rate": 3.5550367817728895e-05, "loss": 0.1659, "step": 23838 }, { "epoch": 0.42519530553276497, "grad_norm": 0.307167649269104, "learning_rate": 3.554895668465249e-05, "loss": 0.1846, "step": 23839 }, { "epoch": 0.42521314165447865, "grad_norm": 0.2804708480834961, "learning_rate": 3.554754551068469e-05, "loss": 0.1547, "step": 23840 }, { "epoch": 0.42523097777619234, "grad_norm": 0.3466965854167938, "learning_rate": 3.5546134295830954e-05, "loss": 0.2108, "step": 23841 }, { "epoch": 0.42524881389790603, "grad_norm": 0.2884812653064728, "learning_rate": 3.554472304009676e-05, "loss": 0.1606, "step": 23842 }, { "epoch": 0.4252666500196197, "grad_norm": 0.2581908404827118, "learning_rate": 3.554331174348757e-05, "loss": 0.1594, "step": 23843 }, { "epoch": 0.4252844861413334, "grad_norm": 0.19995135068893433, "learning_rate": 3.554190040600888e-05, "loss": 0.1312, "step": 23844 }, { "epoch": 0.42530232226304715, "grad_norm": 0.19691519439220428, "learning_rate": 3.554048902766613e-05, "loss": 0.1324, "step": 23845 }, { "epoch": 0.42532015838476084, "grad_norm": 0.2621724009513855, "learning_rate": 3.5539077608464814e-05, "loss": 0.1209, "step": 23846 }, { "epoch": 0.4253379945064745, "grad_norm": 0.16922634840011597, "learning_rate": 3.553766614841038e-05, "loss": 0.1398, "step": 23847 }, { "epoch": 0.4253558306281882, "grad_norm": 0.22233954071998596, "learning_rate": 3.553625464750832e-05, "loss": 0.186, "step": 23848 }, { "epoch": 0.4253736667499019, "grad_norm": 0.2795570194721222, "learning_rate": 3.55348431057641e-05, "loss": 0.1762, "step": 23849 }, { "epoch": 0.4253915028716156, "grad_norm": 0.2672940790653229, "learning_rate": 3.553343152318318e-05, "loss": 0.1766, "step": 23850 }, { "epoch": 0.4254093389933293, "grad_norm": 0.2887563407421112, "learning_rate": 3.5532019899771045e-05, "loss": 0.2031, "step": 23851 }, { "epoch": 0.42542717511504297, "grad_norm": 0.32539504766464233, "learning_rate": 3.553060823553317e-05, "loss": 0.1544, "step": 23852 }, { "epoch": 0.42544501123675665, "grad_norm": 0.25815871357917786, "learning_rate": 3.552919653047502e-05, "loss": 0.1472, "step": 23853 }, { "epoch": 0.4254628473584704, "grad_norm": 0.27862316370010376, "learning_rate": 3.5527784784602064e-05, "loss": 0.1714, "step": 23854 }, { "epoch": 0.4254806834801841, "grad_norm": 0.24769659340381622, "learning_rate": 3.5526372997919774e-05, "loss": 0.0831, "step": 23855 }, { "epoch": 0.4254985196018978, "grad_norm": 0.2353099286556244, "learning_rate": 3.552496117043364e-05, "loss": 0.1333, "step": 23856 }, { "epoch": 0.42551635572361146, "grad_norm": 0.26901599764823914, "learning_rate": 3.552354930214911e-05, "loss": 0.1541, "step": 23857 }, { "epoch": 0.42553419184532515, "grad_norm": 0.31995871663093567, "learning_rate": 3.552213739307166e-05, "loss": 0.15, "step": 23858 }, { "epoch": 0.42555202796703884, "grad_norm": 0.23480314016342163, "learning_rate": 3.552072544320678e-05, "loss": 0.2033, "step": 23859 }, { "epoch": 0.4255698640887525, "grad_norm": 0.2328900545835495, "learning_rate": 3.551931345255994e-05, "loss": 0.1772, "step": 23860 }, { "epoch": 0.4255877002104662, "grad_norm": 0.2869192063808441, "learning_rate": 3.55179014211366e-05, "loss": 0.1827, "step": 23861 }, { "epoch": 0.42560553633217996, "grad_norm": 0.2263847291469574, "learning_rate": 3.551648934894225e-05, "loss": 0.1407, "step": 23862 }, { "epoch": 0.42562337245389364, "grad_norm": 0.26627570390701294, "learning_rate": 3.5515077235982354e-05, "loss": 0.1714, "step": 23863 }, { "epoch": 0.42564120857560733, "grad_norm": 0.26254335045814514, "learning_rate": 3.551366508226237e-05, "loss": 0.1407, "step": 23864 }, { "epoch": 0.425659044697321, "grad_norm": 0.33721083402633667, "learning_rate": 3.5512252887787806e-05, "loss": 0.1665, "step": 23865 }, { "epoch": 0.4256768808190347, "grad_norm": 0.27196943759918213, "learning_rate": 3.551084065256411e-05, "loss": 0.1295, "step": 23866 }, { "epoch": 0.4256947169407484, "grad_norm": 0.34141629934310913, "learning_rate": 3.550942837659678e-05, "loss": 0.1878, "step": 23867 }, { "epoch": 0.4257125530624621, "grad_norm": 0.2678629755973816, "learning_rate": 3.550801605989126e-05, "loss": 0.1388, "step": 23868 }, { "epoch": 0.4257303891841758, "grad_norm": 0.2903549075126648, "learning_rate": 3.550660370245305e-05, "loss": 0.1197, "step": 23869 }, { "epoch": 0.42574822530588946, "grad_norm": 0.32868221402168274, "learning_rate": 3.55051913042876e-05, "loss": 0.1806, "step": 23870 }, { "epoch": 0.4257660614276032, "grad_norm": 0.2503345310688019, "learning_rate": 3.550377886540042e-05, "loss": 0.208, "step": 23871 }, { "epoch": 0.4257838975493169, "grad_norm": 0.2820666432380676, "learning_rate": 3.550236638579695e-05, "loss": 0.1704, "step": 23872 }, { "epoch": 0.4258017336710306, "grad_norm": 0.3565189838409424, "learning_rate": 3.550095386548269e-05, "loss": 0.2019, "step": 23873 }, { "epoch": 0.42581956979274427, "grad_norm": 0.25917696952819824, "learning_rate": 3.54995413044631e-05, "loss": 0.1702, "step": 23874 }, { "epoch": 0.42583740591445796, "grad_norm": 0.36733704805374146, "learning_rate": 3.5498128702743664e-05, "loss": 0.1639, "step": 23875 }, { "epoch": 0.42585524203617164, "grad_norm": 0.26840436458587646, "learning_rate": 3.549671606032986e-05, "loss": 0.0889, "step": 23876 }, { "epoch": 0.42587307815788533, "grad_norm": 0.3518798351287842, "learning_rate": 3.5495303377227153e-05, "loss": 0.229, "step": 23877 }, { "epoch": 0.425890914279599, "grad_norm": 0.2991696298122406, "learning_rate": 3.549389065344103e-05, "loss": 0.1593, "step": 23878 }, { "epoch": 0.42590875040131276, "grad_norm": 0.2914685606956482, "learning_rate": 3.549247788897695e-05, "loss": 0.1763, "step": 23879 }, { "epoch": 0.42592658652302645, "grad_norm": 0.26813337206840515, "learning_rate": 3.549106508384041e-05, "loss": 0.1285, "step": 23880 }, { "epoch": 0.42594442264474014, "grad_norm": 0.2833791971206665, "learning_rate": 3.548965223803688e-05, "loss": 0.1261, "step": 23881 }, { "epoch": 0.4259622587664538, "grad_norm": 0.28477099537849426, "learning_rate": 3.5488239351571836e-05, "loss": 0.1153, "step": 23882 }, { "epoch": 0.4259800948881675, "grad_norm": 0.28000229597091675, "learning_rate": 3.5486826424450756e-05, "loss": 0.1662, "step": 23883 }, { "epoch": 0.4259979310098812, "grad_norm": 0.26784002780914307, "learning_rate": 3.548541345667911e-05, "loss": 0.136, "step": 23884 }, { "epoch": 0.4260157671315949, "grad_norm": 0.24080026149749756, "learning_rate": 3.548400044826238e-05, "loss": 0.1299, "step": 23885 }, { "epoch": 0.4260336032533086, "grad_norm": 0.40504252910614014, "learning_rate": 3.5482587399206034e-05, "loss": 0.1567, "step": 23886 }, { "epoch": 0.4260514393750223, "grad_norm": 0.27305126190185547, "learning_rate": 3.5481174309515574e-05, "loss": 0.1484, "step": 23887 }, { "epoch": 0.426069275496736, "grad_norm": 0.27621057629585266, "learning_rate": 3.547976117919646e-05, "loss": 0.1138, "step": 23888 }, { "epoch": 0.4260871116184497, "grad_norm": 0.2447374016046524, "learning_rate": 3.547834800825417e-05, "loss": 0.1557, "step": 23889 }, { "epoch": 0.4261049477401634, "grad_norm": 0.20814082026481628, "learning_rate": 3.547693479669418e-05, "loss": 0.1216, "step": 23890 }, { "epoch": 0.4261227838618771, "grad_norm": 0.27645888924598694, "learning_rate": 3.5475521544521974e-05, "loss": 0.1565, "step": 23891 }, { "epoch": 0.42614061998359076, "grad_norm": 0.3014417290687561, "learning_rate": 3.547410825174302e-05, "loss": 0.1959, "step": 23892 }, { "epoch": 0.42615845610530445, "grad_norm": 0.22696557641029358, "learning_rate": 3.547269491836282e-05, "loss": 0.1418, "step": 23893 }, { "epoch": 0.42617629222701814, "grad_norm": 0.39561939239501953, "learning_rate": 3.547128154438683e-05, "loss": 0.155, "step": 23894 }, { "epoch": 0.4261941283487318, "grad_norm": 0.25103700160980225, "learning_rate": 3.5469868129820535e-05, "loss": 0.1269, "step": 23895 }, { "epoch": 0.42621196447044557, "grad_norm": 0.3278326988220215, "learning_rate": 3.546845467466942e-05, "loss": 0.1266, "step": 23896 }, { "epoch": 0.42622980059215926, "grad_norm": 0.33121931552886963, "learning_rate": 3.546704117893896e-05, "loss": 0.174, "step": 23897 }, { "epoch": 0.42624763671387295, "grad_norm": 0.2715900242328644, "learning_rate": 3.546562764263462e-05, "loss": 0.178, "step": 23898 }, { "epoch": 0.42626547283558663, "grad_norm": 0.23536370694637299, "learning_rate": 3.54642140657619e-05, "loss": 0.158, "step": 23899 }, { "epoch": 0.4262833089573003, "grad_norm": 0.36287975311279297, "learning_rate": 3.546280044832628e-05, "loss": 0.2215, "step": 23900 }, { "epoch": 0.426301145079014, "grad_norm": 0.3486074209213257, "learning_rate": 3.5461386790333227e-05, "loss": 0.1994, "step": 23901 }, { "epoch": 0.4263189812007277, "grad_norm": 0.23576629161834717, "learning_rate": 3.5459973091788226e-05, "loss": 0.1818, "step": 23902 }, { "epoch": 0.4263368173224414, "grad_norm": 0.2224901169538498, "learning_rate": 3.5458559352696754e-05, "loss": 0.1352, "step": 23903 }, { "epoch": 0.42635465344415513, "grad_norm": 0.2519136369228363, "learning_rate": 3.54571455730643e-05, "loss": 0.1902, "step": 23904 }, { "epoch": 0.4263724895658688, "grad_norm": 0.16021160781383514, "learning_rate": 3.5455731752896326e-05, "loss": 0.121, "step": 23905 }, { "epoch": 0.4263903256875825, "grad_norm": 0.2509852647781372, "learning_rate": 3.545431789219833e-05, "loss": 0.1311, "step": 23906 }, { "epoch": 0.4264081618092962, "grad_norm": 0.29924800992012024, "learning_rate": 3.5452903990975784e-05, "loss": 0.1853, "step": 23907 }, { "epoch": 0.4264259979310099, "grad_norm": 0.27524664998054504, "learning_rate": 3.545149004923418e-05, "loss": 0.1587, "step": 23908 }, { "epoch": 0.42644383405272357, "grad_norm": 0.29763686656951904, "learning_rate": 3.5450076066978984e-05, "loss": 0.2084, "step": 23909 }, { "epoch": 0.42646167017443726, "grad_norm": 0.3354107141494751, "learning_rate": 3.544866204421568e-05, "loss": 0.1435, "step": 23910 }, { "epoch": 0.42647950629615095, "grad_norm": 0.2531072497367859, "learning_rate": 3.5447247980949775e-05, "loss": 0.1308, "step": 23911 }, { "epoch": 0.42649734241786463, "grad_norm": 0.25510644912719727, "learning_rate": 3.5445833877186706e-05, "loss": 0.0815, "step": 23912 }, { "epoch": 0.4265151785395784, "grad_norm": 0.22560018301010132, "learning_rate": 3.5444419732931986e-05, "loss": 0.1315, "step": 23913 }, { "epoch": 0.42653301466129206, "grad_norm": 0.25691989064216614, "learning_rate": 3.5443005548191077e-05, "loss": 0.1649, "step": 23914 }, { "epoch": 0.42655085078300575, "grad_norm": 0.3879161477088928, "learning_rate": 3.544159132296949e-05, "loss": 0.1394, "step": 23915 }, { "epoch": 0.42656868690471944, "grad_norm": 0.2623904347419739, "learning_rate": 3.544017705727267e-05, "loss": 0.1504, "step": 23916 }, { "epoch": 0.42658652302643313, "grad_norm": 0.25704699754714966, "learning_rate": 3.5438762751106134e-05, "loss": 0.1804, "step": 23917 }, { "epoch": 0.4266043591481468, "grad_norm": 0.28251948952674866, "learning_rate": 3.5437348404475334e-05, "loss": 0.1167, "step": 23918 }, { "epoch": 0.4266221952698605, "grad_norm": 0.2453586906194687, "learning_rate": 3.5435934017385775e-05, "loss": 0.1734, "step": 23919 }, { "epoch": 0.4266400313915742, "grad_norm": 0.2514716386795044, "learning_rate": 3.543451958984293e-05, "loss": 0.1921, "step": 23920 }, { "epoch": 0.42665786751328794, "grad_norm": 0.2496187537908554, "learning_rate": 3.543310512185228e-05, "loss": 0.162, "step": 23921 }, { "epoch": 0.4266757036350016, "grad_norm": 0.2603379786014557, "learning_rate": 3.5431690613419317e-05, "loss": 0.1137, "step": 23922 }, { "epoch": 0.4266935397567153, "grad_norm": 0.22919818758964539, "learning_rate": 3.5430276064549514e-05, "loss": 0.136, "step": 23923 }, { "epoch": 0.426711375878429, "grad_norm": 0.3271125555038452, "learning_rate": 3.542886147524836e-05, "loss": 0.1512, "step": 23924 }, { "epoch": 0.4267292120001427, "grad_norm": 0.1695825755596161, "learning_rate": 3.542744684552134e-05, "loss": 0.1263, "step": 23925 }, { "epoch": 0.4267470481218564, "grad_norm": 0.23370809853076935, "learning_rate": 3.5426032175373927e-05, "loss": 0.1441, "step": 23926 }, { "epoch": 0.42676488424357006, "grad_norm": 0.2657979130744934, "learning_rate": 3.542461746481161e-05, "loss": 0.1568, "step": 23927 }, { "epoch": 0.42678272036528375, "grad_norm": 0.2109915167093277, "learning_rate": 3.5423202713839885e-05, "loss": 0.1372, "step": 23928 }, { "epoch": 0.42680055648699744, "grad_norm": 0.2852080166339874, "learning_rate": 3.5421787922464224e-05, "loss": 0.1678, "step": 23929 }, { "epoch": 0.4268183926087112, "grad_norm": 0.2549762725830078, "learning_rate": 3.542037309069012e-05, "loss": 0.1861, "step": 23930 }, { "epoch": 0.42683622873042487, "grad_norm": 0.2283683717250824, "learning_rate": 3.5418958218523034e-05, "loss": 0.1529, "step": 23931 }, { "epoch": 0.42685406485213856, "grad_norm": 0.22913269698619843, "learning_rate": 3.541754330596848e-05, "loss": 0.1258, "step": 23932 }, { "epoch": 0.42687190097385225, "grad_norm": 0.29181382060050964, "learning_rate": 3.5416128353031926e-05, "loss": 0.1635, "step": 23933 }, { "epoch": 0.42688973709556594, "grad_norm": 0.21410875022411346, "learning_rate": 3.541471335971886e-05, "loss": 0.1247, "step": 23934 }, { "epoch": 0.4269075732172796, "grad_norm": 0.34712743759155273, "learning_rate": 3.541329832603477e-05, "loss": 0.1638, "step": 23935 }, { "epoch": 0.4269254093389933, "grad_norm": 0.21683192253112793, "learning_rate": 3.541188325198513e-05, "loss": 0.121, "step": 23936 }, { "epoch": 0.426943245460707, "grad_norm": 0.6718734502792358, "learning_rate": 3.5410468137575445e-05, "loss": 0.2966, "step": 23937 }, { "epoch": 0.42696108158242074, "grad_norm": 0.37805864214897156, "learning_rate": 3.540905298281119e-05, "loss": 0.1739, "step": 23938 }, { "epoch": 0.42697891770413443, "grad_norm": 0.28198936581611633, "learning_rate": 3.540763778769785e-05, "loss": 0.1785, "step": 23939 }, { "epoch": 0.4269967538258481, "grad_norm": 0.24662794172763824, "learning_rate": 3.540622255224091e-05, "loss": 0.1213, "step": 23940 }, { "epoch": 0.4270145899475618, "grad_norm": 0.3209708034992218, "learning_rate": 3.540480727644585e-05, "loss": 0.195, "step": 23941 }, { "epoch": 0.4270324260692755, "grad_norm": 0.34436291456222534, "learning_rate": 3.5403391960318165e-05, "loss": 0.1701, "step": 23942 }, { "epoch": 0.4270502621909892, "grad_norm": 0.2335614413022995, "learning_rate": 3.540197660386335e-05, "loss": 0.1497, "step": 23943 }, { "epoch": 0.42706809831270287, "grad_norm": 0.2021789401769638, "learning_rate": 3.540056120708687e-05, "loss": 0.113, "step": 23944 }, { "epoch": 0.42708593443441656, "grad_norm": 0.3811666667461395, "learning_rate": 3.539914576999424e-05, "loss": 0.1941, "step": 23945 }, { "epoch": 0.4271037705561303, "grad_norm": 0.24851712584495544, "learning_rate": 3.5397730292590906e-05, "loss": 0.1857, "step": 23946 }, { "epoch": 0.427121606677844, "grad_norm": 0.21563822031021118, "learning_rate": 3.5396314774882386e-05, "loss": 0.1654, "step": 23947 }, { "epoch": 0.4271394427995577, "grad_norm": 0.2698385715484619, "learning_rate": 3.5394899216874166e-05, "loss": 0.191, "step": 23948 }, { "epoch": 0.42715727892127137, "grad_norm": 0.25924378633499146, "learning_rate": 3.539348361857172e-05, "loss": 0.1626, "step": 23949 }, { "epoch": 0.42717511504298505, "grad_norm": 0.2653866410255432, "learning_rate": 3.539206797998055e-05, "loss": 0.1656, "step": 23950 }, { "epoch": 0.42719295116469874, "grad_norm": 0.2630806565284729, "learning_rate": 3.5390652301106134e-05, "loss": 0.1417, "step": 23951 }, { "epoch": 0.42721078728641243, "grad_norm": 0.3655237555503845, "learning_rate": 3.5389236581953954e-05, "loss": 0.1564, "step": 23952 }, { "epoch": 0.4272286234081261, "grad_norm": 0.3011694550514221, "learning_rate": 3.5387820822529505e-05, "loss": 0.1807, "step": 23953 }, { "epoch": 0.4272464595298398, "grad_norm": 0.2681520879268646, "learning_rate": 3.5386405022838276e-05, "loss": 0.1604, "step": 23954 }, { "epoch": 0.42726429565155355, "grad_norm": 0.2561852037906647, "learning_rate": 3.538498918288575e-05, "loss": 0.1412, "step": 23955 }, { "epoch": 0.42728213177326724, "grad_norm": 0.2399996519088745, "learning_rate": 3.538357330267742e-05, "loss": 0.1828, "step": 23956 }, { "epoch": 0.4272999678949809, "grad_norm": 0.270550400018692, "learning_rate": 3.5382157382218776e-05, "loss": 0.1548, "step": 23957 }, { "epoch": 0.4273178040166946, "grad_norm": 0.22000697255134583, "learning_rate": 3.538074142151531e-05, "loss": 0.1536, "step": 23958 }, { "epoch": 0.4273356401384083, "grad_norm": 0.27395185828208923, "learning_rate": 3.53793254205725e-05, "loss": 0.1144, "step": 23959 }, { "epoch": 0.427353476260122, "grad_norm": 0.2699791193008423, "learning_rate": 3.537790937939584e-05, "loss": 0.1941, "step": 23960 }, { "epoch": 0.4273713123818357, "grad_norm": 0.3365248143672943, "learning_rate": 3.537649329799082e-05, "loss": 0.0994, "step": 23961 }, { "epoch": 0.42738914850354937, "grad_norm": 0.3106212317943573, "learning_rate": 3.537507717636292e-05, "loss": 0.1784, "step": 23962 }, { "epoch": 0.4274069846252631, "grad_norm": 0.260199636220932, "learning_rate": 3.537366101451765e-05, "loss": 0.0817, "step": 23963 }, { "epoch": 0.4274248207469768, "grad_norm": 0.26688748598098755, "learning_rate": 3.537224481246048e-05, "loss": 0.1499, "step": 23964 }, { "epoch": 0.4274426568686905, "grad_norm": 0.26480117440223694, "learning_rate": 3.5370828570196905e-05, "loss": 0.1654, "step": 23965 }, { "epoch": 0.4274604929904042, "grad_norm": 0.2640722095966339, "learning_rate": 3.5369412287732417e-05, "loss": 0.149, "step": 23966 }, { "epoch": 0.42747832911211786, "grad_norm": 0.39676433801651, "learning_rate": 3.5367995965072515e-05, "loss": 0.0998, "step": 23967 }, { "epoch": 0.42749616523383155, "grad_norm": 0.19692380726337433, "learning_rate": 3.536657960222267e-05, "loss": 0.151, "step": 23968 }, { "epoch": 0.42751400135554524, "grad_norm": 0.21552708745002747, "learning_rate": 3.536516319918838e-05, "loss": 0.1726, "step": 23969 }, { "epoch": 0.4275318374772589, "grad_norm": 0.2576480507850647, "learning_rate": 3.5363746755975144e-05, "loss": 0.1567, "step": 23970 }, { "epoch": 0.4275496735989726, "grad_norm": 0.277039498090744, "learning_rate": 3.536233027258844e-05, "loss": 0.1687, "step": 23971 }, { "epoch": 0.42756750972068636, "grad_norm": 0.23763813078403473, "learning_rate": 3.536091374903377e-05, "loss": 0.1668, "step": 23972 }, { "epoch": 0.42758534584240004, "grad_norm": 0.18927772343158722, "learning_rate": 3.535949718531662e-05, "loss": 0.1324, "step": 23973 }, { "epoch": 0.42760318196411373, "grad_norm": 0.27033889293670654, "learning_rate": 3.5358080581442475e-05, "loss": 0.1378, "step": 23974 }, { "epoch": 0.4276210180858274, "grad_norm": 0.35147562623023987, "learning_rate": 3.5356663937416837e-05, "loss": 0.2035, "step": 23975 }, { "epoch": 0.4276388542075411, "grad_norm": 0.19672538340091705, "learning_rate": 3.535524725324519e-05, "loss": 0.1554, "step": 23976 }, { "epoch": 0.4276566903292548, "grad_norm": 0.16885758936405182, "learning_rate": 3.5353830528933026e-05, "loss": 0.1282, "step": 23977 }, { "epoch": 0.4276745264509685, "grad_norm": 0.258370041847229, "learning_rate": 3.5352413764485845e-05, "loss": 0.1606, "step": 23978 }, { "epoch": 0.42769236257268217, "grad_norm": 0.21713489294052124, "learning_rate": 3.535099695990913e-05, "loss": 0.1149, "step": 23979 }, { "epoch": 0.4277101986943959, "grad_norm": 0.5765313506126404, "learning_rate": 3.534958011520838e-05, "loss": 0.1654, "step": 23980 }, { "epoch": 0.4277280348161096, "grad_norm": 0.25585129857063293, "learning_rate": 3.534816323038907e-05, "loss": 0.1636, "step": 23981 }, { "epoch": 0.4277458709378233, "grad_norm": 0.281139075756073, "learning_rate": 3.534674630545672e-05, "loss": 0.1273, "step": 23982 }, { "epoch": 0.427763707059537, "grad_norm": 0.32164880633354187, "learning_rate": 3.5345329340416796e-05, "loss": 0.1298, "step": 23983 }, { "epoch": 0.42778154318125067, "grad_norm": 0.26561206579208374, "learning_rate": 3.5343912335274816e-05, "loss": 0.1763, "step": 23984 }, { "epoch": 0.42779937930296436, "grad_norm": 0.2745407521724701, "learning_rate": 3.534249529003625e-05, "loss": 0.1631, "step": 23985 }, { "epoch": 0.42781721542467804, "grad_norm": 0.24976280331611633, "learning_rate": 3.53410782047066e-05, "loss": 0.1884, "step": 23986 }, { "epoch": 0.42783505154639173, "grad_norm": 0.27511921525001526, "learning_rate": 3.533966107929136e-05, "loss": 0.1445, "step": 23987 }, { "epoch": 0.4278528876681055, "grad_norm": 0.3072541654109955, "learning_rate": 3.533824391379602e-05, "loss": 0.1458, "step": 23988 }, { "epoch": 0.42787072378981916, "grad_norm": 0.3907562792301178, "learning_rate": 3.533682670822608e-05, "loss": 0.2003, "step": 23989 }, { "epoch": 0.42788855991153285, "grad_norm": 0.30391618609428406, "learning_rate": 3.5335409462587026e-05, "loss": 0.1211, "step": 23990 }, { "epoch": 0.42790639603324654, "grad_norm": 0.24945279955863953, "learning_rate": 3.5333992176884354e-05, "loss": 0.1486, "step": 23991 }, { "epoch": 0.4279242321549602, "grad_norm": 0.2895711362361908, "learning_rate": 3.533257485112357e-05, "loss": 0.1407, "step": 23992 }, { "epoch": 0.4279420682766739, "grad_norm": 0.2654116749763489, "learning_rate": 3.533115748531015e-05, "loss": 0.1268, "step": 23993 }, { "epoch": 0.4279599043983876, "grad_norm": 0.23434217274188995, "learning_rate": 3.5329740079449594e-05, "loss": 0.1472, "step": 23994 }, { "epoch": 0.4279777405201013, "grad_norm": 0.26843225955963135, "learning_rate": 3.532832263354739e-05, "loss": 0.1597, "step": 23995 }, { "epoch": 0.427995576641815, "grad_norm": 0.28946638107299805, "learning_rate": 3.5326905147609046e-05, "loss": 0.1113, "step": 23996 }, { "epoch": 0.4280134127635287, "grad_norm": 0.3010145425796509, "learning_rate": 3.532548762164006e-05, "loss": 0.1529, "step": 23997 }, { "epoch": 0.4280312488852424, "grad_norm": 0.325039803981781, "learning_rate": 3.5324070055645905e-05, "loss": 0.1555, "step": 23998 }, { "epoch": 0.4280490850069561, "grad_norm": 0.300077348947525, "learning_rate": 3.53226524496321e-05, "loss": 0.1509, "step": 23999 }, { "epoch": 0.4280669211286698, "grad_norm": 0.2601543664932251, "learning_rate": 3.532123480360412e-05, "loss": 0.1475, "step": 24000 }, { "epoch": 0.4280669211286698, "eval_loss": 0.14888684451580048, "eval_runtime": 106.9842, "eval_samples_per_second": 9.572, "eval_steps_per_second": 1.598, "step": 24000 }, { "epoch": 0.4280847572503835, "grad_norm": 0.24454689025878906, "learning_rate": 3.5319817117567475e-05, "loss": 0.1194, "step": 24001 }, { "epoch": 0.42810259337209716, "grad_norm": 0.3253958821296692, "learning_rate": 3.531839939152765e-05, "loss": 0.1408, "step": 24002 }, { "epoch": 0.42812042949381085, "grad_norm": 0.289745569229126, "learning_rate": 3.531698162549015e-05, "loss": 0.2044, "step": 24003 }, { "epoch": 0.42813826561552454, "grad_norm": 0.3855065703392029, "learning_rate": 3.531556381946046e-05, "loss": 0.147, "step": 24004 }, { "epoch": 0.4281561017372383, "grad_norm": 0.25362133979797363, "learning_rate": 3.531414597344409e-05, "loss": 0.165, "step": 24005 }, { "epoch": 0.42817393785895197, "grad_norm": 0.3345668315887451, "learning_rate": 3.5312728087446524e-05, "loss": 0.1857, "step": 24006 }, { "epoch": 0.42819177398066566, "grad_norm": 0.44475558400154114, "learning_rate": 3.5311310161473254e-05, "loss": 0.2183, "step": 24007 }, { "epoch": 0.42820961010237935, "grad_norm": 0.2567703425884247, "learning_rate": 3.5309892195529794e-05, "loss": 0.1221, "step": 24008 }, { "epoch": 0.42822744622409303, "grad_norm": 0.21394670009613037, "learning_rate": 3.5308474189621625e-05, "loss": 0.1506, "step": 24009 }, { "epoch": 0.4282452823458067, "grad_norm": 0.2452140897512436, "learning_rate": 3.530705614375425e-05, "loss": 0.1426, "step": 24010 }, { "epoch": 0.4282631184675204, "grad_norm": 0.29380446672439575, "learning_rate": 3.5305638057933164e-05, "loss": 0.1625, "step": 24011 }, { "epoch": 0.4282809545892341, "grad_norm": 0.17671631276607513, "learning_rate": 3.530421993216387e-05, "loss": 0.1598, "step": 24012 }, { "epoch": 0.4282987907109478, "grad_norm": 0.2578020989894867, "learning_rate": 3.530280176645186e-05, "loss": 0.155, "step": 24013 }, { "epoch": 0.42831662683266153, "grad_norm": 0.2121681421995163, "learning_rate": 3.530138356080264e-05, "loss": 0.1123, "step": 24014 }, { "epoch": 0.4283344629543752, "grad_norm": 0.3155827224254608, "learning_rate": 3.5299965315221694e-05, "loss": 0.2084, "step": 24015 }, { "epoch": 0.4283522990760889, "grad_norm": 0.23339667916297913, "learning_rate": 3.5298547029714515e-05, "loss": 0.1449, "step": 24016 }, { "epoch": 0.4283701351978026, "grad_norm": 0.3760417401790619, "learning_rate": 3.529712870428662e-05, "loss": 0.1686, "step": 24017 }, { "epoch": 0.4283879713195163, "grad_norm": 0.27320724725723267, "learning_rate": 3.5295710338943495e-05, "loss": 0.1347, "step": 24018 }, { "epoch": 0.42840580744122997, "grad_norm": 0.2863955795764923, "learning_rate": 3.5294291933690646e-05, "loss": 0.111, "step": 24019 }, { "epoch": 0.42842364356294366, "grad_norm": 0.2796202600002289, "learning_rate": 3.529287348853355e-05, "loss": 0.193, "step": 24020 }, { "epoch": 0.42844147968465734, "grad_norm": 0.22371521592140198, "learning_rate": 3.5291455003477744e-05, "loss": 0.163, "step": 24021 }, { "epoch": 0.4284593158063711, "grad_norm": 0.3729160726070404, "learning_rate": 3.529003647852869e-05, "loss": 0.0918, "step": 24022 }, { "epoch": 0.4284771519280848, "grad_norm": 0.1812201291322708, "learning_rate": 3.52886179136919e-05, "loss": 0.1463, "step": 24023 }, { "epoch": 0.42849498804979846, "grad_norm": 0.2121412456035614, "learning_rate": 3.528719930897287e-05, "loss": 0.1115, "step": 24024 }, { "epoch": 0.42851282417151215, "grad_norm": 0.2475673407316208, "learning_rate": 3.528578066437711e-05, "loss": 0.1651, "step": 24025 }, { "epoch": 0.42853066029322584, "grad_norm": 0.2292526662349701, "learning_rate": 3.5284361979910106e-05, "loss": 0.1542, "step": 24026 }, { "epoch": 0.42854849641493953, "grad_norm": 0.1916513293981552, "learning_rate": 3.528294325557737e-05, "loss": 0.1536, "step": 24027 }, { "epoch": 0.4285663325366532, "grad_norm": 0.2592538595199585, "learning_rate": 3.52815244913844e-05, "loss": 0.145, "step": 24028 }, { "epoch": 0.4285841686583669, "grad_norm": 0.2909541428089142, "learning_rate": 3.528010568733668e-05, "loss": 0.1873, "step": 24029 }, { "epoch": 0.4286020047800806, "grad_norm": 0.27688759565353394, "learning_rate": 3.527868684343972e-05, "loss": 0.1267, "step": 24030 }, { "epoch": 0.42861984090179434, "grad_norm": 0.21342121064662933, "learning_rate": 3.5277267959699014e-05, "loss": 0.1118, "step": 24031 }, { "epoch": 0.428637677023508, "grad_norm": 0.24847741425037384, "learning_rate": 3.527584903612008e-05, "loss": 0.194, "step": 24032 }, { "epoch": 0.4286555131452217, "grad_norm": 0.23573623597621918, "learning_rate": 3.52744300727084e-05, "loss": 0.1229, "step": 24033 }, { "epoch": 0.4286733492669354, "grad_norm": 0.21822835505008698, "learning_rate": 3.527301106946948e-05, "loss": 0.1757, "step": 24034 }, { "epoch": 0.4286911853886491, "grad_norm": 0.2192905992269516, "learning_rate": 3.5271592026408815e-05, "loss": 0.1275, "step": 24035 }, { "epoch": 0.4287090215103628, "grad_norm": 0.3949853181838989, "learning_rate": 3.5270172943531924e-05, "loss": 0.2046, "step": 24036 }, { "epoch": 0.42872685763207646, "grad_norm": 0.21146616339683533, "learning_rate": 3.526875382084429e-05, "loss": 0.1114, "step": 24037 }, { "epoch": 0.42874469375379015, "grad_norm": 0.22338072955608368, "learning_rate": 3.526733465835141e-05, "loss": 0.1303, "step": 24038 }, { "epoch": 0.4287625298755039, "grad_norm": 0.30598244071006775, "learning_rate": 3.526591545605881e-05, "loss": 0.1044, "step": 24039 }, { "epoch": 0.4287803659972176, "grad_norm": 0.2424745261669159, "learning_rate": 3.526449621397197e-05, "loss": 0.1644, "step": 24040 }, { "epoch": 0.42879820211893127, "grad_norm": 0.2184273898601532, "learning_rate": 3.52630769320964e-05, "loss": 0.1386, "step": 24041 }, { "epoch": 0.42881603824064496, "grad_norm": 0.21332210302352905, "learning_rate": 3.5261657610437594e-05, "loss": 0.164, "step": 24042 }, { "epoch": 0.42883387436235865, "grad_norm": 0.32384905219078064, "learning_rate": 3.5260238249001065e-05, "loss": 0.1984, "step": 24043 }, { "epoch": 0.42885171048407233, "grad_norm": 0.30819737911224365, "learning_rate": 3.52588188477923e-05, "loss": 0.1774, "step": 24044 }, { "epoch": 0.428869546605786, "grad_norm": 0.24642221629619598, "learning_rate": 3.5257399406816815e-05, "loss": 0.1758, "step": 24045 }, { "epoch": 0.4288873827274997, "grad_norm": 0.2304585725069046, "learning_rate": 3.525597992608011e-05, "loss": 0.1473, "step": 24046 }, { "epoch": 0.42890521884921345, "grad_norm": 0.20225901901721954, "learning_rate": 3.5254560405587676e-05, "loss": 0.1626, "step": 24047 }, { "epoch": 0.42892305497092714, "grad_norm": 0.3168150782585144, "learning_rate": 3.5253140845345026e-05, "loss": 0.1789, "step": 24048 }, { "epoch": 0.42894089109264083, "grad_norm": 0.2006775587797165, "learning_rate": 3.525172124535767e-05, "loss": 0.1479, "step": 24049 }, { "epoch": 0.4289587272143545, "grad_norm": 0.2692861557006836, "learning_rate": 3.525030160563109e-05, "loss": 0.1794, "step": 24050 }, { "epoch": 0.4289765633360682, "grad_norm": 0.3805452287197113, "learning_rate": 3.5248881926170804e-05, "loss": 0.1632, "step": 24051 }, { "epoch": 0.4289943994577819, "grad_norm": 0.26520583033561707, "learning_rate": 3.524746220698232e-05, "loss": 0.1735, "step": 24052 }, { "epoch": 0.4290122355794956, "grad_norm": 0.20892249047756195, "learning_rate": 3.524604244807113e-05, "loss": 0.145, "step": 24053 }, { "epoch": 0.42903007170120927, "grad_norm": 0.26491081714630127, "learning_rate": 3.524462264944274e-05, "loss": 0.1476, "step": 24054 }, { "epoch": 0.42904790782292296, "grad_norm": 0.2512350082397461, "learning_rate": 3.524320281110265e-05, "loss": 0.1618, "step": 24055 }, { "epoch": 0.4290657439446367, "grad_norm": 0.26457685232162476, "learning_rate": 3.524178293305637e-05, "loss": 0.1167, "step": 24056 }, { "epoch": 0.4290835800663504, "grad_norm": 0.27770259976387024, "learning_rate": 3.52403630153094e-05, "loss": 0.131, "step": 24057 }, { "epoch": 0.4291014161880641, "grad_norm": 0.22504554688930511, "learning_rate": 3.523894305786725e-05, "loss": 0.1385, "step": 24058 }, { "epoch": 0.42911925230977777, "grad_norm": 0.3124557137489319, "learning_rate": 3.523752306073541e-05, "loss": 0.2122, "step": 24059 }, { "epoch": 0.42913708843149145, "grad_norm": 0.30401766300201416, "learning_rate": 3.523610302391941e-05, "loss": 0.1464, "step": 24060 }, { "epoch": 0.42915492455320514, "grad_norm": 0.3035971522331238, "learning_rate": 3.523468294742473e-05, "loss": 0.1818, "step": 24061 }, { "epoch": 0.42917276067491883, "grad_norm": 0.3898014426231384, "learning_rate": 3.523326283125689e-05, "loss": 0.14, "step": 24062 }, { "epoch": 0.4291905967966325, "grad_norm": 0.2723587453365326, "learning_rate": 3.523184267542138e-05, "loss": 0.1826, "step": 24063 }, { "epoch": 0.42920843291834626, "grad_norm": 0.24735338985919952, "learning_rate": 3.5230422479923726e-05, "loss": 0.1745, "step": 24064 }, { "epoch": 0.42922626904005995, "grad_norm": 0.22546587884426117, "learning_rate": 3.522900224476941e-05, "loss": 0.0991, "step": 24065 }, { "epoch": 0.42924410516177364, "grad_norm": 0.3785165250301361, "learning_rate": 3.522758196996395e-05, "loss": 0.1395, "step": 24066 }, { "epoch": 0.4292619412834873, "grad_norm": 0.23490187525749207, "learning_rate": 3.522616165551286e-05, "loss": 0.131, "step": 24067 }, { "epoch": 0.429279777405201, "grad_norm": 0.23220893740653992, "learning_rate": 3.522474130142162e-05, "loss": 0.1722, "step": 24068 }, { "epoch": 0.4292976135269147, "grad_norm": 0.22375334799289703, "learning_rate": 3.522332090769576e-05, "loss": 0.1535, "step": 24069 }, { "epoch": 0.4293154496486284, "grad_norm": 0.2489255964756012, "learning_rate": 3.522190047434078e-05, "loss": 0.162, "step": 24070 }, { "epoch": 0.4293332857703421, "grad_norm": 0.2375880628824234, "learning_rate": 3.5220480001362176e-05, "loss": 0.0928, "step": 24071 }, { "epoch": 0.42935112189205576, "grad_norm": 0.26167815923690796, "learning_rate": 3.5219059488765465e-05, "loss": 0.1337, "step": 24072 }, { "epoch": 0.4293689580137695, "grad_norm": 0.23371227085590363, "learning_rate": 3.521763893655615e-05, "loss": 0.1342, "step": 24073 }, { "epoch": 0.4293867941354832, "grad_norm": 0.3406495749950409, "learning_rate": 3.521621834473973e-05, "loss": 0.1793, "step": 24074 }, { "epoch": 0.4294046302571969, "grad_norm": 0.28165772557258606, "learning_rate": 3.521479771332173e-05, "loss": 0.1951, "step": 24075 }, { "epoch": 0.4294224663789106, "grad_norm": 0.32539570331573486, "learning_rate": 3.521337704230764e-05, "loss": 0.2462, "step": 24076 }, { "epoch": 0.42944030250062426, "grad_norm": 0.31212443113327026, "learning_rate": 3.5211956331702975e-05, "loss": 0.1754, "step": 24077 }, { "epoch": 0.42945813862233795, "grad_norm": 0.3066851496696472, "learning_rate": 3.521053558151324e-05, "loss": 0.1004, "step": 24078 }, { "epoch": 0.42947597474405164, "grad_norm": 0.2351367473602295, "learning_rate": 3.520911479174394e-05, "loss": 0.1825, "step": 24079 }, { "epoch": 0.4294938108657653, "grad_norm": 0.3087090253829956, "learning_rate": 3.520769396240058e-05, "loss": 0.1798, "step": 24080 }, { "epoch": 0.42951164698747907, "grad_norm": 0.29715174436569214, "learning_rate": 3.520627309348869e-05, "loss": 0.1759, "step": 24081 }, { "epoch": 0.42952948310919276, "grad_norm": 0.31474366784095764, "learning_rate": 3.5204852185013755e-05, "loss": 0.1765, "step": 24082 }, { "epoch": 0.42954731923090644, "grad_norm": 0.26054781675338745, "learning_rate": 3.520343123698128e-05, "loss": 0.1492, "step": 24083 }, { "epoch": 0.42956515535262013, "grad_norm": 0.33756422996520996, "learning_rate": 3.520201024939679e-05, "loss": 0.1337, "step": 24084 }, { "epoch": 0.4295829914743338, "grad_norm": 0.3311472535133362, "learning_rate": 3.5200589222265775e-05, "loss": 0.1964, "step": 24085 }, { "epoch": 0.4296008275960475, "grad_norm": 0.26739028096199036, "learning_rate": 3.519916815559375e-05, "loss": 0.1265, "step": 24086 }, { "epoch": 0.4296186637177612, "grad_norm": 0.25582271814346313, "learning_rate": 3.5197747049386234e-05, "loss": 0.1404, "step": 24087 }, { "epoch": 0.4296364998394749, "grad_norm": 0.2394641935825348, "learning_rate": 3.519632590364873e-05, "loss": 0.1786, "step": 24088 }, { "epoch": 0.42965433596118857, "grad_norm": 0.2776090204715729, "learning_rate": 3.5194904718386744e-05, "loss": 0.1849, "step": 24089 }, { "epoch": 0.4296721720829023, "grad_norm": 0.2702847123146057, "learning_rate": 3.5193483493605795e-05, "loss": 0.1532, "step": 24090 }, { "epoch": 0.429690008204616, "grad_norm": 0.2719164490699768, "learning_rate": 3.519206222931137e-05, "loss": 0.1577, "step": 24091 }, { "epoch": 0.4297078443263297, "grad_norm": 0.23871085047721863, "learning_rate": 3.5190640925509e-05, "loss": 0.1551, "step": 24092 }, { "epoch": 0.4297256804480434, "grad_norm": 0.2889685034751892, "learning_rate": 3.518921958220418e-05, "loss": 0.1347, "step": 24093 }, { "epoch": 0.42974351656975707, "grad_norm": 0.2794472873210907, "learning_rate": 3.518779819940242e-05, "loss": 0.1274, "step": 24094 }, { "epoch": 0.42976135269147075, "grad_norm": 0.23831377923488617, "learning_rate": 3.5186376777109246e-05, "loss": 0.1473, "step": 24095 }, { "epoch": 0.42977918881318444, "grad_norm": 0.29172539710998535, "learning_rate": 3.518495531533015e-05, "loss": 0.1713, "step": 24096 }, { "epoch": 0.42979702493489813, "grad_norm": 0.27400290966033936, "learning_rate": 3.5183533814070656e-05, "loss": 0.2138, "step": 24097 }, { "epoch": 0.4298148610566119, "grad_norm": 0.27783259749412537, "learning_rate": 3.518211227333627e-05, "loss": 0.1305, "step": 24098 }, { "epoch": 0.42983269717832556, "grad_norm": 0.23592181503772736, "learning_rate": 3.5180690693132495e-05, "loss": 0.1516, "step": 24099 }, { "epoch": 0.42985053330003925, "grad_norm": 0.21255047619342804, "learning_rate": 3.517926907346484e-05, "loss": 0.132, "step": 24100 }, { "epoch": 0.42986836942175294, "grad_norm": 0.2559812366962433, "learning_rate": 3.5177847414338833e-05, "loss": 0.2054, "step": 24101 }, { "epoch": 0.4298862055434666, "grad_norm": 0.1958625167608261, "learning_rate": 3.517642571575996e-05, "loss": 0.1056, "step": 24102 }, { "epoch": 0.4299040416651803, "grad_norm": 0.24039381742477417, "learning_rate": 3.5175003977733766e-05, "loss": 0.0952, "step": 24103 }, { "epoch": 0.429921877786894, "grad_norm": 0.18486778438091278, "learning_rate": 3.517358220026573e-05, "loss": 0.0977, "step": 24104 }, { "epoch": 0.4299397139086077, "grad_norm": 0.38379645347595215, "learning_rate": 3.517216038336138e-05, "loss": 0.1363, "step": 24105 }, { "epoch": 0.42995755003032143, "grad_norm": 0.2336011677980423, "learning_rate": 3.517073852702622e-05, "loss": 0.1431, "step": 24106 }, { "epoch": 0.4299753861520351, "grad_norm": 0.3343081772327423, "learning_rate": 3.5169316631265764e-05, "loss": 0.1608, "step": 24107 }, { "epoch": 0.4299932222737488, "grad_norm": 0.24900609254837036, "learning_rate": 3.5167894696085526e-05, "loss": 0.1585, "step": 24108 }, { "epoch": 0.4300110583954625, "grad_norm": 0.36066752672195435, "learning_rate": 3.5166472721491016e-05, "loss": 0.1924, "step": 24109 }, { "epoch": 0.4300288945171762, "grad_norm": 0.3907620906829834, "learning_rate": 3.516505070748775e-05, "loss": 0.1667, "step": 24110 }, { "epoch": 0.4300467306388899, "grad_norm": 0.2419070452451706, "learning_rate": 3.5163628654081234e-05, "loss": 0.1482, "step": 24111 }, { "epoch": 0.43006456676060356, "grad_norm": 0.18374262750148773, "learning_rate": 3.516220656127698e-05, "loss": 0.1589, "step": 24112 }, { "epoch": 0.43008240288231725, "grad_norm": 0.3223990499973297, "learning_rate": 3.5160784429080504e-05, "loss": 0.1562, "step": 24113 }, { "epoch": 0.43010023900403094, "grad_norm": 0.27670204639434814, "learning_rate": 3.515936225749732e-05, "loss": 0.1907, "step": 24114 }, { "epoch": 0.4301180751257447, "grad_norm": 0.2766922414302826, "learning_rate": 3.5157940046532934e-05, "loss": 0.153, "step": 24115 }, { "epoch": 0.43013591124745837, "grad_norm": 0.25355401635169983, "learning_rate": 3.515651779619287e-05, "loss": 0.1543, "step": 24116 }, { "epoch": 0.43015374736917206, "grad_norm": 0.430664598941803, "learning_rate": 3.515509550648264e-05, "loss": 0.1494, "step": 24117 }, { "epoch": 0.43017158349088574, "grad_norm": 0.33976903557777405, "learning_rate": 3.5153673177407745e-05, "loss": 0.1856, "step": 24118 }, { "epoch": 0.43018941961259943, "grad_norm": 0.28228557109832764, "learning_rate": 3.51522508089737e-05, "loss": 0.1725, "step": 24119 }, { "epoch": 0.4302072557343131, "grad_norm": 0.31134703755378723, "learning_rate": 3.5150828401186034e-05, "loss": 0.1549, "step": 24120 }, { "epoch": 0.4302250918560268, "grad_norm": 0.3672589361667633, "learning_rate": 3.5149405954050254e-05, "loss": 0.1775, "step": 24121 }, { "epoch": 0.4302429279777405, "grad_norm": 0.24515683948993683, "learning_rate": 3.514798346757186e-05, "loss": 0.1848, "step": 24122 }, { "epoch": 0.43026076409945424, "grad_norm": 0.24136663973331451, "learning_rate": 3.514656094175638e-05, "loss": 0.1735, "step": 24123 }, { "epoch": 0.43027860022116793, "grad_norm": 0.2479276806116104, "learning_rate": 3.5145138376609335e-05, "loss": 0.1203, "step": 24124 }, { "epoch": 0.4302964363428816, "grad_norm": 0.23985235393047333, "learning_rate": 3.514371577213622e-05, "loss": 0.1578, "step": 24125 }, { "epoch": 0.4303142724645953, "grad_norm": 0.2573501765727997, "learning_rate": 3.5142293128342566e-05, "loss": 0.107, "step": 24126 }, { "epoch": 0.430332108586309, "grad_norm": 0.27935051918029785, "learning_rate": 3.514087044523387e-05, "loss": 0.1949, "step": 24127 }, { "epoch": 0.4303499447080227, "grad_norm": 0.29461532831192017, "learning_rate": 3.513944772281566e-05, "loss": 0.1878, "step": 24128 }, { "epoch": 0.43036778082973637, "grad_norm": 0.20467537641525269, "learning_rate": 3.5138024961093456e-05, "loss": 0.1501, "step": 24129 }, { "epoch": 0.43038561695145006, "grad_norm": 0.20023281872272491, "learning_rate": 3.5136602160072765e-05, "loss": 0.1275, "step": 24130 }, { "epoch": 0.43040345307316374, "grad_norm": 0.23346124589443207, "learning_rate": 3.5135179319759096e-05, "loss": 0.157, "step": 24131 }, { "epoch": 0.4304212891948775, "grad_norm": 0.22761325538158417, "learning_rate": 3.513375644015797e-05, "loss": 0.1411, "step": 24132 }, { "epoch": 0.4304391253165912, "grad_norm": 0.45452895760536194, "learning_rate": 3.513233352127492e-05, "loss": 0.1637, "step": 24133 }, { "epoch": 0.43045696143830486, "grad_norm": 0.2729017734527588, "learning_rate": 3.513091056311543e-05, "loss": 0.1747, "step": 24134 }, { "epoch": 0.43047479756001855, "grad_norm": 0.19828414916992188, "learning_rate": 3.512948756568504e-05, "loss": 0.1598, "step": 24135 }, { "epoch": 0.43049263368173224, "grad_norm": 0.22937597334384918, "learning_rate": 3.5128064528989255e-05, "loss": 0.142, "step": 24136 }, { "epoch": 0.4305104698034459, "grad_norm": 0.27797451615333557, "learning_rate": 3.512664145303359e-05, "loss": 0.1442, "step": 24137 }, { "epoch": 0.4305283059251596, "grad_norm": 0.2587609589099884, "learning_rate": 3.512521833782357e-05, "loss": 0.1396, "step": 24138 }, { "epoch": 0.4305461420468733, "grad_norm": 0.29430365562438965, "learning_rate": 3.512379518336471e-05, "loss": 0.1195, "step": 24139 }, { "epoch": 0.43056397816858705, "grad_norm": 0.6127382516860962, "learning_rate": 3.512237198966251e-05, "loss": 0.1425, "step": 24140 }, { "epoch": 0.43058181429030074, "grad_norm": 0.26574718952178955, "learning_rate": 3.5120948756722514e-05, "loss": 0.1522, "step": 24141 }, { "epoch": 0.4305996504120144, "grad_norm": 0.4380887448787689, "learning_rate": 3.511952548455021e-05, "loss": 0.1494, "step": 24142 }, { "epoch": 0.4306174865337281, "grad_norm": 0.31454208493232727, "learning_rate": 3.511810217315114e-05, "loss": 0.1933, "step": 24143 }, { "epoch": 0.4306353226554418, "grad_norm": 0.20737364888191223, "learning_rate": 3.5116678822530814e-05, "loss": 0.13, "step": 24144 }, { "epoch": 0.4306531587771555, "grad_norm": 0.2648198902606964, "learning_rate": 3.5115255432694736e-05, "loss": 0.1457, "step": 24145 }, { "epoch": 0.4306709948988692, "grad_norm": 0.18288712203502655, "learning_rate": 3.511383200364845e-05, "loss": 0.1332, "step": 24146 }, { "epoch": 0.43068883102058286, "grad_norm": 0.23093189299106598, "learning_rate": 3.511240853539745e-05, "loss": 0.1393, "step": 24147 }, { "epoch": 0.4307066671422966, "grad_norm": 0.28162264823913574, "learning_rate": 3.511098502794726e-05, "loss": 0.142, "step": 24148 }, { "epoch": 0.4307245032640103, "grad_norm": 0.36396291851997375, "learning_rate": 3.51095614813034e-05, "loss": 0.1876, "step": 24149 }, { "epoch": 0.430742339385724, "grad_norm": 0.280042827129364, "learning_rate": 3.5108137895471385e-05, "loss": 0.1493, "step": 24150 }, { "epoch": 0.43076017550743767, "grad_norm": 0.3160635828971863, "learning_rate": 3.5106714270456745e-05, "loss": 0.2073, "step": 24151 }, { "epoch": 0.43077801162915136, "grad_norm": 0.24016617238521576, "learning_rate": 3.510529060626498e-05, "loss": 0.1015, "step": 24152 }, { "epoch": 0.43079584775086505, "grad_norm": 0.25095823407173157, "learning_rate": 3.510386690290163e-05, "loss": 0.0878, "step": 24153 }, { "epoch": 0.43081368387257873, "grad_norm": 0.33107972145080566, "learning_rate": 3.510244316037219e-05, "loss": 0.2136, "step": 24154 }, { "epoch": 0.4308315199942924, "grad_norm": 0.2084999680519104, "learning_rate": 3.510101937868219e-05, "loss": 0.1279, "step": 24155 }, { "epoch": 0.4308493561160061, "grad_norm": 0.31072115898132324, "learning_rate": 3.5099595557837154e-05, "loss": 0.1266, "step": 24156 }, { "epoch": 0.43086719223771985, "grad_norm": 0.30046194791793823, "learning_rate": 3.5098171697842604e-05, "loss": 0.1735, "step": 24157 }, { "epoch": 0.43088502835943354, "grad_norm": 0.37660181522369385, "learning_rate": 3.509674779870405e-05, "loss": 0.2102, "step": 24158 }, { "epoch": 0.43090286448114723, "grad_norm": 0.26191726326942444, "learning_rate": 3.509532386042702e-05, "loss": 0.1417, "step": 24159 }, { "epoch": 0.4309207006028609, "grad_norm": 0.2836020588874817, "learning_rate": 3.509389988301702e-05, "loss": 0.1806, "step": 24160 }, { "epoch": 0.4309385367245746, "grad_norm": 0.2684483528137207, "learning_rate": 3.509247586647957e-05, "loss": 0.1514, "step": 24161 }, { "epoch": 0.4309563728462883, "grad_norm": 0.28021323680877686, "learning_rate": 3.509105181082021e-05, "loss": 0.1504, "step": 24162 }, { "epoch": 0.430974208968002, "grad_norm": 0.3087092936038971, "learning_rate": 3.5089627716044436e-05, "loss": 0.1399, "step": 24163 }, { "epoch": 0.43099204508971567, "grad_norm": 0.24836251139640808, "learning_rate": 3.50882035821578e-05, "loss": 0.1884, "step": 24164 }, { "epoch": 0.4310098812114294, "grad_norm": 0.24367257952690125, "learning_rate": 3.5086779409165784e-05, "loss": 0.1537, "step": 24165 }, { "epoch": 0.4310277173331431, "grad_norm": 0.2773861587047577, "learning_rate": 3.508535519707393e-05, "loss": 0.1383, "step": 24166 }, { "epoch": 0.4310455534548568, "grad_norm": 0.21887654066085815, "learning_rate": 3.5083930945887764e-05, "loss": 0.1239, "step": 24167 }, { "epoch": 0.4310633895765705, "grad_norm": 0.1991252303123474, "learning_rate": 3.5082506655612796e-05, "loss": 0.1422, "step": 24168 }, { "epoch": 0.43108122569828417, "grad_norm": 0.2893853187561035, "learning_rate": 3.508108232625454e-05, "loss": 0.1292, "step": 24169 }, { "epoch": 0.43109906181999785, "grad_norm": 0.27717188000679016, "learning_rate": 3.507965795781854e-05, "loss": 0.1207, "step": 24170 }, { "epoch": 0.43111689794171154, "grad_norm": 0.25788143277168274, "learning_rate": 3.5078233550310295e-05, "loss": 0.1285, "step": 24171 }, { "epoch": 0.43113473406342523, "grad_norm": 0.22396482527256012, "learning_rate": 3.507680910373534e-05, "loss": 0.1405, "step": 24172 }, { "epoch": 0.4311525701851389, "grad_norm": 0.28769373893737793, "learning_rate": 3.50753846180992e-05, "loss": 0.1434, "step": 24173 }, { "epoch": 0.43117040630685266, "grad_norm": 0.3915373682975769, "learning_rate": 3.507396009340738e-05, "loss": 0.1954, "step": 24174 }, { "epoch": 0.43118824242856635, "grad_norm": 0.2991529107093811, "learning_rate": 3.507253552966542e-05, "loss": 0.133, "step": 24175 }, { "epoch": 0.43120607855028004, "grad_norm": 0.20913895964622498, "learning_rate": 3.507111092687882e-05, "loss": 0.1265, "step": 24176 }, { "epoch": 0.4312239146719937, "grad_norm": 0.2475053369998932, "learning_rate": 3.506968628505312e-05, "loss": 0.1359, "step": 24177 }, { "epoch": 0.4312417507937074, "grad_norm": 0.2712688148021698, "learning_rate": 3.5068261604193844e-05, "loss": 0.1991, "step": 24178 }, { "epoch": 0.4312595869154211, "grad_norm": 0.32760435342788696, "learning_rate": 3.50668368843065e-05, "loss": 0.1482, "step": 24179 }, { "epoch": 0.4312774230371348, "grad_norm": 0.3047506809234619, "learning_rate": 3.5065412125396625e-05, "loss": 0.1899, "step": 24180 }, { "epoch": 0.4312952591588485, "grad_norm": 0.3048126995563507, "learning_rate": 3.506398732746974e-05, "loss": 0.1844, "step": 24181 }, { "epoch": 0.4313130952805622, "grad_norm": 0.25498640537261963, "learning_rate": 3.506256249053136e-05, "loss": 0.1945, "step": 24182 }, { "epoch": 0.4313309314022759, "grad_norm": 0.35354673862457275, "learning_rate": 3.5061137614587005e-05, "loss": 0.1012, "step": 24183 }, { "epoch": 0.4313487675239896, "grad_norm": 0.25961971282958984, "learning_rate": 3.505971269964221e-05, "loss": 0.1693, "step": 24184 }, { "epoch": 0.4313666036457033, "grad_norm": 0.28149473667144775, "learning_rate": 3.50582877457025e-05, "loss": 0.1389, "step": 24185 }, { "epoch": 0.43138443976741697, "grad_norm": 0.2908150851726532, "learning_rate": 3.505686275277339e-05, "loss": 0.1537, "step": 24186 }, { "epoch": 0.43140227588913066, "grad_norm": 0.40581485629081726, "learning_rate": 3.505543772086041e-05, "loss": 0.1668, "step": 24187 }, { "epoch": 0.43142011201084435, "grad_norm": 0.19538424909114838, "learning_rate": 3.505401264996908e-05, "loss": 0.1355, "step": 24188 }, { "epoch": 0.43143794813255804, "grad_norm": 0.282721608877182, "learning_rate": 3.5052587540104916e-05, "loss": 0.1657, "step": 24189 }, { "epoch": 0.4314557842542717, "grad_norm": 0.20696620643138885, "learning_rate": 3.505116239127345e-05, "loss": 0.1284, "step": 24190 }, { "epoch": 0.43147362037598547, "grad_norm": 0.23924341797828674, "learning_rate": 3.504973720348021e-05, "loss": 0.1502, "step": 24191 }, { "epoch": 0.43149145649769916, "grad_norm": 0.23188172280788422, "learning_rate": 3.504831197673072e-05, "loss": 0.1191, "step": 24192 }, { "epoch": 0.43150929261941284, "grad_norm": 0.3421745300292969, "learning_rate": 3.5046886711030505e-05, "loss": 0.1931, "step": 24193 }, { "epoch": 0.43152712874112653, "grad_norm": 0.25157660245895386, "learning_rate": 3.5045461406385085e-05, "loss": 0.1825, "step": 24194 }, { "epoch": 0.4315449648628402, "grad_norm": 0.38536420464515686, "learning_rate": 3.504403606279998e-05, "loss": 0.1672, "step": 24195 }, { "epoch": 0.4315628009845539, "grad_norm": 0.2484949678182602, "learning_rate": 3.504261068028073e-05, "loss": 0.1442, "step": 24196 }, { "epoch": 0.4315806371062676, "grad_norm": 0.21039129793643951, "learning_rate": 3.504118525883286e-05, "loss": 0.1373, "step": 24197 }, { "epoch": 0.4315984732279813, "grad_norm": 0.28027409315109253, "learning_rate": 3.5039759798461866e-05, "loss": 0.1919, "step": 24198 }, { "epoch": 0.431616309349695, "grad_norm": 0.30816641449928284, "learning_rate": 3.503833429917332e-05, "loss": 0.1835, "step": 24199 }, { "epoch": 0.4316341454714087, "grad_norm": 0.34088900685310364, "learning_rate": 3.50369087609727e-05, "loss": 0.1602, "step": 24200 }, { "epoch": 0.4316519815931224, "grad_norm": 0.21640869975090027, "learning_rate": 3.503548318386557e-05, "loss": 0.1039, "step": 24201 }, { "epoch": 0.4316698177148361, "grad_norm": 0.189417764544487, "learning_rate": 3.503405756785743e-05, "loss": 0.1529, "step": 24202 }, { "epoch": 0.4316876538365498, "grad_norm": 0.386018842458725, "learning_rate": 3.503263191295383e-05, "loss": 0.2075, "step": 24203 }, { "epoch": 0.43170548995826347, "grad_norm": 0.2628712058067322, "learning_rate": 3.503120621916027e-05, "loss": 0.1434, "step": 24204 }, { "epoch": 0.43172332607997715, "grad_norm": 0.31899526715278625, "learning_rate": 3.50297804864823e-05, "loss": 0.1411, "step": 24205 }, { "epoch": 0.43174116220169084, "grad_norm": 0.2391701340675354, "learning_rate": 3.502835471492543e-05, "loss": 0.1539, "step": 24206 }, { "epoch": 0.4317589983234046, "grad_norm": 0.25236669182777405, "learning_rate": 3.502692890449521e-05, "loss": 0.1211, "step": 24207 }, { "epoch": 0.4317768344451183, "grad_norm": 0.3210826516151428, "learning_rate": 3.5025503055197126e-05, "loss": 0.1513, "step": 24208 }, { "epoch": 0.43179467056683196, "grad_norm": 0.23876696825027466, "learning_rate": 3.5024077167036746e-05, "loss": 0.1968, "step": 24209 }, { "epoch": 0.43181250668854565, "grad_norm": 0.24284358322620392, "learning_rate": 3.502265124001958e-05, "loss": 0.1156, "step": 24210 }, { "epoch": 0.43183034281025934, "grad_norm": 0.3190374970436096, "learning_rate": 3.502122527415114e-05, "loss": 0.1759, "step": 24211 }, { "epoch": 0.431848178931973, "grad_norm": 0.4405178725719452, "learning_rate": 3.501979926943699e-05, "loss": 0.2023, "step": 24212 }, { "epoch": 0.4318660150536867, "grad_norm": 0.36915671825408936, "learning_rate": 3.501837322588263e-05, "loss": 0.1438, "step": 24213 }, { "epoch": 0.4318838511754004, "grad_norm": 0.26710161566734314, "learning_rate": 3.50169471434936e-05, "loss": 0.1664, "step": 24214 }, { "epoch": 0.4319016872971141, "grad_norm": 0.2517680525779724, "learning_rate": 3.501552102227541e-05, "loss": 0.1789, "step": 24215 }, { "epoch": 0.43191952341882783, "grad_norm": 0.34554535150527954, "learning_rate": 3.501409486223361e-05, "loss": 0.0978, "step": 24216 }, { "epoch": 0.4319373595405415, "grad_norm": 0.26666978001594543, "learning_rate": 3.501266866337372e-05, "loss": 0.1112, "step": 24217 }, { "epoch": 0.4319551956622552, "grad_norm": 0.20696979761123657, "learning_rate": 3.5011242425701266e-05, "loss": 0.1441, "step": 24218 }, { "epoch": 0.4319730317839689, "grad_norm": 0.4186224639415741, "learning_rate": 3.500981614922177e-05, "loss": 0.1665, "step": 24219 }, { "epoch": 0.4319908679056826, "grad_norm": 0.27967795729637146, "learning_rate": 3.500838983394078e-05, "loss": 0.1569, "step": 24220 }, { "epoch": 0.4320087040273963, "grad_norm": 0.5172132849693298, "learning_rate": 3.5006963479863807e-05, "loss": 0.1595, "step": 24221 }, { "epoch": 0.43202654014910996, "grad_norm": 0.2538737952709198, "learning_rate": 3.50055370869964e-05, "loss": 0.151, "step": 24222 }, { "epoch": 0.43204437627082365, "grad_norm": 0.3613035976886749, "learning_rate": 3.500411065534407e-05, "loss": 0.1734, "step": 24223 }, { "epoch": 0.4320622123925374, "grad_norm": 0.31018775701522827, "learning_rate": 3.5002684184912347e-05, "loss": 0.1463, "step": 24224 }, { "epoch": 0.4320800485142511, "grad_norm": 0.2684520184993744, "learning_rate": 3.5001257675706767e-05, "loss": 0.1515, "step": 24225 }, { "epoch": 0.43209788463596477, "grad_norm": 0.27552419900894165, "learning_rate": 3.4999831127732854e-05, "loss": 0.1268, "step": 24226 }, { "epoch": 0.43211572075767846, "grad_norm": 0.2797854542732239, "learning_rate": 3.499840454099615e-05, "loss": 0.1148, "step": 24227 }, { "epoch": 0.43213355687939214, "grad_norm": 0.32367444038391113, "learning_rate": 3.499697791550217e-05, "loss": 0.2238, "step": 24228 }, { "epoch": 0.43215139300110583, "grad_norm": 0.2627507448196411, "learning_rate": 3.499555125125647e-05, "loss": 0.1562, "step": 24229 }, { "epoch": 0.4321692291228195, "grad_norm": 0.2851807773113251, "learning_rate": 3.4994124548264535e-05, "loss": 0.1497, "step": 24230 }, { "epoch": 0.4321870652445332, "grad_norm": 0.2750164866447449, "learning_rate": 3.499269780653193e-05, "loss": 0.1554, "step": 24231 }, { "epoch": 0.4322049013662469, "grad_norm": 0.24700988829135895, "learning_rate": 3.499127102606418e-05, "loss": 0.1287, "step": 24232 }, { "epoch": 0.43222273748796064, "grad_norm": 0.2587621510028839, "learning_rate": 3.4989844206866807e-05, "loss": 0.1795, "step": 24233 }, { "epoch": 0.43224057360967433, "grad_norm": 0.522209107875824, "learning_rate": 3.498841734894535e-05, "loss": 0.1916, "step": 24234 }, { "epoch": 0.432258409731388, "grad_norm": 0.24742832779884338, "learning_rate": 3.498699045230534e-05, "loss": 0.1953, "step": 24235 }, { "epoch": 0.4322762458531017, "grad_norm": 0.2535552978515625, "learning_rate": 3.498556351695231e-05, "loss": 0.1125, "step": 24236 }, { "epoch": 0.4322940819748154, "grad_norm": 0.27269431948661804, "learning_rate": 3.4984136542891776e-05, "loss": 0.1095, "step": 24237 }, { "epoch": 0.4323119180965291, "grad_norm": 0.27531227469444275, "learning_rate": 3.498270953012929e-05, "loss": 0.1251, "step": 24238 }, { "epoch": 0.43232975421824277, "grad_norm": 0.2995506525039673, "learning_rate": 3.498128247867036e-05, "loss": 0.1533, "step": 24239 }, { "epoch": 0.43234759033995646, "grad_norm": 0.21968425810337067, "learning_rate": 3.497985538852055e-05, "loss": 0.1373, "step": 24240 }, { "epoch": 0.4323654264616702, "grad_norm": 0.4896388351917267, "learning_rate": 3.497842825968537e-05, "loss": 0.1508, "step": 24241 }, { "epoch": 0.4323832625833839, "grad_norm": 0.19803155958652496, "learning_rate": 3.497700109217035e-05, "loss": 0.134, "step": 24242 }, { "epoch": 0.4324010987050976, "grad_norm": 0.22787396609783173, "learning_rate": 3.4975573885981024e-05, "loss": 0.168, "step": 24243 }, { "epoch": 0.43241893482681126, "grad_norm": 0.22332033514976501, "learning_rate": 3.497414664112294e-05, "loss": 0.0872, "step": 24244 }, { "epoch": 0.43243677094852495, "grad_norm": 0.42162781953811646, "learning_rate": 3.49727193576016e-05, "loss": 0.2102, "step": 24245 }, { "epoch": 0.43245460707023864, "grad_norm": 0.3007100820541382, "learning_rate": 3.497129203542257e-05, "loss": 0.1605, "step": 24246 }, { "epoch": 0.4324724431919523, "grad_norm": 0.30453598499298096, "learning_rate": 3.4969864674591364e-05, "loss": 0.1426, "step": 24247 }, { "epoch": 0.432490279313666, "grad_norm": 0.24274639785289764, "learning_rate": 3.496843727511352e-05, "loss": 0.1651, "step": 24248 }, { "epoch": 0.43250811543537976, "grad_norm": 0.24301005899906158, "learning_rate": 3.496700983699457e-05, "loss": 0.1783, "step": 24249 }, { "epoch": 0.43252595155709345, "grad_norm": 0.28563907742500305, "learning_rate": 3.496558236024004e-05, "loss": 0.19, "step": 24250 }, { "epoch": 0.43254378767880713, "grad_norm": 0.2543340027332306, "learning_rate": 3.496415484485549e-05, "loss": 0.1495, "step": 24251 }, { "epoch": 0.4325616238005208, "grad_norm": 0.31891199946403503, "learning_rate": 3.496272729084642e-05, "loss": 0.162, "step": 24252 }, { "epoch": 0.4325794599222345, "grad_norm": 0.21086058020591736, "learning_rate": 3.496129969821838e-05, "loss": 0.1361, "step": 24253 }, { "epoch": 0.4325972960439482, "grad_norm": 0.28397929668426514, "learning_rate": 3.4959872066976895e-05, "loss": 0.173, "step": 24254 }, { "epoch": 0.4326151321656619, "grad_norm": 0.2805987596511841, "learning_rate": 3.495844439712752e-05, "loss": 0.1465, "step": 24255 }, { "epoch": 0.4326329682873756, "grad_norm": 0.23381903767585754, "learning_rate": 3.495701668867576e-05, "loss": 0.1534, "step": 24256 }, { "epoch": 0.43265080440908926, "grad_norm": 0.3065488040447235, "learning_rate": 3.495558894162718e-05, "loss": 0.1355, "step": 24257 }, { "epoch": 0.432668640530803, "grad_norm": 0.30921217799186707, "learning_rate": 3.495416115598729e-05, "loss": 0.1901, "step": 24258 }, { "epoch": 0.4326864766525167, "grad_norm": 0.32827669382095337, "learning_rate": 3.4952733331761635e-05, "loss": 0.1771, "step": 24259 }, { "epoch": 0.4327043127742304, "grad_norm": 0.21493420004844666, "learning_rate": 3.4951305468955745e-05, "loss": 0.1005, "step": 24260 }, { "epoch": 0.43272214889594407, "grad_norm": 0.27582138776779175, "learning_rate": 3.494987756757516e-05, "loss": 0.0742, "step": 24261 }, { "epoch": 0.43273998501765776, "grad_norm": 0.4195863902568817, "learning_rate": 3.494844962762541e-05, "loss": 0.1158, "step": 24262 }, { "epoch": 0.43275782113937145, "grad_norm": 0.19916103780269623, "learning_rate": 3.494702164911204e-05, "loss": 0.1196, "step": 24263 }, { "epoch": 0.43277565726108513, "grad_norm": 0.3065303564071655, "learning_rate": 3.4945593632040577e-05, "loss": 0.1989, "step": 24264 }, { "epoch": 0.4327934933827988, "grad_norm": 0.25682297348976135, "learning_rate": 3.4944165576416553e-05, "loss": 0.1704, "step": 24265 }, { "epoch": 0.43281132950451257, "grad_norm": 0.2471446394920349, "learning_rate": 3.494273748224551e-05, "loss": 0.1596, "step": 24266 }, { "epoch": 0.43282916562622625, "grad_norm": 0.27245932817459106, "learning_rate": 3.494130934953298e-05, "loss": 0.1577, "step": 24267 }, { "epoch": 0.43284700174793994, "grad_norm": 0.22331424057483673, "learning_rate": 3.493988117828451e-05, "loss": 0.0874, "step": 24268 }, { "epoch": 0.43286483786965363, "grad_norm": 0.3115536570549011, "learning_rate": 3.493845296850562e-05, "loss": 0.1924, "step": 24269 }, { "epoch": 0.4328826739913673, "grad_norm": 0.2679843604564667, "learning_rate": 3.493702472020186e-05, "loss": 0.1563, "step": 24270 }, { "epoch": 0.432900510113081, "grad_norm": 0.1973322033882141, "learning_rate": 3.4935596433378757e-05, "loss": 0.1501, "step": 24271 }, { "epoch": 0.4329183462347947, "grad_norm": 0.26838403940200806, "learning_rate": 3.493416810804185e-05, "loss": 0.2326, "step": 24272 }, { "epoch": 0.4329361823565084, "grad_norm": 0.2561957836151123, "learning_rate": 3.4932739744196675e-05, "loss": 0.152, "step": 24273 }, { "epoch": 0.43295401847822207, "grad_norm": 0.2538825273513794, "learning_rate": 3.4931311341848774e-05, "loss": 0.1391, "step": 24274 }, { "epoch": 0.4329718545999358, "grad_norm": 0.2524832487106323, "learning_rate": 3.4929882901003674e-05, "loss": 0.1531, "step": 24275 }, { "epoch": 0.4329896907216495, "grad_norm": 0.32115912437438965, "learning_rate": 3.492845442166692e-05, "loss": 0.1712, "step": 24276 }, { "epoch": 0.4330075268433632, "grad_norm": 0.46569839119911194, "learning_rate": 3.4927025903844045e-05, "loss": 0.1601, "step": 24277 }, { "epoch": 0.4330253629650769, "grad_norm": 0.28514623641967773, "learning_rate": 3.49255973475406e-05, "loss": 0.1382, "step": 24278 }, { "epoch": 0.43304319908679056, "grad_norm": 0.24826541543006897, "learning_rate": 3.492416875276211e-05, "loss": 0.1541, "step": 24279 }, { "epoch": 0.43306103520850425, "grad_norm": 0.29415425658226013, "learning_rate": 3.4922740119514106e-05, "loss": 0.1897, "step": 24280 }, { "epoch": 0.43307887133021794, "grad_norm": 0.30140557885169983, "learning_rate": 3.492131144780213e-05, "loss": 0.1122, "step": 24281 }, { "epoch": 0.43309670745193163, "grad_norm": 0.29900699853897095, "learning_rate": 3.491988273763173e-05, "loss": 0.1283, "step": 24282 }, { "epoch": 0.43311454357364537, "grad_norm": 0.29860368371009827, "learning_rate": 3.491845398900844e-05, "loss": 0.1613, "step": 24283 }, { "epoch": 0.43313237969535906, "grad_norm": 0.3095369040966034, "learning_rate": 3.49170252019378e-05, "loss": 0.1299, "step": 24284 }, { "epoch": 0.43315021581707275, "grad_norm": 0.2636614441871643, "learning_rate": 3.491559637642534e-05, "loss": 0.1697, "step": 24285 }, { "epoch": 0.43316805193878644, "grad_norm": 0.23080094158649445, "learning_rate": 3.4914167512476605e-05, "loss": 0.1372, "step": 24286 }, { "epoch": 0.4331858880605001, "grad_norm": 0.20795312523841858, "learning_rate": 3.491273861009713e-05, "loss": 0.1447, "step": 24287 }, { "epoch": 0.4332037241822138, "grad_norm": 0.2338692992925644, "learning_rate": 3.491130966929246e-05, "loss": 0.1398, "step": 24288 }, { "epoch": 0.4332215603039275, "grad_norm": 0.4019908010959625, "learning_rate": 3.490988069006813e-05, "loss": 0.1483, "step": 24289 }, { "epoch": 0.4332393964256412, "grad_norm": 0.3317795693874359, "learning_rate": 3.4908451672429674e-05, "loss": 0.1244, "step": 24290 }, { "epoch": 0.4332572325473549, "grad_norm": 0.3120705783367157, "learning_rate": 3.490702261638265e-05, "loss": 0.1439, "step": 24291 }, { "epoch": 0.4332750686690686, "grad_norm": 0.2482733130455017, "learning_rate": 3.4905593521932575e-05, "loss": 0.1218, "step": 24292 }, { "epoch": 0.4332929047907823, "grad_norm": 0.233924001455307, "learning_rate": 3.4904164389084996e-05, "loss": 0.1367, "step": 24293 }, { "epoch": 0.433310740912496, "grad_norm": 0.32161903381347656, "learning_rate": 3.490273521784546e-05, "loss": 0.1596, "step": 24294 }, { "epoch": 0.4333285770342097, "grad_norm": 0.39300015568733215, "learning_rate": 3.490130600821949e-05, "loss": 0.1387, "step": 24295 }, { "epoch": 0.43334641315592337, "grad_norm": 0.33966004848480225, "learning_rate": 3.4899876760212655e-05, "loss": 0.2224, "step": 24296 }, { "epoch": 0.43336424927763706, "grad_norm": 0.26362356543540955, "learning_rate": 3.489844747383047e-05, "loss": 0.1287, "step": 24297 }, { "epoch": 0.43338208539935075, "grad_norm": 0.3174605071544647, "learning_rate": 3.4897018149078494e-05, "loss": 0.1574, "step": 24298 }, { "epoch": 0.43339992152106444, "grad_norm": 0.20997481048107147, "learning_rate": 3.489558878596224e-05, "loss": 0.1897, "step": 24299 }, { "epoch": 0.4334177576427782, "grad_norm": 0.37025001645088196, "learning_rate": 3.489415938448728e-05, "loss": 0.2124, "step": 24300 }, { "epoch": 0.43343559376449187, "grad_norm": 0.2173960655927658, "learning_rate": 3.489272994465914e-05, "loss": 0.2001, "step": 24301 }, { "epoch": 0.43345342988620555, "grad_norm": 0.2510943114757538, "learning_rate": 3.4891300466483354e-05, "loss": 0.2245, "step": 24302 }, { "epoch": 0.43347126600791924, "grad_norm": 0.22401520609855652, "learning_rate": 3.488987094996547e-05, "loss": 0.154, "step": 24303 }, { "epoch": 0.43348910212963293, "grad_norm": 0.3415440320968628, "learning_rate": 3.4888441395111036e-05, "loss": 0.123, "step": 24304 }, { "epoch": 0.4335069382513466, "grad_norm": 0.27179810404777527, "learning_rate": 3.488701180192559e-05, "loss": 0.1551, "step": 24305 }, { "epoch": 0.4335247743730603, "grad_norm": 0.31604230403900146, "learning_rate": 3.488558217041467e-05, "loss": 0.186, "step": 24306 }, { "epoch": 0.433542610494774, "grad_norm": 0.3194698989391327, "learning_rate": 3.488415250058382e-05, "loss": 0.0988, "step": 24307 }, { "epoch": 0.43356044661648774, "grad_norm": 0.2013579159975052, "learning_rate": 3.4882722792438574e-05, "loss": 0.1288, "step": 24308 }, { "epoch": 0.4335782827382014, "grad_norm": 0.24615490436553955, "learning_rate": 3.4881293045984485e-05, "loss": 0.1627, "step": 24309 }, { "epoch": 0.4335961188599151, "grad_norm": 0.21517011523246765, "learning_rate": 3.487986326122709e-05, "loss": 0.1356, "step": 24310 }, { "epoch": 0.4336139549816288, "grad_norm": 0.34740614891052246, "learning_rate": 3.4878433438171944e-05, "loss": 0.1811, "step": 24311 }, { "epoch": 0.4336317911033425, "grad_norm": 0.28739190101623535, "learning_rate": 3.4877003576824565e-05, "loss": 0.1492, "step": 24312 }, { "epoch": 0.4336496272250562, "grad_norm": 0.3440515995025635, "learning_rate": 3.487557367719051e-05, "loss": 0.173, "step": 24313 }, { "epoch": 0.43366746334676987, "grad_norm": 0.30778539180755615, "learning_rate": 3.487414373927532e-05, "loss": 0.1887, "step": 24314 }, { "epoch": 0.43368529946848355, "grad_norm": 0.2444865107536316, "learning_rate": 3.487271376308454e-05, "loss": 0.1526, "step": 24315 }, { "epoch": 0.43370313559019724, "grad_norm": 0.21706320345401764, "learning_rate": 3.487128374862371e-05, "loss": 0.1047, "step": 24316 }, { "epoch": 0.433720971711911, "grad_norm": 0.2686428129673004, "learning_rate": 3.4869853695898384e-05, "loss": 0.1823, "step": 24317 }, { "epoch": 0.4337388078336247, "grad_norm": 0.24340467154979706, "learning_rate": 3.486842360491409e-05, "loss": 0.125, "step": 24318 }, { "epoch": 0.43375664395533836, "grad_norm": 0.23119409382343292, "learning_rate": 3.486699347567638e-05, "loss": 0.1502, "step": 24319 }, { "epoch": 0.43377448007705205, "grad_norm": 0.24016402661800385, "learning_rate": 3.4865563308190796e-05, "loss": 0.1656, "step": 24320 }, { "epoch": 0.43379231619876574, "grad_norm": 0.2949119806289673, "learning_rate": 3.486413310246287e-05, "loss": 0.1833, "step": 24321 }, { "epoch": 0.4338101523204794, "grad_norm": 0.2694990038871765, "learning_rate": 3.486270285849816e-05, "loss": 0.2416, "step": 24322 }, { "epoch": 0.4338279884421931, "grad_norm": 0.23864035308361053, "learning_rate": 3.486127257630222e-05, "loss": 0.1727, "step": 24323 }, { "epoch": 0.4338458245639068, "grad_norm": 0.32019928097724915, "learning_rate": 3.485984225588058e-05, "loss": 0.1307, "step": 24324 }, { "epoch": 0.43386366068562054, "grad_norm": 0.2653842568397522, "learning_rate": 3.4858411897238774e-05, "loss": 0.1546, "step": 24325 }, { "epoch": 0.43388149680733423, "grad_norm": 0.32241615653038025, "learning_rate": 3.4856981500382365e-05, "loss": 0.1408, "step": 24326 }, { "epoch": 0.4338993329290479, "grad_norm": 0.2626042068004608, "learning_rate": 3.485555106531689e-05, "loss": 0.1716, "step": 24327 }, { "epoch": 0.4339171690507616, "grad_norm": 0.30881282687187195, "learning_rate": 3.48541205920479e-05, "loss": 0.1203, "step": 24328 }, { "epoch": 0.4339350051724753, "grad_norm": 0.28870540857315063, "learning_rate": 3.485269008058093e-05, "loss": 0.197, "step": 24329 }, { "epoch": 0.433952841294189, "grad_norm": 0.36803141236305237, "learning_rate": 3.485125953092153e-05, "loss": 0.1556, "step": 24330 }, { "epoch": 0.4339706774159027, "grad_norm": 0.18751604855060577, "learning_rate": 3.4849828943075245e-05, "loss": 0.0948, "step": 24331 }, { "epoch": 0.43398851353761636, "grad_norm": 0.3452558219432831, "learning_rate": 3.484839831704762e-05, "loss": 0.1405, "step": 24332 }, { "epoch": 0.43400634965933005, "grad_norm": 0.23843905329704285, "learning_rate": 3.484696765284421e-05, "loss": 0.1922, "step": 24333 }, { "epoch": 0.4340241857810438, "grad_norm": 0.23658090829849243, "learning_rate": 3.484553695047054e-05, "loss": 0.1361, "step": 24334 }, { "epoch": 0.4340420219027575, "grad_norm": 0.21467217803001404, "learning_rate": 3.484410620993218e-05, "loss": 0.1519, "step": 24335 }, { "epoch": 0.43405985802447117, "grad_norm": 0.26574647426605225, "learning_rate": 3.484267543123465e-05, "loss": 0.1388, "step": 24336 }, { "epoch": 0.43407769414618486, "grad_norm": 0.30465880036354065, "learning_rate": 3.4841244614383525e-05, "loss": 0.0962, "step": 24337 }, { "epoch": 0.43409553026789854, "grad_norm": 0.2994210720062256, "learning_rate": 3.483981375938433e-05, "loss": 0.1904, "step": 24338 }, { "epoch": 0.43411336638961223, "grad_norm": 0.22072015702724457, "learning_rate": 3.483838286624262e-05, "loss": 0.1376, "step": 24339 }, { "epoch": 0.4341312025113259, "grad_norm": 0.2772163450717926, "learning_rate": 3.483695193496394e-05, "loss": 0.1219, "step": 24340 }, { "epoch": 0.4341490386330396, "grad_norm": 0.2658138871192932, "learning_rate": 3.483552096555384e-05, "loss": 0.1368, "step": 24341 }, { "epoch": 0.43416687475475335, "grad_norm": 0.29843610525131226, "learning_rate": 3.4834089958017854e-05, "loss": 0.1522, "step": 24342 }, { "epoch": 0.43418471087646704, "grad_norm": 0.21727637946605682, "learning_rate": 3.4832658912361544e-05, "loss": 0.1289, "step": 24343 }, { "epoch": 0.4342025469981807, "grad_norm": 0.31400787830352783, "learning_rate": 3.483122782859046e-05, "loss": 0.1837, "step": 24344 }, { "epoch": 0.4342203831198944, "grad_norm": 0.32936444878578186, "learning_rate": 3.482979670671013e-05, "loss": 0.1496, "step": 24345 }, { "epoch": 0.4342382192416081, "grad_norm": 0.31356382369995117, "learning_rate": 3.4828365546726114e-05, "loss": 0.1352, "step": 24346 }, { "epoch": 0.4342560553633218, "grad_norm": 0.32013651728630066, "learning_rate": 3.482693434864396e-05, "loss": 0.1606, "step": 24347 }, { "epoch": 0.4342738914850355, "grad_norm": 0.24525177478790283, "learning_rate": 3.482550311246922e-05, "loss": 0.1681, "step": 24348 }, { "epoch": 0.43429172760674917, "grad_norm": 0.28695160150527954, "learning_rate": 3.482407183820743e-05, "loss": 0.1751, "step": 24349 }, { "epoch": 0.4343095637284629, "grad_norm": 0.27639666199684143, "learning_rate": 3.4822640525864146e-05, "loss": 0.1215, "step": 24350 }, { "epoch": 0.4343273998501766, "grad_norm": 0.28301528096199036, "learning_rate": 3.4821209175444914e-05, "loss": 0.2216, "step": 24351 }, { "epoch": 0.4343452359718903, "grad_norm": 0.27372410893440247, "learning_rate": 3.481977778695529e-05, "loss": 0.113, "step": 24352 }, { "epoch": 0.434363072093604, "grad_norm": 0.2885031998157501, "learning_rate": 3.48183463604008e-05, "loss": 0.1431, "step": 24353 }, { "epoch": 0.43438090821531766, "grad_norm": 0.24308699369430542, "learning_rate": 3.4816914895787026e-05, "loss": 0.1446, "step": 24354 }, { "epoch": 0.43439874433703135, "grad_norm": 0.2509942650794983, "learning_rate": 3.481548339311948e-05, "loss": 0.1755, "step": 24355 }, { "epoch": 0.43441658045874504, "grad_norm": 0.19815278053283691, "learning_rate": 3.481405185240375e-05, "loss": 0.1075, "step": 24356 }, { "epoch": 0.4344344165804587, "grad_norm": 0.30510663986206055, "learning_rate": 3.481262027364536e-05, "loss": 0.1764, "step": 24357 }, { "epoch": 0.4344522527021724, "grad_norm": 0.26673340797424316, "learning_rate": 3.4811188656849856e-05, "loss": 0.1731, "step": 24358 }, { "epoch": 0.43447008882388616, "grad_norm": 0.231350839138031, "learning_rate": 3.4809757002022805e-05, "loss": 0.1485, "step": 24359 }, { "epoch": 0.43448792494559985, "grad_norm": 0.24012552201747894, "learning_rate": 3.480832530916974e-05, "loss": 0.1545, "step": 24360 }, { "epoch": 0.43450576106731353, "grad_norm": 0.28711503744125366, "learning_rate": 3.480689357829623e-05, "loss": 0.1513, "step": 24361 }, { "epoch": 0.4345235971890272, "grad_norm": 0.18398378789424896, "learning_rate": 3.48054618094078e-05, "loss": 0.1048, "step": 24362 }, { "epoch": 0.4345414333107409, "grad_norm": 0.23202653229236603, "learning_rate": 3.480403000251002e-05, "loss": 0.1233, "step": 24363 }, { "epoch": 0.4345592694324546, "grad_norm": 0.2332935333251953, "learning_rate": 3.480259815760843e-05, "loss": 0.1565, "step": 24364 }, { "epoch": 0.4345771055541683, "grad_norm": 0.24581611156463623, "learning_rate": 3.480116627470859e-05, "loss": 0.1759, "step": 24365 }, { "epoch": 0.434594941675882, "grad_norm": 0.21847109496593475, "learning_rate": 3.479973435381604e-05, "loss": 0.1049, "step": 24366 }, { "epoch": 0.4346127777975957, "grad_norm": 0.35353147983551025, "learning_rate": 3.4798302394936336e-05, "loss": 0.1559, "step": 24367 }, { "epoch": 0.4346306139193094, "grad_norm": 0.2123904675245285, "learning_rate": 3.479687039807503e-05, "loss": 0.1013, "step": 24368 }, { "epoch": 0.4346484500410231, "grad_norm": 0.2722563147544861, "learning_rate": 3.479543836323767e-05, "loss": 0.1193, "step": 24369 }, { "epoch": 0.4346662861627368, "grad_norm": 0.36920273303985596, "learning_rate": 3.4794006290429806e-05, "loss": 0.2087, "step": 24370 }, { "epoch": 0.43468412228445047, "grad_norm": 0.2945559024810791, "learning_rate": 3.479257417965699e-05, "loss": 0.161, "step": 24371 }, { "epoch": 0.43470195840616416, "grad_norm": 0.45980262756347656, "learning_rate": 3.4791142030924774e-05, "loss": 0.1603, "step": 24372 }, { "epoch": 0.43471979452787785, "grad_norm": 0.2901257276535034, "learning_rate": 3.478970984423871e-05, "loss": 0.157, "step": 24373 }, { "epoch": 0.43473763064959153, "grad_norm": 0.27465295791625977, "learning_rate": 3.4788277619604354e-05, "loss": 0.1617, "step": 24374 }, { "epoch": 0.4347554667713052, "grad_norm": 0.28169938921928406, "learning_rate": 3.478684535702725e-05, "loss": 0.1469, "step": 24375 }, { "epoch": 0.43477330289301896, "grad_norm": 0.311553955078125, "learning_rate": 3.478541305651295e-05, "loss": 0.1776, "step": 24376 }, { "epoch": 0.43479113901473265, "grad_norm": 0.253508061170578, "learning_rate": 3.4783980718067014e-05, "loss": 0.1569, "step": 24377 }, { "epoch": 0.43480897513644634, "grad_norm": 0.31083858013153076, "learning_rate": 3.478254834169498e-05, "loss": 0.1865, "step": 24378 }, { "epoch": 0.43482681125816003, "grad_norm": 0.2461954951286316, "learning_rate": 3.478111592740242e-05, "loss": 0.107, "step": 24379 }, { "epoch": 0.4348446473798737, "grad_norm": 0.20668452978134155, "learning_rate": 3.477968347519488e-05, "loss": 0.1171, "step": 24380 }, { "epoch": 0.4348624835015874, "grad_norm": 0.2546831965446472, "learning_rate": 3.477825098507789e-05, "loss": 0.1533, "step": 24381 }, { "epoch": 0.4348803196233011, "grad_norm": 0.4030230939388275, "learning_rate": 3.4776818457057045e-05, "loss": 0.128, "step": 24382 }, { "epoch": 0.4348981557450148, "grad_norm": 0.26909109950065613, "learning_rate": 3.477538589113786e-05, "loss": 0.1504, "step": 24383 }, { "epoch": 0.4349159918667285, "grad_norm": 0.22686009109020233, "learning_rate": 3.477395328732591e-05, "loss": 0.1434, "step": 24384 }, { "epoch": 0.4349338279884422, "grad_norm": 0.22543098032474518, "learning_rate": 3.477252064562674e-05, "loss": 0.1363, "step": 24385 }, { "epoch": 0.4349516641101559, "grad_norm": 0.31914573907852173, "learning_rate": 3.4771087966045895e-05, "loss": 0.1374, "step": 24386 }, { "epoch": 0.4349695002318696, "grad_norm": 0.37344828248023987, "learning_rate": 3.476965524858895e-05, "loss": 0.2053, "step": 24387 }, { "epoch": 0.4349873363535833, "grad_norm": 0.31551864743232727, "learning_rate": 3.476822249326144e-05, "loss": 0.1464, "step": 24388 }, { "epoch": 0.43500517247529696, "grad_norm": 0.33455032110214233, "learning_rate": 3.476678970006893e-05, "loss": 0.1533, "step": 24389 }, { "epoch": 0.43502300859701065, "grad_norm": 0.2222704142332077, "learning_rate": 3.476535686901697e-05, "loss": 0.1452, "step": 24390 }, { "epoch": 0.43504084471872434, "grad_norm": 0.26246216893196106, "learning_rate": 3.476392400011112e-05, "loss": 0.1138, "step": 24391 }, { "epoch": 0.43505868084043803, "grad_norm": 0.26675572991371155, "learning_rate": 3.476249109335691e-05, "loss": 0.121, "step": 24392 }, { "epoch": 0.43507651696215177, "grad_norm": 0.31257131695747375, "learning_rate": 3.476105814875993e-05, "loss": 0.1382, "step": 24393 }, { "epoch": 0.43509435308386546, "grad_norm": 0.2697868347167969, "learning_rate": 3.475962516632571e-05, "loss": 0.1276, "step": 24394 }, { "epoch": 0.43511218920557915, "grad_norm": 0.22796927392482758, "learning_rate": 3.475819214605981e-05, "loss": 0.1111, "step": 24395 }, { "epoch": 0.43513002532729284, "grad_norm": 0.3646264672279358, "learning_rate": 3.4756759087967794e-05, "loss": 0.1079, "step": 24396 }, { "epoch": 0.4351478614490065, "grad_norm": 0.30447709560394287, "learning_rate": 3.4755325992055204e-05, "loss": 0.1665, "step": 24397 }, { "epoch": 0.4351656975707202, "grad_norm": 0.44530022144317627, "learning_rate": 3.47538928583276e-05, "loss": 0.1765, "step": 24398 }, { "epoch": 0.4351835336924339, "grad_norm": 0.21506398916244507, "learning_rate": 3.475245968679054e-05, "loss": 0.1671, "step": 24399 }, { "epoch": 0.4352013698141476, "grad_norm": 0.1718909740447998, "learning_rate": 3.475102647744958e-05, "loss": 0.1083, "step": 24400 }, { "epoch": 0.43521920593586133, "grad_norm": 0.3857828378677368, "learning_rate": 3.4749593230310275e-05, "loss": 0.1162, "step": 24401 }, { "epoch": 0.435237042057575, "grad_norm": 0.31044501066207886, "learning_rate": 3.474815994537818e-05, "loss": 0.1883, "step": 24402 }, { "epoch": 0.4352548781792887, "grad_norm": 0.2804064452648163, "learning_rate": 3.474672662265884e-05, "loss": 0.144, "step": 24403 }, { "epoch": 0.4352727143010024, "grad_norm": 0.24959516525268555, "learning_rate": 3.474529326215783e-05, "loss": 0.1664, "step": 24404 }, { "epoch": 0.4352905504227161, "grad_norm": 0.29151400923728943, "learning_rate": 3.4743859863880696e-05, "loss": 0.1422, "step": 24405 }, { "epoch": 0.43530838654442977, "grad_norm": 0.2321368306875229, "learning_rate": 3.474242642783299e-05, "loss": 0.1187, "step": 24406 }, { "epoch": 0.43532622266614346, "grad_norm": 0.29965221881866455, "learning_rate": 3.474099295402028e-05, "loss": 0.1111, "step": 24407 }, { "epoch": 0.43534405878785715, "grad_norm": 0.2102593332529068, "learning_rate": 3.4739559442448124e-05, "loss": 0.1484, "step": 24408 }, { "epoch": 0.4353618949095709, "grad_norm": 0.3023035228252411, "learning_rate": 3.4738125893122064e-05, "loss": 0.1514, "step": 24409 }, { "epoch": 0.4353797310312846, "grad_norm": 0.15038491785526276, "learning_rate": 3.4736692306047655e-05, "loss": 0.1222, "step": 24410 }, { "epoch": 0.43539756715299827, "grad_norm": 0.2755349278450012, "learning_rate": 3.473525868123048e-05, "loss": 0.1637, "step": 24411 }, { "epoch": 0.43541540327471195, "grad_norm": 0.32242774963378906, "learning_rate": 3.473382501867608e-05, "loss": 0.1833, "step": 24412 }, { "epoch": 0.43543323939642564, "grad_norm": 0.2398013025522232, "learning_rate": 3.4732391318389997e-05, "loss": 0.1611, "step": 24413 }, { "epoch": 0.43545107551813933, "grad_norm": 0.21700620651245117, "learning_rate": 3.473095758037781e-05, "loss": 0.1624, "step": 24414 }, { "epoch": 0.435468911639853, "grad_norm": 0.19714149832725525, "learning_rate": 3.472952380464508e-05, "loss": 0.1341, "step": 24415 }, { "epoch": 0.4354867477615667, "grad_norm": 0.4227885603904724, "learning_rate": 3.4728089991197345e-05, "loss": 0.1762, "step": 24416 }, { "epoch": 0.4355045838832804, "grad_norm": 0.24466878175735474, "learning_rate": 3.4726656140040174e-05, "loss": 0.1636, "step": 24417 }, { "epoch": 0.43552242000499414, "grad_norm": 0.36232542991638184, "learning_rate": 3.472522225117912e-05, "loss": 0.1802, "step": 24418 }, { "epoch": 0.4355402561267078, "grad_norm": 0.2487695813179016, "learning_rate": 3.4723788324619754e-05, "loss": 0.1443, "step": 24419 }, { "epoch": 0.4355580922484215, "grad_norm": 0.2208571434020996, "learning_rate": 3.4722354360367625e-05, "loss": 0.1516, "step": 24420 }, { "epoch": 0.4355759283701352, "grad_norm": 0.3345034718513489, "learning_rate": 3.4720920358428297e-05, "loss": 0.1344, "step": 24421 }, { "epoch": 0.4355937644918489, "grad_norm": 0.21832901239395142, "learning_rate": 3.471948631880732e-05, "loss": 0.1453, "step": 24422 }, { "epoch": 0.4356116006135626, "grad_norm": 0.38871943950653076, "learning_rate": 3.471805224151025e-05, "loss": 0.1545, "step": 24423 }, { "epoch": 0.43562943673527627, "grad_norm": 0.23371315002441406, "learning_rate": 3.4716618126542665e-05, "loss": 0.1848, "step": 24424 }, { "epoch": 0.43564727285698995, "grad_norm": 0.30884552001953125, "learning_rate": 3.47151839739101e-05, "loss": 0.1675, "step": 24425 }, { "epoch": 0.4356651089787037, "grad_norm": 0.297853946685791, "learning_rate": 3.471374978361813e-05, "loss": 0.1266, "step": 24426 }, { "epoch": 0.4356829451004174, "grad_norm": 0.48763370513916016, "learning_rate": 3.471231555567231e-05, "loss": 0.1414, "step": 24427 }, { "epoch": 0.4357007812221311, "grad_norm": 0.21908225119113922, "learning_rate": 3.471088129007821e-05, "loss": 0.1142, "step": 24428 }, { "epoch": 0.43571861734384476, "grad_norm": 0.33073118329048157, "learning_rate": 3.470944698684137e-05, "loss": 0.1675, "step": 24429 }, { "epoch": 0.43573645346555845, "grad_norm": 0.2916359603404999, "learning_rate": 3.470801264596737e-05, "loss": 0.1733, "step": 24430 }, { "epoch": 0.43575428958727214, "grad_norm": 0.3057893216609955, "learning_rate": 3.470657826746175e-05, "loss": 0.1968, "step": 24431 }, { "epoch": 0.4357721257089858, "grad_norm": 0.19366583228111267, "learning_rate": 3.4705143851330086e-05, "loss": 0.1315, "step": 24432 }, { "epoch": 0.4357899618306995, "grad_norm": 0.2801244556903839, "learning_rate": 3.470370939757793e-05, "loss": 0.1529, "step": 24433 }, { "epoch": 0.4358077979524132, "grad_norm": 0.25890833139419556, "learning_rate": 3.470227490621084e-05, "loss": 0.157, "step": 24434 }, { "epoch": 0.43582563407412694, "grad_norm": 0.4473634362220764, "learning_rate": 3.470084037723439e-05, "loss": 0.105, "step": 24435 }, { "epoch": 0.43584347019584063, "grad_norm": 0.2150295078754425, "learning_rate": 3.469940581065413e-05, "loss": 0.1579, "step": 24436 }, { "epoch": 0.4358613063175543, "grad_norm": 0.2542577087879181, "learning_rate": 3.4697971206475624e-05, "loss": 0.1188, "step": 24437 }, { "epoch": 0.435879142439268, "grad_norm": 0.21434485912322998, "learning_rate": 3.4696536564704425e-05, "loss": 0.1524, "step": 24438 }, { "epoch": 0.4358969785609817, "grad_norm": 0.24575546383857727, "learning_rate": 3.4695101885346106e-05, "loss": 0.1143, "step": 24439 }, { "epoch": 0.4359148146826954, "grad_norm": 0.2871822118759155, "learning_rate": 3.4693667168406216e-05, "loss": 0.1465, "step": 24440 }, { "epoch": 0.43593265080440907, "grad_norm": 0.25435957312583923, "learning_rate": 3.469223241389034e-05, "loss": 0.1464, "step": 24441 }, { "epoch": 0.43595048692612276, "grad_norm": 0.2695740759372711, "learning_rate": 3.469079762180401e-05, "loss": 0.1536, "step": 24442 }, { "epoch": 0.4359683230478365, "grad_norm": 0.2845589220523834, "learning_rate": 3.4689362792152805e-05, "loss": 0.1765, "step": 24443 }, { "epoch": 0.4359861591695502, "grad_norm": 0.24259649217128754, "learning_rate": 3.468792792494228e-05, "loss": 0.143, "step": 24444 }, { "epoch": 0.4360039952912639, "grad_norm": 0.44644781947135925, "learning_rate": 3.4686493020178014e-05, "loss": 0.1331, "step": 24445 }, { "epoch": 0.43602183141297757, "grad_norm": 0.22415953874588013, "learning_rate": 3.468505807786554e-05, "loss": 0.1479, "step": 24446 }, { "epoch": 0.43603966753469126, "grad_norm": 0.25826209783554077, "learning_rate": 3.4683623098010444e-05, "loss": 0.1501, "step": 24447 }, { "epoch": 0.43605750365640494, "grad_norm": 0.24463853240013123, "learning_rate": 3.468218808061828e-05, "loss": 0.163, "step": 24448 }, { "epoch": 0.43607533977811863, "grad_norm": 0.28812387585639954, "learning_rate": 3.46807530256946e-05, "loss": 0.1986, "step": 24449 }, { "epoch": 0.4360931758998323, "grad_norm": 0.20546510815620422, "learning_rate": 3.4679317933244996e-05, "loss": 0.1203, "step": 24450 }, { "epoch": 0.436111012021546, "grad_norm": 0.22608225047588348, "learning_rate": 3.4677882803274994e-05, "loss": 0.1226, "step": 24451 }, { "epoch": 0.43612884814325975, "grad_norm": 0.27659979462623596, "learning_rate": 3.4676447635790195e-05, "loss": 0.1158, "step": 24452 }, { "epoch": 0.43614668426497344, "grad_norm": 0.4154391884803772, "learning_rate": 3.467501243079613e-05, "loss": 0.1683, "step": 24453 }, { "epoch": 0.4361645203866871, "grad_norm": 0.2342330664396286, "learning_rate": 3.4673577188298375e-05, "loss": 0.1404, "step": 24454 }, { "epoch": 0.4361823565084008, "grad_norm": 0.2267889678478241, "learning_rate": 3.4672141908302495e-05, "loss": 0.0653, "step": 24455 }, { "epoch": 0.4362001926301145, "grad_norm": 0.3321382999420166, "learning_rate": 3.467070659081405e-05, "loss": 0.164, "step": 24456 }, { "epoch": 0.4362180287518282, "grad_norm": 0.2762233018875122, "learning_rate": 3.4669271235838615e-05, "loss": 0.1262, "step": 24457 }, { "epoch": 0.4362358648735419, "grad_norm": 0.24827255308628082, "learning_rate": 3.466783584338174e-05, "loss": 0.148, "step": 24458 }, { "epoch": 0.43625370099525557, "grad_norm": 0.39366522431373596, "learning_rate": 3.466640041344899e-05, "loss": 0.2163, "step": 24459 }, { "epoch": 0.4362715371169693, "grad_norm": 0.24751730263233185, "learning_rate": 3.4664964946045945e-05, "loss": 0.1344, "step": 24460 }, { "epoch": 0.436289373238683, "grad_norm": 0.3638309836387634, "learning_rate": 3.4663529441178144e-05, "loss": 0.125, "step": 24461 }, { "epoch": 0.4363072093603967, "grad_norm": 0.2329244762659073, "learning_rate": 3.4662093898851166e-05, "loss": 0.1616, "step": 24462 }, { "epoch": 0.4363250454821104, "grad_norm": 0.2134053260087967, "learning_rate": 3.466065831907058e-05, "loss": 0.1216, "step": 24463 }, { "epoch": 0.43634288160382406, "grad_norm": 0.36467444896698, "learning_rate": 3.465922270184195e-05, "loss": 0.1302, "step": 24464 }, { "epoch": 0.43636071772553775, "grad_norm": 0.4559136927127838, "learning_rate": 3.465778704717083e-05, "loss": 0.1351, "step": 24465 }, { "epoch": 0.43637855384725144, "grad_norm": 0.2538378834724426, "learning_rate": 3.4656351355062796e-05, "loss": 0.127, "step": 24466 }, { "epoch": 0.4363963899689651, "grad_norm": 0.2677273452281952, "learning_rate": 3.46549156255234e-05, "loss": 0.103, "step": 24467 }, { "epoch": 0.43641422609067887, "grad_norm": 0.2955908179283142, "learning_rate": 3.4653479858558225e-05, "loss": 0.2337, "step": 24468 }, { "epoch": 0.43643206221239256, "grad_norm": 0.2496703714132309, "learning_rate": 3.4652044054172826e-05, "loss": 0.1097, "step": 24469 }, { "epoch": 0.43644989833410625, "grad_norm": 0.306892454624176, "learning_rate": 3.465060821237277e-05, "loss": 0.1421, "step": 24470 }, { "epoch": 0.43646773445581993, "grad_norm": 0.26149633526802063, "learning_rate": 3.464917233316363e-05, "loss": 0.1664, "step": 24471 }, { "epoch": 0.4364855705775336, "grad_norm": 0.2137775719165802, "learning_rate": 3.464773641655096e-05, "loss": 0.1328, "step": 24472 }, { "epoch": 0.4365034066992473, "grad_norm": 0.269750714302063, "learning_rate": 3.464630046254033e-05, "loss": 0.1248, "step": 24473 }, { "epoch": 0.436521242820961, "grad_norm": 0.26732224225997925, "learning_rate": 3.464486447113731e-05, "loss": 0.1183, "step": 24474 }, { "epoch": 0.4365390789426747, "grad_norm": 0.2814632058143616, "learning_rate": 3.464342844234746e-05, "loss": 0.1526, "step": 24475 }, { "epoch": 0.4365569150643884, "grad_norm": 0.20892545580863953, "learning_rate": 3.4641992376176354e-05, "loss": 0.1411, "step": 24476 }, { "epoch": 0.4365747511861021, "grad_norm": 0.2442842721939087, "learning_rate": 3.4640556272629556e-05, "loss": 0.1517, "step": 24477 }, { "epoch": 0.4365925873078158, "grad_norm": 0.26687225699424744, "learning_rate": 3.463912013171263e-05, "loss": 0.1477, "step": 24478 }, { "epoch": 0.4366104234295295, "grad_norm": 0.2707096338272095, "learning_rate": 3.463768395343114e-05, "loss": 0.1751, "step": 24479 }, { "epoch": 0.4366282595512432, "grad_norm": 0.2348027527332306, "learning_rate": 3.4636247737790675e-05, "loss": 0.0858, "step": 24480 }, { "epoch": 0.43664609567295687, "grad_norm": 0.24467630684375763, "learning_rate": 3.463481148479677e-05, "loss": 0.1394, "step": 24481 }, { "epoch": 0.43666393179467056, "grad_norm": 0.25188347697257996, "learning_rate": 3.463337519445501e-05, "loss": 0.1709, "step": 24482 }, { "epoch": 0.43668176791638424, "grad_norm": 0.29841721057891846, "learning_rate": 3.4631938866770956e-05, "loss": 0.1824, "step": 24483 }, { "epoch": 0.43669960403809793, "grad_norm": 0.2666328251361847, "learning_rate": 3.46305025017502e-05, "loss": 0.1518, "step": 24484 }, { "epoch": 0.4367174401598117, "grad_norm": 0.2557719945907593, "learning_rate": 3.462906609939826e-05, "loss": 0.1764, "step": 24485 }, { "epoch": 0.43673527628152536, "grad_norm": 0.31345266103744507, "learning_rate": 3.462762965972076e-05, "loss": 0.1676, "step": 24486 }, { "epoch": 0.43675311240323905, "grad_norm": 0.2803944945335388, "learning_rate": 3.462619318272323e-05, "loss": 0.1516, "step": 24487 }, { "epoch": 0.43677094852495274, "grad_norm": 0.26661330461502075, "learning_rate": 3.462475666841126e-05, "loss": 0.1724, "step": 24488 }, { "epoch": 0.43678878464666643, "grad_norm": 0.22016002237796783, "learning_rate": 3.46233201167904e-05, "loss": 0.1776, "step": 24489 }, { "epoch": 0.4368066207683801, "grad_norm": 0.35202881693840027, "learning_rate": 3.4621883527866225e-05, "loss": 0.1715, "step": 24490 }, { "epoch": 0.4368244568900938, "grad_norm": 0.2893364131450653, "learning_rate": 3.4620446901644316e-05, "loss": 0.1546, "step": 24491 }, { "epoch": 0.4368422930118075, "grad_norm": 0.321759968996048, "learning_rate": 3.4619010238130224e-05, "loss": 0.1142, "step": 24492 }, { "epoch": 0.4368601291335212, "grad_norm": 0.4297880232334137, "learning_rate": 3.461757353732953e-05, "loss": 0.1349, "step": 24493 }, { "epoch": 0.4368779652552349, "grad_norm": 0.2464224398136139, "learning_rate": 3.46161367992478e-05, "loss": 0.1487, "step": 24494 }, { "epoch": 0.4368958013769486, "grad_norm": 0.29403191804885864, "learning_rate": 3.46147000238906e-05, "loss": 0.1272, "step": 24495 }, { "epoch": 0.4369136374986623, "grad_norm": 0.30291152000427246, "learning_rate": 3.4613263211263503e-05, "loss": 0.0841, "step": 24496 }, { "epoch": 0.436931473620376, "grad_norm": 0.2908466160297394, "learning_rate": 3.461182636137208e-05, "loss": 0.1706, "step": 24497 }, { "epoch": 0.4369493097420897, "grad_norm": 0.3156099319458008, "learning_rate": 3.4610389474221885e-05, "loss": 0.1646, "step": 24498 }, { "epoch": 0.43696714586380336, "grad_norm": 0.20026199519634247, "learning_rate": 3.460895254981852e-05, "loss": 0.1395, "step": 24499 }, { "epoch": 0.43698498198551705, "grad_norm": 0.32203853130340576, "learning_rate": 3.460751558816753e-05, "loss": 0.142, "step": 24500 }, { "epoch": 0.43700281810723074, "grad_norm": 0.32851582765579224, "learning_rate": 3.4606078589274486e-05, "loss": 0.1438, "step": 24501 }, { "epoch": 0.4370206542289445, "grad_norm": 0.21360023319721222, "learning_rate": 3.460464155314497e-05, "loss": 0.1638, "step": 24502 }, { "epoch": 0.43703849035065817, "grad_norm": 0.3464330732822418, "learning_rate": 3.460320447978453e-05, "loss": 0.1225, "step": 24503 }, { "epoch": 0.43705632647237186, "grad_norm": 0.18150708079338074, "learning_rate": 3.460176736919877e-05, "loss": 0.144, "step": 24504 }, { "epoch": 0.43707416259408555, "grad_norm": 0.3969244360923767, "learning_rate": 3.460033022139324e-05, "loss": 0.246, "step": 24505 }, { "epoch": 0.43709199871579923, "grad_norm": 0.26365572214126587, "learning_rate": 3.459889303637351e-05, "loss": 0.1753, "step": 24506 }, { "epoch": 0.4371098348375129, "grad_norm": 0.3229334354400635, "learning_rate": 3.4597455814145164e-05, "loss": 0.1668, "step": 24507 }, { "epoch": 0.4371276709592266, "grad_norm": 0.2189752757549286, "learning_rate": 3.459601855471376e-05, "loss": 0.1523, "step": 24508 }, { "epoch": 0.4371455070809403, "grad_norm": 0.31201139092445374, "learning_rate": 3.459458125808487e-05, "loss": 0.1672, "step": 24509 }, { "epoch": 0.43716334320265404, "grad_norm": 0.2635055482387543, "learning_rate": 3.4593143924264066e-05, "loss": 0.1342, "step": 24510 }, { "epoch": 0.43718117932436773, "grad_norm": 0.2361844927072525, "learning_rate": 3.459170655325692e-05, "loss": 0.1145, "step": 24511 }, { "epoch": 0.4371990154460814, "grad_norm": 0.24420146644115448, "learning_rate": 3.4590269145069024e-05, "loss": 0.1457, "step": 24512 }, { "epoch": 0.4372168515677951, "grad_norm": 0.18792513012886047, "learning_rate": 3.458883169970592e-05, "loss": 0.1066, "step": 24513 }, { "epoch": 0.4372346876895088, "grad_norm": 0.24578148126602173, "learning_rate": 3.4587394217173194e-05, "loss": 0.1422, "step": 24514 }, { "epoch": 0.4372525238112225, "grad_norm": 0.2214363068342209, "learning_rate": 3.458595669747643e-05, "loss": 0.1336, "step": 24515 }, { "epoch": 0.43727035993293617, "grad_norm": 0.28363943099975586, "learning_rate": 3.458451914062117e-05, "loss": 0.1784, "step": 24516 }, { "epoch": 0.43728819605464986, "grad_norm": 0.4099245071411133, "learning_rate": 3.4583081546613006e-05, "loss": 0.1431, "step": 24517 }, { "epoch": 0.43730603217636355, "grad_norm": 0.23216953873634338, "learning_rate": 3.458164391545751e-05, "loss": 0.1441, "step": 24518 }, { "epoch": 0.4373238682980773, "grad_norm": 0.23158235847949982, "learning_rate": 3.458020624716025e-05, "loss": 0.152, "step": 24519 }, { "epoch": 0.437341704419791, "grad_norm": 0.3129504919052124, "learning_rate": 3.457876854172681e-05, "loss": 0.1511, "step": 24520 }, { "epoch": 0.43735954054150467, "grad_norm": 0.27938613295555115, "learning_rate": 3.457733079916275e-05, "loss": 0.101, "step": 24521 }, { "epoch": 0.43737737666321835, "grad_norm": 0.23409724235534668, "learning_rate": 3.457589301947364e-05, "loss": 0.1018, "step": 24522 }, { "epoch": 0.43739521278493204, "grad_norm": 0.32318681478500366, "learning_rate": 3.457445520266507e-05, "loss": 0.1863, "step": 24523 }, { "epoch": 0.43741304890664573, "grad_norm": 0.2505429685115814, "learning_rate": 3.45730173487426e-05, "loss": 0.1487, "step": 24524 }, { "epoch": 0.4374308850283594, "grad_norm": 0.26734769344329834, "learning_rate": 3.457157945771182e-05, "loss": 0.1433, "step": 24525 }, { "epoch": 0.4374487211500731, "grad_norm": 0.2437477856874466, "learning_rate": 3.457014152957828e-05, "loss": 0.1376, "step": 24526 }, { "epoch": 0.43746655727178685, "grad_norm": 0.29736319184303284, "learning_rate": 3.456870356434757e-05, "loss": 0.1286, "step": 24527 }, { "epoch": 0.43748439339350054, "grad_norm": 0.31667613983154297, "learning_rate": 3.456726556202526e-05, "loss": 0.1752, "step": 24528 }, { "epoch": 0.4375022295152142, "grad_norm": 0.2735440731048584, "learning_rate": 3.456582752261693e-05, "loss": 0.1444, "step": 24529 }, { "epoch": 0.4375200656369279, "grad_norm": 0.2536827027797699, "learning_rate": 3.4564389446128134e-05, "loss": 0.1595, "step": 24530 }, { "epoch": 0.4375379017586416, "grad_norm": 0.26457610726356506, "learning_rate": 3.456295133256447e-05, "loss": 0.1388, "step": 24531 }, { "epoch": 0.4375557378803553, "grad_norm": 0.1932777315378189, "learning_rate": 3.456151318193151e-05, "loss": 0.1535, "step": 24532 }, { "epoch": 0.437573574002069, "grad_norm": 0.3857862651348114, "learning_rate": 3.4560074994234816e-05, "loss": 0.1309, "step": 24533 }, { "epoch": 0.43759141012378266, "grad_norm": 0.18543091416358948, "learning_rate": 3.455863676947997e-05, "loss": 0.1318, "step": 24534 }, { "epoch": 0.43760924624549635, "grad_norm": 0.3003429174423218, "learning_rate": 3.455719850767254e-05, "loss": 0.1418, "step": 24535 }, { "epoch": 0.4376270823672101, "grad_norm": 0.3420927822589874, "learning_rate": 3.455576020881812e-05, "loss": 0.1492, "step": 24536 }, { "epoch": 0.4376449184889238, "grad_norm": 0.2572770416736603, "learning_rate": 3.4554321872922265e-05, "loss": 0.1536, "step": 24537 }, { "epoch": 0.43766275461063747, "grad_norm": 0.2656768262386322, "learning_rate": 3.455288349999056e-05, "loss": 0.2012, "step": 24538 }, { "epoch": 0.43768059073235116, "grad_norm": 0.2661580741405487, "learning_rate": 3.455144509002857e-05, "loss": 0.1357, "step": 24539 }, { "epoch": 0.43769842685406485, "grad_norm": 0.29712095856666565, "learning_rate": 3.455000664304189e-05, "loss": 0.1373, "step": 24540 }, { "epoch": 0.43771626297577854, "grad_norm": 0.2587900757789612, "learning_rate": 3.4548568159036096e-05, "loss": 0.0936, "step": 24541 }, { "epoch": 0.4377340990974922, "grad_norm": 0.3766099512577057, "learning_rate": 3.454712963801674e-05, "loss": 0.1425, "step": 24542 }, { "epoch": 0.4377519352192059, "grad_norm": 0.28522539138793945, "learning_rate": 3.454569107998942e-05, "loss": 0.1802, "step": 24543 }, { "epoch": 0.43776977134091966, "grad_norm": 0.25206080079078674, "learning_rate": 3.4544252484959684e-05, "loss": 0.1766, "step": 24544 }, { "epoch": 0.43778760746263334, "grad_norm": 0.3596685528755188, "learning_rate": 3.454281385293315e-05, "loss": 0.1179, "step": 24545 }, { "epoch": 0.43780544358434703, "grad_norm": 0.28925469517707825, "learning_rate": 3.454137518391536e-05, "loss": 0.1792, "step": 24546 }, { "epoch": 0.4378232797060607, "grad_norm": 0.42516639828681946, "learning_rate": 3.4539936477911916e-05, "loss": 0.1905, "step": 24547 }, { "epoch": 0.4378411158277744, "grad_norm": 0.32832595705986023, "learning_rate": 3.4538497734928374e-05, "loss": 0.1689, "step": 24548 }, { "epoch": 0.4378589519494881, "grad_norm": 0.24498018622398376, "learning_rate": 3.453705895497034e-05, "loss": 0.1372, "step": 24549 }, { "epoch": 0.4378767880712018, "grad_norm": 0.3208106458187103, "learning_rate": 3.453562013804335e-05, "loss": 0.1533, "step": 24550 }, { "epoch": 0.43789462419291547, "grad_norm": 0.2686372995376587, "learning_rate": 3.4534181284153005e-05, "loss": 0.2071, "step": 24551 }, { "epoch": 0.43791246031462916, "grad_norm": 0.28134483098983765, "learning_rate": 3.4532742393304886e-05, "loss": 0.1201, "step": 24552 }, { "epoch": 0.4379302964363429, "grad_norm": 0.21770042181015015, "learning_rate": 3.453130346550457e-05, "loss": 0.1086, "step": 24553 }, { "epoch": 0.4379481325580566, "grad_norm": 0.19623777270317078, "learning_rate": 3.452986450075762e-05, "loss": 0.1468, "step": 24554 }, { "epoch": 0.4379659686797703, "grad_norm": 0.25837016105651855, "learning_rate": 3.4528425499069625e-05, "loss": 0.127, "step": 24555 }, { "epoch": 0.43798380480148397, "grad_norm": 0.202680766582489, "learning_rate": 3.452698646044617e-05, "loss": 0.0911, "step": 24556 }, { "epoch": 0.43800164092319765, "grad_norm": 0.2903212904930115, "learning_rate": 3.452554738489282e-05, "loss": 0.1546, "step": 24557 }, { "epoch": 0.43801947704491134, "grad_norm": 0.27089986205101013, "learning_rate": 3.452410827241515e-05, "loss": 0.204, "step": 24558 }, { "epoch": 0.43803731316662503, "grad_norm": 0.2867121696472168, "learning_rate": 3.452266912301875e-05, "loss": 0.1661, "step": 24559 }, { "epoch": 0.4380551492883387, "grad_norm": 0.21074457466602325, "learning_rate": 3.45212299367092e-05, "loss": 0.0843, "step": 24560 }, { "epoch": 0.43807298541005246, "grad_norm": 0.26539722084999084, "learning_rate": 3.451979071349208e-05, "loss": 0.1901, "step": 24561 }, { "epoch": 0.43809082153176615, "grad_norm": 0.28294482827186584, "learning_rate": 3.451835145337295e-05, "loss": 0.1707, "step": 24562 }, { "epoch": 0.43810865765347984, "grad_norm": 0.2809041738510132, "learning_rate": 3.451691215635742e-05, "loss": 0.1798, "step": 24563 }, { "epoch": 0.4381264937751935, "grad_norm": 0.3937005400657654, "learning_rate": 3.4515472822451037e-05, "loss": 0.2056, "step": 24564 }, { "epoch": 0.4381443298969072, "grad_norm": 0.18566061556339264, "learning_rate": 3.45140334516594e-05, "loss": 0.1391, "step": 24565 }, { "epoch": 0.4381621660186209, "grad_norm": 0.37465575337409973, "learning_rate": 3.451259404398808e-05, "loss": 0.1641, "step": 24566 }, { "epoch": 0.4381800021403346, "grad_norm": 0.28893840312957764, "learning_rate": 3.4511154599442666e-05, "loss": 0.155, "step": 24567 }, { "epoch": 0.4381978382620483, "grad_norm": 0.2862136662006378, "learning_rate": 3.450971511802872e-05, "loss": 0.1797, "step": 24568 }, { "epoch": 0.438215674383762, "grad_norm": 0.31373411417007446, "learning_rate": 3.450827559975185e-05, "loss": 0.1659, "step": 24569 }, { "epoch": 0.4382335105054757, "grad_norm": 0.2854815125465393, "learning_rate": 3.450683604461761e-05, "loss": 0.1598, "step": 24570 }, { "epoch": 0.4382513466271894, "grad_norm": 0.27923718094825745, "learning_rate": 3.450539645263159e-05, "loss": 0.1418, "step": 24571 }, { "epoch": 0.4382691827489031, "grad_norm": 0.33809757232666016, "learning_rate": 3.450395682379936e-05, "loss": 0.106, "step": 24572 }, { "epoch": 0.4382870188706168, "grad_norm": 0.22958843410015106, "learning_rate": 3.4502517158126527e-05, "loss": 0.1633, "step": 24573 }, { "epoch": 0.43830485499233046, "grad_norm": 0.28078243136405945, "learning_rate": 3.450107745561865e-05, "loss": 0.1625, "step": 24574 }, { "epoch": 0.43832269111404415, "grad_norm": 0.2779524624347687, "learning_rate": 3.449963771628132e-05, "loss": 0.1436, "step": 24575 }, { "epoch": 0.43834052723575784, "grad_norm": 0.23051893711090088, "learning_rate": 3.4498197940120106e-05, "loss": 0.1781, "step": 24576 }, { "epoch": 0.4383583633574715, "grad_norm": 0.23626069724559784, "learning_rate": 3.44967581271406e-05, "loss": 0.1549, "step": 24577 }, { "epoch": 0.43837619947918527, "grad_norm": 0.4216504991054535, "learning_rate": 3.449531827734838e-05, "loss": 0.137, "step": 24578 }, { "epoch": 0.43839403560089896, "grad_norm": 0.3295765817165375, "learning_rate": 3.4493878390749016e-05, "loss": 0.2072, "step": 24579 }, { "epoch": 0.43841187172261264, "grad_norm": 0.24754376709461212, "learning_rate": 3.449243846734812e-05, "loss": 0.1263, "step": 24580 }, { "epoch": 0.43842970784432633, "grad_norm": 0.2161509245634079, "learning_rate": 3.449099850715123e-05, "loss": 0.1852, "step": 24581 }, { "epoch": 0.43844754396604, "grad_norm": 0.27608081698417664, "learning_rate": 3.4489558510163974e-05, "loss": 0.1585, "step": 24582 }, { "epoch": 0.4384653800877537, "grad_norm": 0.39963284134864807, "learning_rate": 3.44881184763919e-05, "loss": 0.1747, "step": 24583 }, { "epoch": 0.4384832162094674, "grad_norm": 0.27768880128860474, "learning_rate": 3.448667840584061e-05, "loss": 0.1402, "step": 24584 }, { "epoch": 0.4385010523311811, "grad_norm": 0.24097082018852234, "learning_rate": 3.4485238298515665e-05, "loss": 0.1328, "step": 24585 }, { "epoch": 0.43851888845289483, "grad_norm": 0.27341970801353455, "learning_rate": 3.448379815442267e-05, "loss": 0.1978, "step": 24586 }, { "epoch": 0.4385367245746085, "grad_norm": 0.26556727290153503, "learning_rate": 3.448235797356719e-05, "loss": 0.1306, "step": 24587 }, { "epoch": 0.4385545606963222, "grad_norm": 0.35834696888923645, "learning_rate": 3.4480917755954825e-05, "loss": 0.227, "step": 24588 }, { "epoch": 0.4385723968180359, "grad_norm": 0.24175631999969482, "learning_rate": 3.447947750159114e-05, "loss": 0.1613, "step": 24589 }, { "epoch": 0.4385902329397496, "grad_norm": 0.20927119255065918, "learning_rate": 3.4478037210481737e-05, "loss": 0.1202, "step": 24590 }, { "epoch": 0.43860806906146327, "grad_norm": 0.3092963695526123, "learning_rate": 3.447659688263218e-05, "loss": 0.1408, "step": 24591 }, { "epoch": 0.43862590518317696, "grad_norm": 0.26060038805007935, "learning_rate": 3.4475156518048066e-05, "loss": 0.1206, "step": 24592 }, { "epoch": 0.43864374130489064, "grad_norm": 0.25653448700904846, "learning_rate": 3.447371611673496e-05, "loss": 0.1902, "step": 24593 }, { "epoch": 0.43866157742660433, "grad_norm": 0.2920917570590973, "learning_rate": 3.447227567869846e-05, "loss": 0.2089, "step": 24594 }, { "epoch": 0.4386794135483181, "grad_norm": 0.3284919261932373, "learning_rate": 3.4470835203944166e-05, "loss": 0.131, "step": 24595 }, { "epoch": 0.43869724967003176, "grad_norm": 0.23107919096946716, "learning_rate": 3.4469394692477626e-05, "loss": 0.1198, "step": 24596 }, { "epoch": 0.43871508579174545, "grad_norm": 0.2584525942802429, "learning_rate": 3.4467954144304446e-05, "loss": 0.1476, "step": 24597 }, { "epoch": 0.43873292191345914, "grad_norm": 0.2407422959804535, "learning_rate": 3.446651355943021e-05, "loss": 0.1221, "step": 24598 }, { "epoch": 0.4387507580351728, "grad_norm": 0.26085159182548523, "learning_rate": 3.446507293786049e-05, "loss": 0.1597, "step": 24599 }, { "epoch": 0.4387685941568865, "grad_norm": 0.3187166750431061, "learning_rate": 3.446363227960088e-05, "loss": 0.1808, "step": 24600 }, { "epoch": 0.4387864302786002, "grad_norm": 0.1783655285835266, "learning_rate": 3.446219158465697e-05, "loss": 0.1238, "step": 24601 }, { "epoch": 0.4388042664003139, "grad_norm": 0.3295961022377014, "learning_rate": 3.446075085303433e-05, "loss": 0.1156, "step": 24602 }, { "epoch": 0.43882210252202764, "grad_norm": 0.3164421319961548, "learning_rate": 3.445931008473856e-05, "loss": 0.1384, "step": 24603 }, { "epoch": 0.4388399386437413, "grad_norm": 0.24440906941890717, "learning_rate": 3.445786927977523e-05, "loss": 0.1914, "step": 24604 }, { "epoch": 0.438857774765455, "grad_norm": 0.24379198253154755, "learning_rate": 3.445642843814994e-05, "loss": 0.186, "step": 24605 }, { "epoch": 0.4388756108871687, "grad_norm": 0.2811765968799591, "learning_rate": 3.445498755986826e-05, "loss": 0.1611, "step": 24606 }, { "epoch": 0.4388934470088824, "grad_norm": 0.35629937052726746, "learning_rate": 3.4453546644935776e-05, "loss": 0.1557, "step": 24607 }, { "epoch": 0.4389112831305961, "grad_norm": 0.31019970774650574, "learning_rate": 3.445210569335809e-05, "loss": 0.1727, "step": 24608 }, { "epoch": 0.43892911925230976, "grad_norm": 0.2642403244972229, "learning_rate": 3.445066470514078e-05, "loss": 0.1563, "step": 24609 }, { "epoch": 0.43894695537402345, "grad_norm": 0.40901580452919006, "learning_rate": 3.444922368028942e-05, "loss": 0.1801, "step": 24610 }, { "epoch": 0.4389647914957372, "grad_norm": 0.2804419696331024, "learning_rate": 3.444778261880961e-05, "loss": 0.2067, "step": 24611 }, { "epoch": 0.4389826276174509, "grad_norm": 0.32510489225387573, "learning_rate": 3.444634152070694e-05, "loss": 0.2355, "step": 24612 }, { "epoch": 0.43900046373916457, "grad_norm": 0.33013513684272766, "learning_rate": 3.444490038598697e-05, "loss": 0.1408, "step": 24613 }, { "epoch": 0.43901829986087826, "grad_norm": 0.34253761172294617, "learning_rate": 3.444345921465532e-05, "loss": 0.1769, "step": 24614 }, { "epoch": 0.43903613598259195, "grad_norm": 0.256433367729187, "learning_rate": 3.444201800671755e-05, "loss": 0.1645, "step": 24615 }, { "epoch": 0.43905397210430563, "grad_norm": 0.25067293643951416, "learning_rate": 3.444057676217926e-05, "loss": 0.1168, "step": 24616 }, { "epoch": 0.4390718082260193, "grad_norm": 0.31757479906082153, "learning_rate": 3.443913548104603e-05, "loss": 0.1463, "step": 24617 }, { "epoch": 0.439089644347733, "grad_norm": 0.29563412070274353, "learning_rate": 3.4437694163323464e-05, "loss": 0.1332, "step": 24618 }, { "epoch": 0.4391074804694467, "grad_norm": 0.44224053621292114, "learning_rate": 3.443625280901713e-05, "loss": 0.1571, "step": 24619 }, { "epoch": 0.43912531659116044, "grad_norm": 0.2787693738937378, "learning_rate": 3.443481141813261e-05, "loss": 0.162, "step": 24620 }, { "epoch": 0.43914315271287413, "grad_norm": 0.2649666965007782, "learning_rate": 3.4433369990675515e-05, "loss": 0.1063, "step": 24621 }, { "epoch": 0.4391609888345878, "grad_norm": 0.26307302713394165, "learning_rate": 3.443192852665141e-05, "loss": 0.14, "step": 24622 }, { "epoch": 0.4391788249563015, "grad_norm": 0.22876949608325958, "learning_rate": 3.44304870260659e-05, "loss": 0.1396, "step": 24623 }, { "epoch": 0.4391966610780152, "grad_norm": 0.3244122564792633, "learning_rate": 3.442904548892456e-05, "loss": 0.2014, "step": 24624 }, { "epoch": 0.4392144971997289, "grad_norm": 0.37037718296051025, "learning_rate": 3.442760391523299e-05, "loss": 0.1614, "step": 24625 }, { "epoch": 0.43923233332144257, "grad_norm": 0.2491759955883026, "learning_rate": 3.442616230499676e-05, "loss": 0.1962, "step": 24626 }, { "epoch": 0.43925016944315626, "grad_norm": 0.23094677925109863, "learning_rate": 3.4424720658221474e-05, "loss": 0.1333, "step": 24627 }, { "epoch": 0.43926800556487, "grad_norm": 0.31826314330101013, "learning_rate": 3.4423278974912716e-05, "loss": 0.175, "step": 24628 }, { "epoch": 0.4392858416865837, "grad_norm": 0.24727092683315277, "learning_rate": 3.4421837255076075e-05, "loss": 0.1795, "step": 24629 }, { "epoch": 0.4393036778082974, "grad_norm": 0.24087651073932648, "learning_rate": 3.442039549871714e-05, "loss": 0.1253, "step": 24630 }, { "epoch": 0.43932151393001106, "grad_norm": 0.3798896074295044, "learning_rate": 3.44189537058415e-05, "loss": 0.1788, "step": 24631 }, { "epoch": 0.43933935005172475, "grad_norm": 0.26776382327079773, "learning_rate": 3.441751187645474e-05, "loss": 0.1496, "step": 24632 }, { "epoch": 0.43935718617343844, "grad_norm": 0.30220523476600647, "learning_rate": 3.441607001056245e-05, "loss": 0.139, "step": 24633 }, { "epoch": 0.43937502229515213, "grad_norm": 0.2621837258338928, "learning_rate": 3.4414628108170225e-05, "loss": 0.1447, "step": 24634 }, { "epoch": 0.4393928584168658, "grad_norm": 0.30371183156967163, "learning_rate": 3.441318616928364e-05, "loss": 0.073, "step": 24635 }, { "epoch": 0.4394106945385795, "grad_norm": 0.25109121203422546, "learning_rate": 3.44117441939083e-05, "loss": 0.1442, "step": 24636 }, { "epoch": 0.43942853066029325, "grad_norm": 0.2872004508972168, "learning_rate": 3.4410302182049786e-05, "loss": 0.1212, "step": 24637 }, { "epoch": 0.43944636678200694, "grad_norm": 0.282455712556839, "learning_rate": 3.44088601337137e-05, "loss": 0.1764, "step": 24638 }, { "epoch": 0.4394642029037206, "grad_norm": 0.23103205859661102, "learning_rate": 3.440741804890562e-05, "loss": 0.1326, "step": 24639 }, { "epoch": 0.4394820390254343, "grad_norm": 0.27019548416137695, "learning_rate": 3.440597592763113e-05, "loss": 0.1543, "step": 24640 }, { "epoch": 0.439499875147148, "grad_norm": 0.23728132247924805, "learning_rate": 3.440453376989583e-05, "loss": 0.1504, "step": 24641 }, { "epoch": 0.4395177112688617, "grad_norm": 0.3127742409706116, "learning_rate": 3.440309157570531e-05, "loss": 0.2093, "step": 24642 }, { "epoch": 0.4395355473905754, "grad_norm": 0.22227708995342255, "learning_rate": 3.4401649345065156e-05, "loss": 0.1367, "step": 24643 }, { "epoch": 0.43955338351228906, "grad_norm": 0.3436882495880127, "learning_rate": 3.440020707798097e-05, "loss": 0.155, "step": 24644 }, { "epoch": 0.4395712196340028, "grad_norm": 0.29666784405708313, "learning_rate": 3.439876477445834e-05, "loss": 0.1612, "step": 24645 }, { "epoch": 0.4395890557557165, "grad_norm": 0.24896258115768433, "learning_rate": 3.439732243450284e-05, "loss": 0.1943, "step": 24646 }, { "epoch": 0.4396068918774302, "grad_norm": 0.256962388753891, "learning_rate": 3.439588005812008e-05, "loss": 0.1773, "step": 24647 }, { "epoch": 0.43962472799914387, "grad_norm": 0.260832816362381, "learning_rate": 3.4394437645315634e-05, "loss": 0.1549, "step": 24648 }, { "epoch": 0.43964256412085756, "grad_norm": 0.22145824134349823, "learning_rate": 3.43929951960951e-05, "loss": 0.134, "step": 24649 }, { "epoch": 0.43966040024257125, "grad_norm": 0.30688732862472534, "learning_rate": 3.4391552710464084e-05, "loss": 0.1359, "step": 24650 }, { "epoch": 0.43967823636428494, "grad_norm": 0.1992262452840805, "learning_rate": 3.4390110188428166e-05, "loss": 0.1442, "step": 24651 }, { "epoch": 0.4396960724859986, "grad_norm": 0.2667248845100403, "learning_rate": 3.438866762999293e-05, "loss": 0.1498, "step": 24652 }, { "epoch": 0.4397139086077123, "grad_norm": 0.2612917423248291, "learning_rate": 3.438722503516399e-05, "loss": 0.1383, "step": 24653 }, { "epoch": 0.43973174472942606, "grad_norm": 0.3943200707435608, "learning_rate": 3.4385782403946905e-05, "loss": 0.2004, "step": 24654 }, { "epoch": 0.43974958085113974, "grad_norm": 0.3169809579849243, "learning_rate": 3.438433973634729e-05, "loss": 0.1094, "step": 24655 }, { "epoch": 0.43976741697285343, "grad_norm": 0.30927079916000366, "learning_rate": 3.438289703237074e-05, "loss": 0.1607, "step": 24656 }, { "epoch": 0.4397852530945671, "grad_norm": 0.3377167880535126, "learning_rate": 3.438145429202284e-05, "loss": 0.1987, "step": 24657 }, { "epoch": 0.4398030892162808, "grad_norm": 0.31129828095436096, "learning_rate": 3.438001151530918e-05, "loss": 0.2062, "step": 24658 }, { "epoch": 0.4398209253379945, "grad_norm": 0.26784205436706543, "learning_rate": 3.437856870223535e-05, "loss": 0.167, "step": 24659 }, { "epoch": 0.4398387614597082, "grad_norm": 0.3814446032047272, "learning_rate": 3.4377125852806956e-05, "loss": 0.1448, "step": 24660 }, { "epoch": 0.43985659758142187, "grad_norm": 0.2507796287536621, "learning_rate": 3.4375682967029585e-05, "loss": 0.1706, "step": 24661 }, { "epoch": 0.4398744337031356, "grad_norm": 0.24941763281822205, "learning_rate": 3.437424004490882e-05, "loss": 0.1016, "step": 24662 }, { "epoch": 0.4398922698248493, "grad_norm": 0.37323588132858276, "learning_rate": 3.4372797086450265e-05, "loss": 0.1246, "step": 24663 }, { "epoch": 0.439910105946563, "grad_norm": 0.19328205287456512, "learning_rate": 3.437135409165952e-05, "loss": 0.1425, "step": 24664 }, { "epoch": 0.4399279420682767, "grad_norm": 0.40340539813041687, "learning_rate": 3.436991106054216e-05, "loss": 0.2477, "step": 24665 }, { "epoch": 0.43994577818999037, "grad_norm": 0.2697802484035492, "learning_rate": 3.436846799310379e-05, "loss": 0.1022, "step": 24666 }, { "epoch": 0.43996361431170405, "grad_norm": 0.21388310194015503, "learning_rate": 3.4367024889350006e-05, "loss": 0.1683, "step": 24667 }, { "epoch": 0.43998145043341774, "grad_norm": 0.4298039972782135, "learning_rate": 3.43655817492864e-05, "loss": 0.148, "step": 24668 }, { "epoch": 0.43999928655513143, "grad_norm": 0.1701425164937973, "learning_rate": 3.436413857291856e-05, "loss": 0.0935, "step": 24669 }, { "epoch": 0.4400171226768452, "grad_norm": 0.2667483389377594, "learning_rate": 3.4362695360252086e-05, "loss": 0.1344, "step": 24670 }, { "epoch": 0.44003495879855886, "grad_norm": 0.2699483036994934, "learning_rate": 3.4361252111292575e-05, "loss": 0.1451, "step": 24671 }, { "epoch": 0.44005279492027255, "grad_norm": 0.21968398988246918, "learning_rate": 3.435980882604561e-05, "loss": 0.134, "step": 24672 }, { "epoch": 0.44007063104198624, "grad_norm": 0.3413139879703522, "learning_rate": 3.43583655045168e-05, "loss": 0.2286, "step": 24673 }, { "epoch": 0.4400884671636999, "grad_norm": 0.27021604776382446, "learning_rate": 3.435692214671172e-05, "loss": 0.1253, "step": 24674 }, { "epoch": 0.4401063032854136, "grad_norm": 0.2780676782131195, "learning_rate": 3.4355478752636e-05, "loss": 0.1533, "step": 24675 }, { "epoch": 0.4401241394071273, "grad_norm": 0.2237774133682251, "learning_rate": 3.435403532229519e-05, "loss": 0.1375, "step": 24676 }, { "epoch": 0.440141975528841, "grad_norm": 0.22570393979549408, "learning_rate": 3.435259185569492e-05, "loss": 0.116, "step": 24677 }, { "epoch": 0.4401598116505547, "grad_norm": 0.46217167377471924, "learning_rate": 3.435114835284077e-05, "loss": 0.1292, "step": 24678 }, { "epoch": 0.4401776477722684, "grad_norm": 0.2668130397796631, "learning_rate": 3.434970481373835e-05, "loss": 0.1282, "step": 24679 }, { "epoch": 0.4401954838939821, "grad_norm": 0.36065685749053955, "learning_rate": 3.434826123839323e-05, "loss": 0.1298, "step": 24680 }, { "epoch": 0.4402133200156958, "grad_norm": 0.24330353736877441, "learning_rate": 3.4346817626811036e-05, "loss": 0.1644, "step": 24681 }, { "epoch": 0.4402311561374095, "grad_norm": 0.38841748237609863, "learning_rate": 3.434537397899734e-05, "loss": 0.1444, "step": 24682 }, { "epoch": 0.4402489922591232, "grad_norm": 0.3205896019935608, "learning_rate": 3.434393029495774e-05, "loss": 0.1526, "step": 24683 }, { "epoch": 0.44026682838083686, "grad_norm": 0.3153390884399414, "learning_rate": 3.434248657469784e-05, "loss": 0.1261, "step": 24684 }, { "epoch": 0.44028466450255055, "grad_norm": 0.3627713918685913, "learning_rate": 3.4341042818223246e-05, "loss": 0.1599, "step": 24685 }, { "epoch": 0.44030250062426424, "grad_norm": 0.26903846859931946, "learning_rate": 3.4339599025539544e-05, "loss": 0.1924, "step": 24686 }, { "epoch": 0.440320336745978, "grad_norm": 0.40315642952919006, "learning_rate": 3.433815519665232e-05, "loss": 0.1628, "step": 24687 }, { "epoch": 0.44033817286769167, "grad_norm": 0.3624573349952698, "learning_rate": 3.433671133156719e-05, "loss": 0.1396, "step": 24688 }, { "epoch": 0.44035600898940536, "grad_norm": 0.3920454680919647, "learning_rate": 3.4335267430289735e-05, "loss": 0.1536, "step": 24689 }, { "epoch": 0.44037384511111904, "grad_norm": 0.19093580543994904, "learning_rate": 3.4333823492825564e-05, "loss": 0.1312, "step": 24690 }, { "epoch": 0.44039168123283273, "grad_norm": 0.28893616795539856, "learning_rate": 3.4332379519180266e-05, "loss": 0.155, "step": 24691 }, { "epoch": 0.4404095173545464, "grad_norm": 0.25348326563835144, "learning_rate": 3.4330935509359444e-05, "loss": 0.1524, "step": 24692 }, { "epoch": 0.4404273534762601, "grad_norm": 0.29898956418037415, "learning_rate": 3.432949146336869e-05, "loss": 0.1733, "step": 24693 }, { "epoch": 0.4404451895979738, "grad_norm": 0.27690890431404114, "learning_rate": 3.432804738121361e-05, "loss": 0.1445, "step": 24694 }, { "epoch": 0.4404630257196875, "grad_norm": 0.36643916368484497, "learning_rate": 3.4326603262899795e-05, "loss": 0.196, "step": 24695 }, { "epoch": 0.44048086184140123, "grad_norm": 0.29113900661468506, "learning_rate": 3.4325159108432844e-05, "loss": 0.1509, "step": 24696 }, { "epoch": 0.4404986979631149, "grad_norm": 0.3071874976158142, "learning_rate": 3.4323714917818355e-05, "loss": 0.1373, "step": 24697 }, { "epoch": 0.4405165340848286, "grad_norm": 0.2989560067653656, "learning_rate": 3.4322270691061926e-05, "loss": 0.1384, "step": 24698 }, { "epoch": 0.4405343702065423, "grad_norm": 0.2771482467651367, "learning_rate": 3.4320826428169154e-05, "loss": 0.1657, "step": 24699 }, { "epoch": 0.440552206328256, "grad_norm": 0.30179348587989807, "learning_rate": 3.431938212914564e-05, "loss": 0.0988, "step": 24700 }, { "epoch": 0.44057004244996967, "grad_norm": 0.29966628551483154, "learning_rate": 3.431793779399699e-05, "loss": 0.1558, "step": 24701 }, { "epoch": 0.44058787857168336, "grad_norm": 0.31375232338905334, "learning_rate": 3.4316493422728784e-05, "loss": 0.1745, "step": 24702 }, { "epoch": 0.44060571469339704, "grad_norm": 0.26222479343414307, "learning_rate": 3.431504901534663e-05, "loss": 0.1555, "step": 24703 }, { "epoch": 0.4406235508151108, "grad_norm": 0.27539992332458496, "learning_rate": 3.431360457185614e-05, "loss": 0.1679, "step": 24704 }, { "epoch": 0.4406413869368245, "grad_norm": 0.48759081959724426, "learning_rate": 3.43121600922629e-05, "loss": 0.1947, "step": 24705 }, { "epoch": 0.44065922305853816, "grad_norm": 0.24323388934135437, "learning_rate": 3.4310715576572506e-05, "loss": 0.1694, "step": 24706 }, { "epoch": 0.44067705918025185, "grad_norm": 0.26869115233421326, "learning_rate": 3.430927102479057e-05, "loss": 0.1687, "step": 24707 }, { "epoch": 0.44069489530196554, "grad_norm": 0.2161533385515213, "learning_rate": 3.4307826436922676e-05, "loss": 0.1644, "step": 24708 }, { "epoch": 0.4407127314236792, "grad_norm": 0.21348880231380463, "learning_rate": 3.430638181297444e-05, "loss": 0.1019, "step": 24709 }, { "epoch": 0.4407305675453929, "grad_norm": 0.30145740509033203, "learning_rate": 3.430493715295144e-05, "loss": 0.2164, "step": 24710 }, { "epoch": 0.4407484036671066, "grad_norm": 0.1956244558095932, "learning_rate": 3.43034924568593e-05, "loss": 0.1401, "step": 24711 }, { "epoch": 0.44076623978882035, "grad_norm": 0.23823858797550201, "learning_rate": 3.4302047724703615e-05, "loss": 0.1284, "step": 24712 }, { "epoch": 0.44078407591053403, "grad_norm": 0.31276917457580566, "learning_rate": 3.430060295648997e-05, "loss": 0.1235, "step": 24713 }, { "epoch": 0.4408019120322477, "grad_norm": 0.21205952763557434, "learning_rate": 3.429915815222398e-05, "loss": 0.1315, "step": 24714 }, { "epoch": 0.4408197481539614, "grad_norm": 0.3925333023071289, "learning_rate": 3.429771331191124e-05, "loss": 0.1959, "step": 24715 }, { "epoch": 0.4408375842756751, "grad_norm": 0.2393421232700348, "learning_rate": 3.429626843555736e-05, "loss": 0.1886, "step": 24716 }, { "epoch": 0.4408554203973888, "grad_norm": 0.20441681146621704, "learning_rate": 3.429482352316792e-05, "loss": 0.1081, "step": 24717 }, { "epoch": 0.4408732565191025, "grad_norm": 0.2991001009941101, "learning_rate": 3.4293378574748534e-05, "loss": 0.151, "step": 24718 }, { "epoch": 0.44089109264081616, "grad_norm": 0.23300915956497192, "learning_rate": 3.42919335903048e-05, "loss": 0.1753, "step": 24719 }, { "epoch": 0.44090892876252985, "grad_norm": 0.25761520862579346, "learning_rate": 3.429048856984234e-05, "loss": 0.1374, "step": 24720 }, { "epoch": 0.4409267648842436, "grad_norm": 0.30491897463798523, "learning_rate": 3.428904351336673e-05, "loss": 0.1902, "step": 24721 }, { "epoch": 0.4409446010059573, "grad_norm": 0.26109182834625244, "learning_rate": 3.428759842088357e-05, "loss": 0.1635, "step": 24722 }, { "epoch": 0.44096243712767097, "grad_norm": 0.2662157118320465, "learning_rate": 3.428615329239848e-05, "loss": 0.1718, "step": 24723 }, { "epoch": 0.44098027324938466, "grad_norm": 0.33706337213516235, "learning_rate": 3.428470812791705e-05, "loss": 0.1498, "step": 24724 }, { "epoch": 0.44099810937109835, "grad_norm": 0.2901989221572876, "learning_rate": 3.428326292744488e-05, "loss": 0.1666, "step": 24725 }, { "epoch": 0.44101594549281203, "grad_norm": 0.3397957384586334, "learning_rate": 3.428181769098758e-05, "loss": 0.1322, "step": 24726 }, { "epoch": 0.4410337816145257, "grad_norm": 0.3570844233036041, "learning_rate": 3.428037241855075e-05, "loss": 0.2176, "step": 24727 }, { "epoch": 0.4410516177362394, "grad_norm": 0.34804993867874146, "learning_rate": 3.4278927110139994e-05, "loss": 0.1086, "step": 24728 }, { "epoch": 0.44106945385795315, "grad_norm": 0.2614504396915436, "learning_rate": 3.427748176576091e-05, "loss": 0.1642, "step": 24729 }, { "epoch": 0.44108728997966684, "grad_norm": 0.27107861638069153, "learning_rate": 3.4276036385419094e-05, "loss": 0.1239, "step": 24730 }, { "epoch": 0.44110512610138053, "grad_norm": 0.2110345959663391, "learning_rate": 3.4274590969120154e-05, "loss": 0.1847, "step": 24731 }, { "epoch": 0.4411229622230942, "grad_norm": 0.24015846848487854, "learning_rate": 3.427314551686971e-05, "loss": 0.167, "step": 24732 }, { "epoch": 0.4411407983448079, "grad_norm": 0.23190492391586304, "learning_rate": 3.4271700028673345e-05, "loss": 0.1443, "step": 24733 }, { "epoch": 0.4411586344665216, "grad_norm": 0.20426729321479797, "learning_rate": 3.427025450453667e-05, "loss": 0.1313, "step": 24734 }, { "epoch": 0.4411764705882353, "grad_norm": 0.35279545187950134, "learning_rate": 3.426880894446529e-05, "loss": 0.1829, "step": 24735 }, { "epoch": 0.44119430670994897, "grad_norm": 0.28399360179901123, "learning_rate": 3.4267363348464796e-05, "loss": 0.1528, "step": 24736 }, { "epoch": 0.44121214283166266, "grad_norm": 0.3399839401245117, "learning_rate": 3.426591771654082e-05, "loss": 0.1647, "step": 24737 }, { "epoch": 0.4412299789533764, "grad_norm": 0.25238320231437683, "learning_rate": 3.4264472048698926e-05, "loss": 0.1121, "step": 24738 }, { "epoch": 0.4412478150750901, "grad_norm": 0.1852482706308365, "learning_rate": 3.426302634494474e-05, "loss": 0.1286, "step": 24739 }, { "epoch": 0.4412656511968038, "grad_norm": 0.316701740026474, "learning_rate": 3.426158060528388e-05, "loss": 0.1686, "step": 24740 }, { "epoch": 0.44128348731851746, "grad_norm": 0.30942216515541077, "learning_rate": 3.426013482972192e-05, "loss": 0.1388, "step": 24741 }, { "epoch": 0.44130132344023115, "grad_norm": 0.27361252903938293, "learning_rate": 3.425868901826449e-05, "loss": 0.1967, "step": 24742 }, { "epoch": 0.44131915956194484, "grad_norm": 0.322246253490448, "learning_rate": 3.425724317091717e-05, "loss": 0.1363, "step": 24743 }, { "epoch": 0.44133699568365853, "grad_norm": 0.2132885754108429, "learning_rate": 3.425579728768559e-05, "loss": 0.1571, "step": 24744 }, { "epoch": 0.4413548318053722, "grad_norm": 0.22016534209251404, "learning_rate": 3.4254351368575336e-05, "loss": 0.1519, "step": 24745 }, { "epoch": 0.44137266792708596, "grad_norm": 0.19636160135269165, "learning_rate": 3.4252905413592025e-05, "loss": 0.0994, "step": 24746 }, { "epoch": 0.44139050404879965, "grad_norm": 0.2573027014732361, "learning_rate": 3.425145942274125e-05, "loss": 0.1706, "step": 24747 }, { "epoch": 0.44140834017051334, "grad_norm": 0.29500851035118103, "learning_rate": 3.425001339602863e-05, "loss": 0.1694, "step": 24748 }, { "epoch": 0.441426176292227, "grad_norm": 0.25622713565826416, "learning_rate": 3.424856733345976e-05, "loss": 0.1272, "step": 24749 }, { "epoch": 0.4414440124139407, "grad_norm": 0.332975298166275, "learning_rate": 3.424712123504025e-05, "loss": 0.2292, "step": 24750 }, { "epoch": 0.4414618485356544, "grad_norm": 0.27496057748794556, "learning_rate": 3.4245675100775706e-05, "loss": 0.176, "step": 24751 }, { "epoch": 0.4414796846573681, "grad_norm": 0.16666676104068756, "learning_rate": 3.424422893067173e-05, "loss": 0.1322, "step": 24752 }, { "epoch": 0.4414975207790818, "grad_norm": 0.40514636039733887, "learning_rate": 3.424278272473393e-05, "loss": 0.1453, "step": 24753 }, { "epoch": 0.44151535690079546, "grad_norm": 0.2893943786621094, "learning_rate": 3.42413364829679e-05, "loss": 0.1637, "step": 24754 }, { "epoch": 0.4415331930225092, "grad_norm": 0.23168393969535828, "learning_rate": 3.423989020537927e-05, "loss": 0.1309, "step": 24755 }, { "epoch": 0.4415510291442229, "grad_norm": 0.242039754986763, "learning_rate": 3.4238443891973634e-05, "loss": 0.1419, "step": 24756 }, { "epoch": 0.4415688652659366, "grad_norm": 0.21341006457805634, "learning_rate": 3.42369975427566e-05, "loss": 0.1209, "step": 24757 }, { "epoch": 0.44158670138765027, "grad_norm": 0.29745322465896606, "learning_rate": 3.423555115773377e-05, "loss": 0.1738, "step": 24758 }, { "epoch": 0.44160453750936396, "grad_norm": 0.33084338903427124, "learning_rate": 3.423410473691075e-05, "loss": 0.1601, "step": 24759 }, { "epoch": 0.44162237363107765, "grad_norm": 0.24049882590770721, "learning_rate": 3.423265828029315e-05, "loss": 0.1668, "step": 24760 }, { "epoch": 0.44164020975279134, "grad_norm": 0.22782929241657257, "learning_rate": 3.423121178788659e-05, "loss": 0.0955, "step": 24761 }, { "epoch": 0.441658045874505, "grad_norm": 0.2691223621368408, "learning_rate": 3.4229765259696656e-05, "loss": 0.1319, "step": 24762 }, { "epoch": 0.44167588199621877, "grad_norm": 0.2977958917617798, "learning_rate": 3.4228318695728964e-05, "loss": 0.1894, "step": 24763 }, { "epoch": 0.44169371811793245, "grad_norm": 0.20542508363723755, "learning_rate": 3.422687209598913e-05, "loss": 0.1391, "step": 24764 }, { "epoch": 0.44171155423964614, "grad_norm": 0.23032750189304352, "learning_rate": 3.422542546048274e-05, "loss": 0.1357, "step": 24765 }, { "epoch": 0.44172939036135983, "grad_norm": 0.24167431890964508, "learning_rate": 3.422397878921542e-05, "loss": 0.1541, "step": 24766 }, { "epoch": 0.4417472264830735, "grad_norm": 0.2834479808807373, "learning_rate": 3.422253208219277e-05, "loss": 0.1466, "step": 24767 }, { "epoch": 0.4417650626047872, "grad_norm": 0.1846838891506195, "learning_rate": 3.42210853394204e-05, "loss": 0.1067, "step": 24768 }, { "epoch": 0.4417828987265009, "grad_norm": 0.3064740002155304, "learning_rate": 3.421963856090393e-05, "loss": 0.2035, "step": 24769 }, { "epoch": 0.4418007348482146, "grad_norm": 0.2363947182893753, "learning_rate": 3.421819174664895e-05, "loss": 0.1725, "step": 24770 }, { "epoch": 0.4418185709699283, "grad_norm": 0.2552568018436432, "learning_rate": 3.421674489666107e-05, "loss": 0.0926, "step": 24771 }, { "epoch": 0.441836407091642, "grad_norm": 0.32727375626564026, "learning_rate": 3.421529801094591e-05, "loss": 0.1598, "step": 24772 }, { "epoch": 0.4418542432133557, "grad_norm": 0.321321040391922, "learning_rate": 3.421385108950906e-05, "loss": 0.1764, "step": 24773 }, { "epoch": 0.4418720793350694, "grad_norm": 0.31286823749542236, "learning_rate": 3.421240413235615e-05, "loss": 0.1532, "step": 24774 }, { "epoch": 0.4418899154567831, "grad_norm": 0.25946810841560364, "learning_rate": 3.421095713949278e-05, "loss": 0.2026, "step": 24775 }, { "epoch": 0.44190775157849677, "grad_norm": 0.2240571528673172, "learning_rate": 3.420951011092456e-05, "loss": 0.0928, "step": 24776 }, { "epoch": 0.44192558770021045, "grad_norm": 0.2431851476430893, "learning_rate": 3.4208063046657096e-05, "loss": 0.157, "step": 24777 }, { "epoch": 0.44194342382192414, "grad_norm": 0.259525328874588, "learning_rate": 3.4206615946695996e-05, "loss": 0.0977, "step": 24778 }, { "epoch": 0.44196125994363783, "grad_norm": 0.22379638254642487, "learning_rate": 3.420516881104688e-05, "loss": 0.1468, "step": 24779 }, { "epoch": 0.4419790960653516, "grad_norm": 0.2746237814426422, "learning_rate": 3.4203721639715335e-05, "loss": 0.178, "step": 24780 }, { "epoch": 0.44199693218706526, "grad_norm": 0.2116021066904068, "learning_rate": 3.4202274432707e-05, "loss": 0.1173, "step": 24781 }, { "epoch": 0.44201476830877895, "grad_norm": 0.23600275814533234, "learning_rate": 3.420082719002747e-05, "loss": 0.1045, "step": 24782 }, { "epoch": 0.44203260443049264, "grad_norm": 0.23731102049350739, "learning_rate": 3.4199379911682357e-05, "loss": 0.172, "step": 24783 }, { "epoch": 0.4420504405522063, "grad_norm": 0.3204597532749176, "learning_rate": 3.419793259767726e-05, "loss": 0.1397, "step": 24784 }, { "epoch": 0.44206827667392, "grad_norm": 0.25386473536491394, "learning_rate": 3.419648524801781e-05, "loss": 0.189, "step": 24785 }, { "epoch": 0.4420861127956337, "grad_norm": 0.2224293202161789, "learning_rate": 3.4195037862709604e-05, "loss": 0.169, "step": 24786 }, { "epoch": 0.4421039489173474, "grad_norm": 0.211457297205925, "learning_rate": 3.419359044175825e-05, "loss": 0.1439, "step": 24787 }, { "epoch": 0.44212178503906113, "grad_norm": 0.44649019837379456, "learning_rate": 3.419214298516937e-05, "loss": 0.1529, "step": 24788 }, { "epoch": 0.4421396211607748, "grad_norm": 0.1499038189649582, "learning_rate": 3.419069549294857e-05, "loss": 0.0752, "step": 24789 }, { "epoch": 0.4421574572824885, "grad_norm": 0.29040560126304626, "learning_rate": 3.418924796510146e-05, "loss": 0.1562, "step": 24790 }, { "epoch": 0.4421752934042022, "grad_norm": 0.2621300518512726, "learning_rate": 3.418780040163365e-05, "loss": 0.1407, "step": 24791 }, { "epoch": 0.4421931295259159, "grad_norm": 0.44027602672576904, "learning_rate": 3.418635280255075e-05, "loss": 0.1766, "step": 24792 }, { "epoch": 0.4422109656476296, "grad_norm": 0.24036888778209686, "learning_rate": 3.4184905167858375e-05, "loss": 0.1373, "step": 24793 }, { "epoch": 0.44222880176934326, "grad_norm": 0.2343611866235733, "learning_rate": 3.4183457497562133e-05, "loss": 0.1703, "step": 24794 }, { "epoch": 0.44224663789105695, "grad_norm": 0.21968717873096466, "learning_rate": 3.418200979166764e-05, "loss": 0.1559, "step": 24795 }, { "epoch": 0.44226447401277064, "grad_norm": 0.2974361777305603, "learning_rate": 3.41805620501805e-05, "loss": 0.1681, "step": 24796 }, { "epoch": 0.4422823101344844, "grad_norm": 0.3284761607646942, "learning_rate": 3.417911427310634e-05, "loss": 0.1317, "step": 24797 }, { "epoch": 0.44230014625619807, "grad_norm": 0.31266823410987854, "learning_rate": 3.417766646045076e-05, "loss": 0.1641, "step": 24798 }, { "epoch": 0.44231798237791176, "grad_norm": 0.178290456533432, "learning_rate": 3.417621861221937e-05, "loss": 0.1133, "step": 24799 }, { "epoch": 0.44233581849962544, "grad_norm": 0.3215397298336029, "learning_rate": 3.4174770728417795e-05, "loss": 0.1903, "step": 24800 }, { "epoch": 0.44235365462133913, "grad_norm": 0.2602700889110565, "learning_rate": 3.417332280905163e-05, "loss": 0.0659, "step": 24801 }, { "epoch": 0.4423714907430528, "grad_norm": 0.2936047315597534, "learning_rate": 3.417187485412651e-05, "loss": 0.1783, "step": 24802 }, { "epoch": 0.4423893268647665, "grad_norm": 0.38923677802085876, "learning_rate": 3.4170426863648025e-05, "loss": 0.1866, "step": 24803 }, { "epoch": 0.4424071629864802, "grad_norm": 0.2908266484737396, "learning_rate": 3.41689788376218e-05, "loss": 0.1817, "step": 24804 }, { "epoch": 0.44242499910819394, "grad_norm": 0.206027552485466, "learning_rate": 3.4167530776053446e-05, "loss": 0.1026, "step": 24805 }, { "epoch": 0.4424428352299076, "grad_norm": 0.29428526759147644, "learning_rate": 3.416608267894858e-05, "loss": 0.1384, "step": 24806 }, { "epoch": 0.4424606713516213, "grad_norm": 0.33642786741256714, "learning_rate": 3.416463454631281e-05, "loss": 0.1909, "step": 24807 }, { "epoch": 0.442478507473335, "grad_norm": 0.265349417924881, "learning_rate": 3.416318637815175e-05, "loss": 0.1581, "step": 24808 }, { "epoch": 0.4424963435950487, "grad_norm": 0.21105970442295074, "learning_rate": 3.416173817447101e-05, "loss": 0.0958, "step": 24809 }, { "epoch": 0.4425141797167624, "grad_norm": 0.33798760175704956, "learning_rate": 3.416028993527621e-05, "loss": 0.1012, "step": 24810 }, { "epoch": 0.44253201583847607, "grad_norm": 0.32038789987564087, "learning_rate": 3.415884166057297e-05, "loss": 0.165, "step": 24811 }, { "epoch": 0.44254985196018976, "grad_norm": 0.27280333638191223, "learning_rate": 3.41573933503669e-05, "loss": 0.2035, "step": 24812 }, { "epoch": 0.4425676880819035, "grad_norm": 0.30833297967910767, "learning_rate": 3.41559450046636e-05, "loss": 0.1132, "step": 24813 }, { "epoch": 0.4425855242036172, "grad_norm": 0.2255076915025711, "learning_rate": 3.415449662346869e-05, "loss": 0.1366, "step": 24814 }, { "epoch": 0.4426033603253309, "grad_norm": 0.22436361014842987, "learning_rate": 3.41530482067878e-05, "loss": 0.1397, "step": 24815 }, { "epoch": 0.44262119644704456, "grad_norm": 0.31262320280075073, "learning_rate": 3.4151599754626536e-05, "loss": 0.1618, "step": 24816 }, { "epoch": 0.44263903256875825, "grad_norm": 0.2063130885362625, "learning_rate": 3.415015126699051e-05, "loss": 0.1197, "step": 24817 }, { "epoch": 0.44265686869047194, "grad_norm": 0.2980993390083313, "learning_rate": 3.414870274388533e-05, "loss": 0.1187, "step": 24818 }, { "epoch": 0.4426747048121856, "grad_norm": 0.251115083694458, "learning_rate": 3.414725418531662e-05, "loss": 0.1444, "step": 24819 }, { "epoch": 0.4426925409338993, "grad_norm": 0.3356434404850006, "learning_rate": 3.414580559128999e-05, "loss": 0.1756, "step": 24820 }, { "epoch": 0.442710377055613, "grad_norm": 0.2360800802707672, "learning_rate": 3.4144356961811066e-05, "loss": 0.1582, "step": 24821 }, { "epoch": 0.44272821317732675, "grad_norm": 0.2754876911640167, "learning_rate": 3.414290829688545e-05, "loss": 0.1275, "step": 24822 }, { "epoch": 0.44274604929904043, "grad_norm": 0.2772158086299896, "learning_rate": 3.4141459596518765e-05, "loss": 0.1151, "step": 24823 }, { "epoch": 0.4427638854207541, "grad_norm": 0.21152371168136597, "learning_rate": 3.414001086071663e-05, "loss": 0.154, "step": 24824 }, { "epoch": 0.4427817215424678, "grad_norm": 0.22168295085430145, "learning_rate": 3.413856208948465e-05, "loss": 0.1548, "step": 24825 }, { "epoch": 0.4427995576641815, "grad_norm": 0.3185756802558899, "learning_rate": 3.413711328282845e-05, "loss": 0.1564, "step": 24826 }, { "epoch": 0.4428173937858952, "grad_norm": 0.2771763801574707, "learning_rate": 3.413566444075364e-05, "loss": 0.1021, "step": 24827 }, { "epoch": 0.4428352299076089, "grad_norm": 0.3360157608985901, "learning_rate": 3.413421556326585e-05, "loss": 0.1503, "step": 24828 }, { "epoch": 0.44285306602932256, "grad_norm": 0.22309336066246033, "learning_rate": 3.4132766650370674e-05, "loss": 0.1059, "step": 24829 }, { "epoch": 0.4428709021510363, "grad_norm": 0.3279212415218353, "learning_rate": 3.4131317702073744e-05, "loss": 0.1072, "step": 24830 }, { "epoch": 0.44288873827275, "grad_norm": 0.3064015805721283, "learning_rate": 3.4129868718380676e-05, "loss": 0.208, "step": 24831 }, { "epoch": 0.4429065743944637, "grad_norm": 0.24279336631298065, "learning_rate": 3.4128419699297074e-05, "loss": 0.1347, "step": 24832 }, { "epoch": 0.44292441051617737, "grad_norm": 0.2656693458557129, "learning_rate": 3.4126970644828576e-05, "loss": 0.1328, "step": 24833 }, { "epoch": 0.44294224663789106, "grad_norm": 0.42791885137557983, "learning_rate": 3.4125521554980786e-05, "loss": 0.1509, "step": 24834 }, { "epoch": 0.44296008275960475, "grad_norm": 0.3154014050960541, "learning_rate": 3.4124072429759314e-05, "loss": 0.1295, "step": 24835 }, { "epoch": 0.44297791888131843, "grad_norm": 0.24485813081264496, "learning_rate": 3.4122623269169785e-05, "loss": 0.1532, "step": 24836 }, { "epoch": 0.4429957550030321, "grad_norm": 0.20850135385990143, "learning_rate": 3.412117407321783e-05, "loss": 0.1637, "step": 24837 }, { "epoch": 0.4430135911247458, "grad_norm": 0.3197515904903412, "learning_rate": 3.411972484190904e-05, "loss": 0.1349, "step": 24838 }, { "epoch": 0.44303142724645955, "grad_norm": 0.27301672101020813, "learning_rate": 3.4118275575249056e-05, "loss": 0.1689, "step": 24839 }, { "epoch": 0.44304926336817324, "grad_norm": 0.3225424885749817, "learning_rate": 3.411682627324349e-05, "loss": 0.0943, "step": 24840 }, { "epoch": 0.44306709948988693, "grad_norm": 0.305266410112381, "learning_rate": 3.411537693589795e-05, "loss": 0.2364, "step": 24841 }, { "epoch": 0.4430849356116006, "grad_norm": 0.21491581201553345, "learning_rate": 3.411392756321806e-05, "loss": 0.1307, "step": 24842 }, { "epoch": 0.4431027717333143, "grad_norm": 0.24535910785198212, "learning_rate": 3.411247815520944e-05, "loss": 0.1714, "step": 24843 }, { "epoch": 0.443120607855028, "grad_norm": 0.21555083990097046, "learning_rate": 3.411102871187771e-05, "loss": 0.134, "step": 24844 }, { "epoch": 0.4431384439767417, "grad_norm": 0.1810872107744217, "learning_rate": 3.410957923322848e-05, "loss": 0.1564, "step": 24845 }, { "epoch": 0.44315628009845537, "grad_norm": 0.25940626859664917, "learning_rate": 3.410812971926738e-05, "loss": 0.1243, "step": 24846 }, { "epoch": 0.4431741162201691, "grad_norm": 0.2311011552810669, "learning_rate": 3.4106680170000016e-05, "loss": 0.1196, "step": 24847 }, { "epoch": 0.4431919523418828, "grad_norm": 0.23357993364334106, "learning_rate": 3.410523058543202e-05, "loss": 0.1662, "step": 24848 }, { "epoch": 0.4432097884635965, "grad_norm": 0.2707495391368866, "learning_rate": 3.410378096556901e-05, "loss": 0.1269, "step": 24849 }, { "epoch": 0.4432276245853102, "grad_norm": 0.3249301016330719, "learning_rate": 3.410233131041659e-05, "loss": 0.196, "step": 24850 }, { "epoch": 0.44324546070702386, "grad_norm": 0.2773958444595337, "learning_rate": 3.41008816199804e-05, "loss": 0.1936, "step": 24851 }, { "epoch": 0.44326329682873755, "grad_norm": 0.2752026617527008, "learning_rate": 3.4099431894266044e-05, "loss": 0.1576, "step": 24852 }, { "epoch": 0.44328113295045124, "grad_norm": 0.300048828125, "learning_rate": 3.4097982133279145e-05, "loss": 0.0947, "step": 24853 }, { "epoch": 0.44329896907216493, "grad_norm": 0.23399627208709717, "learning_rate": 3.409653233702533e-05, "loss": 0.1142, "step": 24854 }, { "epoch": 0.4433168051938786, "grad_norm": 0.22062677145004272, "learning_rate": 3.4095082505510214e-05, "loss": 0.0985, "step": 24855 }, { "epoch": 0.44333464131559236, "grad_norm": 0.19096003472805023, "learning_rate": 3.4093632638739417e-05, "loss": 0.1225, "step": 24856 }, { "epoch": 0.44335247743730605, "grad_norm": 0.30973878502845764, "learning_rate": 3.409218273671855e-05, "loss": 0.1645, "step": 24857 }, { "epoch": 0.44337031355901974, "grad_norm": 0.21248957514762878, "learning_rate": 3.409073279945325e-05, "loss": 0.136, "step": 24858 }, { "epoch": 0.4433881496807334, "grad_norm": 0.2656228542327881, "learning_rate": 3.408928282694913e-05, "loss": 0.1495, "step": 24859 }, { "epoch": 0.4434059858024471, "grad_norm": 0.37462103366851807, "learning_rate": 3.408783281921181e-05, "loss": 0.1434, "step": 24860 }, { "epoch": 0.4434238219241608, "grad_norm": 0.2554827630519867, "learning_rate": 3.408638277624691e-05, "loss": 0.1545, "step": 24861 }, { "epoch": 0.4434416580458745, "grad_norm": 0.23490700125694275, "learning_rate": 3.408493269806005e-05, "loss": 0.1533, "step": 24862 }, { "epoch": 0.4434594941675882, "grad_norm": 0.27587994933128357, "learning_rate": 3.408348258465686e-05, "loss": 0.1547, "step": 24863 }, { "epoch": 0.4434773302893019, "grad_norm": 0.3151644468307495, "learning_rate": 3.408203243604294e-05, "loss": 0.1477, "step": 24864 }, { "epoch": 0.4434951664110156, "grad_norm": 0.24536073207855225, "learning_rate": 3.408058225222394e-05, "loss": 0.1166, "step": 24865 }, { "epoch": 0.4435130025327293, "grad_norm": 0.26136505603790283, "learning_rate": 3.4079132033205465e-05, "loss": 0.1297, "step": 24866 }, { "epoch": 0.443530838654443, "grad_norm": 0.28272610902786255, "learning_rate": 3.407768177899314e-05, "loss": 0.1961, "step": 24867 }, { "epoch": 0.44354867477615667, "grad_norm": 0.26030322909355164, "learning_rate": 3.407623148959258e-05, "loss": 0.1252, "step": 24868 }, { "epoch": 0.44356651089787036, "grad_norm": 0.23360766470432281, "learning_rate": 3.407478116500941e-05, "loss": 0.1533, "step": 24869 }, { "epoch": 0.44358434701958405, "grad_norm": 0.28147628903388977, "learning_rate": 3.407333080524925e-05, "loss": 0.1158, "step": 24870 }, { "epoch": 0.44360218314129773, "grad_norm": 0.38535448908805847, "learning_rate": 3.4071880410317735e-05, "loss": 0.1673, "step": 24871 }, { "epoch": 0.4436200192630115, "grad_norm": 0.27549269795417786, "learning_rate": 3.407042998022047e-05, "loss": 0.1577, "step": 24872 }, { "epoch": 0.44363785538472517, "grad_norm": 0.27959156036376953, "learning_rate": 3.40689795149631e-05, "loss": 0.0812, "step": 24873 }, { "epoch": 0.44365569150643885, "grad_norm": 0.2094658613204956, "learning_rate": 3.4067529014551224e-05, "loss": 0.151, "step": 24874 }, { "epoch": 0.44367352762815254, "grad_norm": 0.2217167764902115, "learning_rate": 3.406607847899047e-05, "loss": 0.143, "step": 24875 }, { "epoch": 0.44369136374986623, "grad_norm": 0.4256473183631897, "learning_rate": 3.406462790828647e-05, "loss": 0.1912, "step": 24876 }, { "epoch": 0.4437091998715799, "grad_norm": 0.24542111158370972, "learning_rate": 3.406317730244484e-05, "loss": 0.1107, "step": 24877 }, { "epoch": 0.4437270359932936, "grad_norm": 0.19015569984912872, "learning_rate": 3.406172666147121e-05, "loss": 0.1179, "step": 24878 }, { "epoch": 0.4437448721150073, "grad_norm": 0.2770199179649353, "learning_rate": 3.406027598537118e-05, "loss": 0.1545, "step": 24879 }, { "epoch": 0.443762708236721, "grad_norm": 0.26866960525512695, "learning_rate": 3.405882527415041e-05, "loss": 0.0878, "step": 24880 }, { "epoch": 0.4437805443584347, "grad_norm": 0.2646898925304413, "learning_rate": 3.40573745278145e-05, "loss": 0.1561, "step": 24881 }, { "epoch": 0.4437983804801484, "grad_norm": 0.3976670205593109, "learning_rate": 3.4055923746369075e-05, "loss": 0.1609, "step": 24882 }, { "epoch": 0.4438162166018621, "grad_norm": 0.1968865841627121, "learning_rate": 3.4054472929819766e-05, "loss": 0.164, "step": 24883 }, { "epoch": 0.4438340527235758, "grad_norm": 0.2928166687488556, "learning_rate": 3.4053022078172184e-05, "loss": 0.1475, "step": 24884 }, { "epoch": 0.4438518888452895, "grad_norm": 0.24959976971149445, "learning_rate": 3.4051571191431965e-05, "loss": 0.1539, "step": 24885 }, { "epoch": 0.44386972496700317, "grad_norm": 0.279587060213089, "learning_rate": 3.405012026960473e-05, "loss": 0.1577, "step": 24886 }, { "epoch": 0.44388756108871685, "grad_norm": 0.25099626183509827, "learning_rate": 3.404866931269611e-05, "loss": 0.1628, "step": 24887 }, { "epoch": 0.44390539721043054, "grad_norm": 0.2726922333240509, "learning_rate": 3.404721832071171e-05, "loss": 0.1026, "step": 24888 }, { "epoch": 0.4439232333321443, "grad_norm": 0.3385123908519745, "learning_rate": 3.4045767293657176e-05, "loss": 0.1853, "step": 24889 }, { "epoch": 0.443941069453858, "grad_norm": 0.22980597615242004, "learning_rate": 3.404431623153812e-05, "loss": 0.1336, "step": 24890 }, { "epoch": 0.44395890557557166, "grad_norm": 0.2664623558521271, "learning_rate": 3.404286513436017e-05, "loss": 0.1474, "step": 24891 }, { "epoch": 0.44397674169728535, "grad_norm": 0.2742363512516022, "learning_rate": 3.4041414002128954e-05, "loss": 0.1343, "step": 24892 }, { "epoch": 0.44399457781899904, "grad_norm": 0.2724882662296295, "learning_rate": 3.4039962834850095e-05, "loss": 0.1556, "step": 24893 }, { "epoch": 0.4440124139407127, "grad_norm": 0.2719927132129669, "learning_rate": 3.403851163252921e-05, "loss": 0.1527, "step": 24894 }, { "epoch": 0.4440302500624264, "grad_norm": 0.30843448638916016, "learning_rate": 3.403706039517194e-05, "loss": 0.1121, "step": 24895 }, { "epoch": 0.4440480861841401, "grad_norm": 0.3314504325389862, "learning_rate": 3.4035609122783905e-05, "loss": 0.1513, "step": 24896 }, { "epoch": 0.4440659223058538, "grad_norm": 0.3164519667625427, "learning_rate": 3.403415781537073e-05, "loss": 0.1872, "step": 24897 }, { "epoch": 0.44408375842756753, "grad_norm": 0.334339439868927, "learning_rate": 3.403270647293803e-05, "loss": 0.1764, "step": 24898 }, { "epoch": 0.4441015945492812, "grad_norm": 0.2193564623594284, "learning_rate": 3.4031255095491436e-05, "loss": 0.1967, "step": 24899 }, { "epoch": 0.4441194306709949, "grad_norm": 0.15048037469387054, "learning_rate": 3.4029803683036587e-05, "loss": 0.0838, "step": 24900 }, { "epoch": 0.4441372667927086, "grad_norm": 0.2857602536678314, "learning_rate": 3.40283522355791e-05, "loss": 0.1668, "step": 24901 }, { "epoch": 0.4441551029144223, "grad_norm": 0.2837502360343933, "learning_rate": 3.4026900753124604e-05, "loss": 0.1393, "step": 24902 }, { "epoch": 0.44417293903613597, "grad_norm": 0.23231053352355957, "learning_rate": 3.4025449235678713e-05, "loss": 0.1218, "step": 24903 }, { "epoch": 0.44419077515784966, "grad_norm": 0.2585558593273163, "learning_rate": 3.4023997683247075e-05, "loss": 0.1392, "step": 24904 }, { "epoch": 0.44420861127956335, "grad_norm": 0.21564440429210663, "learning_rate": 3.4022546095835294e-05, "loss": 0.1288, "step": 24905 }, { "epoch": 0.4442264474012771, "grad_norm": 0.32804688811302185, "learning_rate": 3.4021094473449014e-05, "loss": 0.1699, "step": 24906 }, { "epoch": 0.4442442835229908, "grad_norm": 0.22931218147277832, "learning_rate": 3.401964281609385e-05, "loss": 0.1366, "step": 24907 }, { "epoch": 0.44426211964470447, "grad_norm": 0.2769996225833893, "learning_rate": 3.401819112377544e-05, "loss": 0.2147, "step": 24908 }, { "epoch": 0.44427995576641816, "grad_norm": 0.28523778915405273, "learning_rate": 3.401673939649942e-05, "loss": 0.1628, "step": 24909 }, { "epoch": 0.44429779188813184, "grad_norm": 0.21927498281002045, "learning_rate": 3.401528763427139e-05, "loss": 0.1415, "step": 24910 }, { "epoch": 0.44431562800984553, "grad_norm": 0.24998922646045685, "learning_rate": 3.401383583709699e-05, "loss": 0.1426, "step": 24911 }, { "epoch": 0.4443334641315592, "grad_norm": 0.3166537284851074, "learning_rate": 3.4012384004981844e-05, "loss": 0.1688, "step": 24912 }, { "epoch": 0.4443513002532729, "grad_norm": 0.2819408178329468, "learning_rate": 3.4010932137931595e-05, "loss": 0.1804, "step": 24913 }, { "epoch": 0.4443691363749866, "grad_norm": 0.2404019981622696, "learning_rate": 3.4009480235951855e-05, "loss": 0.1257, "step": 24914 }, { "epoch": 0.44438697249670034, "grad_norm": 0.3003208041191101, "learning_rate": 3.400802829904827e-05, "loss": 0.1461, "step": 24915 }, { "epoch": 0.444404808618414, "grad_norm": 0.440448135137558, "learning_rate": 3.4006576327226434e-05, "loss": 0.1942, "step": 24916 }, { "epoch": 0.4444226447401277, "grad_norm": 0.2392699122428894, "learning_rate": 3.400512432049202e-05, "loss": 0.1603, "step": 24917 }, { "epoch": 0.4444404808618414, "grad_norm": 0.24726980924606323, "learning_rate": 3.4003672278850617e-05, "loss": 0.1656, "step": 24918 }, { "epoch": 0.4444583169835551, "grad_norm": 0.2180936634540558, "learning_rate": 3.4002220202307876e-05, "loss": 0.1311, "step": 24919 }, { "epoch": 0.4444761531052688, "grad_norm": 0.20830442011356354, "learning_rate": 3.4000768090869424e-05, "loss": 0.1652, "step": 24920 }, { "epoch": 0.44449398922698247, "grad_norm": 0.3282788097858429, "learning_rate": 3.3999315944540886e-05, "loss": 0.1706, "step": 24921 }, { "epoch": 0.44451182534869615, "grad_norm": 0.460666686296463, "learning_rate": 3.399786376332789e-05, "loss": 0.1299, "step": 24922 }, { "epoch": 0.4445296614704099, "grad_norm": 0.2247573584318161, "learning_rate": 3.399641154723606e-05, "loss": 0.2017, "step": 24923 }, { "epoch": 0.4445474975921236, "grad_norm": 0.2084415704011917, "learning_rate": 3.399495929627105e-05, "loss": 0.1678, "step": 24924 }, { "epoch": 0.4445653337138373, "grad_norm": 0.25306323170661926, "learning_rate": 3.3993507010438446e-05, "loss": 0.1668, "step": 24925 }, { "epoch": 0.44458316983555096, "grad_norm": 0.20952977240085602, "learning_rate": 3.399205468974391e-05, "loss": 0.1446, "step": 24926 }, { "epoch": 0.44460100595726465, "grad_norm": 0.25841525197029114, "learning_rate": 3.399060233419307e-05, "loss": 0.1087, "step": 24927 }, { "epoch": 0.44461884207897834, "grad_norm": 0.29320597648620605, "learning_rate": 3.398914994379155e-05, "loss": 0.1652, "step": 24928 }, { "epoch": 0.444636678200692, "grad_norm": 0.34367331862449646, "learning_rate": 3.398769751854498e-05, "loss": 0.1338, "step": 24929 }, { "epoch": 0.4446545143224057, "grad_norm": 0.23877593874931335, "learning_rate": 3.3986245058458995e-05, "loss": 0.1274, "step": 24930 }, { "epoch": 0.44467235044411946, "grad_norm": 0.28455033898353577, "learning_rate": 3.398479256353921e-05, "loss": 0.1905, "step": 24931 }, { "epoch": 0.44469018656583315, "grad_norm": 0.21717683970928192, "learning_rate": 3.3983340033791275e-05, "loss": 0.1219, "step": 24932 }, { "epoch": 0.44470802268754683, "grad_norm": 0.22644302248954773, "learning_rate": 3.3981887469220805e-05, "loss": 0.1088, "step": 24933 }, { "epoch": 0.4447258588092605, "grad_norm": 0.2652421295642853, "learning_rate": 3.398043486983343e-05, "loss": 0.1327, "step": 24934 }, { "epoch": 0.4447436949309742, "grad_norm": 0.38841715455055237, "learning_rate": 3.3978982235634807e-05, "loss": 0.1773, "step": 24935 }, { "epoch": 0.4447615310526879, "grad_norm": 0.2776479125022888, "learning_rate": 3.397752956663053e-05, "loss": 0.1657, "step": 24936 }, { "epoch": 0.4447793671744016, "grad_norm": 0.28849300742149353, "learning_rate": 3.397607686282626e-05, "loss": 0.1866, "step": 24937 }, { "epoch": 0.4447972032961153, "grad_norm": 0.18944236636161804, "learning_rate": 3.3974624124227604e-05, "loss": 0.1538, "step": 24938 }, { "epoch": 0.44481503941782896, "grad_norm": 0.300076425075531, "learning_rate": 3.397317135084021e-05, "loss": 0.1031, "step": 24939 }, { "epoch": 0.4448328755395427, "grad_norm": 0.18755820393562317, "learning_rate": 3.3971718542669704e-05, "loss": 0.1048, "step": 24940 }, { "epoch": 0.4448507116612564, "grad_norm": 0.25192809104919434, "learning_rate": 3.397026569972172e-05, "loss": 0.1615, "step": 24941 }, { "epoch": 0.4448685477829701, "grad_norm": 0.2576552927494049, "learning_rate": 3.396881282200189e-05, "loss": 0.079, "step": 24942 }, { "epoch": 0.44488638390468377, "grad_norm": 0.2265574336051941, "learning_rate": 3.396735990951585e-05, "loss": 0.146, "step": 24943 }, { "epoch": 0.44490422002639746, "grad_norm": 0.41231682896614075, "learning_rate": 3.3965906962269214e-05, "loss": 0.1329, "step": 24944 }, { "epoch": 0.44492205614811114, "grad_norm": 0.2770184874534607, "learning_rate": 3.396445398026763e-05, "loss": 0.1263, "step": 24945 }, { "epoch": 0.44493989226982483, "grad_norm": 0.23502086102962494, "learning_rate": 3.396300096351672e-05, "loss": 0.1394, "step": 24946 }, { "epoch": 0.4449577283915385, "grad_norm": 0.35654568672180176, "learning_rate": 3.396154791202213e-05, "loss": 0.1386, "step": 24947 }, { "epoch": 0.44497556451325226, "grad_norm": 0.32461968064308167, "learning_rate": 3.396009482578949e-05, "loss": 0.1063, "step": 24948 }, { "epoch": 0.44499340063496595, "grad_norm": 0.2688353955745697, "learning_rate": 3.395864170482441e-05, "loss": 0.1411, "step": 24949 }, { "epoch": 0.44501123675667964, "grad_norm": 0.28183308243751526, "learning_rate": 3.395718854913256e-05, "loss": 0.1387, "step": 24950 }, { "epoch": 0.44502907287839333, "grad_norm": 0.3091309070587158, "learning_rate": 3.395573535871954e-05, "loss": 0.127, "step": 24951 }, { "epoch": 0.445046909000107, "grad_norm": 0.42708688974380493, "learning_rate": 3.3954282133591006e-05, "loss": 0.2512, "step": 24952 }, { "epoch": 0.4450647451218207, "grad_norm": 0.25196170806884766, "learning_rate": 3.3952828873752576e-05, "loss": 0.1353, "step": 24953 }, { "epoch": 0.4450825812435344, "grad_norm": 0.25167515873908997, "learning_rate": 3.395137557920989e-05, "loss": 0.161, "step": 24954 }, { "epoch": 0.4451004173652481, "grad_norm": 0.21120524406433105, "learning_rate": 3.3949922249968576e-05, "loss": 0.1502, "step": 24955 }, { "epoch": 0.44511825348696177, "grad_norm": 0.27936986088752747, "learning_rate": 3.394846888603428e-05, "loss": 0.147, "step": 24956 }, { "epoch": 0.4451360896086755, "grad_norm": 0.25990229845046997, "learning_rate": 3.394701548741262e-05, "loss": 0.1035, "step": 24957 }, { "epoch": 0.4451539257303892, "grad_norm": 0.2814280092716217, "learning_rate": 3.394556205410925e-05, "loss": 0.139, "step": 24958 }, { "epoch": 0.4451717618521029, "grad_norm": 0.23317204415798187, "learning_rate": 3.394410858612977e-05, "loss": 0.1793, "step": 24959 }, { "epoch": 0.4451895979738166, "grad_norm": 0.40178561210632324, "learning_rate": 3.394265508347986e-05, "loss": 0.0678, "step": 24960 }, { "epoch": 0.44520743409553026, "grad_norm": 0.2048632651567459, "learning_rate": 3.394120154616512e-05, "loss": 0.1408, "step": 24961 }, { "epoch": 0.44522527021724395, "grad_norm": 0.3062325716018677, "learning_rate": 3.393974797419119e-05, "loss": 0.1576, "step": 24962 }, { "epoch": 0.44524310633895764, "grad_norm": 0.2437918484210968, "learning_rate": 3.3938294367563724e-05, "loss": 0.1697, "step": 24963 }, { "epoch": 0.4452609424606713, "grad_norm": 0.22002071142196655, "learning_rate": 3.3936840726288326e-05, "loss": 0.0932, "step": 24964 }, { "epoch": 0.44527877858238507, "grad_norm": 0.203473299741745, "learning_rate": 3.393538705037066e-05, "loss": 0.1498, "step": 24965 }, { "epoch": 0.44529661470409876, "grad_norm": 0.21064774692058563, "learning_rate": 3.393393333981634e-05, "loss": 0.1189, "step": 24966 }, { "epoch": 0.44531445082581245, "grad_norm": 0.30560675263404846, "learning_rate": 3.3932479594631e-05, "loss": 0.1638, "step": 24967 }, { "epoch": 0.44533228694752613, "grad_norm": 0.25150179862976074, "learning_rate": 3.39310258148203e-05, "loss": 0.0789, "step": 24968 }, { "epoch": 0.4453501230692398, "grad_norm": 0.28300243616104126, "learning_rate": 3.392957200038985e-05, "loss": 0.1391, "step": 24969 }, { "epoch": 0.4453679591909535, "grad_norm": 0.3144187033176422, "learning_rate": 3.392811815134529e-05, "loss": 0.1409, "step": 24970 }, { "epoch": 0.4453857953126672, "grad_norm": 0.30045562982559204, "learning_rate": 3.392666426769228e-05, "loss": 0.1816, "step": 24971 }, { "epoch": 0.4454036314343809, "grad_norm": 0.31369584798812866, "learning_rate": 3.3925210349436414e-05, "loss": 0.169, "step": 24972 }, { "epoch": 0.44542146755609463, "grad_norm": 0.23182500898838043, "learning_rate": 3.392375639658336e-05, "loss": 0.1489, "step": 24973 }, { "epoch": 0.4454393036778083, "grad_norm": 0.3368208408355713, "learning_rate": 3.3922302409138744e-05, "loss": 0.1485, "step": 24974 }, { "epoch": 0.445457139799522, "grad_norm": 0.23210720717906952, "learning_rate": 3.39208483871082e-05, "loss": 0.1991, "step": 24975 }, { "epoch": 0.4454749759212357, "grad_norm": 0.26044806838035583, "learning_rate": 3.391939433049737e-05, "loss": 0.121, "step": 24976 }, { "epoch": 0.4454928120429494, "grad_norm": 0.2672989070415497, "learning_rate": 3.3917940239311885e-05, "loss": 0.1278, "step": 24977 }, { "epoch": 0.44551064816466307, "grad_norm": 0.2215174436569214, "learning_rate": 3.3916486113557385e-05, "loss": 0.1244, "step": 24978 }, { "epoch": 0.44552848428637676, "grad_norm": 0.38108932971954346, "learning_rate": 3.391503195323951e-05, "loss": 0.1635, "step": 24979 }, { "epoch": 0.44554632040809045, "grad_norm": 0.18883508443832397, "learning_rate": 3.391357775836388e-05, "loss": 0.1392, "step": 24980 }, { "epoch": 0.44556415652980413, "grad_norm": 0.24974606931209564, "learning_rate": 3.3912123528936154e-05, "loss": 0.1078, "step": 24981 }, { "epoch": 0.4455819926515179, "grad_norm": 0.3186878561973572, "learning_rate": 3.391066926496195e-05, "loss": 0.1234, "step": 24982 }, { "epoch": 0.44559982877323157, "grad_norm": 0.2335820198059082, "learning_rate": 3.3909214966446916e-05, "loss": 0.17, "step": 24983 }, { "epoch": 0.44561766489494525, "grad_norm": 0.25362586975097656, "learning_rate": 3.3907760633396694e-05, "loss": 0.0998, "step": 24984 }, { "epoch": 0.44563550101665894, "grad_norm": 0.5613580942153931, "learning_rate": 3.390630626581691e-05, "loss": 0.1224, "step": 24985 }, { "epoch": 0.44565333713837263, "grad_norm": 0.26306864619255066, "learning_rate": 3.390485186371321e-05, "loss": 0.108, "step": 24986 }, { "epoch": 0.4456711732600863, "grad_norm": 0.2483878880739212, "learning_rate": 3.3903397427091234e-05, "loss": 0.1604, "step": 24987 }, { "epoch": 0.4456890093818, "grad_norm": 0.1977320909500122, "learning_rate": 3.39019429559566e-05, "loss": 0.1613, "step": 24988 }, { "epoch": 0.4457068455035137, "grad_norm": 0.1874285489320755, "learning_rate": 3.390048845031497e-05, "loss": 0.1164, "step": 24989 }, { "epoch": 0.44572468162522744, "grad_norm": 0.28090229630470276, "learning_rate": 3.389903391017197e-05, "loss": 0.192, "step": 24990 }, { "epoch": 0.4457425177469411, "grad_norm": 0.20751237869262695, "learning_rate": 3.389757933553324e-05, "loss": 0.0752, "step": 24991 }, { "epoch": 0.4457603538686548, "grad_norm": 0.2270546853542328, "learning_rate": 3.389612472640442e-05, "loss": 0.0792, "step": 24992 }, { "epoch": 0.4457781899903685, "grad_norm": 0.42721545696258545, "learning_rate": 3.389467008279116e-05, "loss": 0.169, "step": 24993 }, { "epoch": 0.4457960261120822, "grad_norm": 0.34319695830345154, "learning_rate": 3.389321540469907e-05, "loss": 0.1316, "step": 24994 }, { "epoch": 0.4458138622337959, "grad_norm": 0.23150020837783813, "learning_rate": 3.3891760692133806e-05, "loss": 0.126, "step": 24995 }, { "epoch": 0.44583169835550956, "grad_norm": 0.25390133261680603, "learning_rate": 3.389030594510101e-05, "loss": 0.1752, "step": 24996 }, { "epoch": 0.44584953447722325, "grad_norm": 0.2629392445087433, "learning_rate": 3.3888851163606324e-05, "loss": 0.127, "step": 24997 }, { "epoch": 0.44586737059893694, "grad_norm": 0.2480379343032837, "learning_rate": 3.3887396347655375e-05, "loss": 0.15, "step": 24998 }, { "epoch": 0.4458852067206507, "grad_norm": 0.3128427565097809, "learning_rate": 3.388594149725381e-05, "loss": 0.1691, "step": 24999 }, { "epoch": 0.44590304284236437, "grad_norm": 0.2217392474412918, "learning_rate": 3.3884486612407266e-05, "loss": 0.1003, "step": 25000 }, { "epoch": 0.44590304284236437, "eval_loss": 0.1438295841217041, "eval_runtime": 107.275, "eval_samples_per_second": 9.546, "eval_steps_per_second": 1.594, "step": 25000 }, { "epoch": 0.44592087896407806, "grad_norm": 0.24391217529773712, "learning_rate": 3.388303169312138e-05, "loss": 0.1476, "step": 25001 }, { "epoch": 0.44593871508579175, "grad_norm": 0.25604453682899475, "learning_rate": 3.38815767394018e-05, "loss": 0.1046, "step": 25002 }, { "epoch": 0.44595655120750544, "grad_norm": 0.38589605689048767, "learning_rate": 3.388012175125416e-05, "loss": 0.1661, "step": 25003 }, { "epoch": 0.4459743873292191, "grad_norm": 0.29729077219963074, "learning_rate": 3.38786667286841e-05, "loss": 0.154, "step": 25004 }, { "epoch": 0.4459922234509328, "grad_norm": 0.335660845041275, "learning_rate": 3.387721167169726e-05, "loss": 0.178, "step": 25005 }, { "epoch": 0.4460100595726465, "grad_norm": 0.29932448267936707, "learning_rate": 3.387575658029928e-05, "loss": 0.14, "step": 25006 }, { "epoch": 0.44602789569436024, "grad_norm": 0.2991105318069458, "learning_rate": 3.387430145449581e-05, "loss": 0.1648, "step": 25007 }, { "epoch": 0.44604573181607393, "grad_norm": 0.24931123852729797, "learning_rate": 3.3872846294292474e-05, "loss": 0.127, "step": 25008 }, { "epoch": 0.4460635679377876, "grad_norm": 0.40050822496414185, "learning_rate": 3.387139109969493e-05, "loss": 0.161, "step": 25009 }, { "epoch": 0.4460814040595013, "grad_norm": 0.3428746163845062, "learning_rate": 3.3869935870708794e-05, "loss": 0.1141, "step": 25010 }, { "epoch": 0.446099240181215, "grad_norm": 0.26931509375572205, "learning_rate": 3.3868480607339735e-05, "loss": 0.1563, "step": 25011 }, { "epoch": 0.4461170763029287, "grad_norm": 0.21234270930290222, "learning_rate": 3.386702530959338e-05, "loss": 0.1497, "step": 25012 }, { "epoch": 0.44613491242464237, "grad_norm": 0.25246769189834595, "learning_rate": 3.3865569977475376e-05, "loss": 0.1534, "step": 25013 }, { "epoch": 0.44615274854635606, "grad_norm": 0.29366806149482727, "learning_rate": 3.386411461099136e-05, "loss": 0.213, "step": 25014 }, { "epoch": 0.44617058466806975, "grad_norm": 0.20416218042373657, "learning_rate": 3.3862659210146975e-05, "loss": 0.1333, "step": 25015 }, { "epoch": 0.4461884207897835, "grad_norm": 0.298424631357193, "learning_rate": 3.386120377494785e-05, "loss": 0.1739, "step": 25016 }, { "epoch": 0.4462062569114972, "grad_norm": 0.23990018665790558, "learning_rate": 3.3859748305399655e-05, "loss": 0.1378, "step": 25017 }, { "epoch": 0.44622409303321087, "grad_norm": 0.251436322927475, "learning_rate": 3.3858292801507995e-05, "loss": 0.1161, "step": 25018 }, { "epoch": 0.44624192915492455, "grad_norm": 0.29651856422424316, "learning_rate": 3.3856837263278554e-05, "loss": 0.2039, "step": 25019 }, { "epoch": 0.44625976527663824, "grad_norm": 0.24433746933937073, "learning_rate": 3.385538169071694e-05, "loss": 0.127, "step": 25020 }, { "epoch": 0.44627760139835193, "grad_norm": 0.15020044147968292, "learning_rate": 3.3853926083828814e-05, "loss": 0.1044, "step": 25021 }, { "epoch": 0.4462954375200656, "grad_norm": 0.32814761996269226, "learning_rate": 3.385247044261981e-05, "loss": 0.1287, "step": 25022 }, { "epoch": 0.4463132736417793, "grad_norm": 0.369859904050827, "learning_rate": 3.3851014767095565e-05, "loss": 0.2183, "step": 25023 }, { "epoch": 0.44633110976349305, "grad_norm": 0.2809942066669464, "learning_rate": 3.384955905726174e-05, "loss": 0.2118, "step": 25024 }, { "epoch": 0.44634894588520674, "grad_norm": 0.2394491732120514, "learning_rate": 3.384810331312397e-05, "loss": 0.1198, "step": 25025 }, { "epoch": 0.4463667820069204, "grad_norm": 0.21639138460159302, "learning_rate": 3.384664753468789e-05, "loss": 0.151, "step": 25026 }, { "epoch": 0.4463846181286341, "grad_norm": 0.2115909904241562, "learning_rate": 3.384519172195915e-05, "loss": 0.1117, "step": 25027 }, { "epoch": 0.4464024542503478, "grad_norm": 0.3918338418006897, "learning_rate": 3.384373587494339e-05, "loss": 0.2509, "step": 25028 }, { "epoch": 0.4464202903720615, "grad_norm": 0.20188231766223907, "learning_rate": 3.384227999364626e-05, "loss": 0.1303, "step": 25029 }, { "epoch": 0.4464381264937752, "grad_norm": 0.2243097722530365, "learning_rate": 3.3840824078073394e-05, "loss": 0.1593, "step": 25030 }, { "epoch": 0.44645596261548887, "grad_norm": 0.18480341136455536, "learning_rate": 3.383936812823044e-05, "loss": 0.1348, "step": 25031 }, { "epoch": 0.4464737987372026, "grad_norm": 0.23222771286964417, "learning_rate": 3.3837912144123045e-05, "loss": 0.1349, "step": 25032 }, { "epoch": 0.4464916348589163, "grad_norm": 0.3027746081352234, "learning_rate": 3.383645612575685e-05, "loss": 0.1282, "step": 25033 }, { "epoch": 0.44650947098063, "grad_norm": 0.2174845039844513, "learning_rate": 3.3835000073137504e-05, "loss": 0.139, "step": 25034 }, { "epoch": 0.4465273071023437, "grad_norm": 0.20630599558353424, "learning_rate": 3.3833543986270634e-05, "loss": 0.0871, "step": 25035 }, { "epoch": 0.44654514322405736, "grad_norm": 0.2154911458492279, "learning_rate": 3.383208786516191e-05, "loss": 0.1475, "step": 25036 }, { "epoch": 0.44656297934577105, "grad_norm": 0.344966858625412, "learning_rate": 3.383063170981695e-05, "loss": 0.2358, "step": 25037 }, { "epoch": 0.44658081546748474, "grad_norm": 0.25561490654945374, "learning_rate": 3.382917552024141e-05, "loss": 0.1566, "step": 25038 }, { "epoch": 0.4465986515891984, "grad_norm": 0.27829602360725403, "learning_rate": 3.382771929644095e-05, "loss": 0.1834, "step": 25039 }, { "epoch": 0.4466164877109121, "grad_norm": 0.3486580550670624, "learning_rate": 3.382626303842119e-05, "loss": 0.1629, "step": 25040 }, { "epoch": 0.44663432383262586, "grad_norm": 0.2230127602815628, "learning_rate": 3.38248067461878e-05, "loss": 0.1105, "step": 25041 }, { "epoch": 0.44665215995433954, "grad_norm": 0.2597845196723938, "learning_rate": 3.38233504197464e-05, "loss": 0.14, "step": 25042 }, { "epoch": 0.44666999607605323, "grad_norm": 0.30955132842063904, "learning_rate": 3.3821894059102645e-05, "loss": 0.2182, "step": 25043 }, { "epoch": 0.4466878321977669, "grad_norm": 0.26418086886405945, "learning_rate": 3.382043766426218e-05, "loss": 0.1485, "step": 25044 }, { "epoch": 0.4467056683194806, "grad_norm": 0.2648506462574005, "learning_rate": 3.3818981235230655e-05, "loss": 0.1206, "step": 25045 }, { "epoch": 0.4467235044411943, "grad_norm": 0.2646901607513428, "learning_rate": 3.381752477201372e-05, "loss": 0.1807, "step": 25046 }, { "epoch": 0.446741340562908, "grad_norm": 0.2794043719768524, "learning_rate": 3.381606827461701e-05, "loss": 0.1296, "step": 25047 }, { "epoch": 0.4467591766846217, "grad_norm": 0.5966060757637024, "learning_rate": 3.3814611743046165e-05, "loss": 0.167, "step": 25048 }, { "epoch": 0.4467770128063354, "grad_norm": 0.31819650530815125, "learning_rate": 3.3813155177306846e-05, "loss": 0.1813, "step": 25049 }, { "epoch": 0.4467948489280491, "grad_norm": 0.1923387199640274, "learning_rate": 3.3811698577404696e-05, "loss": 0.1214, "step": 25050 }, { "epoch": 0.4468126850497628, "grad_norm": 0.2962595820426941, "learning_rate": 3.381024194334535e-05, "loss": 0.1348, "step": 25051 }, { "epoch": 0.4468305211714765, "grad_norm": 0.2852820158004761, "learning_rate": 3.380878527513448e-05, "loss": 0.1186, "step": 25052 }, { "epoch": 0.44684835729319017, "grad_norm": 0.31666702032089233, "learning_rate": 3.3807328572777706e-05, "loss": 0.1882, "step": 25053 }, { "epoch": 0.44686619341490386, "grad_norm": 0.2562214434146881, "learning_rate": 3.380587183628069e-05, "loss": 0.1586, "step": 25054 }, { "epoch": 0.44688402953661754, "grad_norm": 0.25463801622390747, "learning_rate": 3.3804415065649064e-05, "loss": 0.1699, "step": 25055 }, { "epoch": 0.44690186565833123, "grad_norm": 0.260018527507782, "learning_rate": 3.380295826088849e-05, "loss": 0.1453, "step": 25056 }, { "epoch": 0.4469197017800449, "grad_norm": 0.2361421436071396, "learning_rate": 3.380150142200461e-05, "loss": 0.1589, "step": 25057 }, { "epoch": 0.44693753790175866, "grad_norm": 0.2917416989803314, "learning_rate": 3.3800044549003065e-05, "loss": 0.1161, "step": 25058 }, { "epoch": 0.44695537402347235, "grad_norm": 0.22058722376823425, "learning_rate": 3.379858764188951e-05, "loss": 0.1286, "step": 25059 }, { "epoch": 0.44697321014518604, "grad_norm": 0.17733772099018097, "learning_rate": 3.37971307006696e-05, "loss": 0.1376, "step": 25060 }, { "epoch": 0.4469910462668997, "grad_norm": 0.27901384234428406, "learning_rate": 3.379567372534896e-05, "loss": 0.0938, "step": 25061 }, { "epoch": 0.4470088823886134, "grad_norm": 0.284843772649765, "learning_rate": 3.379421671593326e-05, "loss": 0.1839, "step": 25062 }, { "epoch": 0.4470267185103271, "grad_norm": 0.2234141081571579, "learning_rate": 3.379275967242813e-05, "loss": 0.1673, "step": 25063 }, { "epoch": 0.4470445546320408, "grad_norm": 0.21907198429107666, "learning_rate": 3.3791302594839236e-05, "loss": 0.1233, "step": 25064 }, { "epoch": 0.4470623907537545, "grad_norm": 0.3250732123851776, "learning_rate": 3.3789845483172214e-05, "loss": 0.1535, "step": 25065 }, { "epoch": 0.4470802268754682, "grad_norm": 0.2301318347454071, "learning_rate": 3.378838833743271e-05, "loss": 0.1585, "step": 25066 }, { "epoch": 0.4470980629971819, "grad_norm": 0.22827045619487762, "learning_rate": 3.378693115762638e-05, "loss": 0.1443, "step": 25067 }, { "epoch": 0.4471158991188956, "grad_norm": 0.3583378195762634, "learning_rate": 3.378547394375887e-05, "loss": 0.1579, "step": 25068 }, { "epoch": 0.4471337352406093, "grad_norm": 0.19095134735107422, "learning_rate": 3.378401669583583e-05, "loss": 0.1267, "step": 25069 }, { "epoch": 0.447151571362323, "grad_norm": 0.27327707409858704, "learning_rate": 3.37825594138629e-05, "loss": 0.1753, "step": 25070 }, { "epoch": 0.44716940748403666, "grad_norm": 0.21191474795341492, "learning_rate": 3.378110209784574e-05, "loss": 0.1572, "step": 25071 }, { "epoch": 0.44718724360575035, "grad_norm": 0.2567843496799469, "learning_rate": 3.377964474779e-05, "loss": 0.1282, "step": 25072 }, { "epoch": 0.44720507972746404, "grad_norm": 0.2781575620174408, "learning_rate": 3.3778187363701323e-05, "loss": 0.2073, "step": 25073 }, { "epoch": 0.4472229158491778, "grad_norm": 0.30490902066230774, "learning_rate": 3.3776729945585364e-05, "loss": 0.2047, "step": 25074 }, { "epoch": 0.44724075197089147, "grad_norm": 0.22402484714984894, "learning_rate": 3.377527249344776e-05, "loss": 0.1766, "step": 25075 }, { "epoch": 0.44725858809260516, "grad_norm": 0.23545986413955688, "learning_rate": 3.377381500729417e-05, "loss": 0.1764, "step": 25076 }, { "epoch": 0.44727642421431885, "grad_norm": 0.21871119737625122, "learning_rate": 3.3772357487130245e-05, "loss": 0.1424, "step": 25077 }, { "epoch": 0.44729426033603253, "grad_norm": 0.27139389514923096, "learning_rate": 3.377089993296164e-05, "loss": 0.1333, "step": 25078 }, { "epoch": 0.4473120964577462, "grad_norm": 0.2520368993282318, "learning_rate": 3.376944234479398e-05, "loss": 0.1648, "step": 25079 }, { "epoch": 0.4473299325794599, "grad_norm": 0.2421381175518036, "learning_rate": 3.376798472263295e-05, "loss": 0.1336, "step": 25080 }, { "epoch": 0.4473477687011736, "grad_norm": 0.3012990355491638, "learning_rate": 3.376652706648417e-05, "loss": 0.1724, "step": 25081 }, { "epoch": 0.4473656048228873, "grad_norm": 0.26099875569343567, "learning_rate": 3.3765069376353315e-05, "loss": 0.0883, "step": 25082 }, { "epoch": 0.44738344094460103, "grad_norm": 0.22884108126163483, "learning_rate": 3.376361165224601e-05, "loss": 0.1318, "step": 25083 }, { "epoch": 0.4474012770663147, "grad_norm": 0.3147015869617462, "learning_rate": 3.376215389416794e-05, "loss": 0.1369, "step": 25084 }, { "epoch": 0.4474191131880284, "grad_norm": 0.2578750550746918, "learning_rate": 3.376069610212471e-05, "loss": 0.1263, "step": 25085 }, { "epoch": 0.4474369493097421, "grad_norm": 0.2533610165119171, "learning_rate": 3.375923827612201e-05, "loss": 0.1963, "step": 25086 }, { "epoch": 0.4474547854314558, "grad_norm": 0.2715403437614441, "learning_rate": 3.375778041616548e-05, "loss": 0.1646, "step": 25087 }, { "epoch": 0.44747262155316947, "grad_norm": 0.4554165005683899, "learning_rate": 3.375632252226076e-05, "loss": 0.1114, "step": 25088 }, { "epoch": 0.44749045767488316, "grad_norm": 0.27990132570266724, "learning_rate": 3.375486459441351e-05, "loss": 0.1508, "step": 25089 }, { "epoch": 0.44750829379659685, "grad_norm": 0.28010523319244385, "learning_rate": 3.375340663262939e-05, "loss": 0.1291, "step": 25090 }, { "epoch": 0.4475261299183106, "grad_norm": 0.2398664802312851, "learning_rate": 3.3751948636914033e-05, "loss": 0.1386, "step": 25091 }, { "epoch": 0.4475439660400243, "grad_norm": 0.2075163871049881, "learning_rate": 3.37504906072731e-05, "loss": 0.1358, "step": 25092 }, { "epoch": 0.44756180216173796, "grad_norm": 0.198046013712883, "learning_rate": 3.374903254371225e-05, "loss": 0.1533, "step": 25093 }, { "epoch": 0.44757963828345165, "grad_norm": 0.32511812448501587, "learning_rate": 3.374757444623712e-05, "loss": 0.1015, "step": 25094 }, { "epoch": 0.44759747440516534, "grad_norm": 0.2876203656196594, "learning_rate": 3.3746116314853384e-05, "loss": 0.1314, "step": 25095 }, { "epoch": 0.44761531052687903, "grad_norm": 0.2246880978345871, "learning_rate": 3.3744658149566666e-05, "loss": 0.1247, "step": 25096 }, { "epoch": 0.4476331466485927, "grad_norm": 0.4224717319011688, "learning_rate": 3.3743199950382645e-05, "loss": 0.1575, "step": 25097 }, { "epoch": 0.4476509827703064, "grad_norm": 0.2676776349544525, "learning_rate": 3.374174171730695e-05, "loss": 0.1273, "step": 25098 }, { "epoch": 0.4476688188920201, "grad_norm": 0.35073116421699524, "learning_rate": 3.374028345034525e-05, "loss": 0.1305, "step": 25099 }, { "epoch": 0.44768665501373384, "grad_norm": 0.3417600095272064, "learning_rate": 3.373882514950319e-05, "loss": 0.1209, "step": 25100 }, { "epoch": 0.4477044911354475, "grad_norm": 0.26098814606666565, "learning_rate": 3.373736681478643e-05, "loss": 0.1647, "step": 25101 }, { "epoch": 0.4477223272571612, "grad_norm": 0.2984170913696289, "learning_rate": 3.373590844620062e-05, "loss": 0.1723, "step": 25102 }, { "epoch": 0.4477401633788749, "grad_norm": 0.2330995351076126, "learning_rate": 3.3734450043751403e-05, "loss": 0.1413, "step": 25103 }, { "epoch": 0.4477579995005886, "grad_norm": 0.26513171195983887, "learning_rate": 3.373299160744444e-05, "loss": 0.1861, "step": 25104 }, { "epoch": 0.4477758356223023, "grad_norm": 0.22102831304073334, "learning_rate": 3.37315331372854e-05, "loss": 0.1541, "step": 25105 }, { "epoch": 0.44779367174401596, "grad_norm": 0.2039075344800949, "learning_rate": 3.373007463327991e-05, "loss": 0.1405, "step": 25106 }, { "epoch": 0.44781150786572965, "grad_norm": 0.26761913299560547, "learning_rate": 3.372861609543363e-05, "loss": 0.1338, "step": 25107 }, { "epoch": 0.4478293439874434, "grad_norm": 0.24937881529331207, "learning_rate": 3.372715752375223e-05, "loss": 0.1348, "step": 25108 }, { "epoch": 0.4478471801091571, "grad_norm": 0.2639763355255127, "learning_rate": 3.372569891824135e-05, "loss": 0.1474, "step": 25109 }, { "epoch": 0.44786501623087077, "grad_norm": 0.40596500039100647, "learning_rate": 3.3724240278906646e-05, "loss": 0.1187, "step": 25110 }, { "epoch": 0.44788285235258446, "grad_norm": 0.23569276928901672, "learning_rate": 3.372278160575377e-05, "loss": 0.1687, "step": 25111 }, { "epoch": 0.44790068847429815, "grad_norm": 0.29724204540252686, "learning_rate": 3.3721322898788394e-05, "loss": 0.1677, "step": 25112 }, { "epoch": 0.44791852459601184, "grad_norm": 0.2467195838689804, "learning_rate": 3.371986415801615e-05, "loss": 0.1944, "step": 25113 }, { "epoch": 0.4479363607177255, "grad_norm": 0.41708070039749146, "learning_rate": 3.3718405383442694e-05, "loss": 0.1971, "step": 25114 }, { "epoch": 0.4479541968394392, "grad_norm": 0.23805159330368042, "learning_rate": 3.371694657507369e-05, "loss": 0.1195, "step": 25115 }, { "epoch": 0.4479720329611529, "grad_norm": 0.2947307527065277, "learning_rate": 3.37154877329148e-05, "loss": 0.1794, "step": 25116 }, { "epoch": 0.44798986908286664, "grad_norm": 0.34023165702819824, "learning_rate": 3.371402885697166e-05, "loss": 0.1806, "step": 25117 }, { "epoch": 0.44800770520458033, "grad_norm": 0.2649897038936615, "learning_rate": 3.371256994724994e-05, "loss": 0.1659, "step": 25118 }, { "epoch": 0.448025541326294, "grad_norm": 0.2599446773529053, "learning_rate": 3.371111100375528e-05, "loss": 0.1653, "step": 25119 }, { "epoch": 0.4480433774480077, "grad_norm": 0.29039278626441956, "learning_rate": 3.370965202649335e-05, "loss": 0.1642, "step": 25120 }, { "epoch": 0.4480612135697214, "grad_norm": 0.2501363158226013, "learning_rate": 3.37081930154698e-05, "loss": 0.1195, "step": 25121 }, { "epoch": 0.4480790496914351, "grad_norm": 0.38731157779693604, "learning_rate": 3.370673397069029e-05, "loss": 0.247, "step": 25122 }, { "epoch": 0.44809688581314877, "grad_norm": 0.3547469973564148, "learning_rate": 3.370527489216048e-05, "loss": 0.1358, "step": 25123 }, { "epoch": 0.44811472193486246, "grad_norm": 0.28627923130989075, "learning_rate": 3.3703815779886e-05, "loss": 0.1557, "step": 25124 }, { "epoch": 0.4481325580565762, "grad_norm": 0.2766645848751068, "learning_rate": 3.3702356633872536e-05, "loss": 0.144, "step": 25125 }, { "epoch": 0.4481503941782899, "grad_norm": 0.23358498513698578, "learning_rate": 3.370089745412572e-05, "loss": 0.121, "step": 25126 }, { "epoch": 0.4481682303000036, "grad_norm": 0.2869694232940674, "learning_rate": 3.369943824065123e-05, "loss": 0.1304, "step": 25127 }, { "epoch": 0.44818606642171727, "grad_norm": 0.13642704486846924, "learning_rate": 3.369797899345471e-05, "loss": 0.1054, "step": 25128 }, { "epoch": 0.44820390254343095, "grad_norm": 0.25332969427108765, "learning_rate": 3.3696519712541825e-05, "loss": 0.133, "step": 25129 }, { "epoch": 0.44822173866514464, "grad_norm": 0.20523595809936523, "learning_rate": 3.3695060397918216e-05, "loss": 0.13, "step": 25130 }, { "epoch": 0.44823957478685833, "grad_norm": 0.2310948371887207, "learning_rate": 3.369360104958956e-05, "loss": 0.1487, "step": 25131 }, { "epoch": 0.448257410908572, "grad_norm": 0.2618043124675751, "learning_rate": 3.36921416675615e-05, "loss": 0.1664, "step": 25132 }, { "epoch": 0.44827524703028576, "grad_norm": 0.30424538254737854, "learning_rate": 3.369068225183969e-05, "loss": 0.1566, "step": 25133 }, { "epoch": 0.44829308315199945, "grad_norm": 0.29187119007110596, "learning_rate": 3.36892228024298e-05, "loss": 0.1664, "step": 25134 }, { "epoch": 0.44831091927371314, "grad_norm": 0.2773895561695099, "learning_rate": 3.368776331933748e-05, "loss": 0.1563, "step": 25135 }, { "epoch": 0.4483287553954268, "grad_norm": 0.2607872784137726, "learning_rate": 3.3686303802568384e-05, "loss": 0.1919, "step": 25136 }, { "epoch": 0.4483465915171405, "grad_norm": 0.20376341044902802, "learning_rate": 3.3684844252128186e-05, "loss": 0.1341, "step": 25137 }, { "epoch": 0.4483644276388542, "grad_norm": 0.31892910599708557, "learning_rate": 3.368338466802252e-05, "loss": 0.172, "step": 25138 }, { "epoch": 0.4483822637605679, "grad_norm": 0.3441879153251648, "learning_rate": 3.368192505025706e-05, "loss": 0.1556, "step": 25139 }, { "epoch": 0.4484000998822816, "grad_norm": 0.3495695888996124, "learning_rate": 3.3680465398837465e-05, "loss": 0.2275, "step": 25140 }, { "epoch": 0.44841793600399527, "grad_norm": 0.5058633685112, "learning_rate": 3.367900571376938e-05, "loss": 0.1151, "step": 25141 }, { "epoch": 0.448435772125709, "grad_norm": 0.27564477920532227, "learning_rate": 3.367754599505848e-05, "loss": 0.1348, "step": 25142 }, { "epoch": 0.4484536082474227, "grad_norm": 0.2550720274448395, "learning_rate": 3.36760862427104e-05, "loss": 0.1876, "step": 25143 }, { "epoch": 0.4484714443691364, "grad_norm": 0.2571973502635956, "learning_rate": 3.3674626456730826e-05, "loss": 0.1695, "step": 25144 }, { "epoch": 0.4484892804908501, "grad_norm": 0.20841741561889648, "learning_rate": 3.367316663712541e-05, "loss": 0.1759, "step": 25145 }, { "epoch": 0.44850711661256376, "grad_norm": 0.3069261908531189, "learning_rate": 3.3671706783899795e-05, "loss": 0.1077, "step": 25146 }, { "epoch": 0.44852495273427745, "grad_norm": 0.37278303503990173, "learning_rate": 3.367024689705965e-05, "loss": 0.1035, "step": 25147 }, { "epoch": 0.44854278885599114, "grad_norm": 0.2994999587535858, "learning_rate": 3.3668786976610625e-05, "loss": 0.1421, "step": 25148 }, { "epoch": 0.4485606249777048, "grad_norm": 0.3582760691642761, "learning_rate": 3.36673270225584e-05, "loss": 0.1724, "step": 25149 }, { "epoch": 0.44857846109941857, "grad_norm": 0.21472257375717163, "learning_rate": 3.3665867034908615e-05, "loss": 0.1869, "step": 25150 }, { "epoch": 0.44859629722113226, "grad_norm": 0.23597067594528198, "learning_rate": 3.3664407013666946e-05, "loss": 0.1445, "step": 25151 }, { "epoch": 0.44861413334284594, "grad_norm": 0.24522598087787628, "learning_rate": 3.366294695883903e-05, "loss": 0.1681, "step": 25152 }, { "epoch": 0.44863196946455963, "grad_norm": 0.25341346859931946, "learning_rate": 3.366148687043055e-05, "loss": 0.1566, "step": 25153 }, { "epoch": 0.4486498055862733, "grad_norm": 0.3028821647167206, "learning_rate": 3.3660026748447146e-05, "loss": 0.1463, "step": 25154 }, { "epoch": 0.448667641707987, "grad_norm": 0.29849863052368164, "learning_rate": 3.365856659289449e-05, "loss": 0.1714, "step": 25155 }, { "epoch": 0.4486854778297007, "grad_norm": 0.5199018716812134, "learning_rate": 3.365710640377824e-05, "loss": 0.1567, "step": 25156 }, { "epoch": 0.4487033139514144, "grad_norm": 0.27346059679985046, "learning_rate": 3.3655646181104056e-05, "loss": 0.1868, "step": 25157 }, { "epoch": 0.44872115007312807, "grad_norm": 0.2995662987232208, "learning_rate": 3.36541859248776e-05, "loss": 0.1333, "step": 25158 }, { "epoch": 0.4487389861948418, "grad_norm": 0.22186905145645142, "learning_rate": 3.365272563510453e-05, "loss": 0.1153, "step": 25159 }, { "epoch": 0.4487568223165555, "grad_norm": 0.26111921668052673, "learning_rate": 3.365126531179051e-05, "loss": 0.1798, "step": 25160 }, { "epoch": 0.4487746584382692, "grad_norm": 0.28916266560554504, "learning_rate": 3.3649804954941186e-05, "loss": 0.1668, "step": 25161 }, { "epoch": 0.4487924945599829, "grad_norm": 0.21810342371463776, "learning_rate": 3.364834456456224e-05, "loss": 0.1619, "step": 25162 }, { "epoch": 0.44881033068169657, "grad_norm": 0.32179510593414307, "learning_rate": 3.3646884140659315e-05, "loss": 0.2116, "step": 25163 }, { "epoch": 0.44882816680341026, "grad_norm": 0.22521504759788513, "learning_rate": 3.364542368323809e-05, "loss": 0.072, "step": 25164 }, { "epoch": 0.44884600292512394, "grad_norm": 0.22853060066699982, "learning_rate": 3.364396319230421e-05, "loss": 0.1747, "step": 25165 }, { "epoch": 0.44886383904683763, "grad_norm": 0.22562773525714874, "learning_rate": 3.364250266786335e-05, "loss": 0.1329, "step": 25166 }, { "epoch": 0.4488816751685514, "grad_norm": 0.32169923186302185, "learning_rate": 3.364104210992116e-05, "loss": 0.1501, "step": 25167 }, { "epoch": 0.44889951129026506, "grad_norm": 0.3001664876937866, "learning_rate": 3.363958151848331e-05, "loss": 0.0976, "step": 25168 }, { "epoch": 0.44891734741197875, "grad_norm": 0.26077699661254883, "learning_rate": 3.363812089355545e-05, "loss": 0.1592, "step": 25169 }, { "epoch": 0.44893518353369244, "grad_norm": 0.26564738154411316, "learning_rate": 3.363666023514326e-05, "loss": 0.1729, "step": 25170 }, { "epoch": 0.4489530196554061, "grad_norm": 0.2562617063522339, "learning_rate": 3.363519954325239e-05, "loss": 0.1463, "step": 25171 }, { "epoch": 0.4489708557771198, "grad_norm": 0.2161594182252884, "learning_rate": 3.3633738817888495e-05, "loss": 0.0988, "step": 25172 }, { "epoch": 0.4489886918988335, "grad_norm": 0.4524584412574768, "learning_rate": 3.363227805905725e-05, "loss": 0.2153, "step": 25173 }, { "epoch": 0.4490065280205472, "grad_norm": 0.36006593704223633, "learning_rate": 3.363081726676432e-05, "loss": 0.18, "step": 25174 }, { "epoch": 0.44902436414226093, "grad_norm": 0.31347933411598206, "learning_rate": 3.362935644101536e-05, "loss": 0.152, "step": 25175 }, { "epoch": 0.4490422002639746, "grad_norm": 0.2436746209859848, "learning_rate": 3.3627895581816025e-05, "loss": 0.1652, "step": 25176 }, { "epoch": 0.4490600363856883, "grad_norm": 0.34364891052246094, "learning_rate": 3.3626434689172e-05, "loss": 0.1097, "step": 25177 }, { "epoch": 0.449077872507402, "grad_norm": 0.2105472981929779, "learning_rate": 3.362497376308892e-05, "loss": 0.1337, "step": 25178 }, { "epoch": 0.4490957086291157, "grad_norm": 0.2542533576488495, "learning_rate": 3.362351280357248e-05, "loss": 0.1936, "step": 25179 }, { "epoch": 0.4491135447508294, "grad_norm": 0.25485458970069885, "learning_rate": 3.362205181062831e-05, "loss": 0.1562, "step": 25180 }, { "epoch": 0.44913138087254306, "grad_norm": 0.3123902678489685, "learning_rate": 3.36205907842621e-05, "loss": 0.1603, "step": 25181 }, { "epoch": 0.44914921699425675, "grad_norm": 0.25344014167785645, "learning_rate": 3.3619129724479495e-05, "loss": 0.1416, "step": 25182 }, { "epoch": 0.44916705311597044, "grad_norm": 0.25669312477111816, "learning_rate": 3.361766863128617e-05, "loss": 0.1422, "step": 25183 }, { "epoch": 0.4491848892376842, "grad_norm": 0.28137296438217163, "learning_rate": 3.361620750468779e-05, "loss": 0.1775, "step": 25184 }, { "epoch": 0.44920272535939787, "grad_norm": 0.30618470907211304, "learning_rate": 3.361474634469e-05, "loss": 0.1601, "step": 25185 }, { "epoch": 0.44922056148111156, "grad_norm": 0.2281280755996704, "learning_rate": 3.361328515129849e-05, "loss": 0.1266, "step": 25186 }, { "epoch": 0.44923839760282525, "grad_norm": 0.2970804274082184, "learning_rate": 3.361182392451891e-05, "loss": 0.1932, "step": 25187 }, { "epoch": 0.44925623372453893, "grad_norm": 0.22811521589756012, "learning_rate": 3.3610362664356925e-05, "loss": 0.1474, "step": 25188 }, { "epoch": 0.4492740698462526, "grad_norm": 0.3088405430316925, "learning_rate": 3.3608901370818196e-05, "loss": 0.1466, "step": 25189 }, { "epoch": 0.4492919059679663, "grad_norm": 0.32938724756240845, "learning_rate": 3.3607440043908395e-05, "loss": 0.1593, "step": 25190 }, { "epoch": 0.44930974208968, "grad_norm": 0.2268843799829483, "learning_rate": 3.360597868363318e-05, "loss": 0.1707, "step": 25191 }, { "epoch": 0.44932757821139374, "grad_norm": 0.23305165767669678, "learning_rate": 3.360451728999823e-05, "loss": 0.1421, "step": 25192 }, { "epoch": 0.44934541433310743, "grad_norm": 0.28758129477500916, "learning_rate": 3.360305586300919e-05, "loss": 0.1548, "step": 25193 }, { "epoch": 0.4493632504548211, "grad_norm": 0.21277910470962524, "learning_rate": 3.3601594402671735e-05, "loss": 0.1202, "step": 25194 }, { "epoch": 0.4493810865765348, "grad_norm": 0.2662602663040161, "learning_rate": 3.360013290899153e-05, "loss": 0.1862, "step": 25195 }, { "epoch": 0.4493989226982485, "grad_norm": 0.2004675716161728, "learning_rate": 3.359867138197424e-05, "loss": 0.1165, "step": 25196 }, { "epoch": 0.4494167588199622, "grad_norm": 0.22816108167171478, "learning_rate": 3.359720982162553e-05, "loss": 0.1877, "step": 25197 }, { "epoch": 0.44943459494167587, "grad_norm": 0.24822060763835907, "learning_rate": 3.359574822795106e-05, "loss": 0.1655, "step": 25198 }, { "epoch": 0.44945243106338956, "grad_norm": 0.303446501493454, "learning_rate": 3.3594286600956506e-05, "loss": 0.1152, "step": 25199 }, { "epoch": 0.44947026718510324, "grad_norm": 0.2870773673057556, "learning_rate": 3.359282494064753e-05, "loss": 0.1713, "step": 25200 }, { "epoch": 0.449488103306817, "grad_norm": 0.3076757490634918, "learning_rate": 3.35913632470298e-05, "loss": 0.1499, "step": 25201 }, { "epoch": 0.4495059394285307, "grad_norm": 0.2821045517921448, "learning_rate": 3.358990152010897e-05, "loss": 0.181, "step": 25202 }, { "epoch": 0.44952377555024436, "grad_norm": 0.2755041718482971, "learning_rate": 3.358843975989072e-05, "loss": 0.1975, "step": 25203 }, { "epoch": 0.44954161167195805, "grad_norm": 0.18216709792613983, "learning_rate": 3.358697796638071e-05, "loss": 0.1075, "step": 25204 }, { "epoch": 0.44955944779367174, "grad_norm": 0.3075617253780365, "learning_rate": 3.358551613958461e-05, "loss": 0.1647, "step": 25205 }, { "epoch": 0.44957728391538543, "grad_norm": 0.2305593639612198, "learning_rate": 3.3584054279508076e-05, "loss": 0.1173, "step": 25206 }, { "epoch": 0.4495951200370991, "grad_norm": 0.2448464035987854, "learning_rate": 3.358259238615679e-05, "loss": 0.1632, "step": 25207 }, { "epoch": 0.4496129561588128, "grad_norm": 0.19121719896793365, "learning_rate": 3.3581130459536406e-05, "loss": 0.1178, "step": 25208 }, { "epoch": 0.44963079228052655, "grad_norm": 0.2409624606370926, "learning_rate": 3.3579668499652605e-05, "loss": 0.145, "step": 25209 }, { "epoch": 0.44964862840224024, "grad_norm": 0.22802075743675232, "learning_rate": 3.357820650651104e-05, "loss": 0.1153, "step": 25210 }, { "epoch": 0.4496664645239539, "grad_norm": 0.2209000289440155, "learning_rate": 3.3576744480117384e-05, "loss": 0.1347, "step": 25211 }, { "epoch": 0.4496843006456676, "grad_norm": 0.34943556785583496, "learning_rate": 3.357528242047731e-05, "loss": 0.1787, "step": 25212 }, { "epoch": 0.4497021367673813, "grad_norm": 0.20893093943595886, "learning_rate": 3.357382032759647e-05, "loss": 0.1175, "step": 25213 }, { "epoch": 0.449719972889095, "grad_norm": 0.2644076943397522, "learning_rate": 3.3572358201480545e-05, "loss": 0.1645, "step": 25214 }, { "epoch": 0.4497378090108087, "grad_norm": 0.25215739011764526, "learning_rate": 3.35708960421352e-05, "loss": 0.127, "step": 25215 }, { "epoch": 0.44975564513252236, "grad_norm": 0.20632304251194, "learning_rate": 3.3569433849566105e-05, "loss": 0.0626, "step": 25216 }, { "epoch": 0.44977348125423605, "grad_norm": 0.21374443173408508, "learning_rate": 3.356797162377892e-05, "loss": 0.1094, "step": 25217 }, { "epoch": 0.4497913173759498, "grad_norm": 0.22069582343101501, "learning_rate": 3.3566509364779315e-05, "loss": 0.1199, "step": 25218 }, { "epoch": 0.4498091534976635, "grad_norm": 0.25240421295166016, "learning_rate": 3.3565047072572966e-05, "loss": 0.1301, "step": 25219 }, { "epoch": 0.44982698961937717, "grad_norm": 0.20521599054336548, "learning_rate": 3.3563584747165535e-05, "loss": 0.126, "step": 25220 }, { "epoch": 0.44984482574109086, "grad_norm": 0.4605122208595276, "learning_rate": 3.356212238856269e-05, "loss": 0.1837, "step": 25221 }, { "epoch": 0.44986266186280455, "grad_norm": 0.22142378985881805, "learning_rate": 3.3560659996770103e-05, "loss": 0.1468, "step": 25222 }, { "epoch": 0.44988049798451824, "grad_norm": 0.2981818914413452, "learning_rate": 3.355919757179344e-05, "loss": 0.1744, "step": 25223 }, { "epoch": 0.4498983341062319, "grad_norm": 0.2656365931034088, "learning_rate": 3.3557735113638375e-05, "loss": 0.1332, "step": 25224 }, { "epoch": 0.4499161702279456, "grad_norm": 0.2899135947227478, "learning_rate": 3.355627262231057e-05, "loss": 0.1583, "step": 25225 }, { "epoch": 0.44993400634965935, "grad_norm": 0.28080734610557556, "learning_rate": 3.355481009781569e-05, "loss": 0.1788, "step": 25226 }, { "epoch": 0.44995184247137304, "grad_norm": 0.25094074010849, "learning_rate": 3.355334754015943e-05, "loss": 0.1913, "step": 25227 }, { "epoch": 0.44996967859308673, "grad_norm": 0.2630382776260376, "learning_rate": 3.3551884949347415e-05, "loss": 0.1594, "step": 25228 }, { "epoch": 0.4499875147148004, "grad_norm": 0.2984447479248047, "learning_rate": 3.355042232538536e-05, "loss": 0.1683, "step": 25229 }, { "epoch": 0.4500053508365141, "grad_norm": 0.22763073444366455, "learning_rate": 3.35489596682789e-05, "loss": 0.1868, "step": 25230 }, { "epoch": 0.4500231869582278, "grad_norm": 0.20442494750022888, "learning_rate": 3.3547496978033734e-05, "loss": 0.124, "step": 25231 }, { "epoch": 0.4500410230799415, "grad_norm": 0.243928924202919, "learning_rate": 3.354603425465551e-05, "loss": 0.1297, "step": 25232 }, { "epoch": 0.45005885920165517, "grad_norm": 0.2313058078289032, "learning_rate": 3.354457149814991e-05, "loss": 0.1001, "step": 25233 }, { "epoch": 0.4500766953233689, "grad_norm": 0.34878385066986084, "learning_rate": 3.35431087085226e-05, "loss": 0.13, "step": 25234 }, { "epoch": 0.4500945314450826, "grad_norm": 0.2583732008934021, "learning_rate": 3.354164588577925e-05, "loss": 0.1457, "step": 25235 }, { "epoch": 0.4501123675667963, "grad_norm": 0.27169570326805115, "learning_rate": 3.3540183029925526e-05, "loss": 0.1823, "step": 25236 }, { "epoch": 0.45013020368851, "grad_norm": 0.2585698068141937, "learning_rate": 3.3538720140967105e-05, "loss": 0.125, "step": 25237 }, { "epoch": 0.45014803981022367, "grad_norm": 0.3255886733531952, "learning_rate": 3.353725721890965e-05, "loss": 0.1878, "step": 25238 }, { "epoch": 0.45016587593193735, "grad_norm": 0.24168072640895844, "learning_rate": 3.353579426375884e-05, "loss": 0.1227, "step": 25239 }, { "epoch": 0.45018371205365104, "grad_norm": 0.2801237404346466, "learning_rate": 3.353433127552035e-05, "loss": 0.1856, "step": 25240 }, { "epoch": 0.45020154817536473, "grad_norm": 0.22060294449329376, "learning_rate": 3.353286825419984e-05, "loss": 0.1727, "step": 25241 }, { "epoch": 0.4502193842970784, "grad_norm": 0.28346458077430725, "learning_rate": 3.3531405199802986e-05, "loss": 0.1658, "step": 25242 }, { "epoch": 0.45023722041879216, "grad_norm": 0.22537072002887726, "learning_rate": 3.3529942112335455e-05, "loss": 0.1703, "step": 25243 }, { "epoch": 0.45025505654050585, "grad_norm": 0.2824644446372986, "learning_rate": 3.352847899180293e-05, "loss": 0.175, "step": 25244 }, { "epoch": 0.45027289266221954, "grad_norm": 0.3069266676902771, "learning_rate": 3.3527015838211066e-05, "loss": 0.2013, "step": 25245 }, { "epoch": 0.4502907287839332, "grad_norm": 0.25050270557403564, "learning_rate": 3.3525552651565544e-05, "loss": 0.1497, "step": 25246 }, { "epoch": 0.4503085649056469, "grad_norm": 0.2628241181373596, "learning_rate": 3.352408943187204e-05, "loss": 0.1866, "step": 25247 }, { "epoch": 0.4503264010273606, "grad_norm": 0.2206946611404419, "learning_rate": 3.352262617913622e-05, "loss": 0.1617, "step": 25248 }, { "epoch": 0.4503442371490743, "grad_norm": 0.3750324845314026, "learning_rate": 3.352116289336376e-05, "loss": 0.1572, "step": 25249 }, { "epoch": 0.450362073270788, "grad_norm": 0.22658652067184448, "learning_rate": 3.3519699574560325e-05, "loss": 0.1438, "step": 25250 }, { "epoch": 0.4503799093925017, "grad_norm": 0.24892501533031464, "learning_rate": 3.35182362227316e-05, "loss": 0.1207, "step": 25251 }, { "epoch": 0.4503977455142154, "grad_norm": 0.3095797002315521, "learning_rate": 3.351677283788324e-05, "loss": 0.1522, "step": 25252 }, { "epoch": 0.4504155816359291, "grad_norm": 0.3030305504798889, "learning_rate": 3.351530942002093e-05, "loss": 0.1753, "step": 25253 }, { "epoch": 0.4504334177576428, "grad_norm": 0.2439109981060028, "learning_rate": 3.351384596915034e-05, "loss": 0.1364, "step": 25254 }, { "epoch": 0.4504512538793565, "grad_norm": 0.329201877117157, "learning_rate": 3.351238248527714e-05, "loss": 0.1087, "step": 25255 }, { "epoch": 0.45046909000107016, "grad_norm": 0.30045226216316223, "learning_rate": 3.351091896840701e-05, "loss": 0.1632, "step": 25256 }, { "epoch": 0.45048692612278385, "grad_norm": 0.27069029211997986, "learning_rate": 3.3509455418545615e-05, "loss": 0.1947, "step": 25257 }, { "epoch": 0.45050476224449754, "grad_norm": 0.34230273962020874, "learning_rate": 3.350799183569863e-05, "loss": 0.114, "step": 25258 }, { "epoch": 0.4505225983662112, "grad_norm": 0.48819881677627563, "learning_rate": 3.3506528219871733e-05, "loss": 0.1565, "step": 25259 }, { "epoch": 0.45054043448792497, "grad_norm": 0.22449827194213867, "learning_rate": 3.350506457107059e-05, "loss": 0.134, "step": 25260 }, { "epoch": 0.45055827060963866, "grad_norm": 0.2970493733882904, "learning_rate": 3.350360088930088e-05, "loss": 0.2096, "step": 25261 }, { "epoch": 0.45057610673135234, "grad_norm": 0.2858191728591919, "learning_rate": 3.3502137174568286e-05, "loss": 0.1456, "step": 25262 }, { "epoch": 0.45059394285306603, "grad_norm": 0.23565861582756042, "learning_rate": 3.3500673426878465e-05, "loss": 0.194, "step": 25263 }, { "epoch": 0.4506117789747797, "grad_norm": 0.30705341696739197, "learning_rate": 3.34992096462371e-05, "loss": 0.123, "step": 25264 }, { "epoch": 0.4506296150964934, "grad_norm": 0.33568763732910156, "learning_rate": 3.349774583264986e-05, "loss": 0.1909, "step": 25265 }, { "epoch": 0.4506474512182071, "grad_norm": 0.29980939626693726, "learning_rate": 3.3496281986122426e-05, "loss": 0.1694, "step": 25266 }, { "epoch": 0.4506652873399208, "grad_norm": 0.27742552757263184, "learning_rate": 3.349481810666046e-05, "loss": 0.1191, "step": 25267 }, { "epoch": 0.4506831234616345, "grad_norm": 0.2611779570579529, "learning_rate": 3.349335419426966e-05, "loss": 0.1285, "step": 25268 }, { "epoch": 0.4507009595833482, "grad_norm": 0.2133435606956482, "learning_rate": 3.3491890248955665e-05, "loss": 0.1137, "step": 25269 }, { "epoch": 0.4507187957050619, "grad_norm": 0.3011770248413086, "learning_rate": 3.349042627072418e-05, "loss": 0.183, "step": 25270 }, { "epoch": 0.4507366318267756, "grad_norm": 0.3069663643836975, "learning_rate": 3.348896225958088e-05, "loss": 0.1797, "step": 25271 }, { "epoch": 0.4507544679484893, "grad_norm": 0.28608402609825134, "learning_rate": 3.348749821553142e-05, "loss": 0.1483, "step": 25272 }, { "epoch": 0.45077230407020297, "grad_norm": 0.7763242721557617, "learning_rate": 3.3486034138581486e-05, "loss": 0.1449, "step": 25273 }, { "epoch": 0.45079014019191666, "grad_norm": 0.27435481548309326, "learning_rate": 3.348457002873675e-05, "loss": 0.1446, "step": 25274 }, { "epoch": 0.45080797631363034, "grad_norm": 0.21519720554351807, "learning_rate": 3.34831058860029e-05, "loss": 0.1579, "step": 25275 }, { "epoch": 0.45082581243534403, "grad_norm": 0.26937273144721985, "learning_rate": 3.3481641710385594e-05, "loss": 0.1608, "step": 25276 }, { "epoch": 0.4508436485570578, "grad_norm": 0.2907882630825043, "learning_rate": 3.348017750189052e-05, "loss": 0.1345, "step": 25277 }, { "epoch": 0.45086148467877146, "grad_norm": 0.3035869002342224, "learning_rate": 3.3478713260523344e-05, "loss": 0.193, "step": 25278 }, { "epoch": 0.45087932080048515, "grad_norm": 0.22311025857925415, "learning_rate": 3.347724898628974e-05, "loss": 0.1076, "step": 25279 }, { "epoch": 0.45089715692219884, "grad_norm": 0.3103754222393036, "learning_rate": 3.347578467919541e-05, "loss": 0.1588, "step": 25280 }, { "epoch": 0.4509149930439125, "grad_norm": 0.22730982303619385, "learning_rate": 3.347432033924599e-05, "loss": 0.1206, "step": 25281 }, { "epoch": 0.4509328291656262, "grad_norm": 0.3011152148246765, "learning_rate": 3.347285596644719e-05, "loss": 0.1468, "step": 25282 }, { "epoch": 0.4509506652873399, "grad_norm": 0.21181604266166687, "learning_rate": 3.347139156080468e-05, "loss": 0.1441, "step": 25283 }, { "epoch": 0.4509685014090536, "grad_norm": 0.2664339244365692, "learning_rate": 3.346992712232412e-05, "loss": 0.1492, "step": 25284 }, { "epoch": 0.45098633753076733, "grad_norm": 0.24919430911540985, "learning_rate": 3.34684626510112e-05, "loss": 0.1629, "step": 25285 }, { "epoch": 0.451004173652481, "grad_norm": 0.27807408571243286, "learning_rate": 3.34669981468716e-05, "loss": 0.1261, "step": 25286 }, { "epoch": 0.4510220097741947, "grad_norm": 0.33694398403167725, "learning_rate": 3.3465533609910977e-05, "loss": 0.1855, "step": 25287 }, { "epoch": 0.4510398458959084, "grad_norm": 0.29241684079170227, "learning_rate": 3.346406904013503e-05, "loss": 0.1469, "step": 25288 }, { "epoch": 0.4510576820176221, "grad_norm": 0.24050629138946533, "learning_rate": 3.346260443754943e-05, "loss": 0.1599, "step": 25289 }, { "epoch": 0.4510755181393358, "grad_norm": 0.2322806715965271, "learning_rate": 3.346113980215985e-05, "loss": 0.1223, "step": 25290 }, { "epoch": 0.45109335426104946, "grad_norm": 0.3003292381763458, "learning_rate": 3.345967513397197e-05, "loss": 0.1053, "step": 25291 }, { "epoch": 0.45111119038276315, "grad_norm": 0.2685421109199524, "learning_rate": 3.3458210432991464e-05, "loss": 0.1231, "step": 25292 }, { "epoch": 0.4511290265044769, "grad_norm": 0.30601227283477783, "learning_rate": 3.3456745699224015e-05, "loss": 0.1829, "step": 25293 }, { "epoch": 0.4511468626261906, "grad_norm": 0.29467248916625977, "learning_rate": 3.34552809326753e-05, "loss": 0.1267, "step": 25294 }, { "epoch": 0.45116469874790427, "grad_norm": 0.29698774218559265, "learning_rate": 3.345381613335099e-05, "loss": 0.1347, "step": 25295 }, { "epoch": 0.45118253486961796, "grad_norm": 0.3743653893470764, "learning_rate": 3.345235130125678e-05, "loss": 0.1226, "step": 25296 }, { "epoch": 0.45120037099133165, "grad_norm": 0.2456246316432953, "learning_rate": 3.3450886436398324e-05, "loss": 0.1349, "step": 25297 }, { "epoch": 0.45121820711304533, "grad_norm": 0.2276928871870041, "learning_rate": 3.344942153878133e-05, "loss": 0.1077, "step": 25298 }, { "epoch": 0.451236043234759, "grad_norm": 0.3575064241886139, "learning_rate": 3.344795660841145e-05, "loss": 0.1363, "step": 25299 }, { "epoch": 0.4512538793564727, "grad_norm": 0.2665491998195648, "learning_rate": 3.344649164529437e-05, "loss": 0.1741, "step": 25300 }, { "epoch": 0.4512717154781864, "grad_norm": 0.35520878434181213, "learning_rate": 3.3445026649435773e-05, "loss": 0.1228, "step": 25301 }, { "epoch": 0.45128955159990014, "grad_norm": 0.30768412351608276, "learning_rate": 3.3443561620841336e-05, "loss": 0.1684, "step": 25302 }, { "epoch": 0.45130738772161383, "grad_norm": 0.20322144031524658, "learning_rate": 3.344209655951674e-05, "loss": 0.16, "step": 25303 }, { "epoch": 0.4513252238433275, "grad_norm": 0.28124427795410156, "learning_rate": 3.344063146546766e-05, "loss": 0.0946, "step": 25304 }, { "epoch": 0.4513430599650412, "grad_norm": 0.2849627435207367, "learning_rate": 3.343916633869978e-05, "loss": 0.1254, "step": 25305 }, { "epoch": 0.4513608960867549, "grad_norm": 0.26051315665245056, "learning_rate": 3.343770117921877e-05, "loss": 0.145, "step": 25306 }, { "epoch": 0.4513787322084686, "grad_norm": 0.2562755346298218, "learning_rate": 3.3436235987030315e-05, "loss": 0.2008, "step": 25307 }, { "epoch": 0.45139656833018227, "grad_norm": 0.2513726055622101, "learning_rate": 3.34347707621401e-05, "loss": 0.1298, "step": 25308 }, { "epoch": 0.45141440445189596, "grad_norm": 0.27829447388648987, "learning_rate": 3.34333055045538e-05, "loss": 0.1271, "step": 25309 }, { "epoch": 0.4514322405736097, "grad_norm": 0.3086654245853424, "learning_rate": 3.34318402142771e-05, "loss": 0.1415, "step": 25310 }, { "epoch": 0.4514500766953234, "grad_norm": 0.3558303415775299, "learning_rate": 3.3430374891315674e-05, "loss": 0.1666, "step": 25311 }, { "epoch": 0.4514679128170371, "grad_norm": 0.3510570228099823, "learning_rate": 3.3428909535675195e-05, "loss": 0.134, "step": 25312 }, { "epoch": 0.45148574893875076, "grad_norm": 0.3693179488182068, "learning_rate": 3.3427444147361356e-05, "loss": 0.1587, "step": 25313 }, { "epoch": 0.45150358506046445, "grad_norm": 0.25806477665901184, "learning_rate": 3.3425978726379834e-05, "loss": 0.1669, "step": 25314 }, { "epoch": 0.45152142118217814, "grad_norm": 0.3023644685745239, "learning_rate": 3.342451327273631e-05, "loss": 0.1168, "step": 25315 }, { "epoch": 0.45153925730389183, "grad_norm": 0.4080870747566223, "learning_rate": 3.342304778643646e-05, "loss": 0.1196, "step": 25316 }, { "epoch": 0.4515570934256055, "grad_norm": 0.4201013445854187, "learning_rate": 3.342158226748596e-05, "loss": 0.2076, "step": 25317 }, { "epoch": 0.4515749295473192, "grad_norm": 0.3449398875236511, "learning_rate": 3.3420116715890515e-05, "loss": 0.1874, "step": 25318 }, { "epoch": 0.45159276566903295, "grad_norm": 0.29861119389533997, "learning_rate": 3.341865113165577e-05, "loss": 0.161, "step": 25319 }, { "epoch": 0.45161060179074664, "grad_norm": 0.27511903643608093, "learning_rate": 3.3417185514787446e-05, "loss": 0.1134, "step": 25320 }, { "epoch": 0.4516284379124603, "grad_norm": 0.2599897086620331, "learning_rate": 3.3415719865291194e-05, "loss": 0.1182, "step": 25321 }, { "epoch": 0.451646274034174, "grad_norm": 0.3110343813896179, "learning_rate": 3.34142541831727e-05, "loss": 0.163, "step": 25322 }, { "epoch": 0.4516641101558877, "grad_norm": 0.21736828982830048, "learning_rate": 3.341278846843765e-05, "loss": 0.1602, "step": 25323 }, { "epoch": 0.4516819462776014, "grad_norm": 0.31393927335739136, "learning_rate": 3.3411322721091735e-05, "loss": 0.1516, "step": 25324 }, { "epoch": 0.4516997823993151, "grad_norm": 0.22412462532520294, "learning_rate": 3.3409856941140625e-05, "loss": 0.1602, "step": 25325 }, { "epoch": 0.45171761852102876, "grad_norm": 0.3443428874015808, "learning_rate": 3.340839112859001e-05, "loss": 0.1623, "step": 25326 }, { "epoch": 0.4517354546427425, "grad_norm": 0.25632989406585693, "learning_rate": 3.340692528344556e-05, "loss": 0.1751, "step": 25327 }, { "epoch": 0.4517532907644562, "grad_norm": 0.23276600241661072, "learning_rate": 3.340545940571297e-05, "loss": 0.1188, "step": 25328 }, { "epoch": 0.4517711268861699, "grad_norm": 0.30829668045043945, "learning_rate": 3.340399349539791e-05, "loss": 0.1342, "step": 25329 }, { "epoch": 0.45178896300788357, "grad_norm": 0.35151705145835876, "learning_rate": 3.3402527552506064e-05, "loss": 0.1009, "step": 25330 }, { "epoch": 0.45180679912959726, "grad_norm": 0.27517446875572205, "learning_rate": 3.340106157704313e-05, "loss": 0.1204, "step": 25331 }, { "epoch": 0.45182463525131095, "grad_norm": 0.3466947376728058, "learning_rate": 3.339959556901477e-05, "loss": 0.1274, "step": 25332 }, { "epoch": 0.45184247137302463, "grad_norm": 0.39269569516181946, "learning_rate": 3.339812952842668e-05, "loss": 0.1527, "step": 25333 }, { "epoch": 0.4518603074947383, "grad_norm": 0.26444125175476074, "learning_rate": 3.3396663455284536e-05, "loss": 0.1319, "step": 25334 }, { "epoch": 0.45187814361645207, "grad_norm": 0.27987992763519287, "learning_rate": 3.339519734959403e-05, "loss": 0.1756, "step": 25335 }, { "epoch": 0.45189597973816575, "grad_norm": 0.21436183154582977, "learning_rate": 3.339373121136084e-05, "loss": 0.129, "step": 25336 }, { "epoch": 0.45191381585987944, "grad_norm": 0.2604813575744629, "learning_rate": 3.3392265040590645e-05, "loss": 0.1312, "step": 25337 }, { "epoch": 0.45193165198159313, "grad_norm": 0.30031466484069824, "learning_rate": 3.3390798837289134e-05, "loss": 0.1372, "step": 25338 }, { "epoch": 0.4519494881033068, "grad_norm": 0.29773229360580444, "learning_rate": 3.338933260146199e-05, "loss": 0.127, "step": 25339 }, { "epoch": 0.4519673242250205, "grad_norm": 0.23106749355793, "learning_rate": 3.338786633311489e-05, "loss": 0.132, "step": 25340 }, { "epoch": 0.4519851603467342, "grad_norm": 0.2504771649837494, "learning_rate": 3.3386400032253533e-05, "loss": 0.1984, "step": 25341 }, { "epoch": 0.4520029964684479, "grad_norm": 0.25725501775741577, "learning_rate": 3.338493369888359e-05, "loss": 0.1341, "step": 25342 }, { "epoch": 0.45202083259016157, "grad_norm": 0.3508862257003784, "learning_rate": 3.338346733301074e-05, "loss": 0.1219, "step": 25343 }, { "epoch": 0.4520386687118753, "grad_norm": 0.27442172169685364, "learning_rate": 3.3382000934640684e-05, "loss": 0.1694, "step": 25344 }, { "epoch": 0.452056504833589, "grad_norm": 0.3080499470233917, "learning_rate": 3.338053450377909e-05, "loss": 0.1468, "step": 25345 }, { "epoch": 0.4520743409553027, "grad_norm": 0.3203141987323761, "learning_rate": 3.3379068040431653e-05, "loss": 0.1237, "step": 25346 }, { "epoch": 0.4520921770770164, "grad_norm": 0.3010665774345398, "learning_rate": 3.3377601544604055e-05, "loss": 0.1242, "step": 25347 }, { "epoch": 0.45211001319873007, "grad_norm": 0.3806282579898834, "learning_rate": 3.337613501630198e-05, "loss": 0.1127, "step": 25348 }, { "epoch": 0.45212784932044375, "grad_norm": 0.24944719672203064, "learning_rate": 3.337466845553111e-05, "loss": 0.1308, "step": 25349 }, { "epoch": 0.45214568544215744, "grad_norm": 0.38069990277290344, "learning_rate": 3.3373201862297135e-05, "loss": 0.1044, "step": 25350 }, { "epoch": 0.45216352156387113, "grad_norm": 0.2638382017612457, "learning_rate": 3.337173523660573e-05, "loss": 0.1284, "step": 25351 }, { "epoch": 0.4521813576855849, "grad_norm": 0.3609374761581421, "learning_rate": 3.33702685784626e-05, "loss": 0.1425, "step": 25352 }, { "epoch": 0.45219919380729856, "grad_norm": 0.2218160778284073, "learning_rate": 3.3368801887873414e-05, "loss": 0.1243, "step": 25353 }, { "epoch": 0.45221702992901225, "grad_norm": 0.2859613001346588, "learning_rate": 3.336733516484386e-05, "loss": 0.1616, "step": 25354 }, { "epoch": 0.45223486605072594, "grad_norm": 0.24077485501766205, "learning_rate": 3.3365868409379634e-05, "loss": 0.1704, "step": 25355 }, { "epoch": 0.4522527021724396, "grad_norm": 0.22482459247112274, "learning_rate": 3.336440162148639e-05, "loss": 0.116, "step": 25356 }, { "epoch": 0.4522705382941533, "grad_norm": 0.2575322687625885, "learning_rate": 3.336293480116985e-05, "loss": 0.1245, "step": 25357 }, { "epoch": 0.452288374415867, "grad_norm": 0.36693382263183594, "learning_rate": 3.3361467948435685e-05, "loss": 0.1445, "step": 25358 }, { "epoch": 0.4523062105375807, "grad_norm": 0.2518131136894226, "learning_rate": 3.3360001063289585e-05, "loss": 0.214, "step": 25359 }, { "epoch": 0.4523240466592944, "grad_norm": 0.2738848924636841, "learning_rate": 3.335853414573723e-05, "loss": 0.1356, "step": 25360 }, { "epoch": 0.4523418827810081, "grad_norm": 0.22676293551921844, "learning_rate": 3.3357067195784316e-05, "loss": 0.1189, "step": 25361 }, { "epoch": 0.4523597189027218, "grad_norm": 0.2900744080543518, "learning_rate": 3.335560021343652e-05, "loss": 0.1888, "step": 25362 }, { "epoch": 0.4523775550244355, "grad_norm": 0.22740840911865234, "learning_rate": 3.335413319869952e-05, "loss": 0.1274, "step": 25363 }, { "epoch": 0.4523953911461492, "grad_norm": 0.2982439696788788, "learning_rate": 3.335266615157902e-05, "loss": 0.113, "step": 25364 }, { "epoch": 0.45241322726786287, "grad_norm": 0.22127170860767365, "learning_rate": 3.335119907208071e-05, "loss": 0.1145, "step": 25365 }, { "epoch": 0.45243106338957656, "grad_norm": 0.2754104733467102, "learning_rate": 3.3349731960210266e-05, "loss": 0.1335, "step": 25366 }, { "epoch": 0.45244889951129025, "grad_norm": 0.2940003573894501, "learning_rate": 3.3348264815973376e-05, "loss": 0.1883, "step": 25367 }, { "epoch": 0.45246673563300394, "grad_norm": 0.21314938366413116, "learning_rate": 3.3346797639375735e-05, "loss": 0.1194, "step": 25368 }, { "epoch": 0.4524845717547177, "grad_norm": 0.30342239141464233, "learning_rate": 3.3345330430423014e-05, "loss": 0.1218, "step": 25369 }, { "epoch": 0.45250240787643137, "grad_norm": 0.23137035965919495, "learning_rate": 3.3343863189120905e-05, "loss": 0.1296, "step": 25370 }, { "epoch": 0.45252024399814506, "grad_norm": 0.36742621660232544, "learning_rate": 3.334239591547511e-05, "loss": 0.17, "step": 25371 }, { "epoch": 0.45253808011985874, "grad_norm": 0.1878555566072464, "learning_rate": 3.33409286094913e-05, "loss": 0.1174, "step": 25372 }, { "epoch": 0.45255591624157243, "grad_norm": 0.20224159955978394, "learning_rate": 3.3339461271175174e-05, "loss": 0.1174, "step": 25373 }, { "epoch": 0.4525737523632861, "grad_norm": 0.37649932503700256, "learning_rate": 3.3337993900532415e-05, "loss": 0.2172, "step": 25374 }, { "epoch": 0.4525915884849998, "grad_norm": 0.36614614725112915, "learning_rate": 3.333652649756871e-05, "loss": 0.1656, "step": 25375 }, { "epoch": 0.4526094246067135, "grad_norm": 0.4283222556114197, "learning_rate": 3.333505906228975e-05, "loss": 0.2213, "step": 25376 }, { "epoch": 0.4526272607284272, "grad_norm": 0.342178612947464, "learning_rate": 3.333359159470123e-05, "loss": 0.1695, "step": 25377 }, { "epoch": 0.4526450968501409, "grad_norm": 0.27212056517601013, "learning_rate": 3.3332124094808816e-05, "loss": 0.1054, "step": 25378 }, { "epoch": 0.4526629329718546, "grad_norm": 0.26995909214019775, "learning_rate": 3.333065656261822e-05, "loss": 0.1482, "step": 25379 }, { "epoch": 0.4526807690935683, "grad_norm": 0.2667284607887268, "learning_rate": 3.332918899813512e-05, "loss": 0.1067, "step": 25380 }, { "epoch": 0.452698605215282, "grad_norm": 0.2785443067550659, "learning_rate": 3.3327721401365214e-05, "loss": 0.1753, "step": 25381 }, { "epoch": 0.4527164413369957, "grad_norm": 0.3091239333152771, "learning_rate": 3.3326253772314177e-05, "loss": 0.1201, "step": 25382 }, { "epoch": 0.45273427745870937, "grad_norm": 0.2919481694698334, "learning_rate": 3.33247861109877e-05, "loss": 0.192, "step": 25383 }, { "epoch": 0.45275211358042305, "grad_norm": 0.2751409411430359, "learning_rate": 3.332331841739148e-05, "loss": 0.1894, "step": 25384 }, { "epoch": 0.45276994970213674, "grad_norm": 0.2278016209602356, "learning_rate": 3.33218506915312e-05, "loss": 0.143, "step": 25385 }, { "epoch": 0.4527877858238505, "grad_norm": 0.27491307258605957, "learning_rate": 3.332038293341256e-05, "loss": 0.1532, "step": 25386 }, { "epoch": 0.4528056219455642, "grad_norm": 0.32482025027275085, "learning_rate": 3.3318915143041244e-05, "loss": 0.1453, "step": 25387 }, { "epoch": 0.45282345806727786, "grad_norm": 0.21392029523849487, "learning_rate": 3.331744732042293e-05, "loss": 0.1209, "step": 25388 }, { "epoch": 0.45284129418899155, "grad_norm": 0.22954034805297852, "learning_rate": 3.3315979465563325e-05, "loss": 0.1547, "step": 25389 }, { "epoch": 0.45285913031070524, "grad_norm": 0.30901920795440674, "learning_rate": 3.3314511578468105e-05, "loss": 0.1821, "step": 25390 }, { "epoch": 0.4528769664324189, "grad_norm": 0.26551201939582825, "learning_rate": 3.331304365914297e-05, "loss": 0.1711, "step": 25391 }, { "epoch": 0.4528948025541326, "grad_norm": 0.3265780210494995, "learning_rate": 3.3311575707593604e-05, "loss": 0.1816, "step": 25392 }, { "epoch": 0.4529126386758463, "grad_norm": 0.19142450392246246, "learning_rate": 3.33101077238257e-05, "loss": 0.1134, "step": 25393 }, { "epoch": 0.45293047479756005, "grad_norm": 0.2555154860019684, "learning_rate": 3.330863970784496e-05, "loss": 0.1589, "step": 25394 }, { "epoch": 0.45294831091927373, "grad_norm": 0.2754727602005005, "learning_rate": 3.330717165965704e-05, "loss": 0.1535, "step": 25395 }, { "epoch": 0.4529661470409874, "grad_norm": 0.27245625853538513, "learning_rate": 3.3305703579267676e-05, "loss": 0.167, "step": 25396 }, { "epoch": 0.4529839831627011, "grad_norm": 0.2841571271419525, "learning_rate": 3.330423546668252e-05, "loss": 0.1664, "step": 25397 }, { "epoch": 0.4530018192844148, "grad_norm": 0.20171573758125305, "learning_rate": 3.330276732190729e-05, "loss": 0.1414, "step": 25398 }, { "epoch": 0.4530196554061285, "grad_norm": 0.2928656339645386, "learning_rate": 3.3301299144947656e-05, "loss": 0.169, "step": 25399 }, { "epoch": 0.4530374915278422, "grad_norm": 0.3740893304347992, "learning_rate": 3.329983093580933e-05, "loss": 0.2375, "step": 25400 }, { "epoch": 0.45305532764955586, "grad_norm": 0.41993436217308044, "learning_rate": 3.329836269449799e-05, "loss": 0.273, "step": 25401 }, { "epoch": 0.45307316377126955, "grad_norm": 0.3017175495624542, "learning_rate": 3.3296894421019326e-05, "loss": 0.1247, "step": 25402 }, { "epoch": 0.4530909998929833, "grad_norm": 0.23109309375286102, "learning_rate": 3.329542611537904e-05, "loss": 0.1609, "step": 25403 }, { "epoch": 0.453108836014697, "grad_norm": 0.1993561089038849, "learning_rate": 3.3293957777582804e-05, "loss": 0.1714, "step": 25404 }, { "epoch": 0.45312667213641067, "grad_norm": 0.18567967414855957, "learning_rate": 3.329248940763634e-05, "loss": 0.1279, "step": 25405 }, { "epoch": 0.45314450825812436, "grad_norm": 0.254224568605423, "learning_rate": 3.329102100554531e-05, "loss": 0.1253, "step": 25406 }, { "epoch": 0.45316234437983804, "grad_norm": 0.23910973966121674, "learning_rate": 3.3289552571315425e-05, "loss": 0.15, "step": 25407 }, { "epoch": 0.45318018050155173, "grad_norm": 0.3448658287525177, "learning_rate": 3.328808410495236e-05, "loss": 0.147, "step": 25408 }, { "epoch": 0.4531980166232654, "grad_norm": 0.2811363637447357, "learning_rate": 3.3286615606461836e-05, "loss": 0.1247, "step": 25409 }, { "epoch": 0.4532158527449791, "grad_norm": 0.294975608587265, "learning_rate": 3.328514707584952e-05, "loss": 0.1115, "step": 25410 }, { "epoch": 0.45323368886669285, "grad_norm": 0.3807539641857147, "learning_rate": 3.328367851312111e-05, "loss": 0.135, "step": 25411 }, { "epoch": 0.45325152498840654, "grad_norm": 0.3476579487323761, "learning_rate": 3.3282209918282305e-05, "loss": 0.1568, "step": 25412 }, { "epoch": 0.45326936111012023, "grad_norm": 0.22811362147331238, "learning_rate": 3.328074129133879e-05, "loss": 0.1104, "step": 25413 }, { "epoch": 0.4532871972318339, "grad_norm": 0.2721503674983978, "learning_rate": 3.327927263229626e-05, "loss": 0.1288, "step": 25414 }, { "epoch": 0.4533050333535476, "grad_norm": 0.34425482153892517, "learning_rate": 3.3277803941160415e-05, "loss": 0.1654, "step": 25415 }, { "epoch": 0.4533228694752613, "grad_norm": 0.32033097743988037, "learning_rate": 3.327633521793694e-05, "loss": 0.1553, "step": 25416 }, { "epoch": 0.453340705596975, "grad_norm": 0.2533934414386749, "learning_rate": 3.3274866462631536e-05, "loss": 0.1275, "step": 25417 }, { "epoch": 0.45335854171868867, "grad_norm": 0.2786405086517334, "learning_rate": 3.3273397675249886e-05, "loss": 0.1516, "step": 25418 }, { "epoch": 0.45337637784040236, "grad_norm": 0.25266364216804504, "learning_rate": 3.327192885579769e-05, "loss": 0.1579, "step": 25419 }, { "epoch": 0.4533942139621161, "grad_norm": 0.2841903865337372, "learning_rate": 3.3270460004280644e-05, "loss": 0.1715, "step": 25420 }, { "epoch": 0.4534120500838298, "grad_norm": 0.2631411552429199, "learning_rate": 3.3268991120704436e-05, "loss": 0.1828, "step": 25421 }, { "epoch": 0.4534298862055435, "grad_norm": 0.26544615626335144, "learning_rate": 3.326752220507476e-05, "loss": 0.0846, "step": 25422 }, { "epoch": 0.45344772232725716, "grad_norm": 0.2929244935512543, "learning_rate": 3.3266053257397315e-05, "loss": 0.1182, "step": 25423 }, { "epoch": 0.45346555844897085, "grad_norm": 0.3162120580673218, "learning_rate": 3.32645842776778e-05, "loss": 0.1587, "step": 25424 }, { "epoch": 0.45348339457068454, "grad_norm": 0.28302067518234253, "learning_rate": 3.326311526592189e-05, "loss": 0.165, "step": 25425 }, { "epoch": 0.4535012306923982, "grad_norm": 0.2734776735305786, "learning_rate": 3.32616462221353e-05, "loss": 0.1143, "step": 25426 }, { "epoch": 0.4535190668141119, "grad_norm": 0.30495041608810425, "learning_rate": 3.32601771463237e-05, "loss": 0.131, "step": 25427 }, { "epoch": 0.45353690293582566, "grad_norm": 0.18731018900871277, "learning_rate": 3.325870803849282e-05, "loss": 0.1258, "step": 25428 }, { "epoch": 0.45355473905753935, "grad_norm": 0.2503253221511841, "learning_rate": 3.3257238898648324e-05, "loss": 0.0937, "step": 25429 }, { "epoch": 0.45357257517925303, "grad_norm": 0.24718806147575378, "learning_rate": 3.3255769726795925e-05, "loss": 0.1605, "step": 25430 }, { "epoch": 0.4535904113009667, "grad_norm": 0.31829050183296204, "learning_rate": 3.325430052294131e-05, "loss": 0.1425, "step": 25431 }, { "epoch": 0.4536082474226804, "grad_norm": 0.20550750195980072, "learning_rate": 3.325283128709017e-05, "loss": 0.1462, "step": 25432 }, { "epoch": 0.4536260835443941, "grad_norm": 0.25150609016418457, "learning_rate": 3.325136201924821e-05, "loss": 0.1946, "step": 25433 }, { "epoch": 0.4536439196661078, "grad_norm": 0.23848611116409302, "learning_rate": 3.324989271942111e-05, "loss": 0.0974, "step": 25434 }, { "epoch": 0.4536617557878215, "grad_norm": 0.24768418073654175, "learning_rate": 3.324842338761459e-05, "loss": 0.1471, "step": 25435 }, { "epoch": 0.4536795919095352, "grad_norm": 0.31885871291160583, "learning_rate": 3.3246954023834324e-05, "loss": 0.1494, "step": 25436 }, { "epoch": 0.4536974280312489, "grad_norm": 0.2993391156196594, "learning_rate": 3.324548462808602e-05, "loss": 0.183, "step": 25437 }, { "epoch": 0.4537152641529626, "grad_norm": 0.27632758021354675, "learning_rate": 3.3244015200375364e-05, "loss": 0.1606, "step": 25438 }, { "epoch": 0.4537331002746763, "grad_norm": 0.29089802503585815, "learning_rate": 3.324254574070806e-05, "loss": 0.1895, "step": 25439 }, { "epoch": 0.45375093639638997, "grad_norm": 0.23531211912631989, "learning_rate": 3.324107624908981e-05, "loss": 0.1146, "step": 25440 }, { "epoch": 0.45376877251810366, "grad_norm": 0.27233681082725525, "learning_rate": 3.32396067255263e-05, "loss": 0.1677, "step": 25441 }, { "epoch": 0.45378660863981735, "grad_norm": 0.2612689733505249, "learning_rate": 3.323813717002322e-05, "loss": 0.1873, "step": 25442 }, { "epoch": 0.45380444476153103, "grad_norm": 0.31285834312438965, "learning_rate": 3.323666758258628e-05, "loss": 0.1435, "step": 25443 }, { "epoch": 0.4538222808832447, "grad_norm": 0.26134970784187317, "learning_rate": 3.323519796322117e-05, "loss": 0.1281, "step": 25444 }, { "epoch": 0.45384011700495847, "grad_norm": 0.2308206707239151, "learning_rate": 3.32337283119336e-05, "loss": 0.1602, "step": 25445 }, { "epoch": 0.45385795312667215, "grad_norm": 0.35686835646629333, "learning_rate": 3.323225862872924e-05, "loss": 0.2012, "step": 25446 }, { "epoch": 0.45387578924838584, "grad_norm": 0.26989123225212097, "learning_rate": 3.3230788913613804e-05, "loss": 0.1298, "step": 25447 }, { "epoch": 0.45389362537009953, "grad_norm": 0.28670790791511536, "learning_rate": 3.322931916659299e-05, "loss": 0.1652, "step": 25448 }, { "epoch": 0.4539114614918132, "grad_norm": 0.2709404230117798, "learning_rate": 3.3227849387672494e-05, "loss": 0.0825, "step": 25449 }, { "epoch": 0.4539292976135269, "grad_norm": 0.26748818159103394, "learning_rate": 3.322637957685801e-05, "loss": 0.1389, "step": 25450 }, { "epoch": 0.4539471337352406, "grad_norm": 0.22117049992084503, "learning_rate": 3.322490973415524e-05, "loss": 0.1131, "step": 25451 }, { "epoch": 0.4539649698569543, "grad_norm": 0.27598392963409424, "learning_rate": 3.322343985956988e-05, "loss": 0.1431, "step": 25452 }, { "epoch": 0.453982805978668, "grad_norm": 0.271395742893219, "learning_rate": 3.3221969953107625e-05, "loss": 0.156, "step": 25453 }, { "epoch": 0.4540006421003817, "grad_norm": 0.3040768504142761, "learning_rate": 3.322050001477417e-05, "loss": 0.1716, "step": 25454 }, { "epoch": 0.4540184782220954, "grad_norm": 0.34275102615356445, "learning_rate": 3.3219030044575214e-05, "loss": 0.1612, "step": 25455 }, { "epoch": 0.4540363143438091, "grad_norm": 0.19952526688575745, "learning_rate": 3.3217560042516476e-05, "loss": 0.1277, "step": 25456 }, { "epoch": 0.4540541504655228, "grad_norm": 0.3267901837825775, "learning_rate": 3.321609000860362e-05, "loss": 0.1118, "step": 25457 }, { "epoch": 0.45407198658723646, "grad_norm": 0.2913527190685272, "learning_rate": 3.321461994284237e-05, "loss": 0.1749, "step": 25458 }, { "epoch": 0.45408982270895015, "grad_norm": 0.2528184652328491, "learning_rate": 3.321314984523842e-05, "loss": 0.1704, "step": 25459 }, { "epoch": 0.45410765883066384, "grad_norm": 0.3290480673313141, "learning_rate": 3.321167971579746e-05, "loss": 0.1914, "step": 25460 }, { "epoch": 0.45412549495237753, "grad_norm": 0.3445420563220978, "learning_rate": 3.321020955452519e-05, "loss": 0.1525, "step": 25461 }, { "epoch": 0.45414333107409127, "grad_norm": 0.2904965877532959, "learning_rate": 3.320873936142732e-05, "loss": 0.085, "step": 25462 }, { "epoch": 0.45416116719580496, "grad_norm": 0.21717512607574463, "learning_rate": 3.3207269136509536e-05, "loss": 0.1317, "step": 25463 }, { "epoch": 0.45417900331751865, "grad_norm": 0.3402880132198334, "learning_rate": 3.320579887977754e-05, "loss": 0.1838, "step": 25464 }, { "epoch": 0.45419683943923234, "grad_norm": 0.2852194011211395, "learning_rate": 3.3204328591237034e-05, "loss": 0.1623, "step": 25465 }, { "epoch": 0.454214675560946, "grad_norm": 0.23262082040309906, "learning_rate": 3.320285827089372e-05, "loss": 0.1403, "step": 25466 }, { "epoch": 0.4542325116826597, "grad_norm": 0.27028903365135193, "learning_rate": 3.320138791875329e-05, "loss": 0.1484, "step": 25467 }, { "epoch": 0.4542503478043734, "grad_norm": 0.30091822147369385, "learning_rate": 3.319991753482145e-05, "loss": 0.1478, "step": 25468 }, { "epoch": 0.4542681839260871, "grad_norm": 0.2841566205024719, "learning_rate": 3.319844711910389e-05, "loss": 0.1623, "step": 25469 }, { "epoch": 0.45428602004780083, "grad_norm": 0.34597864747047424, "learning_rate": 3.319697667160633e-05, "loss": 0.1169, "step": 25470 }, { "epoch": 0.4543038561695145, "grad_norm": 0.2742307782173157, "learning_rate": 3.319550619233445e-05, "loss": 0.1258, "step": 25471 }, { "epoch": 0.4543216922912282, "grad_norm": 0.291324645280838, "learning_rate": 3.319403568129396e-05, "loss": 0.1178, "step": 25472 }, { "epoch": 0.4543395284129419, "grad_norm": 0.20286288857460022, "learning_rate": 3.319256513849055e-05, "loss": 0.1658, "step": 25473 }, { "epoch": 0.4543573645346556, "grad_norm": 0.3391698896884918, "learning_rate": 3.3191094563929944e-05, "loss": 0.166, "step": 25474 }, { "epoch": 0.45437520065636927, "grad_norm": 0.24068965017795563, "learning_rate": 3.318962395761781e-05, "loss": 0.1494, "step": 25475 }, { "epoch": 0.45439303677808296, "grad_norm": 0.3006434738636017, "learning_rate": 3.3188153319559876e-05, "loss": 0.1393, "step": 25476 }, { "epoch": 0.45441087289979665, "grad_norm": 0.2690424621105194, "learning_rate": 3.318668264976182e-05, "loss": 0.1575, "step": 25477 }, { "epoch": 0.45442870902151034, "grad_norm": 0.22099590301513672, "learning_rate": 3.3185211948229375e-05, "loss": 0.172, "step": 25478 }, { "epoch": 0.4544465451432241, "grad_norm": 0.24531807005405426, "learning_rate": 3.31837412149682e-05, "loss": 0.133, "step": 25479 }, { "epoch": 0.45446438126493777, "grad_norm": 0.4104959964752197, "learning_rate": 3.318227044998403e-05, "loss": 0.0908, "step": 25480 }, { "epoch": 0.45448221738665145, "grad_norm": 0.3880630433559418, "learning_rate": 3.3180799653282546e-05, "loss": 0.16, "step": 25481 }, { "epoch": 0.45450005350836514, "grad_norm": 0.2126411646604538, "learning_rate": 3.317932882486946e-05, "loss": 0.1597, "step": 25482 }, { "epoch": 0.45451788963007883, "grad_norm": 0.34773823618888855, "learning_rate": 3.317785796475047e-05, "loss": 0.2344, "step": 25483 }, { "epoch": 0.4545357257517925, "grad_norm": 0.311054527759552, "learning_rate": 3.317638707293128e-05, "loss": 0.1486, "step": 25484 }, { "epoch": 0.4545535618735062, "grad_norm": 0.2105044275522232, "learning_rate": 3.317491614941759e-05, "loss": 0.1268, "step": 25485 }, { "epoch": 0.4545713979952199, "grad_norm": 0.33068689703941345, "learning_rate": 3.31734451942151e-05, "loss": 0.1361, "step": 25486 }, { "epoch": 0.45458923411693364, "grad_norm": 0.27694565057754517, "learning_rate": 3.317197420732952e-05, "loss": 0.1548, "step": 25487 }, { "epoch": 0.4546070702386473, "grad_norm": 0.31229138374328613, "learning_rate": 3.317050318876653e-05, "loss": 0.172, "step": 25488 }, { "epoch": 0.454624906360361, "grad_norm": 0.40736308693885803, "learning_rate": 3.316903213853186e-05, "loss": 0.2063, "step": 25489 }, { "epoch": 0.4546427424820747, "grad_norm": 0.2958024740219116, "learning_rate": 3.316756105663119e-05, "loss": 0.1292, "step": 25490 }, { "epoch": 0.4546605786037884, "grad_norm": 0.2401237189769745, "learning_rate": 3.3166089943070245e-05, "loss": 0.1521, "step": 25491 }, { "epoch": 0.4546784147255021, "grad_norm": 0.2923887372016907, "learning_rate": 3.316461879785471e-05, "loss": 0.1376, "step": 25492 }, { "epoch": 0.45469625084721577, "grad_norm": 0.32773536443710327, "learning_rate": 3.3163147620990296e-05, "loss": 0.122, "step": 25493 }, { "epoch": 0.45471408696892945, "grad_norm": 0.2533378303050995, "learning_rate": 3.316167641248269e-05, "loss": 0.1372, "step": 25494 }, { "epoch": 0.4547319230906432, "grad_norm": 0.34814250469207764, "learning_rate": 3.316020517233761e-05, "loss": 0.1917, "step": 25495 }, { "epoch": 0.4547497592123569, "grad_norm": 0.20848925411701202, "learning_rate": 3.315873390056076e-05, "loss": 0.1171, "step": 25496 }, { "epoch": 0.4547675953340706, "grad_norm": 0.2148837298154831, "learning_rate": 3.3157262597157846e-05, "loss": 0.1493, "step": 25497 }, { "epoch": 0.45478543145578426, "grad_norm": 0.297224760055542, "learning_rate": 3.315579126213455e-05, "loss": 0.1405, "step": 25498 }, { "epoch": 0.45480326757749795, "grad_norm": 0.2944718599319458, "learning_rate": 3.315431989549661e-05, "loss": 0.1671, "step": 25499 }, { "epoch": 0.45482110369921164, "grad_norm": 0.25866109132766724, "learning_rate": 3.31528484972497e-05, "loss": 0.1391, "step": 25500 }, { "epoch": 0.4548389398209253, "grad_norm": 0.2821806073188782, "learning_rate": 3.3151377067399536e-05, "loss": 0.1221, "step": 25501 }, { "epoch": 0.454856775942639, "grad_norm": 0.4539002776145935, "learning_rate": 3.314990560595181e-05, "loss": 0.1496, "step": 25502 }, { "epoch": 0.4548746120643527, "grad_norm": 0.2803192734718323, "learning_rate": 3.314843411291224e-05, "loss": 0.1278, "step": 25503 }, { "epoch": 0.45489244818606644, "grad_norm": 0.1840869039297104, "learning_rate": 3.314696258828653e-05, "loss": 0.1319, "step": 25504 }, { "epoch": 0.45491028430778013, "grad_norm": 0.25788959860801697, "learning_rate": 3.3145491032080375e-05, "loss": 0.1359, "step": 25505 }, { "epoch": 0.4549281204294938, "grad_norm": 0.40240857005119324, "learning_rate": 3.3144019444299476e-05, "loss": 0.1134, "step": 25506 }, { "epoch": 0.4549459565512075, "grad_norm": 0.33708715438842773, "learning_rate": 3.314254782494956e-05, "loss": 0.1648, "step": 25507 }, { "epoch": 0.4549637926729212, "grad_norm": 0.3606436252593994, "learning_rate": 3.31410761740363e-05, "loss": 0.1902, "step": 25508 }, { "epoch": 0.4549816287946349, "grad_norm": 0.33318817615509033, "learning_rate": 3.313960449156543e-05, "loss": 0.122, "step": 25509 }, { "epoch": 0.4549994649163486, "grad_norm": 0.194342240691185, "learning_rate": 3.3138132777542637e-05, "loss": 0.1376, "step": 25510 }, { "epoch": 0.45501730103806226, "grad_norm": 0.24338695406913757, "learning_rate": 3.313666103197363e-05, "loss": 0.1637, "step": 25511 }, { "epoch": 0.455035137159776, "grad_norm": 0.32704874873161316, "learning_rate": 3.3135189254864114e-05, "loss": 0.1578, "step": 25512 }, { "epoch": 0.4550529732814897, "grad_norm": 0.3315202295780182, "learning_rate": 3.31337174462198e-05, "loss": 0.1532, "step": 25513 }, { "epoch": 0.4550708094032034, "grad_norm": 0.25110602378845215, "learning_rate": 3.313224560604638e-05, "loss": 0.1346, "step": 25514 }, { "epoch": 0.45508864552491707, "grad_norm": 0.2566005289554596, "learning_rate": 3.313077373434957e-05, "loss": 0.1269, "step": 25515 }, { "epoch": 0.45510648164663076, "grad_norm": 0.31238797307014465, "learning_rate": 3.312930183113507e-05, "loss": 0.1426, "step": 25516 }, { "epoch": 0.45512431776834444, "grad_norm": 0.2830149233341217, "learning_rate": 3.3127829896408596e-05, "loss": 0.1595, "step": 25517 }, { "epoch": 0.45514215389005813, "grad_norm": 0.37631404399871826, "learning_rate": 3.3126357930175835e-05, "loss": 0.1397, "step": 25518 }, { "epoch": 0.4551599900117718, "grad_norm": 0.3661668300628662, "learning_rate": 3.3124885932442516e-05, "loss": 0.1107, "step": 25519 }, { "epoch": 0.4551778261334855, "grad_norm": 0.34236806631088257, "learning_rate": 3.312341390321433e-05, "loss": 0.1393, "step": 25520 }, { "epoch": 0.45519566225519925, "grad_norm": 0.32491669058799744, "learning_rate": 3.312194184249698e-05, "loss": 0.1281, "step": 25521 }, { "epoch": 0.45521349837691294, "grad_norm": 0.25198623538017273, "learning_rate": 3.3120469750296185e-05, "loss": 0.118, "step": 25522 }, { "epoch": 0.4552313344986266, "grad_norm": 0.2775854468345642, "learning_rate": 3.311899762661764e-05, "loss": 0.1318, "step": 25523 }, { "epoch": 0.4552491706203403, "grad_norm": 0.31547802686691284, "learning_rate": 3.3117525471467054e-05, "loss": 0.0975, "step": 25524 }, { "epoch": 0.455267006742054, "grad_norm": 0.2870213985443115, "learning_rate": 3.311605328485014e-05, "loss": 0.1494, "step": 25525 }, { "epoch": 0.4552848428637677, "grad_norm": 0.22317950427532196, "learning_rate": 3.31145810667726e-05, "loss": 0.1255, "step": 25526 }, { "epoch": 0.4553026789854814, "grad_norm": 0.21474316716194153, "learning_rate": 3.311310881724014e-05, "loss": 0.152, "step": 25527 }, { "epoch": 0.45532051510719507, "grad_norm": 0.3272128403186798, "learning_rate": 3.311163653625847e-05, "loss": 0.1356, "step": 25528 }, { "epoch": 0.4553383512289088, "grad_norm": 0.2720585763454437, "learning_rate": 3.3110164223833296e-05, "loss": 0.1392, "step": 25529 }, { "epoch": 0.4553561873506225, "grad_norm": 0.2651171088218689, "learning_rate": 3.310869187997032e-05, "loss": 0.1458, "step": 25530 }, { "epoch": 0.4553740234723362, "grad_norm": 0.4045639932155609, "learning_rate": 3.310721950467525e-05, "loss": 0.1968, "step": 25531 }, { "epoch": 0.4553918595940499, "grad_norm": 0.3149820864200592, "learning_rate": 3.31057470979538e-05, "loss": 0.0789, "step": 25532 }, { "epoch": 0.45540969571576356, "grad_norm": 0.23971009254455566, "learning_rate": 3.310427465981168e-05, "loss": 0.1031, "step": 25533 }, { "epoch": 0.45542753183747725, "grad_norm": 0.22553564608097076, "learning_rate": 3.3102802190254594e-05, "loss": 0.1289, "step": 25534 }, { "epoch": 0.45544536795919094, "grad_norm": 0.3127795159816742, "learning_rate": 3.310132968928824e-05, "loss": 0.0965, "step": 25535 }, { "epoch": 0.4554632040809046, "grad_norm": 0.2910325527191162, "learning_rate": 3.309985715691834e-05, "loss": 0.1621, "step": 25536 }, { "epoch": 0.45548104020261837, "grad_norm": 0.2201349139213562, "learning_rate": 3.309838459315059e-05, "loss": 0.1559, "step": 25537 }, { "epoch": 0.45549887632433206, "grad_norm": 0.3200796842575073, "learning_rate": 3.309691199799071e-05, "loss": 0.1392, "step": 25538 }, { "epoch": 0.45551671244604575, "grad_norm": 0.21429410576820374, "learning_rate": 3.3095439371444407e-05, "loss": 0.1563, "step": 25539 }, { "epoch": 0.45553454856775943, "grad_norm": 0.2324209362268448, "learning_rate": 3.309396671351737e-05, "loss": 0.1487, "step": 25540 }, { "epoch": 0.4555523846894731, "grad_norm": 0.3225129544734955, "learning_rate": 3.3092494024215336e-05, "loss": 0.1525, "step": 25541 }, { "epoch": 0.4555702208111868, "grad_norm": 0.24083071947097778, "learning_rate": 3.309102130354399e-05, "loss": 0.1389, "step": 25542 }, { "epoch": 0.4555880569329005, "grad_norm": 0.3956213593482971, "learning_rate": 3.3089548551509054e-05, "loss": 0.1213, "step": 25543 }, { "epoch": 0.4556058930546142, "grad_norm": 0.4100399911403656, "learning_rate": 3.3088075768116233e-05, "loss": 0.2187, "step": 25544 }, { "epoch": 0.4556237291763279, "grad_norm": 0.2815300524234772, "learning_rate": 3.308660295337124e-05, "loss": 0.1867, "step": 25545 }, { "epoch": 0.4556415652980416, "grad_norm": 0.22833020985126495, "learning_rate": 3.308513010727978e-05, "loss": 0.1356, "step": 25546 }, { "epoch": 0.4556594014197553, "grad_norm": 0.261622816324234, "learning_rate": 3.3083657229847566e-05, "loss": 0.1951, "step": 25547 }, { "epoch": 0.455677237541469, "grad_norm": 0.24547246098518372, "learning_rate": 3.30821843210803e-05, "loss": 0.1254, "step": 25548 }, { "epoch": 0.4556950736631827, "grad_norm": 0.2390258014202118, "learning_rate": 3.30807113809837e-05, "loss": 0.1193, "step": 25549 }, { "epoch": 0.45571290978489637, "grad_norm": 0.39300480484962463, "learning_rate": 3.3079238409563467e-05, "loss": 0.136, "step": 25550 }, { "epoch": 0.45573074590661006, "grad_norm": 0.22021755576133728, "learning_rate": 3.307776540682532e-05, "loss": 0.1187, "step": 25551 }, { "epoch": 0.45574858202832375, "grad_norm": 0.25812453031539917, "learning_rate": 3.3076292372774963e-05, "loss": 0.1799, "step": 25552 }, { "epoch": 0.45576641815003743, "grad_norm": 0.2631734311580658, "learning_rate": 3.30748193074181e-05, "loss": 0.1242, "step": 25553 }, { "epoch": 0.4557842542717512, "grad_norm": 0.2188076674938202, "learning_rate": 3.3073346210760466e-05, "loss": 0.1357, "step": 25554 }, { "epoch": 0.45580209039346486, "grad_norm": 0.3625791668891907, "learning_rate": 3.3071873082807734e-05, "loss": 0.1306, "step": 25555 }, { "epoch": 0.45581992651517855, "grad_norm": 0.267915815114975, "learning_rate": 3.3070399923565645e-05, "loss": 0.1634, "step": 25556 }, { "epoch": 0.45583776263689224, "grad_norm": 0.2879968285560608, "learning_rate": 3.306892673303989e-05, "loss": 0.1633, "step": 25557 }, { "epoch": 0.45585559875860593, "grad_norm": 0.3673526644706726, "learning_rate": 3.30674535112362e-05, "loss": 0.1339, "step": 25558 }, { "epoch": 0.4558734348803196, "grad_norm": 0.21310196816921234, "learning_rate": 3.306598025816027e-05, "loss": 0.1638, "step": 25559 }, { "epoch": 0.4558912710020333, "grad_norm": 0.19753588736057281, "learning_rate": 3.306450697381781e-05, "loss": 0.1248, "step": 25560 }, { "epoch": 0.455909107123747, "grad_norm": 0.19765333831310272, "learning_rate": 3.306303365821454e-05, "loss": 0.1091, "step": 25561 }, { "epoch": 0.4559269432454607, "grad_norm": 0.29036945104599, "learning_rate": 3.3061560311356165e-05, "loss": 0.1625, "step": 25562 }, { "epoch": 0.4559447793671744, "grad_norm": 0.3032093346118927, "learning_rate": 3.30600869332484e-05, "loss": 0.1787, "step": 25563 }, { "epoch": 0.4559626154888881, "grad_norm": 0.27889686822891235, "learning_rate": 3.305861352389695e-05, "loss": 0.131, "step": 25564 }, { "epoch": 0.4559804516106018, "grad_norm": 0.30255818367004395, "learning_rate": 3.305714008330753e-05, "loss": 0.1746, "step": 25565 }, { "epoch": 0.4559982877323155, "grad_norm": 0.25976645946502686, "learning_rate": 3.3055666611485854e-05, "loss": 0.1207, "step": 25566 }, { "epoch": 0.4560161238540292, "grad_norm": 0.3765120208263397, "learning_rate": 3.305419310843764e-05, "loss": 0.1992, "step": 25567 }, { "epoch": 0.45603395997574286, "grad_norm": 0.2526637017726898, "learning_rate": 3.3052719574168576e-05, "loss": 0.1319, "step": 25568 }, { "epoch": 0.45605179609745655, "grad_norm": 0.31131696701049805, "learning_rate": 3.30512460086844e-05, "loss": 0.1508, "step": 25569 }, { "epoch": 0.45606963221917024, "grad_norm": 0.30652379989624023, "learning_rate": 3.304977241199081e-05, "loss": 0.1452, "step": 25570 }, { "epoch": 0.456087468340884, "grad_norm": 0.26475590467453003, "learning_rate": 3.304829878409352e-05, "loss": 0.1306, "step": 25571 }, { "epoch": 0.45610530446259767, "grad_norm": 0.3271113336086273, "learning_rate": 3.304682512499825e-05, "loss": 0.1443, "step": 25572 }, { "epoch": 0.45612314058431136, "grad_norm": 0.25378769636154175, "learning_rate": 3.3045351434710705e-05, "loss": 0.1361, "step": 25573 }, { "epoch": 0.45614097670602505, "grad_norm": 0.23429793119430542, "learning_rate": 3.30438777132366e-05, "loss": 0.0991, "step": 25574 }, { "epoch": 0.45615881282773874, "grad_norm": 0.4037632942199707, "learning_rate": 3.3042403960581646e-05, "loss": 0.1542, "step": 25575 }, { "epoch": 0.4561766489494524, "grad_norm": 0.35451459884643555, "learning_rate": 3.304093017675155e-05, "loss": 0.2035, "step": 25576 }, { "epoch": 0.4561944850711661, "grad_norm": 0.2875897288322449, "learning_rate": 3.3039456361752045e-05, "loss": 0.1262, "step": 25577 }, { "epoch": 0.4562123211928798, "grad_norm": 0.25457751750946045, "learning_rate": 3.3037982515588815e-05, "loss": 0.1488, "step": 25578 }, { "epoch": 0.4562301573145935, "grad_norm": 0.34255313873291016, "learning_rate": 3.303650863826759e-05, "loss": 0.1206, "step": 25579 }, { "epoch": 0.45624799343630723, "grad_norm": 0.453331857919693, "learning_rate": 3.30350347297941e-05, "loss": 0.1487, "step": 25580 }, { "epoch": 0.4562658295580209, "grad_norm": 0.23620760440826416, "learning_rate": 3.303356079017402e-05, "loss": 0.1615, "step": 25581 }, { "epoch": 0.4562836656797346, "grad_norm": 0.18880541622638702, "learning_rate": 3.3032086819413094e-05, "loss": 0.1306, "step": 25582 }, { "epoch": 0.4563015018014483, "grad_norm": 0.3897157311439514, "learning_rate": 3.303061281751702e-05, "loss": 0.1168, "step": 25583 }, { "epoch": 0.456319337923162, "grad_norm": 0.26303455233573914, "learning_rate": 3.302913878449153e-05, "loss": 0.1024, "step": 25584 }, { "epoch": 0.45633717404487567, "grad_norm": 0.2909948527812958, "learning_rate": 3.302766472034231e-05, "loss": 0.1377, "step": 25585 }, { "epoch": 0.45635501016658936, "grad_norm": 0.21276971697807312, "learning_rate": 3.302619062507509e-05, "loss": 0.1333, "step": 25586 }, { "epoch": 0.45637284628830305, "grad_norm": 0.26306089758872986, "learning_rate": 3.302471649869559e-05, "loss": 0.2211, "step": 25587 }, { "epoch": 0.4563906824100168, "grad_norm": 0.25632795691490173, "learning_rate": 3.302324234120951e-05, "loss": 0.1043, "step": 25588 }, { "epoch": 0.4564085185317305, "grad_norm": 0.28908053040504456, "learning_rate": 3.302176815262257e-05, "loss": 0.1798, "step": 25589 }, { "epoch": 0.45642635465344417, "grad_norm": 0.22195380926132202, "learning_rate": 3.302029393294049e-05, "loss": 0.134, "step": 25590 }, { "epoch": 0.45644419077515785, "grad_norm": 0.2504135072231293, "learning_rate": 3.301881968216899e-05, "loss": 0.1528, "step": 25591 }, { "epoch": 0.45646202689687154, "grad_norm": 0.30597007274627686, "learning_rate": 3.301734540031376e-05, "loss": 0.1501, "step": 25592 }, { "epoch": 0.45647986301858523, "grad_norm": 0.19958379864692688, "learning_rate": 3.301587108738054e-05, "loss": 0.1291, "step": 25593 }, { "epoch": 0.4564976991402989, "grad_norm": 0.2568812668323517, "learning_rate": 3.301439674337503e-05, "loss": 0.1225, "step": 25594 }, { "epoch": 0.4565155352620126, "grad_norm": 0.26551640033721924, "learning_rate": 3.301292236830295e-05, "loss": 0.1088, "step": 25595 }, { "epoch": 0.45653337138372635, "grad_norm": 0.15709362924098969, "learning_rate": 3.301144796217002e-05, "loss": 0.1291, "step": 25596 }, { "epoch": 0.45655120750544004, "grad_norm": 0.25126639008522034, "learning_rate": 3.300997352498195e-05, "loss": 0.1855, "step": 25597 }, { "epoch": 0.4565690436271537, "grad_norm": 0.3434886932373047, "learning_rate": 3.3008499056744456e-05, "loss": 0.153, "step": 25598 }, { "epoch": 0.4565868797488674, "grad_norm": 0.27629926800727844, "learning_rate": 3.300702455746325e-05, "loss": 0.2016, "step": 25599 }, { "epoch": 0.4566047158705811, "grad_norm": 0.24353432655334473, "learning_rate": 3.300555002714405e-05, "loss": 0.1762, "step": 25600 }, { "epoch": 0.4566225519922948, "grad_norm": 0.3592023253440857, "learning_rate": 3.300407546579258e-05, "loss": 0.1627, "step": 25601 }, { "epoch": 0.4566403881140085, "grad_norm": 0.27998852729797363, "learning_rate": 3.300260087341455e-05, "loss": 0.1385, "step": 25602 }, { "epoch": 0.45665822423572217, "grad_norm": 0.3741909861564636, "learning_rate": 3.300112625001567e-05, "loss": 0.18, "step": 25603 }, { "epoch": 0.45667606035743585, "grad_norm": 0.24563151597976685, "learning_rate": 3.299965159560166e-05, "loss": 0.1241, "step": 25604 }, { "epoch": 0.4566938964791496, "grad_norm": 0.21539169549942017, "learning_rate": 3.299817691017824e-05, "loss": 0.1592, "step": 25605 }, { "epoch": 0.4567117326008633, "grad_norm": 0.31977224349975586, "learning_rate": 3.299670219375112e-05, "loss": 0.1118, "step": 25606 }, { "epoch": 0.456729568722577, "grad_norm": 0.2661239206790924, "learning_rate": 3.299522744632602e-05, "loss": 0.172, "step": 25607 }, { "epoch": 0.45674740484429066, "grad_norm": 0.30055010318756104, "learning_rate": 3.2993752667908665e-05, "loss": 0.1808, "step": 25608 }, { "epoch": 0.45676524096600435, "grad_norm": 0.19384883344173431, "learning_rate": 3.299227785850476e-05, "loss": 0.1122, "step": 25609 }, { "epoch": 0.45678307708771804, "grad_norm": 0.3133239150047302, "learning_rate": 3.299080301812002e-05, "loss": 0.16, "step": 25610 }, { "epoch": 0.4568009132094317, "grad_norm": 0.23757590353488922, "learning_rate": 3.298932814676017e-05, "loss": 0.1265, "step": 25611 }, { "epoch": 0.4568187493311454, "grad_norm": 0.25030988454818726, "learning_rate": 3.2987853244430935e-05, "loss": 0.1857, "step": 25612 }, { "epoch": 0.45683658545285916, "grad_norm": 0.22245270013809204, "learning_rate": 3.2986378311138e-05, "loss": 0.1612, "step": 25613 }, { "epoch": 0.45685442157457284, "grad_norm": 0.28517913818359375, "learning_rate": 3.298490334688712e-05, "loss": 0.169, "step": 25614 }, { "epoch": 0.45687225769628653, "grad_norm": 0.358346551656723, "learning_rate": 3.2983428351684e-05, "loss": 0.1242, "step": 25615 }, { "epoch": 0.4568900938180002, "grad_norm": 0.48526445031166077, "learning_rate": 3.298195332553435e-05, "loss": 0.1863, "step": 25616 }, { "epoch": 0.4569079299397139, "grad_norm": 0.23559698462486267, "learning_rate": 3.298047826844389e-05, "loss": 0.1684, "step": 25617 }, { "epoch": 0.4569257660614276, "grad_norm": 0.25800377130508423, "learning_rate": 3.297900318041834e-05, "loss": 0.1399, "step": 25618 }, { "epoch": 0.4569436021831413, "grad_norm": 0.3152902126312256, "learning_rate": 3.297752806146341e-05, "loss": 0.0944, "step": 25619 }, { "epoch": 0.45696143830485497, "grad_norm": 0.2307669073343277, "learning_rate": 3.297605291158484e-05, "loss": 0.1688, "step": 25620 }, { "epoch": 0.45697927442656866, "grad_norm": 0.31882476806640625, "learning_rate": 3.297457773078833e-05, "loss": 0.0921, "step": 25621 }, { "epoch": 0.4569971105482824, "grad_norm": 0.3550775349140167, "learning_rate": 3.2973102519079595e-05, "loss": 0.1663, "step": 25622 }, { "epoch": 0.4570149466699961, "grad_norm": 0.24725449085235596, "learning_rate": 3.297162727646437e-05, "loss": 0.1162, "step": 25623 }, { "epoch": 0.4570327827917098, "grad_norm": 0.2853067219257355, "learning_rate": 3.297015200294836e-05, "loss": 0.1615, "step": 25624 }, { "epoch": 0.45705061891342347, "grad_norm": 0.31117144227027893, "learning_rate": 3.296867669853729e-05, "loss": 0.2383, "step": 25625 }, { "epoch": 0.45706845503513716, "grad_norm": 0.24993376433849335, "learning_rate": 3.2967201363236874e-05, "loss": 0.1578, "step": 25626 }, { "epoch": 0.45708629115685084, "grad_norm": 0.22071373462677002, "learning_rate": 3.296572599705284e-05, "loss": 0.117, "step": 25627 }, { "epoch": 0.45710412727856453, "grad_norm": 0.3490014672279358, "learning_rate": 3.296425059999089e-05, "loss": 0.1469, "step": 25628 }, { "epoch": 0.4571219634002782, "grad_norm": 0.2714429199695587, "learning_rate": 3.2962775172056766e-05, "loss": 0.1429, "step": 25629 }, { "epoch": 0.45713979952199196, "grad_norm": 0.3379661738872528, "learning_rate": 3.296129971325617e-05, "loss": 0.1922, "step": 25630 }, { "epoch": 0.45715763564370565, "grad_norm": 0.2674955129623413, "learning_rate": 3.2959824223594826e-05, "loss": 0.132, "step": 25631 }, { "epoch": 0.45717547176541934, "grad_norm": 0.32472437620162964, "learning_rate": 3.2958348703078455e-05, "loss": 0.157, "step": 25632 }, { "epoch": 0.457193307887133, "grad_norm": 0.29699936509132385, "learning_rate": 3.295687315171278e-05, "loss": 0.1462, "step": 25633 }, { "epoch": 0.4572111440088467, "grad_norm": 0.2153027504682541, "learning_rate": 3.29553975695035e-05, "loss": 0.1318, "step": 25634 }, { "epoch": 0.4572289801305604, "grad_norm": 0.32456353306770325, "learning_rate": 3.2953921956456364e-05, "loss": 0.1715, "step": 25635 }, { "epoch": 0.4572468162522741, "grad_norm": 0.295696496963501, "learning_rate": 3.295244631257708e-05, "loss": 0.2638, "step": 25636 }, { "epoch": 0.4572646523739878, "grad_norm": 0.2974448502063751, "learning_rate": 3.295097063787136e-05, "loss": 0.1177, "step": 25637 }, { "epoch": 0.4572824884957015, "grad_norm": 0.27639755606651306, "learning_rate": 3.2949494932344944e-05, "loss": 0.1465, "step": 25638 }, { "epoch": 0.4573003246174152, "grad_norm": 0.21070195734500885, "learning_rate": 3.2948019196003535e-05, "loss": 0.1158, "step": 25639 }, { "epoch": 0.4573181607391289, "grad_norm": 0.26121270656585693, "learning_rate": 3.2946543428852854e-05, "loss": 0.1087, "step": 25640 }, { "epoch": 0.4573359968608426, "grad_norm": 0.2552196979522705, "learning_rate": 3.294506763089863e-05, "loss": 0.1646, "step": 25641 }, { "epoch": 0.4573538329825563, "grad_norm": 0.25155118107795715, "learning_rate": 3.2943591802146574e-05, "loss": 0.1557, "step": 25642 }, { "epoch": 0.45737166910426996, "grad_norm": 0.34909623861312866, "learning_rate": 3.294211594260242e-05, "loss": 0.1354, "step": 25643 }, { "epoch": 0.45738950522598365, "grad_norm": 0.2594251334667206, "learning_rate": 3.294064005227188e-05, "loss": 0.1467, "step": 25644 }, { "epoch": 0.45740734134769734, "grad_norm": 0.3125285506248474, "learning_rate": 3.293916413116067e-05, "loss": 0.1207, "step": 25645 }, { "epoch": 0.457425177469411, "grad_norm": 0.18811379373073578, "learning_rate": 3.2937688179274525e-05, "loss": 0.1178, "step": 25646 }, { "epoch": 0.45744301359112477, "grad_norm": 0.2780362665653229, "learning_rate": 3.2936212196619154e-05, "loss": 0.1679, "step": 25647 }, { "epoch": 0.45746084971283846, "grad_norm": 0.27849629521369934, "learning_rate": 3.2934736183200284e-05, "loss": 0.1509, "step": 25648 }, { "epoch": 0.45747868583455215, "grad_norm": 0.345282644033432, "learning_rate": 3.2933260139023635e-05, "loss": 0.1437, "step": 25649 }, { "epoch": 0.45749652195626583, "grad_norm": 0.26852524280548096, "learning_rate": 3.2931784064094934e-05, "loss": 0.1473, "step": 25650 }, { "epoch": 0.4575143580779795, "grad_norm": 0.41945159435272217, "learning_rate": 3.29303079584199e-05, "loss": 0.1387, "step": 25651 }, { "epoch": 0.4575321941996932, "grad_norm": 0.2789818048477173, "learning_rate": 3.292883182200425e-05, "loss": 0.1411, "step": 25652 }, { "epoch": 0.4575500303214069, "grad_norm": 0.19430217146873474, "learning_rate": 3.292735565485371e-05, "loss": 0.1002, "step": 25653 }, { "epoch": 0.4575678664431206, "grad_norm": 0.24267876148223877, "learning_rate": 3.2925879456973996e-05, "loss": 0.103, "step": 25654 }, { "epoch": 0.45758570256483433, "grad_norm": 0.29199084639549255, "learning_rate": 3.292440322837084e-05, "loss": 0.1338, "step": 25655 }, { "epoch": 0.457603538686548, "grad_norm": 0.23551107943058014, "learning_rate": 3.292292696904996e-05, "loss": 0.1415, "step": 25656 }, { "epoch": 0.4576213748082617, "grad_norm": 0.2686185836791992, "learning_rate": 3.292145067901708e-05, "loss": 0.1591, "step": 25657 }, { "epoch": 0.4576392109299754, "grad_norm": 0.28387245535850525, "learning_rate": 3.291997435827793e-05, "loss": 0.1721, "step": 25658 }, { "epoch": 0.4576570470516891, "grad_norm": 0.21101944148540497, "learning_rate": 3.291849800683821e-05, "loss": 0.1204, "step": 25659 }, { "epoch": 0.45767488317340277, "grad_norm": 0.31178781390190125, "learning_rate": 3.291702162470366e-05, "loss": 0.1905, "step": 25660 }, { "epoch": 0.45769271929511646, "grad_norm": 0.2514325678348541, "learning_rate": 3.291554521188e-05, "loss": 0.1569, "step": 25661 }, { "epoch": 0.45771055541683014, "grad_norm": 0.2219846397638321, "learning_rate": 3.291406876837295e-05, "loss": 0.1405, "step": 25662 }, { "epoch": 0.45772839153854383, "grad_norm": 0.22639405727386475, "learning_rate": 3.2912592294188236e-05, "loss": 0.1088, "step": 25663 }, { "epoch": 0.4577462276602576, "grad_norm": 0.26393505930900574, "learning_rate": 3.291111578933159e-05, "loss": 0.1104, "step": 25664 }, { "epoch": 0.45776406378197126, "grad_norm": 0.34459200501441956, "learning_rate": 3.290963925380872e-05, "loss": 0.1405, "step": 25665 }, { "epoch": 0.45778189990368495, "grad_norm": 0.1711176335811615, "learning_rate": 3.2908162687625365e-05, "loss": 0.1219, "step": 25666 }, { "epoch": 0.45779973602539864, "grad_norm": 0.21426557004451752, "learning_rate": 3.290668609078723e-05, "loss": 0.0925, "step": 25667 }, { "epoch": 0.45781757214711233, "grad_norm": 0.2660115361213684, "learning_rate": 3.2905209463300055e-05, "loss": 0.1416, "step": 25668 }, { "epoch": 0.457835408268826, "grad_norm": 0.34632986783981323, "learning_rate": 3.290373280516955e-05, "loss": 0.1243, "step": 25669 }, { "epoch": 0.4578532443905397, "grad_norm": 0.22129449248313904, "learning_rate": 3.2902256116401454e-05, "loss": 0.1364, "step": 25670 }, { "epoch": 0.4578710805122534, "grad_norm": 0.3137301802635193, "learning_rate": 3.290077939700148e-05, "loss": 0.1056, "step": 25671 }, { "epoch": 0.45788891663396714, "grad_norm": 0.2301984578371048, "learning_rate": 3.2899302646975363e-05, "loss": 0.119, "step": 25672 }, { "epoch": 0.4579067527556808, "grad_norm": 0.2068316489458084, "learning_rate": 3.2897825866328816e-05, "loss": 0.1175, "step": 25673 }, { "epoch": 0.4579245888773945, "grad_norm": 0.23619364202022552, "learning_rate": 3.289634905506756e-05, "loss": 0.108, "step": 25674 }, { "epoch": 0.4579424249991082, "grad_norm": 0.32757872343063354, "learning_rate": 3.289487221319734e-05, "loss": 0.2001, "step": 25675 }, { "epoch": 0.4579602611208219, "grad_norm": 0.2315714955329895, "learning_rate": 3.289339534072386e-05, "loss": 0.1303, "step": 25676 }, { "epoch": 0.4579780972425356, "grad_norm": 0.2705220580101013, "learning_rate": 3.2891918437652856e-05, "loss": 0.2017, "step": 25677 }, { "epoch": 0.45799593336424926, "grad_norm": 0.2685730755329132, "learning_rate": 3.289044150399005e-05, "loss": 0.1502, "step": 25678 }, { "epoch": 0.45801376948596295, "grad_norm": 0.31895095109939575, "learning_rate": 3.2888964539741176e-05, "loss": 0.1508, "step": 25679 }, { "epoch": 0.45803160560767664, "grad_norm": 0.24345429241657257, "learning_rate": 3.288748754491194e-05, "loss": 0.1813, "step": 25680 }, { "epoch": 0.4580494417293904, "grad_norm": 0.24111449718475342, "learning_rate": 3.288601051950808e-05, "loss": 0.154, "step": 25681 }, { "epoch": 0.45806727785110407, "grad_norm": 0.2795223593711853, "learning_rate": 3.288453346353532e-05, "loss": 0.086, "step": 25682 }, { "epoch": 0.45808511397281776, "grad_norm": 0.1886620670557022, "learning_rate": 3.2883056376999386e-05, "loss": 0.1117, "step": 25683 }, { "epoch": 0.45810295009453145, "grad_norm": 0.2418023943901062, "learning_rate": 3.2881579259906005e-05, "loss": 0.1297, "step": 25684 }, { "epoch": 0.45812078621624513, "grad_norm": 0.26265498995780945, "learning_rate": 3.28801021122609e-05, "loss": 0.1355, "step": 25685 }, { "epoch": 0.4581386223379588, "grad_norm": 0.2866142988204956, "learning_rate": 3.28786249340698e-05, "loss": 0.127, "step": 25686 }, { "epoch": 0.4581564584596725, "grad_norm": 0.27637115120887756, "learning_rate": 3.287714772533842e-05, "loss": 0.1616, "step": 25687 }, { "epoch": 0.4581742945813862, "grad_norm": 0.25364696979522705, "learning_rate": 3.28756704860725e-05, "loss": 0.1821, "step": 25688 }, { "epoch": 0.45819213070309994, "grad_norm": 0.24076221883296967, "learning_rate": 3.287419321627776e-05, "loss": 0.1768, "step": 25689 }, { "epoch": 0.45820996682481363, "grad_norm": 0.2482614517211914, "learning_rate": 3.287271591595993e-05, "loss": 0.1215, "step": 25690 }, { "epoch": 0.4582278029465273, "grad_norm": 0.3314327597618103, "learning_rate": 3.2871238585124724e-05, "loss": 0.2124, "step": 25691 }, { "epoch": 0.458245639068241, "grad_norm": 0.27108749747276306, "learning_rate": 3.2869761223777893e-05, "loss": 0.1797, "step": 25692 }, { "epoch": 0.4582634751899547, "grad_norm": 0.22751684486865997, "learning_rate": 3.286828383192514e-05, "loss": 0.1774, "step": 25693 }, { "epoch": 0.4582813113116684, "grad_norm": 0.26826396584510803, "learning_rate": 3.28668064095722e-05, "loss": 0.1614, "step": 25694 }, { "epoch": 0.45829914743338207, "grad_norm": 0.24261720478534698, "learning_rate": 3.2865328956724814e-05, "loss": 0.1095, "step": 25695 }, { "epoch": 0.45831698355509576, "grad_norm": 0.28999802470207214, "learning_rate": 3.286385147338868e-05, "loss": 0.1738, "step": 25696 }, { "epoch": 0.4583348196768095, "grad_norm": 0.2743673324584961, "learning_rate": 3.286237395956955e-05, "loss": 0.1484, "step": 25697 }, { "epoch": 0.4583526557985232, "grad_norm": 0.2501561641693115, "learning_rate": 3.286089641527315e-05, "loss": 0.1185, "step": 25698 }, { "epoch": 0.4583704919202369, "grad_norm": 0.33886897563934326, "learning_rate": 3.285941884050519e-05, "loss": 0.1533, "step": 25699 }, { "epoch": 0.45838832804195057, "grad_norm": 0.21820585429668427, "learning_rate": 3.2857941235271405e-05, "loss": 0.1125, "step": 25700 }, { "epoch": 0.45840616416366425, "grad_norm": 0.2532954812049866, "learning_rate": 3.2856463599577535e-05, "loss": 0.1382, "step": 25701 }, { "epoch": 0.45842400028537794, "grad_norm": 0.23721110820770264, "learning_rate": 3.2854985933429293e-05, "loss": 0.127, "step": 25702 }, { "epoch": 0.45844183640709163, "grad_norm": 0.26717960834503174, "learning_rate": 3.285350823683241e-05, "loss": 0.1609, "step": 25703 }, { "epoch": 0.4584596725288053, "grad_norm": 0.2746828496456146, "learning_rate": 3.2852030509792626e-05, "loss": 0.1656, "step": 25704 }, { "epoch": 0.458477508650519, "grad_norm": 0.2644045650959015, "learning_rate": 3.285055275231566e-05, "loss": 0.1164, "step": 25705 }, { "epoch": 0.45849534477223275, "grad_norm": 0.29921290278434753, "learning_rate": 3.284907496440723e-05, "loss": 0.1783, "step": 25706 }, { "epoch": 0.45851318089394644, "grad_norm": 0.2715904116630554, "learning_rate": 3.284759714607308e-05, "loss": 0.1323, "step": 25707 }, { "epoch": 0.4585310170156601, "grad_norm": 0.24322174489498138, "learning_rate": 3.284611929731893e-05, "loss": 0.1721, "step": 25708 }, { "epoch": 0.4585488531373738, "grad_norm": 0.20780092477798462, "learning_rate": 3.284464141815052e-05, "loss": 0.0977, "step": 25709 }, { "epoch": 0.4585666892590875, "grad_norm": 0.23517489433288574, "learning_rate": 3.284316350857356e-05, "loss": 0.1111, "step": 25710 }, { "epoch": 0.4585845253808012, "grad_norm": 0.238978773355484, "learning_rate": 3.284168556859379e-05, "loss": 0.1394, "step": 25711 }, { "epoch": 0.4586023615025149, "grad_norm": 0.22924506664276123, "learning_rate": 3.284020759821694e-05, "loss": 0.1643, "step": 25712 }, { "epoch": 0.45862019762422856, "grad_norm": 0.19677604734897614, "learning_rate": 3.283872959744874e-05, "loss": 0.157, "step": 25713 }, { "epoch": 0.4586380337459423, "grad_norm": 0.26793423295021057, "learning_rate": 3.2837251566294926e-05, "loss": 0.1796, "step": 25714 }, { "epoch": 0.458655869867656, "grad_norm": 0.3087601661682129, "learning_rate": 3.2835773504761205e-05, "loss": 0.1513, "step": 25715 }, { "epoch": 0.4586737059893697, "grad_norm": 0.28473737835884094, "learning_rate": 3.283429541285332e-05, "loss": 0.1631, "step": 25716 }, { "epoch": 0.4586915421110834, "grad_norm": 0.3098143935203552, "learning_rate": 3.2832817290577e-05, "loss": 0.1234, "step": 25717 }, { "epoch": 0.45870937823279706, "grad_norm": 0.25014400482177734, "learning_rate": 3.283133913793798e-05, "loss": 0.104, "step": 25718 }, { "epoch": 0.45872721435451075, "grad_norm": 0.33132413029670715, "learning_rate": 3.2829860954941976e-05, "loss": 0.1606, "step": 25719 }, { "epoch": 0.45874505047622444, "grad_norm": 0.33311113715171814, "learning_rate": 3.2828382741594736e-05, "loss": 0.1864, "step": 25720 }, { "epoch": 0.4587628865979381, "grad_norm": 0.25058743357658386, "learning_rate": 3.282690449790198e-05, "loss": 0.1192, "step": 25721 }, { "epoch": 0.4587807227196518, "grad_norm": 0.22558225691318512, "learning_rate": 3.2825426223869436e-05, "loss": 0.1485, "step": 25722 }, { "epoch": 0.45879855884136556, "grad_norm": 0.22102046012878418, "learning_rate": 3.282394791950284e-05, "loss": 0.1386, "step": 25723 }, { "epoch": 0.45881639496307924, "grad_norm": 0.24871978163719177, "learning_rate": 3.2822469584807906e-05, "loss": 0.0817, "step": 25724 }, { "epoch": 0.45883423108479293, "grad_norm": 0.35498183965682983, "learning_rate": 3.28209912197904e-05, "loss": 0.164, "step": 25725 }, { "epoch": 0.4588520672065066, "grad_norm": 0.21349261701107025, "learning_rate": 3.281951282445601e-05, "loss": 0.135, "step": 25726 }, { "epoch": 0.4588699033282203, "grad_norm": 0.36893653869628906, "learning_rate": 3.2818034398810504e-05, "loss": 0.1557, "step": 25727 }, { "epoch": 0.458887739449934, "grad_norm": 0.3415740132331848, "learning_rate": 3.281655594285959e-05, "loss": 0.1804, "step": 25728 }, { "epoch": 0.4589055755716477, "grad_norm": 0.25834035873413086, "learning_rate": 3.281507745660901e-05, "loss": 0.1252, "step": 25729 }, { "epoch": 0.45892341169336137, "grad_norm": 0.26537999510765076, "learning_rate": 3.2813598940064475e-05, "loss": 0.1724, "step": 25730 }, { "epoch": 0.4589412478150751, "grad_norm": 0.26515117287635803, "learning_rate": 3.2812120393231744e-05, "loss": 0.1675, "step": 25731 }, { "epoch": 0.4589590839367888, "grad_norm": 0.20561553537845612, "learning_rate": 3.2810641816116535e-05, "loss": 0.1086, "step": 25732 }, { "epoch": 0.4589769200585025, "grad_norm": 0.40284013748168945, "learning_rate": 3.280916320872458e-05, "loss": 0.1636, "step": 25733 }, { "epoch": 0.4589947561802162, "grad_norm": 0.23910824954509735, "learning_rate": 3.280768457106161e-05, "loss": 0.1183, "step": 25734 }, { "epoch": 0.45901259230192987, "grad_norm": 0.23710238933563232, "learning_rate": 3.280620590313336e-05, "loss": 0.1631, "step": 25735 }, { "epoch": 0.45903042842364356, "grad_norm": 0.27089130878448486, "learning_rate": 3.280472720494556e-05, "loss": 0.1262, "step": 25736 }, { "epoch": 0.45904826454535724, "grad_norm": 0.21723240613937378, "learning_rate": 3.2803248476503944e-05, "loss": 0.1312, "step": 25737 }, { "epoch": 0.45906610066707093, "grad_norm": 0.3460243046283722, "learning_rate": 3.280176971781423e-05, "loss": 0.1742, "step": 25738 }, { "epoch": 0.4590839367887846, "grad_norm": 0.36793309450149536, "learning_rate": 3.280029092888217e-05, "loss": 0.1279, "step": 25739 }, { "epoch": 0.45910177291049836, "grad_norm": 0.19602982699871063, "learning_rate": 3.2798812109713496e-05, "loss": 0.0997, "step": 25740 }, { "epoch": 0.45911960903221205, "grad_norm": 0.21076786518096924, "learning_rate": 3.279733326031392e-05, "loss": 0.0915, "step": 25741 }, { "epoch": 0.45913744515392574, "grad_norm": 0.25963619351387024, "learning_rate": 3.27958543806892e-05, "loss": 0.1126, "step": 25742 }, { "epoch": 0.4591552812756394, "grad_norm": 0.25655022263526917, "learning_rate": 3.279437547084504e-05, "loss": 0.1376, "step": 25743 }, { "epoch": 0.4591731173973531, "grad_norm": 0.2974923551082611, "learning_rate": 3.2792896530787204e-05, "loss": 0.2223, "step": 25744 }, { "epoch": 0.4591909535190668, "grad_norm": 0.2894544005393982, "learning_rate": 3.2791417560521396e-05, "loss": 0.138, "step": 25745 }, { "epoch": 0.4592087896407805, "grad_norm": 0.30527371168136597, "learning_rate": 3.278993856005337e-05, "loss": 0.1602, "step": 25746 }, { "epoch": 0.4592266257624942, "grad_norm": 0.19043420255184174, "learning_rate": 3.278845952938885e-05, "loss": 0.1585, "step": 25747 }, { "epoch": 0.4592444618842079, "grad_norm": 0.22898824512958527, "learning_rate": 3.278698046853357e-05, "loss": 0.1364, "step": 25748 }, { "epoch": 0.4592622980059216, "grad_norm": 0.22600287199020386, "learning_rate": 3.278550137749327e-05, "loss": 0.1362, "step": 25749 }, { "epoch": 0.4592801341276353, "grad_norm": 0.29720863699913025, "learning_rate": 3.278402225627367e-05, "loss": 0.0885, "step": 25750 }, { "epoch": 0.459297970249349, "grad_norm": 0.3840457499027252, "learning_rate": 3.278254310488051e-05, "loss": 0.1833, "step": 25751 }, { "epoch": 0.4593158063710627, "grad_norm": 0.27006083726882935, "learning_rate": 3.2781063923319536e-05, "loss": 0.161, "step": 25752 }, { "epoch": 0.45933364249277636, "grad_norm": 0.26204490661621094, "learning_rate": 3.277958471159646e-05, "loss": 0.1046, "step": 25753 }, { "epoch": 0.45935147861449005, "grad_norm": 0.33187493681907654, "learning_rate": 3.277810546971703e-05, "loss": 0.1329, "step": 25754 }, { "epoch": 0.45936931473620374, "grad_norm": 0.30578693747520447, "learning_rate": 3.277662619768698e-05, "loss": 0.1403, "step": 25755 }, { "epoch": 0.4593871508579175, "grad_norm": 0.34515100717544556, "learning_rate": 3.2775146895512034e-05, "loss": 0.1595, "step": 25756 }, { "epoch": 0.45940498697963117, "grad_norm": 0.27839985489845276, "learning_rate": 3.2773667563197943e-05, "loss": 0.146, "step": 25757 }, { "epoch": 0.45942282310134486, "grad_norm": 0.2635040283203125, "learning_rate": 3.277218820075042e-05, "loss": 0.1192, "step": 25758 }, { "epoch": 0.45944065922305855, "grad_norm": 0.30586007237434387, "learning_rate": 3.277070880817521e-05, "loss": 0.1492, "step": 25759 }, { "epoch": 0.45945849534477223, "grad_norm": 0.27949824929237366, "learning_rate": 3.2769229385478064e-05, "loss": 0.1253, "step": 25760 }, { "epoch": 0.4594763314664859, "grad_norm": 0.30623701214790344, "learning_rate": 3.2767749932664694e-05, "loss": 0.1744, "step": 25761 }, { "epoch": 0.4594941675881996, "grad_norm": 0.2526586949825287, "learning_rate": 3.276627044974084e-05, "loss": 0.0899, "step": 25762 }, { "epoch": 0.4595120037099133, "grad_norm": 0.2833310067653656, "learning_rate": 3.276479093671224e-05, "loss": 0.1041, "step": 25763 }, { "epoch": 0.459529839831627, "grad_norm": 0.32900676131248474, "learning_rate": 3.2763311393584635e-05, "loss": 0.1456, "step": 25764 }, { "epoch": 0.45954767595334073, "grad_norm": 0.22483208775520325, "learning_rate": 3.276183182036375e-05, "loss": 0.1556, "step": 25765 }, { "epoch": 0.4595655120750544, "grad_norm": 0.31574106216430664, "learning_rate": 3.276035221705532e-05, "loss": 0.1552, "step": 25766 }, { "epoch": 0.4595833481967681, "grad_norm": 0.22517213225364685, "learning_rate": 3.275887258366508e-05, "loss": 0.1506, "step": 25767 }, { "epoch": 0.4596011843184818, "grad_norm": 0.22395804524421692, "learning_rate": 3.275739292019878e-05, "loss": 0.1131, "step": 25768 }, { "epoch": 0.4596190204401955, "grad_norm": 0.28659528493881226, "learning_rate": 3.275591322666214e-05, "loss": 0.1116, "step": 25769 }, { "epoch": 0.45963685656190917, "grad_norm": 0.38826295733451843, "learning_rate": 3.2754433503060914e-05, "loss": 0.132, "step": 25770 }, { "epoch": 0.45965469268362286, "grad_norm": 0.34235289692878723, "learning_rate": 3.275295374940081e-05, "loss": 0.1004, "step": 25771 }, { "epoch": 0.45967252880533654, "grad_norm": 0.24566218256950378, "learning_rate": 3.2751473965687585e-05, "loss": 0.136, "step": 25772 }, { "epoch": 0.4596903649270503, "grad_norm": 0.29846876859664917, "learning_rate": 3.274999415192696e-05, "loss": 0.1794, "step": 25773 }, { "epoch": 0.459708201048764, "grad_norm": 0.2336914837360382, "learning_rate": 3.274851430812469e-05, "loss": 0.1337, "step": 25774 }, { "epoch": 0.45972603717047766, "grad_norm": 0.4262526035308838, "learning_rate": 3.2747034434286514e-05, "loss": 0.1014, "step": 25775 }, { "epoch": 0.45974387329219135, "grad_norm": 0.3195294141769409, "learning_rate": 3.274555453041814e-05, "loss": 0.2094, "step": 25776 }, { "epoch": 0.45976170941390504, "grad_norm": 0.3057333827018738, "learning_rate": 3.274407459652533e-05, "loss": 0.1279, "step": 25777 }, { "epoch": 0.4597795455356187, "grad_norm": 0.22899553179740906, "learning_rate": 3.2742594632613805e-05, "loss": 0.1405, "step": 25778 }, { "epoch": 0.4597973816573324, "grad_norm": 0.24905577301979065, "learning_rate": 3.274111463868931e-05, "loss": 0.1393, "step": 25779 }, { "epoch": 0.4598152177790461, "grad_norm": 0.21940824389457703, "learning_rate": 3.273963461475759e-05, "loss": 0.1432, "step": 25780 }, { "epoch": 0.4598330539007598, "grad_norm": 0.32560619711875916, "learning_rate": 3.273815456082436e-05, "loss": 0.1551, "step": 25781 }, { "epoch": 0.45985089002247354, "grad_norm": 0.2656085789203644, "learning_rate": 3.273667447689538e-05, "loss": 0.1616, "step": 25782 }, { "epoch": 0.4598687261441872, "grad_norm": 0.27227163314819336, "learning_rate": 3.2735194362976374e-05, "loss": 0.0834, "step": 25783 }, { "epoch": 0.4598865622659009, "grad_norm": 0.2706531584262848, "learning_rate": 3.2733714219073084e-05, "loss": 0.1177, "step": 25784 }, { "epoch": 0.4599043983876146, "grad_norm": 0.3090117871761322, "learning_rate": 3.273223404519125e-05, "loss": 0.176, "step": 25785 }, { "epoch": 0.4599222345093283, "grad_norm": 0.19468435645103455, "learning_rate": 3.27307538413366e-05, "loss": 0.1273, "step": 25786 }, { "epoch": 0.459940070631042, "grad_norm": 0.31341978907585144, "learning_rate": 3.272927360751488e-05, "loss": 0.1941, "step": 25787 }, { "epoch": 0.45995790675275566, "grad_norm": 0.24075721204280853, "learning_rate": 3.272779334373183e-05, "loss": 0.135, "step": 25788 }, { "epoch": 0.45997574287446935, "grad_norm": 0.28112050890922546, "learning_rate": 3.272631304999317e-05, "loss": 0.1821, "step": 25789 }, { "epoch": 0.4599935789961831, "grad_norm": 0.2784784734249115, "learning_rate": 3.2724832726304673e-05, "loss": 0.1521, "step": 25790 }, { "epoch": 0.4600114151178968, "grad_norm": 0.27189746499061584, "learning_rate": 3.272335237267204e-05, "loss": 0.1511, "step": 25791 }, { "epoch": 0.46002925123961047, "grad_norm": 0.253939151763916, "learning_rate": 3.272187198910104e-05, "loss": 0.0698, "step": 25792 }, { "epoch": 0.46004708736132416, "grad_norm": 0.307739794254303, "learning_rate": 3.272039157559738e-05, "loss": 0.1753, "step": 25793 }, { "epoch": 0.46006492348303785, "grad_norm": 0.3136581778526306, "learning_rate": 3.2718911132166826e-05, "loss": 0.116, "step": 25794 }, { "epoch": 0.46008275960475153, "grad_norm": 0.2769233286380768, "learning_rate": 3.27174306588151e-05, "loss": 0.1874, "step": 25795 }, { "epoch": 0.4601005957264652, "grad_norm": 0.18698105216026306, "learning_rate": 3.271595015554796e-05, "loss": 0.0925, "step": 25796 }, { "epoch": 0.4601184318481789, "grad_norm": 0.23098380863666534, "learning_rate": 3.271446962237112e-05, "loss": 0.1232, "step": 25797 }, { "epoch": 0.46013626796989265, "grad_norm": 0.4390886723995209, "learning_rate": 3.2712989059290334e-05, "loss": 0.1363, "step": 25798 }, { "epoch": 0.46015410409160634, "grad_norm": 0.197047621011734, "learning_rate": 3.271150846631134e-05, "loss": 0.1283, "step": 25799 }, { "epoch": 0.46017194021332003, "grad_norm": 0.27739617228507996, "learning_rate": 3.271002784343988e-05, "loss": 0.1401, "step": 25800 }, { "epoch": 0.4601897763350337, "grad_norm": 0.24595533311367035, "learning_rate": 3.270854719068168e-05, "loss": 0.1199, "step": 25801 }, { "epoch": 0.4602076124567474, "grad_norm": 0.20866097509860992, "learning_rate": 3.270706650804249e-05, "loss": 0.1444, "step": 25802 }, { "epoch": 0.4602254485784611, "grad_norm": 0.24671515822410583, "learning_rate": 3.270558579552806e-05, "loss": 0.0954, "step": 25803 }, { "epoch": 0.4602432847001748, "grad_norm": 0.2498752623796463, "learning_rate": 3.27041050531441e-05, "loss": 0.1282, "step": 25804 }, { "epoch": 0.46026112082188847, "grad_norm": 0.275075227022171, "learning_rate": 3.270262428089638e-05, "loss": 0.0933, "step": 25805 }, { "epoch": 0.46027895694360216, "grad_norm": 0.3085978627204895, "learning_rate": 3.270114347879063e-05, "loss": 0.1541, "step": 25806 }, { "epoch": 0.4602967930653159, "grad_norm": 0.32416579127311707, "learning_rate": 3.269966264683258e-05, "loss": 0.1247, "step": 25807 }, { "epoch": 0.4603146291870296, "grad_norm": 0.2567242383956909, "learning_rate": 3.269818178502797e-05, "loss": 0.1654, "step": 25808 }, { "epoch": 0.4603324653087433, "grad_norm": 0.2719188928604126, "learning_rate": 3.269670089338257e-05, "loss": 0.1675, "step": 25809 }, { "epoch": 0.46035030143045697, "grad_norm": 0.3668282628059387, "learning_rate": 3.269521997190209e-05, "loss": 0.0978, "step": 25810 }, { "epoch": 0.46036813755217065, "grad_norm": 0.25756388902664185, "learning_rate": 3.269373902059228e-05, "loss": 0.1716, "step": 25811 }, { "epoch": 0.46038597367388434, "grad_norm": 0.26922938227653503, "learning_rate": 3.269225803945888e-05, "loss": 0.1125, "step": 25812 }, { "epoch": 0.46040380979559803, "grad_norm": 0.27569660544395447, "learning_rate": 3.269077702850763e-05, "loss": 0.1258, "step": 25813 }, { "epoch": 0.4604216459173117, "grad_norm": 0.26394906640052795, "learning_rate": 3.268929598774427e-05, "loss": 0.1753, "step": 25814 }, { "epoch": 0.46043948203902546, "grad_norm": 0.33814337849617004, "learning_rate": 3.268781491717454e-05, "loss": 0.1942, "step": 25815 }, { "epoch": 0.46045731816073915, "grad_norm": 0.2406303882598877, "learning_rate": 3.2686333816804194e-05, "loss": 0.1132, "step": 25816 }, { "epoch": 0.46047515428245284, "grad_norm": 0.26674601435661316, "learning_rate": 3.2684852686638956e-05, "loss": 0.1203, "step": 25817 }, { "epoch": 0.4604929904041665, "grad_norm": 0.21297091245651245, "learning_rate": 3.268337152668458e-05, "loss": 0.0782, "step": 25818 }, { "epoch": 0.4605108265258802, "grad_norm": 0.2654586732387543, "learning_rate": 3.2681890336946795e-05, "loss": 0.1428, "step": 25819 }, { "epoch": 0.4605286626475939, "grad_norm": 0.23363183438777924, "learning_rate": 3.268040911743135e-05, "loss": 0.1557, "step": 25820 }, { "epoch": 0.4605464987693076, "grad_norm": 0.2674151659011841, "learning_rate": 3.2678927868143994e-05, "loss": 0.132, "step": 25821 }, { "epoch": 0.4605643348910213, "grad_norm": 0.2626718580722809, "learning_rate": 3.2677446589090455e-05, "loss": 0.1383, "step": 25822 }, { "epoch": 0.46058217101273496, "grad_norm": 0.20936977863311768, "learning_rate": 3.267596528027648e-05, "loss": 0.1465, "step": 25823 }, { "epoch": 0.4606000071344487, "grad_norm": 0.33624109625816345, "learning_rate": 3.2674483941707826e-05, "loss": 0.1087, "step": 25824 }, { "epoch": 0.4606178432561624, "grad_norm": 0.2843417823314667, "learning_rate": 3.267300257339021e-05, "loss": 0.1601, "step": 25825 }, { "epoch": 0.4606356793778761, "grad_norm": 0.33800917863845825, "learning_rate": 3.267152117532939e-05, "loss": 0.1321, "step": 25826 }, { "epoch": 0.46065351549958977, "grad_norm": 0.27965471148490906, "learning_rate": 3.26700397475311e-05, "loss": 0.162, "step": 25827 }, { "epoch": 0.46067135162130346, "grad_norm": 0.2899395227432251, "learning_rate": 3.266855829000108e-05, "loss": 0.1071, "step": 25828 }, { "epoch": 0.46068918774301715, "grad_norm": 0.48492705821990967, "learning_rate": 3.2667076802745096e-05, "loss": 0.1522, "step": 25829 }, { "epoch": 0.46070702386473084, "grad_norm": 0.26405781507492065, "learning_rate": 3.2665595285768866e-05, "loss": 0.0881, "step": 25830 }, { "epoch": 0.4607248599864445, "grad_norm": 0.2571663558483124, "learning_rate": 3.2664113739078144e-05, "loss": 0.15, "step": 25831 }, { "epoch": 0.46074269610815827, "grad_norm": 0.2735995650291443, "learning_rate": 3.266263216267866e-05, "loss": 0.1405, "step": 25832 }, { "epoch": 0.46076053222987196, "grad_norm": 0.3667232096195221, "learning_rate": 3.266115055657618e-05, "loss": 0.1304, "step": 25833 }, { "epoch": 0.46077836835158564, "grad_norm": 0.38410672545433044, "learning_rate": 3.265966892077643e-05, "loss": 0.2069, "step": 25834 }, { "epoch": 0.46079620447329933, "grad_norm": 0.2521287202835083, "learning_rate": 3.2658187255285156e-05, "loss": 0.1055, "step": 25835 }, { "epoch": 0.460814040595013, "grad_norm": 0.25701242685317993, "learning_rate": 3.26567055601081e-05, "loss": 0.1528, "step": 25836 }, { "epoch": 0.4608318767167267, "grad_norm": 0.2520907521247864, "learning_rate": 3.265522383525101e-05, "loss": 0.1429, "step": 25837 }, { "epoch": 0.4608497128384404, "grad_norm": 0.24482101202011108, "learning_rate": 3.2653742080719635e-05, "loss": 0.0912, "step": 25838 }, { "epoch": 0.4608675489601541, "grad_norm": 0.3421719968318939, "learning_rate": 3.265226029651971e-05, "loss": 0.1392, "step": 25839 }, { "epoch": 0.46088538508186777, "grad_norm": 0.24517175555229187, "learning_rate": 3.265077848265699e-05, "loss": 0.1351, "step": 25840 }, { "epoch": 0.4609032212035815, "grad_norm": 0.27683916687965393, "learning_rate": 3.264929663913719e-05, "loss": 0.178, "step": 25841 }, { "epoch": 0.4609210573252952, "grad_norm": 0.28843027353286743, "learning_rate": 3.264781476596608e-05, "loss": 0.1654, "step": 25842 }, { "epoch": 0.4609388934470089, "grad_norm": 0.21967118978500366, "learning_rate": 3.26463328631494e-05, "loss": 0.1503, "step": 25843 }, { "epoch": 0.4609567295687226, "grad_norm": 0.33800002932548523, "learning_rate": 3.26448509306929e-05, "loss": 0.1768, "step": 25844 }, { "epoch": 0.46097456569043627, "grad_norm": 0.28730538487434387, "learning_rate": 3.264336896860231e-05, "loss": 0.1014, "step": 25845 }, { "epoch": 0.46099240181214995, "grad_norm": 0.2638072669506073, "learning_rate": 3.264188697688339e-05, "loss": 0.1497, "step": 25846 }, { "epoch": 0.46101023793386364, "grad_norm": 0.2450537234544754, "learning_rate": 3.264040495554187e-05, "loss": 0.1339, "step": 25847 }, { "epoch": 0.46102807405557733, "grad_norm": 0.2675429582595825, "learning_rate": 3.26389229045835e-05, "loss": 0.1293, "step": 25848 }, { "epoch": 0.4610459101772911, "grad_norm": 0.3125545084476471, "learning_rate": 3.263744082401403e-05, "loss": 0.2092, "step": 25849 }, { "epoch": 0.46106374629900476, "grad_norm": 0.25436902046203613, "learning_rate": 3.26359587138392e-05, "loss": 0.1066, "step": 25850 }, { "epoch": 0.46108158242071845, "grad_norm": 0.2024439424276352, "learning_rate": 3.263447657406476e-05, "loss": 0.0882, "step": 25851 }, { "epoch": 0.46109941854243214, "grad_norm": 0.2594786286354065, "learning_rate": 3.2632994404696446e-05, "loss": 0.1622, "step": 25852 }, { "epoch": 0.4611172546641458, "grad_norm": 0.4123919904232025, "learning_rate": 3.2631512205740014e-05, "loss": 0.1273, "step": 25853 }, { "epoch": 0.4611350907858595, "grad_norm": 0.23608681559562683, "learning_rate": 3.263002997720121e-05, "loss": 0.1405, "step": 25854 }, { "epoch": 0.4611529269075732, "grad_norm": 0.41029465198516846, "learning_rate": 3.262854771908576e-05, "loss": 0.1477, "step": 25855 }, { "epoch": 0.4611707630292869, "grad_norm": 0.2541080713272095, "learning_rate": 3.2627065431399425e-05, "loss": 0.1281, "step": 25856 }, { "epoch": 0.46118859915100063, "grad_norm": 0.2968452572822571, "learning_rate": 3.2625583114147964e-05, "loss": 0.1029, "step": 25857 }, { "epoch": 0.4612064352727143, "grad_norm": 0.2291620671749115, "learning_rate": 3.262410076733709e-05, "loss": 0.1317, "step": 25858 }, { "epoch": 0.461224271394428, "grad_norm": 0.21105308830738068, "learning_rate": 3.262261839097259e-05, "loss": 0.1566, "step": 25859 }, { "epoch": 0.4612421075161417, "grad_norm": 0.4057336747646332, "learning_rate": 3.262113598506017e-05, "loss": 0.1841, "step": 25860 }, { "epoch": 0.4612599436378554, "grad_norm": 0.28084295988082886, "learning_rate": 3.261965354960561e-05, "loss": 0.1334, "step": 25861 }, { "epoch": 0.4612777797595691, "grad_norm": 0.29075777530670166, "learning_rate": 3.2618171084614634e-05, "loss": 0.0846, "step": 25862 }, { "epoch": 0.46129561588128276, "grad_norm": 0.21737127006053925, "learning_rate": 3.261668859009299e-05, "loss": 0.1555, "step": 25863 }, { "epoch": 0.46131345200299645, "grad_norm": 0.5115792155265808, "learning_rate": 3.261520606604644e-05, "loss": 0.1626, "step": 25864 }, { "epoch": 0.46133128812471014, "grad_norm": 0.24249915778636932, "learning_rate": 3.261372351248072e-05, "loss": 0.13, "step": 25865 }, { "epoch": 0.4613491242464239, "grad_norm": 0.30558910965919495, "learning_rate": 3.261224092940158e-05, "loss": 0.1214, "step": 25866 }, { "epoch": 0.46136696036813757, "grad_norm": 0.32076868414878845, "learning_rate": 3.261075831681475e-05, "loss": 0.1446, "step": 25867 }, { "epoch": 0.46138479648985126, "grad_norm": 0.38136833906173706, "learning_rate": 3.260927567472601e-05, "loss": 0.1579, "step": 25868 }, { "epoch": 0.46140263261156494, "grad_norm": 0.27521535754203796, "learning_rate": 3.260779300314108e-05, "loss": 0.168, "step": 25869 }, { "epoch": 0.46142046873327863, "grad_norm": 0.4319819211959839, "learning_rate": 3.260631030206572e-05, "loss": 0.1866, "step": 25870 }, { "epoch": 0.4614383048549923, "grad_norm": 0.30179375410079956, "learning_rate": 3.260482757150567e-05, "loss": 0.1784, "step": 25871 }, { "epoch": 0.461456140976706, "grad_norm": 0.2616255283355713, "learning_rate": 3.2603344811466685e-05, "loss": 0.1402, "step": 25872 }, { "epoch": 0.4614739770984197, "grad_norm": 0.31946712732315063, "learning_rate": 3.2601862021954504e-05, "loss": 0.1547, "step": 25873 }, { "epoch": 0.46149181322013344, "grad_norm": 0.21861965954303741, "learning_rate": 3.260037920297489e-05, "loss": 0.1873, "step": 25874 }, { "epoch": 0.46150964934184713, "grad_norm": 0.29000288248062134, "learning_rate": 3.259889635453357e-05, "loss": 0.1459, "step": 25875 }, { "epoch": 0.4615274854635608, "grad_norm": 0.3512144088745117, "learning_rate": 3.259741347663632e-05, "loss": 0.1646, "step": 25876 }, { "epoch": 0.4615453215852745, "grad_norm": 0.47261953353881836, "learning_rate": 3.2595930569288844e-05, "loss": 0.1406, "step": 25877 }, { "epoch": 0.4615631577069882, "grad_norm": 0.31997692584991455, "learning_rate": 3.259444763249694e-05, "loss": 0.1581, "step": 25878 }, { "epoch": 0.4615809938287019, "grad_norm": 0.2950047552585602, "learning_rate": 3.259296466626634e-05, "loss": 0.1572, "step": 25879 }, { "epoch": 0.46159882995041557, "grad_norm": 0.20899878442287445, "learning_rate": 3.259148167060276e-05, "loss": 0.1269, "step": 25880 }, { "epoch": 0.46161666607212926, "grad_norm": 0.21753248572349548, "learning_rate": 3.2589998645512e-05, "loss": 0.1244, "step": 25881 }, { "epoch": 0.46163450219384294, "grad_norm": 0.25028809905052185, "learning_rate": 3.2588515590999765e-05, "loss": 0.1273, "step": 25882 }, { "epoch": 0.4616523383155567, "grad_norm": 0.32519659399986267, "learning_rate": 3.258703250707183e-05, "loss": 0.1811, "step": 25883 }, { "epoch": 0.4616701744372704, "grad_norm": 0.23123672604560852, "learning_rate": 3.258554939373394e-05, "loss": 0.1504, "step": 25884 }, { "epoch": 0.46168801055898406, "grad_norm": 0.30120208859443665, "learning_rate": 3.258406625099184e-05, "loss": 0.184, "step": 25885 }, { "epoch": 0.46170584668069775, "grad_norm": 0.19667299091815948, "learning_rate": 3.258258307885127e-05, "loss": 0.1121, "step": 25886 }, { "epoch": 0.46172368280241144, "grad_norm": 0.2335060089826584, "learning_rate": 3.2581099877318e-05, "loss": 0.1539, "step": 25887 }, { "epoch": 0.4617415189241251, "grad_norm": 0.42860716581344604, "learning_rate": 3.2579616646397763e-05, "loss": 0.1867, "step": 25888 }, { "epoch": 0.4617593550458388, "grad_norm": 0.2365170121192932, "learning_rate": 3.257813338609632e-05, "loss": 0.1429, "step": 25889 }, { "epoch": 0.4617771911675525, "grad_norm": 0.2708742618560791, "learning_rate": 3.2576650096419406e-05, "loss": 0.166, "step": 25890 }, { "epoch": 0.46179502728926625, "grad_norm": 0.24951422214508057, "learning_rate": 3.257516677737278e-05, "loss": 0.1058, "step": 25891 }, { "epoch": 0.46181286341097993, "grad_norm": 0.2135237604379654, "learning_rate": 3.25736834289622e-05, "loss": 0.1362, "step": 25892 }, { "epoch": 0.4618306995326936, "grad_norm": 0.3428475856781006, "learning_rate": 3.2572200051193404e-05, "loss": 0.2107, "step": 25893 }, { "epoch": 0.4618485356544073, "grad_norm": 0.36544451117515564, "learning_rate": 3.2570716644072144e-05, "loss": 0.1662, "step": 25894 }, { "epoch": 0.461866371776121, "grad_norm": 0.24160338938236237, "learning_rate": 3.2569233207604174e-05, "loss": 0.1288, "step": 25895 }, { "epoch": 0.4618842078978347, "grad_norm": 0.2374226450920105, "learning_rate": 3.256774974179524e-05, "loss": 0.1049, "step": 25896 }, { "epoch": 0.4619020440195484, "grad_norm": 0.2506173551082611, "learning_rate": 3.256626624665109e-05, "loss": 0.1618, "step": 25897 }, { "epoch": 0.46191988014126206, "grad_norm": 0.33280596137046814, "learning_rate": 3.256478272217748e-05, "loss": 0.1762, "step": 25898 }, { "epoch": 0.4619377162629758, "grad_norm": 0.21386969089508057, "learning_rate": 3.256329916838016e-05, "loss": 0.0788, "step": 25899 }, { "epoch": 0.4619555523846895, "grad_norm": 0.2730884253978729, "learning_rate": 3.256181558526488e-05, "loss": 0.1176, "step": 25900 }, { "epoch": 0.4619733885064032, "grad_norm": 0.2239217460155487, "learning_rate": 3.2560331972837396e-05, "loss": 0.1708, "step": 25901 }, { "epoch": 0.46199122462811687, "grad_norm": 0.25614023208618164, "learning_rate": 3.2558848331103446e-05, "loss": 0.1937, "step": 25902 }, { "epoch": 0.46200906074983056, "grad_norm": 0.2311106026172638, "learning_rate": 3.25573646600688e-05, "loss": 0.105, "step": 25903 }, { "epoch": 0.46202689687154425, "grad_norm": 0.315411239862442, "learning_rate": 3.255588095973919e-05, "loss": 0.1807, "step": 25904 }, { "epoch": 0.46204473299325793, "grad_norm": 0.3713129758834839, "learning_rate": 3.255439723012037e-05, "loss": 0.1344, "step": 25905 }, { "epoch": 0.4620625691149716, "grad_norm": 0.29898330569267273, "learning_rate": 3.25529134712181e-05, "loss": 0.1303, "step": 25906 }, { "epoch": 0.4620804052366853, "grad_norm": 0.19526520371437073, "learning_rate": 3.255142968303814e-05, "loss": 0.1066, "step": 25907 }, { "epoch": 0.46209824135839905, "grad_norm": 0.2991268038749695, "learning_rate": 3.254994586558622e-05, "loss": 0.1358, "step": 25908 }, { "epoch": 0.46211607748011274, "grad_norm": 0.3419950604438782, "learning_rate": 3.254846201886812e-05, "loss": 0.1117, "step": 25909 }, { "epoch": 0.46213391360182643, "grad_norm": 0.23802126944065094, "learning_rate": 3.254697814288955e-05, "loss": 0.1561, "step": 25910 }, { "epoch": 0.4621517497235401, "grad_norm": 0.2340325266122818, "learning_rate": 3.2545494237656295e-05, "loss": 0.1388, "step": 25911 }, { "epoch": 0.4621695858452538, "grad_norm": 0.26105475425720215, "learning_rate": 3.25440103031741e-05, "loss": 0.1928, "step": 25912 }, { "epoch": 0.4621874219669675, "grad_norm": 0.261292427778244, "learning_rate": 3.254252633944872e-05, "loss": 0.1759, "step": 25913 }, { "epoch": 0.4622052580886812, "grad_norm": 0.2863219082355499, "learning_rate": 3.254104234648589e-05, "loss": 0.1639, "step": 25914 }, { "epoch": 0.46222309421039487, "grad_norm": 0.22527258098125458, "learning_rate": 3.253955832429138e-05, "loss": 0.1238, "step": 25915 }, { "epoch": 0.4622409303321086, "grad_norm": 0.28590673208236694, "learning_rate": 3.253807427287095e-05, "loss": 0.14, "step": 25916 }, { "epoch": 0.4622587664538223, "grad_norm": 0.2266944795846939, "learning_rate": 3.253659019223033e-05, "loss": 0.11, "step": 25917 }, { "epoch": 0.462276602575536, "grad_norm": 0.30064821243286133, "learning_rate": 3.253510608237528e-05, "loss": 0.1731, "step": 25918 }, { "epoch": 0.4622944386972497, "grad_norm": 0.253482460975647, "learning_rate": 3.253362194331156e-05, "loss": 0.1901, "step": 25919 }, { "epoch": 0.46231227481896336, "grad_norm": 0.2247164398431778, "learning_rate": 3.2532137775044926e-05, "loss": 0.1295, "step": 25920 }, { "epoch": 0.46233011094067705, "grad_norm": 0.23139981925487518, "learning_rate": 3.253065357758111e-05, "loss": 0.11, "step": 25921 }, { "epoch": 0.46234794706239074, "grad_norm": 0.28722211718559265, "learning_rate": 3.2529169350925894e-05, "loss": 0.1593, "step": 25922 }, { "epoch": 0.46236578318410443, "grad_norm": 0.26677945256233215, "learning_rate": 3.252768509508501e-05, "loss": 0.1362, "step": 25923 }, { "epoch": 0.4623836193058181, "grad_norm": 0.19600580632686615, "learning_rate": 3.252620081006422e-05, "loss": 0.0935, "step": 25924 }, { "epoch": 0.46240145542753186, "grad_norm": 0.22214733064174652, "learning_rate": 3.252471649586927e-05, "loss": 0.1169, "step": 25925 }, { "epoch": 0.46241929154924555, "grad_norm": 0.27019229531288147, "learning_rate": 3.2523232152505925e-05, "loss": 0.17, "step": 25926 }, { "epoch": 0.46243712767095924, "grad_norm": 0.2838871479034424, "learning_rate": 3.252174777997994e-05, "loss": 0.154, "step": 25927 }, { "epoch": 0.4624549637926729, "grad_norm": 0.2956608831882477, "learning_rate": 3.252026337829706e-05, "loss": 0.1467, "step": 25928 }, { "epoch": 0.4624727999143866, "grad_norm": 0.25164660811424255, "learning_rate": 3.251877894746303e-05, "loss": 0.1297, "step": 25929 }, { "epoch": 0.4624906360361003, "grad_norm": 0.2655068039894104, "learning_rate": 3.251729448748363e-05, "loss": 0.1561, "step": 25930 }, { "epoch": 0.462508472157814, "grad_norm": 0.30581045150756836, "learning_rate": 3.2515809998364595e-05, "loss": 0.1809, "step": 25931 }, { "epoch": 0.4625263082795277, "grad_norm": 0.26822715997695923, "learning_rate": 3.251432548011168e-05, "loss": 0.1672, "step": 25932 }, { "epoch": 0.4625441444012414, "grad_norm": 0.2988027334213257, "learning_rate": 3.251284093273065e-05, "loss": 0.1328, "step": 25933 }, { "epoch": 0.4625619805229551, "grad_norm": 0.24633675813674927, "learning_rate": 3.251135635622725e-05, "loss": 0.1612, "step": 25934 }, { "epoch": 0.4625798166446688, "grad_norm": 0.29048851132392883, "learning_rate": 3.2509871750607244e-05, "loss": 0.1887, "step": 25935 }, { "epoch": 0.4625976527663825, "grad_norm": 0.30334049463272095, "learning_rate": 3.2508387115876384e-05, "loss": 0.1707, "step": 25936 }, { "epoch": 0.46261548888809617, "grad_norm": 0.2526455819606781, "learning_rate": 3.2506902452040414e-05, "loss": 0.1164, "step": 25937 }, { "epoch": 0.46263332500980986, "grad_norm": 0.29355186223983765, "learning_rate": 3.25054177591051e-05, "loss": 0.1679, "step": 25938 }, { "epoch": 0.46265116113152355, "grad_norm": 0.17166070640087128, "learning_rate": 3.2503933037076186e-05, "loss": 0.1152, "step": 25939 }, { "epoch": 0.46266899725323724, "grad_norm": 0.347345769405365, "learning_rate": 3.250244828595945e-05, "loss": 0.1546, "step": 25940 }, { "epoch": 0.4626868333749509, "grad_norm": 0.41629549860954285, "learning_rate": 3.2500963505760627e-05, "loss": 0.1663, "step": 25941 }, { "epoch": 0.46270466949666467, "grad_norm": 0.31726574897766113, "learning_rate": 3.2499478696485474e-05, "loss": 0.1519, "step": 25942 }, { "epoch": 0.46272250561837835, "grad_norm": 0.24121001362800598, "learning_rate": 3.2497993858139765e-05, "loss": 0.1636, "step": 25943 }, { "epoch": 0.46274034174009204, "grad_norm": 0.38795900344848633, "learning_rate": 3.2496508990729225e-05, "loss": 0.1713, "step": 25944 }, { "epoch": 0.46275817786180573, "grad_norm": 0.2509864866733551, "learning_rate": 3.249502409425964e-05, "loss": 0.1287, "step": 25945 }, { "epoch": 0.4627760139835194, "grad_norm": 0.24911074340343475, "learning_rate": 3.2493539168736745e-05, "loss": 0.1256, "step": 25946 }, { "epoch": 0.4627938501052331, "grad_norm": 0.24461624026298523, "learning_rate": 3.2492054214166305e-05, "loss": 0.1323, "step": 25947 }, { "epoch": 0.4628116862269468, "grad_norm": 0.32783693075180054, "learning_rate": 3.249056923055408e-05, "loss": 0.1052, "step": 25948 }, { "epoch": 0.4628295223486605, "grad_norm": 0.3568902313709259, "learning_rate": 3.2489084217905816e-05, "loss": 0.2109, "step": 25949 }, { "epoch": 0.4628473584703742, "grad_norm": 0.22166769206523895, "learning_rate": 3.2487599176227286e-05, "loss": 0.1603, "step": 25950 }, { "epoch": 0.4628651945920879, "grad_norm": 0.24175916612148285, "learning_rate": 3.2486114105524224e-05, "loss": 0.0798, "step": 25951 }, { "epoch": 0.4628830307138016, "grad_norm": 0.24645406007766724, "learning_rate": 3.24846290058024e-05, "loss": 0.1331, "step": 25952 }, { "epoch": 0.4629008668355153, "grad_norm": 0.27952513098716736, "learning_rate": 3.248314387706757e-05, "loss": 0.0735, "step": 25953 }, { "epoch": 0.462918702957229, "grad_norm": 0.2802181839942932, "learning_rate": 3.2481658719325495e-05, "loss": 0.1233, "step": 25954 }, { "epoch": 0.46293653907894267, "grad_norm": 0.20891527831554413, "learning_rate": 3.2480173532581914e-05, "loss": 0.1489, "step": 25955 }, { "epoch": 0.46295437520065635, "grad_norm": 0.45281511545181274, "learning_rate": 3.2478688316842606e-05, "loss": 0.1938, "step": 25956 }, { "epoch": 0.46297221132237004, "grad_norm": 0.29913291335105896, "learning_rate": 3.247720307211332e-05, "loss": 0.1341, "step": 25957 }, { "epoch": 0.4629900474440838, "grad_norm": 0.3208201229572296, "learning_rate": 3.247571779839981e-05, "loss": 0.1677, "step": 25958 }, { "epoch": 0.4630078835657975, "grad_norm": 0.22196722030639648, "learning_rate": 3.2474232495707834e-05, "loss": 0.1012, "step": 25959 }, { "epoch": 0.46302571968751116, "grad_norm": 0.38233399391174316, "learning_rate": 3.247274716404315e-05, "loss": 0.2327, "step": 25960 }, { "epoch": 0.46304355580922485, "grad_norm": 0.2365937978029251, "learning_rate": 3.2471261803411525e-05, "loss": 0.112, "step": 25961 }, { "epoch": 0.46306139193093854, "grad_norm": 0.2246352732181549, "learning_rate": 3.24697764138187e-05, "loss": 0.1381, "step": 25962 }, { "epoch": 0.4630792280526522, "grad_norm": 0.25872036814689636, "learning_rate": 3.246829099527044e-05, "loss": 0.132, "step": 25963 }, { "epoch": 0.4630970641743659, "grad_norm": 0.35274624824523926, "learning_rate": 3.246680554777251e-05, "loss": 0.1189, "step": 25964 }, { "epoch": 0.4631149002960796, "grad_norm": 0.24399487674236298, "learning_rate": 3.246532007133067e-05, "loss": 0.1264, "step": 25965 }, { "epoch": 0.4631327364177933, "grad_norm": 0.20977070927619934, "learning_rate": 3.246383456595066e-05, "loss": 0.1388, "step": 25966 }, { "epoch": 0.46315057253950703, "grad_norm": 0.23945015668869019, "learning_rate": 3.246234903163825e-05, "loss": 0.0913, "step": 25967 }, { "epoch": 0.4631684086612207, "grad_norm": 0.2062525451183319, "learning_rate": 3.24608634683992e-05, "loss": 0.1126, "step": 25968 }, { "epoch": 0.4631862447829344, "grad_norm": 0.2679837942123413, "learning_rate": 3.2459377876239274e-05, "loss": 0.1377, "step": 25969 }, { "epoch": 0.4632040809046481, "grad_norm": 0.31055957078933716, "learning_rate": 3.2457892255164214e-05, "loss": 0.1533, "step": 25970 }, { "epoch": 0.4632219170263618, "grad_norm": 0.31911930441856384, "learning_rate": 3.245640660517979e-05, "loss": 0.136, "step": 25971 }, { "epoch": 0.4632397531480755, "grad_norm": 0.21749882400035858, "learning_rate": 3.245492092629176e-05, "loss": 0.1551, "step": 25972 }, { "epoch": 0.46325758926978916, "grad_norm": 0.26170194149017334, "learning_rate": 3.2453435218505877e-05, "loss": 0.1527, "step": 25973 }, { "epoch": 0.46327542539150285, "grad_norm": 0.39017996191978455, "learning_rate": 3.245194948182791e-05, "loss": 0.2313, "step": 25974 }, { "epoch": 0.4632932615132166, "grad_norm": 0.2629111409187317, "learning_rate": 3.2450463716263606e-05, "loss": 0.1434, "step": 25975 }, { "epoch": 0.4633110976349303, "grad_norm": 0.24127335846424103, "learning_rate": 3.244897792181874e-05, "loss": 0.1999, "step": 25976 }, { "epoch": 0.46332893375664397, "grad_norm": 0.24200038611888885, "learning_rate": 3.244749209849906e-05, "loss": 0.1577, "step": 25977 }, { "epoch": 0.46334676987835766, "grad_norm": 0.2859693765640259, "learning_rate": 3.244600624631032e-05, "loss": 0.1158, "step": 25978 }, { "epoch": 0.46336460600007134, "grad_norm": 0.23622198402881622, "learning_rate": 3.24445203652583e-05, "loss": 0.1121, "step": 25979 }, { "epoch": 0.46338244212178503, "grad_norm": 0.24148476123809814, "learning_rate": 3.2443034455348745e-05, "loss": 0.1285, "step": 25980 }, { "epoch": 0.4634002782434987, "grad_norm": 0.22917766869068146, "learning_rate": 3.244154851658742e-05, "loss": 0.1711, "step": 25981 }, { "epoch": 0.4634181143652124, "grad_norm": 0.22683215141296387, "learning_rate": 3.244006254898007e-05, "loss": 0.1141, "step": 25982 }, { "epoch": 0.4634359504869261, "grad_norm": 0.2625378668308258, "learning_rate": 3.243857655253247e-05, "loss": 0.1204, "step": 25983 }, { "epoch": 0.46345378660863984, "grad_norm": 0.3025020360946655, "learning_rate": 3.243709052725039e-05, "loss": 0.1906, "step": 25984 }, { "epoch": 0.4634716227303535, "grad_norm": 0.34796440601348877, "learning_rate": 3.243560447313958e-05, "loss": 0.1577, "step": 25985 }, { "epoch": 0.4634894588520672, "grad_norm": 0.28842583298683167, "learning_rate": 3.243411839020579e-05, "loss": 0.1493, "step": 25986 }, { "epoch": 0.4635072949737809, "grad_norm": 0.3031710684299469, "learning_rate": 3.243263227845479e-05, "loss": 0.1257, "step": 25987 }, { "epoch": 0.4635251310954946, "grad_norm": 0.2704150080680847, "learning_rate": 3.243114613789233e-05, "loss": 0.1353, "step": 25988 }, { "epoch": 0.4635429672172083, "grad_norm": 0.29831618070602417, "learning_rate": 3.24296599685242e-05, "loss": 0.1387, "step": 25989 }, { "epoch": 0.46356080333892197, "grad_norm": 0.3365585505962372, "learning_rate": 3.242817377035613e-05, "loss": 0.1688, "step": 25990 }, { "epoch": 0.46357863946063566, "grad_norm": 0.355507493019104, "learning_rate": 3.24266875433939e-05, "loss": 0.1823, "step": 25991 }, { "epoch": 0.4635964755823494, "grad_norm": 0.2670857608318329, "learning_rate": 3.242520128764326e-05, "loss": 0.1198, "step": 25992 }, { "epoch": 0.4636143117040631, "grad_norm": 0.46618160605430603, "learning_rate": 3.242371500310998e-05, "loss": 0.19, "step": 25993 }, { "epoch": 0.4636321478257768, "grad_norm": 0.24077323079109192, "learning_rate": 3.242222868979981e-05, "loss": 0.1791, "step": 25994 }, { "epoch": 0.46364998394749046, "grad_norm": 0.24090790748596191, "learning_rate": 3.242074234771852e-05, "loss": 0.1386, "step": 25995 }, { "epoch": 0.46366782006920415, "grad_norm": 0.23734600841999054, "learning_rate": 3.241925597687186e-05, "loss": 0.1181, "step": 25996 }, { "epoch": 0.46368565619091784, "grad_norm": 0.27186650037765503, "learning_rate": 3.241776957726562e-05, "loss": 0.1372, "step": 25997 }, { "epoch": 0.4637034923126315, "grad_norm": 0.3167542815208435, "learning_rate": 3.241628314890554e-05, "loss": 0.2001, "step": 25998 }, { "epoch": 0.4637213284343452, "grad_norm": 0.2665323317050934, "learning_rate": 3.241479669179738e-05, "loss": 0.1564, "step": 25999 }, { "epoch": 0.46373916455605896, "grad_norm": 0.2914828956127167, "learning_rate": 3.2413310205946904e-05, "loss": 0.176, "step": 26000 }, { "epoch": 0.46373916455605896, "eval_loss": 0.14186939597129822, "eval_runtime": 108.4364, "eval_samples_per_second": 9.443, "eval_steps_per_second": 1.577, "step": 26000 }, { "epoch": 0.46375700067777265, "grad_norm": 0.27630114555358887, "learning_rate": 3.241182369135988e-05, "loss": 0.1203, "step": 26001 }, { "epoch": 0.46377483679948633, "grad_norm": 0.2262500822544098, "learning_rate": 3.241033714804207e-05, "loss": 0.141, "step": 26002 }, { "epoch": 0.4637926729212, "grad_norm": 0.20729343593120575, "learning_rate": 3.240885057599923e-05, "loss": 0.1336, "step": 26003 }, { "epoch": 0.4638105090429137, "grad_norm": 0.23321793973445892, "learning_rate": 3.2407363975237126e-05, "loss": 0.1561, "step": 26004 }, { "epoch": 0.4638283451646274, "grad_norm": 0.29829907417297363, "learning_rate": 3.2405877345761524e-05, "loss": 0.1255, "step": 26005 }, { "epoch": 0.4638461812863411, "grad_norm": 0.30365437269210815, "learning_rate": 3.240439068757818e-05, "loss": 0.1676, "step": 26006 }, { "epoch": 0.4638640174080548, "grad_norm": 0.32210665941238403, "learning_rate": 3.2402904000692865e-05, "loss": 0.1745, "step": 26007 }, { "epoch": 0.46388185352976846, "grad_norm": 0.27390822768211365, "learning_rate": 3.2401417285111335e-05, "loss": 0.1943, "step": 26008 }, { "epoch": 0.4638996896514822, "grad_norm": 0.3175702691078186, "learning_rate": 3.239993054083935e-05, "loss": 0.1561, "step": 26009 }, { "epoch": 0.4639175257731959, "grad_norm": 0.2405591607093811, "learning_rate": 3.239844376788268e-05, "loss": 0.173, "step": 26010 }, { "epoch": 0.4639353618949096, "grad_norm": 0.2824705243110657, "learning_rate": 3.2396956966247096e-05, "loss": 0.157, "step": 26011 }, { "epoch": 0.46395319801662327, "grad_norm": 0.2979973554611206, "learning_rate": 3.239547013593834e-05, "loss": 0.1333, "step": 26012 }, { "epoch": 0.46397103413833696, "grad_norm": 0.25221627950668335, "learning_rate": 3.23939832769622e-05, "loss": 0.1346, "step": 26013 }, { "epoch": 0.46398887026005065, "grad_norm": 0.22971589863300323, "learning_rate": 3.239249638932441e-05, "loss": 0.1261, "step": 26014 }, { "epoch": 0.46400670638176433, "grad_norm": 0.29105710983276367, "learning_rate": 3.239100947303077e-05, "loss": 0.1449, "step": 26015 }, { "epoch": 0.464024542503478, "grad_norm": 0.30627772212028503, "learning_rate": 3.2389522528087006e-05, "loss": 0.1995, "step": 26016 }, { "epoch": 0.46404237862519176, "grad_norm": 0.32698777318000793, "learning_rate": 3.2388035554498916e-05, "loss": 0.1283, "step": 26017 }, { "epoch": 0.46406021474690545, "grad_norm": 0.2415143847465515, "learning_rate": 3.2386548552272234e-05, "loss": 0.1196, "step": 26018 }, { "epoch": 0.46407805086861914, "grad_norm": 0.24074943363666534, "learning_rate": 3.238506152141275e-05, "loss": 0.1734, "step": 26019 }, { "epoch": 0.46409588699033283, "grad_norm": 0.2436971515417099, "learning_rate": 3.2383574461926214e-05, "loss": 0.1146, "step": 26020 }, { "epoch": 0.4641137231120465, "grad_norm": 0.18990497291088104, "learning_rate": 3.2382087373818395e-05, "loss": 0.1326, "step": 26021 }, { "epoch": 0.4641315592337602, "grad_norm": 0.27914443612098694, "learning_rate": 3.238060025709505e-05, "loss": 0.1401, "step": 26022 }, { "epoch": 0.4641493953554739, "grad_norm": 0.35573136806488037, "learning_rate": 3.237911311176195e-05, "loss": 0.1776, "step": 26023 }, { "epoch": 0.4641672314771876, "grad_norm": 0.31344133615493774, "learning_rate": 3.2377625937824865e-05, "loss": 0.1335, "step": 26024 }, { "epoch": 0.46418506759890127, "grad_norm": 0.2672535479068756, "learning_rate": 3.237613873528955e-05, "loss": 0.1353, "step": 26025 }, { "epoch": 0.464202903720615, "grad_norm": 0.30407804250717163, "learning_rate": 3.2374651504161775e-05, "loss": 0.1492, "step": 26026 }, { "epoch": 0.4642207398423287, "grad_norm": 0.32202044129371643, "learning_rate": 3.2373164244447303e-05, "loss": 0.1905, "step": 26027 }, { "epoch": 0.4642385759640424, "grad_norm": 0.2623530924320221, "learning_rate": 3.23716769561519e-05, "loss": 0.1268, "step": 26028 }, { "epoch": 0.4642564120857561, "grad_norm": 0.2711970806121826, "learning_rate": 3.2370189639281326e-05, "loss": 0.1789, "step": 26029 }, { "epoch": 0.46427424820746976, "grad_norm": 0.23704560101032257, "learning_rate": 3.236870229384136e-05, "loss": 0.1129, "step": 26030 }, { "epoch": 0.46429208432918345, "grad_norm": 0.2677852213382721, "learning_rate": 3.236721491983775e-05, "loss": 0.0899, "step": 26031 }, { "epoch": 0.46430992045089714, "grad_norm": 0.2846788465976715, "learning_rate": 3.236572751727628e-05, "loss": 0.1505, "step": 26032 }, { "epoch": 0.46432775657261083, "grad_norm": 0.36903154850006104, "learning_rate": 3.236424008616269e-05, "loss": 0.1548, "step": 26033 }, { "epoch": 0.46434559269432457, "grad_norm": 0.30848273634910583, "learning_rate": 3.236275262650278e-05, "loss": 0.1804, "step": 26034 }, { "epoch": 0.46436342881603826, "grad_norm": 0.2429845929145813, "learning_rate": 3.236126513830229e-05, "loss": 0.1142, "step": 26035 }, { "epoch": 0.46438126493775195, "grad_norm": 0.26715463399887085, "learning_rate": 3.235977762156699e-05, "loss": 0.1419, "step": 26036 }, { "epoch": 0.46439910105946564, "grad_norm": 0.36638882756233215, "learning_rate": 3.2358290076302657e-05, "loss": 0.1271, "step": 26037 }, { "epoch": 0.4644169371811793, "grad_norm": 0.26610100269317627, "learning_rate": 3.235680250251505e-05, "loss": 0.1367, "step": 26038 }, { "epoch": 0.464434773302893, "grad_norm": 0.23521414399147034, "learning_rate": 3.235531490020993e-05, "loss": 0.1083, "step": 26039 }, { "epoch": 0.4644526094246067, "grad_norm": 0.3382713794708252, "learning_rate": 3.235382726939307e-05, "loss": 0.1976, "step": 26040 }, { "epoch": 0.4644704455463204, "grad_norm": 0.2942153215408325, "learning_rate": 3.235233961007024e-05, "loss": 0.1865, "step": 26041 }, { "epoch": 0.4644882816680341, "grad_norm": 0.2761353850364685, "learning_rate": 3.23508519222472e-05, "loss": 0.1734, "step": 26042 }, { "epoch": 0.4645061177897478, "grad_norm": 0.26857873797416687, "learning_rate": 3.2349364205929716e-05, "loss": 0.1491, "step": 26043 }, { "epoch": 0.4645239539114615, "grad_norm": 0.21701617538928986, "learning_rate": 3.234787646112356e-05, "loss": 0.1427, "step": 26044 }, { "epoch": 0.4645417900331752, "grad_norm": 0.21352718770503998, "learning_rate": 3.23463886878345e-05, "loss": 0.1436, "step": 26045 }, { "epoch": 0.4645596261548889, "grad_norm": 0.19101294875144958, "learning_rate": 3.2344900886068294e-05, "loss": 0.1285, "step": 26046 }, { "epoch": 0.46457746227660257, "grad_norm": 0.1992942988872528, "learning_rate": 3.234341305583072e-05, "loss": 0.1476, "step": 26047 }, { "epoch": 0.46459529839831626, "grad_norm": 0.3391256630420685, "learning_rate": 3.234192519712754e-05, "loss": 0.1251, "step": 26048 }, { "epoch": 0.46461313452002995, "grad_norm": 0.39006611704826355, "learning_rate": 3.2340437309964525e-05, "loss": 0.1321, "step": 26049 }, { "epoch": 0.46463097064174363, "grad_norm": 0.24516627192497253, "learning_rate": 3.233894939434744e-05, "loss": 0.1479, "step": 26050 }, { "epoch": 0.4646488067634574, "grad_norm": 0.3214873969554901, "learning_rate": 3.2337461450282044e-05, "loss": 0.1549, "step": 26051 }, { "epoch": 0.46466664288517107, "grad_norm": 0.2771398723125458, "learning_rate": 3.233597347777412e-05, "loss": 0.0903, "step": 26052 }, { "epoch": 0.46468447900688475, "grad_norm": 0.2523135244846344, "learning_rate": 3.233448547682943e-05, "loss": 0.1039, "step": 26053 }, { "epoch": 0.46470231512859844, "grad_norm": 0.3429401218891144, "learning_rate": 3.233299744745374e-05, "loss": 0.1661, "step": 26054 }, { "epoch": 0.46472015125031213, "grad_norm": 0.28885892033576965, "learning_rate": 3.233150938965281e-05, "loss": 0.1459, "step": 26055 }, { "epoch": 0.4647379873720258, "grad_norm": 0.4163936376571655, "learning_rate": 3.233002130343243e-05, "loss": 0.19, "step": 26056 }, { "epoch": 0.4647558234937395, "grad_norm": 0.2808150351047516, "learning_rate": 3.232853318879835e-05, "loss": 0.1527, "step": 26057 }, { "epoch": 0.4647736596154532, "grad_norm": 0.29562413692474365, "learning_rate": 3.232704504575634e-05, "loss": 0.1255, "step": 26058 }, { "epoch": 0.46479149573716694, "grad_norm": 0.4406629502773285, "learning_rate": 3.232555687431218e-05, "loss": 0.1042, "step": 26059 }, { "epoch": 0.4648093318588806, "grad_norm": 0.24277228116989136, "learning_rate": 3.232406867447163e-05, "loss": 0.1272, "step": 26060 }, { "epoch": 0.4648271679805943, "grad_norm": 0.3693324327468872, "learning_rate": 3.2322580446240456e-05, "loss": 0.2228, "step": 26061 }, { "epoch": 0.464845004102308, "grad_norm": 0.2505847215652466, "learning_rate": 3.2321092189624435e-05, "loss": 0.1497, "step": 26062 }, { "epoch": 0.4648628402240217, "grad_norm": 0.2134181708097458, "learning_rate": 3.2319603904629334e-05, "loss": 0.1548, "step": 26063 }, { "epoch": 0.4648806763457354, "grad_norm": 0.34179818630218506, "learning_rate": 3.231811559126091e-05, "loss": 0.194, "step": 26064 }, { "epoch": 0.46489851246744907, "grad_norm": 0.5091789960861206, "learning_rate": 3.231662724952496e-05, "loss": 0.1507, "step": 26065 }, { "epoch": 0.46491634858916275, "grad_norm": 0.2267841249704361, "learning_rate": 3.231513887942722e-05, "loss": 0.1596, "step": 26066 }, { "epoch": 0.46493418471087644, "grad_norm": 0.27425557374954224, "learning_rate": 3.231365048097348e-05, "loss": 0.1796, "step": 26067 }, { "epoch": 0.4649520208325902, "grad_norm": 0.21191570162773132, "learning_rate": 3.231216205416951e-05, "loss": 0.1149, "step": 26068 }, { "epoch": 0.4649698569543039, "grad_norm": 0.3472452759742737, "learning_rate": 3.231067359902107e-05, "loss": 0.1526, "step": 26069 }, { "epoch": 0.46498769307601756, "grad_norm": 0.25764933228492737, "learning_rate": 3.230918511553393e-05, "loss": 0.1294, "step": 26070 }, { "epoch": 0.46500552919773125, "grad_norm": 0.327184796333313, "learning_rate": 3.230769660371387e-05, "loss": 0.1379, "step": 26071 }, { "epoch": 0.46502336531944494, "grad_norm": 0.25578218698501587, "learning_rate": 3.2306208063566646e-05, "loss": 0.1556, "step": 26072 }, { "epoch": 0.4650412014411586, "grad_norm": 0.26179003715515137, "learning_rate": 3.230471949509804e-05, "loss": 0.1465, "step": 26073 }, { "epoch": 0.4650590375628723, "grad_norm": 0.3551662862300873, "learning_rate": 3.230323089831382e-05, "loss": 0.1229, "step": 26074 }, { "epoch": 0.465076873684586, "grad_norm": 0.2909257709980011, "learning_rate": 3.230174227321976e-05, "loss": 0.1796, "step": 26075 }, { "epoch": 0.46509470980629974, "grad_norm": 0.26737260818481445, "learning_rate": 3.230025361982162e-05, "loss": 0.1555, "step": 26076 }, { "epoch": 0.46511254592801343, "grad_norm": 0.27418631315231323, "learning_rate": 3.229876493812517e-05, "loss": 0.1571, "step": 26077 }, { "epoch": 0.4651303820497271, "grad_norm": 0.377976655960083, "learning_rate": 3.229727622813619e-05, "loss": 0.1707, "step": 26078 }, { "epoch": 0.4651482181714408, "grad_norm": 0.28598037362098694, "learning_rate": 3.229578748986045e-05, "loss": 0.137, "step": 26079 }, { "epoch": 0.4651660542931545, "grad_norm": 0.24126529693603516, "learning_rate": 3.2294298723303715e-05, "loss": 0.1146, "step": 26080 }, { "epoch": 0.4651838904148682, "grad_norm": 0.45753878355026245, "learning_rate": 3.2292809928471765e-05, "loss": 0.1837, "step": 26081 }, { "epoch": 0.46520172653658187, "grad_norm": 0.25630712509155273, "learning_rate": 3.229132110537036e-05, "loss": 0.1432, "step": 26082 }, { "epoch": 0.46521956265829556, "grad_norm": 0.22521261870861053, "learning_rate": 3.228983225400527e-05, "loss": 0.1659, "step": 26083 }, { "epoch": 0.46523739878000925, "grad_norm": 0.2586803436279297, "learning_rate": 3.2288343374382286e-05, "loss": 0.1376, "step": 26084 }, { "epoch": 0.465255234901723, "grad_norm": 0.22216632962226868, "learning_rate": 3.2286854466507155e-05, "loss": 0.1095, "step": 26085 }, { "epoch": 0.4652730710234367, "grad_norm": 0.2442510724067688, "learning_rate": 3.228536553038566e-05, "loss": 0.1695, "step": 26086 }, { "epoch": 0.46529090714515037, "grad_norm": 0.41868990659713745, "learning_rate": 3.2283876566023565e-05, "loss": 0.1761, "step": 26087 }, { "epoch": 0.46530874326686406, "grad_norm": 0.25471076369285583, "learning_rate": 3.228238757342667e-05, "loss": 0.1452, "step": 26088 }, { "epoch": 0.46532657938857774, "grad_norm": 0.2155960500240326, "learning_rate": 3.2280898552600716e-05, "loss": 0.1125, "step": 26089 }, { "epoch": 0.46534441551029143, "grad_norm": 0.25104236602783203, "learning_rate": 3.227940950355147e-05, "loss": 0.1527, "step": 26090 }, { "epoch": 0.4653622516320051, "grad_norm": 0.3023114502429962, "learning_rate": 3.227792042628473e-05, "loss": 0.1748, "step": 26091 }, { "epoch": 0.4653800877537188, "grad_norm": 0.257811963558197, "learning_rate": 3.2276431320806254e-05, "loss": 0.1379, "step": 26092 }, { "epoch": 0.46539792387543255, "grad_norm": 0.24357429146766663, "learning_rate": 3.227494218712183e-05, "loss": 0.1585, "step": 26093 }, { "epoch": 0.46541575999714624, "grad_norm": 0.3331894278526306, "learning_rate": 3.22734530252372e-05, "loss": 0.1613, "step": 26094 }, { "epoch": 0.4654335961188599, "grad_norm": 0.2197878062725067, "learning_rate": 3.2271963835158166e-05, "loss": 0.1371, "step": 26095 }, { "epoch": 0.4654514322405736, "grad_norm": 0.2556401491165161, "learning_rate": 3.227047461689048e-05, "loss": 0.1727, "step": 26096 }, { "epoch": 0.4654692683622873, "grad_norm": 0.2661169767379761, "learning_rate": 3.226898537043993e-05, "loss": 0.1545, "step": 26097 }, { "epoch": 0.465487104484001, "grad_norm": 0.34631234407424927, "learning_rate": 3.226749609581228e-05, "loss": 0.1539, "step": 26098 }, { "epoch": 0.4655049406057147, "grad_norm": 0.29225045442581177, "learning_rate": 3.22660067930133e-05, "loss": 0.1213, "step": 26099 }, { "epoch": 0.46552277672742837, "grad_norm": 0.23258855938911438, "learning_rate": 3.226451746204877e-05, "loss": 0.0938, "step": 26100 }, { "epoch": 0.4655406128491421, "grad_norm": 0.3291206359863281, "learning_rate": 3.226302810292447e-05, "loss": 0.1993, "step": 26101 }, { "epoch": 0.4655584489708558, "grad_norm": 0.32316386699676514, "learning_rate": 3.226153871564615e-05, "loss": 0.163, "step": 26102 }, { "epoch": 0.4655762850925695, "grad_norm": 0.25505632162094116, "learning_rate": 3.226004930021961e-05, "loss": 0.1481, "step": 26103 }, { "epoch": 0.4655941212142832, "grad_norm": 0.25649598240852356, "learning_rate": 3.225855985665061e-05, "loss": 0.1526, "step": 26104 }, { "epoch": 0.46561195733599686, "grad_norm": 0.3256259858608246, "learning_rate": 3.225707038494492e-05, "loss": 0.1765, "step": 26105 }, { "epoch": 0.46562979345771055, "grad_norm": 0.36218512058258057, "learning_rate": 3.2255580885108313e-05, "loss": 0.1235, "step": 26106 }, { "epoch": 0.46564762957942424, "grad_norm": 0.29450666904449463, "learning_rate": 3.225409135714658e-05, "loss": 0.1375, "step": 26107 }, { "epoch": 0.4656654657011379, "grad_norm": 0.29874613881111145, "learning_rate": 3.2252601801065485e-05, "loss": 0.1601, "step": 26108 }, { "epoch": 0.4656833018228516, "grad_norm": 0.2843853831291199, "learning_rate": 3.225111221687079e-05, "loss": 0.1535, "step": 26109 }, { "epoch": 0.46570113794456536, "grad_norm": 0.2098836749792099, "learning_rate": 3.224962260456829e-05, "loss": 0.1364, "step": 26110 }, { "epoch": 0.46571897406627905, "grad_norm": 0.2120732069015503, "learning_rate": 3.224813296416374e-05, "loss": 0.1429, "step": 26111 }, { "epoch": 0.46573681018799273, "grad_norm": 0.3068566620349884, "learning_rate": 3.224664329566293e-05, "loss": 0.124, "step": 26112 }, { "epoch": 0.4657546463097064, "grad_norm": 0.23693221807479858, "learning_rate": 3.224515359907162e-05, "loss": 0.0939, "step": 26113 }, { "epoch": 0.4657724824314201, "grad_norm": 0.31513774394989014, "learning_rate": 3.22436638743956e-05, "loss": 0.1736, "step": 26114 }, { "epoch": 0.4657903185531338, "grad_norm": 0.28280919790267944, "learning_rate": 3.224217412164063e-05, "loss": 0.1424, "step": 26115 }, { "epoch": 0.4658081546748475, "grad_norm": 0.3217497169971466, "learning_rate": 3.2240684340812496e-05, "loss": 0.1927, "step": 26116 }, { "epoch": 0.4658259907965612, "grad_norm": 0.3084224760532379, "learning_rate": 3.223919453191697e-05, "loss": 0.1598, "step": 26117 }, { "epoch": 0.4658438269182749, "grad_norm": 0.3566245138645172, "learning_rate": 3.2237704694959826e-05, "loss": 0.1943, "step": 26118 }, { "epoch": 0.4658616630399886, "grad_norm": 0.26105552911758423, "learning_rate": 3.223621482994683e-05, "loss": 0.1106, "step": 26119 }, { "epoch": 0.4658794991617023, "grad_norm": 0.3417191505432129, "learning_rate": 3.2234724936883774e-05, "loss": 0.2076, "step": 26120 }, { "epoch": 0.465897335283416, "grad_norm": 0.22240227460861206, "learning_rate": 3.2233235015776426e-05, "loss": 0.1533, "step": 26121 }, { "epoch": 0.46591517140512967, "grad_norm": 0.2728138566017151, "learning_rate": 3.223174506663056e-05, "loss": 0.1457, "step": 26122 }, { "epoch": 0.46593300752684336, "grad_norm": 0.2878311276435852, "learning_rate": 3.223025508945195e-05, "loss": 0.0855, "step": 26123 }, { "epoch": 0.46595084364855704, "grad_norm": 0.25562503933906555, "learning_rate": 3.222876508424637e-05, "loss": 0.1163, "step": 26124 }, { "epoch": 0.46596867977027073, "grad_norm": 0.2526286542415619, "learning_rate": 3.2227275051019614e-05, "loss": 0.1285, "step": 26125 }, { "epoch": 0.4659865158919844, "grad_norm": 0.26987364888191223, "learning_rate": 3.222578498977743e-05, "loss": 0.1805, "step": 26126 }, { "epoch": 0.46600435201369816, "grad_norm": 0.30599212646484375, "learning_rate": 3.222429490052561e-05, "loss": 0.1626, "step": 26127 }, { "epoch": 0.46602218813541185, "grad_norm": 0.302566260099411, "learning_rate": 3.222280478326993e-05, "loss": 0.1576, "step": 26128 }, { "epoch": 0.46604002425712554, "grad_norm": 0.22841358184814453, "learning_rate": 3.2221314638016164e-05, "loss": 0.1535, "step": 26129 }, { "epoch": 0.46605786037883923, "grad_norm": 0.2445744276046753, "learning_rate": 3.221982446477009e-05, "loss": 0.1436, "step": 26130 }, { "epoch": 0.4660756965005529, "grad_norm": 0.2645500898361206, "learning_rate": 3.221833426353748e-05, "loss": 0.1718, "step": 26131 }, { "epoch": 0.4660935326222666, "grad_norm": 0.3193371295928955, "learning_rate": 3.221684403432412e-05, "loss": 0.1416, "step": 26132 }, { "epoch": 0.4661113687439803, "grad_norm": 0.31366005539894104, "learning_rate": 3.221535377713577e-05, "loss": 0.196, "step": 26133 }, { "epoch": 0.466129204865694, "grad_norm": 0.27750080823898315, "learning_rate": 3.221386349197822e-05, "loss": 0.1492, "step": 26134 }, { "epoch": 0.4661470409874077, "grad_norm": 0.25234177708625793, "learning_rate": 3.2212373178857244e-05, "loss": 0.1518, "step": 26135 }, { "epoch": 0.4661648771091214, "grad_norm": 0.4431353211402893, "learning_rate": 3.2210882837778615e-05, "loss": 0.131, "step": 26136 }, { "epoch": 0.4661827132308351, "grad_norm": 0.21329589188098907, "learning_rate": 3.2209392468748116e-05, "loss": 0.1387, "step": 26137 }, { "epoch": 0.4662005493525488, "grad_norm": 0.24988535046577454, "learning_rate": 3.220790207177153e-05, "loss": 0.1433, "step": 26138 }, { "epoch": 0.4662183854742625, "grad_norm": 0.32674267888069153, "learning_rate": 3.220641164685462e-05, "loss": 0.1576, "step": 26139 }, { "epoch": 0.46623622159597616, "grad_norm": 0.385643869638443, "learning_rate": 3.220492119400317e-05, "loss": 0.1767, "step": 26140 }, { "epoch": 0.46625405771768985, "grad_norm": 0.2466435730457306, "learning_rate": 3.2203430713222944e-05, "loss": 0.1483, "step": 26141 }, { "epoch": 0.46627189383940354, "grad_norm": 0.22646495699882507, "learning_rate": 3.220194020451975e-05, "loss": 0.1528, "step": 26142 }, { "epoch": 0.4662897299611172, "grad_norm": 0.30912965536117554, "learning_rate": 3.220044966789935e-05, "loss": 0.1406, "step": 26143 }, { "epoch": 0.46630756608283097, "grad_norm": 0.3339691162109375, "learning_rate": 3.2198959103367506e-05, "loss": 0.1392, "step": 26144 }, { "epoch": 0.46632540220454466, "grad_norm": 0.25986963510513306, "learning_rate": 3.219746851093002e-05, "loss": 0.127, "step": 26145 }, { "epoch": 0.46634323832625835, "grad_norm": 0.2913098931312561, "learning_rate": 3.219597789059265e-05, "loss": 0.1771, "step": 26146 }, { "epoch": 0.46636107444797203, "grad_norm": 0.2323073297739029, "learning_rate": 3.2194487242361194e-05, "loss": 0.1556, "step": 26147 }, { "epoch": 0.4663789105696857, "grad_norm": 0.21654458343982697, "learning_rate": 3.2192996566241414e-05, "loss": 0.1557, "step": 26148 }, { "epoch": 0.4663967466913994, "grad_norm": 0.2698739171028137, "learning_rate": 3.2191505862239105e-05, "loss": 0.1202, "step": 26149 }, { "epoch": 0.4664145828131131, "grad_norm": 0.2647920846939087, "learning_rate": 3.219001513036002e-05, "loss": 0.1476, "step": 26150 }, { "epoch": 0.4664324189348268, "grad_norm": 0.23627696931362152, "learning_rate": 3.218852437060996e-05, "loss": 0.172, "step": 26151 }, { "epoch": 0.46645025505654053, "grad_norm": 0.31318241357803345, "learning_rate": 3.2187033582994704e-05, "loss": 0.1284, "step": 26152 }, { "epoch": 0.4664680911782542, "grad_norm": 0.3041784167289734, "learning_rate": 3.218554276752002e-05, "loss": 0.1398, "step": 26153 }, { "epoch": 0.4664859272999679, "grad_norm": 0.2556712031364441, "learning_rate": 3.218405192419168e-05, "loss": 0.1495, "step": 26154 }, { "epoch": 0.4665037634216816, "grad_norm": 0.24803361296653748, "learning_rate": 3.2182561053015486e-05, "loss": 0.1444, "step": 26155 }, { "epoch": 0.4665215995433953, "grad_norm": 0.16730177402496338, "learning_rate": 3.2181070153997204e-05, "loss": 0.11, "step": 26156 }, { "epoch": 0.46653943566510897, "grad_norm": 0.26929745078086853, "learning_rate": 3.217957922714261e-05, "loss": 0.1484, "step": 26157 }, { "epoch": 0.46655727178682266, "grad_norm": 0.32538139820098877, "learning_rate": 3.217808827245748e-05, "loss": 0.1719, "step": 26158 }, { "epoch": 0.46657510790853635, "grad_norm": 0.2770591378211975, "learning_rate": 3.2176597289947616e-05, "loss": 0.1129, "step": 26159 }, { "epoch": 0.4665929440302501, "grad_norm": 0.27576369047164917, "learning_rate": 3.2175106279618775e-05, "loss": 0.1127, "step": 26160 }, { "epoch": 0.4666107801519638, "grad_norm": 0.3002508282661438, "learning_rate": 3.217361524147674e-05, "loss": 0.1458, "step": 26161 }, { "epoch": 0.46662861627367747, "grad_norm": 0.23747000098228455, "learning_rate": 3.21721241755273e-05, "loss": 0.1056, "step": 26162 }, { "epoch": 0.46664645239539115, "grad_norm": 0.25853633880615234, "learning_rate": 3.2170633081776224e-05, "loss": 0.1946, "step": 26163 }, { "epoch": 0.46666428851710484, "grad_norm": 0.5431085228919983, "learning_rate": 3.21691419602293e-05, "loss": 0.3354, "step": 26164 }, { "epoch": 0.46668212463881853, "grad_norm": 0.3621675372123718, "learning_rate": 3.2167650810892305e-05, "loss": 0.1782, "step": 26165 }, { "epoch": 0.4666999607605322, "grad_norm": 0.4184924364089966, "learning_rate": 3.216615963377103e-05, "loss": 0.193, "step": 26166 }, { "epoch": 0.4667177968822459, "grad_norm": 0.2539195716381073, "learning_rate": 3.216466842887123e-05, "loss": 0.1371, "step": 26167 }, { "epoch": 0.4667356330039596, "grad_norm": 0.2213921993970871, "learning_rate": 3.216317719619871e-05, "loss": 0.1359, "step": 26168 }, { "epoch": 0.46675346912567334, "grad_norm": 0.3268488645553589, "learning_rate": 3.2161685935759235e-05, "loss": 0.1401, "step": 26169 }, { "epoch": 0.466771305247387, "grad_norm": 0.3819471001625061, "learning_rate": 3.2160194647558593e-05, "loss": 0.1795, "step": 26170 }, { "epoch": 0.4667891413691007, "grad_norm": 0.27619296312332153, "learning_rate": 3.2158703331602566e-05, "loss": 0.1735, "step": 26171 }, { "epoch": 0.4668069774908144, "grad_norm": 0.29396742582321167, "learning_rate": 3.215721198789693e-05, "loss": 0.1667, "step": 26172 }, { "epoch": 0.4668248136125281, "grad_norm": 0.22877304255962372, "learning_rate": 3.2155720616447474e-05, "loss": 0.1686, "step": 26173 }, { "epoch": 0.4668426497342418, "grad_norm": 0.27977070212364197, "learning_rate": 3.215422921725997e-05, "loss": 0.1475, "step": 26174 }, { "epoch": 0.46686048585595546, "grad_norm": 0.31222712993621826, "learning_rate": 3.21527377903402e-05, "loss": 0.1725, "step": 26175 }, { "epoch": 0.46687832197766915, "grad_norm": 0.25478875637054443, "learning_rate": 3.215124633569395e-05, "loss": 0.1458, "step": 26176 }, { "epoch": 0.4668961580993829, "grad_norm": 0.42479678988456726, "learning_rate": 3.2149754853326994e-05, "loss": 0.1654, "step": 26177 }, { "epoch": 0.4669139942210966, "grad_norm": 0.2332555651664734, "learning_rate": 3.214826334324513e-05, "loss": 0.137, "step": 26178 }, { "epoch": 0.4669318303428103, "grad_norm": 0.26375123858451843, "learning_rate": 3.214677180545412e-05, "loss": 0.1554, "step": 26179 }, { "epoch": 0.46694966646452396, "grad_norm": 0.2504287660121918, "learning_rate": 3.2145280239959756e-05, "loss": 0.1419, "step": 26180 }, { "epoch": 0.46696750258623765, "grad_norm": 0.23868931829929352, "learning_rate": 3.2143788646767825e-05, "loss": 0.2077, "step": 26181 }, { "epoch": 0.46698533870795134, "grad_norm": 0.28528350591659546, "learning_rate": 3.214229702588409e-05, "loss": 0.1334, "step": 26182 }, { "epoch": 0.467003174829665, "grad_norm": 0.28509292006492615, "learning_rate": 3.2140805377314346e-05, "loss": 0.2047, "step": 26183 }, { "epoch": 0.4670210109513787, "grad_norm": 0.3331129252910614, "learning_rate": 3.2139313701064384e-05, "loss": 0.1395, "step": 26184 }, { "epoch": 0.4670388470730924, "grad_norm": 0.2526521384716034, "learning_rate": 3.2137821997139965e-05, "loss": 0.1272, "step": 26185 }, { "epoch": 0.46705668319480614, "grad_norm": 0.21979525685310364, "learning_rate": 3.213633026554689e-05, "loss": 0.1307, "step": 26186 }, { "epoch": 0.46707451931651983, "grad_norm": 0.20031945407390594, "learning_rate": 3.213483850629093e-05, "loss": 0.1178, "step": 26187 }, { "epoch": 0.4670923554382335, "grad_norm": 0.19642189145088196, "learning_rate": 3.213334671937788e-05, "loss": 0.1432, "step": 26188 }, { "epoch": 0.4671101915599472, "grad_norm": 0.36276355385780334, "learning_rate": 3.213185490481351e-05, "loss": 0.12, "step": 26189 }, { "epoch": 0.4671280276816609, "grad_norm": 0.32859084010124207, "learning_rate": 3.2130363062603594e-05, "loss": 0.1636, "step": 26190 }, { "epoch": 0.4671458638033746, "grad_norm": 0.2847687900066376, "learning_rate": 3.212887119275394e-05, "loss": 0.1652, "step": 26191 }, { "epoch": 0.46716369992508827, "grad_norm": 0.3417710065841675, "learning_rate": 3.212737929527032e-05, "loss": 0.0908, "step": 26192 }, { "epoch": 0.46718153604680196, "grad_norm": 0.24598552286624908, "learning_rate": 3.2125887370158515e-05, "loss": 0.1542, "step": 26193 }, { "epoch": 0.4671993721685157, "grad_norm": 0.3142749071121216, "learning_rate": 3.212439541742431e-05, "loss": 0.1332, "step": 26194 }, { "epoch": 0.4672172082902294, "grad_norm": 0.35682913661003113, "learning_rate": 3.212290343707348e-05, "loss": 0.2032, "step": 26195 }, { "epoch": 0.4672350444119431, "grad_norm": 0.3021930158138275, "learning_rate": 3.212141142911183e-05, "loss": 0.1505, "step": 26196 }, { "epoch": 0.46725288053365677, "grad_norm": 0.32572516798973083, "learning_rate": 3.2119919393545117e-05, "loss": 0.2172, "step": 26197 }, { "epoch": 0.46727071665537045, "grad_norm": 0.2606397271156311, "learning_rate": 3.2118427330379144e-05, "loss": 0.1411, "step": 26198 }, { "epoch": 0.46728855277708414, "grad_norm": 0.29652053117752075, "learning_rate": 3.2116935239619685e-05, "loss": 0.0634, "step": 26199 }, { "epoch": 0.46730638889879783, "grad_norm": 0.3495809733867645, "learning_rate": 3.2115443121272524e-05, "loss": 0.086, "step": 26200 }, { "epoch": 0.4673242250205115, "grad_norm": 0.2699562609195709, "learning_rate": 3.211395097534346e-05, "loss": 0.1264, "step": 26201 }, { "epoch": 0.4673420611422252, "grad_norm": 0.26937612891197205, "learning_rate": 3.211245880183825e-05, "loss": 0.1438, "step": 26202 }, { "epoch": 0.46735989726393895, "grad_norm": 0.23679888248443604, "learning_rate": 3.2110966600762704e-05, "loss": 0.1291, "step": 26203 }, { "epoch": 0.46737773338565264, "grad_norm": 0.31273531913757324, "learning_rate": 3.2109474372122584e-05, "loss": 0.1436, "step": 26204 }, { "epoch": 0.4673955695073663, "grad_norm": 0.31768372654914856, "learning_rate": 3.21079821159237e-05, "loss": 0.1285, "step": 26205 }, { "epoch": 0.46741340562908, "grad_norm": 0.29872846603393555, "learning_rate": 3.2106489832171807e-05, "loss": 0.1618, "step": 26206 }, { "epoch": 0.4674312417507937, "grad_norm": 0.2371252179145813, "learning_rate": 3.210499752087272e-05, "loss": 0.1422, "step": 26207 }, { "epoch": 0.4674490778725074, "grad_norm": 0.2899041175842285, "learning_rate": 3.210350518203221e-05, "loss": 0.1996, "step": 26208 }, { "epoch": 0.4674669139942211, "grad_norm": 0.41877254843711853, "learning_rate": 3.2102012815656046e-05, "loss": 0.1797, "step": 26209 }, { "epoch": 0.46748475011593477, "grad_norm": 0.24062591791152954, "learning_rate": 3.2100520421750034e-05, "loss": 0.1186, "step": 26210 }, { "epoch": 0.4675025862376485, "grad_norm": 0.2736894488334656, "learning_rate": 3.209902800031995e-05, "loss": 0.114, "step": 26211 }, { "epoch": 0.4675204223593622, "grad_norm": 0.29374274611473083, "learning_rate": 3.209753555137158e-05, "loss": 0.1925, "step": 26212 }, { "epoch": 0.4675382584810759, "grad_norm": 0.22029341757297516, "learning_rate": 3.209604307491072e-05, "loss": 0.1387, "step": 26213 }, { "epoch": 0.4675560946027896, "grad_norm": 0.3146451413631439, "learning_rate": 3.209455057094315e-05, "loss": 0.179, "step": 26214 }, { "epoch": 0.46757393072450326, "grad_norm": 0.26568135619163513, "learning_rate": 3.2093058039474633e-05, "loss": 0.181, "step": 26215 }, { "epoch": 0.46759176684621695, "grad_norm": 0.3594539165496826, "learning_rate": 3.209156548051099e-05, "loss": 0.1788, "step": 26216 }, { "epoch": 0.46760960296793064, "grad_norm": 0.23460763692855835, "learning_rate": 3.2090072894057977e-05, "loss": 0.1599, "step": 26217 }, { "epoch": 0.4676274390896443, "grad_norm": 0.25468727946281433, "learning_rate": 3.2088580280121394e-05, "loss": 0.136, "step": 26218 }, { "epoch": 0.46764527521135807, "grad_norm": 0.3144964575767517, "learning_rate": 3.208708763870703e-05, "loss": 0.1411, "step": 26219 }, { "epoch": 0.46766311133307176, "grad_norm": 0.26124510169029236, "learning_rate": 3.2085594969820666e-05, "loss": 0.141, "step": 26220 }, { "epoch": 0.46768094745478545, "grad_norm": 0.3724759817123413, "learning_rate": 3.208410227346809e-05, "loss": 0.1772, "step": 26221 }, { "epoch": 0.46769878357649913, "grad_norm": 0.22663894295692444, "learning_rate": 3.208260954965508e-05, "loss": 0.1209, "step": 26222 }, { "epoch": 0.4677166196982128, "grad_norm": 0.2945179343223572, "learning_rate": 3.208111679838744e-05, "loss": 0.1359, "step": 26223 }, { "epoch": 0.4677344558199265, "grad_norm": 0.3263687193393707, "learning_rate": 3.2079624019670933e-05, "loss": 0.1457, "step": 26224 }, { "epoch": 0.4677522919416402, "grad_norm": 0.28166189789772034, "learning_rate": 3.207813121351137e-05, "loss": 0.1624, "step": 26225 }, { "epoch": 0.4677701280633539, "grad_norm": 0.6706572771072388, "learning_rate": 3.207663837991452e-05, "loss": 0.1583, "step": 26226 }, { "epoch": 0.4677879641850676, "grad_norm": 0.19967494904994965, "learning_rate": 3.207514551888618e-05, "loss": 0.163, "step": 26227 }, { "epoch": 0.4678058003067813, "grad_norm": 0.2821843922138214, "learning_rate": 3.207365263043213e-05, "loss": 0.1488, "step": 26228 }, { "epoch": 0.467823636428495, "grad_norm": 0.2672920227050781, "learning_rate": 3.207215971455816e-05, "loss": 0.0851, "step": 26229 }, { "epoch": 0.4678414725502087, "grad_norm": 0.26915085315704346, "learning_rate": 3.207066677127005e-05, "loss": 0.1639, "step": 26230 }, { "epoch": 0.4678593086719224, "grad_norm": 0.28028029203414917, "learning_rate": 3.206917380057359e-05, "loss": 0.1305, "step": 26231 }, { "epoch": 0.46787714479363607, "grad_norm": 0.24594922363758087, "learning_rate": 3.206768080247458e-05, "loss": 0.1267, "step": 26232 }, { "epoch": 0.46789498091534976, "grad_norm": 0.3995407819747925, "learning_rate": 3.20661877769788e-05, "loss": 0.1798, "step": 26233 }, { "epoch": 0.46791281703706344, "grad_norm": 0.30076685547828674, "learning_rate": 3.206469472409204e-05, "loss": 0.1124, "step": 26234 }, { "epoch": 0.46793065315877713, "grad_norm": 0.2927619516849518, "learning_rate": 3.206320164382007e-05, "loss": 0.1535, "step": 26235 }, { "epoch": 0.4679484892804909, "grad_norm": 0.19791756570339203, "learning_rate": 3.20617085361687e-05, "loss": 0.1173, "step": 26236 }, { "epoch": 0.46796632540220456, "grad_norm": 0.33532899618148804, "learning_rate": 3.20602154011437e-05, "loss": 0.1516, "step": 26237 }, { "epoch": 0.46798416152391825, "grad_norm": 0.3091859519481659, "learning_rate": 3.205872223875087e-05, "loss": 0.1259, "step": 26238 }, { "epoch": 0.46800199764563194, "grad_norm": 0.3842073380947113, "learning_rate": 3.2057229048996e-05, "loss": 0.1364, "step": 26239 }, { "epoch": 0.4680198337673456, "grad_norm": 0.24250420928001404, "learning_rate": 3.205573583188486e-05, "loss": 0.1332, "step": 26240 }, { "epoch": 0.4680376698890593, "grad_norm": 0.2488769143819809, "learning_rate": 3.205424258742327e-05, "loss": 0.1598, "step": 26241 }, { "epoch": 0.468055506010773, "grad_norm": 0.5432834625244141, "learning_rate": 3.2052749315616985e-05, "loss": 0.1751, "step": 26242 }, { "epoch": 0.4680733421324867, "grad_norm": 0.4061277210712433, "learning_rate": 3.205125601647181e-05, "loss": 0.1791, "step": 26243 }, { "epoch": 0.4680911782542004, "grad_norm": 0.2756275534629822, "learning_rate": 3.2049762689993536e-05, "loss": 0.1877, "step": 26244 }, { "epoch": 0.4681090143759141, "grad_norm": 0.18963421881198883, "learning_rate": 3.204826933618794e-05, "loss": 0.1165, "step": 26245 }, { "epoch": 0.4681268504976278, "grad_norm": 0.2764233350753784, "learning_rate": 3.2046775955060823e-05, "loss": 0.1928, "step": 26246 }, { "epoch": 0.4681446866193415, "grad_norm": 0.20233765244483948, "learning_rate": 3.2045282546617975e-05, "loss": 0.1189, "step": 26247 }, { "epoch": 0.4681625227410552, "grad_norm": 0.2738974392414093, "learning_rate": 3.2043789110865164e-05, "loss": 0.1613, "step": 26248 }, { "epoch": 0.4681803588627689, "grad_norm": 0.19651754200458527, "learning_rate": 3.20422956478082e-05, "loss": 0.1238, "step": 26249 }, { "epoch": 0.46819819498448256, "grad_norm": 0.27636095881462097, "learning_rate": 3.2040802157452866e-05, "loss": 0.1552, "step": 26250 }, { "epoch": 0.46821603110619625, "grad_norm": 0.29355713725090027, "learning_rate": 3.2039308639804946e-05, "loss": 0.1198, "step": 26251 }, { "epoch": 0.46823386722790994, "grad_norm": 0.26799729466438293, "learning_rate": 3.203781509487024e-05, "loss": 0.1602, "step": 26252 }, { "epoch": 0.4682517033496237, "grad_norm": 0.2301194667816162, "learning_rate": 3.203632152265453e-05, "loss": 0.1404, "step": 26253 }, { "epoch": 0.46826953947133737, "grad_norm": 0.28055834770202637, "learning_rate": 3.2034827923163605e-05, "loss": 0.1512, "step": 26254 }, { "epoch": 0.46828737559305106, "grad_norm": 0.34947171807289124, "learning_rate": 3.2033334296403266e-05, "loss": 0.1523, "step": 26255 }, { "epoch": 0.46830521171476475, "grad_norm": 0.30112358927726746, "learning_rate": 3.203184064237929e-05, "loss": 0.1996, "step": 26256 }, { "epoch": 0.46832304783647843, "grad_norm": 0.32969823479652405, "learning_rate": 3.203034696109748e-05, "loss": 0.1563, "step": 26257 }, { "epoch": 0.4683408839581921, "grad_norm": 0.3396940231323242, "learning_rate": 3.2028853252563594e-05, "loss": 0.1402, "step": 26258 }, { "epoch": 0.4683587200799058, "grad_norm": 0.22942569851875305, "learning_rate": 3.202735951678346e-05, "loss": 0.0856, "step": 26259 }, { "epoch": 0.4683765562016195, "grad_norm": 0.20532183349132538, "learning_rate": 3.2025865753762855e-05, "loss": 0.1298, "step": 26260 }, { "epoch": 0.46839439232333324, "grad_norm": 0.2443859875202179, "learning_rate": 3.202437196350756e-05, "loss": 0.1024, "step": 26261 }, { "epoch": 0.46841222844504693, "grad_norm": 0.20618800818920135, "learning_rate": 3.2022878146023386e-05, "loss": 0.1526, "step": 26262 }, { "epoch": 0.4684300645667606, "grad_norm": 0.21313892304897308, "learning_rate": 3.2021384301316095e-05, "loss": 0.1424, "step": 26263 }, { "epoch": 0.4684479006884743, "grad_norm": 0.22955258190631866, "learning_rate": 3.20198904293915e-05, "loss": 0.1159, "step": 26264 }, { "epoch": 0.468465736810188, "grad_norm": 0.3054492175579071, "learning_rate": 3.2018396530255385e-05, "loss": 0.1339, "step": 26265 }, { "epoch": 0.4684835729319017, "grad_norm": 0.3174167573451996, "learning_rate": 3.201690260391354e-05, "loss": 0.1639, "step": 26266 }, { "epoch": 0.46850140905361537, "grad_norm": 0.27130749821662903, "learning_rate": 3.201540865037175e-05, "loss": 0.2028, "step": 26267 }, { "epoch": 0.46851924517532906, "grad_norm": 0.25795820355415344, "learning_rate": 3.201391466963583e-05, "loss": 0.1609, "step": 26268 }, { "epoch": 0.46853708129704275, "grad_norm": 0.29352548718452454, "learning_rate": 3.201242066171154e-05, "loss": 0.191, "step": 26269 }, { "epoch": 0.4685549174187565, "grad_norm": 0.2807568907737732, "learning_rate": 3.20109266266047e-05, "loss": 0.1168, "step": 26270 }, { "epoch": 0.4685727535404702, "grad_norm": 0.32258519530296326, "learning_rate": 3.200943256432107e-05, "loss": 0.1608, "step": 26271 }, { "epoch": 0.46859058966218387, "grad_norm": 0.2827734351158142, "learning_rate": 3.200793847486647e-05, "loss": 0.1392, "step": 26272 }, { "epoch": 0.46860842578389755, "grad_norm": 0.22582592070102692, "learning_rate": 3.2006444358246676e-05, "loss": 0.1416, "step": 26273 }, { "epoch": 0.46862626190561124, "grad_norm": 0.24394312500953674, "learning_rate": 3.200495021446748e-05, "loss": 0.1146, "step": 26274 }, { "epoch": 0.46864409802732493, "grad_norm": 0.2754935026168823, "learning_rate": 3.200345604353469e-05, "loss": 0.1435, "step": 26275 }, { "epoch": 0.4686619341490386, "grad_norm": 0.27431949973106384, "learning_rate": 3.200196184545407e-05, "loss": 0.1158, "step": 26276 }, { "epoch": 0.4686797702707523, "grad_norm": 0.2708548903465271, "learning_rate": 3.200046762023144e-05, "loss": 0.2015, "step": 26277 }, { "epoch": 0.46869760639246605, "grad_norm": 0.2920028269290924, "learning_rate": 3.199897336787257e-05, "loss": 0.154, "step": 26278 }, { "epoch": 0.46871544251417974, "grad_norm": 0.26886850595474243, "learning_rate": 3.199747908838326e-05, "loss": 0.1598, "step": 26279 }, { "epoch": 0.4687332786358934, "grad_norm": 0.2931189239025116, "learning_rate": 3.199598478176931e-05, "loss": 0.1098, "step": 26280 }, { "epoch": 0.4687511147576071, "grad_norm": 0.35463953018188477, "learning_rate": 3.199449044803651e-05, "loss": 0.1647, "step": 26281 }, { "epoch": 0.4687689508793208, "grad_norm": 0.25132209062576294, "learning_rate": 3.1992996087190643e-05, "loss": 0.1402, "step": 26282 }, { "epoch": 0.4687867870010345, "grad_norm": 0.29833027720451355, "learning_rate": 3.199150169923751e-05, "loss": 0.1286, "step": 26283 }, { "epoch": 0.4688046231227482, "grad_norm": 0.1923772394657135, "learning_rate": 3.1990007284182904e-05, "loss": 0.1355, "step": 26284 }, { "epoch": 0.46882245924446186, "grad_norm": 0.22590340673923492, "learning_rate": 3.198851284203262e-05, "loss": 0.1572, "step": 26285 }, { "epoch": 0.46884029536617555, "grad_norm": 0.2155126929283142, "learning_rate": 3.198701837279244e-05, "loss": 0.1515, "step": 26286 }, { "epoch": 0.4688581314878893, "grad_norm": 0.33970415592193604, "learning_rate": 3.198552387646816e-05, "loss": 0.139, "step": 26287 }, { "epoch": 0.468875967609603, "grad_norm": 0.2502792179584503, "learning_rate": 3.198402935306558e-05, "loss": 0.1222, "step": 26288 }, { "epoch": 0.46889380373131667, "grad_norm": 0.26113376021385193, "learning_rate": 3.198253480259049e-05, "loss": 0.1138, "step": 26289 }, { "epoch": 0.46891163985303036, "grad_norm": 0.2515289783477783, "learning_rate": 3.1981040225048686e-05, "loss": 0.1391, "step": 26290 }, { "epoch": 0.46892947597474405, "grad_norm": 0.29478350281715393, "learning_rate": 3.197954562044596e-05, "loss": 0.1481, "step": 26291 }, { "epoch": 0.46894731209645774, "grad_norm": 0.24080762267112732, "learning_rate": 3.19780509887881e-05, "loss": 0.1387, "step": 26292 }, { "epoch": 0.4689651482181714, "grad_norm": 0.25260424613952637, "learning_rate": 3.197655633008091e-05, "loss": 0.1626, "step": 26293 }, { "epoch": 0.4689829843398851, "grad_norm": 0.23998117446899414, "learning_rate": 3.197506164433017e-05, "loss": 0.1262, "step": 26294 }, { "epoch": 0.46900082046159886, "grad_norm": 0.30281248688697815, "learning_rate": 3.1973566931541684e-05, "loss": 0.1344, "step": 26295 }, { "epoch": 0.46901865658331254, "grad_norm": 0.23442330956459045, "learning_rate": 3.197207219172125e-05, "loss": 0.1322, "step": 26296 }, { "epoch": 0.46903649270502623, "grad_norm": 0.5462908148765564, "learning_rate": 3.1970577424874646e-05, "loss": 0.1583, "step": 26297 }, { "epoch": 0.4690543288267399, "grad_norm": 0.20979632437229156, "learning_rate": 3.1969082631007686e-05, "loss": 0.1652, "step": 26298 }, { "epoch": 0.4690721649484536, "grad_norm": 0.27456435561180115, "learning_rate": 3.196758781012615e-05, "loss": 0.1754, "step": 26299 }, { "epoch": 0.4690900010701673, "grad_norm": 0.36536943912506104, "learning_rate": 3.1966092962235835e-05, "loss": 0.1691, "step": 26300 }, { "epoch": 0.469107837191881, "grad_norm": 0.22177709639072418, "learning_rate": 3.1964598087342544e-05, "loss": 0.1169, "step": 26301 }, { "epoch": 0.46912567331359467, "grad_norm": 0.23451322317123413, "learning_rate": 3.196310318545206e-05, "loss": 0.0926, "step": 26302 }, { "epoch": 0.46914350943530836, "grad_norm": 0.26566970348358154, "learning_rate": 3.196160825657019e-05, "loss": 0.1204, "step": 26303 }, { "epoch": 0.4691613455570221, "grad_norm": 0.361223429441452, "learning_rate": 3.196011330070271e-05, "loss": 0.1998, "step": 26304 }, { "epoch": 0.4691791816787358, "grad_norm": 0.18521316349506378, "learning_rate": 3.195861831785544e-05, "loss": 0.1454, "step": 26305 }, { "epoch": 0.4691970178004495, "grad_norm": 0.23742611706256866, "learning_rate": 3.195712330803415e-05, "loss": 0.1282, "step": 26306 }, { "epoch": 0.46921485392216317, "grad_norm": 0.27079543471336365, "learning_rate": 3.1955628271244655e-05, "loss": 0.111, "step": 26307 }, { "epoch": 0.46923269004387685, "grad_norm": 0.3403595983982086, "learning_rate": 3.195413320749274e-05, "loss": 0.1566, "step": 26308 }, { "epoch": 0.46925052616559054, "grad_norm": 0.2622055411338806, "learning_rate": 3.19526381167842e-05, "loss": 0.1825, "step": 26309 }, { "epoch": 0.46926836228730423, "grad_norm": 0.20057597756385803, "learning_rate": 3.1951142999124836e-05, "loss": 0.0756, "step": 26310 }, { "epoch": 0.4692861984090179, "grad_norm": 0.26419612765312195, "learning_rate": 3.1949647854520446e-05, "loss": 0.1396, "step": 26311 }, { "epoch": 0.46930403453073166, "grad_norm": 0.24994225800037384, "learning_rate": 3.194815268297681e-05, "loss": 0.1484, "step": 26312 }, { "epoch": 0.46932187065244535, "grad_norm": 0.3066805303096771, "learning_rate": 3.194665748449975e-05, "loss": 0.1635, "step": 26313 }, { "epoch": 0.46933970677415904, "grad_norm": 0.28367283940315247, "learning_rate": 3.194516225909503e-05, "loss": 0.1287, "step": 26314 }, { "epoch": 0.4693575428958727, "grad_norm": 0.3292962908744812, "learning_rate": 3.194366700676847e-05, "loss": 0.1833, "step": 26315 }, { "epoch": 0.4693753790175864, "grad_norm": 0.3581288456916809, "learning_rate": 3.194217172752586e-05, "loss": 0.1443, "step": 26316 }, { "epoch": 0.4693932151393001, "grad_norm": 0.39648306369781494, "learning_rate": 3.194067642137299e-05, "loss": 0.1387, "step": 26317 }, { "epoch": 0.4694110512610138, "grad_norm": 0.3037940263748169, "learning_rate": 3.193918108831566e-05, "loss": 0.1474, "step": 26318 }, { "epoch": 0.4694288873827275, "grad_norm": 0.20100311934947968, "learning_rate": 3.193768572835967e-05, "loss": 0.1707, "step": 26319 }, { "epoch": 0.4694467235044412, "grad_norm": 0.5518757104873657, "learning_rate": 3.1936190341510816e-05, "loss": 0.1501, "step": 26320 }, { "epoch": 0.4694645596261549, "grad_norm": 0.23308266699314117, "learning_rate": 3.1934694927774894e-05, "loss": 0.1836, "step": 26321 }, { "epoch": 0.4694823957478686, "grad_norm": 0.30691757798194885, "learning_rate": 3.1933199487157696e-05, "loss": 0.2167, "step": 26322 }, { "epoch": 0.4695002318695823, "grad_norm": 0.3114815950393677, "learning_rate": 3.193170401966502e-05, "loss": 0.1672, "step": 26323 }, { "epoch": 0.469518067991296, "grad_norm": 0.2991527318954468, "learning_rate": 3.193020852530267e-05, "loss": 0.1523, "step": 26324 }, { "epoch": 0.46953590411300966, "grad_norm": 0.3079780638217926, "learning_rate": 3.192871300407643e-05, "loss": 0.1864, "step": 26325 }, { "epoch": 0.46955374023472335, "grad_norm": 0.2281007170677185, "learning_rate": 3.192721745599211e-05, "loss": 0.1402, "step": 26326 }, { "epoch": 0.46957157635643704, "grad_norm": 0.2386164665222168, "learning_rate": 3.1925721881055505e-05, "loss": 0.1398, "step": 26327 }, { "epoch": 0.4695894124781507, "grad_norm": 0.22394618391990662, "learning_rate": 3.192422627927241e-05, "loss": 0.1497, "step": 26328 }, { "epoch": 0.46960724859986447, "grad_norm": 0.2283318191766739, "learning_rate": 3.1922730650648616e-05, "loss": 0.1351, "step": 26329 }, { "epoch": 0.46962508472157816, "grad_norm": 0.31934654712677, "learning_rate": 3.1921234995189935e-05, "loss": 0.1785, "step": 26330 }, { "epoch": 0.46964292084329184, "grad_norm": 0.5170240998268127, "learning_rate": 3.1919739312902155e-05, "loss": 0.1098, "step": 26331 }, { "epoch": 0.46966075696500553, "grad_norm": 0.26554417610168457, "learning_rate": 3.191824360379107e-05, "loss": 0.1397, "step": 26332 }, { "epoch": 0.4696785930867192, "grad_norm": 0.2660197615623474, "learning_rate": 3.19167478678625e-05, "loss": 0.1462, "step": 26333 }, { "epoch": 0.4696964292084329, "grad_norm": 0.29562854766845703, "learning_rate": 3.191525210512221e-05, "loss": 0.1891, "step": 26334 }, { "epoch": 0.4697142653301466, "grad_norm": 0.22776257991790771, "learning_rate": 3.191375631557601e-05, "loss": 0.1422, "step": 26335 }, { "epoch": 0.4697321014518603, "grad_norm": 0.22175659239292145, "learning_rate": 3.1912260499229706e-05, "loss": 0.1233, "step": 26336 }, { "epoch": 0.46974993757357403, "grad_norm": 0.3063209354877472, "learning_rate": 3.1910764656089096e-05, "loss": 0.1813, "step": 26337 }, { "epoch": 0.4697677736952877, "grad_norm": 0.2530362904071808, "learning_rate": 3.190926878615998e-05, "loss": 0.1182, "step": 26338 }, { "epoch": 0.4697856098170014, "grad_norm": 0.307373046875, "learning_rate": 3.190777288944815e-05, "loss": 0.1267, "step": 26339 }, { "epoch": 0.4698034459387151, "grad_norm": 0.22828654944896698, "learning_rate": 3.1906276965959406e-05, "loss": 0.0776, "step": 26340 }, { "epoch": 0.4698212820604288, "grad_norm": 0.3093816637992859, "learning_rate": 3.190478101569955e-05, "loss": 0.1493, "step": 26341 }, { "epoch": 0.46983911818214247, "grad_norm": 0.32637467980384827, "learning_rate": 3.190328503867437e-05, "loss": 0.1022, "step": 26342 }, { "epoch": 0.46985695430385616, "grad_norm": 0.21642765402793884, "learning_rate": 3.190178903488967e-05, "loss": 0.1574, "step": 26343 }, { "epoch": 0.46987479042556984, "grad_norm": 0.24719901382923126, "learning_rate": 3.190029300435127e-05, "loss": 0.1508, "step": 26344 }, { "epoch": 0.46989262654728353, "grad_norm": 0.29670023918151855, "learning_rate": 3.189879694706493e-05, "loss": 0.1764, "step": 26345 }, { "epoch": 0.4699104626689973, "grad_norm": 0.2982756793498993, "learning_rate": 3.1897300863036487e-05, "loss": 0.1753, "step": 26346 }, { "epoch": 0.46992829879071096, "grad_norm": 0.2514650225639343, "learning_rate": 3.189580475227171e-05, "loss": 0.173, "step": 26347 }, { "epoch": 0.46994613491242465, "grad_norm": 0.2989982068538666, "learning_rate": 3.1894308614776424e-05, "loss": 0.1556, "step": 26348 }, { "epoch": 0.46996397103413834, "grad_norm": 0.30362823605537415, "learning_rate": 3.18928124505564e-05, "loss": 0.1192, "step": 26349 }, { "epoch": 0.469981807155852, "grad_norm": 0.29709258675575256, "learning_rate": 3.189131625961747e-05, "loss": 0.0839, "step": 26350 }, { "epoch": 0.4699996432775657, "grad_norm": 0.3220767080783844, "learning_rate": 3.188982004196541e-05, "loss": 0.1582, "step": 26351 }, { "epoch": 0.4700174793992794, "grad_norm": 0.3034157454967499, "learning_rate": 3.1888323797606034e-05, "loss": 0.1448, "step": 26352 }, { "epoch": 0.4700353155209931, "grad_norm": 0.28179022669792175, "learning_rate": 3.188682752654514e-05, "loss": 0.1399, "step": 26353 }, { "epoch": 0.47005315164270683, "grad_norm": 0.2542007565498352, "learning_rate": 3.1885331228788514e-05, "loss": 0.139, "step": 26354 }, { "epoch": 0.4700709877644205, "grad_norm": 0.2888839542865753, "learning_rate": 3.188383490434197e-05, "loss": 0.1799, "step": 26355 }, { "epoch": 0.4700888238861342, "grad_norm": 0.37037986516952515, "learning_rate": 3.18823385532113e-05, "loss": 0.1666, "step": 26356 }, { "epoch": 0.4701066600078479, "grad_norm": 0.26663026213645935, "learning_rate": 3.188084217540231e-05, "loss": 0.1067, "step": 26357 }, { "epoch": 0.4701244961295616, "grad_norm": 0.3092741072177887, "learning_rate": 3.18793457709208e-05, "loss": 0.1224, "step": 26358 }, { "epoch": 0.4701423322512753, "grad_norm": 0.25784868001937866, "learning_rate": 3.187784933977258e-05, "loss": 0.0975, "step": 26359 }, { "epoch": 0.47016016837298896, "grad_norm": 0.31876060366630554, "learning_rate": 3.187635288196342e-05, "loss": 0.2221, "step": 26360 }, { "epoch": 0.47017800449470265, "grad_norm": 0.31552034616470337, "learning_rate": 3.187485639749916e-05, "loss": 0.1451, "step": 26361 }, { "epoch": 0.4701958406164164, "grad_norm": 0.24119937419891357, "learning_rate": 3.1873359886385576e-05, "loss": 0.1143, "step": 26362 }, { "epoch": 0.4702136767381301, "grad_norm": 0.2996779978275299, "learning_rate": 3.187186334862847e-05, "loss": 0.1375, "step": 26363 }, { "epoch": 0.47023151285984377, "grad_norm": 0.33241936564445496, "learning_rate": 3.1870366784233646e-05, "loss": 0.1331, "step": 26364 }, { "epoch": 0.47024934898155746, "grad_norm": 0.33816003799438477, "learning_rate": 3.1868870193206915e-05, "loss": 0.1336, "step": 26365 }, { "epoch": 0.47026718510327115, "grad_norm": 0.2590244710445404, "learning_rate": 3.186737357555407e-05, "loss": 0.1405, "step": 26366 }, { "epoch": 0.47028502122498483, "grad_norm": 0.2345762848854065, "learning_rate": 3.1865876931280915e-05, "loss": 0.1267, "step": 26367 }, { "epoch": 0.4703028573466985, "grad_norm": 0.24232934415340424, "learning_rate": 3.186438026039325e-05, "loss": 0.1177, "step": 26368 }, { "epoch": 0.4703206934684122, "grad_norm": 0.23698510229587555, "learning_rate": 3.186288356289687e-05, "loss": 0.1374, "step": 26369 }, { "epoch": 0.4703385295901259, "grad_norm": 0.3014860451221466, "learning_rate": 3.186138683879758e-05, "loss": 0.1499, "step": 26370 }, { "epoch": 0.47035636571183964, "grad_norm": 0.3066621422767639, "learning_rate": 3.1859890088101193e-05, "loss": 0.1782, "step": 26371 }, { "epoch": 0.47037420183355333, "grad_norm": 0.24974851310253143, "learning_rate": 3.18583933108135e-05, "loss": 0.1186, "step": 26372 }, { "epoch": 0.470392037955267, "grad_norm": 0.3588750958442688, "learning_rate": 3.18568965069403e-05, "loss": 0.161, "step": 26373 }, { "epoch": 0.4704098740769807, "grad_norm": 0.20368777215480804, "learning_rate": 3.185539967648742e-05, "loss": 0.1233, "step": 26374 }, { "epoch": 0.4704277101986944, "grad_norm": 0.2930132746696472, "learning_rate": 3.1853902819460624e-05, "loss": 0.127, "step": 26375 }, { "epoch": 0.4704455463204081, "grad_norm": 0.23821282386779785, "learning_rate": 3.185240593586574e-05, "loss": 0.1589, "step": 26376 }, { "epoch": 0.47046338244212177, "grad_norm": 0.22390709817409515, "learning_rate": 3.1850909025708556e-05, "loss": 0.0778, "step": 26377 }, { "epoch": 0.47048121856383546, "grad_norm": 0.2755146324634552, "learning_rate": 3.18494120889949e-05, "loss": 0.0971, "step": 26378 }, { "epoch": 0.4704990546855492, "grad_norm": 0.34734922647476196, "learning_rate": 3.184791512573054e-05, "loss": 0.1703, "step": 26379 }, { "epoch": 0.4705168908072629, "grad_norm": 0.2981151342391968, "learning_rate": 3.18464181359213e-05, "loss": 0.1331, "step": 26380 }, { "epoch": 0.4705347269289766, "grad_norm": 0.20875748991966248, "learning_rate": 3.184492111957299e-05, "loss": 0.1518, "step": 26381 }, { "epoch": 0.47055256305069026, "grad_norm": 0.24016188085079193, "learning_rate": 3.1843424076691386e-05, "loss": 0.1686, "step": 26382 }, { "epoch": 0.47057039917240395, "grad_norm": 0.2279919981956482, "learning_rate": 3.184192700728231e-05, "loss": 0.1645, "step": 26383 }, { "epoch": 0.47058823529411764, "grad_norm": 0.25243496894836426, "learning_rate": 3.184042991135157e-05, "loss": 0.1474, "step": 26384 }, { "epoch": 0.47060607141583133, "grad_norm": 0.3161037564277649, "learning_rate": 3.183893278890496e-05, "loss": 0.1303, "step": 26385 }, { "epoch": 0.470623907537545, "grad_norm": 0.2923678457736969, "learning_rate": 3.1837435639948276e-05, "loss": 0.1048, "step": 26386 }, { "epoch": 0.4706417436592587, "grad_norm": 0.28258299827575684, "learning_rate": 3.183593846448734e-05, "loss": 0.0938, "step": 26387 }, { "epoch": 0.47065957978097245, "grad_norm": 0.27700623869895935, "learning_rate": 3.183444126252793e-05, "loss": 0.1571, "step": 26388 }, { "epoch": 0.47067741590268614, "grad_norm": 0.3193347752094269, "learning_rate": 3.1832944034075876e-05, "loss": 0.1678, "step": 26389 }, { "epoch": 0.4706952520243998, "grad_norm": 0.27845674753189087, "learning_rate": 3.1831446779136976e-05, "loss": 0.1312, "step": 26390 }, { "epoch": 0.4707130881461135, "grad_norm": 0.33350154757499695, "learning_rate": 3.182994949771701e-05, "loss": 0.1583, "step": 26391 }, { "epoch": 0.4707309242678272, "grad_norm": 0.19883500039577484, "learning_rate": 3.182845218982182e-05, "loss": 0.0911, "step": 26392 }, { "epoch": 0.4707487603895409, "grad_norm": 0.3399393856525421, "learning_rate": 3.1826954855457184e-05, "loss": 0.16, "step": 26393 }, { "epoch": 0.4707665965112546, "grad_norm": 0.21473821997642517, "learning_rate": 3.182545749462892e-05, "loss": 0.1196, "step": 26394 }, { "epoch": 0.47078443263296826, "grad_norm": 0.2845939099788666, "learning_rate": 3.1823960107342815e-05, "loss": 0.1628, "step": 26395 }, { "epoch": 0.470802268754682, "grad_norm": 0.2274755984544754, "learning_rate": 3.182246269360469e-05, "loss": 0.1578, "step": 26396 }, { "epoch": 0.4708201048763957, "grad_norm": 0.21096496284008026, "learning_rate": 3.1820965253420335e-05, "loss": 0.1311, "step": 26397 }, { "epoch": 0.4708379409981094, "grad_norm": 0.22262373566627502, "learning_rate": 3.181946778679557e-05, "loss": 0.1368, "step": 26398 }, { "epoch": 0.47085577711982307, "grad_norm": 0.3591758608818054, "learning_rate": 3.181797029373619e-05, "loss": 0.1724, "step": 26399 }, { "epoch": 0.47087361324153676, "grad_norm": 0.29298755526542664, "learning_rate": 3.1816472774248e-05, "loss": 0.1397, "step": 26400 }, { "epoch": 0.47089144936325045, "grad_norm": 0.2678554654121399, "learning_rate": 3.181497522833681e-05, "loss": 0.188, "step": 26401 }, { "epoch": 0.47090928548496414, "grad_norm": 0.25611796975135803, "learning_rate": 3.181347765600843e-05, "loss": 0.1499, "step": 26402 }, { "epoch": 0.4709271216066778, "grad_norm": 0.24687770009040833, "learning_rate": 3.181198005726864e-05, "loss": 0.1059, "step": 26403 }, { "epoch": 0.4709449577283915, "grad_norm": 0.3149312734603882, "learning_rate": 3.181048243212327e-05, "loss": 0.1703, "step": 26404 }, { "epoch": 0.47096279385010525, "grad_norm": 0.21981625258922577, "learning_rate": 3.1808984780578126e-05, "loss": 0.1217, "step": 26405 }, { "epoch": 0.47098062997181894, "grad_norm": 0.3896492123603821, "learning_rate": 3.1807487102639e-05, "loss": 0.2234, "step": 26406 }, { "epoch": 0.47099846609353263, "grad_norm": 0.22677797079086304, "learning_rate": 3.1805989398311704e-05, "loss": 0.1281, "step": 26407 }, { "epoch": 0.4710163022152463, "grad_norm": 0.2642861306667328, "learning_rate": 3.180449166760204e-05, "loss": 0.1053, "step": 26408 }, { "epoch": 0.47103413833696, "grad_norm": 0.38626107573509216, "learning_rate": 3.180299391051582e-05, "loss": 0.1536, "step": 26409 }, { "epoch": 0.4710519744586737, "grad_norm": 0.3027537167072296, "learning_rate": 3.180149612705884e-05, "loss": 0.1638, "step": 26410 }, { "epoch": 0.4710698105803874, "grad_norm": 0.22869277000427246, "learning_rate": 3.1799998317236914e-05, "loss": 0.16, "step": 26411 }, { "epoch": 0.47108764670210107, "grad_norm": 0.26541826128959656, "learning_rate": 3.179850048105584e-05, "loss": 0.1388, "step": 26412 }, { "epoch": 0.4711054828238148, "grad_norm": 0.2785693407058716, "learning_rate": 3.1797002618521435e-05, "loss": 0.155, "step": 26413 }, { "epoch": 0.4711233189455285, "grad_norm": 0.27491217851638794, "learning_rate": 3.17955047296395e-05, "loss": 0.1553, "step": 26414 }, { "epoch": 0.4711411550672422, "grad_norm": 0.2087952196598053, "learning_rate": 3.179400681441585e-05, "loss": 0.1353, "step": 26415 }, { "epoch": 0.4711589911889559, "grad_norm": 0.24017567932605743, "learning_rate": 3.179250887285628e-05, "loss": 0.1526, "step": 26416 }, { "epoch": 0.47117682731066957, "grad_norm": 0.3256704807281494, "learning_rate": 3.179101090496659e-05, "loss": 0.1681, "step": 26417 }, { "epoch": 0.47119466343238325, "grad_norm": 0.2272716909646988, "learning_rate": 3.17895129107526e-05, "loss": 0.1494, "step": 26418 }, { "epoch": 0.47121249955409694, "grad_norm": 0.28907379508018494, "learning_rate": 3.1788014890220114e-05, "loss": 0.1806, "step": 26419 }, { "epoch": 0.47123033567581063, "grad_norm": 0.3094012141227722, "learning_rate": 3.178651684337494e-05, "loss": 0.1529, "step": 26420 }, { "epoch": 0.4712481717975244, "grad_norm": 0.27203044295310974, "learning_rate": 3.178501877022288e-05, "loss": 0.1896, "step": 26421 }, { "epoch": 0.47126600791923806, "grad_norm": 0.19576877355575562, "learning_rate": 3.178352067076975e-05, "loss": 0.1271, "step": 26422 }, { "epoch": 0.47128384404095175, "grad_norm": 0.3442360758781433, "learning_rate": 3.1782022545021346e-05, "loss": 0.1948, "step": 26423 }, { "epoch": 0.47130168016266544, "grad_norm": 0.22480064630508423, "learning_rate": 3.178052439298348e-05, "loss": 0.1694, "step": 26424 }, { "epoch": 0.4713195162843791, "grad_norm": 0.24323417246341705, "learning_rate": 3.1779026214661953e-05, "loss": 0.1354, "step": 26425 }, { "epoch": 0.4713373524060928, "grad_norm": 0.28382396697998047, "learning_rate": 3.1777528010062584e-05, "loss": 0.1322, "step": 26426 }, { "epoch": 0.4713551885278065, "grad_norm": 0.21106085181236267, "learning_rate": 3.177602977919118e-05, "loss": 0.1582, "step": 26427 }, { "epoch": 0.4713730246495202, "grad_norm": 0.288219153881073, "learning_rate": 3.177453152205354e-05, "loss": 0.1253, "step": 26428 }, { "epoch": 0.4713908607712339, "grad_norm": 0.28570500016212463, "learning_rate": 3.177303323865548e-05, "loss": 0.177, "step": 26429 }, { "epoch": 0.4714086968929476, "grad_norm": 0.26425454020500183, "learning_rate": 3.1771534929002804e-05, "loss": 0.128, "step": 26430 }, { "epoch": 0.4714265330146613, "grad_norm": 0.23358196020126343, "learning_rate": 3.1770036593101314e-05, "loss": 0.1125, "step": 26431 }, { "epoch": 0.471444369136375, "grad_norm": 0.3212023079395294, "learning_rate": 3.1768538230956824e-05, "loss": 0.1415, "step": 26432 }, { "epoch": 0.4714622052580887, "grad_norm": 0.37274155020713806, "learning_rate": 3.1767039842575144e-05, "loss": 0.1486, "step": 26433 }, { "epoch": 0.4714800413798024, "grad_norm": 0.22081796824932098, "learning_rate": 3.176554142796209e-05, "loss": 0.122, "step": 26434 }, { "epoch": 0.47149787750151606, "grad_norm": 0.2711513042449951, "learning_rate": 3.1764042987123446e-05, "loss": 0.1492, "step": 26435 }, { "epoch": 0.47151571362322975, "grad_norm": 0.2879045307636261, "learning_rate": 3.176254452006504e-05, "loss": 0.1784, "step": 26436 }, { "epoch": 0.47153354974494344, "grad_norm": 0.23209641873836517, "learning_rate": 3.1761046026792684e-05, "loss": 0.1635, "step": 26437 }, { "epoch": 0.4715513858666572, "grad_norm": 0.3062979280948639, "learning_rate": 3.1759547507312164e-05, "loss": 0.2206, "step": 26438 }, { "epoch": 0.47156922198837087, "grad_norm": 0.16119593381881714, "learning_rate": 3.175804896162931e-05, "loss": 0.1217, "step": 26439 }, { "epoch": 0.47158705811008456, "grad_norm": 0.2624979317188263, "learning_rate": 3.175655038974992e-05, "loss": 0.1331, "step": 26440 }, { "epoch": 0.47160489423179824, "grad_norm": 1.424967885017395, "learning_rate": 3.175505179167982e-05, "loss": 0.2486, "step": 26441 }, { "epoch": 0.47162273035351193, "grad_norm": 0.2697075605392456, "learning_rate": 3.1753553167424795e-05, "loss": 0.1194, "step": 26442 }, { "epoch": 0.4716405664752256, "grad_norm": 0.19480131566524506, "learning_rate": 3.175205451699067e-05, "loss": 0.1218, "step": 26443 }, { "epoch": 0.4716584025969393, "grad_norm": 0.301111102104187, "learning_rate": 3.175055584038325e-05, "loss": 0.1177, "step": 26444 }, { "epoch": 0.471676238718653, "grad_norm": 0.24969439208507538, "learning_rate": 3.174905713760834e-05, "loss": 0.1505, "step": 26445 }, { "epoch": 0.4716940748403667, "grad_norm": 0.3286374509334564, "learning_rate": 3.174755840867175e-05, "loss": 0.2119, "step": 26446 }, { "epoch": 0.4717119109620804, "grad_norm": 0.2736777067184448, "learning_rate": 3.1746059653579294e-05, "loss": 0.203, "step": 26447 }, { "epoch": 0.4717297470837941, "grad_norm": 0.21302145719528198, "learning_rate": 3.1744560872336794e-05, "loss": 0.0926, "step": 26448 }, { "epoch": 0.4717475832055078, "grad_norm": 0.31996867060661316, "learning_rate": 3.174306206495003e-05, "loss": 0.2061, "step": 26449 }, { "epoch": 0.4717654193272215, "grad_norm": 0.28593701124191284, "learning_rate": 3.1741563231424844e-05, "loss": 0.1055, "step": 26450 }, { "epoch": 0.4717832554489352, "grad_norm": 0.2543777525424957, "learning_rate": 3.174006437176702e-05, "loss": 0.159, "step": 26451 }, { "epoch": 0.47180109157064887, "grad_norm": 0.3063381314277649, "learning_rate": 3.173856548598239e-05, "loss": 0.1834, "step": 26452 }, { "epoch": 0.47181892769236256, "grad_norm": 0.28480780124664307, "learning_rate": 3.173706657407673e-05, "loss": 0.1639, "step": 26453 }, { "epoch": 0.47183676381407624, "grad_norm": 0.2981882691383362, "learning_rate": 3.173556763605589e-05, "loss": 0.1694, "step": 26454 }, { "epoch": 0.47185459993579, "grad_norm": 0.19655829668045044, "learning_rate": 3.1734068671925654e-05, "loss": 0.1196, "step": 26455 }, { "epoch": 0.4718724360575037, "grad_norm": 0.26821285486221313, "learning_rate": 3.1732569681691855e-05, "loss": 0.1286, "step": 26456 }, { "epoch": 0.47189027217921736, "grad_norm": 0.23773056268692017, "learning_rate": 3.173107066536029e-05, "loss": 0.1606, "step": 26457 }, { "epoch": 0.47190810830093105, "grad_norm": 0.3065195083618164, "learning_rate": 3.1729571622936763e-05, "loss": 0.1725, "step": 26458 }, { "epoch": 0.47192594442264474, "grad_norm": 0.39167124032974243, "learning_rate": 3.1728072554427085e-05, "loss": 0.1955, "step": 26459 }, { "epoch": 0.4719437805443584, "grad_norm": 0.2635594606399536, "learning_rate": 3.1726573459837085e-05, "loss": 0.1336, "step": 26460 }, { "epoch": 0.4719616166660721, "grad_norm": 0.19853445887565613, "learning_rate": 3.172507433917256e-05, "loss": 0.1697, "step": 26461 }, { "epoch": 0.4719794527877858, "grad_norm": 0.24068090319633484, "learning_rate": 3.172357519243932e-05, "loss": 0.1265, "step": 26462 }, { "epoch": 0.47199728890949955, "grad_norm": 0.3256964385509491, "learning_rate": 3.172207601964319e-05, "loss": 0.1572, "step": 26463 }, { "epoch": 0.47201512503121323, "grad_norm": 0.2837182879447937, "learning_rate": 3.172057682078996e-05, "loss": 0.1719, "step": 26464 }, { "epoch": 0.4720329611529269, "grad_norm": 0.219710111618042, "learning_rate": 3.1719077595885466e-05, "loss": 0.1571, "step": 26465 }, { "epoch": 0.4720507972746406, "grad_norm": 0.31797677278518677, "learning_rate": 3.1717578344935495e-05, "loss": 0.166, "step": 26466 }, { "epoch": 0.4720686333963543, "grad_norm": 0.26471418142318726, "learning_rate": 3.171607906794587e-05, "loss": 0.153, "step": 26467 }, { "epoch": 0.472086469518068, "grad_norm": 0.2208862602710724, "learning_rate": 3.1714579764922406e-05, "loss": 0.1309, "step": 26468 }, { "epoch": 0.4721043056397817, "grad_norm": 0.33635571599006653, "learning_rate": 3.1713080435870915e-05, "loss": 0.1682, "step": 26469 }, { "epoch": 0.47212214176149536, "grad_norm": 0.2618339955806732, "learning_rate": 3.171158108079721e-05, "loss": 0.119, "step": 26470 }, { "epoch": 0.47213997788320905, "grad_norm": 0.26608848571777344, "learning_rate": 3.171008169970709e-05, "loss": 0.1462, "step": 26471 }, { "epoch": 0.4721578140049228, "grad_norm": 0.264061838388443, "learning_rate": 3.170858229260638e-05, "loss": 0.1114, "step": 26472 }, { "epoch": 0.4721756501266365, "grad_norm": 0.20839554071426392, "learning_rate": 3.1707082859500883e-05, "loss": 0.1441, "step": 26473 }, { "epoch": 0.47219348624835017, "grad_norm": 0.28644636273384094, "learning_rate": 3.170558340039642e-05, "loss": 0.1158, "step": 26474 }, { "epoch": 0.47221132237006386, "grad_norm": 0.22905872762203217, "learning_rate": 3.17040839152988e-05, "loss": 0.1214, "step": 26475 }, { "epoch": 0.47222915849177755, "grad_norm": 0.2504383325576782, "learning_rate": 3.170258440421384e-05, "loss": 0.1088, "step": 26476 }, { "epoch": 0.47224699461349123, "grad_norm": 0.27762576937675476, "learning_rate": 3.1701084867147334e-05, "loss": 0.1568, "step": 26477 }, { "epoch": 0.4722648307352049, "grad_norm": 0.2974173128604889, "learning_rate": 3.1699585304105126e-05, "loss": 0.1722, "step": 26478 }, { "epoch": 0.4722826668569186, "grad_norm": 0.1970190703868866, "learning_rate": 3.1698085715092996e-05, "loss": 0.1147, "step": 26479 }, { "epoch": 0.47230050297863235, "grad_norm": 0.31518593430519104, "learning_rate": 3.169658610011678e-05, "loss": 0.1229, "step": 26480 }, { "epoch": 0.47231833910034604, "grad_norm": 0.33436256647109985, "learning_rate": 3.1695086459182276e-05, "loss": 0.1727, "step": 26481 }, { "epoch": 0.47233617522205973, "grad_norm": 0.23115403950214386, "learning_rate": 3.169358679229531e-05, "loss": 0.1531, "step": 26482 }, { "epoch": 0.4723540113437734, "grad_norm": 0.24127766489982605, "learning_rate": 3.16920870994617e-05, "loss": 0.1335, "step": 26483 }, { "epoch": 0.4723718474654871, "grad_norm": 0.3136756122112274, "learning_rate": 3.169058738068723e-05, "loss": 0.1732, "step": 26484 }, { "epoch": 0.4723896835872008, "grad_norm": 0.293433278799057, "learning_rate": 3.168908763597775e-05, "loss": 0.1296, "step": 26485 }, { "epoch": 0.4724075197089145, "grad_norm": 0.32146790623664856, "learning_rate": 3.168758786533904e-05, "loss": 0.1448, "step": 26486 }, { "epoch": 0.47242535583062817, "grad_norm": 0.3155902028083801, "learning_rate": 3.168608806877694e-05, "loss": 0.1718, "step": 26487 }, { "epoch": 0.47244319195234186, "grad_norm": 0.3479894697666168, "learning_rate": 3.168458824629725e-05, "loss": 0.1304, "step": 26488 }, { "epoch": 0.4724610280740556, "grad_norm": 0.24790723621845245, "learning_rate": 3.168308839790579e-05, "loss": 0.1311, "step": 26489 }, { "epoch": 0.4724788641957693, "grad_norm": 0.2899687886238098, "learning_rate": 3.1681588523608364e-05, "loss": 0.1745, "step": 26490 }, { "epoch": 0.472496700317483, "grad_norm": 0.27624866366386414, "learning_rate": 3.1680088623410803e-05, "loss": 0.1714, "step": 26491 }, { "epoch": 0.47251453643919666, "grad_norm": 0.22364988923072815, "learning_rate": 3.1678588697318904e-05, "loss": 0.0864, "step": 26492 }, { "epoch": 0.47253237256091035, "grad_norm": 0.3170121908187866, "learning_rate": 3.167708874533849e-05, "loss": 0.1783, "step": 26493 }, { "epoch": 0.47255020868262404, "grad_norm": 0.35619670152664185, "learning_rate": 3.167558876747537e-05, "loss": 0.1507, "step": 26494 }, { "epoch": 0.47256804480433773, "grad_norm": 0.2822248637676239, "learning_rate": 3.167408876373537e-05, "loss": 0.1183, "step": 26495 }, { "epoch": 0.4725858809260514, "grad_norm": 0.3348664343357086, "learning_rate": 3.1672588734124295e-05, "loss": 0.1543, "step": 26496 }, { "epoch": 0.47260371704776516, "grad_norm": 0.38687536120414734, "learning_rate": 3.167108867864796e-05, "loss": 0.144, "step": 26497 }, { "epoch": 0.47262155316947885, "grad_norm": 0.28511950373649597, "learning_rate": 3.1669588597312184e-05, "loss": 0.1702, "step": 26498 }, { "epoch": 0.47263938929119254, "grad_norm": 0.17796646058559418, "learning_rate": 3.166808849012277e-05, "loss": 0.0965, "step": 26499 }, { "epoch": 0.4726572254129062, "grad_norm": 0.33366072177886963, "learning_rate": 3.1666588357085556e-05, "loss": 0.165, "step": 26500 }, { "epoch": 0.4726750615346199, "grad_norm": 0.23638556897640228, "learning_rate": 3.166508819820633e-05, "loss": 0.1254, "step": 26501 }, { "epoch": 0.4726928976563336, "grad_norm": 0.2803076207637787, "learning_rate": 3.1663588013490924e-05, "loss": 0.1253, "step": 26502 }, { "epoch": 0.4727107337780473, "grad_norm": 0.3209938704967499, "learning_rate": 3.166208780294515e-05, "loss": 0.2013, "step": 26503 }, { "epoch": 0.472728569899761, "grad_norm": 0.2803567945957184, "learning_rate": 3.166058756657483e-05, "loss": 0.1051, "step": 26504 }, { "epoch": 0.47274640602147466, "grad_norm": 0.18961292505264282, "learning_rate": 3.1659087304385756e-05, "loss": 0.1107, "step": 26505 }, { "epoch": 0.4727642421431884, "grad_norm": 0.2234373539686203, "learning_rate": 3.1657587016383776e-05, "loss": 0.123, "step": 26506 }, { "epoch": 0.4727820782649021, "grad_norm": 0.2961442470550537, "learning_rate": 3.165608670257468e-05, "loss": 0.1509, "step": 26507 }, { "epoch": 0.4727999143866158, "grad_norm": 0.20827454328536987, "learning_rate": 3.165458636296429e-05, "loss": 0.1165, "step": 26508 }, { "epoch": 0.47281775050832947, "grad_norm": 0.23899710178375244, "learning_rate": 3.165308599755843e-05, "loss": 0.1725, "step": 26509 }, { "epoch": 0.47283558663004316, "grad_norm": 0.2794351577758789, "learning_rate": 3.1651585606362914e-05, "loss": 0.1591, "step": 26510 }, { "epoch": 0.47285342275175685, "grad_norm": 0.217891663312912, "learning_rate": 3.165008518938355e-05, "loss": 0.1397, "step": 26511 }, { "epoch": 0.47287125887347053, "grad_norm": 0.31379568576812744, "learning_rate": 3.1648584746626167e-05, "loss": 0.1559, "step": 26512 }, { "epoch": 0.4728890949951842, "grad_norm": 0.20858775079250336, "learning_rate": 3.164708427809657e-05, "loss": 0.1375, "step": 26513 }, { "epoch": 0.47290693111689797, "grad_norm": 0.17133578658103943, "learning_rate": 3.164558378380057e-05, "loss": 0.1072, "step": 26514 }, { "epoch": 0.47292476723861165, "grad_norm": 0.27931275963783264, "learning_rate": 3.1644083263743994e-05, "loss": 0.1604, "step": 26515 }, { "epoch": 0.47294260336032534, "grad_norm": 0.28538671135902405, "learning_rate": 3.1642582717932656e-05, "loss": 0.2105, "step": 26516 }, { "epoch": 0.47296043948203903, "grad_norm": 0.23764726519584656, "learning_rate": 3.164108214637238e-05, "loss": 0.1574, "step": 26517 }, { "epoch": 0.4729782756037527, "grad_norm": 0.25231626629829407, "learning_rate": 3.163958154906898e-05, "loss": 0.1755, "step": 26518 }, { "epoch": 0.4729961117254664, "grad_norm": 0.29099997878074646, "learning_rate": 3.163808092602826e-05, "loss": 0.1405, "step": 26519 }, { "epoch": 0.4730139478471801, "grad_norm": 0.24946630001068115, "learning_rate": 3.163658027725604e-05, "loss": 0.1338, "step": 26520 }, { "epoch": 0.4730317839688938, "grad_norm": 0.3589918613433838, "learning_rate": 3.163507960275815e-05, "loss": 0.1053, "step": 26521 }, { "epoch": 0.4730496200906075, "grad_norm": 0.21702727675437927, "learning_rate": 3.16335789025404e-05, "loss": 0.1338, "step": 26522 }, { "epoch": 0.4730674562123212, "grad_norm": 0.23356683552265167, "learning_rate": 3.1632078176608604e-05, "loss": 0.1392, "step": 26523 }, { "epoch": 0.4730852923340349, "grad_norm": 0.22037291526794434, "learning_rate": 3.1630577424968585e-05, "loss": 0.0898, "step": 26524 }, { "epoch": 0.4731031284557486, "grad_norm": 0.34863466024398804, "learning_rate": 3.162907664762616e-05, "loss": 0.2478, "step": 26525 }, { "epoch": 0.4731209645774623, "grad_norm": 0.3291824162006378, "learning_rate": 3.162757584458714e-05, "loss": 0.1611, "step": 26526 }, { "epoch": 0.47313880069917597, "grad_norm": 0.24684789776802063, "learning_rate": 3.162607501585735e-05, "loss": 0.1359, "step": 26527 }, { "epoch": 0.47315663682088965, "grad_norm": 0.23944151401519775, "learning_rate": 3.1624574161442614e-05, "loss": 0.1329, "step": 26528 }, { "epoch": 0.47317447294260334, "grad_norm": 0.22520282864570618, "learning_rate": 3.162307328134872e-05, "loss": 0.1717, "step": 26529 }, { "epoch": 0.47319230906431703, "grad_norm": 0.29841431975364685, "learning_rate": 3.1621572375581516e-05, "loss": 0.1314, "step": 26530 }, { "epoch": 0.4732101451860308, "grad_norm": 0.3050844371318817, "learning_rate": 3.162007144414681e-05, "loss": 0.1835, "step": 26531 }, { "epoch": 0.47322798130774446, "grad_norm": 0.2689076364040375, "learning_rate": 3.161857048705042e-05, "loss": 0.18, "step": 26532 }, { "epoch": 0.47324581742945815, "grad_norm": 0.29678064584732056, "learning_rate": 3.161706950429817e-05, "loss": 0.1353, "step": 26533 }, { "epoch": 0.47326365355117184, "grad_norm": 0.30316436290740967, "learning_rate": 3.161556849589587e-05, "loss": 0.2003, "step": 26534 }, { "epoch": 0.4732814896728855, "grad_norm": 0.26531943678855896, "learning_rate": 3.161406746184934e-05, "loss": 0.1264, "step": 26535 }, { "epoch": 0.4732993257945992, "grad_norm": 0.33824265003204346, "learning_rate": 3.1612566402164396e-05, "loss": 0.1212, "step": 26536 }, { "epoch": 0.4733171619163129, "grad_norm": 0.2977240979671478, "learning_rate": 3.161106531684687e-05, "loss": 0.1368, "step": 26537 }, { "epoch": 0.4733349980380266, "grad_norm": 0.37242844700813293, "learning_rate": 3.160956420590257e-05, "loss": 0.1722, "step": 26538 }, { "epoch": 0.47335283415974033, "grad_norm": 0.21689192950725555, "learning_rate": 3.160806306933731e-05, "loss": 0.1159, "step": 26539 }, { "epoch": 0.473370670281454, "grad_norm": 0.25579482316970825, "learning_rate": 3.1606561907156914e-05, "loss": 0.1159, "step": 26540 }, { "epoch": 0.4733885064031677, "grad_norm": 0.1851266771554947, "learning_rate": 3.160506071936721e-05, "loss": 0.1217, "step": 26541 }, { "epoch": 0.4734063425248814, "grad_norm": 0.3675920367240906, "learning_rate": 3.1603559505974004e-05, "loss": 0.1947, "step": 26542 }, { "epoch": 0.4734241786465951, "grad_norm": 0.24690604209899902, "learning_rate": 3.1602058266983123e-05, "loss": 0.1729, "step": 26543 }, { "epoch": 0.47344201476830877, "grad_norm": 0.2714647352695465, "learning_rate": 3.160055700240038e-05, "loss": 0.1922, "step": 26544 }, { "epoch": 0.47345985089002246, "grad_norm": 0.2930722236633301, "learning_rate": 3.159905571223161e-05, "loss": 0.1539, "step": 26545 }, { "epoch": 0.47347768701173615, "grad_norm": 0.23162326216697693, "learning_rate": 3.159755439648261e-05, "loss": 0.1746, "step": 26546 }, { "epoch": 0.47349552313344984, "grad_norm": 0.29259929060935974, "learning_rate": 3.159605305515921e-05, "loss": 0.146, "step": 26547 }, { "epoch": 0.4735133592551636, "grad_norm": 0.268196165561676, "learning_rate": 3.1594551688267236e-05, "loss": 0.1618, "step": 26548 }, { "epoch": 0.47353119537687727, "grad_norm": 0.26541832089424133, "learning_rate": 3.15930502958125e-05, "loss": 0.2037, "step": 26549 }, { "epoch": 0.47354903149859096, "grad_norm": 0.38825973868370056, "learning_rate": 3.1591548877800825e-05, "loss": 0.1796, "step": 26550 }, { "epoch": 0.47356686762030464, "grad_norm": 0.24469010531902313, "learning_rate": 3.159004743423802e-05, "loss": 0.1685, "step": 26551 }, { "epoch": 0.47358470374201833, "grad_norm": 0.36846232414245605, "learning_rate": 3.158854596512993e-05, "loss": 0.1688, "step": 26552 }, { "epoch": 0.473602539863732, "grad_norm": 0.30020350217819214, "learning_rate": 3.158704447048235e-05, "loss": 0.1917, "step": 26553 }, { "epoch": 0.4736203759854457, "grad_norm": 0.23571336269378662, "learning_rate": 3.1585542950301116e-05, "loss": 0.1888, "step": 26554 }, { "epoch": 0.4736382121071594, "grad_norm": 0.34698784351348877, "learning_rate": 3.158404140459204e-05, "loss": 0.1488, "step": 26555 }, { "epoch": 0.47365604822887314, "grad_norm": 0.2997521460056305, "learning_rate": 3.1582539833360946e-05, "loss": 0.1726, "step": 26556 }, { "epoch": 0.4736738843505868, "grad_norm": 0.25175920128822327, "learning_rate": 3.158103823661366e-05, "loss": 0.1172, "step": 26557 }, { "epoch": 0.4736917204723005, "grad_norm": 0.2800270617008209, "learning_rate": 3.157953661435599e-05, "loss": 0.1829, "step": 26558 }, { "epoch": 0.4737095565940142, "grad_norm": 0.23580262064933777, "learning_rate": 3.1578034966593756e-05, "loss": 0.1397, "step": 26559 }, { "epoch": 0.4737273927157279, "grad_norm": 0.2947693467140198, "learning_rate": 3.15765332933328e-05, "loss": 0.1425, "step": 26560 }, { "epoch": 0.4737452288374416, "grad_norm": 0.21910950541496277, "learning_rate": 3.157503159457892e-05, "loss": 0.1117, "step": 26561 }, { "epoch": 0.47376306495915527, "grad_norm": 0.2973783314228058, "learning_rate": 3.157352987033796e-05, "loss": 0.1351, "step": 26562 }, { "epoch": 0.47378090108086895, "grad_norm": 0.23400671780109406, "learning_rate": 3.1572028120615715e-05, "loss": 0.2029, "step": 26563 }, { "epoch": 0.47379873720258264, "grad_norm": 0.24722535908222198, "learning_rate": 3.157052634541802e-05, "loss": 0.1164, "step": 26564 }, { "epoch": 0.4738165733242964, "grad_norm": 0.25167903304100037, "learning_rate": 3.15690245447507e-05, "loss": 0.1333, "step": 26565 }, { "epoch": 0.4738344094460101, "grad_norm": 0.2707163989543915, "learning_rate": 3.156752271861957e-05, "loss": 0.1164, "step": 26566 }, { "epoch": 0.47385224556772376, "grad_norm": 0.24458569288253784, "learning_rate": 3.156602086703046e-05, "loss": 0.1346, "step": 26567 }, { "epoch": 0.47387008168943745, "grad_norm": 0.24327470362186432, "learning_rate": 3.156451898998917e-05, "loss": 0.1602, "step": 26568 }, { "epoch": 0.47388791781115114, "grad_norm": 0.27301260828971863, "learning_rate": 3.1563017087501555e-05, "loss": 0.1544, "step": 26569 }, { "epoch": 0.4739057539328648, "grad_norm": 0.24105463922023773, "learning_rate": 3.1561515159573405e-05, "loss": 0.1448, "step": 26570 }, { "epoch": 0.4739235900545785, "grad_norm": 0.25364959239959717, "learning_rate": 3.156001320621056e-05, "loss": 0.1246, "step": 26571 }, { "epoch": 0.4739414261762922, "grad_norm": 0.3247831165790558, "learning_rate": 3.155851122741884e-05, "loss": 0.1936, "step": 26572 }, { "epoch": 0.47395926229800595, "grad_norm": 0.2784326374530792, "learning_rate": 3.1557009223204065e-05, "loss": 0.1411, "step": 26573 }, { "epoch": 0.47397709841971963, "grad_norm": 0.5130375623703003, "learning_rate": 3.155550719357205e-05, "loss": 0.2661, "step": 26574 }, { "epoch": 0.4739949345414333, "grad_norm": 0.21962743997573853, "learning_rate": 3.1554005138528644e-05, "loss": 0.1297, "step": 26575 }, { "epoch": 0.474012770663147, "grad_norm": 0.23967252671718597, "learning_rate": 3.155250305807964e-05, "loss": 0.1141, "step": 26576 }, { "epoch": 0.4740306067848607, "grad_norm": 0.2910381853580475, "learning_rate": 3.155100095223087e-05, "loss": 0.1637, "step": 26577 }, { "epoch": 0.4740484429065744, "grad_norm": 0.3584441840648651, "learning_rate": 3.1549498820988156e-05, "loss": 0.2059, "step": 26578 }, { "epoch": 0.4740662790282881, "grad_norm": 0.2497677505016327, "learning_rate": 3.1547996664357315e-05, "loss": 0.1633, "step": 26579 }, { "epoch": 0.47408411515000176, "grad_norm": 0.22797313332557678, "learning_rate": 3.154649448234419e-05, "loss": 0.1593, "step": 26580 }, { "epoch": 0.4741019512717155, "grad_norm": 0.2294081747531891, "learning_rate": 3.154499227495459e-05, "loss": 0.132, "step": 26581 }, { "epoch": 0.4741197873934292, "grad_norm": 0.23380063474178314, "learning_rate": 3.1543490042194336e-05, "loss": 0.1664, "step": 26582 }, { "epoch": 0.4741376235151429, "grad_norm": 0.26418888568878174, "learning_rate": 3.1541987784069256e-05, "loss": 0.1391, "step": 26583 }, { "epoch": 0.47415545963685657, "grad_norm": 0.2400120198726654, "learning_rate": 3.154048550058517e-05, "loss": 0.1343, "step": 26584 }, { "epoch": 0.47417329575857026, "grad_norm": 0.3527982532978058, "learning_rate": 3.15389831917479e-05, "loss": 0.196, "step": 26585 }, { "epoch": 0.47419113188028394, "grad_norm": 0.2657168507575989, "learning_rate": 3.153748085756328e-05, "loss": 0.1348, "step": 26586 }, { "epoch": 0.47420896800199763, "grad_norm": 0.25882789492607117, "learning_rate": 3.153597849803712e-05, "loss": 0.1737, "step": 26587 }, { "epoch": 0.4742268041237113, "grad_norm": 0.2524459958076477, "learning_rate": 3.153447611317526e-05, "loss": 0.1738, "step": 26588 }, { "epoch": 0.474244640245425, "grad_norm": 0.3235996961593628, "learning_rate": 3.1532973702983506e-05, "loss": 0.1502, "step": 26589 }, { "epoch": 0.47426247636713875, "grad_norm": 0.24306495487689972, "learning_rate": 3.153147126746769e-05, "loss": 0.1299, "step": 26590 }, { "epoch": 0.47428031248885244, "grad_norm": 0.22534674406051636, "learning_rate": 3.152996880663363e-05, "loss": 0.1479, "step": 26591 }, { "epoch": 0.47429814861056613, "grad_norm": 0.20552285015583038, "learning_rate": 3.152846632048716e-05, "loss": 0.0877, "step": 26592 }, { "epoch": 0.4743159847322798, "grad_norm": 0.24186621606349945, "learning_rate": 3.1526963809034104e-05, "loss": 0.217, "step": 26593 }, { "epoch": 0.4743338208539935, "grad_norm": 0.29868051409721375, "learning_rate": 3.1525461272280274e-05, "loss": 0.1163, "step": 26594 }, { "epoch": 0.4743516569757072, "grad_norm": 0.3221137523651123, "learning_rate": 3.152395871023152e-05, "loss": 0.0844, "step": 26595 }, { "epoch": 0.4743694930974209, "grad_norm": 0.3390181064605713, "learning_rate": 3.1522456122893627e-05, "loss": 0.0881, "step": 26596 }, { "epoch": 0.47438732921913457, "grad_norm": 0.29285022616386414, "learning_rate": 3.152095351027246e-05, "loss": 0.1522, "step": 26597 }, { "epoch": 0.4744051653408483, "grad_norm": 1.3998613357543945, "learning_rate": 3.151945087237381e-05, "loss": 0.1752, "step": 26598 }, { "epoch": 0.474423001462562, "grad_norm": 0.2050648182630539, "learning_rate": 3.151794820920352e-05, "loss": 0.1065, "step": 26599 }, { "epoch": 0.4744408375842757, "grad_norm": 0.23183481395244598, "learning_rate": 3.151644552076741e-05, "loss": 0.1425, "step": 26600 }, { "epoch": 0.4744586737059894, "grad_norm": 0.31935393810272217, "learning_rate": 3.1514942807071315e-05, "loss": 0.2051, "step": 26601 }, { "epoch": 0.47447650982770306, "grad_norm": 0.29622718691825867, "learning_rate": 3.1513440068121044e-05, "loss": 0.1657, "step": 26602 }, { "epoch": 0.47449434594941675, "grad_norm": 0.23380009829998016, "learning_rate": 3.1511937303922435e-05, "loss": 0.1634, "step": 26603 }, { "epoch": 0.47451218207113044, "grad_norm": 0.4176250100135803, "learning_rate": 3.15104345144813e-05, "loss": 0.1935, "step": 26604 }, { "epoch": 0.4745300181928441, "grad_norm": 0.24143612384796143, "learning_rate": 3.150893169980348e-05, "loss": 0.0811, "step": 26605 }, { "epoch": 0.4745478543145578, "grad_norm": 0.27351275086402893, "learning_rate": 3.150742885989478e-05, "loss": 0.1289, "step": 26606 }, { "epoch": 0.47456569043627156, "grad_norm": 0.2599388360977173, "learning_rate": 3.150592599476105e-05, "loss": 0.1583, "step": 26607 }, { "epoch": 0.47458352655798525, "grad_norm": 0.35098227858543396, "learning_rate": 3.150442310440811e-05, "loss": 0.1778, "step": 26608 }, { "epoch": 0.47460136267969893, "grad_norm": 0.2600686252117157, "learning_rate": 3.150292018884177e-05, "loss": 0.1183, "step": 26609 }, { "epoch": 0.4746191988014126, "grad_norm": 0.37164050340652466, "learning_rate": 3.1501417248067865e-05, "loss": 0.189, "step": 26610 }, { "epoch": 0.4746370349231263, "grad_norm": 0.26129043102264404, "learning_rate": 3.149991428209222e-05, "loss": 0.1572, "step": 26611 }, { "epoch": 0.47465487104484, "grad_norm": 0.2669675350189209, "learning_rate": 3.149841129092066e-05, "loss": 0.0944, "step": 26612 }, { "epoch": 0.4746727071665537, "grad_norm": 0.2776889204978943, "learning_rate": 3.1496908274559016e-05, "loss": 0.1492, "step": 26613 }, { "epoch": 0.4746905432882674, "grad_norm": 0.424248069524765, "learning_rate": 3.149540523301312e-05, "loss": 0.1835, "step": 26614 }, { "epoch": 0.4747083794099811, "grad_norm": 0.29181039333343506, "learning_rate": 3.149390216628879e-05, "loss": 0.1233, "step": 26615 }, { "epoch": 0.4747262155316948, "grad_norm": 0.20913194119930267, "learning_rate": 3.1492399074391835e-05, "loss": 0.1525, "step": 26616 }, { "epoch": 0.4747440516534085, "grad_norm": 0.2814866602420807, "learning_rate": 3.1490895957328116e-05, "loss": 0.1444, "step": 26617 }, { "epoch": 0.4747618877751222, "grad_norm": 0.22930529713630676, "learning_rate": 3.148939281510343e-05, "loss": 0.0575, "step": 26618 }, { "epoch": 0.47477972389683587, "grad_norm": 0.2853902280330658, "learning_rate": 3.1487889647723625e-05, "loss": 0.1244, "step": 26619 }, { "epoch": 0.47479756001854956, "grad_norm": 0.24450060725212097, "learning_rate": 3.1486386455194515e-05, "loss": 0.1376, "step": 26620 }, { "epoch": 0.47481539614026325, "grad_norm": 0.5030564069747925, "learning_rate": 3.148488323752193e-05, "loss": 0.1536, "step": 26621 }, { "epoch": 0.47483323226197693, "grad_norm": 0.21783877909183502, "learning_rate": 3.1483379994711705e-05, "loss": 0.1256, "step": 26622 }, { "epoch": 0.4748510683836907, "grad_norm": 0.20736421644687653, "learning_rate": 3.148187672676965e-05, "loss": 0.1254, "step": 26623 }, { "epoch": 0.47486890450540437, "grad_norm": 0.36123204231262207, "learning_rate": 3.14803734337016e-05, "loss": 0.1179, "step": 26624 }, { "epoch": 0.47488674062711805, "grad_norm": 0.36821067333221436, "learning_rate": 3.1478870115513406e-05, "loss": 0.112, "step": 26625 }, { "epoch": 0.47490457674883174, "grad_norm": 0.29902219772338867, "learning_rate": 3.1477366772210846e-05, "loss": 0.1734, "step": 26626 }, { "epoch": 0.47492241287054543, "grad_norm": 0.27828463912010193, "learning_rate": 3.147586340379979e-05, "loss": 0.1618, "step": 26627 }, { "epoch": 0.4749402489922591, "grad_norm": 0.31777480244636536, "learning_rate": 3.147436001028605e-05, "loss": 0.1793, "step": 26628 }, { "epoch": 0.4749580851139728, "grad_norm": 0.25407347083091736, "learning_rate": 3.147285659167545e-05, "loss": 0.1175, "step": 26629 }, { "epoch": 0.4749759212356865, "grad_norm": 0.2659320831298828, "learning_rate": 3.147135314797383e-05, "loss": 0.1676, "step": 26630 }, { "epoch": 0.4749937573574002, "grad_norm": 0.30362290143966675, "learning_rate": 3.1469849679187e-05, "loss": 0.1823, "step": 26631 }, { "epoch": 0.4750115934791139, "grad_norm": 0.23539355397224426, "learning_rate": 3.1468346185320804e-05, "loss": 0.1205, "step": 26632 }, { "epoch": 0.4750294296008276, "grad_norm": 0.2805580794811249, "learning_rate": 3.146684266638107e-05, "loss": 0.182, "step": 26633 }, { "epoch": 0.4750472657225413, "grad_norm": 0.23611187934875488, "learning_rate": 3.1465339122373606e-05, "loss": 0.1365, "step": 26634 }, { "epoch": 0.475065101844255, "grad_norm": 0.1864871382713318, "learning_rate": 3.146383555330426e-05, "loss": 0.1334, "step": 26635 }, { "epoch": 0.4750829379659687, "grad_norm": 0.21819111704826355, "learning_rate": 3.146233195917886e-05, "loss": 0.1549, "step": 26636 }, { "epoch": 0.47510077408768236, "grad_norm": 0.31759074330329895, "learning_rate": 3.1460828340003225e-05, "loss": 0.1889, "step": 26637 }, { "epoch": 0.47511861020939605, "grad_norm": 0.30069538950920105, "learning_rate": 3.145932469578319e-05, "loss": 0.1707, "step": 26638 }, { "epoch": 0.47513644633110974, "grad_norm": 0.2910155653953552, "learning_rate": 3.145782102652458e-05, "loss": 0.1578, "step": 26639 }, { "epoch": 0.4751542824528235, "grad_norm": 0.2799510061740875, "learning_rate": 3.145631733223323e-05, "loss": 0.1176, "step": 26640 }, { "epoch": 0.4751721185745372, "grad_norm": 0.21098117530345917, "learning_rate": 3.1454813612914956e-05, "loss": 0.1382, "step": 26641 }, { "epoch": 0.47518995469625086, "grad_norm": 0.2942548394203186, "learning_rate": 3.14533098685756e-05, "loss": 0.1069, "step": 26642 }, { "epoch": 0.47520779081796455, "grad_norm": 0.2770611345767975, "learning_rate": 3.1451806099220984e-05, "loss": 0.1661, "step": 26643 }, { "epoch": 0.47522562693967824, "grad_norm": 0.18634948134422302, "learning_rate": 3.145030230485694e-05, "loss": 0.1143, "step": 26644 }, { "epoch": 0.4752434630613919, "grad_norm": 0.2504843473434448, "learning_rate": 3.14487984854893e-05, "loss": 0.118, "step": 26645 }, { "epoch": 0.4752612991831056, "grad_norm": 0.2074119597673416, "learning_rate": 3.144729464112388e-05, "loss": 0.0969, "step": 26646 }, { "epoch": 0.4752791353048193, "grad_norm": 0.2745310664176941, "learning_rate": 3.1445790771766524e-05, "loss": 0.1811, "step": 26647 }, { "epoch": 0.475296971426533, "grad_norm": 0.3306925296783447, "learning_rate": 3.1444286877423054e-05, "loss": 0.173, "step": 26648 }, { "epoch": 0.47531480754824673, "grad_norm": 0.23517920076847076, "learning_rate": 3.1442782958099316e-05, "loss": 0.1278, "step": 26649 }, { "epoch": 0.4753326436699604, "grad_norm": 0.25629228353500366, "learning_rate": 3.144127901380111e-05, "loss": 0.0984, "step": 26650 }, { "epoch": 0.4753504797916741, "grad_norm": 0.2576800286769867, "learning_rate": 3.143977504453429e-05, "loss": 0.1631, "step": 26651 }, { "epoch": 0.4753683159133878, "grad_norm": 0.22790028154850006, "learning_rate": 3.143827105030467e-05, "loss": 0.1534, "step": 26652 }, { "epoch": 0.4753861520351015, "grad_norm": 0.34455347061157227, "learning_rate": 3.14367670311181e-05, "loss": 0.1249, "step": 26653 }, { "epoch": 0.47540398815681517, "grad_norm": 0.3306369185447693, "learning_rate": 3.143526298698039e-05, "loss": 0.1035, "step": 26654 }, { "epoch": 0.47542182427852886, "grad_norm": 0.519098162651062, "learning_rate": 3.143375891789738e-05, "loss": 0.1819, "step": 26655 }, { "epoch": 0.47543966040024255, "grad_norm": 0.22022035717964172, "learning_rate": 3.14322548238749e-05, "loss": 0.1629, "step": 26656 }, { "epoch": 0.4754574965219563, "grad_norm": 0.21873527765274048, "learning_rate": 3.143075070491877e-05, "loss": 0.1546, "step": 26657 }, { "epoch": 0.47547533264367, "grad_norm": 0.24396440386772156, "learning_rate": 3.142924656103484e-05, "loss": 0.1383, "step": 26658 }, { "epoch": 0.47549316876538367, "grad_norm": 0.2099212259054184, "learning_rate": 3.1427742392228925e-05, "loss": 0.1401, "step": 26659 }, { "epoch": 0.47551100488709735, "grad_norm": 0.2894495725631714, "learning_rate": 3.1426238198506865e-05, "loss": 0.1842, "step": 26660 }, { "epoch": 0.47552884100881104, "grad_norm": 0.24795302748680115, "learning_rate": 3.1424733979874474e-05, "loss": 0.1228, "step": 26661 }, { "epoch": 0.47554667713052473, "grad_norm": 0.2640261650085449, "learning_rate": 3.1423229736337607e-05, "loss": 0.1527, "step": 26662 }, { "epoch": 0.4755645132522384, "grad_norm": 0.1835392266511917, "learning_rate": 3.142172546790208e-05, "loss": 0.0815, "step": 26663 }, { "epoch": 0.4755823493739521, "grad_norm": 0.22116541862487793, "learning_rate": 3.142022117457372e-05, "loss": 0.1137, "step": 26664 }, { "epoch": 0.4756001854956658, "grad_norm": 0.37021613121032715, "learning_rate": 3.141871685635837e-05, "loss": 0.193, "step": 26665 }, { "epoch": 0.47561802161737954, "grad_norm": 0.2605608105659485, "learning_rate": 3.1417212513261865e-05, "loss": 0.1421, "step": 26666 }, { "epoch": 0.4756358577390932, "grad_norm": 0.32819122076034546, "learning_rate": 3.141570814529001e-05, "loss": 0.1668, "step": 26667 }, { "epoch": 0.4756536938608069, "grad_norm": 0.25336000323295593, "learning_rate": 3.141420375244867e-05, "loss": 0.1502, "step": 26668 }, { "epoch": 0.4756715299825206, "grad_norm": 0.29337984323501587, "learning_rate": 3.1412699334743655e-05, "loss": 0.1388, "step": 26669 }, { "epoch": 0.4756893661042343, "grad_norm": 0.21162143349647522, "learning_rate": 3.14111948921808e-05, "loss": 0.1178, "step": 26670 }, { "epoch": 0.475707202225948, "grad_norm": 0.28766298294067383, "learning_rate": 3.140969042476595e-05, "loss": 0.1829, "step": 26671 }, { "epoch": 0.47572503834766167, "grad_norm": 0.23621641099452972, "learning_rate": 3.1408185932504915e-05, "loss": 0.1327, "step": 26672 }, { "epoch": 0.47574287446937535, "grad_norm": 0.2442992776632309, "learning_rate": 3.1406681415403545e-05, "loss": 0.1749, "step": 26673 }, { "epoch": 0.4757607105910891, "grad_norm": 0.25909215211868286, "learning_rate": 3.1405176873467656e-05, "loss": 0.1801, "step": 26674 }, { "epoch": 0.4757785467128028, "grad_norm": 0.2717550992965698, "learning_rate": 3.14036723067031e-05, "loss": 0.2002, "step": 26675 }, { "epoch": 0.4757963828345165, "grad_norm": 0.1897938847541809, "learning_rate": 3.140216771511569e-05, "loss": 0.1229, "step": 26676 }, { "epoch": 0.47581421895623016, "grad_norm": 0.32422974705696106, "learning_rate": 3.140066309871127e-05, "loss": 0.1357, "step": 26677 }, { "epoch": 0.47583205507794385, "grad_norm": 0.28003808856010437, "learning_rate": 3.139915845749566e-05, "loss": 0.1385, "step": 26678 }, { "epoch": 0.47584989119965754, "grad_norm": 0.27112826704978943, "learning_rate": 3.139765379147472e-05, "loss": 0.2031, "step": 26679 }, { "epoch": 0.4758677273213712, "grad_norm": 0.24170328676700592, "learning_rate": 3.1396149100654257e-05, "loss": 0.1545, "step": 26680 }, { "epoch": 0.4758855634430849, "grad_norm": 0.3290562331676483, "learning_rate": 3.13946443850401e-05, "loss": 0.1675, "step": 26681 }, { "epoch": 0.47590339956479866, "grad_norm": 0.3371567130088806, "learning_rate": 3.13931396446381e-05, "loss": 0.1702, "step": 26682 }, { "epoch": 0.47592123568651234, "grad_norm": 0.279109925031662, "learning_rate": 3.139163487945409e-05, "loss": 0.1471, "step": 26683 }, { "epoch": 0.47593907180822603, "grad_norm": 0.24332238733768463, "learning_rate": 3.139013008949389e-05, "loss": 0.16, "step": 26684 }, { "epoch": 0.4759569079299397, "grad_norm": 0.2929777503013611, "learning_rate": 3.138862527476334e-05, "loss": 0.147, "step": 26685 }, { "epoch": 0.4759747440516534, "grad_norm": 0.2846218943595886, "learning_rate": 3.138712043526827e-05, "loss": 0.1428, "step": 26686 }, { "epoch": 0.4759925801733671, "grad_norm": 0.3117104768753052, "learning_rate": 3.1385615571014516e-05, "loss": 0.2169, "step": 26687 }, { "epoch": 0.4760104162950808, "grad_norm": 0.29619720578193665, "learning_rate": 3.1384110682007914e-05, "loss": 0.1469, "step": 26688 }, { "epoch": 0.4760282524167945, "grad_norm": 0.2650678753852844, "learning_rate": 3.138260576825428e-05, "loss": 0.1139, "step": 26689 }, { "epoch": 0.47604608853850816, "grad_norm": 0.28708723187446594, "learning_rate": 3.1381100829759476e-05, "loss": 0.1748, "step": 26690 }, { "epoch": 0.4760639246602219, "grad_norm": 0.3410675823688507, "learning_rate": 3.137959586652931e-05, "loss": 0.0811, "step": 26691 }, { "epoch": 0.4760817607819356, "grad_norm": 0.2603628933429718, "learning_rate": 3.137809087856964e-05, "loss": 0.1051, "step": 26692 }, { "epoch": 0.4760995969036493, "grad_norm": 0.31957879662513733, "learning_rate": 3.137658586588628e-05, "loss": 0.1106, "step": 26693 }, { "epoch": 0.47611743302536297, "grad_norm": 0.5554287433624268, "learning_rate": 3.137508082848507e-05, "loss": 0.1919, "step": 26694 }, { "epoch": 0.47613526914707666, "grad_norm": 0.2722870707511902, "learning_rate": 3.137357576637184e-05, "loss": 0.1869, "step": 26695 }, { "epoch": 0.47615310526879034, "grad_norm": 0.2875826358795166, "learning_rate": 3.137207067955243e-05, "loss": 0.1314, "step": 26696 }, { "epoch": 0.47617094139050403, "grad_norm": 0.22378547489643097, "learning_rate": 3.1370565568032687e-05, "loss": 0.1353, "step": 26697 }, { "epoch": 0.4761887775122177, "grad_norm": 0.36931565403938293, "learning_rate": 3.136906043181842e-05, "loss": 0.165, "step": 26698 }, { "epoch": 0.47620661363393146, "grad_norm": 0.24093963205814362, "learning_rate": 3.136755527091548e-05, "loss": 0.1887, "step": 26699 }, { "epoch": 0.47622444975564515, "grad_norm": 0.2697581946849823, "learning_rate": 3.1366050085329694e-05, "loss": 0.1626, "step": 26700 }, { "epoch": 0.47624228587735884, "grad_norm": 0.24962428212165833, "learning_rate": 3.136454487506691e-05, "loss": 0.183, "step": 26701 }, { "epoch": 0.4762601219990725, "grad_norm": 0.3362979590892792, "learning_rate": 3.136303964013293e-05, "loss": 0.1822, "step": 26702 }, { "epoch": 0.4762779581207862, "grad_norm": 0.2727450430393219, "learning_rate": 3.136153438053362e-05, "loss": 0.1804, "step": 26703 }, { "epoch": 0.4762957942424999, "grad_norm": 0.21144120395183563, "learning_rate": 3.1360029096274806e-05, "loss": 0.1251, "step": 26704 }, { "epoch": 0.4763136303642136, "grad_norm": 0.2462942749261856, "learning_rate": 3.1358523787362327e-05, "loss": 0.1693, "step": 26705 }, { "epoch": 0.4763314664859273, "grad_norm": 0.3199939429759979, "learning_rate": 3.135701845380201e-05, "loss": 0.1641, "step": 26706 }, { "epoch": 0.47634930260764097, "grad_norm": 0.34898555278778076, "learning_rate": 3.13555130955997e-05, "loss": 0.1611, "step": 26707 }, { "epoch": 0.4763671387293547, "grad_norm": 0.2827635407447815, "learning_rate": 3.135400771276122e-05, "loss": 0.1526, "step": 26708 }, { "epoch": 0.4763849748510684, "grad_norm": 0.2632668912410736, "learning_rate": 3.1352502305292406e-05, "loss": 0.1594, "step": 26709 }, { "epoch": 0.4764028109727821, "grad_norm": 0.2850704789161682, "learning_rate": 3.1350996873199104e-05, "loss": 0.1348, "step": 26710 }, { "epoch": 0.4764206470944958, "grad_norm": 0.3156528174877167, "learning_rate": 3.134949141648715e-05, "loss": 0.211, "step": 26711 }, { "epoch": 0.47643848321620946, "grad_norm": 0.3261871039867401, "learning_rate": 3.134798593516237e-05, "loss": 0.1768, "step": 26712 }, { "epoch": 0.47645631933792315, "grad_norm": 0.313679575920105, "learning_rate": 3.13464804292306e-05, "loss": 0.1972, "step": 26713 }, { "epoch": 0.47647415545963684, "grad_norm": 0.3305988907814026, "learning_rate": 3.134497489869769e-05, "loss": 0.2131, "step": 26714 }, { "epoch": 0.4764919915813505, "grad_norm": 0.2083570957183838, "learning_rate": 3.1343469343569456e-05, "loss": 0.1095, "step": 26715 }, { "epoch": 0.47650982770306427, "grad_norm": 0.2725713551044464, "learning_rate": 3.134196376385175e-05, "loss": 0.1414, "step": 26716 }, { "epoch": 0.47652766382477796, "grad_norm": 0.27093926072120667, "learning_rate": 3.134045815955039e-05, "loss": 0.1489, "step": 26717 }, { "epoch": 0.47654549994649165, "grad_norm": 0.21418976783752441, "learning_rate": 3.133895253067124e-05, "loss": 0.1284, "step": 26718 }, { "epoch": 0.47656333606820533, "grad_norm": 0.313117653131485, "learning_rate": 3.133744687722011e-05, "loss": 0.1493, "step": 26719 }, { "epoch": 0.476581172189919, "grad_norm": 0.17176547646522522, "learning_rate": 3.1335941199202855e-05, "loss": 0.1314, "step": 26720 }, { "epoch": 0.4765990083116327, "grad_norm": 0.3212798833847046, "learning_rate": 3.13344354966253e-05, "loss": 0.1568, "step": 26721 }, { "epoch": 0.4766168444333464, "grad_norm": 0.2611295282840729, "learning_rate": 3.133292976949328e-05, "loss": 0.115, "step": 26722 }, { "epoch": 0.4766346805550601, "grad_norm": 0.2615753412246704, "learning_rate": 3.1331424017812644e-05, "loss": 0.1556, "step": 26723 }, { "epoch": 0.47665251667677383, "grad_norm": 0.33312904834747314, "learning_rate": 3.1329918241589215e-05, "loss": 0.151, "step": 26724 }, { "epoch": 0.4766703527984875, "grad_norm": 0.2551664710044861, "learning_rate": 3.132841244082885e-05, "loss": 0.1592, "step": 26725 }, { "epoch": 0.4766881889202012, "grad_norm": 0.27993300557136536, "learning_rate": 3.1326906615537355e-05, "loss": 0.1694, "step": 26726 }, { "epoch": 0.4767060250419149, "grad_norm": 0.23331375420093536, "learning_rate": 3.13254007657206e-05, "loss": 0.1292, "step": 26727 }, { "epoch": 0.4767238611636286, "grad_norm": 0.20556502044200897, "learning_rate": 3.1323894891384396e-05, "loss": 0.1375, "step": 26728 }, { "epoch": 0.47674169728534227, "grad_norm": 0.30760133266448975, "learning_rate": 3.1322388992534604e-05, "loss": 0.1186, "step": 26729 }, { "epoch": 0.47675953340705596, "grad_norm": 0.18481747806072235, "learning_rate": 3.132088306917703e-05, "loss": 0.1028, "step": 26730 }, { "epoch": 0.47677736952876965, "grad_norm": 0.3166261911392212, "learning_rate": 3.131937712131754e-05, "loss": 0.1684, "step": 26731 }, { "epoch": 0.47679520565048333, "grad_norm": 0.3804336190223694, "learning_rate": 3.131787114896196e-05, "loss": 0.1433, "step": 26732 }, { "epoch": 0.4768130417721971, "grad_norm": 0.3028728663921356, "learning_rate": 3.1316365152116135e-05, "loss": 0.1548, "step": 26733 }, { "epoch": 0.47683087789391077, "grad_norm": 0.3954489529132843, "learning_rate": 3.1314859130785894e-05, "loss": 0.1965, "step": 26734 }, { "epoch": 0.47684871401562445, "grad_norm": 0.19537022709846497, "learning_rate": 3.131335308497708e-05, "loss": 0.1208, "step": 26735 }, { "epoch": 0.47686655013733814, "grad_norm": 0.25616368651390076, "learning_rate": 3.131184701469553e-05, "loss": 0.163, "step": 26736 }, { "epoch": 0.47688438625905183, "grad_norm": 0.2302490770816803, "learning_rate": 3.131034091994707e-05, "loss": 0.1329, "step": 26737 }, { "epoch": 0.4769022223807655, "grad_norm": 0.28550970554351807, "learning_rate": 3.130883480073755e-05, "loss": 0.1659, "step": 26738 }, { "epoch": 0.4769200585024792, "grad_norm": 0.29962217807769775, "learning_rate": 3.130732865707281e-05, "loss": 0.132, "step": 26739 }, { "epoch": 0.4769378946241929, "grad_norm": 0.24919135868549347, "learning_rate": 3.1305822488958694e-05, "loss": 0.1409, "step": 26740 }, { "epoch": 0.47695573074590664, "grad_norm": 0.2445518672466278, "learning_rate": 3.130431629640103e-05, "loss": 0.1575, "step": 26741 }, { "epoch": 0.4769735668676203, "grad_norm": 0.31576380133628845, "learning_rate": 3.1302810079405654e-05, "loss": 0.1242, "step": 26742 }, { "epoch": 0.476991402989334, "grad_norm": 0.36300721764564514, "learning_rate": 3.1301303837978415e-05, "loss": 0.1468, "step": 26743 }, { "epoch": 0.4770092391110477, "grad_norm": 0.27243566513061523, "learning_rate": 3.129979757212513e-05, "loss": 0.168, "step": 26744 }, { "epoch": 0.4770270752327614, "grad_norm": 0.29267990589141846, "learning_rate": 3.1298291281851664e-05, "loss": 0.1304, "step": 26745 }, { "epoch": 0.4770449113544751, "grad_norm": 0.21837356686592102, "learning_rate": 3.1296784967163854e-05, "loss": 0.1917, "step": 26746 }, { "epoch": 0.47706274747618876, "grad_norm": 0.22631166875362396, "learning_rate": 3.129527862806753e-05, "loss": 0.0898, "step": 26747 }, { "epoch": 0.47708058359790245, "grad_norm": 0.20745904743671417, "learning_rate": 3.1293772264568524e-05, "loss": 0.0834, "step": 26748 }, { "epoch": 0.47709841971961614, "grad_norm": 0.2255771905183792, "learning_rate": 3.1292265876672686e-05, "loss": 0.1478, "step": 26749 }, { "epoch": 0.4771162558413299, "grad_norm": 0.28668999671936035, "learning_rate": 3.1290759464385844e-05, "loss": 0.1301, "step": 26750 }, { "epoch": 0.47713409196304357, "grad_norm": 0.24045246839523315, "learning_rate": 3.128925302771385e-05, "loss": 0.1882, "step": 26751 }, { "epoch": 0.47715192808475726, "grad_norm": 0.3463227450847626, "learning_rate": 3.128774656666254e-05, "loss": 0.156, "step": 26752 }, { "epoch": 0.47716976420647095, "grad_norm": 0.28448840975761414, "learning_rate": 3.128624008123775e-05, "loss": 0.1414, "step": 26753 }, { "epoch": 0.47718760032818464, "grad_norm": 0.283145546913147, "learning_rate": 3.128473357144533e-05, "loss": 0.1245, "step": 26754 }, { "epoch": 0.4772054364498983, "grad_norm": 0.24987533688545227, "learning_rate": 3.128322703729111e-05, "loss": 0.1467, "step": 26755 }, { "epoch": 0.477223272571612, "grad_norm": 0.3295406699180603, "learning_rate": 3.128172047878093e-05, "loss": 0.1493, "step": 26756 }, { "epoch": 0.4772411086933257, "grad_norm": 0.3394148647785187, "learning_rate": 3.1280213895920634e-05, "loss": 0.1207, "step": 26757 }, { "epoch": 0.47725894481503944, "grad_norm": 0.33238524198532104, "learning_rate": 3.127870728871606e-05, "loss": 0.1307, "step": 26758 }, { "epoch": 0.47727678093675313, "grad_norm": 0.3225302994251251, "learning_rate": 3.127720065717304e-05, "loss": 0.1835, "step": 26759 }, { "epoch": 0.4772946170584668, "grad_norm": 0.21729303896427155, "learning_rate": 3.127569400129743e-05, "loss": 0.1063, "step": 26760 }, { "epoch": 0.4773124531801805, "grad_norm": 0.3240685760974884, "learning_rate": 3.1274187321095056e-05, "loss": 0.2023, "step": 26761 }, { "epoch": 0.4773302893018942, "grad_norm": 0.3455943763256073, "learning_rate": 3.127268061657177e-05, "loss": 0.1667, "step": 26762 }, { "epoch": 0.4773481254236079, "grad_norm": 0.21467365324497223, "learning_rate": 3.127117388773341e-05, "loss": 0.1464, "step": 26763 }, { "epoch": 0.47736596154532157, "grad_norm": 0.27158910036087036, "learning_rate": 3.126966713458581e-05, "loss": 0.2106, "step": 26764 }, { "epoch": 0.47738379766703526, "grad_norm": 0.22854214906692505, "learning_rate": 3.126816035713481e-05, "loss": 0.1252, "step": 26765 }, { "epoch": 0.47740163378874895, "grad_norm": 0.28195810317993164, "learning_rate": 3.126665355538626e-05, "loss": 0.1365, "step": 26766 }, { "epoch": 0.4774194699104627, "grad_norm": 0.2766095697879791, "learning_rate": 3.126514672934599e-05, "loss": 0.1624, "step": 26767 }, { "epoch": 0.4774373060321764, "grad_norm": 0.2377602607011795, "learning_rate": 3.126363987901986e-05, "loss": 0.1708, "step": 26768 }, { "epoch": 0.47745514215389007, "grad_norm": 0.23954246938228607, "learning_rate": 3.1262133004413696e-05, "loss": 0.1572, "step": 26769 }, { "epoch": 0.47747297827560375, "grad_norm": 0.22584986686706543, "learning_rate": 3.126062610553334e-05, "loss": 0.1169, "step": 26770 }, { "epoch": 0.47749081439731744, "grad_norm": 0.23570430278778076, "learning_rate": 3.125911918238463e-05, "loss": 0.0877, "step": 26771 }, { "epoch": 0.47750865051903113, "grad_norm": 0.2953796088695526, "learning_rate": 3.125761223497341e-05, "loss": 0.0967, "step": 26772 }, { "epoch": 0.4775264866407448, "grad_norm": 0.2231953889131546, "learning_rate": 3.125610526330553e-05, "loss": 0.1599, "step": 26773 }, { "epoch": 0.4775443227624585, "grad_norm": 0.33506304025650024, "learning_rate": 3.1254598267386826e-05, "loss": 0.1411, "step": 26774 }, { "epoch": 0.47756215888417225, "grad_norm": 0.2571059465408325, "learning_rate": 3.125309124722314e-05, "loss": 0.11, "step": 26775 }, { "epoch": 0.47757999500588594, "grad_norm": 0.2351292371749878, "learning_rate": 3.125158420282031e-05, "loss": 0.139, "step": 26776 }, { "epoch": 0.4775978311275996, "grad_norm": 0.2855851352214813, "learning_rate": 3.125007713418418e-05, "loss": 0.1339, "step": 26777 }, { "epoch": 0.4776156672493133, "grad_norm": 0.26687389612197876, "learning_rate": 3.1248570041320594e-05, "loss": 0.1525, "step": 26778 }, { "epoch": 0.477633503371027, "grad_norm": 0.25127851963043213, "learning_rate": 3.124706292423539e-05, "loss": 0.1743, "step": 26779 }, { "epoch": 0.4776513394927407, "grad_norm": 0.23393309116363525, "learning_rate": 3.124555578293441e-05, "loss": 0.1118, "step": 26780 }, { "epoch": 0.4776691756144544, "grad_norm": 0.25191599130630493, "learning_rate": 3.1244048617423504e-05, "loss": 0.1743, "step": 26781 }, { "epoch": 0.47768701173616807, "grad_norm": 0.3310117721557617, "learning_rate": 3.1242541427708504e-05, "loss": 0.1609, "step": 26782 }, { "epoch": 0.4777048478578818, "grad_norm": 0.5342959761619568, "learning_rate": 3.1241034213795264e-05, "loss": 0.1776, "step": 26783 }, { "epoch": 0.4777226839795955, "grad_norm": 0.2696291506290436, "learning_rate": 3.123952697568962e-05, "loss": 0.1562, "step": 26784 }, { "epoch": 0.4777405201013092, "grad_norm": 0.3045158386230469, "learning_rate": 3.1238019713397406e-05, "loss": 0.1707, "step": 26785 }, { "epoch": 0.4777583562230229, "grad_norm": 0.22710925340652466, "learning_rate": 3.123651242692448e-05, "loss": 0.1729, "step": 26786 }, { "epoch": 0.47777619234473656, "grad_norm": 0.3497841954231262, "learning_rate": 3.123500511627667e-05, "loss": 0.1491, "step": 26787 }, { "epoch": 0.47779402846645025, "grad_norm": 0.3096001446247101, "learning_rate": 3.123349778145983e-05, "loss": 0.1477, "step": 26788 }, { "epoch": 0.47781186458816394, "grad_norm": 0.20097851753234863, "learning_rate": 3.12319904224798e-05, "loss": 0.1251, "step": 26789 }, { "epoch": 0.4778297007098776, "grad_norm": 0.2788364887237549, "learning_rate": 3.123048303934243e-05, "loss": 0.1519, "step": 26790 }, { "epoch": 0.4778475368315913, "grad_norm": 0.21522000432014465, "learning_rate": 3.1228975632053546e-05, "loss": 0.1329, "step": 26791 }, { "epoch": 0.47786537295330506, "grad_norm": 0.2439911812543869, "learning_rate": 3.1227468200619006e-05, "loss": 0.1062, "step": 26792 }, { "epoch": 0.47788320907501874, "grad_norm": 0.3866584897041321, "learning_rate": 3.1225960745044644e-05, "loss": 0.1775, "step": 26793 }, { "epoch": 0.47790104519673243, "grad_norm": 0.27161309123039246, "learning_rate": 3.1224453265336314e-05, "loss": 0.1537, "step": 26794 }, { "epoch": 0.4779188813184461, "grad_norm": 0.2869950532913208, "learning_rate": 3.1222945761499843e-05, "loss": 0.1028, "step": 26795 }, { "epoch": 0.4779367174401598, "grad_norm": 0.2506483197212219, "learning_rate": 3.12214382335411e-05, "loss": 0.1462, "step": 26796 }, { "epoch": 0.4779545535618735, "grad_norm": 0.3127689063549042, "learning_rate": 3.12199306814659e-05, "loss": 0.1724, "step": 26797 }, { "epoch": 0.4779723896835872, "grad_norm": 0.22565217316150665, "learning_rate": 3.121842310528011e-05, "loss": 0.1195, "step": 26798 }, { "epoch": 0.4779902258053009, "grad_norm": 0.19420625269412994, "learning_rate": 3.121691550498956e-05, "loss": 0.1326, "step": 26799 }, { "epoch": 0.4780080619270146, "grad_norm": 0.2743839621543884, "learning_rate": 3.1215407880600096e-05, "loss": 0.1545, "step": 26800 }, { "epoch": 0.4780258980487283, "grad_norm": 0.2599467635154724, "learning_rate": 3.121390023211757e-05, "loss": 0.1313, "step": 26801 }, { "epoch": 0.478043734170442, "grad_norm": 0.22616392374038696, "learning_rate": 3.1212392559547814e-05, "loss": 0.1225, "step": 26802 }, { "epoch": 0.4780615702921557, "grad_norm": 0.23243194818496704, "learning_rate": 3.121088486289669e-05, "loss": 0.1832, "step": 26803 }, { "epoch": 0.47807940641386937, "grad_norm": 0.23705218732357025, "learning_rate": 3.120937714217002e-05, "loss": 0.115, "step": 26804 }, { "epoch": 0.47809724253558306, "grad_norm": 0.25697460770606995, "learning_rate": 3.120786939737367e-05, "loss": 0.1286, "step": 26805 }, { "epoch": 0.47811507865729674, "grad_norm": 0.178166463971138, "learning_rate": 3.1206361628513456e-05, "loss": 0.1258, "step": 26806 }, { "epoch": 0.47813291477901043, "grad_norm": 0.3773612976074219, "learning_rate": 3.120485383559525e-05, "loss": 0.1303, "step": 26807 }, { "epoch": 0.4781507509007241, "grad_norm": 0.3702050745487213, "learning_rate": 3.120334601862489e-05, "loss": 0.1412, "step": 26808 }, { "epoch": 0.47816858702243786, "grad_norm": 0.2967703938484192, "learning_rate": 3.120183817760822e-05, "loss": 0.1549, "step": 26809 }, { "epoch": 0.47818642314415155, "grad_norm": 0.3417518436908722, "learning_rate": 3.120033031255108e-05, "loss": 0.1623, "step": 26810 }, { "epoch": 0.47820425926586524, "grad_norm": 0.2530301809310913, "learning_rate": 3.119882242345932e-05, "loss": 0.1336, "step": 26811 }, { "epoch": 0.4782220953875789, "grad_norm": 0.2536642253398895, "learning_rate": 3.1197314510338786e-05, "loss": 0.1089, "step": 26812 }, { "epoch": 0.4782399315092926, "grad_norm": 0.3018265664577484, "learning_rate": 3.1195806573195314e-05, "loss": 0.1206, "step": 26813 }, { "epoch": 0.4782577676310063, "grad_norm": 0.2743295729160309, "learning_rate": 3.119429861203476e-05, "loss": 0.1477, "step": 26814 }, { "epoch": 0.47827560375272, "grad_norm": 0.23781079053878784, "learning_rate": 3.119279062686296e-05, "loss": 0.1188, "step": 26815 }, { "epoch": 0.4782934398744337, "grad_norm": 0.33741384744644165, "learning_rate": 3.119128261768577e-05, "loss": 0.1691, "step": 26816 }, { "epoch": 0.4783112759961474, "grad_norm": 0.33613476157188416, "learning_rate": 3.1189774584509024e-05, "loss": 0.1396, "step": 26817 }, { "epoch": 0.4783291121178611, "grad_norm": 0.2095946967601776, "learning_rate": 3.1188266527338584e-05, "loss": 0.1444, "step": 26818 }, { "epoch": 0.4783469482395748, "grad_norm": 0.2285582572221756, "learning_rate": 3.118675844618027e-05, "loss": 0.1866, "step": 26819 }, { "epoch": 0.4783647843612885, "grad_norm": 0.3779899775981903, "learning_rate": 3.1185250341039965e-05, "loss": 0.1456, "step": 26820 }, { "epoch": 0.4783826204830022, "grad_norm": 0.23052087426185608, "learning_rate": 3.1183742211923475e-05, "loss": 0.1414, "step": 26821 }, { "epoch": 0.47840045660471586, "grad_norm": 0.251264750957489, "learning_rate": 3.118223405883667e-05, "loss": 0.1401, "step": 26822 }, { "epoch": 0.47841829272642955, "grad_norm": 0.3534930646419525, "learning_rate": 3.118072588178539e-05, "loss": 0.1341, "step": 26823 }, { "epoch": 0.47843612884814324, "grad_norm": 0.23159746825695038, "learning_rate": 3.1179217680775485e-05, "loss": 0.1347, "step": 26824 }, { "epoch": 0.478453964969857, "grad_norm": 0.23824146389961243, "learning_rate": 3.11777094558128e-05, "loss": 0.1206, "step": 26825 }, { "epoch": 0.47847180109157067, "grad_norm": 0.29496538639068604, "learning_rate": 3.117620120690317e-05, "loss": 0.1087, "step": 26826 }, { "epoch": 0.47848963721328436, "grad_norm": 0.27531182765960693, "learning_rate": 3.117469293405246e-05, "loss": 0.1474, "step": 26827 }, { "epoch": 0.47850747333499805, "grad_norm": 0.3209870755672455, "learning_rate": 3.11731846372665e-05, "loss": 0.1638, "step": 26828 }, { "epoch": 0.47852530945671173, "grad_norm": 0.23279798030853271, "learning_rate": 3.1171676316551153e-05, "loss": 0.1212, "step": 26829 }, { "epoch": 0.4785431455784254, "grad_norm": 0.2647162675857544, "learning_rate": 3.1170167971912246e-05, "loss": 0.1494, "step": 26830 }, { "epoch": 0.4785609817001391, "grad_norm": 0.32629281282424927, "learning_rate": 3.116865960335565e-05, "loss": 0.2031, "step": 26831 }, { "epoch": 0.4785788178218528, "grad_norm": 0.24953068792819977, "learning_rate": 3.116715121088718e-05, "loss": 0.1488, "step": 26832 }, { "epoch": 0.4785966539435665, "grad_norm": 0.21855202317237854, "learning_rate": 3.1165642794512724e-05, "loss": 0.1352, "step": 26833 }, { "epoch": 0.47861449006528023, "grad_norm": 0.3009147346019745, "learning_rate": 3.11641343542381e-05, "loss": 0.1691, "step": 26834 }, { "epoch": 0.4786323261869939, "grad_norm": 0.25184929370880127, "learning_rate": 3.1162625890069154e-05, "loss": 0.1347, "step": 26835 }, { "epoch": 0.4786501623087076, "grad_norm": 0.27973997592926025, "learning_rate": 3.116111740201174e-05, "loss": 0.1669, "step": 26836 }, { "epoch": 0.4786679984304213, "grad_norm": 0.20674775540828705, "learning_rate": 3.1159608890071715e-05, "loss": 0.132, "step": 26837 }, { "epoch": 0.478685834552135, "grad_norm": 0.24774637818336487, "learning_rate": 3.1158100354254924e-05, "loss": 0.1642, "step": 26838 }, { "epoch": 0.47870367067384867, "grad_norm": 0.25628334283828735, "learning_rate": 3.11565917945672e-05, "loss": 0.147, "step": 26839 }, { "epoch": 0.47872150679556236, "grad_norm": 0.21749669313430786, "learning_rate": 3.11550832110144e-05, "loss": 0.1401, "step": 26840 }, { "epoch": 0.47873934291727605, "grad_norm": 0.29271650314331055, "learning_rate": 3.1153574603602375e-05, "loss": 0.1625, "step": 26841 }, { "epoch": 0.4787571790389898, "grad_norm": 0.29296764731407166, "learning_rate": 3.1152065972336964e-05, "loss": 0.154, "step": 26842 }, { "epoch": 0.4787750151607035, "grad_norm": 0.22280484437942505, "learning_rate": 3.115055731722402e-05, "loss": 0.0953, "step": 26843 }, { "epoch": 0.47879285128241716, "grad_norm": 0.27112606167793274, "learning_rate": 3.11490486382694e-05, "loss": 0.2115, "step": 26844 }, { "epoch": 0.47881068740413085, "grad_norm": 0.27489474415779114, "learning_rate": 3.1147539935478935e-05, "loss": 0.1453, "step": 26845 }, { "epoch": 0.47882852352584454, "grad_norm": 0.18704353272914886, "learning_rate": 3.114603120885849e-05, "loss": 0.0846, "step": 26846 }, { "epoch": 0.47884635964755823, "grad_norm": 0.23803308606147766, "learning_rate": 3.11445224584139e-05, "loss": 0.1511, "step": 26847 }, { "epoch": 0.4788641957692719, "grad_norm": 0.22296997904777527, "learning_rate": 3.114301368415102e-05, "loss": 0.1541, "step": 26848 }, { "epoch": 0.4788820318909856, "grad_norm": 0.267245352268219, "learning_rate": 3.1141504886075695e-05, "loss": 0.1384, "step": 26849 }, { "epoch": 0.4788998680126993, "grad_norm": 0.2892456650733948, "learning_rate": 3.113999606419378e-05, "loss": 0.1375, "step": 26850 }, { "epoch": 0.47891770413441304, "grad_norm": 0.3024786114692688, "learning_rate": 3.113848721851113e-05, "loss": 0.1139, "step": 26851 }, { "epoch": 0.4789355402561267, "grad_norm": 0.4629128575325012, "learning_rate": 3.113697834903356e-05, "loss": 0.1468, "step": 26852 }, { "epoch": 0.4789533763778404, "grad_norm": 0.21727323532104492, "learning_rate": 3.113546945576696e-05, "loss": 0.1694, "step": 26853 }, { "epoch": 0.4789712124995541, "grad_norm": 0.33874839544296265, "learning_rate": 3.113396053871715e-05, "loss": 0.1671, "step": 26854 }, { "epoch": 0.4789890486212678, "grad_norm": 0.22276677191257477, "learning_rate": 3.1132451597889996e-05, "loss": 0.1516, "step": 26855 }, { "epoch": 0.4790068847429815, "grad_norm": 0.26417648792266846, "learning_rate": 3.113094263329134e-05, "loss": 0.1837, "step": 26856 }, { "epoch": 0.47902472086469516, "grad_norm": 0.299300879240036, "learning_rate": 3.112943364492703e-05, "loss": 0.1344, "step": 26857 }, { "epoch": 0.47904255698640885, "grad_norm": 0.31141749024391174, "learning_rate": 3.112792463280292e-05, "loss": 0.1842, "step": 26858 }, { "epoch": 0.4790603931081226, "grad_norm": 0.2354898750782013, "learning_rate": 3.112641559692486e-05, "loss": 0.1295, "step": 26859 }, { "epoch": 0.4790782292298363, "grad_norm": 0.361444354057312, "learning_rate": 3.11249065372987e-05, "loss": 0.1836, "step": 26860 }, { "epoch": 0.47909606535154997, "grad_norm": 0.25282618403434753, "learning_rate": 3.112339745393029e-05, "loss": 0.1605, "step": 26861 }, { "epoch": 0.47911390147326366, "grad_norm": 0.2857251763343811, "learning_rate": 3.1121888346825465e-05, "loss": 0.1725, "step": 26862 }, { "epoch": 0.47913173759497735, "grad_norm": 0.23793472349643707, "learning_rate": 3.1120379215990085e-05, "loss": 0.1511, "step": 26863 }, { "epoch": 0.47914957371669104, "grad_norm": 0.2909567952156067, "learning_rate": 3.111887006143001e-05, "loss": 0.1242, "step": 26864 }, { "epoch": 0.4791674098384047, "grad_norm": 0.2157069891691208, "learning_rate": 3.1117360883151074e-05, "loss": 0.1461, "step": 26865 }, { "epoch": 0.4791852459601184, "grad_norm": 0.20435114204883575, "learning_rate": 3.1115851681159147e-05, "loss": 0.1117, "step": 26866 }, { "epoch": 0.4792030820818321, "grad_norm": 0.23241065442562103, "learning_rate": 3.1114342455460054e-05, "loss": 0.144, "step": 26867 }, { "epoch": 0.47922091820354584, "grad_norm": 0.3007977604866028, "learning_rate": 3.1112833206059665e-05, "loss": 0.1938, "step": 26868 }, { "epoch": 0.47923875432525953, "grad_norm": 0.23746216297149658, "learning_rate": 3.111132393296382e-05, "loss": 0.1428, "step": 26869 }, { "epoch": 0.4792565904469732, "grad_norm": 0.23492389917373657, "learning_rate": 3.110981463617837e-05, "loss": 0.1942, "step": 26870 }, { "epoch": 0.4792744265686869, "grad_norm": 0.2607043981552124, "learning_rate": 3.110830531570917e-05, "loss": 0.1229, "step": 26871 }, { "epoch": 0.4792922626904006, "grad_norm": 0.21305793523788452, "learning_rate": 3.1106795971562076e-05, "loss": 0.1461, "step": 26872 }, { "epoch": 0.4793100988121143, "grad_norm": 0.32901135087013245, "learning_rate": 3.110528660374292e-05, "loss": 0.142, "step": 26873 }, { "epoch": 0.47932793493382797, "grad_norm": 0.26443949341773987, "learning_rate": 3.1103777212257575e-05, "loss": 0.1249, "step": 26874 }, { "epoch": 0.47934577105554166, "grad_norm": 0.24153360724449158, "learning_rate": 3.110226779711187e-05, "loss": 0.1809, "step": 26875 }, { "epoch": 0.4793636071772554, "grad_norm": 0.2590068280696869, "learning_rate": 3.110075835831168e-05, "loss": 0.1182, "step": 26876 }, { "epoch": 0.4793814432989691, "grad_norm": 0.3583972752094269, "learning_rate": 3.109924889586283e-05, "loss": 0.223, "step": 26877 }, { "epoch": 0.4793992794206828, "grad_norm": 0.39803624153137207, "learning_rate": 3.1097739409771194e-05, "loss": 0.1851, "step": 26878 }, { "epoch": 0.47941711554239647, "grad_norm": 0.33544570207595825, "learning_rate": 3.1096229900042615e-05, "loss": 0.1839, "step": 26879 }, { "epoch": 0.47943495166411015, "grad_norm": 0.2965698540210724, "learning_rate": 3.109472036668294e-05, "loss": 0.1018, "step": 26880 }, { "epoch": 0.47945278778582384, "grad_norm": 0.2768281400203705, "learning_rate": 3.109321080969803e-05, "loss": 0.1614, "step": 26881 }, { "epoch": 0.47947062390753753, "grad_norm": 0.2305006980895996, "learning_rate": 3.109170122909372e-05, "loss": 0.1609, "step": 26882 }, { "epoch": 0.4794884600292512, "grad_norm": 0.24510516226291656, "learning_rate": 3.1090191624875875e-05, "loss": 0.1272, "step": 26883 }, { "epoch": 0.47950629615096496, "grad_norm": 0.2029527723789215, "learning_rate": 3.108868199705034e-05, "loss": 0.1564, "step": 26884 }, { "epoch": 0.47952413227267865, "grad_norm": 0.317044734954834, "learning_rate": 3.108717234562298e-05, "loss": 0.1425, "step": 26885 }, { "epoch": 0.47954196839439234, "grad_norm": 0.2231772243976593, "learning_rate": 3.108566267059963e-05, "loss": 0.1742, "step": 26886 }, { "epoch": 0.479559804516106, "grad_norm": 0.20359310507774353, "learning_rate": 3.1084152971986155e-05, "loss": 0.1164, "step": 26887 }, { "epoch": 0.4795776406378197, "grad_norm": 0.24313755333423615, "learning_rate": 3.10826432497884e-05, "loss": 0.1124, "step": 26888 }, { "epoch": 0.4795954767595334, "grad_norm": 0.24253372848033905, "learning_rate": 3.1081133504012224e-05, "loss": 0.1406, "step": 26889 }, { "epoch": 0.4796133128812471, "grad_norm": 0.20761600136756897, "learning_rate": 3.1079623734663465e-05, "loss": 0.1582, "step": 26890 }, { "epoch": 0.4796311490029608, "grad_norm": 0.2471642941236496, "learning_rate": 3.107811394174798e-05, "loss": 0.1496, "step": 26891 }, { "epoch": 0.47964898512467447, "grad_norm": 0.20048244297504425, "learning_rate": 3.1076604125271644e-05, "loss": 0.1271, "step": 26892 }, { "epoch": 0.4796668212463882, "grad_norm": 0.22641848027706146, "learning_rate": 3.107509428524028e-05, "loss": 0.1311, "step": 26893 }, { "epoch": 0.4796846573681019, "grad_norm": 0.26793721318244934, "learning_rate": 3.107358442165976e-05, "loss": 0.1143, "step": 26894 }, { "epoch": 0.4797024934898156, "grad_norm": 0.25153848528862, "learning_rate": 3.1072074534535916e-05, "loss": 0.1505, "step": 26895 }, { "epoch": 0.4797203296115293, "grad_norm": 0.22778263688087463, "learning_rate": 3.1070564623874625e-05, "loss": 0.106, "step": 26896 }, { "epoch": 0.47973816573324296, "grad_norm": 0.2666641175746918, "learning_rate": 3.106905468968172e-05, "loss": 0.122, "step": 26897 }, { "epoch": 0.47975600185495665, "grad_norm": 0.2887571156024933, "learning_rate": 3.106754473196307e-05, "loss": 0.1307, "step": 26898 }, { "epoch": 0.47977383797667034, "grad_norm": 0.2980011999607086, "learning_rate": 3.106603475072452e-05, "loss": 0.1073, "step": 26899 }, { "epoch": 0.479791674098384, "grad_norm": 0.31376636028289795, "learning_rate": 3.106452474597192e-05, "loss": 0.1096, "step": 26900 }, { "epoch": 0.47980951022009777, "grad_norm": 0.30685168504714966, "learning_rate": 3.106301471771113e-05, "loss": 0.1168, "step": 26901 }, { "epoch": 0.47982734634181146, "grad_norm": 0.3056320548057556, "learning_rate": 3.106150466594801e-05, "loss": 0.1571, "step": 26902 }, { "epoch": 0.47984518246352514, "grad_norm": 0.42128077149391174, "learning_rate": 3.105999459068839e-05, "loss": 0.1397, "step": 26903 }, { "epoch": 0.47986301858523883, "grad_norm": 0.37117356061935425, "learning_rate": 3.105848449193814e-05, "loss": 0.1407, "step": 26904 }, { "epoch": 0.4798808547069525, "grad_norm": 0.2590087354183197, "learning_rate": 3.1056974369703115e-05, "loss": 0.1771, "step": 26905 }, { "epoch": 0.4798986908286662, "grad_norm": 0.2574358880519867, "learning_rate": 3.105546422398916e-05, "loss": 0.1101, "step": 26906 }, { "epoch": 0.4799165269503799, "grad_norm": 0.34375739097595215, "learning_rate": 3.105395405480215e-05, "loss": 0.1291, "step": 26907 }, { "epoch": 0.4799343630720936, "grad_norm": 0.18603846430778503, "learning_rate": 3.10524438621479e-05, "loss": 0.1025, "step": 26908 }, { "epoch": 0.47995219919380727, "grad_norm": 0.2082604020833969, "learning_rate": 3.105093364603231e-05, "loss": 0.1559, "step": 26909 }, { "epoch": 0.479970035315521, "grad_norm": 0.3786512613296509, "learning_rate": 3.10494234064612e-05, "loss": 0.2184, "step": 26910 }, { "epoch": 0.4799878714372347, "grad_norm": 0.200238436460495, "learning_rate": 3.1047913143440436e-05, "loss": 0.1297, "step": 26911 }, { "epoch": 0.4800057075589484, "grad_norm": 0.22389757633209229, "learning_rate": 3.104640285697587e-05, "loss": 0.1229, "step": 26912 }, { "epoch": 0.4800235436806621, "grad_norm": 0.21933946013450623, "learning_rate": 3.104489254707336e-05, "loss": 0.1544, "step": 26913 }, { "epoch": 0.48004137980237577, "grad_norm": 0.26832059025764465, "learning_rate": 3.104338221373876e-05, "loss": 0.1536, "step": 26914 }, { "epoch": 0.48005921592408946, "grad_norm": 0.24429138004779816, "learning_rate": 3.104187185697792e-05, "loss": 0.1235, "step": 26915 }, { "epoch": 0.48007705204580314, "grad_norm": 0.32528597116470337, "learning_rate": 3.1040361476796705e-05, "loss": 0.1545, "step": 26916 }, { "epoch": 0.48009488816751683, "grad_norm": 0.22587130963802338, "learning_rate": 3.103885107320096e-05, "loss": 0.1595, "step": 26917 }, { "epoch": 0.4801127242892306, "grad_norm": 0.2778416574001312, "learning_rate": 3.1037340646196534e-05, "loss": 0.1663, "step": 26918 }, { "epoch": 0.48013056041094426, "grad_norm": 0.3093664348125458, "learning_rate": 3.1035830195789295e-05, "loss": 0.1392, "step": 26919 }, { "epoch": 0.48014839653265795, "grad_norm": 0.1803629845380783, "learning_rate": 3.10343197219851e-05, "loss": 0.0912, "step": 26920 }, { "epoch": 0.48016623265437164, "grad_norm": 0.305999755859375, "learning_rate": 3.1032809224789795e-05, "loss": 0.1372, "step": 26921 }, { "epoch": 0.4801840687760853, "grad_norm": 0.231741800904274, "learning_rate": 3.103129870420923e-05, "loss": 0.1729, "step": 26922 }, { "epoch": 0.480201904897799, "grad_norm": 0.254946231842041, "learning_rate": 3.1029788160249275e-05, "loss": 0.1531, "step": 26923 }, { "epoch": 0.4802197410195127, "grad_norm": 0.25297650694847107, "learning_rate": 3.1028277592915785e-05, "loss": 0.1966, "step": 26924 }, { "epoch": 0.4802375771412264, "grad_norm": 0.20638325810432434, "learning_rate": 3.1026767002214594e-05, "loss": 0.1033, "step": 26925 }, { "epoch": 0.48025541326294013, "grad_norm": 0.2511560916900635, "learning_rate": 3.102525638815158e-05, "loss": 0.1599, "step": 26926 }, { "epoch": 0.4802732493846538, "grad_norm": 0.22767919301986694, "learning_rate": 3.102374575073259e-05, "loss": 0.1474, "step": 26927 }, { "epoch": 0.4802910855063675, "grad_norm": 0.39325883984565735, "learning_rate": 3.102223508996348e-05, "loss": 0.1644, "step": 26928 }, { "epoch": 0.4803089216280812, "grad_norm": 0.2564949691295624, "learning_rate": 3.102072440585011e-05, "loss": 0.142, "step": 26929 }, { "epoch": 0.4803267577497949, "grad_norm": 0.2197631448507309, "learning_rate": 3.101921369839833e-05, "loss": 0.1341, "step": 26930 }, { "epoch": 0.4803445938715086, "grad_norm": 0.37748482823371887, "learning_rate": 3.1017702967614e-05, "loss": 0.1393, "step": 26931 }, { "epoch": 0.48036242999322226, "grad_norm": 0.23145487904548645, "learning_rate": 3.101619221350298e-05, "loss": 0.1454, "step": 26932 }, { "epoch": 0.48038026611493595, "grad_norm": 0.2263801395893097, "learning_rate": 3.1014681436071116e-05, "loss": 0.1584, "step": 26933 }, { "epoch": 0.48039810223664964, "grad_norm": 0.23662111163139343, "learning_rate": 3.101317063532426e-05, "loss": 0.1336, "step": 26934 }, { "epoch": 0.4804159383583634, "grad_norm": 0.260810911655426, "learning_rate": 3.10116598112683e-05, "loss": 0.1993, "step": 26935 }, { "epoch": 0.48043377448007707, "grad_norm": 0.2356175184249878, "learning_rate": 3.101014896390905e-05, "loss": 0.1296, "step": 26936 }, { "epoch": 0.48045161060179076, "grad_norm": 0.23916806280612946, "learning_rate": 3.1008638093252395e-05, "loss": 0.1538, "step": 26937 }, { "epoch": 0.48046944672350445, "grad_norm": 0.3416164219379425, "learning_rate": 3.100712719930418e-05, "loss": 0.1982, "step": 26938 }, { "epoch": 0.48048728284521813, "grad_norm": 0.23779907822608948, "learning_rate": 3.100561628207026e-05, "loss": 0.1245, "step": 26939 }, { "epoch": 0.4805051189669318, "grad_norm": 0.30729466676712036, "learning_rate": 3.100410534155651e-05, "loss": 0.1666, "step": 26940 }, { "epoch": 0.4805229550886455, "grad_norm": 0.45495182275772095, "learning_rate": 3.100259437776877e-05, "loss": 0.1716, "step": 26941 }, { "epoch": 0.4805407912103592, "grad_norm": 0.23640476167201996, "learning_rate": 3.10010833907129e-05, "loss": 0.1222, "step": 26942 }, { "epoch": 0.48055862733207294, "grad_norm": 0.2531169354915619, "learning_rate": 3.099957238039476e-05, "loss": 0.1341, "step": 26943 }, { "epoch": 0.48057646345378663, "grad_norm": 0.3022766709327698, "learning_rate": 3.0998061346820206e-05, "loss": 0.2402, "step": 26944 }, { "epoch": 0.4805942995755003, "grad_norm": 0.2547677755355835, "learning_rate": 3.099655028999508e-05, "loss": 0.144, "step": 26945 }, { "epoch": 0.480612135697214, "grad_norm": 0.26292699575424194, "learning_rate": 3.099503920992527e-05, "loss": 0.1572, "step": 26946 }, { "epoch": 0.4806299718189277, "grad_norm": 0.2665134370326996, "learning_rate": 3.099352810661661e-05, "loss": 0.1433, "step": 26947 }, { "epoch": 0.4806478079406414, "grad_norm": 0.27100810408592224, "learning_rate": 3.099201698007497e-05, "loss": 0.1245, "step": 26948 }, { "epoch": 0.48066564406235507, "grad_norm": 0.3097866177558899, "learning_rate": 3.0990505830306196e-05, "loss": 0.1747, "step": 26949 }, { "epoch": 0.48068348018406876, "grad_norm": 0.3356071412563324, "learning_rate": 3.098899465731617e-05, "loss": 0.1092, "step": 26950 }, { "epoch": 0.48070131630578244, "grad_norm": 0.32404643297195435, "learning_rate": 3.098748346111071e-05, "loss": 0.1325, "step": 26951 }, { "epoch": 0.4807191524274962, "grad_norm": 0.27789992094039917, "learning_rate": 3.09859722416957e-05, "loss": 0.1427, "step": 26952 }, { "epoch": 0.4807369885492099, "grad_norm": 0.2856275141239166, "learning_rate": 3.0984460999077e-05, "loss": 0.1299, "step": 26953 }, { "epoch": 0.48075482467092356, "grad_norm": 0.2191135734319687, "learning_rate": 3.098294973326046e-05, "loss": 0.1154, "step": 26954 }, { "epoch": 0.48077266079263725, "grad_norm": 0.1872471272945404, "learning_rate": 3.098143844425194e-05, "loss": 0.0617, "step": 26955 }, { "epoch": 0.48079049691435094, "grad_norm": 0.3164249360561371, "learning_rate": 3.097992713205731e-05, "loss": 0.1459, "step": 26956 }, { "epoch": 0.48080833303606463, "grad_norm": 0.3594079315662384, "learning_rate": 3.097841579668241e-05, "loss": 0.2561, "step": 26957 }, { "epoch": 0.4808261691577783, "grad_norm": 0.3132946193218231, "learning_rate": 3.09769044381331e-05, "loss": 0.1655, "step": 26958 }, { "epoch": 0.480844005279492, "grad_norm": 0.27492833137512207, "learning_rate": 3.097539305641524e-05, "loss": 0.1608, "step": 26959 }, { "epoch": 0.48086184140120575, "grad_norm": 0.39647388458251953, "learning_rate": 3.09738816515347e-05, "loss": 0.1424, "step": 26960 }, { "epoch": 0.48087967752291944, "grad_norm": 0.3303094208240509, "learning_rate": 3.097237022349734e-05, "loss": 0.1861, "step": 26961 }, { "epoch": 0.4808975136446331, "grad_norm": 0.20275112986564636, "learning_rate": 3.0970858772309e-05, "loss": 0.1219, "step": 26962 }, { "epoch": 0.4809153497663468, "grad_norm": 0.22518904507160187, "learning_rate": 3.096934729797555e-05, "loss": 0.1251, "step": 26963 }, { "epoch": 0.4809331858880605, "grad_norm": 0.3026747405529022, "learning_rate": 3.096783580050284e-05, "loss": 0.1307, "step": 26964 }, { "epoch": 0.4809510220097742, "grad_norm": 0.2873251736164093, "learning_rate": 3.096632427989675e-05, "loss": 0.1032, "step": 26965 }, { "epoch": 0.4809688581314879, "grad_norm": 0.20177914202213287, "learning_rate": 3.096481273616312e-05, "loss": 0.1477, "step": 26966 }, { "epoch": 0.48098669425320156, "grad_norm": 0.3231706917285919, "learning_rate": 3.096330116930782e-05, "loss": 0.1837, "step": 26967 }, { "epoch": 0.48100453037491525, "grad_norm": 0.22386451065540314, "learning_rate": 3.09617895793367e-05, "loss": 0.1611, "step": 26968 }, { "epoch": 0.481022366496629, "grad_norm": 0.2567024230957031, "learning_rate": 3.096027796625563e-05, "loss": 0.1441, "step": 26969 }, { "epoch": 0.4810402026183427, "grad_norm": 0.35814276337623596, "learning_rate": 3.0958766330070463e-05, "loss": 0.1393, "step": 26970 }, { "epoch": 0.48105803874005637, "grad_norm": 0.30965086817741394, "learning_rate": 3.095725467078706e-05, "loss": 0.2281, "step": 26971 }, { "epoch": 0.48107587486177006, "grad_norm": 0.27966707944869995, "learning_rate": 3.095574298841128e-05, "loss": 0.1094, "step": 26972 }, { "epoch": 0.48109371098348375, "grad_norm": 0.25909602642059326, "learning_rate": 3.095423128294898e-05, "loss": 0.1418, "step": 26973 }, { "epoch": 0.48111154710519743, "grad_norm": 0.31567519903182983, "learning_rate": 3.095271955440602e-05, "loss": 0.1417, "step": 26974 }, { "epoch": 0.4811293832269111, "grad_norm": 0.2649823725223541, "learning_rate": 3.0951207802788264e-05, "loss": 0.1107, "step": 26975 }, { "epoch": 0.4811472193486248, "grad_norm": 0.27317014336586, "learning_rate": 3.0949696028101574e-05, "loss": 0.0916, "step": 26976 }, { "epoch": 0.48116505547033855, "grad_norm": 0.32236748933792114, "learning_rate": 3.094818423035181e-05, "loss": 0.1384, "step": 26977 }, { "epoch": 0.48118289159205224, "grad_norm": 0.1466619372367859, "learning_rate": 3.094667240954483e-05, "loss": 0.1034, "step": 26978 }, { "epoch": 0.48120072771376593, "grad_norm": 0.34723901748657227, "learning_rate": 3.094516056568649e-05, "loss": 0.1322, "step": 26979 }, { "epoch": 0.4812185638354796, "grad_norm": 0.3337315618991852, "learning_rate": 3.094364869878265e-05, "loss": 0.227, "step": 26980 }, { "epoch": 0.4812363999571933, "grad_norm": 0.2675262689590454, "learning_rate": 3.0942136808839176e-05, "loss": 0.1116, "step": 26981 }, { "epoch": 0.481254236078907, "grad_norm": 0.35388287901878357, "learning_rate": 3.0940624895861936e-05, "loss": 0.1275, "step": 26982 }, { "epoch": 0.4812720722006207, "grad_norm": 0.2760636806488037, "learning_rate": 3.093911295985678e-05, "loss": 0.1237, "step": 26983 }, { "epoch": 0.48128990832233437, "grad_norm": 0.3363461494445801, "learning_rate": 3.0937601000829567e-05, "loss": 0.1148, "step": 26984 }, { "epoch": 0.4813077444440481, "grad_norm": 0.32991641759872437, "learning_rate": 3.093608901878616e-05, "loss": 0.1899, "step": 26985 }, { "epoch": 0.4813255805657618, "grad_norm": 0.2606499195098877, "learning_rate": 3.093457701373243e-05, "loss": 0.1378, "step": 26986 }, { "epoch": 0.4813434166874755, "grad_norm": 0.25897732377052307, "learning_rate": 3.093306498567422e-05, "loss": 0.1551, "step": 26987 }, { "epoch": 0.4813612528091892, "grad_norm": 0.27342841029167175, "learning_rate": 3.09315529346174e-05, "loss": 0.1226, "step": 26988 }, { "epoch": 0.48137908893090287, "grad_norm": 0.3533625304698944, "learning_rate": 3.093004086056784e-05, "loss": 0.112, "step": 26989 }, { "epoch": 0.48139692505261655, "grad_norm": 0.29467374086380005, "learning_rate": 3.092852876353139e-05, "loss": 0.2224, "step": 26990 }, { "epoch": 0.48141476117433024, "grad_norm": 0.26189154386520386, "learning_rate": 3.092701664351392e-05, "loss": 0.1847, "step": 26991 }, { "epoch": 0.48143259729604393, "grad_norm": 0.22214268147945404, "learning_rate": 3.092550450052128e-05, "loss": 0.1015, "step": 26992 }, { "epoch": 0.4814504334177576, "grad_norm": 0.2892412841320038, "learning_rate": 3.092399233455934e-05, "loss": 0.1618, "step": 26993 }, { "epoch": 0.48146826953947136, "grad_norm": 0.25335508584976196, "learning_rate": 3.0922480145633965e-05, "loss": 0.1479, "step": 26994 }, { "epoch": 0.48148610566118505, "grad_norm": 0.2531585991382599, "learning_rate": 3.0920967933751e-05, "loss": 0.1472, "step": 26995 }, { "epoch": 0.48150394178289874, "grad_norm": 0.21424445509910583, "learning_rate": 3.0919455698916326e-05, "loss": 0.1066, "step": 26996 }, { "epoch": 0.4815217779046124, "grad_norm": 0.24043121933937073, "learning_rate": 3.091794344113579e-05, "loss": 0.1145, "step": 26997 }, { "epoch": 0.4815396140263261, "grad_norm": 0.27867183089256287, "learning_rate": 3.091643116041528e-05, "loss": 0.1506, "step": 26998 }, { "epoch": 0.4815574501480398, "grad_norm": 0.28481653332710266, "learning_rate": 3.091491885676062e-05, "loss": 0.1239, "step": 26999 }, { "epoch": 0.4815752862697535, "grad_norm": 0.2043977826833725, "learning_rate": 3.09134065301777e-05, "loss": 0.1449, "step": 27000 }, { "epoch": 0.4815752862697535, "eval_loss": 0.13947582244873047, "eval_runtime": 107.8396, "eval_samples_per_second": 9.496, "eval_steps_per_second": 1.586, "step": 27000 }, { "epoch": 0.4815931223914672, "grad_norm": 0.4226484000682831, "learning_rate": 3.0911894180672366e-05, "loss": 0.1076, "step": 27001 }, { "epoch": 0.4816109585131809, "grad_norm": 0.22894150018692017, "learning_rate": 3.0910381808250496e-05, "loss": 0.1684, "step": 27002 }, { "epoch": 0.4816287946348946, "grad_norm": 0.2135767787694931, "learning_rate": 3.090886941291794e-05, "loss": 0.0945, "step": 27003 }, { "epoch": 0.4816466307566083, "grad_norm": 0.23432646691799164, "learning_rate": 3.090735699468057e-05, "loss": 0.1297, "step": 27004 }, { "epoch": 0.481664466878322, "grad_norm": 0.25252532958984375, "learning_rate": 3.0905844553544236e-05, "loss": 0.1378, "step": 27005 }, { "epoch": 0.48168230300003567, "grad_norm": 0.3878104090690613, "learning_rate": 3.090433208951482e-05, "loss": 0.1623, "step": 27006 }, { "epoch": 0.48170013912174936, "grad_norm": 0.2725524604320526, "learning_rate": 3.090281960259817e-05, "loss": 0.1284, "step": 27007 }, { "epoch": 0.48171797524346305, "grad_norm": 0.286504864692688, "learning_rate": 3.0901307092800145e-05, "loss": 0.1385, "step": 27008 }, { "epoch": 0.48173581136517674, "grad_norm": 0.31102314591407776, "learning_rate": 3.089979456012663e-05, "loss": 0.1167, "step": 27009 }, { "epoch": 0.4817536474868904, "grad_norm": 0.22377517819404602, "learning_rate": 3.089828200458346e-05, "loss": 0.1043, "step": 27010 }, { "epoch": 0.48177148360860417, "grad_norm": 0.3582703769207001, "learning_rate": 3.089676942617652e-05, "loss": 0.1357, "step": 27011 }, { "epoch": 0.48178931973031786, "grad_norm": 0.216938778758049, "learning_rate": 3.0895256824911654e-05, "loss": 0.1355, "step": 27012 }, { "epoch": 0.48180715585203154, "grad_norm": 0.28301793336868286, "learning_rate": 3.089374420079475e-05, "loss": 0.1788, "step": 27013 }, { "epoch": 0.48182499197374523, "grad_norm": 0.1755129098892212, "learning_rate": 3.0892231553831646e-05, "loss": 0.121, "step": 27014 }, { "epoch": 0.4818428280954589, "grad_norm": 0.17529180645942688, "learning_rate": 3.0890718884028224e-05, "loss": 0.1356, "step": 27015 }, { "epoch": 0.4818606642171726, "grad_norm": 0.3269725441932678, "learning_rate": 3.088920619139033e-05, "loss": 0.1983, "step": 27016 }, { "epoch": 0.4818785003388863, "grad_norm": 0.26815667748451233, "learning_rate": 3.088769347592386e-05, "loss": 0.1886, "step": 27017 }, { "epoch": 0.4818963364606, "grad_norm": 0.2617287039756775, "learning_rate": 3.088618073763464e-05, "loss": 0.1765, "step": 27018 }, { "epoch": 0.4819141725823137, "grad_norm": 0.2688720226287842, "learning_rate": 3.088466797652856e-05, "loss": 0.1372, "step": 27019 }, { "epoch": 0.4819320087040274, "grad_norm": 0.23563289642333984, "learning_rate": 3.088315519261147e-05, "loss": 0.1439, "step": 27020 }, { "epoch": 0.4819498448257411, "grad_norm": 0.24001914262771606, "learning_rate": 3.0881642385889245e-05, "loss": 0.123, "step": 27021 }, { "epoch": 0.4819676809474548, "grad_norm": 0.2797130346298218, "learning_rate": 3.088012955636773e-05, "loss": 0.1003, "step": 27022 }, { "epoch": 0.4819855170691685, "grad_norm": 0.25982335209846497, "learning_rate": 3.0878616704052806e-05, "loss": 0.1663, "step": 27023 }, { "epoch": 0.48200335319088217, "grad_norm": 0.20879769325256348, "learning_rate": 3.0877103828950337e-05, "loss": 0.1085, "step": 27024 }, { "epoch": 0.48202118931259585, "grad_norm": 0.30116739869117737, "learning_rate": 3.087559093106618e-05, "loss": 0.1366, "step": 27025 }, { "epoch": 0.48203902543430954, "grad_norm": 0.29739004373550415, "learning_rate": 3.087407801040621e-05, "loss": 0.1394, "step": 27026 }, { "epoch": 0.48205686155602323, "grad_norm": 0.22542817890644073, "learning_rate": 3.0872565066976275e-05, "loss": 0.1399, "step": 27027 }, { "epoch": 0.482074697677737, "grad_norm": 0.2542777359485626, "learning_rate": 3.087105210078226e-05, "loss": 0.1196, "step": 27028 }, { "epoch": 0.48209253379945066, "grad_norm": 0.3396031856536865, "learning_rate": 3.0869539111830006e-05, "loss": 0.1077, "step": 27029 }, { "epoch": 0.48211036992116435, "grad_norm": 0.28776851296424866, "learning_rate": 3.08680261001254e-05, "loss": 0.1258, "step": 27030 }, { "epoch": 0.48212820604287804, "grad_norm": 0.3039550483226776, "learning_rate": 3.0866513065674295e-05, "loss": 0.1614, "step": 27031 }, { "epoch": 0.4821460421645917, "grad_norm": 0.31225457787513733, "learning_rate": 3.0865000008482564e-05, "loss": 0.1693, "step": 27032 }, { "epoch": 0.4821638782863054, "grad_norm": 0.3079989552497864, "learning_rate": 3.086348692855606e-05, "loss": 0.1385, "step": 27033 }, { "epoch": 0.4821817144080191, "grad_norm": 0.24990880489349365, "learning_rate": 3.086197382590067e-05, "loss": 0.1223, "step": 27034 }, { "epoch": 0.4821995505297328, "grad_norm": 0.20213203132152557, "learning_rate": 3.086046070052223e-05, "loss": 0.163, "step": 27035 }, { "epoch": 0.48221738665144653, "grad_norm": 0.25982651114463806, "learning_rate": 3.085894755242662e-05, "loss": 0.077, "step": 27036 }, { "epoch": 0.4822352227731602, "grad_norm": 0.2415047138929367, "learning_rate": 3.085743438161972e-05, "loss": 0.1363, "step": 27037 }, { "epoch": 0.4822530588948739, "grad_norm": 0.233364999294281, "learning_rate": 3.0855921188107364e-05, "loss": 0.1186, "step": 27038 }, { "epoch": 0.4822708950165876, "grad_norm": 0.27403080463409424, "learning_rate": 3.085440797189545e-05, "loss": 0.0811, "step": 27039 }, { "epoch": 0.4822887311383013, "grad_norm": 0.2892562747001648, "learning_rate": 3.0852894732989815e-05, "loss": 0.1534, "step": 27040 }, { "epoch": 0.482306567260015, "grad_norm": 0.28563547134399414, "learning_rate": 3.085138147139635e-05, "loss": 0.1761, "step": 27041 }, { "epoch": 0.48232440338172866, "grad_norm": 0.3108138144016266, "learning_rate": 3.08498681871209e-05, "loss": 0.1338, "step": 27042 }, { "epoch": 0.48234223950344235, "grad_norm": 0.26626822352409363, "learning_rate": 3.0848354880169346e-05, "loss": 0.0957, "step": 27043 }, { "epoch": 0.4823600756251561, "grad_norm": 0.22019726037979126, "learning_rate": 3.0846841550547546e-05, "loss": 0.1388, "step": 27044 }, { "epoch": 0.4823779117468698, "grad_norm": 0.33581289649009705, "learning_rate": 3.084532819826137e-05, "loss": 0.169, "step": 27045 }, { "epoch": 0.48239574786858347, "grad_norm": 0.19992482662200928, "learning_rate": 3.084381482331668e-05, "loss": 0.1231, "step": 27046 }, { "epoch": 0.48241358399029716, "grad_norm": 0.2791059911251068, "learning_rate": 3.084230142571935e-05, "loss": 0.1987, "step": 27047 }, { "epoch": 0.48243142011201084, "grad_norm": 0.27307823300361633, "learning_rate": 3.0840788005475246e-05, "loss": 0.1054, "step": 27048 }, { "epoch": 0.48244925623372453, "grad_norm": 0.22794122993946075, "learning_rate": 3.083927456259022e-05, "loss": 0.166, "step": 27049 }, { "epoch": 0.4824670923554382, "grad_norm": 0.30295199155807495, "learning_rate": 3.083776109707015e-05, "loss": 0.1609, "step": 27050 }, { "epoch": 0.4824849284771519, "grad_norm": 0.23303623497486115, "learning_rate": 3.08362476089209e-05, "loss": 0.1467, "step": 27051 }, { "epoch": 0.4825027645988656, "grad_norm": 0.2229575365781784, "learning_rate": 3.083473409814835e-05, "loss": 0.1276, "step": 27052 }, { "epoch": 0.48252060072057934, "grad_norm": 0.28668197989463806, "learning_rate": 3.0833220564758346e-05, "loss": 0.1584, "step": 27053 }, { "epoch": 0.48253843684229303, "grad_norm": 0.23593303561210632, "learning_rate": 3.083170700875677e-05, "loss": 0.1379, "step": 27054 }, { "epoch": 0.4825562729640067, "grad_norm": 0.3035764992237091, "learning_rate": 3.0830193430149476e-05, "loss": 0.1692, "step": 27055 }, { "epoch": 0.4825741090857204, "grad_norm": 0.2567789852619171, "learning_rate": 3.082867982894235e-05, "loss": 0.1555, "step": 27056 }, { "epoch": 0.4825919452074341, "grad_norm": 0.2320074439048767, "learning_rate": 3.082716620514123e-05, "loss": 0.1707, "step": 27057 }, { "epoch": 0.4826097813291478, "grad_norm": 0.29797905683517456, "learning_rate": 3.082565255875202e-05, "loss": 0.16, "step": 27058 }, { "epoch": 0.48262761745086147, "grad_norm": 0.2768057584762573, "learning_rate": 3.082413888978056e-05, "loss": 0.1478, "step": 27059 }, { "epoch": 0.48264545357257516, "grad_norm": 0.2559477388858795, "learning_rate": 3.082262519823273e-05, "loss": 0.1898, "step": 27060 }, { "epoch": 0.4826632896942889, "grad_norm": 0.22729475796222687, "learning_rate": 3.0821111484114395e-05, "loss": 0.1155, "step": 27061 }, { "epoch": 0.4826811258160026, "grad_norm": 0.2949030101299286, "learning_rate": 3.081959774743141e-05, "loss": 0.1289, "step": 27062 }, { "epoch": 0.4826989619377163, "grad_norm": 0.4954430162906647, "learning_rate": 3.081808398818966e-05, "loss": 0.2021, "step": 27063 }, { "epoch": 0.48271679805942996, "grad_norm": 0.272966593503952, "learning_rate": 3.0816570206395004e-05, "loss": 0.1389, "step": 27064 }, { "epoch": 0.48273463418114365, "grad_norm": 0.21344424784183502, "learning_rate": 3.0815056402053325e-05, "loss": 0.1301, "step": 27065 }, { "epoch": 0.48275247030285734, "grad_norm": 0.33520445227622986, "learning_rate": 3.0813542575170466e-05, "loss": 0.1551, "step": 27066 }, { "epoch": 0.482770306424571, "grad_norm": 0.35573261976242065, "learning_rate": 3.0812028725752316e-05, "loss": 0.1598, "step": 27067 }, { "epoch": 0.4827881425462847, "grad_norm": 0.3074539601802826, "learning_rate": 3.081051485380473e-05, "loss": 0.1607, "step": 27068 }, { "epoch": 0.4828059786679984, "grad_norm": 0.3038214147090912, "learning_rate": 3.080900095933359e-05, "loss": 0.1679, "step": 27069 }, { "epoch": 0.48282381478971215, "grad_norm": 0.3289279639720917, "learning_rate": 3.0807487042344746e-05, "loss": 0.1544, "step": 27070 }, { "epoch": 0.48284165091142583, "grad_norm": 0.27637359499931335, "learning_rate": 3.080597310284408e-05, "loss": 0.1257, "step": 27071 }, { "epoch": 0.4828594870331395, "grad_norm": 0.22734405100345612, "learning_rate": 3.080445914083745e-05, "loss": 0.132, "step": 27072 }, { "epoch": 0.4828773231548532, "grad_norm": 0.22987398505210876, "learning_rate": 3.0802945156330745e-05, "loss": 0.1594, "step": 27073 }, { "epoch": 0.4828951592765669, "grad_norm": 0.33401110768318176, "learning_rate": 3.080143114932981e-05, "loss": 0.2106, "step": 27074 }, { "epoch": 0.4829129953982806, "grad_norm": 0.23963074386119843, "learning_rate": 3.0799917119840535e-05, "loss": 0.1384, "step": 27075 }, { "epoch": 0.4829308315199943, "grad_norm": 0.28968027234077454, "learning_rate": 3.079840306786877e-05, "loss": 0.1273, "step": 27076 }, { "epoch": 0.48294866764170796, "grad_norm": 0.24933290481567383, "learning_rate": 3.079688899342039e-05, "loss": 0.1139, "step": 27077 }, { "epoch": 0.4829665037634217, "grad_norm": 0.22661608457565308, "learning_rate": 3.0795374896501266e-05, "loss": 0.147, "step": 27078 }, { "epoch": 0.4829843398851354, "grad_norm": 0.2771577537059784, "learning_rate": 3.079386077711727e-05, "loss": 0.1204, "step": 27079 }, { "epoch": 0.4830021760068491, "grad_norm": 0.2487345039844513, "learning_rate": 3.079234663527427e-05, "loss": 0.1382, "step": 27080 }, { "epoch": 0.48302001212856277, "grad_norm": 0.4305541217327118, "learning_rate": 3.079083247097813e-05, "loss": 0.2463, "step": 27081 }, { "epoch": 0.48303784825027646, "grad_norm": 0.18258030712604523, "learning_rate": 3.078931828423473e-05, "loss": 0.1343, "step": 27082 }, { "epoch": 0.48305568437199015, "grad_norm": 0.39898568391799927, "learning_rate": 3.0787804075049926e-05, "loss": 0.1215, "step": 27083 }, { "epoch": 0.48307352049370383, "grad_norm": 0.2975720465183258, "learning_rate": 3.078628984342959e-05, "loss": 0.1792, "step": 27084 }, { "epoch": 0.4830913566154175, "grad_norm": 0.273441880941391, "learning_rate": 3.078477558937961e-05, "loss": 0.1182, "step": 27085 }, { "epoch": 0.48310919273713127, "grad_norm": 0.26858997344970703, "learning_rate": 3.078326131290583e-05, "loss": 0.1499, "step": 27086 }, { "epoch": 0.48312702885884495, "grad_norm": 0.2902086079120636, "learning_rate": 3.0781747014014146e-05, "loss": 0.1386, "step": 27087 }, { "epoch": 0.48314486498055864, "grad_norm": 0.2149176150560379, "learning_rate": 3.0780232692710396e-05, "loss": 0.1124, "step": 27088 }, { "epoch": 0.48316270110227233, "grad_norm": 0.32025763392448425, "learning_rate": 3.077871834900048e-05, "loss": 0.1648, "step": 27089 }, { "epoch": 0.483180537223986, "grad_norm": 0.30817756056785583, "learning_rate": 3.077720398289025e-05, "loss": 0.1602, "step": 27090 }, { "epoch": 0.4831983733456997, "grad_norm": 0.23591212928295135, "learning_rate": 3.0775689594385585e-05, "loss": 0.1603, "step": 27091 }, { "epoch": 0.4832162094674134, "grad_norm": 0.241417795419693, "learning_rate": 3.077417518349235e-05, "loss": 0.1565, "step": 27092 }, { "epoch": 0.4832340455891271, "grad_norm": 0.25818952918052673, "learning_rate": 3.077266075021642e-05, "loss": 0.1776, "step": 27093 }, { "epoch": 0.48325188171084077, "grad_norm": 0.31792545318603516, "learning_rate": 3.0771146294563656e-05, "loss": 0.1571, "step": 27094 }, { "epoch": 0.4832697178325545, "grad_norm": 0.20953741669654846, "learning_rate": 3.0769631816539947e-05, "loss": 0.1441, "step": 27095 }, { "epoch": 0.4832875539542682, "grad_norm": 0.22194012999534607, "learning_rate": 3.076811731615114e-05, "loss": 0.0815, "step": 27096 }, { "epoch": 0.4833053900759819, "grad_norm": 0.318523108959198, "learning_rate": 3.0766602793403134e-05, "loss": 0.1549, "step": 27097 }, { "epoch": 0.4833232261976956, "grad_norm": 0.2722551226615906, "learning_rate": 3.076508824830177e-05, "loss": 0.1445, "step": 27098 }, { "epoch": 0.48334106231940926, "grad_norm": 0.2218547761440277, "learning_rate": 3.076357368085293e-05, "loss": 0.1162, "step": 27099 }, { "epoch": 0.48335889844112295, "grad_norm": 0.26550573110580444, "learning_rate": 3.07620590910625e-05, "loss": 0.0919, "step": 27100 }, { "epoch": 0.48337673456283664, "grad_norm": 0.45003244280815125, "learning_rate": 3.076054447893633e-05, "loss": 0.2033, "step": 27101 }, { "epoch": 0.48339457068455033, "grad_norm": 0.31724047660827637, "learning_rate": 3.075902984448031e-05, "loss": 0.1198, "step": 27102 }, { "epoch": 0.48341240680626407, "grad_norm": 0.29529982805252075, "learning_rate": 3.075751518770029e-05, "loss": 0.1202, "step": 27103 }, { "epoch": 0.48343024292797776, "grad_norm": 0.2640722990036011, "learning_rate": 3.075600050860216e-05, "loss": 0.2108, "step": 27104 }, { "epoch": 0.48344807904969145, "grad_norm": 0.25682541728019714, "learning_rate": 3.075448580719178e-05, "loss": 0.1084, "step": 27105 }, { "epoch": 0.48346591517140514, "grad_norm": 0.29415833950042725, "learning_rate": 3.0752971083475025e-05, "loss": 0.1155, "step": 27106 }, { "epoch": 0.4834837512931188, "grad_norm": 0.2655586898326874, "learning_rate": 3.075145633745777e-05, "loss": 0.1648, "step": 27107 }, { "epoch": 0.4835015874148325, "grad_norm": 0.2796500325202942, "learning_rate": 3.074994156914589e-05, "loss": 0.1207, "step": 27108 }, { "epoch": 0.4835194235365462, "grad_norm": 0.32488369941711426, "learning_rate": 3.0748426778545234e-05, "loss": 0.1777, "step": 27109 }, { "epoch": 0.4835372596582599, "grad_norm": 0.24566836655139923, "learning_rate": 3.0746911965661706e-05, "loss": 0.2071, "step": 27110 }, { "epoch": 0.4835550957799736, "grad_norm": 0.22565458714962006, "learning_rate": 3.074539713050115e-05, "loss": 0.1467, "step": 27111 }, { "epoch": 0.4835729319016873, "grad_norm": 0.21871069073677063, "learning_rate": 3.0743882273069456e-05, "loss": 0.0904, "step": 27112 }, { "epoch": 0.483590768023401, "grad_norm": 0.23211175203323364, "learning_rate": 3.074236739337249e-05, "loss": 0.1908, "step": 27113 }, { "epoch": 0.4836086041451147, "grad_norm": 0.2156677544116974, "learning_rate": 3.074085249141613e-05, "loss": 0.1518, "step": 27114 }, { "epoch": 0.4836264402668284, "grad_norm": 0.273124635219574, "learning_rate": 3.073933756720624e-05, "loss": 0.1254, "step": 27115 }, { "epoch": 0.48364427638854207, "grad_norm": 0.23021823167800903, "learning_rate": 3.073782262074869e-05, "loss": 0.1362, "step": 27116 }, { "epoch": 0.48366211251025576, "grad_norm": 0.424717515707016, "learning_rate": 3.073630765204936e-05, "loss": 0.1478, "step": 27117 }, { "epoch": 0.48367994863196945, "grad_norm": 0.29039236903190613, "learning_rate": 3.073479266111412e-05, "loss": 0.1898, "step": 27118 }, { "epoch": 0.48369778475368314, "grad_norm": 0.27556338906288147, "learning_rate": 3.073327764794884e-05, "loss": 0.1304, "step": 27119 }, { "epoch": 0.4837156208753969, "grad_norm": 0.24541257321834564, "learning_rate": 3.07317626125594e-05, "loss": 0.134, "step": 27120 }, { "epoch": 0.48373345699711057, "grad_norm": 0.2787431478500366, "learning_rate": 3.0730247554951675e-05, "loss": 0.1568, "step": 27121 }, { "epoch": 0.48375129311882425, "grad_norm": 0.24350681900978088, "learning_rate": 3.072873247513152e-05, "loss": 0.135, "step": 27122 }, { "epoch": 0.48376912924053794, "grad_norm": 0.3390022814273834, "learning_rate": 3.0727217373104826e-05, "loss": 0.182, "step": 27123 }, { "epoch": 0.48378696536225163, "grad_norm": 0.3546014130115509, "learning_rate": 3.072570224887746e-05, "loss": 0.1965, "step": 27124 }, { "epoch": 0.4838048014839653, "grad_norm": 0.24808244407176971, "learning_rate": 3.0724187102455293e-05, "loss": 0.1546, "step": 27125 }, { "epoch": 0.483822637605679, "grad_norm": 0.29959049820899963, "learning_rate": 3.07226719338442e-05, "loss": 0.1469, "step": 27126 }, { "epoch": 0.4838404737273927, "grad_norm": 0.42251360416412354, "learning_rate": 3.072115674305005e-05, "loss": 0.1134, "step": 27127 }, { "epoch": 0.4838583098491064, "grad_norm": 0.24596382677555084, "learning_rate": 3.071964153007872e-05, "loss": 0.1516, "step": 27128 }, { "epoch": 0.4838761459708201, "grad_norm": 0.2794867157936096, "learning_rate": 3.071812629493609e-05, "loss": 0.1387, "step": 27129 }, { "epoch": 0.4838939820925338, "grad_norm": 0.21823757886886597, "learning_rate": 3.071661103762803e-05, "loss": 0.1244, "step": 27130 }, { "epoch": 0.4839118182142475, "grad_norm": 0.24210450053215027, "learning_rate": 3.07150957581604e-05, "loss": 0.1296, "step": 27131 }, { "epoch": 0.4839296543359612, "grad_norm": 0.1930825561285019, "learning_rate": 3.071358045653909e-05, "loss": 0.1029, "step": 27132 }, { "epoch": 0.4839474904576749, "grad_norm": 0.26027199625968933, "learning_rate": 3.071206513276997e-05, "loss": 0.1147, "step": 27133 }, { "epoch": 0.48396532657938857, "grad_norm": 0.31662362813949585, "learning_rate": 3.071054978685891e-05, "loss": 0.1256, "step": 27134 }, { "epoch": 0.48398316270110225, "grad_norm": 0.3016536831855774, "learning_rate": 3.070903441881179e-05, "loss": 0.089, "step": 27135 }, { "epoch": 0.48400099882281594, "grad_norm": 0.2770494520664215, "learning_rate": 3.070751902863448e-05, "loss": 0.1711, "step": 27136 }, { "epoch": 0.4840188349445297, "grad_norm": 0.4436330199241638, "learning_rate": 3.0706003616332853e-05, "loss": 0.1378, "step": 27137 }, { "epoch": 0.4840366710662434, "grad_norm": 0.28773120045661926, "learning_rate": 3.070448818191279e-05, "loss": 0.1641, "step": 27138 }, { "epoch": 0.48405450718795706, "grad_norm": 0.27296674251556396, "learning_rate": 3.0702972725380155e-05, "loss": 0.1238, "step": 27139 }, { "epoch": 0.48407234330967075, "grad_norm": 0.39131787419319153, "learning_rate": 3.0701457246740826e-05, "loss": 0.1644, "step": 27140 }, { "epoch": 0.48409017943138444, "grad_norm": 0.28994521498680115, "learning_rate": 3.0699941746000686e-05, "loss": 0.1657, "step": 27141 }, { "epoch": 0.4841080155530981, "grad_norm": 0.18912653625011444, "learning_rate": 3.06984262231656e-05, "loss": 0.1208, "step": 27142 }, { "epoch": 0.4841258516748118, "grad_norm": 0.19882464408874512, "learning_rate": 3.069691067824145e-05, "loss": 0.1323, "step": 27143 }, { "epoch": 0.4841436877965255, "grad_norm": 0.30060574412345886, "learning_rate": 3.06953951112341e-05, "loss": 0.1798, "step": 27144 }, { "epoch": 0.48416152391823924, "grad_norm": 0.35380762815475464, "learning_rate": 3.069387952214944e-05, "loss": 0.1753, "step": 27145 }, { "epoch": 0.48417936003995293, "grad_norm": 0.4506976306438446, "learning_rate": 3.069236391099333e-05, "loss": 0.163, "step": 27146 }, { "epoch": 0.4841971961616666, "grad_norm": 0.22866684198379517, "learning_rate": 3.069084827777165e-05, "loss": 0.1538, "step": 27147 }, { "epoch": 0.4842150322833803, "grad_norm": 0.22546716034412384, "learning_rate": 3.068933262249027e-05, "loss": 0.1268, "step": 27148 }, { "epoch": 0.484232868405094, "grad_norm": 0.2038814276456833, "learning_rate": 3.0687816945155085e-05, "loss": 0.1169, "step": 27149 }, { "epoch": 0.4842507045268077, "grad_norm": 0.2168342024087906, "learning_rate": 3.068630124577196e-05, "loss": 0.1499, "step": 27150 }, { "epoch": 0.4842685406485214, "grad_norm": 0.21687765419483185, "learning_rate": 3.0684785524346754e-05, "loss": 0.1143, "step": 27151 }, { "epoch": 0.48428637677023506, "grad_norm": 0.3289444148540497, "learning_rate": 3.068326978088536e-05, "loss": 0.1028, "step": 27152 }, { "epoch": 0.48430421289194875, "grad_norm": 0.31328248977661133, "learning_rate": 3.0681754015393654e-05, "loss": 0.1231, "step": 27153 }, { "epoch": 0.4843220490136625, "grad_norm": 0.2642780840396881, "learning_rate": 3.06802382278775e-05, "loss": 0.1128, "step": 27154 }, { "epoch": 0.4843398851353762, "grad_norm": 0.23576690256595612, "learning_rate": 3.0678722418342785e-05, "loss": 0.1419, "step": 27155 }, { "epoch": 0.48435772125708987, "grad_norm": 0.2915872037410736, "learning_rate": 3.0677206586795384e-05, "loss": 0.1783, "step": 27156 }, { "epoch": 0.48437555737880356, "grad_norm": 0.2980562746524811, "learning_rate": 3.0675690733241167e-05, "loss": 0.1425, "step": 27157 }, { "epoch": 0.48439339350051724, "grad_norm": 0.2599024474620819, "learning_rate": 3.0674174857686014e-05, "loss": 0.1416, "step": 27158 }, { "epoch": 0.48441122962223093, "grad_norm": 0.25335493683815, "learning_rate": 3.0672658960135793e-05, "loss": 0.1439, "step": 27159 }, { "epoch": 0.4844290657439446, "grad_norm": 0.2693977355957031, "learning_rate": 3.067114304059639e-05, "loss": 0.1072, "step": 27160 }, { "epoch": 0.4844469018656583, "grad_norm": 0.30277130007743835, "learning_rate": 3.066962709907367e-05, "loss": 0.1251, "step": 27161 }, { "epoch": 0.48446473798737205, "grad_norm": 0.2464374154806137, "learning_rate": 3.066811113557353e-05, "loss": 0.1484, "step": 27162 }, { "epoch": 0.48448257410908574, "grad_norm": 0.25897088646888733, "learning_rate": 3.066659515010183e-05, "loss": 0.1592, "step": 27163 }, { "epoch": 0.4845004102307994, "grad_norm": 0.22919850051403046, "learning_rate": 3.0665079142664446e-05, "loss": 0.091, "step": 27164 }, { "epoch": 0.4845182463525131, "grad_norm": 0.23086853325366974, "learning_rate": 3.0663563113267266e-05, "loss": 0.1237, "step": 27165 }, { "epoch": 0.4845360824742268, "grad_norm": 0.22809967398643494, "learning_rate": 3.066204706191616e-05, "loss": 0.1225, "step": 27166 }, { "epoch": 0.4845539185959405, "grad_norm": 0.2634361982345581, "learning_rate": 3.066053098861699e-05, "loss": 0.1446, "step": 27167 }, { "epoch": 0.4845717547176542, "grad_norm": 0.28495100140571594, "learning_rate": 3.0659014893375655e-05, "loss": 0.1178, "step": 27168 }, { "epoch": 0.48458959083936787, "grad_norm": 0.26449814438819885, "learning_rate": 3.0657498776198025e-05, "loss": 0.1571, "step": 27169 }, { "epoch": 0.48460742696108156, "grad_norm": 0.21386142075061798, "learning_rate": 3.065598263708997e-05, "loss": 0.0789, "step": 27170 }, { "epoch": 0.4846252630827953, "grad_norm": 0.3007747232913971, "learning_rate": 3.065446647605739e-05, "loss": 0.1563, "step": 27171 }, { "epoch": 0.484643099204509, "grad_norm": 0.33034080266952515, "learning_rate": 3.0652950293106125e-05, "loss": 0.1969, "step": 27172 }, { "epoch": 0.4846609353262227, "grad_norm": 0.20527958869934082, "learning_rate": 3.065143408824208e-05, "loss": 0.0939, "step": 27173 }, { "epoch": 0.48467877144793636, "grad_norm": 0.3309456408023834, "learning_rate": 3.0649917861471125e-05, "loss": 0.2742, "step": 27174 }, { "epoch": 0.48469660756965005, "grad_norm": 0.2243259698152542, "learning_rate": 3.064840161279913e-05, "loss": 0.1334, "step": 27175 }, { "epoch": 0.48471444369136374, "grad_norm": 0.21642421185970306, "learning_rate": 3.0646885342231985e-05, "loss": 0.145, "step": 27176 }, { "epoch": 0.4847322798130774, "grad_norm": 0.21992802619934082, "learning_rate": 3.064536904977557e-05, "loss": 0.1196, "step": 27177 }, { "epoch": 0.4847501159347911, "grad_norm": 0.27333512902259827, "learning_rate": 3.064385273543574e-05, "loss": 0.1283, "step": 27178 }, { "epoch": 0.48476795205650486, "grad_norm": 0.2596698999404907, "learning_rate": 3.06423363992184e-05, "loss": 0.1262, "step": 27179 }, { "epoch": 0.48478578817821855, "grad_norm": 0.25320670008659363, "learning_rate": 3.0640820041129414e-05, "loss": 0.1464, "step": 27180 }, { "epoch": 0.48480362429993223, "grad_norm": 0.31184887886047363, "learning_rate": 3.063930366117466e-05, "loss": 0.106, "step": 27181 }, { "epoch": 0.4848214604216459, "grad_norm": 0.34344539046287537, "learning_rate": 3.063778725936001e-05, "loss": 0.1753, "step": 27182 }, { "epoch": 0.4848392965433596, "grad_norm": 0.33374086022377014, "learning_rate": 3.063627083569135e-05, "loss": 0.1642, "step": 27183 }, { "epoch": 0.4848571326650733, "grad_norm": 0.3592602610588074, "learning_rate": 3.063475439017456e-05, "loss": 0.1736, "step": 27184 }, { "epoch": 0.484874968786787, "grad_norm": 0.26294299960136414, "learning_rate": 3.063323792281552e-05, "loss": 0.2282, "step": 27185 }, { "epoch": 0.4848928049085007, "grad_norm": 0.21310549974441528, "learning_rate": 3.0631721433620104e-05, "loss": 0.1253, "step": 27186 }, { "epoch": 0.4849106410302144, "grad_norm": 0.2431284487247467, "learning_rate": 3.063020492259418e-05, "loss": 0.1055, "step": 27187 }, { "epoch": 0.4849284771519281, "grad_norm": 0.28226086497306824, "learning_rate": 3.062868838974365e-05, "loss": 0.1046, "step": 27188 }, { "epoch": 0.4849463132736418, "grad_norm": 0.34102001786231995, "learning_rate": 3.062717183507437e-05, "loss": 0.1687, "step": 27189 }, { "epoch": 0.4849641493953555, "grad_norm": 0.2814805209636688, "learning_rate": 3.062565525859224e-05, "loss": 0.1272, "step": 27190 }, { "epoch": 0.48498198551706917, "grad_norm": 0.25693613290786743, "learning_rate": 3.062413866030311e-05, "loss": 0.2097, "step": 27191 }, { "epoch": 0.48499982163878286, "grad_norm": 0.30521929264068604, "learning_rate": 3.06226220402129e-05, "loss": 0.1427, "step": 27192 }, { "epoch": 0.48501765776049655, "grad_norm": 0.3133265972137451, "learning_rate": 3.0621105398327446e-05, "loss": 0.1067, "step": 27193 }, { "epoch": 0.48503549388221023, "grad_norm": 0.2955072820186615, "learning_rate": 3.0619588734652656e-05, "loss": 0.1338, "step": 27194 }, { "epoch": 0.4850533300039239, "grad_norm": 0.3286081850528717, "learning_rate": 3.0618072049194394e-05, "loss": 0.1099, "step": 27195 }, { "epoch": 0.48507116612563767, "grad_norm": 0.31009331345558167, "learning_rate": 3.061655534195854e-05, "loss": 0.1602, "step": 27196 }, { "epoch": 0.48508900224735135, "grad_norm": 0.23093637824058533, "learning_rate": 3.061503861295099e-05, "loss": 0.1186, "step": 27197 }, { "epoch": 0.48510683836906504, "grad_norm": 0.23135803639888763, "learning_rate": 3.0613521862177596e-05, "loss": 0.1703, "step": 27198 }, { "epoch": 0.48512467449077873, "grad_norm": 0.2465144395828247, "learning_rate": 3.0612005089644266e-05, "loss": 0.1279, "step": 27199 }, { "epoch": 0.4851425106124924, "grad_norm": 0.22441820800304413, "learning_rate": 3.061048829535685e-05, "loss": 0.12, "step": 27200 }, { "epoch": 0.4851603467342061, "grad_norm": 0.2618945837020874, "learning_rate": 3.0608971479321266e-05, "loss": 0.1198, "step": 27201 }, { "epoch": 0.4851781828559198, "grad_norm": 0.19926472008228302, "learning_rate": 3.060745464154335e-05, "loss": 0.1316, "step": 27202 }, { "epoch": 0.4851960189776335, "grad_norm": 0.30909961462020874, "learning_rate": 3.0605937782029015e-05, "loss": 0.1567, "step": 27203 }, { "epoch": 0.4852138550993472, "grad_norm": 0.217269629240036, "learning_rate": 3.060442090078412e-05, "loss": 0.1187, "step": 27204 }, { "epoch": 0.4852316912210609, "grad_norm": 0.29075273871421814, "learning_rate": 3.060290399781456e-05, "loss": 0.1964, "step": 27205 }, { "epoch": 0.4852495273427746, "grad_norm": 0.18378223478794098, "learning_rate": 3.060138707312622e-05, "loss": 0.0483, "step": 27206 }, { "epoch": 0.4852673634644883, "grad_norm": 0.24114476144313812, "learning_rate": 3.059987012672495e-05, "loss": 0.1442, "step": 27207 }, { "epoch": 0.485285199586202, "grad_norm": 0.3498968183994293, "learning_rate": 3.059835315861666e-05, "loss": 0.1545, "step": 27208 }, { "epoch": 0.48530303570791566, "grad_norm": 0.21598756313323975, "learning_rate": 3.059683616880721e-05, "loss": 0.1493, "step": 27209 }, { "epoch": 0.48532087182962935, "grad_norm": 0.2919982969760895, "learning_rate": 3.0595319157302496e-05, "loss": 0.1415, "step": 27210 }, { "epoch": 0.48533870795134304, "grad_norm": 0.28294768929481506, "learning_rate": 3.0593802124108396e-05, "loss": 0.1338, "step": 27211 }, { "epoch": 0.48535654407305673, "grad_norm": 0.3219447135925293, "learning_rate": 3.059228506923079e-05, "loss": 0.1693, "step": 27212 }, { "epoch": 0.48537438019477047, "grad_norm": 0.2757996618747711, "learning_rate": 3.059076799267554e-05, "loss": 0.1678, "step": 27213 }, { "epoch": 0.48539221631648416, "grad_norm": 0.28392598032951355, "learning_rate": 3.0589250894448556e-05, "loss": 0.1969, "step": 27214 }, { "epoch": 0.48541005243819785, "grad_norm": 0.26357629895210266, "learning_rate": 3.05877337745557e-05, "loss": 0.1293, "step": 27215 }, { "epoch": 0.48542788855991154, "grad_norm": 0.27374133467674255, "learning_rate": 3.058621663300286e-05, "loss": 0.1789, "step": 27216 }, { "epoch": 0.4854457246816252, "grad_norm": 0.2714899182319641, "learning_rate": 3.058469946979591e-05, "loss": 0.1853, "step": 27217 }, { "epoch": 0.4854635608033389, "grad_norm": 0.27141493558883667, "learning_rate": 3.058318228494074e-05, "loss": 0.1519, "step": 27218 }, { "epoch": 0.4854813969250526, "grad_norm": 0.3366071581840515, "learning_rate": 3.058166507844323e-05, "loss": 0.1488, "step": 27219 }, { "epoch": 0.4854992330467663, "grad_norm": 0.2763836979866028, "learning_rate": 3.058014785030925e-05, "loss": 0.1612, "step": 27220 }, { "epoch": 0.48551706916848003, "grad_norm": 0.3584231436252594, "learning_rate": 3.05786306005447e-05, "loss": 0.1502, "step": 27221 }, { "epoch": 0.4855349052901937, "grad_norm": 0.28474316000938416, "learning_rate": 3.057711332915544e-05, "loss": 0.1573, "step": 27222 }, { "epoch": 0.4855527414119074, "grad_norm": 0.2725260257720947, "learning_rate": 3.057559603614737e-05, "loss": 0.1354, "step": 27223 }, { "epoch": 0.4855705775336211, "grad_norm": 0.3297487795352936, "learning_rate": 3.057407872152636e-05, "loss": 0.1032, "step": 27224 }, { "epoch": 0.4855884136553348, "grad_norm": 0.31142011284828186, "learning_rate": 3.05725613852983e-05, "loss": 0.1947, "step": 27225 }, { "epoch": 0.48560624977704847, "grad_norm": 0.31022921204566956, "learning_rate": 3.057104402746906e-05, "loss": 0.1045, "step": 27226 }, { "epoch": 0.48562408589876216, "grad_norm": 0.4807033836841583, "learning_rate": 3.0569526648044536e-05, "loss": 0.1215, "step": 27227 }, { "epoch": 0.48564192202047585, "grad_norm": 0.22525370121002197, "learning_rate": 3.05680092470306e-05, "loss": 0.1023, "step": 27228 }, { "epoch": 0.48565975814218953, "grad_norm": 0.28735658526420593, "learning_rate": 3.056649182443314e-05, "loss": 0.1575, "step": 27229 }, { "epoch": 0.4856775942639033, "grad_norm": 0.3156735301017761, "learning_rate": 3.056497438025803e-05, "loss": 0.1394, "step": 27230 }, { "epoch": 0.48569543038561697, "grad_norm": 0.2701871395111084, "learning_rate": 3.056345691451116e-05, "loss": 0.1575, "step": 27231 }, { "epoch": 0.48571326650733065, "grad_norm": 0.2798452377319336, "learning_rate": 3.0561939427198414e-05, "loss": 0.1458, "step": 27232 }, { "epoch": 0.48573110262904434, "grad_norm": 0.2689918279647827, "learning_rate": 3.056042191832567e-05, "loss": 0.1799, "step": 27233 }, { "epoch": 0.48574893875075803, "grad_norm": 0.2624211013317108, "learning_rate": 3.05589043878988e-05, "loss": 0.1306, "step": 27234 }, { "epoch": 0.4857667748724717, "grad_norm": 0.4467747211456299, "learning_rate": 3.05573868359237e-05, "loss": 0.1993, "step": 27235 }, { "epoch": 0.4857846109941854, "grad_norm": 0.2985488772392273, "learning_rate": 3.055586926240626e-05, "loss": 0.1735, "step": 27236 }, { "epoch": 0.4858024471158991, "grad_norm": 0.33233118057250977, "learning_rate": 3.0554351667352335e-05, "loss": 0.2119, "step": 27237 }, { "epoch": 0.48582028323761284, "grad_norm": 0.25685766339302063, "learning_rate": 3.055283405076784e-05, "loss": 0.0981, "step": 27238 }, { "epoch": 0.4858381193593265, "grad_norm": 0.24434791505336761, "learning_rate": 3.0551316412658626e-05, "loss": 0.1352, "step": 27239 }, { "epoch": 0.4858559554810402, "grad_norm": 0.2511981129646301, "learning_rate": 3.0549798753030604e-05, "loss": 0.1355, "step": 27240 }, { "epoch": 0.4858737916027539, "grad_norm": 0.41593602299690247, "learning_rate": 3.054828107188964e-05, "loss": 0.1864, "step": 27241 }, { "epoch": 0.4858916277244676, "grad_norm": 0.24475279450416565, "learning_rate": 3.0546763369241634e-05, "loss": 0.1515, "step": 27242 }, { "epoch": 0.4859094638461813, "grad_norm": 0.25201985239982605, "learning_rate": 3.054524564509244e-05, "loss": 0.1166, "step": 27243 }, { "epoch": 0.48592729996789497, "grad_norm": 0.3088558614253998, "learning_rate": 3.054372789944797e-05, "loss": 0.157, "step": 27244 }, { "epoch": 0.48594513608960865, "grad_norm": 0.3116265833377838, "learning_rate": 3.054221013231409e-05, "loss": 0.1684, "step": 27245 }, { "epoch": 0.4859629722113224, "grad_norm": 0.2777627110481262, "learning_rate": 3.054069234369669e-05, "loss": 0.1592, "step": 27246 }, { "epoch": 0.4859808083330361, "grad_norm": 0.3016069531440735, "learning_rate": 3.053917453360166e-05, "loss": 0.1531, "step": 27247 }, { "epoch": 0.4859986444547498, "grad_norm": 0.1716867983341217, "learning_rate": 3.053765670203487e-05, "loss": 0.116, "step": 27248 }, { "epoch": 0.48601648057646346, "grad_norm": 0.22268109023571014, "learning_rate": 3.0536138849002215e-05, "loss": 0.1412, "step": 27249 }, { "epoch": 0.48603431669817715, "grad_norm": 0.37913307547569275, "learning_rate": 3.053462097450956e-05, "loss": 0.1293, "step": 27250 }, { "epoch": 0.48605215281989084, "grad_norm": 0.27252355217933655, "learning_rate": 3.0533103078562816e-05, "loss": 0.1845, "step": 27251 }, { "epoch": 0.4860699889416045, "grad_norm": 0.2057403028011322, "learning_rate": 3.053158516116785e-05, "loss": 0.1379, "step": 27252 }, { "epoch": 0.4860878250633182, "grad_norm": 0.3789529800415039, "learning_rate": 3.053006722233055e-05, "loss": 0.2164, "step": 27253 }, { "epoch": 0.4861056611850319, "grad_norm": 0.25591522455215454, "learning_rate": 3.05285492620568e-05, "loss": 0.1123, "step": 27254 }, { "epoch": 0.48612349730674564, "grad_norm": 0.2654406726360321, "learning_rate": 3.052703128035248e-05, "loss": 0.1261, "step": 27255 }, { "epoch": 0.48614133342845933, "grad_norm": 0.26605573296546936, "learning_rate": 3.052551327722348e-05, "loss": 0.1655, "step": 27256 }, { "epoch": 0.486159169550173, "grad_norm": 0.2640789747238159, "learning_rate": 3.052399525267569e-05, "loss": 0.1628, "step": 27257 }, { "epoch": 0.4861770056718867, "grad_norm": 0.22468449175357819, "learning_rate": 3.052247720671497e-05, "loss": 0.1051, "step": 27258 }, { "epoch": 0.4861948417936004, "grad_norm": 0.21288982033729553, "learning_rate": 3.052095913934723e-05, "loss": 0.1165, "step": 27259 }, { "epoch": 0.4862126779153141, "grad_norm": 0.25478121638298035, "learning_rate": 3.0519441050578346e-05, "loss": 0.1855, "step": 27260 }, { "epoch": 0.4862305140370278, "grad_norm": 0.2562662661075592, "learning_rate": 3.05179229404142e-05, "loss": 0.132, "step": 27261 }, { "epoch": 0.48624835015874146, "grad_norm": 0.19263970851898193, "learning_rate": 3.051640480886068e-05, "loss": 0.1076, "step": 27262 }, { "epoch": 0.4862661862804552, "grad_norm": 0.29618728160858154, "learning_rate": 3.051488665592367e-05, "loss": 0.1597, "step": 27263 }, { "epoch": 0.4862840224021689, "grad_norm": 0.23893561959266663, "learning_rate": 3.051336848160906e-05, "loss": 0.1426, "step": 27264 }, { "epoch": 0.4863018585238826, "grad_norm": 0.32210400700569153, "learning_rate": 3.0511850285922715e-05, "loss": 0.1889, "step": 27265 }, { "epoch": 0.48631969464559627, "grad_norm": 0.22814425826072693, "learning_rate": 3.0510332068870544e-05, "loss": 0.1193, "step": 27266 }, { "epoch": 0.48633753076730996, "grad_norm": 0.3275805413722992, "learning_rate": 3.050881383045842e-05, "loss": 0.1625, "step": 27267 }, { "epoch": 0.48635536688902364, "grad_norm": 0.23945313692092896, "learning_rate": 3.050729557069224e-05, "loss": 0.1415, "step": 27268 }, { "epoch": 0.48637320301073733, "grad_norm": 0.26737454533576965, "learning_rate": 3.050577728957787e-05, "loss": 0.1628, "step": 27269 }, { "epoch": 0.486391039132451, "grad_norm": 0.2799249589443207, "learning_rate": 3.050425898712121e-05, "loss": 0.1253, "step": 27270 }, { "epoch": 0.4864088752541647, "grad_norm": 0.22880299389362335, "learning_rate": 3.0502740663328138e-05, "loss": 0.1357, "step": 27271 }, { "epoch": 0.48642671137587845, "grad_norm": 0.3262365758419037, "learning_rate": 3.050122231820454e-05, "loss": 0.1635, "step": 27272 }, { "epoch": 0.48644454749759214, "grad_norm": 0.21146617829799652, "learning_rate": 3.0499703951756313e-05, "loss": 0.1373, "step": 27273 }, { "epoch": 0.4864623836193058, "grad_norm": 0.25496119260787964, "learning_rate": 3.049818556398933e-05, "loss": 0.1046, "step": 27274 }, { "epoch": 0.4864802197410195, "grad_norm": 0.2394377738237381, "learning_rate": 3.049666715490948e-05, "loss": 0.1295, "step": 27275 }, { "epoch": 0.4864980558627332, "grad_norm": 0.26413217186927795, "learning_rate": 3.049514872452265e-05, "loss": 0.1242, "step": 27276 }, { "epoch": 0.4865158919844469, "grad_norm": 0.24838456511497498, "learning_rate": 3.0493630272834728e-05, "loss": 0.1334, "step": 27277 }, { "epoch": 0.4865337281061606, "grad_norm": 0.29534652829170227, "learning_rate": 3.049211179985159e-05, "loss": 0.1522, "step": 27278 }, { "epoch": 0.48655156422787427, "grad_norm": 0.2617117464542389, "learning_rate": 3.0490593305579136e-05, "loss": 0.101, "step": 27279 }, { "epoch": 0.486569400349588, "grad_norm": 0.310689240694046, "learning_rate": 3.0489074790023244e-05, "loss": 0.1596, "step": 27280 }, { "epoch": 0.4865872364713017, "grad_norm": 0.30059751868247986, "learning_rate": 3.0487556253189802e-05, "loss": 0.1624, "step": 27281 }, { "epoch": 0.4866050725930154, "grad_norm": 0.2293773740530014, "learning_rate": 3.0486037695084697e-05, "loss": 0.1444, "step": 27282 }, { "epoch": 0.4866229087147291, "grad_norm": 0.38112732768058777, "learning_rate": 3.0484519115713816e-05, "loss": 0.1805, "step": 27283 }, { "epoch": 0.48664074483644276, "grad_norm": 0.23375114798545837, "learning_rate": 3.048300051508305e-05, "loss": 0.1461, "step": 27284 }, { "epoch": 0.48665858095815645, "grad_norm": 0.3298053443431854, "learning_rate": 3.0481481893198273e-05, "loss": 0.1842, "step": 27285 }, { "epoch": 0.48667641707987014, "grad_norm": 0.24933935701847076, "learning_rate": 3.0479963250065378e-05, "loss": 0.1386, "step": 27286 }, { "epoch": 0.4866942532015838, "grad_norm": 0.3003977835178375, "learning_rate": 3.0478444585690252e-05, "loss": 0.1364, "step": 27287 }, { "epoch": 0.48671208932329757, "grad_norm": 0.30580267310142517, "learning_rate": 3.047692590007879e-05, "loss": 0.0891, "step": 27288 }, { "epoch": 0.48672992544501126, "grad_norm": 0.2590245306491852, "learning_rate": 3.0475407193236864e-05, "loss": 0.156, "step": 27289 }, { "epoch": 0.48674776156672495, "grad_norm": 0.2779429256916046, "learning_rate": 3.0473888465170376e-05, "loss": 0.1801, "step": 27290 }, { "epoch": 0.48676559768843863, "grad_norm": 0.2095423936843872, "learning_rate": 3.04723697158852e-05, "loss": 0.1498, "step": 27291 }, { "epoch": 0.4867834338101523, "grad_norm": 0.25905272364616394, "learning_rate": 3.047085094538723e-05, "loss": 0.1397, "step": 27292 }, { "epoch": 0.486801269931866, "grad_norm": 0.23495237529277802, "learning_rate": 3.0469332153682352e-05, "loss": 0.1353, "step": 27293 }, { "epoch": 0.4868191060535797, "grad_norm": 0.2629874348640442, "learning_rate": 3.046781334077645e-05, "loss": 0.1354, "step": 27294 }, { "epoch": 0.4868369421752934, "grad_norm": 0.2586022615432739, "learning_rate": 3.0466294506675417e-05, "loss": 0.1446, "step": 27295 }, { "epoch": 0.4868547782970071, "grad_norm": 0.30887869000434875, "learning_rate": 3.0464775651385147e-05, "loss": 0.119, "step": 27296 }, { "epoch": 0.4868726144187208, "grad_norm": 0.42592737078666687, "learning_rate": 3.0463256774911514e-05, "loss": 0.1737, "step": 27297 }, { "epoch": 0.4868904505404345, "grad_norm": 0.23727940022945404, "learning_rate": 3.046173787726041e-05, "loss": 0.1334, "step": 27298 }, { "epoch": 0.4869082866621482, "grad_norm": 0.2822306454181671, "learning_rate": 3.046021895843772e-05, "loss": 0.1536, "step": 27299 }, { "epoch": 0.4869261227838619, "grad_norm": 0.3753502666950226, "learning_rate": 3.0458700018449337e-05, "loss": 0.1118, "step": 27300 }, { "epoch": 0.48694395890557557, "grad_norm": 0.22475706040859222, "learning_rate": 3.0457181057301154e-05, "loss": 0.1106, "step": 27301 }, { "epoch": 0.48696179502728926, "grad_norm": 0.31406259536743164, "learning_rate": 3.045566207499904e-05, "loss": 0.1984, "step": 27302 }, { "epoch": 0.48697963114900295, "grad_norm": 0.2767631411552429, "learning_rate": 3.0454143071548908e-05, "loss": 0.1253, "step": 27303 }, { "epoch": 0.48699746727071663, "grad_norm": 0.2754286527633667, "learning_rate": 3.0452624046956623e-05, "loss": 0.1483, "step": 27304 }, { "epoch": 0.4870153033924304, "grad_norm": 0.36240652203559875, "learning_rate": 3.0451105001228097e-05, "loss": 0.1468, "step": 27305 }, { "epoch": 0.48703313951414406, "grad_norm": 0.2550050616264343, "learning_rate": 3.0449585934369196e-05, "loss": 0.1389, "step": 27306 }, { "epoch": 0.48705097563585775, "grad_norm": 0.20303471386432648, "learning_rate": 3.0448066846385815e-05, "loss": 0.1174, "step": 27307 }, { "epoch": 0.48706881175757144, "grad_norm": 0.4444245994091034, "learning_rate": 3.044654773728385e-05, "loss": 0.1363, "step": 27308 }, { "epoch": 0.48708664787928513, "grad_norm": 0.2130480259656906, "learning_rate": 3.0445028607069188e-05, "loss": 0.1239, "step": 27309 }, { "epoch": 0.4871044840009988, "grad_norm": 0.2550525665283203, "learning_rate": 3.0443509455747706e-05, "loss": 0.1328, "step": 27310 }, { "epoch": 0.4871223201227125, "grad_norm": 0.2371637523174286, "learning_rate": 3.0441990283325304e-05, "loss": 0.1323, "step": 27311 }, { "epoch": 0.4871401562444262, "grad_norm": 0.26530876755714417, "learning_rate": 3.0440471089807876e-05, "loss": 0.148, "step": 27312 }, { "epoch": 0.4871579923661399, "grad_norm": 0.2489183098077774, "learning_rate": 3.0438951875201293e-05, "loss": 0.1367, "step": 27313 }, { "epoch": 0.4871758284878536, "grad_norm": 0.32706326246261597, "learning_rate": 3.043743263951146e-05, "loss": 0.1312, "step": 27314 }, { "epoch": 0.4871936646095673, "grad_norm": 0.3285485506057739, "learning_rate": 3.043591338274425e-05, "loss": 0.1337, "step": 27315 }, { "epoch": 0.487211500731281, "grad_norm": 0.2609952390193939, "learning_rate": 3.0434394104905577e-05, "loss": 0.1695, "step": 27316 }, { "epoch": 0.4872293368529947, "grad_norm": 0.22378557920455933, "learning_rate": 3.043287480600131e-05, "loss": 0.1744, "step": 27317 }, { "epoch": 0.4872471729747084, "grad_norm": 0.2997693717479706, "learning_rate": 3.0431355486037343e-05, "loss": 0.1289, "step": 27318 }, { "epoch": 0.48726500909642206, "grad_norm": 0.39024093747138977, "learning_rate": 3.0429836145019562e-05, "loss": 0.2342, "step": 27319 }, { "epoch": 0.48728284521813575, "grad_norm": 0.22317062318325043, "learning_rate": 3.0428316782953863e-05, "loss": 0.1395, "step": 27320 }, { "epoch": 0.48730068133984944, "grad_norm": 0.24340015649795532, "learning_rate": 3.0426797399846134e-05, "loss": 0.1506, "step": 27321 }, { "epoch": 0.4873185174615632, "grad_norm": 0.34011828899383545, "learning_rate": 3.0425277995702268e-05, "loss": 0.117, "step": 27322 }, { "epoch": 0.48733635358327687, "grad_norm": 0.333850622177124, "learning_rate": 3.0423758570528142e-05, "loss": 0.0828, "step": 27323 }, { "epoch": 0.48735418970499056, "grad_norm": 0.34431585669517517, "learning_rate": 3.0422239124329666e-05, "loss": 0.0981, "step": 27324 }, { "epoch": 0.48737202582670425, "grad_norm": 0.634473979473114, "learning_rate": 3.0420719657112718e-05, "loss": 0.1293, "step": 27325 }, { "epoch": 0.48738986194841794, "grad_norm": 0.2658355236053467, "learning_rate": 3.0419200168883176e-05, "loss": 0.1194, "step": 27326 }, { "epoch": 0.4874076980701316, "grad_norm": 0.2890503704547882, "learning_rate": 3.0417680659646946e-05, "loss": 0.1215, "step": 27327 }, { "epoch": 0.4874255341918453, "grad_norm": 0.305399626493454, "learning_rate": 3.0416161129409916e-05, "loss": 0.1407, "step": 27328 }, { "epoch": 0.487443370313559, "grad_norm": 0.23517367243766785, "learning_rate": 3.0414641578177982e-05, "loss": 0.1261, "step": 27329 }, { "epoch": 0.4874612064352727, "grad_norm": 0.2857036888599396, "learning_rate": 3.041312200595702e-05, "loss": 0.1355, "step": 27330 }, { "epoch": 0.48747904255698643, "grad_norm": 0.24496567249298096, "learning_rate": 3.0411602412752925e-05, "loss": 0.1594, "step": 27331 }, { "epoch": 0.4874968786787001, "grad_norm": 0.3168342709541321, "learning_rate": 3.0410082798571593e-05, "loss": 0.1339, "step": 27332 }, { "epoch": 0.4875147148004138, "grad_norm": 0.23017621040344238, "learning_rate": 3.0408563163418918e-05, "loss": 0.1385, "step": 27333 }, { "epoch": 0.4875325509221275, "grad_norm": 0.2749054729938507, "learning_rate": 3.0407043507300775e-05, "loss": 0.1886, "step": 27334 }, { "epoch": 0.4875503870438412, "grad_norm": 0.16834156215190887, "learning_rate": 3.0405523830223065e-05, "loss": 0.0992, "step": 27335 }, { "epoch": 0.48756822316555487, "grad_norm": 0.22413797676563263, "learning_rate": 3.040400413219168e-05, "loss": 0.14, "step": 27336 }, { "epoch": 0.48758605928726856, "grad_norm": 0.31531214714050293, "learning_rate": 3.0402484413212513e-05, "loss": 0.1844, "step": 27337 }, { "epoch": 0.48760389540898225, "grad_norm": 0.3473220467567444, "learning_rate": 3.0400964673291444e-05, "loss": 0.1437, "step": 27338 }, { "epoch": 0.487621731530696, "grad_norm": 0.2839626967906952, "learning_rate": 3.0399444912434373e-05, "loss": 0.1666, "step": 27339 }, { "epoch": 0.4876395676524097, "grad_norm": 0.23532170057296753, "learning_rate": 3.0397925130647186e-05, "loss": 0.1724, "step": 27340 }, { "epoch": 0.48765740377412337, "grad_norm": 0.2965671420097351, "learning_rate": 3.0396405327935778e-05, "loss": 0.1184, "step": 27341 }, { "epoch": 0.48767523989583705, "grad_norm": 0.2658535838127136, "learning_rate": 3.0394885504306037e-05, "loss": 0.2124, "step": 27342 }, { "epoch": 0.48769307601755074, "grad_norm": 0.21778833866119385, "learning_rate": 3.0393365659763863e-05, "loss": 0.1314, "step": 27343 }, { "epoch": 0.48771091213926443, "grad_norm": 0.2593882977962494, "learning_rate": 3.0391845794315137e-05, "loss": 0.1275, "step": 27344 }, { "epoch": 0.4877287482609781, "grad_norm": 0.21858806908130646, "learning_rate": 3.039032590796575e-05, "loss": 0.1136, "step": 27345 }, { "epoch": 0.4877465843826918, "grad_norm": 0.37158480286598206, "learning_rate": 3.038880600072161e-05, "loss": 0.1292, "step": 27346 }, { "epoch": 0.48776442050440555, "grad_norm": 0.3079637885093689, "learning_rate": 3.0387286072588584e-05, "loss": 0.1451, "step": 27347 }, { "epoch": 0.48778225662611924, "grad_norm": 0.3274306356906891, "learning_rate": 3.038576612357258e-05, "loss": 0.1498, "step": 27348 }, { "epoch": 0.4878000927478329, "grad_norm": 0.3026948571205139, "learning_rate": 3.0384246153679487e-05, "loss": 0.11, "step": 27349 }, { "epoch": 0.4878179288695466, "grad_norm": 0.3100977838039398, "learning_rate": 3.0382726162915197e-05, "loss": 0.1491, "step": 27350 }, { "epoch": 0.4878357649912603, "grad_norm": 0.22274494171142578, "learning_rate": 3.0381206151285607e-05, "loss": 0.1368, "step": 27351 }, { "epoch": 0.487853601112974, "grad_norm": 0.20299595594406128, "learning_rate": 3.0379686118796596e-05, "loss": 0.2092, "step": 27352 }, { "epoch": 0.4878714372346877, "grad_norm": 0.2284993678331375, "learning_rate": 3.0378166065454068e-05, "loss": 0.1335, "step": 27353 }, { "epoch": 0.48788927335640137, "grad_norm": 0.264803409576416, "learning_rate": 3.0376645991263908e-05, "loss": 0.0985, "step": 27354 }, { "epoch": 0.48790710947811505, "grad_norm": 0.22501897811889648, "learning_rate": 3.037512589623201e-05, "loss": 0.0989, "step": 27355 }, { "epoch": 0.4879249455998288, "grad_norm": 0.2307986468076706, "learning_rate": 3.037360578036426e-05, "loss": 0.1323, "step": 27356 }, { "epoch": 0.4879427817215425, "grad_norm": 0.33018019795417786, "learning_rate": 3.0372085643666577e-05, "loss": 0.137, "step": 27357 }, { "epoch": 0.4879606178432562, "grad_norm": 0.2736720144748688, "learning_rate": 3.037056548614482e-05, "loss": 0.1962, "step": 27358 }, { "epoch": 0.48797845396496986, "grad_norm": 0.25060150027275085, "learning_rate": 3.03690453078049e-05, "loss": 0.1238, "step": 27359 }, { "epoch": 0.48799629008668355, "grad_norm": 0.26136261224746704, "learning_rate": 3.0367525108652706e-05, "loss": 0.1641, "step": 27360 }, { "epoch": 0.48801412620839724, "grad_norm": 0.24233616888523102, "learning_rate": 3.0366004888694132e-05, "loss": 0.1297, "step": 27361 }, { "epoch": 0.4880319623301109, "grad_norm": 0.2826780378818512, "learning_rate": 3.036448464793507e-05, "loss": 0.1441, "step": 27362 }, { "epoch": 0.4880497984518246, "grad_norm": 0.24363841116428375, "learning_rate": 3.036296438638141e-05, "loss": 0.1993, "step": 27363 }, { "epoch": 0.48806763457353836, "grad_norm": 0.22785243391990662, "learning_rate": 3.0361444104039055e-05, "loss": 0.0982, "step": 27364 }, { "epoch": 0.48808547069525204, "grad_norm": 0.21507996320724487, "learning_rate": 3.035992380091388e-05, "loss": 0.1331, "step": 27365 }, { "epoch": 0.48810330681696573, "grad_norm": 0.19429700076580048, "learning_rate": 3.0358403477011797e-05, "loss": 0.1262, "step": 27366 }, { "epoch": 0.4881211429386794, "grad_norm": 0.2522944211959839, "learning_rate": 3.0356883132338687e-05, "loss": 0.1772, "step": 27367 }, { "epoch": 0.4881389790603931, "grad_norm": 0.2989320755004883, "learning_rate": 3.0355362766900453e-05, "loss": 0.1165, "step": 27368 }, { "epoch": 0.4881568151821068, "grad_norm": 0.3227361738681793, "learning_rate": 3.0353842380702975e-05, "loss": 0.0887, "step": 27369 }, { "epoch": 0.4881746513038205, "grad_norm": 0.4426630437374115, "learning_rate": 3.035232197375216e-05, "loss": 0.1368, "step": 27370 }, { "epoch": 0.48819248742553417, "grad_norm": 0.21288444101810455, "learning_rate": 3.0350801546053898e-05, "loss": 0.0839, "step": 27371 }, { "epoch": 0.48821032354724786, "grad_norm": 0.25482863187789917, "learning_rate": 3.0349281097614078e-05, "loss": 0.1225, "step": 27372 }, { "epoch": 0.4882281596689616, "grad_norm": 0.2689240872859955, "learning_rate": 3.0347760628438597e-05, "loss": 0.1689, "step": 27373 }, { "epoch": 0.4882459957906753, "grad_norm": 0.25223878026008606, "learning_rate": 3.0346240138533354e-05, "loss": 0.1649, "step": 27374 }, { "epoch": 0.488263831912389, "grad_norm": 0.2771747410297394, "learning_rate": 3.034471962790423e-05, "loss": 0.1555, "step": 27375 }, { "epoch": 0.48828166803410267, "grad_norm": 0.2560770511627197, "learning_rate": 3.034319909655713e-05, "loss": 0.1375, "step": 27376 }, { "epoch": 0.48829950415581636, "grad_norm": 0.2753375172615051, "learning_rate": 3.0341678544497947e-05, "loss": 0.1653, "step": 27377 }, { "epoch": 0.48831734027753004, "grad_norm": 0.2662998139858246, "learning_rate": 3.034015797173257e-05, "loss": 0.1163, "step": 27378 }, { "epoch": 0.48833517639924373, "grad_norm": 0.3011186122894287, "learning_rate": 3.03386373782669e-05, "loss": 0.193, "step": 27379 }, { "epoch": 0.4883530125209574, "grad_norm": 0.3543972969055176, "learning_rate": 3.033711676410682e-05, "loss": 0.1501, "step": 27380 }, { "epoch": 0.48837084864267116, "grad_norm": 0.28629258275032043, "learning_rate": 3.033559612925824e-05, "loss": 0.107, "step": 27381 }, { "epoch": 0.48838868476438485, "grad_norm": 0.3023747205734253, "learning_rate": 3.033407547372704e-05, "loss": 0.1242, "step": 27382 }, { "epoch": 0.48840652088609854, "grad_norm": 0.20361687242984772, "learning_rate": 3.0332554797519124e-05, "loss": 0.1347, "step": 27383 }, { "epoch": 0.4884243570078122, "grad_norm": 0.24906346201896667, "learning_rate": 3.0331034100640383e-05, "loss": 0.1759, "step": 27384 }, { "epoch": 0.4884421931295259, "grad_norm": 0.23788444697856903, "learning_rate": 3.032951338309672e-05, "loss": 0.1474, "step": 27385 }, { "epoch": 0.4884600292512396, "grad_norm": 0.18916882574558258, "learning_rate": 3.032799264489401e-05, "loss": 0.1335, "step": 27386 }, { "epoch": 0.4884778653729533, "grad_norm": 0.22644850611686707, "learning_rate": 3.032647188603817e-05, "loss": 0.1471, "step": 27387 }, { "epoch": 0.488495701494667, "grad_norm": 0.35181525349617004, "learning_rate": 3.0324951106535082e-05, "loss": 0.1971, "step": 27388 }, { "epoch": 0.4885135376163807, "grad_norm": 0.24308420717716217, "learning_rate": 3.0323430306390642e-05, "loss": 0.0956, "step": 27389 }, { "epoch": 0.4885313737380944, "grad_norm": 0.36524635553359985, "learning_rate": 3.032190948561075e-05, "loss": 0.1359, "step": 27390 }, { "epoch": 0.4885492098598081, "grad_norm": 0.2894512414932251, "learning_rate": 3.03203886442013e-05, "loss": 0.1718, "step": 27391 }, { "epoch": 0.4885670459815218, "grad_norm": 0.262677937746048, "learning_rate": 3.0318867782168186e-05, "loss": 0.1546, "step": 27392 }, { "epoch": 0.4885848821032355, "grad_norm": 0.31588712334632874, "learning_rate": 3.0317346899517295e-05, "loss": 0.1508, "step": 27393 }, { "epoch": 0.48860271822494916, "grad_norm": 0.2818751633167267, "learning_rate": 3.0315825996254542e-05, "loss": 0.1521, "step": 27394 }, { "epoch": 0.48862055434666285, "grad_norm": 0.2366069108247757, "learning_rate": 3.031430507238581e-05, "loss": 0.0787, "step": 27395 }, { "epoch": 0.48863839046837654, "grad_norm": 0.3727762997150421, "learning_rate": 3.0312784127916993e-05, "loss": 0.1801, "step": 27396 }, { "epoch": 0.4886562265900902, "grad_norm": 0.29055285453796387, "learning_rate": 3.031126316285398e-05, "loss": 0.1692, "step": 27397 }, { "epoch": 0.48867406271180397, "grad_norm": 0.2740151882171631, "learning_rate": 3.0309742177202695e-05, "loss": 0.1494, "step": 27398 }, { "epoch": 0.48869189883351766, "grad_norm": 0.2600267231464386, "learning_rate": 3.0308221170969002e-05, "loss": 0.1669, "step": 27399 }, { "epoch": 0.48870973495523135, "grad_norm": 0.2821851670742035, "learning_rate": 3.0306700144158817e-05, "loss": 0.1363, "step": 27400 }, { "epoch": 0.48872757107694503, "grad_norm": 0.3379646837711334, "learning_rate": 3.0305179096778026e-05, "loss": 0.1555, "step": 27401 }, { "epoch": 0.4887454071986587, "grad_norm": 0.25313127040863037, "learning_rate": 3.030365802883253e-05, "loss": 0.143, "step": 27402 }, { "epoch": 0.4887632433203724, "grad_norm": 0.21434620022773743, "learning_rate": 3.0302136940328223e-05, "loss": 0.1444, "step": 27403 }, { "epoch": 0.4887810794420861, "grad_norm": 0.23814326524734497, "learning_rate": 3.0300615831271e-05, "loss": 0.1035, "step": 27404 }, { "epoch": 0.4887989155637998, "grad_norm": 0.3455928564071655, "learning_rate": 3.0299094701666765e-05, "loss": 0.1652, "step": 27405 }, { "epoch": 0.48881675168551353, "grad_norm": 0.26397237181663513, "learning_rate": 3.0297573551521406e-05, "loss": 0.1655, "step": 27406 }, { "epoch": 0.4888345878072272, "grad_norm": 0.29323145747184753, "learning_rate": 3.0296052380840824e-05, "loss": 0.1346, "step": 27407 }, { "epoch": 0.4888524239289409, "grad_norm": 0.2768794894218445, "learning_rate": 3.0294531189630908e-05, "loss": 0.1491, "step": 27408 }, { "epoch": 0.4888702600506546, "grad_norm": 0.22584381699562073, "learning_rate": 3.029300997789757e-05, "loss": 0.151, "step": 27409 }, { "epoch": 0.4888880961723683, "grad_norm": 0.25385189056396484, "learning_rate": 3.0291488745646685e-05, "loss": 0.17, "step": 27410 }, { "epoch": 0.48890593229408197, "grad_norm": 0.2069319486618042, "learning_rate": 3.028996749288417e-05, "loss": 0.1355, "step": 27411 }, { "epoch": 0.48892376841579566, "grad_norm": 0.24361161887645721, "learning_rate": 3.0288446219615906e-05, "loss": 0.1375, "step": 27412 }, { "epoch": 0.48894160453750934, "grad_norm": 0.20962321758270264, "learning_rate": 3.028692492584781e-05, "loss": 0.1016, "step": 27413 }, { "epoch": 0.48895944065922303, "grad_norm": 0.29396456480026245, "learning_rate": 3.0285403611585755e-05, "loss": 0.1891, "step": 27414 }, { "epoch": 0.4889772767809368, "grad_norm": 0.2472020983695984, "learning_rate": 3.0283882276835664e-05, "loss": 0.1157, "step": 27415 }, { "epoch": 0.48899511290265046, "grad_norm": 0.22812482714653015, "learning_rate": 3.0282360921603414e-05, "loss": 0.1622, "step": 27416 }, { "epoch": 0.48901294902436415, "grad_norm": 0.31664395332336426, "learning_rate": 3.0280839545894903e-05, "loss": 0.1012, "step": 27417 }, { "epoch": 0.48903078514607784, "grad_norm": 0.2688933312892914, "learning_rate": 3.0279318149716034e-05, "loss": 0.1178, "step": 27418 }, { "epoch": 0.48904862126779153, "grad_norm": 0.23485180735588074, "learning_rate": 3.0277796733072707e-05, "loss": 0.07, "step": 27419 }, { "epoch": 0.4890664573895052, "grad_norm": 0.26901087164878845, "learning_rate": 3.0276275295970823e-05, "loss": 0.1517, "step": 27420 }, { "epoch": 0.4890842935112189, "grad_norm": 0.15468831360340118, "learning_rate": 3.0274753838416266e-05, "loss": 0.0828, "step": 27421 }, { "epoch": 0.4891021296329326, "grad_norm": 0.2339048832654953, "learning_rate": 3.027323236041495e-05, "loss": 0.1433, "step": 27422 }, { "epoch": 0.48911996575464634, "grad_norm": 0.2595570981502533, "learning_rate": 3.0271710861972753e-05, "loss": 0.1971, "step": 27423 }, { "epoch": 0.48913780187636, "grad_norm": 0.26092326641082764, "learning_rate": 3.0270189343095585e-05, "loss": 0.1634, "step": 27424 }, { "epoch": 0.4891556379980737, "grad_norm": 0.29813894629478455, "learning_rate": 3.0268667803789347e-05, "loss": 0.2104, "step": 27425 }, { "epoch": 0.4891734741197874, "grad_norm": 0.3102382719516754, "learning_rate": 3.0267146244059936e-05, "loss": 0.115, "step": 27426 }, { "epoch": 0.4891913102415011, "grad_norm": 0.2704358398914337, "learning_rate": 3.026562466391324e-05, "loss": 0.1026, "step": 27427 }, { "epoch": 0.4892091463632148, "grad_norm": 0.2531728148460388, "learning_rate": 3.026410306335517e-05, "loss": 0.133, "step": 27428 }, { "epoch": 0.48922698248492846, "grad_norm": 0.29078641533851624, "learning_rate": 3.026258144239162e-05, "loss": 0.1399, "step": 27429 }, { "epoch": 0.48924481860664215, "grad_norm": 0.23377011716365814, "learning_rate": 3.026105980102848e-05, "loss": 0.1403, "step": 27430 }, { "epoch": 0.48926265472835584, "grad_norm": 0.22940313816070557, "learning_rate": 3.0259538139271652e-05, "loss": 0.1281, "step": 27431 }, { "epoch": 0.4892804908500696, "grad_norm": 0.47694000601768494, "learning_rate": 3.0258016457127047e-05, "loss": 0.1888, "step": 27432 }, { "epoch": 0.48929832697178327, "grad_norm": 0.3156335949897766, "learning_rate": 3.025649475460055e-05, "loss": 0.1128, "step": 27433 }, { "epoch": 0.48931616309349696, "grad_norm": 0.2901115119457245, "learning_rate": 3.0254973031698064e-05, "loss": 0.1204, "step": 27434 }, { "epoch": 0.48933399921521065, "grad_norm": 0.2870045304298401, "learning_rate": 3.0253451288425493e-05, "loss": 0.1467, "step": 27435 }, { "epoch": 0.48935183533692433, "grad_norm": 0.39747709035873413, "learning_rate": 3.025192952478872e-05, "loss": 0.2356, "step": 27436 }, { "epoch": 0.489369671458638, "grad_norm": 0.33862990140914917, "learning_rate": 3.0250407740793664e-05, "loss": 0.1443, "step": 27437 }, { "epoch": 0.4893875075803517, "grad_norm": 0.310565322637558, "learning_rate": 3.024888593644621e-05, "loss": 0.1044, "step": 27438 }, { "epoch": 0.4894053437020654, "grad_norm": 0.2805270850658417, "learning_rate": 3.0247364111752258e-05, "loss": 0.193, "step": 27439 }, { "epoch": 0.48942317982377914, "grad_norm": 0.20183080434799194, "learning_rate": 3.024584226671771e-05, "loss": 0.1507, "step": 27440 }, { "epoch": 0.48944101594549283, "grad_norm": 0.30187568068504333, "learning_rate": 3.024432040134847e-05, "loss": 0.1669, "step": 27441 }, { "epoch": 0.4894588520672065, "grad_norm": 0.2592521905899048, "learning_rate": 3.0242798515650435e-05, "loss": 0.1926, "step": 27442 }, { "epoch": 0.4894766881889202, "grad_norm": 0.29906854033470154, "learning_rate": 3.02412766096295e-05, "loss": 0.1631, "step": 27443 }, { "epoch": 0.4894945243106339, "grad_norm": 0.21281097829341888, "learning_rate": 3.0239754683291567e-05, "loss": 0.1203, "step": 27444 }, { "epoch": 0.4895123604323476, "grad_norm": 0.38983625173568726, "learning_rate": 3.023823273664253e-05, "loss": 0.1442, "step": 27445 }, { "epoch": 0.48953019655406127, "grad_norm": 0.23149482905864716, "learning_rate": 3.023671076968829e-05, "loss": 0.0654, "step": 27446 }, { "epoch": 0.48954803267577496, "grad_norm": 0.2669079601764679, "learning_rate": 3.0235188782434757e-05, "loss": 0.1142, "step": 27447 }, { "epoch": 0.4895658687974887, "grad_norm": 0.3289227783679962, "learning_rate": 3.0233666774887833e-05, "loss": 0.1875, "step": 27448 }, { "epoch": 0.4895837049192024, "grad_norm": 0.39887478947639465, "learning_rate": 3.023214474705339e-05, "loss": 0.1652, "step": 27449 }, { "epoch": 0.4896015410409161, "grad_norm": 0.2522299289703369, "learning_rate": 3.0230622698937366e-05, "loss": 0.1658, "step": 27450 }, { "epoch": 0.48961937716262977, "grad_norm": 0.2716444730758667, "learning_rate": 3.0229100630545632e-05, "loss": 0.1804, "step": 27451 }, { "epoch": 0.48963721328434345, "grad_norm": 0.23780399560928345, "learning_rate": 3.0227578541884095e-05, "loss": 0.1475, "step": 27452 }, { "epoch": 0.48965504940605714, "grad_norm": 0.22552287578582764, "learning_rate": 3.022605643295866e-05, "loss": 0.1007, "step": 27453 }, { "epoch": 0.48967288552777083, "grad_norm": 0.24677103757858276, "learning_rate": 3.0224534303775232e-05, "loss": 0.0985, "step": 27454 }, { "epoch": 0.4896907216494845, "grad_norm": 0.2954382598400116, "learning_rate": 3.0223012154339704e-05, "loss": 0.14, "step": 27455 }, { "epoch": 0.4897085577711982, "grad_norm": 0.27266794443130493, "learning_rate": 3.0221489984657968e-05, "loss": 0.1604, "step": 27456 }, { "epoch": 0.48972639389291195, "grad_norm": 0.27781030535697937, "learning_rate": 3.021996779473594e-05, "loss": 0.1985, "step": 27457 }, { "epoch": 0.48974423001462564, "grad_norm": 0.2853183448314667, "learning_rate": 3.021844558457951e-05, "loss": 0.1739, "step": 27458 }, { "epoch": 0.4897620661363393, "grad_norm": 0.3584592938423157, "learning_rate": 3.021692335419458e-05, "loss": 0.1238, "step": 27459 }, { "epoch": 0.489779902258053, "grad_norm": 0.21173353493213654, "learning_rate": 3.021540110358706e-05, "loss": 0.0968, "step": 27460 }, { "epoch": 0.4897977383797667, "grad_norm": 0.4052639901638031, "learning_rate": 3.0213878832762843e-05, "loss": 0.221, "step": 27461 }, { "epoch": 0.4898155745014804, "grad_norm": 0.25534844398498535, "learning_rate": 3.0212356541727828e-05, "loss": 0.0455, "step": 27462 }, { "epoch": 0.4898334106231941, "grad_norm": 0.20178954303264618, "learning_rate": 3.0210834230487923e-05, "loss": 0.1476, "step": 27463 }, { "epoch": 0.48985124674490776, "grad_norm": 0.27798745036125183, "learning_rate": 3.020931189904902e-05, "loss": 0.1799, "step": 27464 }, { "epoch": 0.4898690828666215, "grad_norm": 0.3439313769340515, "learning_rate": 3.020778954741703e-05, "loss": 0.1369, "step": 27465 }, { "epoch": 0.4898869189883352, "grad_norm": 0.3136689364910126, "learning_rate": 3.020626717559784e-05, "loss": 0.1571, "step": 27466 }, { "epoch": 0.4899047551100489, "grad_norm": 0.35819634795188904, "learning_rate": 3.0204744783597365e-05, "loss": 0.1008, "step": 27467 }, { "epoch": 0.48992259123176257, "grad_norm": 0.291364848613739, "learning_rate": 3.0203222371421507e-05, "loss": 0.1599, "step": 27468 }, { "epoch": 0.48994042735347626, "grad_norm": 0.2212088406085968, "learning_rate": 3.0201699939076155e-05, "loss": 0.1075, "step": 27469 }, { "epoch": 0.48995826347518995, "grad_norm": 0.2668302357196808, "learning_rate": 3.020017748656722e-05, "loss": 0.1444, "step": 27470 }, { "epoch": 0.48997609959690364, "grad_norm": 0.3390072286128998, "learning_rate": 3.01986550139006e-05, "loss": 0.1113, "step": 27471 }, { "epoch": 0.4899939357186173, "grad_norm": 0.3463013768196106, "learning_rate": 3.0197132521082204e-05, "loss": 0.1858, "step": 27472 }, { "epoch": 0.490011771840331, "grad_norm": 0.3130691349506378, "learning_rate": 3.019561000811792e-05, "loss": 0.176, "step": 27473 }, { "epoch": 0.49002960796204476, "grad_norm": 0.32623130083084106, "learning_rate": 3.0194087475013655e-05, "loss": 0.0689, "step": 27474 }, { "epoch": 0.49004744408375844, "grad_norm": 0.28828662633895874, "learning_rate": 3.0192564921775312e-05, "loss": 0.1205, "step": 27475 }, { "epoch": 0.49006528020547213, "grad_norm": 0.35746586322784424, "learning_rate": 3.0191042348408803e-05, "loss": 0.1345, "step": 27476 }, { "epoch": 0.4900831163271858, "grad_norm": 0.2819545269012451, "learning_rate": 3.0189519754920013e-05, "loss": 0.1398, "step": 27477 }, { "epoch": 0.4901009524488995, "grad_norm": 0.20441186428070068, "learning_rate": 3.0187997141314862e-05, "loss": 0.1648, "step": 27478 }, { "epoch": 0.4901187885706132, "grad_norm": 0.247260183095932, "learning_rate": 3.0186474507599234e-05, "loss": 0.1281, "step": 27479 }, { "epoch": 0.4901366246923269, "grad_norm": 0.29136478900909424, "learning_rate": 3.0184951853779042e-05, "loss": 0.1359, "step": 27480 }, { "epoch": 0.49015446081404057, "grad_norm": 0.23198296129703522, "learning_rate": 3.018342917986019e-05, "loss": 0.1378, "step": 27481 }, { "epoch": 0.4901722969357543, "grad_norm": 0.24682170152664185, "learning_rate": 3.0181906485848567e-05, "loss": 0.1384, "step": 27482 }, { "epoch": 0.490190133057468, "grad_norm": 0.2727537155151367, "learning_rate": 3.0180383771750092e-05, "loss": 0.1637, "step": 27483 }, { "epoch": 0.4902079691791817, "grad_norm": 0.2801114618778229, "learning_rate": 3.0178861037570655e-05, "loss": 0.1787, "step": 27484 }, { "epoch": 0.4902258053008954, "grad_norm": 0.30123138427734375, "learning_rate": 3.017733828331617e-05, "loss": 0.1531, "step": 27485 }, { "epoch": 0.49024364142260907, "grad_norm": 0.26603224873542786, "learning_rate": 3.0175815508992528e-05, "loss": 0.169, "step": 27486 }, { "epoch": 0.49026147754432275, "grad_norm": 0.21042069792747498, "learning_rate": 3.0174292714605636e-05, "loss": 0.1038, "step": 27487 }, { "epoch": 0.49027931366603644, "grad_norm": 0.2981683909893036, "learning_rate": 3.01727699001614e-05, "loss": 0.0856, "step": 27488 }, { "epoch": 0.49029714978775013, "grad_norm": 0.2670917510986328, "learning_rate": 3.0171247065665726e-05, "loss": 0.1033, "step": 27489 }, { "epoch": 0.4903149859094638, "grad_norm": 0.24902333319187164, "learning_rate": 3.0169724211124506e-05, "loss": 0.1172, "step": 27490 }, { "epoch": 0.49033282203117756, "grad_norm": 0.2571159303188324, "learning_rate": 3.0168201336543655e-05, "loss": 0.1391, "step": 27491 }, { "epoch": 0.49035065815289125, "grad_norm": 0.22074784338474274, "learning_rate": 3.0166678441929063e-05, "loss": 0.1413, "step": 27492 }, { "epoch": 0.49036849427460494, "grad_norm": 0.2892563045024872, "learning_rate": 3.0165155527286653e-05, "loss": 0.1667, "step": 27493 }, { "epoch": 0.4903863303963186, "grad_norm": 0.2722818851470947, "learning_rate": 3.0163632592622303e-05, "loss": 0.1388, "step": 27494 }, { "epoch": 0.4904041665180323, "grad_norm": 0.2804606258869171, "learning_rate": 3.0162109637941938e-05, "loss": 0.1964, "step": 27495 }, { "epoch": 0.490422002639746, "grad_norm": 0.3598021864891052, "learning_rate": 3.0160586663251455e-05, "loss": 0.1094, "step": 27496 }, { "epoch": 0.4904398387614597, "grad_norm": 0.2383718192577362, "learning_rate": 3.015906366855675e-05, "loss": 0.1325, "step": 27497 }, { "epoch": 0.4904576748831734, "grad_norm": 0.22849123179912567, "learning_rate": 3.0157540653863736e-05, "loss": 0.1276, "step": 27498 }, { "epoch": 0.4904755110048871, "grad_norm": 0.405903697013855, "learning_rate": 3.0156017619178307e-05, "loss": 0.1348, "step": 27499 }, { "epoch": 0.4904933471266008, "grad_norm": 0.19192469120025635, "learning_rate": 3.015449456450638e-05, "loss": 0.1421, "step": 27500 }, { "epoch": 0.4905111832483145, "grad_norm": 0.23690274357795715, "learning_rate": 3.0152971489853842e-05, "loss": 0.0872, "step": 27501 }, { "epoch": 0.4905290193700282, "grad_norm": 0.2556398808956146, "learning_rate": 3.0151448395226615e-05, "loss": 0.1546, "step": 27502 }, { "epoch": 0.4905468554917419, "grad_norm": 0.23648172616958618, "learning_rate": 3.0149925280630593e-05, "loss": 0.1284, "step": 27503 }, { "epoch": 0.49056469161345556, "grad_norm": 0.23430638015270233, "learning_rate": 3.0148402146071685e-05, "loss": 0.1498, "step": 27504 }, { "epoch": 0.49058252773516925, "grad_norm": 0.24758735299110413, "learning_rate": 3.0146878991555786e-05, "loss": 0.1492, "step": 27505 }, { "epoch": 0.49060036385688294, "grad_norm": 0.3409648835659027, "learning_rate": 3.014535581708881e-05, "loss": 0.1167, "step": 27506 }, { "epoch": 0.4906181999785967, "grad_norm": 0.23154838383197784, "learning_rate": 3.014383262267666e-05, "loss": 0.115, "step": 27507 }, { "epoch": 0.49063603610031037, "grad_norm": 0.2456117570400238, "learning_rate": 3.014230940832523e-05, "loss": 0.1259, "step": 27508 }, { "epoch": 0.49065387222202406, "grad_norm": 0.41577792167663574, "learning_rate": 3.0140786174040442e-05, "loss": 0.1777, "step": 27509 }, { "epoch": 0.49067170834373774, "grad_norm": 0.2501739263534546, "learning_rate": 3.0139262919828186e-05, "loss": 0.1147, "step": 27510 }, { "epoch": 0.49068954446545143, "grad_norm": 0.25977393984794617, "learning_rate": 3.0137739645694376e-05, "loss": 0.112, "step": 27511 }, { "epoch": 0.4907073805871651, "grad_norm": 0.31532108783721924, "learning_rate": 3.0136216351644906e-05, "loss": 0.1576, "step": 27512 }, { "epoch": 0.4907252167088788, "grad_norm": 0.2619518041610718, "learning_rate": 3.0134693037685697e-05, "loss": 0.1338, "step": 27513 }, { "epoch": 0.4907430528305925, "grad_norm": 0.2229187786579132, "learning_rate": 3.0133169703822634e-05, "loss": 0.1668, "step": 27514 }, { "epoch": 0.4907608889523062, "grad_norm": 0.30031391978263855, "learning_rate": 3.0131646350061638e-05, "loss": 0.1124, "step": 27515 }, { "epoch": 0.49077872507401993, "grad_norm": 0.2917827069759369, "learning_rate": 3.0130122976408602e-05, "loss": 0.1176, "step": 27516 }, { "epoch": 0.4907965611957336, "grad_norm": 0.23008359968662262, "learning_rate": 3.0128599582869445e-05, "loss": 0.1303, "step": 27517 }, { "epoch": 0.4908143973174473, "grad_norm": 0.2603848874568939, "learning_rate": 3.012707616945006e-05, "loss": 0.1536, "step": 27518 }, { "epoch": 0.490832233439161, "grad_norm": 0.2413429319858551, "learning_rate": 3.012555273615636e-05, "loss": 0.1495, "step": 27519 }, { "epoch": 0.4908500695608747, "grad_norm": 0.25921598076820374, "learning_rate": 3.0124029282994247e-05, "loss": 0.1163, "step": 27520 }, { "epoch": 0.49086790568258837, "grad_norm": 0.27238136529922485, "learning_rate": 3.0122505809969627e-05, "loss": 0.1933, "step": 27521 }, { "epoch": 0.49088574180430206, "grad_norm": 0.28039273619651794, "learning_rate": 3.0120982317088403e-05, "loss": 0.1186, "step": 27522 }, { "epoch": 0.49090357792601574, "grad_norm": 0.26747840642929077, "learning_rate": 3.0119458804356483e-05, "loss": 0.0806, "step": 27523 }, { "epoch": 0.4909214140477295, "grad_norm": 0.2762358486652374, "learning_rate": 3.0117935271779775e-05, "loss": 0.1591, "step": 27524 }, { "epoch": 0.4909392501694432, "grad_norm": 0.28732550144195557, "learning_rate": 3.011641171936418e-05, "loss": 0.2027, "step": 27525 }, { "epoch": 0.49095708629115686, "grad_norm": 0.31902605295181274, "learning_rate": 3.0114888147115612e-05, "loss": 0.1622, "step": 27526 }, { "epoch": 0.49097492241287055, "grad_norm": 0.2513086199760437, "learning_rate": 3.011336455503996e-05, "loss": 0.1298, "step": 27527 }, { "epoch": 0.49099275853458424, "grad_norm": 0.3024000823497772, "learning_rate": 3.0111840943143145e-05, "loss": 0.1448, "step": 27528 }, { "epoch": 0.4910105946562979, "grad_norm": 0.24043670296669006, "learning_rate": 3.011031731143107e-05, "loss": 0.1144, "step": 27529 }, { "epoch": 0.4910284307780116, "grad_norm": 0.22187557816505432, "learning_rate": 3.010879365990964e-05, "loss": 0.1299, "step": 27530 }, { "epoch": 0.4910462668997253, "grad_norm": 0.2667667269706726, "learning_rate": 3.0107269988584764e-05, "loss": 0.1575, "step": 27531 }, { "epoch": 0.491064103021439, "grad_norm": 0.2809300422668457, "learning_rate": 3.0105746297462346e-05, "loss": 0.1256, "step": 27532 }, { "epoch": 0.49108193914315273, "grad_norm": 0.30818885564804077, "learning_rate": 3.0104222586548285e-05, "loss": 0.1787, "step": 27533 }, { "epoch": 0.4910997752648664, "grad_norm": 0.3898325264453888, "learning_rate": 3.01026988558485e-05, "loss": 0.1831, "step": 27534 }, { "epoch": 0.4911176113865801, "grad_norm": 0.2780079245567322, "learning_rate": 3.010117510536889e-05, "loss": 0.1018, "step": 27535 }, { "epoch": 0.4911354475082938, "grad_norm": 0.2041061669588089, "learning_rate": 3.0099651335115364e-05, "loss": 0.14, "step": 27536 }, { "epoch": 0.4911532836300075, "grad_norm": 0.2900209128856659, "learning_rate": 3.009812754509383e-05, "loss": 0.1018, "step": 27537 }, { "epoch": 0.4911711197517212, "grad_norm": 0.2860986292362213, "learning_rate": 3.0096603735310187e-05, "loss": 0.1392, "step": 27538 }, { "epoch": 0.49118895587343486, "grad_norm": 0.585249662399292, "learning_rate": 3.009507990577035e-05, "loss": 0.1587, "step": 27539 }, { "epoch": 0.49120679199514855, "grad_norm": 0.26620766520500183, "learning_rate": 3.0093556056480227e-05, "loss": 0.1084, "step": 27540 }, { "epoch": 0.4912246281168623, "grad_norm": 0.2916359603404999, "learning_rate": 3.0092032187445725e-05, "loss": 0.1641, "step": 27541 }, { "epoch": 0.491242464238576, "grad_norm": 0.2687723934650421, "learning_rate": 3.0090508298672736e-05, "loss": 0.1353, "step": 27542 }, { "epoch": 0.49126030036028967, "grad_norm": 0.22274333238601685, "learning_rate": 3.0088984390167186e-05, "loss": 0.1402, "step": 27543 }, { "epoch": 0.49127813648200336, "grad_norm": 0.2707686722278595, "learning_rate": 3.0087460461934968e-05, "loss": 0.1581, "step": 27544 }, { "epoch": 0.49129597260371705, "grad_norm": 0.2288791686296463, "learning_rate": 3.0085936513982006e-05, "loss": 0.1128, "step": 27545 }, { "epoch": 0.49131380872543073, "grad_norm": 0.2257990837097168, "learning_rate": 3.008441254631419e-05, "loss": 0.1302, "step": 27546 }, { "epoch": 0.4913316448471444, "grad_norm": 0.2584269046783447, "learning_rate": 3.0082888558937438e-05, "loss": 0.1727, "step": 27547 }, { "epoch": 0.4913494809688581, "grad_norm": 0.24260324239730835, "learning_rate": 3.008136455185766e-05, "loss": 0.1493, "step": 27548 }, { "epoch": 0.49136731709057185, "grad_norm": 0.30001112818717957, "learning_rate": 3.0079840525080748e-05, "loss": 0.1934, "step": 27549 }, { "epoch": 0.49138515321228554, "grad_norm": 0.24751876294612885, "learning_rate": 3.0078316478612623e-05, "loss": 0.1142, "step": 27550 }, { "epoch": 0.49140298933399923, "grad_norm": 0.3574122190475464, "learning_rate": 3.0076792412459188e-05, "loss": 0.1174, "step": 27551 }, { "epoch": 0.4914208254557129, "grad_norm": 0.22561073303222656, "learning_rate": 3.0075268326626356e-05, "loss": 0.1648, "step": 27552 }, { "epoch": 0.4914386615774266, "grad_norm": 0.25146549940109253, "learning_rate": 3.0073744221120025e-05, "loss": 0.1655, "step": 27553 }, { "epoch": 0.4914564976991403, "grad_norm": 0.19077861309051514, "learning_rate": 3.007222009594612e-05, "loss": 0.1548, "step": 27554 }, { "epoch": 0.491474333820854, "grad_norm": 0.2860119640827179, "learning_rate": 3.0070695951110528e-05, "loss": 0.159, "step": 27555 }, { "epoch": 0.49149216994256767, "grad_norm": 0.1798303872346878, "learning_rate": 3.0069171786619167e-05, "loss": 0.136, "step": 27556 }, { "epoch": 0.49151000606428136, "grad_norm": 0.25867077708244324, "learning_rate": 3.0067647602477946e-05, "loss": 0.1385, "step": 27557 }, { "epoch": 0.4915278421859951, "grad_norm": 0.24283356964588165, "learning_rate": 3.006612339869278e-05, "loss": 0.1396, "step": 27558 }, { "epoch": 0.4915456783077088, "grad_norm": 0.20348159968852997, "learning_rate": 3.006459917526956e-05, "loss": 0.097, "step": 27559 }, { "epoch": 0.4915635144294225, "grad_norm": 0.31267812848091125, "learning_rate": 3.0063074932214215e-05, "loss": 0.1396, "step": 27560 }, { "epoch": 0.49158135055113616, "grad_norm": 0.39648309350013733, "learning_rate": 3.006155066953264e-05, "loss": 0.1681, "step": 27561 }, { "epoch": 0.49159918667284985, "grad_norm": 0.2350505292415619, "learning_rate": 3.006002638723074e-05, "loss": 0.157, "step": 27562 }, { "epoch": 0.49161702279456354, "grad_norm": 0.2715829908847809, "learning_rate": 3.005850208531443e-05, "loss": 0.2128, "step": 27563 }, { "epoch": 0.49163485891627723, "grad_norm": 0.3013695180416107, "learning_rate": 3.0056977763789623e-05, "loss": 0.1759, "step": 27564 }, { "epoch": 0.4916526950379909, "grad_norm": 0.17881599068641663, "learning_rate": 3.0055453422662223e-05, "loss": 0.1114, "step": 27565 }, { "epoch": 0.49167053115970466, "grad_norm": 0.23384027183055878, "learning_rate": 3.0053929061938135e-05, "loss": 0.1236, "step": 27566 }, { "epoch": 0.49168836728141835, "grad_norm": 0.3760800063610077, "learning_rate": 3.005240468162328e-05, "loss": 0.17, "step": 27567 }, { "epoch": 0.49170620340313204, "grad_norm": 0.3806721270084381, "learning_rate": 3.005088028172356e-05, "loss": 0.1155, "step": 27568 }, { "epoch": 0.4917240395248457, "grad_norm": 0.3928253650665283, "learning_rate": 3.004935586224488e-05, "loss": 0.155, "step": 27569 }, { "epoch": 0.4917418756465594, "grad_norm": 0.31668326258659363, "learning_rate": 3.004783142319315e-05, "loss": 0.0758, "step": 27570 }, { "epoch": 0.4917597117682731, "grad_norm": 0.2437426745891571, "learning_rate": 3.0046306964574283e-05, "loss": 0.0983, "step": 27571 }, { "epoch": 0.4917775478899868, "grad_norm": 0.26245933771133423, "learning_rate": 3.0044782486394184e-05, "loss": 0.1463, "step": 27572 }, { "epoch": 0.4917953840117005, "grad_norm": 0.26438385248184204, "learning_rate": 3.0043257988658774e-05, "loss": 0.1379, "step": 27573 }, { "epoch": 0.49181322013341416, "grad_norm": 0.237799271941185, "learning_rate": 3.0041733471373955e-05, "loss": 0.1099, "step": 27574 }, { "epoch": 0.4918310562551279, "grad_norm": 0.33988502621650696, "learning_rate": 3.0040208934545626e-05, "loss": 0.2688, "step": 27575 }, { "epoch": 0.4918488923768416, "grad_norm": 0.23251087963581085, "learning_rate": 3.0038684378179717e-05, "loss": 0.1604, "step": 27576 }, { "epoch": 0.4918667284985553, "grad_norm": 0.27852708101272583, "learning_rate": 3.0037159802282122e-05, "loss": 0.1308, "step": 27577 }, { "epoch": 0.49188456462026897, "grad_norm": 0.20254004001617432, "learning_rate": 3.0035635206858747e-05, "loss": 0.1465, "step": 27578 }, { "epoch": 0.49190240074198266, "grad_norm": 0.24009934067726135, "learning_rate": 3.0034110591915522e-05, "loss": 0.1421, "step": 27579 }, { "epoch": 0.49192023686369635, "grad_norm": 0.2905464172363281, "learning_rate": 3.0032585957458342e-05, "loss": 0.1713, "step": 27580 }, { "epoch": 0.49193807298541004, "grad_norm": 0.2547030448913574, "learning_rate": 3.003106130349312e-05, "loss": 0.1582, "step": 27581 }, { "epoch": 0.4919559091071237, "grad_norm": 0.24700869619846344, "learning_rate": 3.0029536630025772e-05, "loss": 0.1137, "step": 27582 }, { "epoch": 0.49197374522883747, "grad_norm": 0.2629300355911255, "learning_rate": 3.0028011937062193e-05, "loss": 0.1325, "step": 27583 }, { "epoch": 0.49199158135055115, "grad_norm": 0.44348493218421936, "learning_rate": 3.0026487224608312e-05, "loss": 0.1508, "step": 27584 }, { "epoch": 0.49200941747226484, "grad_norm": 0.40411919355392456, "learning_rate": 3.0024962492670023e-05, "loss": 0.1575, "step": 27585 }, { "epoch": 0.49202725359397853, "grad_norm": 0.2483748495578766, "learning_rate": 3.0023437741253253e-05, "loss": 0.1438, "step": 27586 }, { "epoch": 0.4920450897156922, "grad_norm": 0.29653963446617126, "learning_rate": 3.0021912970363903e-05, "loss": 0.1372, "step": 27587 }, { "epoch": 0.4920629258374059, "grad_norm": 0.20083899796009064, "learning_rate": 3.0020388180007876e-05, "loss": 0.1275, "step": 27588 }, { "epoch": 0.4920807619591196, "grad_norm": 0.22859717905521393, "learning_rate": 3.0018863370191098e-05, "loss": 0.1642, "step": 27589 }, { "epoch": 0.4920985980808333, "grad_norm": 0.3652629256248474, "learning_rate": 3.0017338540919464e-05, "loss": 0.1873, "step": 27590 }, { "epoch": 0.49211643420254697, "grad_norm": 0.20172858238220215, "learning_rate": 3.001581369219889e-05, "loss": 0.1098, "step": 27591 }, { "epoch": 0.4921342703242607, "grad_norm": 0.27154991030693054, "learning_rate": 3.0014288824035297e-05, "loss": 0.1451, "step": 27592 }, { "epoch": 0.4921521064459744, "grad_norm": 0.24188734591007233, "learning_rate": 3.0012763936434592e-05, "loss": 0.1138, "step": 27593 }, { "epoch": 0.4921699425676881, "grad_norm": 0.2573421001434326, "learning_rate": 3.0011239029402677e-05, "loss": 0.1287, "step": 27594 }, { "epoch": 0.4921877786894018, "grad_norm": 0.22781307995319366, "learning_rate": 3.0009714102945474e-05, "loss": 0.104, "step": 27595 }, { "epoch": 0.49220561481111547, "grad_norm": 0.2729334831237793, "learning_rate": 3.0008189157068882e-05, "loss": 0.1604, "step": 27596 }, { "epoch": 0.49222345093282915, "grad_norm": 0.29051733016967773, "learning_rate": 3.0006664191778827e-05, "loss": 0.1406, "step": 27597 }, { "epoch": 0.49224128705454284, "grad_norm": 0.3005540668964386, "learning_rate": 3.00051392070812e-05, "loss": 0.1445, "step": 27598 }, { "epoch": 0.49225912317625653, "grad_norm": 0.33935075998306274, "learning_rate": 3.0003614202981932e-05, "loss": 0.1756, "step": 27599 }, { "epoch": 0.4922769592979703, "grad_norm": 0.22830133140087128, "learning_rate": 3.000208917948693e-05, "loss": 0.095, "step": 27600 }, { "epoch": 0.49229479541968396, "grad_norm": 0.24258065223693848, "learning_rate": 3.0000564136602098e-05, "loss": 0.1652, "step": 27601 }, { "epoch": 0.49231263154139765, "grad_norm": 0.3200227916240692, "learning_rate": 2.9999039074333357e-05, "loss": 0.1237, "step": 27602 }, { "epoch": 0.49233046766311134, "grad_norm": 0.2693004012107849, "learning_rate": 2.999751399268661e-05, "loss": 0.1336, "step": 27603 }, { "epoch": 0.492348303784825, "grad_norm": 0.29738491773605347, "learning_rate": 2.999598889166778e-05, "loss": 0.1072, "step": 27604 }, { "epoch": 0.4923661399065387, "grad_norm": 0.26984646916389465, "learning_rate": 2.9994463771282755e-05, "loss": 0.1046, "step": 27605 }, { "epoch": 0.4923839760282524, "grad_norm": 0.32763803005218506, "learning_rate": 2.9992938631537482e-05, "loss": 0.1474, "step": 27606 }, { "epoch": 0.4924018121499661, "grad_norm": 0.30050089955329895, "learning_rate": 2.9991413472437842e-05, "loss": 0.104, "step": 27607 }, { "epoch": 0.49241964827167983, "grad_norm": 0.2945813536643982, "learning_rate": 2.9989888293989763e-05, "loss": 0.125, "step": 27608 }, { "epoch": 0.4924374843933935, "grad_norm": 0.26110172271728516, "learning_rate": 2.9988363096199156e-05, "loss": 0.1211, "step": 27609 }, { "epoch": 0.4924553205151072, "grad_norm": 0.26588115096092224, "learning_rate": 2.998683787907193e-05, "loss": 0.1314, "step": 27610 }, { "epoch": 0.4924731566368209, "grad_norm": 0.23548169434070587, "learning_rate": 2.9985312642613995e-05, "loss": 0.1015, "step": 27611 }, { "epoch": 0.4924909927585346, "grad_norm": 0.3043283522129059, "learning_rate": 2.998378738683127e-05, "loss": 0.1502, "step": 27612 }, { "epoch": 0.4925088288802483, "grad_norm": 0.29870033264160156, "learning_rate": 2.9982262111729665e-05, "loss": 0.1486, "step": 27613 }, { "epoch": 0.49252666500196196, "grad_norm": 0.25260472297668457, "learning_rate": 2.9980736817315082e-05, "loss": 0.115, "step": 27614 }, { "epoch": 0.49254450112367565, "grad_norm": 0.29032596945762634, "learning_rate": 2.9979211503593453e-05, "loss": 0.1479, "step": 27615 }, { "epoch": 0.49256233724538934, "grad_norm": 0.35725653171539307, "learning_rate": 2.9977686170570675e-05, "loss": 0.1433, "step": 27616 }, { "epoch": 0.4925801733671031, "grad_norm": 0.20266664028167725, "learning_rate": 2.997616081825267e-05, "loss": 0.1032, "step": 27617 }, { "epoch": 0.49259800948881677, "grad_norm": 0.2522314488887787, "learning_rate": 2.9974635446645344e-05, "loss": 0.1271, "step": 27618 }, { "epoch": 0.49261584561053046, "grad_norm": 0.3580878973007202, "learning_rate": 2.9973110055754612e-05, "loss": 0.209, "step": 27619 }, { "epoch": 0.49263368173224414, "grad_norm": 0.19904747605323792, "learning_rate": 2.9971584645586387e-05, "loss": 0.1029, "step": 27620 }, { "epoch": 0.49265151785395783, "grad_norm": 0.27453264594078064, "learning_rate": 2.9970059216146583e-05, "loss": 0.1591, "step": 27621 }, { "epoch": 0.4926693539756715, "grad_norm": 0.22794201970100403, "learning_rate": 2.9968533767441113e-05, "loss": 0.1569, "step": 27622 }, { "epoch": 0.4926871900973852, "grad_norm": 0.21620889008045197, "learning_rate": 2.9967008299475896e-05, "loss": 0.1096, "step": 27623 }, { "epoch": 0.4927050262190989, "grad_norm": 0.34933048486709595, "learning_rate": 2.9965482812256834e-05, "loss": 0.142, "step": 27624 }, { "epoch": 0.49272286234081264, "grad_norm": 0.20878398418426514, "learning_rate": 2.9963957305789853e-05, "loss": 0.1291, "step": 27625 }, { "epoch": 0.4927406984625263, "grad_norm": 0.30119627714157104, "learning_rate": 2.9962431780080845e-05, "loss": 0.1504, "step": 27626 }, { "epoch": 0.49275853458424, "grad_norm": 0.23445358872413635, "learning_rate": 2.9960906235135743e-05, "loss": 0.133, "step": 27627 }, { "epoch": 0.4927763707059537, "grad_norm": 0.27041730284690857, "learning_rate": 2.9959380670960464e-05, "loss": 0.147, "step": 27628 }, { "epoch": 0.4927942068276674, "grad_norm": 0.34073033928871155, "learning_rate": 2.9957855087560903e-05, "loss": 0.1634, "step": 27629 }, { "epoch": 0.4928120429493811, "grad_norm": 0.30418482422828674, "learning_rate": 2.995632948494299e-05, "loss": 0.1616, "step": 27630 }, { "epoch": 0.49282987907109477, "grad_norm": 0.32524460554122925, "learning_rate": 2.9954803863112622e-05, "loss": 0.2172, "step": 27631 }, { "epoch": 0.49284771519280846, "grad_norm": 0.26666396856307983, "learning_rate": 2.9953278222075738e-05, "loss": 0.1423, "step": 27632 }, { "epoch": 0.49286555131452214, "grad_norm": 0.4993481934070587, "learning_rate": 2.995175256183822e-05, "loss": 0.1531, "step": 27633 }, { "epoch": 0.4928833874362359, "grad_norm": 0.18934115767478943, "learning_rate": 2.9950226882406006e-05, "loss": 0.1076, "step": 27634 }, { "epoch": 0.4929012235579496, "grad_norm": 0.23731137812137604, "learning_rate": 2.9948701183785e-05, "loss": 0.1318, "step": 27635 }, { "epoch": 0.49291905967966326, "grad_norm": 0.23826810717582703, "learning_rate": 2.9947175465981124e-05, "loss": 0.146, "step": 27636 }, { "epoch": 0.49293689580137695, "grad_norm": 0.3153833746910095, "learning_rate": 2.9945649729000286e-05, "loss": 0.132, "step": 27637 }, { "epoch": 0.49295473192309064, "grad_norm": 0.2083572894334793, "learning_rate": 2.9944123972848402e-05, "loss": 0.1338, "step": 27638 }, { "epoch": 0.4929725680448043, "grad_norm": 0.37656208872795105, "learning_rate": 2.9942598197531385e-05, "loss": 0.1179, "step": 27639 }, { "epoch": 0.492990404166518, "grad_norm": 0.2984860837459564, "learning_rate": 2.994107240305515e-05, "loss": 0.1154, "step": 27640 }, { "epoch": 0.4930082402882317, "grad_norm": 0.3810182809829712, "learning_rate": 2.9939546589425616e-05, "loss": 0.1936, "step": 27641 }, { "epoch": 0.49302607640994545, "grad_norm": 0.25900816917419434, "learning_rate": 2.993802075664869e-05, "loss": 0.1149, "step": 27642 }, { "epoch": 0.49304391253165913, "grad_norm": 0.35352107882499695, "learning_rate": 2.9936494904730288e-05, "loss": 0.1303, "step": 27643 }, { "epoch": 0.4930617486533728, "grad_norm": 0.1907660961151123, "learning_rate": 2.9934969033676325e-05, "loss": 0.1175, "step": 27644 }, { "epoch": 0.4930795847750865, "grad_norm": 0.3007625639438629, "learning_rate": 2.9933443143492722e-05, "loss": 0.1575, "step": 27645 }, { "epoch": 0.4930974208968002, "grad_norm": 0.21162590384483337, "learning_rate": 2.9931917234185393e-05, "loss": 0.111, "step": 27646 }, { "epoch": 0.4931152570185139, "grad_norm": 0.2759343385696411, "learning_rate": 2.993039130576024e-05, "loss": 0.1182, "step": 27647 }, { "epoch": 0.4931330931402276, "grad_norm": 0.31473293900489807, "learning_rate": 2.992886535822319e-05, "loss": 0.179, "step": 27648 }, { "epoch": 0.49315092926194126, "grad_norm": 0.24355410039424896, "learning_rate": 2.9927339391580157e-05, "loss": 0.1423, "step": 27649 }, { "epoch": 0.493168765383655, "grad_norm": 0.2825729548931122, "learning_rate": 2.9925813405837056e-05, "loss": 0.1715, "step": 27650 }, { "epoch": 0.4931866015053687, "grad_norm": 0.2509865462779999, "learning_rate": 2.9924287400999802e-05, "loss": 0.1121, "step": 27651 }, { "epoch": 0.4932044376270824, "grad_norm": 0.26842087507247925, "learning_rate": 2.992276137707431e-05, "loss": 0.1387, "step": 27652 }, { "epoch": 0.49322227374879607, "grad_norm": 0.30520492792129517, "learning_rate": 2.9921235334066487e-05, "loss": 0.1151, "step": 27653 }, { "epoch": 0.49324010987050976, "grad_norm": 0.22917208075523376, "learning_rate": 2.991970927198226e-05, "loss": 0.1321, "step": 27654 }, { "epoch": 0.49325794599222345, "grad_norm": 0.2446434497833252, "learning_rate": 2.9918183190827542e-05, "loss": 0.1225, "step": 27655 }, { "epoch": 0.49327578211393713, "grad_norm": 0.20324060320854187, "learning_rate": 2.9916657090608246e-05, "loss": 0.1208, "step": 27656 }, { "epoch": 0.4932936182356508, "grad_norm": 0.2528868317604065, "learning_rate": 2.9915130971330292e-05, "loss": 0.1561, "step": 27657 }, { "epoch": 0.4933114543573645, "grad_norm": 0.29077768325805664, "learning_rate": 2.9913604832999592e-05, "loss": 0.1313, "step": 27658 }, { "epoch": 0.49332929047907825, "grad_norm": 0.43253135681152344, "learning_rate": 2.991207867562206e-05, "loss": 0.1786, "step": 27659 }, { "epoch": 0.49334712660079194, "grad_norm": 0.2216760367155075, "learning_rate": 2.9910552499203614e-05, "loss": 0.1437, "step": 27660 }, { "epoch": 0.49336496272250563, "grad_norm": 0.2433215230703354, "learning_rate": 2.9909026303750164e-05, "loss": 0.1511, "step": 27661 }, { "epoch": 0.4933827988442193, "grad_norm": 0.2987543046474457, "learning_rate": 2.9907500089267648e-05, "loss": 0.1448, "step": 27662 }, { "epoch": 0.493400634965933, "grad_norm": 0.22920256853103638, "learning_rate": 2.9905973855761955e-05, "loss": 0.1385, "step": 27663 }, { "epoch": 0.4934184710876467, "grad_norm": 0.28606343269348145, "learning_rate": 2.9904447603239023e-05, "loss": 0.1198, "step": 27664 }, { "epoch": 0.4934363072093604, "grad_norm": 0.3272809088230133, "learning_rate": 2.9902921331704752e-05, "loss": 0.1339, "step": 27665 }, { "epoch": 0.49345414333107407, "grad_norm": 0.26525336503982544, "learning_rate": 2.9901395041165064e-05, "loss": 0.1417, "step": 27666 }, { "epoch": 0.4934719794527878, "grad_norm": 0.2768530547618866, "learning_rate": 2.9899868731625874e-05, "loss": 0.177, "step": 27667 }, { "epoch": 0.4934898155745015, "grad_norm": 0.3807823657989502, "learning_rate": 2.98983424030931e-05, "loss": 0.2308, "step": 27668 }, { "epoch": 0.4935076516962152, "grad_norm": 0.24528251588344574, "learning_rate": 2.9896816055572668e-05, "loss": 0.1171, "step": 27669 }, { "epoch": 0.4935254878179289, "grad_norm": 0.24487866461277008, "learning_rate": 2.9895289689070476e-05, "loss": 0.1553, "step": 27670 }, { "epoch": 0.49354332393964256, "grad_norm": 0.3777882754802704, "learning_rate": 2.989376330359246e-05, "loss": 0.0869, "step": 27671 }, { "epoch": 0.49356116006135625, "grad_norm": 0.25599420070648193, "learning_rate": 2.9892236899144515e-05, "loss": 0.1539, "step": 27672 }, { "epoch": 0.49357899618306994, "grad_norm": 0.31662672758102417, "learning_rate": 2.989071047573258e-05, "loss": 0.1741, "step": 27673 }, { "epoch": 0.49359683230478363, "grad_norm": 0.31823235750198364, "learning_rate": 2.9889184033362556e-05, "loss": 0.1319, "step": 27674 }, { "epoch": 0.4936146684264973, "grad_norm": 0.2882575988769531, "learning_rate": 2.9887657572040368e-05, "loss": 0.1962, "step": 27675 }, { "epoch": 0.49363250454821106, "grad_norm": 0.2059096395969391, "learning_rate": 2.988613109177193e-05, "loss": 0.1061, "step": 27676 }, { "epoch": 0.49365034066992475, "grad_norm": 0.17289742827415466, "learning_rate": 2.9884604592563165e-05, "loss": 0.0821, "step": 27677 }, { "epoch": 0.49366817679163844, "grad_norm": 0.23387284576892853, "learning_rate": 2.9883078074419984e-05, "loss": 0.1192, "step": 27678 }, { "epoch": 0.4936860129133521, "grad_norm": 0.2925841212272644, "learning_rate": 2.98815515373483e-05, "loss": 0.1517, "step": 27679 }, { "epoch": 0.4937038490350658, "grad_norm": 0.22345633804798126, "learning_rate": 2.9880024981354044e-05, "loss": 0.1276, "step": 27680 }, { "epoch": 0.4937216851567795, "grad_norm": 0.4029824733734131, "learning_rate": 2.9878498406443122e-05, "loss": 0.1731, "step": 27681 }, { "epoch": 0.4937395212784932, "grad_norm": 0.224434956908226, "learning_rate": 2.987697181262145e-05, "loss": 0.1406, "step": 27682 }, { "epoch": 0.4937573574002069, "grad_norm": 0.388540118932724, "learning_rate": 2.987544519989496e-05, "loss": 0.153, "step": 27683 }, { "epoch": 0.4937751935219206, "grad_norm": 0.30785760283470154, "learning_rate": 2.9873918568269554e-05, "loss": 0.1888, "step": 27684 }, { "epoch": 0.4937930296436343, "grad_norm": 0.24741925299167633, "learning_rate": 2.9872391917751156e-05, "loss": 0.0887, "step": 27685 }, { "epoch": 0.493810865765348, "grad_norm": 0.1698487251996994, "learning_rate": 2.9870865248345693e-05, "loss": 0.0701, "step": 27686 }, { "epoch": 0.4938287018870617, "grad_norm": 0.2524876892566681, "learning_rate": 2.986933856005906e-05, "loss": 0.133, "step": 27687 }, { "epoch": 0.49384653800877537, "grad_norm": 0.23333775997161865, "learning_rate": 2.98678118528972e-05, "loss": 0.1252, "step": 27688 }, { "epoch": 0.49386437413048906, "grad_norm": 0.25426897406578064, "learning_rate": 2.986628512686601e-05, "loss": 0.1375, "step": 27689 }, { "epoch": 0.49388221025220275, "grad_norm": 0.2026497721672058, "learning_rate": 2.986475838197143e-05, "loss": 0.1332, "step": 27690 }, { "epoch": 0.49390004637391643, "grad_norm": 0.19517701864242554, "learning_rate": 2.9863231618219367e-05, "loss": 0.1386, "step": 27691 }, { "epoch": 0.4939178824956301, "grad_norm": 0.22057275474071503, "learning_rate": 2.986170483561573e-05, "loss": 0.1362, "step": 27692 }, { "epoch": 0.49393571861734387, "grad_norm": 0.23425064980983734, "learning_rate": 2.9860178034166452e-05, "loss": 0.1142, "step": 27693 }, { "epoch": 0.49395355473905755, "grad_norm": 0.3758876621723175, "learning_rate": 2.9858651213877437e-05, "loss": 0.14, "step": 27694 }, { "epoch": 0.49397139086077124, "grad_norm": 0.19376304745674133, "learning_rate": 2.9857124374754615e-05, "loss": 0.1434, "step": 27695 }, { "epoch": 0.49398922698248493, "grad_norm": 0.28832682967185974, "learning_rate": 2.9855597516803903e-05, "loss": 0.1327, "step": 27696 }, { "epoch": 0.4940070631041986, "grad_norm": 0.28042230010032654, "learning_rate": 2.9854070640031218e-05, "loss": 0.1753, "step": 27697 }, { "epoch": 0.4940248992259123, "grad_norm": 0.2371305227279663, "learning_rate": 2.9852543744442475e-05, "loss": 0.1001, "step": 27698 }, { "epoch": 0.494042735347626, "grad_norm": 0.24829888343811035, "learning_rate": 2.9851016830043604e-05, "loss": 0.0878, "step": 27699 }, { "epoch": 0.4940605714693397, "grad_norm": 0.33452898263931274, "learning_rate": 2.984948989684051e-05, "loss": 0.1556, "step": 27700 }, { "epoch": 0.4940784075910534, "grad_norm": 0.3049828112125397, "learning_rate": 2.9847962944839126e-05, "loss": 0.1314, "step": 27701 }, { "epoch": 0.4940962437127671, "grad_norm": 0.28771519660949707, "learning_rate": 2.984643597404535e-05, "loss": 0.1602, "step": 27702 }, { "epoch": 0.4941140798344808, "grad_norm": 0.19846481084823608, "learning_rate": 2.9844908984465125e-05, "loss": 0.1109, "step": 27703 }, { "epoch": 0.4941319159561945, "grad_norm": 0.343536376953125, "learning_rate": 2.9843381976104357e-05, "loss": 0.1925, "step": 27704 }, { "epoch": 0.4941497520779082, "grad_norm": 0.3101692795753479, "learning_rate": 2.9841854948968962e-05, "loss": 0.1337, "step": 27705 }, { "epoch": 0.49416758819962187, "grad_norm": 0.2644140124320984, "learning_rate": 2.9840327903064875e-05, "loss": 0.1282, "step": 27706 }, { "epoch": 0.49418542432133555, "grad_norm": 0.23767289519309998, "learning_rate": 2.9838800838397995e-05, "loss": 0.1652, "step": 27707 }, { "epoch": 0.49420326044304924, "grad_norm": 0.2844628691673279, "learning_rate": 2.9837273754974258e-05, "loss": 0.1184, "step": 27708 }, { "epoch": 0.494221096564763, "grad_norm": 0.31627243757247925, "learning_rate": 2.9835746652799574e-05, "loss": 0.091, "step": 27709 }, { "epoch": 0.4942389326864767, "grad_norm": 0.2738164961338043, "learning_rate": 2.983421953187987e-05, "loss": 0.1286, "step": 27710 }, { "epoch": 0.49425676880819036, "grad_norm": 0.2729830741882324, "learning_rate": 2.9832692392221057e-05, "loss": 0.1612, "step": 27711 }, { "epoch": 0.49427460492990405, "grad_norm": 0.21243572235107422, "learning_rate": 2.983116523382906e-05, "loss": 0.1264, "step": 27712 }, { "epoch": 0.49429244105161774, "grad_norm": 0.3365642726421356, "learning_rate": 2.98296380567098e-05, "loss": 0.1272, "step": 27713 }, { "epoch": 0.4943102771733314, "grad_norm": 0.2932683229446411, "learning_rate": 2.9828110860869197e-05, "loss": 0.1549, "step": 27714 }, { "epoch": 0.4943281132950451, "grad_norm": 0.29253682494163513, "learning_rate": 2.9826583646313166e-05, "loss": 0.1713, "step": 27715 }, { "epoch": 0.4943459494167588, "grad_norm": 0.27886298298835754, "learning_rate": 2.9825056413047624e-05, "loss": 0.1223, "step": 27716 }, { "epoch": 0.4943637855384725, "grad_norm": 0.20484280586242676, "learning_rate": 2.98235291610785e-05, "loss": 0.0968, "step": 27717 }, { "epoch": 0.49438162166018623, "grad_norm": 0.2996967136859894, "learning_rate": 2.9822001890411716e-05, "loss": 0.1307, "step": 27718 }, { "epoch": 0.4943994577818999, "grad_norm": 0.23785392940044403, "learning_rate": 2.982047460105319e-05, "loss": 0.1306, "step": 27719 }, { "epoch": 0.4944172939036136, "grad_norm": 0.3025568723678589, "learning_rate": 2.981894729300883e-05, "loss": 0.1554, "step": 27720 }, { "epoch": 0.4944351300253273, "grad_norm": 0.29137152433395386, "learning_rate": 2.981741996628457e-05, "loss": 0.1419, "step": 27721 }, { "epoch": 0.494452966147041, "grad_norm": 0.2891000509262085, "learning_rate": 2.9815892620886326e-05, "loss": 0.1496, "step": 27722 }, { "epoch": 0.49447080226875467, "grad_norm": 0.4134567677974701, "learning_rate": 2.9814365256820015e-05, "loss": 0.1739, "step": 27723 }, { "epoch": 0.49448863839046836, "grad_norm": 0.3712529242038727, "learning_rate": 2.9812837874091565e-05, "loss": 0.1901, "step": 27724 }, { "epoch": 0.49450647451218205, "grad_norm": 0.1878279596567154, "learning_rate": 2.98113104727069e-05, "loss": 0.1404, "step": 27725 }, { "epoch": 0.4945243106338958, "grad_norm": 0.2331874817609787, "learning_rate": 2.9809783052671918e-05, "loss": 0.1201, "step": 27726 }, { "epoch": 0.4945421467556095, "grad_norm": 0.22781163454055786, "learning_rate": 2.9808255613992568e-05, "loss": 0.1489, "step": 27727 }, { "epoch": 0.49455998287732317, "grad_norm": 0.3174269199371338, "learning_rate": 2.9806728156674756e-05, "loss": 0.1774, "step": 27728 }, { "epoch": 0.49457781899903686, "grad_norm": 0.2981187105178833, "learning_rate": 2.980520068072441e-05, "loss": 0.1565, "step": 27729 }, { "epoch": 0.49459565512075054, "grad_norm": 0.34540411829948425, "learning_rate": 2.9803673186147433e-05, "loss": 0.1802, "step": 27730 }, { "epoch": 0.49461349124246423, "grad_norm": 0.3372819721698761, "learning_rate": 2.980214567294977e-05, "loss": 0.2015, "step": 27731 }, { "epoch": 0.4946313273641779, "grad_norm": 0.3403260409832001, "learning_rate": 2.9800618141137332e-05, "loss": 0.1245, "step": 27732 }, { "epoch": 0.4946491634858916, "grad_norm": 0.2683867812156677, "learning_rate": 2.9799090590716037e-05, "loss": 0.1243, "step": 27733 }, { "epoch": 0.4946669996076053, "grad_norm": 0.1998906433582306, "learning_rate": 2.979756302169181e-05, "loss": 0.1138, "step": 27734 }, { "epoch": 0.49468483572931904, "grad_norm": 0.3172931373119354, "learning_rate": 2.9796035434070575e-05, "loss": 0.1467, "step": 27735 }, { "epoch": 0.4947026718510327, "grad_norm": 0.2523716986179352, "learning_rate": 2.979450782785825e-05, "loss": 0.1575, "step": 27736 }, { "epoch": 0.4947205079727464, "grad_norm": 0.29659193754196167, "learning_rate": 2.9792980203060743e-05, "loss": 0.1648, "step": 27737 }, { "epoch": 0.4947383440944601, "grad_norm": 0.32037606835365295, "learning_rate": 2.9791452559684002e-05, "loss": 0.1281, "step": 27738 }, { "epoch": 0.4947561802161738, "grad_norm": 0.2830572724342346, "learning_rate": 2.9789924897733934e-05, "loss": 0.1366, "step": 27739 }, { "epoch": 0.4947740163378875, "grad_norm": 0.23512092232704163, "learning_rate": 2.9788397217216462e-05, "loss": 0.1196, "step": 27740 }, { "epoch": 0.49479185245960117, "grad_norm": 0.2612616717815399, "learning_rate": 2.978686951813751e-05, "loss": 0.1752, "step": 27741 }, { "epoch": 0.49480968858131485, "grad_norm": 0.2290542721748352, "learning_rate": 2.9785341800502998e-05, "loss": 0.1288, "step": 27742 }, { "epoch": 0.4948275247030286, "grad_norm": 0.22217997908592224, "learning_rate": 2.978381406431885e-05, "loss": 0.1219, "step": 27743 }, { "epoch": 0.4948453608247423, "grad_norm": 0.1958005577325821, "learning_rate": 2.978228630959098e-05, "loss": 0.1369, "step": 27744 }, { "epoch": 0.494863196946456, "grad_norm": 0.2611028254032135, "learning_rate": 2.9780758536325327e-05, "loss": 0.1885, "step": 27745 }, { "epoch": 0.49488103306816966, "grad_norm": 0.3484054505825043, "learning_rate": 2.977923074452779e-05, "loss": 0.1415, "step": 27746 }, { "epoch": 0.49489886918988335, "grad_norm": 0.31591013073921204, "learning_rate": 2.9777702934204316e-05, "loss": 0.2276, "step": 27747 }, { "epoch": 0.49491670531159704, "grad_norm": 0.25888514518737793, "learning_rate": 2.9776175105360805e-05, "loss": 0.1456, "step": 27748 }, { "epoch": 0.4949345414333107, "grad_norm": 0.23392897844314575, "learning_rate": 2.9774647258003196e-05, "loss": 0.0924, "step": 27749 }, { "epoch": 0.4949523775550244, "grad_norm": 0.40121981501579285, "learning_rate": 2.9773119392137406e-05, "loss": 0.1579, "step": 27750 }, { "epoch": 0.49497021367673816, "grad_norm": 0.3367566764354706, "learning_rate": 2.9771591507769347e-05, "loss": 0.1771, "step": 27751 }, { "epoch": 0.49498804979845185, "grad_norm": 0.2644231617450714, "learning_rate": 2.9770063604904958e-05, "loss": 0.2018, "step": 27752 }, { "epoch": 0.49500588592016553, "grad_norm": 0.2466106414794922, "learning_rate": 2.9768535683550158e-05, "loss": 0.106, "step": 27753 }, { "epoch": 0.4950237220418792, "grad_norm": 0.23186130821704865, "learning_rate": 2.9767007743710863e-05, "loss": 0.1123, "step": 27754 }, { "epoch": 0.4950415581635929, "grad_norm": 0.26247891783714294, "learning_rate": 2.9765479785393002e-05, "loss": 0.1182, "step": 27755 }, { "epoch": 0.4950593942853066, "grad_norm": 0.26620256900787354, "learning_rate": 2.976395180860249e-05, "loss": 0.159, "step": 27756 }, { "epoch": 0.4950772304070203, "grad_norm": 0.27991393208503723, "learning_rate": 2.9762423813345262e-05, "loss": 0.1584, "step": 27757 }, { "epoch": 0.495095066528734, "grad_norm": 0.20036830008029938, "learning_rate": 2.9760895799627232e-05, "loss": 0.1158, "step": 27758 }, { "epoch": 0.49511290265044766, "grad_norm": 0.4034297466278076, "learning_rate": 2.9759367767454322e-05, "loss": 0.1144, "step": 27759 }, { "epoch": 0.4951307387721614, "grad_norm": 0.2466028481721878, "learning_rate": 2.975783971683247e-05, "loss": 0.1273, "step": 27760 }, { "epoch": 0.4951485748938751, "grad_norm": 0.36630794405937195, "learning_rate": 2.9756311647767577e-05, "loss": 0.1831, "step": 27761 }, { "epoch": 0.4951664110155888, "grad_norm": 0.23322570323944092, "learning_rate": 2.975478356026558e-05, "loss": 0.149, "step": 27762 }, { "epoch": 0.49518424713730247, "grad_norm": 0.2638810873031616, "learning_rate": 2.9753255454332395e-05, "loss": 0.1295, "step": 27763 }, { "epoch": 0.49520208325901616, "grad_norm": 0.2458961457014084, "learning_rate": 2.9751727329973955e-05, "loss": 0.1053, "step": 27764 }, { "epoch": 0.49521991938072984, "grad_norm": 0.3017711639404297, "learning_rate": 2.9750199187196174e-05, "loss": 0.0805, "step": 27765 }, { "epoch": 0.49523775550244353, "grad_norm": 0.29882705211639404, "learning_rate": 2.974867102600499e-05, "loss": 0.2002, "step": 27766 }, { "epoch": 0.4952555916241572, "grad_norm": 0.26527103781700134, "learning_rate": 2.9747142846406312e-05, "loss": 0.1655, "step": 27767 }, { "epoch": 0.49527342774587096, "grad_norm": 0.2811010479927063, "learning_rate": 2.9745614648406073e-05, "loss": 0.1368, "step": 27768 }, { "epoch": 0.49529126386758465, "grad_norm": 0.27112844586372375, "learning_rate": 2.974408643201018e-05, "loss": 0.1755, "step": 27769 }, { "epoch": 0.49530909998929834, "grad_norm": 0.22731705009937286, "learning_rate": 2.9742558197224585e-05, "loss": 0.2092, "step": 27770 }, { "epoch": 0.49532693611101203, "grad_norm": 0.26577648520469666, "learning_rate": 2.974102994405519e-05, "loss": 0.1202, "step": 27771 }, { "epoch": 0.4953447722327257, "grad_norm": 0.24133983254432678, "learning_rate": 2.973950167250792e-05, "loss": 0.1214, "step": 27772 }, { "epoch": 0.4953626083544394, "grad_norm": 0.3082998991012573, "learning_rate": 2.9737973382588714e-05, "loss": 0.1353, "step": 27773 }, { "epoch": 0.4953804444761531, "grad_norm": 0.21491719782352448, "learning_rate": 2.9736445074303477e-05, "loss": 0.1397, "step": 27774 }, { "epoch": 0.4953982805978668, "grad_norm": 0.34926852583885193, "learning_rate": 2.9734916747658147e-05, "loss": 0.1793, "step": 27775 }, { "epoch": 0.49541611671958047, "grad_norm": 0.34190186858177185, "learning_rate": 2.9733388402658646e-05, "loss": 0.1546, "step": 27776 }, { "epoch": 0.4954339528412942, "grad_norm": 0.2932499945163727, "learning_rate": 2.97318600393109e-05, "loss": 0.1328, "step": 27777 }, { "epoch": 0.4954517889630079, "grad_norm": 0.3202677071094513, "learning_rate": 2.9730331657620826e-05, "loss": 0.1207, "step": 27778 }, { "epoch": 0.4954696250847216, "grad_norm": 0.30953875184059143, "learning_rate": 2.9728803257594345e-05, "loss": 0.165, "step": 27779 }, { "epoch": 0.4954874612064353, "grad_norm": 0.29354506731033325, "learning_rate": 2.9727274839237396e-05, "loss": 0.1148, "step": 27780 }, { "epoch": 0.49550529732814896, "grad_norm": 0.22380825877189636, "learning_rate": 2.97257464025559e-05, "loss": 0.1218, "step": 27781 }, { "epoch": 0.49552313344986265, "grad_norm": 0.2687002122402191, "learning_rate": 2.9724217947555777e-05, "loss": 0.149, "step": 27782 }, { "epoch": 0.49554096957157634, "grad_norm": 0.2262597382068634, "learning_rate": 2.9722689474242955e-05, "loss": 0.1466, "step": 27783 }, { "epoch": 0.49555880569329, "grad_norm": 0.17526397109031677, "learning_rate": 2.972116098262336e-05, "loss": 0.097, "step": 27784 }, { "epoch": 0.49557664181500377, "grad_norm": 0.3570282757282257, "learning_rate": 2.9719632472702908e-05, "loss": 0.1078, "step": 27785 }, { "epoch": 0.49559447793671746, "grad_norm": 0.22851315140724182, "learning_rate": 2.971810394448753e-05, "loss": 0.1338, "step": 27786 }, { "epoch": 0.49561231405843115, "grad_norm": 0.36523857712745667, "learning_rate": 2.9716575397983148e-05, "loss": 0.1642, "step": 27787 }, { "epoch": 0.49563015018014484, "grad_norm": 0.3585837483406067, "learning_rate": 2.9715046833195704e-05, "loss": 0.1287, "step": 27788 }, { "epoch": 0.4956479863018585, "grad_norm": 0.25841131806373596, "learning_rate": 2.9713518250131095e-05, "loss": 0.1317, "step": 27789 }, { "epoch": 0.4956658224235722, "grad_norm": 0.2565012276172638, "learning_rate": 2.9711989648795273e-05, "loss": 0.148, "step": 27790 }, { "epoch": 0.4956836585452859, "grad_norm": 0.3194092810153961, "learning_rate": 2.971046102919415e-05, "loss": 0.1185, "step": 27791 }, { "epoch": 0.4957014946669996, "grad_norm": 0.2027805745601654, "learning_rate": 2.9708932391333643e-05, "loss": 0.1052, "step": 27792 }, { "epoch": 0.4957193307887133, "grad_norm": 0.2346637099981308, "learning_rate": 2.9707403735219697e-05, "loss": 0.1163, "step": 27793 }, { "epoch": 0.495737166910427, "grad_norm": 0.2699218988418579, "learning_rate": 2.9705875060858224e-05, "loss": 0.1413, "step": 27794 }, { "epoch": 0.4957550030321407, "grad_norm": 0.26824429631233215, "learning_rate": 2.9704346368255155e-05, "loss": 0.1143, "step": 27795 }, { "epoch": 0.4957728391538544, "grad_norm": 0.326987624168396, "learning_rate": 2.9702817657416422e-05, "loss": 0.1186, "step": 27796 }, { "epoch": 0.4957906752755681, "grad_norm": 0.30650341510772705, "learning_rate": 2.9701288928347938e-05, "loss": 0.1515, "step": 27797 }, { "epoch": 0.49580851139728177, "grad_norm": 0.31967130303382874, "learning_rate": 2.9699760181055637e-05, "loss": 0.1177, "step": 27798 }, { "epoch": 0.49582634751899546, "grad_norm": 0.3324146866798401, "learning_rate": 2.969823141554543e-05, "loss": 0.1683, "step": 27799 }, { "epoch": 0.49584418364070915, "grad_norm": 0.26720014214515686, "learning_rate": 2.9696702631823263e-05, "loss": 0.1595, "step": 27800 }, { "epoch": 0.49586201976242283, "grad_norm": 0.31063705682754517, "learning_rate": 2.969517382989506e-05, "loss": 0.1915, "step": 27801 }, { "epoch": 0.4958798558841366, "grad_norm": 0.22244539856910706, "learning_rate": 2.9693645009766737e-05, "loss": 0.0797, "step": 27802 }, { "epoch": 0.49589769200585027, "grad_norm": 0.28870487213134766, "learning_rate": 2.969211617144423e-05, "loss": 0.1635, "step": 27803 }, { "epoch": 0.49591552812756395, "grad_norm": 0.18169677257537842, "learning_rate": 2.9690587314933448e-05, "loss": 0.1022, "step": 27804 }, { "epoch": 0.49593336424927764, "grad_norm": 0.30563223361968994, "learning_rate": 2.968905844024034e-05, "loss": 0.1199, "step": 27805 }, { "epoch": 0.49595120037099133, "grad_norm": 0.20923562347888947, "learning_rate": 2.968752954737082e-05, "loss": 0.1462, "step": 27806 }, { "epoch": 0.495969036492705, "grad_norm": 0.26435256004333496, "learning_rate": 2.968600063633081e-05, "loss": 0.1307, "step": 27807 }, { "epoch": 0.4959868726144187, "grad_norm": 0.22661389410495758, "learning_rate": 2.968447170712625e-05, "loss": 0.0988, "step": 27808 }, { "epoch": 0.4960047087361324, "grad_norm": 0.21868455410003662, "learning_rate": 2.968294275976306e-05, "loss": 0.1125, "step": 27809 }, { "epoch": 0.49602254485784614, "grad_norm": 0.29917147755622864, "learning_rate": 2.9681413794247164e-05, "loss": 0.1526, "step": 27810 }, { "epoch": 0.4960403809795598, "grad_norm": 0.20681817829608917, "learning_rate": 2.967988481058449e-05, "loss": 0.1014, "step": 27811 }, { "epoch": 0.4960582171012735, "grad_norm": 0.2031608521938324, "learning_rate": 2.9678355808780974e-05, "loss": 0.1582, "step": 27812 }, { "epoch": 0.4960760532229872, "grad_norm": 0.24757565557956696, "learning_rate": 2.9676826788842527e-05, "loss": 0.1362, "step": 27813 }, { "epoch": 0.4960938893447009, "grad_norm": 0.350021630525589, "learning_rate": 2.9675297750775082e-05, "loss": 0.1636, "step": 27814 }, { "epoch": 0.4961117254664146, "grad_norm": 0.19894543290138245, "learning_rate": 2.967376869458457e-05, "loss": 0.1657, "step": 27815 }, { "epoch": 0.49612956158812827, "grad_norm": 0.22180263698101044, "learning_rate": 2.9672239620276922e-05, "loss": 0.1456, "step": 27816 }, { "epoch": 0.49614739770984195, "grad_norm": 0.3438716232776642, "learning_rate": 2.9670710527858052e-05, "loss": 0.1585, "step": 27817 }, { "epoch": 0.49616523383155564, "grad_norm": 0.25440147519111633, "learning_rate": 2.96691814173339e-05, "loss": 0.1638, "step": 27818 }, { "epoch": 0.4961830699532694, "grad_norm": 0.23777243494987488, "learning_rate": 2.9667652288710383e-05, "loss": 0.1206, "step": 27819 }, { "epoch": 0.4962009060749831, "grad_norm": 0.26888108253479004, "learning_rate": 2.9666123141993435e-05, "loss": 0.1266, "step": 27820 }, { "epoch": 0.49621874219669676, "grad_norm": 0.2419235110282898, "learning_rate": 2.966459397718898e-05, "loss": 0.1874, "step": 27821 }, { "epoch": 0.49623657831841045, "grad_norm": 0.2995932102203369, "learning_rate": 2.966306479430296e-05, "loss": 0.1937, "step": 27822 }, { "epoch": 0.49625441444012414, "grad_norm": 0.20871558785438538, "learning_rate": 2.9661535593341283e-05, "loss": 0.142, "step": 27823 }, { "epoch": 0.4962722505618378, "grad_norm": 0.22967679798603058, "learning_rate": 2.966000637430988e-05, "loss": 0.1348, "step": 27824 }, { "epoch": 0.4962900866835515, "grad_norm": 0.22010403871536255, "learning_rate": 2.965847713721469e-05, "loss": 0.09, "step": 27825 }, { "epoch": 0.4963079228052652, "grad_norm": 0.3423563838005066, "learning_rate": 2.9656947882061625e-05, "loss": 0.1666, "step": 27826 }, { "epoch": 0.49632575892697894, "grad_norm": 0.24587838351726532, "learning_rate": 2.9655418608856627e-05, "loss": 0.1179, "step": 27827 }, { "epoch": 0.49634359504869263, "grad_norm": 0.31879591941833496, "learning_rate": 2.9653889317605616e-05, "loss": 0.1192, "step": 27828 }, { "epoch": 0.4963614311704063, "grad_norm": 0.2893899977207184, "learning_rate": 2.9652360008314528e-05, "loss": 0.1397, "step": 27829 }, { "epoch": 0.49637926729212, "grad_norm": 0.30011263489723206, "learning_rate": 2.9650830680989278e-05, "loss": 0.1153, "step": 27830 }, { "epoch": 0.4963971034138337, "grad_norm": 0.2200906127691269, "learning_rate": 2.964930133563581e-05, "loss": 0.1555, "step": 27831 }, { "epoch": 0.4964149395355474, "grad_norm": 0.2774388790130615, "learning_rate": 2.9647771972260035e-05, "loss": 0.1681, "step": 27832 }, { "epoch": 0.49643277565726107, "grad_norm": 0.3050696849822998, "learning_rate": 2.96462425908679e-05, "loss": 0.1308, "step": 27833 }, { "epoch": 0.49645061177897476, "grad_norm": 0.24673138558864594, "learning_rate": 2.9644713191465316e-05, "loss": 0.1343, "step": 27834 }, { "epoch": 0.49646844790068845, "grad_norm": 0.2587926983833313, "learning_rate": 2.9643183774058224e-05, "loss": 0.1542, "step": 27835 }, { "epoch": 0.4964862840224022, "grad_norm": 0.3508484363555908, "learning_rate": 2.9641654338652546e-05, "loss": 0.1545, "step": 27836 }, { "epoch": 0.4965041201441159, "grad_norm": 0.33448004722595215, "learning_rate": 2.9640124885254218e-05, "loss": 0.1173, "step": 27837 }, { "epoch": 0.49652195626582957, "grad_norm": 0.313690185546875, "learning_rate": 2.963859541386916e-05, "loss": 0.1579, "step": 27838 }, { "epoch": 0.49653979238754326, "grad_norm": 0.26195430755615234, "learning_rate": 2.9637065924503298e-05, "loss": 0.1237, "step": 27839 }, { "epoch": 0.49655762850925694, "grad_norm": 0.34702929854393005, "learning_rate": 2.9635536417162574e-05, "loss": 0.131, "step": 27840 }, { "epoch": 0.49657546463097063, "grad_norm": 0.38884323835372925, "learning_rate": 2.96340068918529e-05, "loss": 0.2393, "step": 27841 }, { "epoch": 0.4965933007526843, "grad_norm": 0.3461587429046631, "learning_rate": 2.963247734858023e-05, "loss": 0.1266, "step": 27842 }, { "epoch": 0.496611136874398, "grad_norm": 0.2655712366104126, "learning_rate": 2.9630947787350467e-05, "loss": 0.1298, "step": 27843 }, { "epoch": 0.49662897299611175, "grad_norm": 0.16946347057819366, "learning_rate": 2.962941820816956e-05, "loss": 0.0715, "step": 27844 }, { "epoch": 0.49664680911782544, "grad_norm": 0.2938988506793976, "learning_rate": 2.962788861104342e-05, "loss": 0.1095, "step": 27845 }, { "epoch": 0.4966646452395391, "grad_norm": 0.27131253480911255, "learning_rate": 2.962635899597799e-05, "loss": 0.097, "step": 27846 }, { "epoch": 0.4966824813612528, "grad_norm": 0.25252237915992737, "learning_rate": 2.962482936297919e-05, "loss": 0.143, "step": 27847 }, { "epoch": 0.4967003174829665, "grad_norm": 0.2072848677635193, "learning_rate": 2.9623299712052954e-05, "loss": 0.1174, "step": 27848 }, { "epoch": 0.4967181536046802, "grad_norm": 0.29337868094444275, "learning_rate": 2.9621770043205215e-05, "loss": 0.1432, "step": 27849 }, { "epoch": 0.4967359897263939, "grad_norm": 0.3335683345794678, "learning_rate": 2.9620240356441897e-05, "loss": 0.1371, "step": 27850 }, { "epoch": 0.49675382584810757, "grad_norm": 0.22608454525470734, "learning_rate": 2.9618710651768938e-05, "loss": 0.1291, "step": 27851 }, { "epoch": 0.49677166196982125, "grad_norm": 0.29111024737358093, "learning_rate": 2.9617180929192257e-05, "loss": 0.1228, "step": 27852 }, { "epoch": 0.496789498091535, "grad_norm": 0.2770387530326843, "learning_rate": 2.9615651188717786e-05, "loss": 0.1454, "step": 27853 }, { "epoch": 0.4968073342132487, "grad_norm": 0.3288304805755615, "learning_rate": 2.961412143035146e-05, "loss": 0.1269, "step": 27854 }, { "epoch": 0.4968251703349624, "grad_norm": 0.29333335161209106, "learning_rate": 2.9612591654099198e-05, "loss": 0.1242, "step": 27855 }, { "epoch": 0.49684300645667606, "grad_norm": 0.2045074701309204, "learning_rate": 2.9611061859966942e-05, "loss": 0.1074, "step": 27856 }, { "epoch": 0.49686084257838975, "grad_norm": 0.23224858939647675, "learning_rate": 2.9609532047960624e-05, "loss": 0.1042, "step": 27857 }, { "epoch": 0.49687867870010344, "grad_norm": 0.2689198851585388, "learning_rate": 2.9608002218086163e-05, "loss": 0.1584, "step": 27858 }, { "epoch": 0.4968965148218171, "grad_norm": 0.36132392287254333, "learning_rate": 2.960647237034949e-05, "loss": 0.166, "step": 27859 }, { "epoch": 0.4969143509435308, "grad_norm": 0.3105090260505676, "learning_rate": 2.9604942504756545e-05, "loss": 0.1507, "step": 27860 }, { "epoch": 0.49693218706524456, "grad_norm": 0.2498639076948166, "learning_rate": 2.9603412621313254e-05, "loss": 0.139, "step": 27861 }, { "epoch": 0.49695002318695825, "grad_norm": 0.27536413073539734, "learning_rate": 2.9601882720025537e-05, "loss": 0.1865, "step": 27862 }, { "epoch": 0.49696785930867193, "grad_norm": 0.38503387570381165, "learning_rate": 2.960035280089934e-05, "loss": 0.184, "step": 27863 }, { "epoch": 0.4969856954303856, "grad_norm": 0.24762022495269775, "learning_rate": 2.9598822863940584e-05, "loss": 0.1506, "step": 27864 }, { "epoch": 0.4970035315520993, "grad_norm": 0.1912391036748886, "learning_rate": 2.95972929091552e-05, "loss": 0.0989, "step": 27865 }, { "epoch": 0.497021367673813, "grad_norm": 0.3228663206100464, "learning_rate": 2.9595762936549132e-05, "loss": 0.1474, "step": 27866 }, { "epoch": 0.4970392037955267, "grad_norm": 0.307590126991272, "learning_rate": 2.9594232946128287e-05, "loss": 0.1637, "step": 27867 }, { "epoch": 0.4970570399172404, "grad_norm": 0.19887320697307587, "learning_rate": 2.9592702937898607e-05, "loss": 0.1218, "step": 27868 }, { "epoch": 0.4970748760389541, "grad_norm": 0.261941522359848, "learning_rate": 2.9591172911866027e-05, "loss": 0.1753, "step": 27869 }, { "epoch": 0.4970927121606678, "grad_norm": 0.21067079901695251, "learning_rate": 2.958964286803648e-05, "loss": 0.1178, "step": 27870 }, { "epoch": 0.4971105482823815, "grad_norm": 0.2668260335922241, "learning_rate": 2.958811280641589e-05, "loss": 0.1397, "step": 27871 }, { "epoch": 0.4971283844040952, "grad_norm": 0.30177226662635803, "learning_rate": 2.958658272701019e-05, "loss": 0.107, "step": 27872 }, { "epoch": 0.49714622052580887, "grad_norm": 0.47434917092323303, "learning_rate": 2.958505262982531e-05, "loss": 0.1431, "step": 27873 }, { "epoch": 0.49716405664752256, "grad_norm": 0.2704737186431885, "learning_rate": 2.958352251486719e-05, "loss": 0.132, "step": 27874 }, { "epoch": 0.49718189276923624, "grad_norm": 0.2231382578611374, "learning_rate": 2.9581992382141744e-05, "loss": 0.1249, "step": 27875 }, { "epoch": 0.49719972889094993, "grad_norm": 0.21486921608448029, "learning_rate": 2.9580462231654914e-05, "loss": 0.1102, "step": 27876 }, { "epoch": 0.4972175650126636, "grad_norm": 0.3190035820007324, "learning_rate": 2.9578932063412634e-05, "loss": 0.2299, "step": 27877 }, { "epoch": 0.49723540113437736, "grad_norm": 0.20680023729801178, "learning_rate": 2.957740187742083e-05, "loss": 0.1434, "step": 27878 }, { "epoch": 0.49725323725609105, "grad_norm": 0.48658859729766846, "learning_rate": 2.957587167368544e-05, "loss": 0.1483, "step": 27879 }, { "epoch": 0.49727107337780474, "grad_norm": 0.22653689980506897, "learning_rate": 2.9574341452212382e-05, "loss": 0.1534, "step": 27880 }, { "epoch": 0.49728890949951843, "grad_norm": 0.34906628727912903, "learning_rate": 2.95728112130076e-05, "loss": 0.1459, "step": 27881 }, { "epoch": 0.4973067456212321, "grad_norm": 0.25396373867988586, "learning_rate": 2.9571280956077026e-05, "loss": 0.1393, "step": 27882 }, { "epoch": 0.4973245817429458, "grad_norm": 0.2544762194156647, "learning_rate": 2.9569750681426584e-05, "loss": 0.1212, "step": 27883 }, { "epoch": 0.4973424178646595, "grad_norm": 0.25442975759506226, "learning_rate": 2.9568220389062206e-05, "loss": 0.0889, "step": 27884 }, { "epoch": 0.4973602539863732, "grad_norm": 0.39988458156585693, "learning_rate": 2.956669007898984e-05, "loss": 0.1558, "step": 27885 }, { "epoch": 0.4973780901080869, "grad_norm": 0.2710377275943756, "learning_rate": 2.956515975121539e-05, "loss": 0.1399, "step": 27886 }, { "epoch": 0.4973959262298006, "grad_norm": 0.2345605194568634, "learning_rate": 2.956362940574482e-05, "loss": 0.1286, "step": 27887 }, { "epoch": 0.4974137623515143, "grad_norm": 0.36714980006217957, "learning_rate": 2.9562099042584046e-05, "loss": 0.1865, "step": 27888 }, { "epoch": 0.497431598473228, "grad_norm": 0.27104154229164124, "learning_rate": 2.956056866173899e-05, "loss": 0.0925, "step": 27889 }, { "epoch": 0.4974494345949417, "grad_norm": 0.29537317156791687, "learning_rate": 2.95590382632156e-05, "loss": 0.1455, "step": 27890 }, { "epoch": 0.49746727071665536, "grad_norm": 0.19378677010536194, "learning_rate": 2.9557507847019793e-05, "loss": 0.1596, "step": 27891 }, { "epoch": 0.49748510683836905, "grad_norm": 0.29182490706443787, "learning_rate": 2.9555977413157525e-05, "loss": 0.1782, "step": 27892 }, { "epoch": 0.49750294296008274, "grad_norm": 0.276284784078598, "learning_rate": 2.9554446961634708e-05, "loss": 0.1852, "step": 27893 }, { "epoch": 0.4975207790817964, "grad_norm": 0.2724047005176544, "learning_rate": 2.9552916492457288e-05, "loss": 0.13, "step": 27894 }, { "epoch": 0.49753861520351017, "grad_norm": 0.3606939911842346, "learning_rate": 2.9551386005631188e-05, "loss": 0.1805, "step": 27895 }, { "epoch": 0.49755645132522386, "grad_norm": 0.22548851370811462, "learning_rate": 2.9549855501162337e-05, "loss": 0.1526, "step": 27896 }, { "epoch": 0.49757428744693755, "grad_norm": 0.24227149784564972, "learning_rate": 2.9548324979056678e-05, "loss": 0.132, "step": 27897 }, { "epoch": 0.49759212356865123, "grad_norm": 0.2506137490272522, "learning_rate": 2.9546794439320147e-05, "loss": 0.1553, "step": 27898 }, { "epoch": 0.4976099596903649, "grad_norm": 0.25468847155570984, "learning_rate": 2.9545263881958662e-05, "loss": 0.1014, "step": 27899 }, { "epoch": 0.4976277958120786, "grad_norm": 0.46975842118263245, "learning_rate": 2.954373330697817e-05, "loss": 0.1779, "step": 27900 }, { "epoch": 0.4976456319337923, "grad_norm": 0.3091696798801422, "learning_rate": 2.9542202714384605e-05, "loss": 0.206, "step": 27901 }, { "epoch": 0.497663468055506, "grad_norm": 0.2690056562423706, "learning_rate": 2.954067210418388e-05, "loss": 0.1393, "step": 27902 }, { "epoch": 0.49768130417721973, "grad_norm": 0.47849467396736145, "learning_rate": 2.953914147638195e-05, "loss": 0.1618, "step": 27903 }, { "epoch": 0.4976991402989334, "grad_norm": 0.2702648937702179, "learning_rate": 2.953761083098473e-05, "loss": 0.182, "step": 27904 }, { "epoch": 0.4977169764206471, "grad_norm": 0.25787919759750366, "learning_rate": 2.9536080167998176e-05, "loss": 0.1976, "step": 27905 }, { "epoch": 0.4977348125423608, "grad_norm": 0.29339367151260376, "learning_rate": 2.9534549487428197e-05, "loss": 0.2173, "step": 27906 }, { "epoch": 0.4977526486640745, "grad_norm": 0.265498548746109, "learning_rate": 2.9533018789280746e-05, "loss": 0.1374, "step": 27907 }, { "epoch": 0.49777048478578817, "grad_norm": 0.28753966093063354, "learning_rate": 2.9531488073561746e-05, "loss": 0.1441, "step": 27908 }, { "epoch": 0.49778832090750186, "grad_norm": 0.3045188784599304, "learning_rate": 2.9529957340277136e-05, "loss": 0.1064, "step": 27909 }, { "epoch": 0.49780615702921555, "grad_norm": 0.26599541306495667, "learning_rate": 2.9528426589432845e-05, "loss": 0.1323, "step": 27910 }, { "epoch": 0.4978239931509293, "grad_norm": 0.19216345250606537, "learning_rate": 2.95268958210348e-05, "loss": 0.0914, "step": 27911 }, { "epoch": 0.497841829272643, "grad_norm": 0.22652976214885712, "learning_rate": 2.9525365035088954e-05, "loss": 0.1341, "step": 27912 }, { "epoch": 0.49785966539435667, "grad_norm": 0.321074903011322, "learning_rate": 2.9523834231601227e-05, "loss": 0.2251, "step": 27913 }, { "epoch": 0.49787750151607035, "grad_norm": 0.26399973034858704, "learning_rate": 2.9522303410577552e-05, "loss": 0.1418, "step": 27914 }, { "epoch": 0.49789533763778404, "grad_norm": 0.2515855133533478, "learning_rate": 2.9520772572023874e-05, "loss": 0.1267, "step": 27915 }, { "epoch": 0.49791317375949773, "grad_norm": 0.21967852115631104, "learning_rate": 2.951924171594612e-05, "loss": 0.1344, "step": 27916 }, { "epoch": 0.4979310098812114, "grad_norm": 0.3357223868370056, "learning_rate": 2.951771084235022e-05, "loss": 0.1358, "step": 27917 }, { "epoch": 0.4979488460029251, "grad_norm": 0.30676183104515076, "learning_rate": 2.9516179951242113e-05, "loss": 0.2006, "step": 27918 }, { "epoch": 0.4979666821246388, "grad_norm": 0.2936556339263916, "learning_rate": 2.951464904262773e-05, "loss": 0.1357, "step": 27919 }, { "epoch": 0.49798451824635254, "grad_norm": 0.23046720027923584, "learning_rate": 2.9513118116513017e-05, "loss": 0.1329, "step": 27920 }, { "epoch": 0.4980023543680662, "grad_norm": 0.2276427447795868, "learning_rate": 2.951158717290389e-05, "loss": 0.1578, "step": 27921 }, { "epoch": 0.4980201904897799, "grad_norm": 0.32960057258605957, "learning_rate": 2.9510056211806298e-05, "loss": 0.2434, "step": 27922 }, { "epoch": 0.4980380266114936, "grad_norm": 0.25639766454696655, "learning_rate": 2.9508525233226164e-05, "loss": 0.1164, "step": 27923 }, { "epoch": 0.4980558627332073, "grad_norm": 0.3169064223766327, "learning_rate": 2.9506994237169433e-05, "loss": 0.1859, "step": 27924 }, { "epoch": 0.498073698854921, "grad_norm": 0.2877647578716278, "learning_rate": 2.9505463223642037e-05, "loss": 0.1278, "step": 27925 }, { "epoch": 0.49809153497663466, "grad_norm": 0.1823241412639618, "learning_rate": 2.9503932192649907e-05, "loss": 0.1247, "step": 27926 }, { "epoch": 0.49810937109834835, "grad_norm": 0.23231762647628784, "learning_rate": 2.9502401144198984e-05, "loss": 0.1317, "step": 27927 }, { "epoch": 0.4981272072200621, "grad_norm": 0.3350907266139984, "learning_rate": 2.95008700782952e-05, "loss": 0.1325, "step": 27928 }, { "epoch": 0.4981450433417758, "grad_norm": 0.29064926505088806, "learning_rate": 2.949933899494449e-05, "loss": 0.196, "step": 27929 }, { "epoch": 0.49816287946348947, "grad_norm": 0.18521523475646973, "learning_rate": 2.949780789415278e-05, "loss": 0.1689, "step": 27930 }, { "epoch": 0.49818071558520316, "grad_norm": 0.29793670773506165, "learning_rate": 2.9496276775926017e-05, "loss": 0.1177, "step": 27931 }, { "epoch": 0.49819855170691685, "grad_norm": 0.19556596875190735, "learning_rate": 2.949474564027013e-05, "loss": 0.0887, "step": 27932 }, { "epoch": 0.49821638782863054, "grad_norm": 0.33334097266197205, "learning_rate": 2.9493214487191063e-05, "loss": 0.1732, "step": 27933 }, { "epoch": 0.4982342239503442, "grad_norm": 0.28000178933143616, "learning_rate": 2.949168331669474e-05, "loss": 0.176, "step": 27934 }, { "epoch": 0.4982520600720579, "grad_norm": 0.3112078607082367, "learning_rate": 2.94901521287871e-05, "loss": 0.1725, "step": 27935 }, { "epoch": 0.4982698961937716, "grad_norm": 0.21778663992881775, "learning_rate": 2.948862092347408e-05, "loss": 0.1071, "step": 27936 }, { "epoch": 0.49828773231548534, "grad_norm": 0.5297493934631348, "learning_rate": 2.9487089700761624e-05, "loss": 0.1849, "step": 27937 }, { "epoch": 0.49830556843719903, "grad_norm": 0.2754274010658264, "learning_rate": 2.948555846065565e-05, "loss": 0.1286, "step": 27938 }, { "epoch": 0.4983234045589127, "grad_norm": 0.20975162088871002, "learning_rate": 2.9484027203162097e-05, "loss": 0.1566, "step": 27939 }, { "epoch": 0.4983412406806264, "grad_norm": 0.3835209012031555, "learning_rate": 2.9482495928286908e-05, "loss": 0.1824, "step": 27940 }, { "epoch": 0.4983590768023401, "grad_norm": 0.2465917468070984, "learning_rate": 2.9480964636036023e-05, "loss": 0.1136, "step": 27941 }, { "epoch": 0.4983769129240538, "grad_norm": 0.2265441119670868, "learning_rate": 2.9479433326415372e-05, "loss": 0.1536, "step": 27942 }, { "epoch": 0.49839474904576747, "grad_norm": 0.2865673303604126, "learning_rate": 2.947790199943089e-05, "loss": 0.1274, "step": 27943 }, { "epoch": 0.49841258516748116, "grad_norm": 0.25633201003074646, "learning_rate": 2.9476370655088508e-05, "loss": 0.1572, "step": 27944 }, { "epoch": 0.4984304212891949, "grad_norm": 0.222824826836586, "learning_rate": 2.9474839293394162e-05, "loss": 0.1332, "step": 27945 }, { "epoch": 0.4984482574109086, "grad_norm": 0.3386722207069397, "learning_rate": 2.9473307914353808e-05, "loss": 0.1541, "step": 27946 }, { "epoch": 0.4984660935326223, "grad_norm": 0.22178107500076294, "learning_rate": 2.947177651797336e-05, "loss": 0.1557, "step": 27947 }, { "epoch": 0.49848392965433597, "grad_norm": 0.3116909861564636, "learning_rate": 2.9470245104258766e-05, "loss": 0.1103, "step": 27948 }, { "epoch": 0.49850176577604965, "grad_norm": 0.32038989663124084, "learning_rate": 2.946871367321595e-05, "loss": 0.1539, "step": 27949 }, { "epoch": 0.49851960189776334, "grad_norm": 0.37848106026649475, "learning_rate": 2.9467182224850865e-05, "loss": 0.1365, "step": 27950 }, { "epoch": 0.49853743801947703, "grad_norm": 0.4757857322692871, "learning_rate": 2.9465650759169428e-05, "loss": 0.2003, "step": 27951 }, { "epoch": 0.4985552741411907, "grad_norm": 0.2234267145395279, "learning_rate": 2.9464119276177593e-05, "loss": 0.136, "step": 27952 }, { "epoch": 0.4985731102629044, "grad_norm": 0.3395175635814667, "learning_rate": 2.9462587775881284e-05, "loss": 0.2647, "step": 27953 }, { "epoch": 0.49859094638461815, "grad_norm": 0.2047962099313736, "learning_rate": 2.9461056258286456e-05, "loss": 0.1191, "step": 27954 }, { "epoch": 0.49860878250633184, "grad_norm": 0.35745835304260254, "learning_rate": 2.945952472339903e-05, "loss": 0.1312, "step": 27955 }, { "epoch": 0.4986266186280455, "grad_norm": 0.25644558668136597, "learning_rate": 2.945799317122494e-05, "loss": 0.155, "step": 27956 }, { "epoch": 0.4986444547497592, "grad_norm": 0.3524622619152069, "learning_rate": 2.945646160177013e-05, "loss": 0.2007, "step": 27957 }, { "epoch": 0.4986622908714729, "grad_norm": 0.31439030170440674, "learning_rate": 2.9454930015040534e-05, "loss": 0.1278, "step": 27958 }, { "epoch": 0.4986801269931866, "grad_norm": 0.24150004982948303, "learning_rate": 2.9453398411042094e-05, "loss": 0.1409, "step": 27959 }, { "epoch": 0.4986979631149003, "grad_norm": 0.2034335434436798, "learning_rate": 2.9451866789780735e-05, "loss": 0.1131, "step": 27960 }, { "epoch": 0.49871579923661397, "grad_norm": 0.20568163692951202, "learning_rate": 2.9450335151262416e-05, "loss": 0.158, "step": 27961 }, { "epoch": 0.4987336353583277, "grad_norm": 0.23867696523666382, "learning_rate": 2.9448803495493054e-05, "loss": 0.1213, "step": 27962 }, { "epoch": 0.4987514714800414, "grad_norm": 0.21701084077358246, "learning_rate": 2.9447271822478596e-05, "loss": 0.1413, "step": 27963 }, { "epoch": 0.4987693076017551, "grad_norm": 0.2105296552181244, "learning_rate": 2.9445740132224975e-05, "loss": 0.0792, "step": 27964 }, { "epoch": 0.4987871437234688, "grad_norm": 0.3016611933708191, "learning_rate": 2.944420842473813e-05, "loss": 0.1429, "step": 27965 }, { "epoch": 0.49880497984518246, "grad_norm": 0.1890726089477539, "learning_rate": 2.9442676700023996e-05, "loss": 0.1377, "step": 27966 }, { "epoch": 0.49882281596689615, "grad_norm": 0.2599705755710602, "learning_rate": 2.9441144958088513e-05, "loss": 0.0986, "step": 27967 }, { "epoch": 0.49884065208860984, "grad_norm": 0.3319588303565979, "learning_rate": 2.943961319893762e-05, "loss": 0.1805, "step": 27968 }, { "epoch": 0.4988584882103235, "grad_norm": 0.2364499568939209, "learning_rate": 2.9438081422577253e-05, "loss": 0.1404, "step": 27969 }, { "epoch": 0.49887632433203727, "grad_norm": 0.4357466697692871, "learning_rate": 2.943654962901335e-05, "loss": 0.1712, "step": 27970 }, { "epoch": 0.49889416045375096, "grad_norm": 0.23609469830989838, "learning_rate": 2.9435017818251844e-05, "loss": 0.1527, "step": 27971 }, { "epoch": 0.49891199657546464, "grad_norm": 0.2735869288444519, "learning_rate": 2.943348599029868e-05, "loss": 0.1455, "step": 27972 }, { "epoch": 0.49892983269717833, "grad_norm": 0.27534282207489014, "learning_rate": 2.943195414515979e-05, "loss": 0.1676, "step": 27973 }, { "epoch": 0.498947668818892, "grad_norm": 0.2754566967487335, "learning_rate": 2.943042228284112e-05, "loss": 0.1453, "step": 27974 }, { "epoch": 0.4989655049406057, "grad_norm": 0.3426382541656494, "learning_rate": 2.9428890403348597e-05, "loss": 0.201, "step": 27975 }, { "epoch": 0.4989833410623194, "grad_norm": 0.2707737684249878, "learning_rate": 2.9427358506688174e-05, "loss": 0.1642, "step": 27976 }, { "epoch": 0.4990011771840331, "grad_norm": 0.24972344934940338, "learning_rate": 2.9425826592865773e-05, "loss": 0.1431, "step": 27977 }, { "epoch": 0.4990190133057468, "grad_norm": 0.26107680797576904, "learning_rate": 2.9424294661887346e-05, "loss": 0.0909, "step": 27978 }, { "epoch": 0.4990368494274605, "grad_norm": 0.21545089781284332, "learning_rate": 2.9422762713758816e-05, "loss": 0.1501, "step": 27979 }, { "epoch": 0.4990546855491742, "grad_norm": 0.2637833058834076, "learning_rate": 2.9421230748486133e-05, "loss": 0.1205, "step": 27980 }, { "epoch": 0.4990725216708879, "grad_norm": 0.24294264614582062, "learning_rate": 2.941969876607524e-05, "loss": 0.1369, "step": 27981 }, { "epoch": 0.4990903577926016, "grad_norm": 0.427143931388855, "learning_rate": 2.9418166766532064e-05, "loss": 0.1718, "step": 27982 }, { "epoch": 0.49910819391431527, "grad_norm": 0.26317328214645386, "learning_rate": 2.9416634749862542e-05, "loss": 0.1589, "step": 27983 }, { "epoch": 0.49912603003602896, "grad_norm": 0.43047598004341125, "learning_rate": 2.9415102716072622e-05, "loss": 0.1823, "step": 27984 }, { "epoch": 0.49914386615774264, "grad_norm": 0.24958008527755737, "learning_rate": 2.9413570665168245e-05, "loss": 0.1179, "step": 27985 }, { "epoch": 0.49916170227945633, "grad_norm": 0.28731048107147217, "learning_rate": 2.9412038597155334e-05, "loss": 0.1377, "step": 27986 }, { "epoch": 0.4991795384011701, "grad_norm": 0.21803952753543854, "learning_rate": 2.9410506512039838e-05, "loss": 0.1197, "step": 27987 }, { "epoch": 0.49919737452288376, "grad_norm": 0.23055711388587952, "learning_rate": 2.94089744098277e-05, "loss": 0.1036, "step": 27988 }, { "epoch": 0.49921521064459745, "grad_norm": 0.2937275767326355, "learning_rate": 2.9407442290524854e-05, "loss": 0.2049, "step": 27989 }, { "epoch": 0.49923304676631114, "grad_norm": 0.22427405416965485, "learning_rate": 2.9405910154137238e-05, "loss": 0.1503, "step": 27990 }, { "epoch": 0.4992508828880248, "grad_norm": 0.2825898230075836, "learning_rate": 2.9404378000670795e-05, "loss": 0.1722, "step": 27991 }, { "epoch": 0.4992687190097385, "grad_norm": 0.21711206436157227, "learning_rate": 2.940284583013146e-05, "loss": 0.1219, "step": 27992 }, { "epoch": 0.4992865551314522, "grad_norm": 0.24050657451152802, "learning_rate": 2.9401313642525175e-05, "loss": 0.1446, "step": 27993 }, { "epoch": 0.4993043912531659, "grad_norm": 0.3528558015823364, "learning_rate": 2.9399781437857877e-05, "loss": 0.1245, "step": 27994 }, { "epoch": 0.4993222273748796, "grad_norm": 0.2966325879096985, "learning_rate": 2.9398249216135503e-05, "loss": 0.1834, "step": 27995 }, { "epoch": 0.4993400634965933, "grad_norm": 0.19626571238040924, "learning_rate": 2.9396716977364002e-05, "loss": 0.1152, "step": 27996 }, { "epoch": 0.499357899618307, "grad_norm": 0.22221416234970093, "learning_rate": 2.93951847215493e-05, "loss": 0.1201, "step": 27997 }, { "epoch": 0.4993757357400207, "grad_norm": 0.23627491295337677, "learning_rate": 2.9393652448697355e-05, "loss": 0.1094, "step": 27998 }, { "epoch": 0.4993935718617344, "grad_norm": 0.23147229850292206, "learning_rate": 2.9392120158814084e-05, "loss": 0.0889, "step": 27999 }, { "epoch": 0.4994114079834481, "grad_norm": 0.38820022344589233, "learning_rate": 2.9390587851905444e-05, "loss": 0.2184, "step": 28000 }, { "epoch": 0.4994114079834481, "eval_loss": 0.13733762502670288, "eval_runtime": 106.8398, "eval_samples_per_second": 9.584, "eval_steps_per_second": 1.601, "step": 28000 }, { "epoch": 0.49942924410516176, "grad_norm": 0.2396186888217926, "learning_rate": 2.9389055527977367e-05, "loss": 0.1347, "step": 28001 }, { "epoch": 0.49944708022687545, "grad_norm": 0.220523402094841, "learning_rate": 2.9387523187035797e-05, "loss": 0.0866, "step": 28002 }, { "epoch": 0.49946491634858914, "grad_norm": 0.22361794114112854, "learning_rate": 2.9385990829086673e-05, "loss": 0.134, "step": 28003 }, { "epoch": 0.4994827524703029, "grad_norm": 0.2309955358505249, "learning_rate": 2.9384458454135933e-05, "loss": 0.1628, "step": 28004 }, { "epoch": 0.49950058859201657, "grad_norm": 0.22957615554332733, "learning_rate": 2.9382926062189515e-05, "loss": 0.1305, "step": 28005 }, { "epoch": 0.49951842471373026, "grad_norm": 0.22241808474063873, "learning_rate": 2.9381393653253368e-05, "loss": 0.0978, "step": 28006 }, { "epoch": 0.49953626083544395, "grad_norm": 0.23145662248134613, "learning_rate": 2.9379861227333417e-05, "loss": 0.1658, "step": 28007 }, { "epoch": 0.49955409695715763, "grad_norm": 0.2550261616706848, "learning_rate": 2.9378328784435616e-05, "loss": 0.18, "step": 28008 }, { "epoch": 0.4995719330788713, "grad_norm": 0.25448763370513916, "learning_rate": 2.9376796324565904e-05, "loss": 0.1105, "step": 28009 }, { "epoch": 0.499589769200585, "grad_norm": 0.26080530881881714, "learning_rate": 2.937526384773021e-05, "loss": 0.1501, "step": 28010 }, { "epoch": 0.4996076053222987, "grad_norm": 0.20976018905639648, "learning_rate": 2.9373731353934487e-05, "loss": 0.1387, "step": 28011 }, { "epoch": 0.49962544144401244, "grad_norm": 0.21771611273288727, "learning_rate": 2.9372198843184667e-05, "loss": 0.1405, "step": 28012 }, { "epoch": 0.49964327756572613, "grad_norm": 0.22740884125232697, "learning_rate": 2.93706663154867e-05, "loss": 0.1275, "step": 28013 }, { "epoch": 0.4996611136874398, "grad_norm": 0.3152305781841278, "learning_rate": 2.9369133770846515e-05, "loss": 0.1187, "step": 28014 }, { "epoch": 0.4996789498091535, "grad_norm": 0.3350315988063812, "learning_rate": 2.9367601209270056e-05, "loss": 0.2001, "step": 28015 }, { "epoch": 0.4996967859308672, "grad_norm": 0.21111929416656494, "learning_rate": 2.9366068630763273e-05, "loss": 0.1257, "step": 28016 }, { "epoch": 0.4997146220525809, "grad_norm": 0.27207595109939575, "learning_rate": 2.93645360353321e-05, "loss": 0.1317, "step": 28017 }, { "epoch": 0.49973245817429457, "grad_norm": 0.310823917388916, "learning_rate": 2.936300342298247e-05, "loss": 0.1433, "step": 28018 }, { "epoch": 0.49975029429600826, "grad_norm": 0.29535984992980957, "learning_rate": 2.9361470793720346e-05, "loss": 0.1631, "step": 28019 }, { "epoch": 0.49976813041772195, "grad_norm": 0.20434069633483887, "learning_rate": 2.9359938147551646e-05, "loss": 0.1134, "step": 28020 }, { "epoch": 0.4997859665394357, "grad_norm": 0.248887300491333, "learning_rate": 2.935840548448232e-05, "loss": 0.1513, "step": 28021 }, { "epoch": 0.4998038026611494, "grad_norm": 0.3449693024158478, "learning_rate": 2.935687280451831e-05, "loss": 0.1891, "step": 28022 }, { "epoch": 0.49982163878286306, "grad_norm": 0.3072223663330078, "learning_rate": 2.935534010766555e-05, "loss": 0.1703, "step": 28023 }, { "epoch": 0.49983947490457675, "grad_norm": 0.24689984321594238, "learning_rate": 2.9353807393929998e-05, "loss": 0.1385, "step": 28024 }, { "epoch": 0.49985731102629044, "grad_norm": 0.36406317353248596, "learning_rate": 2.9352274663317575e-05, "loss": 0.1707, "step": 28025 }, { "epoch": 0.49987514714800413, "grad_norm": 0.300327867269516, "learning_rate": 2.935074191583424e-05, "loss": 0.1543, "step": 28026 }, { "epoch": 0.4998929832697178, "grad_norm": 0.26289102435112, "learning_rate": 2.9349209151485925e-05, "loss": 0.1576, "step": 28027 }, { "epoch": 0.4999108193914315, "grad_norm": 0.25302886962890625, "learning_rate": 2.9347676370278564e-05, "loss": 0.2007, "step": 28028 }, { "epoch": 0.49992865551314525, "grad_norm": 0.26356586813926697, "learning_rate": 2.9346143572218116e-05, "loss": 0.1528, "step": 28029 }, { "epoch": 0.49994649163485894, "grad_norm": 0.2535671591758728, "learning_rate": 2.934461075731052e-05, "loss": 0.1699, "step": 28030 }, { "epoch": 0.4999643277565726, "grad_norm": 0.30741581320762634, "learning_rate": 2.93430779255617e-05, "loss": 0.1339, "step": 28031 }, { "epoch": 0.4999821638782863, "grad_norm": 0.26062649488449097, "learning_rate": 2.9341545076977622e-05, "loss": 0.1114, "step": 28032 }, { "epoch": 0.5, "grad_norm": 0.2643240690231323, "learning_rate": 2.9340012211564212e-05, "loss": 0.1453, "step": 28033 }, { "epoch": 0.5000178361217137, "grad_norm": 0.26569658517837524, "learning_rate": 2.9338479329327412e-05, "loss": 0.1136, "step": 28034 }, { "epoch": 0.5000356722434274, "grad_norm": 0.23257343471050262, "learning_rate": 2.9336946430273167e-05, "loss": 0.125, "step": 28035 }, { "epoch": 0.5000535083651411, "grad_norm": 0.19785386323928833, "learning_rate": 2.9335413514407418e-05, "loss": 0.1075, "step": 28036 }, { "epoch": 0.5000713444868548, "grad_norm": 0.2894573211669922, "learning_rate": 2.933388058173612e-05, "loss": 0.1792, "step": 28037 }, { "epoch": 0.5000891806085684, "grad_norm": 0.19879817962646484, "learning_rate": 2.9332347632265188e-05, "loss": 0.1502, "step": 28038 }, { "epoch": 0.5001070167302821, "grad_norm": 0.2950558364391327, "learning_rate": 2.9330814666000596e-05, "loss": 0.0814, "step": 28039 }, { "epoch": 0.5001248528519958, "grad_norm": 0.31717249751091003, "learning_rate": 2.932928168294826e-05, "loss": 0.2298, "step": 28040 }, { "epoch": 0.5001426889737095, "grad_norm": 0.24027322232723236, "learning_rate": 2.9327748683114137e-05, "loss": 0.147, "step": 28041 }, { "epoch": 0.5001605250954233, "grad_norm": 0.2599031329154968, "learning_rate": 2.9326215666504163e-05, "loss": 0.1065, "step": 28042 }, { "epoch": 0.500178361217137, "grad_norm": 0.2908802628517151, "learning_rate": 2.9324682633124285e-05, "loss": 0.0837, "step": 28043 }, { "epoch": 0.5001961973388507, "grad_norm": 0.28757938742637634, "learning_rate": 2.9323149582980436e-05, "loss": 0.1416, "step": 28044 }, { "epoch": 0.5002140334605644, "grad_norm": 0.35413914918899536, "learning_rate": 2.9321616516078575e-05, "loss": 0.1395, "step": 28045 }, { "epoch": 0.500231869582278, "grad_norm": 0.29072460532188416, "learning_rate": 2.932008343242464e-05, "loss": 0.1576, "step": 28046 }, { "epoch": 0.5002497057039917, "grad_norm": 0.24231556057929993, "learning_rate": 2.9318550332024563e-05, "loss": 0.1536, "step": 28047 }, { "epoch": 0.5002675418257054, "grad_norm": 0.24185895919799805, "learning_rate": 2.9317017214884297e-05, "loss": 0.1559, "step": 28048 }, { "epoch": 0.5002853779474191, "grad_norm": 0.2856906056404114, "learning_rate": 2.9315484081009774e-05, "loss": 0.1168, "step": 28049 }, { "epoch": 0.5003032140691328, "grad_norm": 0.2460201531648636, "learning_rate": 2.9313950930406947e-05, "loss": 0.1381, "step": 28050 }, { "epoch": 0.5003210501908465, "grad_norm": 0.27226993441581726, "learning_rate": 2.931241776308175e-05, "loss": 0.1041, "step": 28051 }, { "epoch": 0.5003388863125602, "grad_norm": 0.3510323762893677, "learning_rate": 2.9310884579040143e-05, "loss": 0.1613, "step": 28052 }, { "epoch": 0.5003567224342739, "grad_norm": 0.3239705264568329, "learning_rate": 2.9309351378288052e-05, "loss": 0.1665, "step": 28053 }, { "epoch": 0.5003745585559876, "grad_norm": 0.3006664216518402, "learning_rate": 2.9307818160831435e-05, "loss": 0.1006, "step": 28054 }, { "epoch": 0.5003923946777012, "grad_norm": 0.21220754086971283, "learning_rate": 2.930628492667622e-05, "loss": 0.1015, "step": 28055 }, { "epoch": 0.5004102307994149, "grad_norm": 0.16951516270637512, "learning_rate": 2.9304751675828358e-05, "loss": 0.1297, "step": 28056 }, { "epoch": 0.5004280669211286, "grad_norm": 0.24412105977535248, "learning_rate": 2.9303218408293787e-05, "loss": 0.1433, "step": 28057 }, { "epoch": 0.5004459030428423, "grad_norm": 0.30340859293937683, "learning_rate": 2.9301685124078465e-05, "loss": 0.1274, "step": 28058 }, { "epoch": 0.5004637391645561, "grad_norm": 0.3519989550113678, "learning_rate": 2.9300151823188326e-05, "loss": 0.136, "step": 28059 }, { "epoch": 0.5004815752862698, "grad_norm": 0.31953418254852295, "learning_rate": 2.9298618505629305e-05, "loss": 0.1773, "step": 28060 }, { "epoch": 0.5004994114079835, "grad_norm": 0.23292499780654907, "learning_rate": 2.9297085171407363e-05, "loss": 0.1361, "step": 28061 }, { "epoch": 0.5005172475296972, "grad_norm": 0.31692078709602356, "learning_rate": 2.9295551820528423e-05, "loss": 0.1573, "step": 28062 }, { "epoch": 0.5005350836514109, "grad_norm": 0.24037839472293854, "learning_rate": 2.9294018452998445e-05, "loss": 0.1545, "step": 28063 }, { "epoch": 0.5005529197731245, "grad_norm": 0.23084452748298645, "learning_rate": 2.9292485068823365e-05, "loss": 0.1497, "step": 28064 }, { "epoch": 0.5005707558948382, "grad_norm": 0.2341664731502533, "learning_rate": 2.9290951668009143e-05, "loss": 0.1326, "step": 28065 }, { "epoch": 0.5005885920165519, "grad_norm": 0.17126047611236572, "learning_rate": 2.92894182505617e-05, "loss": 0.1162, "step": 28066 }, { "epoch": 0.5006064281382656, "grad_norm": 0.30533409118652344, "learning_rate": 2.9287884816486992e-05, "loss": 0.1145, "step": 28067 }, { "epoch": 0.5006242642599793, "grad_norm": 0.250066876411438, "learning_rate": 2.9286351365790965e-05, "loss": 0.1489, "step": 28068 }, { "epoch": 0.500642100381693, "grad_norm": 0.32610979676246643, "learning_rate": 2.928481789847956e-05, "loss": 0.1515, "step": 28069 }, { "epoch": 0.5006599365034067, "grad_norm": 0.22377827763557434, "learning_rate": 2.9283284414558715e-05, "loss": 0.0895, "step": 28070 }, { "epoch": 0.5006777726251204, "grad_norm": 0.21274669468402863, "learning_rate": 2.928175091403438e-05, "loss": 0.132, "step": 28071 }, { "epoch": 0.500695608746834, "grad_norm": 0.23008203506469727, "learning_rate": 2.9280217396912508e-05, "loss": 0.1428, "step": 28072 }, { "epoch": 0.5007134448685477, "grad_norm": 0.22746393084526062, "learning_rate": 2.9278683863199023e-05, "loss": 0.1338, "step": 28073 }, { "epoch": 0.5007312809902614, "grad_norm": 0.33229076862335205, "learning_rate": 2.927715031289989e-05, "loss": 0.1585, "step": 28074 }, { "epoch": 0.5007491171119751, "grad_norm": 0.2531786262989044, "learning_rate": 2.9275616746021045e-05, "loss": 0.1531, "step": 28075 }, { "epoch": 0.5007669532336889, "grad_norm": 0.27786120772361755, "learning_rate": 2.9274083162568433e-05, "loss": 0.1494, "step": 28076 }, { "epoch": 0.5007847893554026, "grad_norm": 0.3160673975944519, "learning_rate": 2.927254956254799e-05, "loss": 0.1645, "step": 28077 }, { "epoch": 0.5008026254771163, "grad_norm": 0.2560996115207672, "learning_rate": 2.927101594596568e-05, "loss": 0.1539, "step": 28078 }, { "epoch": 0.50082046159883, "grad_norm": 0.2621341645717621, "learning_rate": 2.9269482312827434e-05, "loss": 0.141, "step": 28079 }, { "epoch": 0.5008382977205437, "grad_norm": 0.29709592461586, "learning_rate": 2.92679486631392e-05, "loss": 0.1218, "step": 28080 }, { "epoch": 0.5008561338422574, "grad_norm": 0.3116741180419922, "learning_rate": 2.9266414996906923e-05, "loss": 0.1596, "step": 28081 }, { "epoch": 0.500873969963971, "grad_norm": 0.25773724913597107, "learning_rate": 2.9264881314136544e-05, "loss": 0.1331, "step": 28082 }, { "epoch": 0.5008918060856847, "grad_norm": 0.31263741850852966, "learning_rate": 2.9263347614834013e-05, "loss": 0.1309, "step": 28083 }, { "epoch": 0.5009096422073984, "grad_norm": 0.25896355509757996, "learning_rate": 2.9261813899005273e-05, "loss": 0.1254, "step": 28084 }, { "epoch": 0.5009274783291121, "grad_norm": 0.26880183815956116, "learning_rate": 2.9260280166656277e-05, "loss": 0.0779, "step": 28085 }, { "epoch": 0.5009453144508258, "grad_norm": 0.39075830578804016, "learning_rate": 2.925874641779296e-05, "loss": 0.1839, "step": 28086 }, { "epoch": 0.5009631505725395, "grad_norm": 0.2828160524368286, "learning_rate": 2.9257212652421274e-05, "loss": 0.1238, "step": 28087 }, { "epoch": 0.5009809866942532, "grad_norm": 0.3004515469074249, "learning_rate": 2.9255678870547153e-05, "loss": 0.1461, "step": 28088 }, { "epoch": 0.5009988228159669, "grad_norm": 0.3946534991264343, "learning_rate": 2.925414507217656e-05, "loss": 0.1487, "step": 28089 }, { "epoch": 0.5010166589376805, "grad_norm": 0.2745427191257477, "learning_rate": 2.9252611257315428e-05, "loss": 0.1333, "step": 28090 }, { "epoch": 0.5010344950593942, "grad_norm": 0.2747195065021515, "learning_rate": 2.9251077425969697e-05, "loss": 0.1347, "step": 28091 }, { "epoch": 0.501052331181108, "grad_norm": 0.25806090235710144, "learning_rate": 2.9249543578145333e-05, "loss": 0.1472, "step": 28092 }, { "epoch": 0.5010701673028217, "grad_norm": 0.2558298408985138, "learning_rate": 2.9248009713848268e-05, "loss": 0.1931, "step": 28093 }, { "epoch": 0.5010880034245354, "grad_norm": 0.277619868516922, "learning_rate": 2.9246475833084446e-05, "loss": 0.1391, "step": 28094 }, { "epoch": 0.5011058395462491, "grad_norm": 0.19680927693843842, "learning_rate": 2.9244941935859826e-05, "loss": 0.1236, "step": 28095 }, { "epoch": 0.5011236756679628, "grad_norm": 0.26855865120887756, "learning_rate": 2.9243408022180334e-05, "loss": 0.1138, "step": 28096 }, { "epoch": 0.5011415117896765, "grad_norm": 0.2550387978553772, "learning_rate": 2.9241874092051937e-05, "loss": 0.1761, "step": 28097 }, { "epoch": 0.5011593479113902, "grad_norm": 0.30739662051200867, "learning_rate": 2.9240340145480566e-05, "loss": 0.1593, "step": 28098 }, { "epoch": 0.5011771840331039, "grad_norm": 0.3022283613681793, "learning_rate": 2.9238806182472166e-05, "loss": 0.1008, "step": 28099 }, { "epoch": 0.5011950201548175, "grad_norm": 0.3105224370956421, "learning_rate": 2.9237272203032702e-05, "loss": 0.1477, "step": 28100 }, { "epoch": 0.5012128562765312, "grad_norm": 0.3185834586620331, "learning_rate": 2.9235738207168097e-05, "loss": 0.1282, "step": 28101 }, { "epoch": 0.5012306923982449, "grad_norm": 0.30447420477867126, "learning_rate": 2.9234204194884314e-05, "loss": 0.1537, "step": 28102 }, { "epoch": 0.5012485285199586, "grad_norm": 0.26840248703956604, "learning_rate": 2.923267016618729e-05, "loss": 0.1867, "step": 28103 }, { "epoch": 0.5012663646416723, "grad_norm": 0.2757900059223175, "learning_rate": 2.923113612108297e-05, "loss": 0.152, "step": 28104 }, { "epoch": 0.501284200763386, "grad_norm": 0.3213600516319275, "learning_rate": 2.922960205957731e-05, "loss": 0.1979, "step": 28105 }, { "epoch": 0.5013020368850997, "grad_norm": 0.240966334939003, "learning_rate": 2.9228067981676256e-05, "loss": 0.1324, "step": 28106 }, { "epoch": 0.5013198730068134, "grad_norm": 0.1754409819841385, "learning_rate": 2.922653388738574e-05, "loss": 0.1252, "step": 28107 }, { "epoch": 0.501337709128527, "grad_norm": 0.2842355966567993, "learning_rate": 2.9224999776711726e-05, "loss": 0.1963, "step": 28108 }, { "epoch": 0.5013555452502408, "grad_norm": 0.26824405789375305, "learning_rate": 2.9223465649660152e-05, "loss": 0.1652, "step": 28109 }, { "epoch": 0.5013733813719545, "grad_norm": 0.24093066155910492, "learning_rate": 2.9221931506236973e-05, "loss": 0.1239, "step": 28110 }, { "epoch": 0.5013912174936682, "grad_norm": 0.2823273539543152, "learning_rate": 2.922039734644812e-05, "loss": 0.1501, "step": 28111 }, { "epoch": 0.5014090536153819, "grad_norm": 0.3238869607448578, "learning_rate": 2.921886317029955e-05, "loss": 0.1873, "step": 28112 }, { "epoch": 0.5014268897370956, "grad_norm": 0.2070161998271942, "learning_rate": 2.9217328977797208e-05, "loss": 0.1172, "step": 28113 }, { "epoch": 0.5014447258588093, "grad_norm": 0.28239917755126953, "learning_rate": 2.9215794768947046e-05, "loss": 0.1446, "step": 28114 }, { "epoch": 0.501462561980523, "grad_norm": 0.30700185894966125, "learning_rate": 2.9214260543755013e-05, "loss": 0.2055, "step": 28115 }, { "epoch": 0.5014803981022367, "grad_norm": 0.17265966534614563, "learning_rate": 2.9212726302227038e-05, "loss": 0.1165, "step": 28116 }, { "epoch": 0.5014982342239503, "grad_norm": 0.27358657121658325, "learning_rate": 2.9211192044369094e-05, "loss": 0.1357, "step": 28117 }, { "epoch": 0.501516070345664, "grad_norm": 0.2435189187526703, "learning_rate": 2.9209657770187104e-05, "loss": 0.1136, "step": 28118 }, { "epoch": 0.5015339064673777, "grad_norm": 0.27064594626426697, "learning_rate": 2.9208123479687032e-05, "loss": 0.1544, "step": 28119 }, { "epoch": 0.5015517425890914, "grad_norm": 0.243107408285141, "learning_rate": 2.9206589172874816e-05, "loss": 0.1317, "step": 28120 }, { "epoch": 0.5015695787108051, "grad_norm": 0.44382965564727783, "learning_rate": 2.9205054849756412e-05, "loss": 0.1056, "step": 28121 }, { "epoch": 0.5015874148325188, "grad_norm": 0.2630468010902405, "learning_rate": 2.920352051033776e-05, "loss": 0.1409, "step": 28122 }, { "epoch": 0.5016052509542325, "grad_norm": 0.2559202015399933, "learning_rate": 2.9201986154624815e-05, "loss": 0.1295, "step": 28123 }, { "epoch": 0.5016230870759462, "grad_norm": 0.2731707990169525, "learning_rate": 2.9200451782623517e-05, "loss": 0.1352, "step": 28124 }, { "epoch": 0.5016409231976598, "grad_norm": 0.2575231194496155, "learning_rate": 2.919891739433982e-05, "loss": 0.1671, "step": 28125 }, { "epoch": 0.5016587593193736, "grad_norm": 0.36431142687797546, "learning_rate": 2.9197382989779666e-05, "loss": 0.1618, "step": 28126 }, { "epoch": 0.5016765954410873, "grad_norm": 0.23700128495693207, "learning_rate": 2.9195848568949002e-05, "loss": 0.1081, "step": 28127 }, { "epoch": 0.501694431562801, "grad_norm": 0.22517183423042297, "learning_rate": 2.9194314131853785e-05, "loss": 0.1166, "step": 28128 }, { "epoch": 0.5017122676845147, "grad_norm": 0.2879955470561981, "learning_rate": 2.9192779678499956e-05, "loss": 0.1059, "step": 28129 }, { "epoch": 0.5017301038062284, "grad_norm": 0.27152031660079956, "learning_rate": 2.919124520889347e-05, "loss": 0.1499, "step": 28130 }, { "epoch": 0.5017479399279421, "grad_norm": 0.2744602560997009, "learning_rate": 2.9189710723040263e-05, "loss": 0.1517, "step": 28131 }, { "epoch": 0.5017657760496558, "grad_norm": 0.22146444022655487, "learning_rate": 2.918817622094629e-05, "loss": 0.1808, "step": 28132 }, { "epoch": 0.5017836121713695, "grad_norm": 0.35656702518463135, "learning_rate": 2.91866417026175e-05, "loss": 0.1111, "step": 28133 }, { "epoch": 0.5018014482930832, "grad_norm": 0.2832266688346863, "learning_rate": 2.9185107168059843e-05, "loss": 0.1262, "step": 28134 }, { "epoch": 0.5018192844147968, "grad_norm": 0.31262874603271484, "learning_rate": 2.9183572617279265e-05, "loss": 0.2218, "step": 28135 }, { "epoch": 0.5018371205365105, "grad_norm": 0.3318476676940918, "learning_rate": 2.918203805028172e-05, "loss": 0.1578, "step": 28136 }, { "epoch": 0.5018549566582242, "grad_norm": 0.2821923792362213, "learning_rate": 2.9180503467073138e-05, "loss": 0.1653, "step": 28137 }, { "epoch": 0.5018727927799379, "grad_norm": 0.20795147120952606, "learning_rate": 2.9178968867659496e-05, "loss": 0.1116, "step": 28138 }, { "epoch": 0.5018906289016516, "grad_norm": 0.32274147868156433, "learning_rate": 2.9177434252046714e-05, "loss": 0.103, "step": 28139 }, { "epoch": 0.5019084650233653, "grad_norm": 0.4081878066062927, "learning_rate": 2.917589962024076e-05, "loss": 0.0963, "step": 28140 }, { "epoch": 0.501926301145079, "grad_norm": 0.3755194842815399, "learning_rate": 2.917436497224758e-05, "loss": 0.1182, "step": 28141 }, { "epoch": 0.5019441372667927, "grad_norm": 0.24923057854175568, "learning_rate": 2.917283030807311e-05, "loss": 0.0863, "step": 28142 }, { "epoch": 0.5019619733885065, "grad_norm": 0.19700871407985687, "learning_rate": 2.9171295627723323e-05, "loss": 0.1098, "step": 28143 }, { "epoch": 0.5019798095102201, "grad_norm": 0.2449912279844284, "learning_rate": 2.9169760931204136e-05, "loss": 0.1364, "step": 28144 }, { "epoch": 0.5019976456319338, "grad_norm": 0.2614738941192627, "learning_rate": 2.916822621852153e-05, "loss": 0.1443, "step": 28145 }, { "epoch": 0.5020154817536475, "grad_norm": 0.2839575707912445, "learning_rate": 2.916669148968143e-05, "loss": 0.1517, "step": 28146 }, { "epoch": 0.5020333178753612, "grad_norm": 0.4459647834300995, "learning_rate": 2.9165156744689798e-05, "loss": 0.1994, "step": 28147 }, { "epoch": 0.5020511539970749, "grad_norm": 0.22343763709068298, "learning_rate": 2.9163621983552582e-05, "loss": 0.1402, "step": 28148 }, { "epoch": 0.5020689901187886, "grad_norm": 0.26836588978767395, "learning_rate": 2.916208720627573e-05, "loss": 0.128, "step": 28149 }, { "epoch": 0.5020868262405023, "grad_norm": 0.23422901332378387, "learning_rate": 2.9160552412865182e-05, "loss": 0.0989, "step": 28150 }, { "epoch": 0.502104662362216, "grad_norm": 0.24315550923347473, "learning_rate": 2.9159017603326904e-05, "loss": 0.1224, "step": 28151 }, { "epoch": 0.5021224984839296, "grad_norm": 0.3060329854488373, "learning_rate": 2.9157482777666838e-05, "loss": 0.1303, "step": 28152 }, { "epoch": 0.5021403346056433, "grad_norm": 0.2087785005569458, "learning_rate": 2.915594793589093e-05, "loss": 0.1532, "step": 28153 }, { "epoch": 0.502158170727357, "grad_norm": 0.32457199692726135, "learning_rate": 2.9154413078005126e-05, "loss": 0.1332, "step": 28154 }, { "epoch": 0.5021760068490707, "grad_norm": 0.2215084284543991, "learning_rate": 2.9152878204015386e-05, "loss": 0.1148, "step": 28155 }, { "epoch": 0.5021938429707844, "grad_norm": 0.2640739679336548, "learning_rate": 2.9151343313927663e-05, "loss": 0.143, "step": 28156 }, { "epoch": 0.5022116790924981, "grad_norm": 0.22357462346553802, "learning_rate": 2.9149808407747893e-05, "loss": 0.1446, "step": 28157 }, { "epoch": 0.5022295152142118, "grad_norm": 0.23636764287948608, "learning_rate": 2.9148273485482036e-05, "loss": 0.1612, "step": 28158 }, { "epoch": 0.5022473513359255, "grad_norm": 0.2029261291027069, "learning_rate": 2.9146738547136037e-05, "loss": 0.1144, "step": 28159 }, { "epoch": 0.5022651874576393, "grad_norm": 0.23585352301597595, "learning_rate": 2.9145203592715843e-05, "loss": 0.1953, "step": 28160 }, { "epoch": 0.502283023579353, "grad_norm": 0.24664238095283508, "learning_rate": 2.914366862222741e-05, "loss": 0.1368, "step": 28161 }, { "epoch": 0.5023008597010666, "grad_norm": 0.4473843574523926, "learning_rate": 2.914213363567669e-05, "loss": 0.1382, "step": 28162 }, { "epoch": 0.5023186958227803, "grad_norm": 0.2319299876689911, "learning_rate": 2.9140598633069628e-05, "loss": 0.1524, "step": 28163 }, { "epoch": 0.502336531944494, "grad_norm": 0.21977467834949493, "learning_rate": 2.9139063614412176e-05, "loss": 0.0822, "step": 28164 }, { "epoch": 0.5023543680662077, "grad_norm": 0.2436297982931137, "learning_rate": 2.9137528579710284e-05, "loss": 0.1433, "step": 28165 }, { "epoch": 0.5023722041879214, "grad_norm": 0.2653481960296631, "learning_rate": 2.9135993528969895e-05, "loss": 0.1572, "step": 28166 }, { "epoch": 0.5023900403096351, "grad_norm": 0.268472820520401, "learning_rate": 2.9134458462196974e-05, "loss": 0.2238, "step": 28167 }, { "epoch": 0.5024078764313488, "grad_norm": 0.3164266049861908, "learning_rate": 2.9132923379397458e-05, "loss": 0.208, "step": 28168 }, { "epoch": 0.5024257125530625, "grad_norm": 0.31102389097213745, "learning_rate": 2.913138828057731e-05, "loss": 0.2051, "step": 28169 }, { "epoch": 0.5024435486747761, "grad_norm": 0.22614945471286774, "learning_rate": 2.9129853165742472e-05, "loss": 0.1181, "step": 28170 }, { "epoch": 0.5024613847964898, "grad_norm": 0.20448987185955048, "learning_rate": 2.91283180348989e-05, "loss": 0.1799, "step": 28171 }, { "epoch": 0.5024792209182035, "grad_norm": 0.2348880171775818, "learning_rate": 2.9126782888052535e-05, "loss": 0.1019, "step": 28172 }, { "epoch": 0.5024970570399172, "grad_norm": 0.39700719714164734, "learning_rate": 2.9125247725209337e-05, "loss": 0.1301, "step": 28173 }, { "epoch": 0.5025148931616309, "grad_norm": 0.4070872366428375, "learning_rate": 2.9123712546375255e-05, "loss": 0.1456, "step": 28174 }, { "epoch": 0.5025327292833446, "grad_norm": 0.29996219277381897, "learning_rate": 2.9122177351556235e-05, "loss": 0.1371, "step": 28175 }, { "epoch": 0.5025505654050583, "grad_norm": 0.3382759690284729, "learning_rate": 2.912064214075823e-05, "loss": 0.2226, "step": 28176 }, { "epoch": 0.5025684015267721, "grad_norm": 0.2362937331199646, "learning_rate": 2.9119106913987205e-05, "loss": 0.1617, "step": 28177 }, { "epoch": 0.5025862376484858, "grad_norm": 0.21494202315807343, "learning_rate": 2.9117571671249095e-05, "loss": 0.1644, "step": 28178 }, { "epoch": 0.5026040737701994, "grad_norm": 0.3636217713356018, "learning_rate": 2.9116036412549845e-05, "loss": 0.1487, "step": 28179 }, { "epoch": 0.5026219098919131, "grad_norm": 0.22337442636489868, "learning_rate": 2.911450113789543e-05, "loss": 0.1564, "step": 28180 }, { "epoch": 0.5026397460136268, "grad_norm": 0.2899104356765747, "learning_rate": 2.9112965847291768e-05, "loss": 0.1852, "step": 28181 }, { "epoch": 0.5026575821353405, "grad_norm": 0.3071236312389374, "learning_rate": 2.9111430540744843e-05, "loss": 0.2012, "step": 28182 }, { "epoch": 0.5026754182570542, "grad_norm": 0.26008909940719604, "learning_rate": 2.9109895218260597e-05, "loss": 0.1325, "step": 28183 }, { "epoch": 0.5026932543787679, "grad_norm": 0.2218368649482727, "learning_rate": 2.910835987984497e-05, "loss": 0.1619, "step": 28184 }, { "epoch": 0.5027110905004816, "grad_norm": 0.32528555393218994, "learning_rate": 2.9106824525503922e-05, "loss": 0.1404, "step": 28185 }, { "epoch": 0.5027289266221953, "grad_norm": 0.24894194304943085, "learning_rate": 2.910528915524341e-05, "loss": 0.1128, "step": 28186 }, { "epoch": 0.502746762743909, "grad_norm": 0.29863473773002625, "learning_rate": 2.9103753769069374e-05, "loss": 0.1777, "step": 28187 }, { "epoch": 0.5027645988656226, "grad_norm": 0.24769367277622223, "learning_rate": 2.910221836698777e-05, "loss": 0.12, "step": 28188 }, { "epoch": 0.5027824349873363, "grad_norm": 0.22818894684314728, "learning_rate": 2.910068294900455e-05, "loss": 0.1569, "step": 28189 }, { "epoch": 0.50280027110905, "grad_norm": 0.24973297119140625, "learning_rate": 2.9099147515125675e-05, "loss": 0.1192, "step": 28190 }, { "epoch": 0.5028181072307637, "grad_norm": 0.5419824719429016, "learning_rate": 2.9097612065357083e-05, "loss": 0.1244, "step": 28191 }, { "epoch": 0.5028359433524774, "grad_norm": 0.178416445851326, "learning_rate": 2.909607659970473e-05, "loss": 0.099, "step": 28192 }, { "epoch": 0.5028537794741912, "grad_norm": 0.25704649090766907, "learning_rate": 2.9094541118174574e-05, "loss": 0.1142, "step": 28193 }, { "epoch": 0.5028716155959049, "grad_norm": 0.3065432608127594, "learning_rate": 2.9093005620772555e-05, "loss": 0.1248, "step": 28194 }, { "epoch": 0.5028894517176186, "grad_norm": 0.3015022277832031, "learning_rate": 2.9091470107504638e-05, "loss": 0.1401, "step": 28195 }, { "epoch": 0.5029072878393323, "grad_norm": 0.3590488135814667, "learning_rate": 2.9089934578376767e-05, "loss": 0.1253, "step": 28196 }, { "epoch": 0.5029251239610459, "grad_norm": 0.3722882866859436, "learning_rate": 2.90883990333949e-05, "loss": 0.1322, "step": 28197 }, { "epoch": 0.5029429600827596, "grad_norm": 0.3550504148006439, "learning_rate": 2.908686347256498e-05, "loss": 0.1139, "step": 28198 }, { "epoch": 0.5029607962044733, "grad_norm": 0.23661009967327118, "learning_rate": 2.9085327895892974e-05, "loss": 0.1097, "step": 28199 }, { "epoch": 0.502978632326187, "grad_norm": 0.2474004179239273, "learning_rate": 2.9083792303384815e-05, "loss": 0.1441, "step": 28200 }, { "epoch": 0.5029964684479007, "grad_norm": 0.20673780143260956, "learning_rate": 2.908225669504648e-05, "loss": 0.1023, "step": 28201 }, { "epoch": 0.5030143045696144, "grad_norm": 0.31982874870300293, "learning_rate": 2.90807210708839e-05, "loss": 0.1562, "step": 28202 }, { "epoch": 0.5030321406913281, "grad_norm": 0.44826820492744446, "learning_rate": 2.9079185430903035e-05, "loss": 0.2, "step": 28203 }, { "epoch": 0.5030499768130418, "grad_norm": 0.22934645414352417, "learning_rate": 2.9077649775109843e-05, "loss": 0.1327, "step": 28204 }, { "epoch": 0.5030678129347554, "grad_norm": 0.22432422637939453, "learning_rate": 2.9076114103510265e-05, "loss": 0.1463, "step": 28205 }, { "epoch": 0.5030856490564691, "grad_norm": 0.2702496647834778, "learning_rate": 2.907457841611027e-05, "loss": 0.1029, "step": 28206 }, { "epoch": 0.5031034851781828, "grad_norm": 0.25634831190109253, "learning_rate": 2.90730427129158e-05, "loss": 0.1095, "step": 28207 }, { "epoch": 0.5031213212998965, "grad_norm": 0.2893608808517456, "learning_rate": 2.90715069939328e-05, "loss": 0.2039, "step": 28208 }, { "epoch": 0.5031391574216102, "grad_norm": 0.434030145406723, "learning_rate": 2.906997125916724e-05, "loss": 0.14, "step": 28209 }, { "epoch": 0.503156993543324, "grad_norm": 0.33444851636886597, "learning_rate": 2.906843550862507e-05, "loss": 0.2418, "step": 28210 }, { "epoch": 0.5031748296650377, "grad_norm": 0.30654022097587585, "learning_rate": 2.9066899742312226e-05, "loss": 0.1692, "step": 28211 }, { "epoch": 0.5031926657867514, "grad_norm": 0.23410923779010773, "learning_rate": 2.9065363960234686e-05, "loss": 0.139, "step": 28212 }, { "epoch": 0.5032105019084651, "grad_norm": 0.2185935229063034, "learning_rate": 2.906382816239839e-05, "loss": 0.0798, "step": 28213 }, { "epoch": 0.5032283380301787, "grad_norm": 0.2545168995857239, "learning_rate": 2.906229234880929e-05, "loss": 0.1167, "step": 28214 }, { "epoch": 0.5032461741518924, "grad_norm": 0.2124333381652832, "learning_rate": 2.9060756519473337e-05, "loss": 0.1339, "step": 28215 }, { "epoch": 0.5032640102736061, "grad_norm": 0.24294736981391907, "learning_rate": 2.905922067439649e-05, "loss": 0.1306, "step": 28216 }, { "epoch": 0.5032818463953198, "grad_norm": 0.25309163331985474, "learning_rate": 2.905768481358471e-05, "loss": 0.1361, "step": 28217 }, { "epoch": 0.5032996825170335, "grad_norm": 0.32499727606773376, "learning_rate": 2.9056148937043936e-05, "loss": 0.1304, "step": 28218 }, { "epoch": 0.5033175186387472, "grad_norm": 0.21370266377925873, "learning_rate": 2.905461304478013e-05, "loss": 0.1149, "step": 28219 }, { "epoch": 0.5033353547604609, "grad_norm": 0.3214581608772278, "learning_rate": 2.9053077136799245e-05, "loss": 0.1526, "step": 28220 }, { "epoch": 0.5033531908821746, "grad_norm": 0.2663993537425995, "learning_rate": 2.9051541213107232e-05, "loss": 0.1126, "step": 28221 }, { "epoch": 0.5033710270038882, "grad_norm": 0.2632231116294861, "learning_rate": 2.9050005273710045e-05, "loss": 0.1417, "step": 28222 }, { "epoch": 0.5033888631256019, "grad_norm": 0.31506532430648804, "learning_rate": 2.9048469318613635e-05, "loss": 0.1822, "step": 28223 }, { "epoch": 0.5034066992473156, "grad_norm": 0.18968191742897034, "learning_rate": 2.904693334782396e-05, "loss": 0.1211, "step": 28224 }, { "epoch": 0.5034245353690293, "grad_norm": 0.3112109899520874, "learning_rate": 2.9045397361346983e-05, "loss": 0.1286, "step": 28225 }, { "epoch": 0.503442371490743, "grad_norm": 0.22733506560325623, "learning_rate": 2.904386135918864e-05, "loss": 0.0959, "step": 28226 }, { "epoch": 0.5034602076124568, "grad_norm": 0.3000713884830475, "learning_rate": 2.9042325341354898e-05, "loss": 0.1165, "step": 28227 }, { "epoch": 0.5034780437341705, "grad_norm": 0.27964308857917786, "learning_rate": 2.90407893078517e-05, "loss": 0.1904, "step": 28228 }, { "epoch": 0.5034958798558842, "grad_norm": 0.2866894602775574, "learning_rate": 2.9039253258685017e-05, "loss": 0.1439, "step": 28229 }, { "epoch": 0.5035137159775979, "grad_norm": 0.3150649666786194, "learning_rate": 2.9037717193860788e-05, "loss": 0.1577, "step": 28230 }, { "epoch": 0.5035315520993116, "grad_norm": 0.31004220247268677, "learning_rate": 2.9036181113384965e-05, "loss": 0.185, "step": 28231 }, { "epoch": 0.5035493882210252, "grad_norm": 0.29367175698280334, "learning_rate": 2.9034645017263522e-05, "loss": 0.1619, "step": 28232 }, { "epoch": 0.5035672243427389, "grad_norm": 0.3524821102619171, "learning_rate": 2.903310890550239e-05, "loss": 0.1607, "step": 28233 }, { "epoch": 0.5035850604644526, "grad_norm": 0.25724029541015625, "learning_rate": 2.903157277810755e-05, "loss": 0.1416, "step": 28234 }, { "epoch": 0.5036028965861663, "grad_norm": 0.22763384878635406, "learning_rate": 2.9030036635084928e-05, "loss": 0.068, "step": 28235 }, { "epoch": 0.50362073270788, "grad_norm": 0.27549588680267334, "learning_rate": 2.9028500476440497e-05, "loss": 0.1306, "step": 28236 }, { "epoch": 0.5036385688295937, "grad_norm": 0.20183268189430237, "learning_rate": 2.9026964302180203e-05, "loss": 0.1216, "step": 28237 }, { "epoch": 0.5036564049513074, "grad_norm": 0.24084806442260742, "learning_rate": 2.9025428112310014e-05, "loss": 0.1604, "step": 28238 }, { "epoch": 0.5036742410730211, "grad_norm": 0.25227150321006775, "learning_rate": 2.9023891906835866e-05, "loss": 0.1119, "step": 28239 }, { "epoch": 0.5036920771947347, "grad_norm": 0.3735066056251526, "learning_rate": 2.902235568576373e-05, "loss": 0.1263, "step": 28240 }, { "epoch": 0.5037099133164484, "grad_norm": 0.3639550805091858, "learning_rate": 2.9020819449099552e-05, "loss": 0.1085, "step": 28241 }, { "epoch": 0.5037277494381621, "grad_norm": 0.21803152561187744, "learning_rate": 2.901928319684929e-05, "loss": 0.1101, "step": 28242 }, { "epoch": 0.5037455855598758, "grad_norm": 0.2249739021062851, "learning_rate": 2.9017746929018897e-05, "loss": 0.1194, "step": 28243 }, { "epoch": 0.5037634216815896, "grad_norm": 0.3105986714363098, "learning_rate": 2.9016210645614322e-05, "loss": 0.1695, "step": 28244 }, { "epoch": 0.5037812578033033, "grad_norm": 0.2683711647987366, "learning_rate": 2.901467434664154e-05, "loss": 0.1242, "step": 28245 }, { "epoch": 0.503799093925017, "grad_norm": 0.319701224565506, "learning_rate": 2.9013138032106485e-05, "loss": 0.1428, "step": 28246 }, { "epoch": 0.5038169300467307, "grad_norm": 0.43664389848709106, "learning_rate": 2.9011601702015124e-05, "loss": 0.1651, "step": 28247 }, { "epoch": 0.5038347661684444, "grad_norm": 0.34010130167007446, "learning_rate": 2.9010065356373405e-05, "loss": 0.1177, "step": 28248 }, { "epoch": 0.503852602290158, "grad_norm": 0.23218445479869843, "learning_rate": 2.9008528995187296e-05, "loss": 0.1557, "step": 28249 }, { "epoch": 0.5038704384118717, "grad_norm": 0.3078397810459137, "learning_rate": 2.9006992618462736e-05, "loss": 0.1593, "step": 28250 }, { "epoch": 0.5038882745335854, "grad_norm": 0.2370034009218216, "learning_rate": 2.900545622620569e-05, "loss": 0.1081, "step": 28251 }, { "epoch": 0.5039061106552991, "grad_norm": 0.2142411470413208, "learning_rate": 2.9003919818422116e-05, "loss": 0.1324, "step": 28252 }, { "epoch": 0.5039239467770128, "grad_norm": 0.24562764167785645, "learning_rate": 2.9002383395117965e-05, "loss": 0.1711, "step": 28253 }, { "epoch": 0.5039417828987265, "grad_norm": 0.2884778678417206, "learning_rate": 2.900084695629919e-05, "loss": 0.1495, "step": 28254 }, { "epoch": 0.5039596190204402, "grad_norm": 0.2911554276943207, "learning_rate": 2.8999310501971755e-05, "loss": 0.0916, "step": 28255 }, { "epoch": 0.5039774551421539, "grad_norm": 0.23309870064258575, "learning_rate": 2.899777403214161e-05, "loss": 0.0948, "step": 28256 }, { "epoch": 0.5039952912638676, "grad_norm": 0.21861794590950012, "learning_rate": 2.8996237546814713e-05, "loss": 0.1512, "step": 28257 }, { "epoch": 0.5040131273855812, "grad_norm": 0.2875767946243286, "learning_rate": 2.8994701045997014e-05, "loss": 0.137, "step": 28258 }, { "epoch": 0.5040309635072949, "grad_norm": 0.2580637037754059, "learning_rate": 2.8993164529694476e-05, "loss": 0.1328, "step": 28259 }, { "epoch": 0.5040487996290086, "grad_norm": 0.26604339480400085, "learning_rate": 2.8991627997913055e-05, "loss": 0.1949, "step": 28260 }, { "epoch": 0.5040666357507224, "grad_norm": 0.20167092978954315, "learning_rate": 2.8990091450658707e-05, "loss": 0.099, "step": 28261 }, { "epoch": 0.5040844718724361, "grad_norm": 0.3219318389892578, "learning_rate": 2.8988554887937384e-05, "loss": 0.1312, "step": 28262 }, { "epoch": 0.5041023079941498, "grad_norm": 0.26011037826538086, "learning_rate": 2.8987018309755044e-05, "loss": 0.2164, "step": 28263 }, { "epoch": 0.5041201441158635, "grad_norm": 0.2656829059123993, "learning_rate": 2.898548171611764e-05, "loss": 0.1538, "step": 28264 }, { "epoch": 0.5041379802375772, "grad_norm": 0.2955317199230194, "learning_rate": 2.8983945107031136e-05, "loss": 0.1893, "step": 28265 }, { "epoch": 0.5041558163592909, "grad_norm": 0.249686598777771, "learning_rate": 2.8982408482501487e-05, "loss": 0.0937, "step": 28266 }, { "epoch": 0.5041736524810045, "grad_norm": 0.2201281189918518, "learning_rate": 2.8980871842534646e-05, "loss": 0.1007, "step": 28267 }, { "epoch": 0.5041914886027182, "grad_norm": 0.24558743834495544, "learning_rate": 2.8979335187136576e-05, "loss": 0.1196, "step": 28268 }, { "epoch": 0.5042093247244319, "grad_norm": 0.2904684543609619, "learning_rate": 2.8977798516313226e-05, "loss": 0.1576, "step": 28269 }, { "epoch": 0.5042271608461456, "grad_norm": 0.20844466984272003, "learning_rate": 2.897626183007055e-05, "loss": 0.1248, "step": 28270 }, { "epoch": 0.5042449969678593, "grad_norm": 0.32644855976104736, "learning_rate": 2.897472512841451e-05, "loss": 0.1425, "step": 28271 }, { "epoch": 0.504262833089573, "grad_norm": 0.3606814742088318, "learning_rate": 2.897318841135106e-05, "loss": 0.2302, "step": 28272 }, { "epoch": 0.5042806692112867, "grad_norm": 0.1962312012910843, "learning_rate": 2.897165167888617e-05, "loss": 0.1213, "step": 28273 }, { "epoch": 0.5042985053330004, "grad_norm": 0.2603902220726013, "learning_rate": 2.897011493102578e-05, "loss": 0.1162, "step": 28274 }, { "epoch": 0.504316341454714, "grad_norm": 0.24425619840621948, "learning_rate": 2.896857816777585e-05, "loss": 0.156, "step": 28275 }, { "epoch": 0.5043341775764277, "grad_norm": 0.2703363299369812, "learning_rate": 2.8967041389142346e-05, "loss": 0.1484, "step": 28276 }, { "epoch": 0.5043520136981414, "grad_norm": 0.21123532950878143, "learning_rate": 2.896550459513122e-05, "loss": 0.0945, "step": 28277 }, { "epoch": 0.5043698498198552, "grad_norm": 0.24760466814041138, "learning_rate": 2.8963967785748425e-05, "loss": 0.1609, "step": 28278 }, { "epoch": 0.5043876859415689, "grad_norm": 0.1943018138408661, "learning_rate": 2.896243096099992e-05, "loss": 0.1618, "step": 28279 }, { "epoch": 0.5044055220632826, "grad_norm": 0.29512542486190796, "learning_rate": 2.8960894120891662e-05, "loss": 0.1593, "step": 28280 }, { "epoch": 0.5044233581849963, "grad_norm": 0.28630539774894714, "learning_rate": 2.895935726542962e-05, "loss": 0.1401, "step": 28281 }, { "epoch": 0.50444119430671, "grad_norm": 0.21738514304161072, "learning_rate": 2.895782039461974e-05, "loss": 0.1214, "step": 28282 }, { "epoch": 0.5044590304284237, "grad_norm": 0.2422303706407547, "learning_rate": 2.8956283508467975e-05, "loss": 0.185, "step": 28283 }, { "epoch": 0.5044768665501373, "grad_norm": 0.14903074502944946, "learning_rate": 2.8954746606980295e-05, "loss": 0.0847, "step": 28284 }, { "epoch": 0.504494702671851, "grad_norm": 0.2515467703342438, "learning_rate": 2.8953209690162642e-05, "loss": 0.117, "step": 28285 }, { "epoch": 0.5045125387935647, "grad_norm": 0.39284393191337585, "learning_rate": 2.895167275802099e-05, "loss": 0.1475, "step": 28286 }, { "epoch": 0.5045303749152784, "grad_norm": 0.23131625354290009, "learning_rate": 2.8950135810561286e-05, "loss": 0.1638, "step": 28287 }, { "epoch": 0.5045482110369921, "grad_norm": 0.23975437879562378, "learning_rate": 2.8948598847789494e-05, "loss": 0.1097, "step": 28288 }, { "epoch": 0.5045660471587058, "grad_norm": 0.3290809690952301, "learning_rate": 2.8947061869711566e-05, "loss": 0.1602, "step": 28289 }, { "epoch": 0.5045838832804195, "grad_norm": 0.25540485978126526, "learning_rate": 2.894552487633347e-05, "loss": 0.1524, "step": 28290 }, { "epoch": 0.5046017194021332, "grad_norm": 0.2619209587574005, "learning_rate": 2.8943987867661148e-05, "loss": 0.1292, "step": 28291 }, { "epoch": 0.5046195555238469, "grad_norm": 0.32695481181144714, "learning_rate": 2.894245084370057e-05, "loss": 0.1407, "step": 28292 }, { "epoch": 0.5046373916455605, "grad_norm": 0.2673144042491913, "learning_rate": 2.894091380445769e-05, "loss": 0.1397, "step": 28293 }, { "epoch": 0.5046552277672743, "grad_norm": 0.22678114473819733, "learning_rate": 2.8939376749938472e-05, "loss": 0.1296, "step": 28294 }, { "epoch": 0.504673063888988, "grad_norm": 0.28748729825019836, "learning_rate": 2.8937839680148872e-05, "loss": 0.1152, "step": 28295 }, { "epoch": 0.5046909000107017, "grad_norm": 0.3260626494884491, "learning_rate": 2.8936302595094833e-05, "loss": 0.1862, "step": 28296 }, { "epoch": 0.5047087361324154, "grad_norm": 0.2241354137659073, "learning_rate": 2.8934765494782333e-05, "loss": 0.1071, "step": 28297 }, { "epoch": 0.5047265722541291, "grad_norm": 0.26125675439834595, "learning_rate": 2.8933228379217324e-05, "loss": 0.1384, "step": 28298 }, { "epoch": 0.5047444083758428, "grad_norm": 0.2865285277366638, "learning_rate": 2.8931691248405756e-05, "loss": 0.1669, "step": 28299 }, { "epoch": 0.5047622444975565, "grad_norm": 0.22164495289325714, "learning_rate": 2.8930154102353602e-05, "loss": 0.1405, "step": 28300 }, { "epoch": 0.5047800806192702, "grad_norm": 0.24465897679328918, "learning_rate": 2.892861694106681e-05, "loss": 0.1087, "step": 28301 }, { "epoch": 0.5047979167409838, "grad_norm": 0.23553021252155304, "learning_rate": 2.8927079764551344e-05, "loss": 0.1178, "step": 28302 }, { "epoch": 0.5048157528626975, "grad_norm": 0.28661873936653137, "learning_rate": 2.8925542572813164e-05, "loss": 0.1829, "step": 28303 }, { "epoch": 0.5048335889844112, "grad_norm": 0.33187881112098694, "learning_rate": 2.8924005365858216e-05, "loss": 0.1375, "step": 28304 }, { "epoch": 0.5048514251061249, "grad_norm": 0.3010408282279968, "learning_rate": 2.8922468143692477e-05, "loss": 0.1465, "step": 28305 }, { "epoch": 0.5048692612278386, "grad_norm": 0.20376047492027283, "learning_rate": 2.8920930906321887e-05, "loss": 0.0954, "step": 28306 }, { "epoch": 0.5048870973495523, "grad_norm": 0.21004408597946167, "learning_rate": 2.891939365375242e-05, "loss": 0.1216, "step": 28307 }, { "epoch": 0.504904933471266, "grad_norm": 0.2306356579065323, "learning_rate": 2.8917856385990034e-05, "loss": 0.156, "step": 28308 }, { "epoch": 0.5049227695929797, "grad_norm": 0.26142948865890503, "learning_rate": 2.8916319103040672e-05, "loss": 0.1748, "step": 28309 }, { "epoch": 0.5049406057146933, "grad_norm": 0.26618692278862, "learning_rate": 2.891478180491032e-05, "loss": 0.1531, "step": 28310 }, { "epoch": 0.5049584418364071, "grad_norm": 0.22935238480567932, "learning_rate": 2.891324449160491e-05, "loss": 0.1113, "step": 28311 }, { "epoch": 0.5049762779581208, "grad_norm": 0.21562381088733673, "learning_rate": 2.8911707163130413e-05, "loss": 0.1217, "step": 28312 }, { "epoch": 0.5049941140798345, "grad_norm": 0.28413572907447815, "learning_rate": 2.8910169819492784e-05, "loss": 0.1567, "step": 28313 }, { "epoch": 0.5050119502015482, "grad_norm": 0.3368399441242218, "learning_rate": 2.8908632460698e-05, "loss": 0.2026, "step": 28314 }, { "epoch": 0.5050297863232619, "grad_norm": 0.3194226920604706, "learning_rate": 2.8907095086751996e-05, "loss": 0.1683, "step": 28315 }, { "epoch": 0.5050476224449756, "grad_norm": 0.28431981801986694, "learning_rate": 2.8905557697660746e-05, "loss": 0.1467, "step": 28316 }, { "epoch": 0.5050654585666893, "grad_norm": 0.28889966011047363, "learning_rate": 2.89040202934302e-05, "loss": 0.1439, "step": 28317 }, { "epoch": 0.505083294688403, "grad_norm": 0.24825017154216766, "learning_rate": 2.8902482874066333e-05, "loss": 0.144, "step": 28318 }, { "epoch": 0.5051011308101166, "grad_norm": 0.2697658836841583, "learning_rate": 2.890094543957509e-05, "loss": 0.1255, "step": 28319 }, { "epoch": 0.5051189669318303, "grad_norm": 0.18925027549266815, "learning_rate": 2.889940798996243e-05, "loss": 0.1064, "step": 28320 }, { "epoch": 0.505136803053544, "grad_norm": 0.25063151121139526, "learning_rate": 2.889787052523432e-05, "loss": 0.1348, "step": 28321 }, { "epoch": 0.5051546391752577, "grad_norm": 0.4020143151283264, "learning_rate": 2.8896333045396724e-05, "loss": 0.1762, "step": 28322 }, { "epoch": 0.5051724752969714, "grad_norm": 0.2611508071422577, "learning_rate": 2.8894795550455595e-05, "loss": 0.12, "step": 28323 }, { "epoch": 0.5051903114186851, "grad_norm": 0.28865477442741394, "learning_rate": 2.889325804041688e-05, "loss": 0.1435, "step": 28324 }, { "epoch": 0.5052081475403988, "grad_norm": 0.23400035500526428, "learning_rate": 2.889172051528657e-05, "loss": 0.1477, "step": 28325 }, { "epoch": 0.5052259836621125, "grad_norm": 0.320441871881485, "learning_rate": 2.8890182975070594e-05, "loss": 0.1369, "step": 28326 }, { "epoch": 0.5052438197838262, "grad_norm": 0.2101871222257614, "learning_rate": 2.888864541977493e-05, "loss": 0.1112, "step": 28327 }, { "epoch": 0.50526165590554, "grad_norm": 0.4844701290130615, "learning_rate": 2.8887107849405533e-05, "loss": 0.1169, "step": 28328 }, { "epoch": 0.5052794920272536, "grad_norm": 0.22285085916519165, "learning_rate": 2.8885570263968363e-05, "loss": 0.1778, "step": 28329 }, { "epoch": 0.5052973281489673, "grad_norm": 0.34672802686691284, "learning_rate": 2.888403266346938e-05, "loss": 0.1473, "step": 28330 }, { "epoch": 0.505315164270681, "grad_norm": 0.20867227017879486, "learning_rate": 2.888249504791455e-05, "loss": 0.1265, "step": 28331 }, { "epoch": 0.5053330003923947, "grad_norm": 0.3154470920562744, "learning_rate": 2.888095741730982e-05, "loss": 0.1871, "step": 28332 }, { "epoch": 0.5053508365141084, "grad_norm": 0.20947100222110748, "learning_rate": 2.8879419771661166e-05, "loss": 0.1261, "step": 28333 }, { "epoch": 0.5053686726358221, "grad_norm": 0.22799675166606903, "learning_rate": 2.8877882110974534e-05, "loss": 0.1309, "step": 28334 }, { "epoch": 0.5053865087575358, "grad_norm": 0.1632799655199051, "learning_rate": 2.8876344435255897e-05, "loss": 0.1125, "step": 28335 }, { "epoch": 0.5054043448792495, "grad_norm": 0.2476794272661209, "learning_rate": 2.8874806744511206e-05, "loss": 0.1771, "step": 28336 }, { "epoch": 0.5054221810009631, "grad_norm": 0.24419334530830383, "learning_rate": 2.887326903874643e-05, "loss": 0.1097, "step": 28337 }, { "epoch": 0.5054400171226768, "grad_norm": 0.19359739124774933, "learning_rate": 2.8871731317967527e-05, "loss": 0.127, "step": 28338 }, { "epoch": 0.5054578532443905, "grad_norm": 0.2425316572189331, "learning_rate": 2.887019358218045e-05, "loss": 0.163, "step": 28339 }, { "epoch": 0.5054756893661042, "grad_norm": 0.3257550299167633, "learning_rate": 2.8868655831391166e-05, "loss": 0.1482, "step": 28340 }, { "epoch": 0.5054935254878179, "grad_norm": 0.2643504738807678, "learning_rate": 2.8867118065605635e-05, "loss": 0.1716, "step": 28341 }, { "epoch": 0.5055113616095316, "grad_norm": 0.20067425072193146, "learning_rate": 2.886558028482983e-05, "loss": 0.1005, "step": 28342 }, { "epoch": 0.5055291977312453, "grad_norm": 0.28453224897384644, "learning_rate": 2.8864042489069687e-05, "loss": 0.1977, "step": 28343 }, { "epoch": 0.505547033852959, "grad_norm": 0.25305116176605225, "learning_rate": 2.8862504678331192e-05, "loss": 0.1427, "step": 28344 }, { "epoch": 0.5055648699746728, "grad_norm": 0.30163607001304626, "learning_rate": 2.8860966852620286e-05, "loss": 0.1851, "step": 28345 }, { "epoch": 0.5055827060963864, "grad_norm": 0.2377101182937622, "learning_rate": 2.8859429011942947e-05, "loss": 0.1403, "step": 28346 }, { "epoch": 0.5056005422181001, "grad_norm": 0.31126388907432556, "learning_rate": 2.8857891156305116e-05, "loss": 0.1061, "step": 28347 }, { "epoch": 0.5056183783398138, "grad_norm": 0.2425568550825119, "learning_rate": 2.8856353285712777e-05, "loss": 0.0923, "step": 28348 }, { "epoch": 0.5056362144615275, "grad_norm": 0.2648632228374481, "learning_rate": 2.885481540017188e-05, "loss": 0.133, "step": 28349 }, { "epoch": 0.5056540505832412, "grad_norm": 0.2518453896045685, "learning_rate": 2.885327749968838e-05, "loss": 0.1581, "step": 28350 }, { "epoch": 0.5056718867049549, "grad_norm": 0.16720138490200043, "learning_rate": 2.8851739584268257e-05, "loss": 0.1488, "step": 28351 }, { "epoch": 0.5056897228266686, "grad_norm": 0.25937044620513916, "learning_rate": 2.885020165391745e-05, "loss": 0.21, "step": 28352 }, { "epoch": 0.5057075589483823, "grad_norm": 0.36026614904403687, "learning_rate": 2.884866370864194e-05, "loss": 0.1414, "step": 28353 }, { "epoch": 0.505725395070096, "grad_norm": 0.27204209566116333, "learning_rate": 2.8847125748447672e-05, "loss": 0.1515, "step": 28354 }, { "epoch": 0.5057432311918096, "grad_norm": 0.3236941397190094, "learning_rate": 2.884558777334062e-05, "loss": 0.1315, "step": 28355 }, { "epoch": 0.5057610673135233, "grad_norm": 0.2813650965690613, "learning_rate": 2.8844049783326736e-05, "loss": 0.1268, "step": 28356 }, { "epoch": 0.505778903435237, "grad_norm": 0.21412873268127441, "learning_rate": 2.8842511778411997e-05, "loss": 0.1262, "step": 28357 }, { "epoch": 0.5057967395569507, "grad_norm": 0.2654881179332733, "learning_rate": 2.8840973758602352e-05, "loss": 0.0994, "step": 28358 }, { "epoch": 0.5058145756786644, "grad_norm": 0.2818746268749237, "learning_rate": 2.883943572390377e-05, "loss": 0.1089, "step": 28359 }, { "epoch": 0.5058324118003781, "grad_norm": 0.1742342859506607, "learning_rate": 2.88378976743222e-05, "loss": 0.1293, "step": 28360 }, { "epoch": 0.5058502479220918, "grad_norm": 0.23831379413604736, "learning_rate": 2.8836359609863615e-05, "loss": 0.1271, "step": 28361 }, { "epoch": 0.5058680840438056, "grad_norm": 0.2732433080673218, "learning_rate": 2.883482153053398e-05, "loss": 0.1041, "step": 28362 }, { "epoch": 0.5058859201655193, "grad_norm": 0.2501196563243866, "learning_rate": 2.883328343633925e-05, "loss": 0.1351, "step": 28363 }, { "epoch": 0.5059037562872329, "grad_norm": 0.23516647517681122, "learning_rate": 2.8831745327285386e-05, "loss": 0.1341, "step": 28364 }, { "epoch": 0.5059215924089466, "grad_norm": 0.33683860301971436, "learning_rate": 2.8830207203378357e-05, "loss": 0.167, "step": 28365 }, { "epoch": 0.5059394285306603, "grad_norm": 0.2710850238800049, "learning_rate": 2.8828669064624124e-05, "loss": 0.1, "step": 28366 }, { "epoch": 0.505957264652374, "grad_norm": 0.2861916720867157, "learning_rate": 2.8827130911028644e-05, "loss": 0.1899, "step": 28367 }, { "epoch": 0.5059751007740877, "grad_norm": 0.23177196085453033, "learning_rate": 2.882559274259788e-05, "loss": 0.1277, "step": 28368 }, { "epoch": 0.5059929368958014, "grad_norm": 0.28973355889320374, "learning_rate": 2.88240545593378e-05, "loss": 0.1628, "step": 28369 }, { "epoch": 0.5060107730175151, "grad_norm": 0.26916858553886414, "learning_rate": 2.882251636125437e-05, "loss": 0.1361, "step": 28370 }, { "epoch": 0.5060286091392288, "grad_norm": 0.2533247172832489, "learning_rate": 2.882097814835354e-05, "loss": 0.1578, "step": 28371 }, { "epoch": 0.5060464452609424, "grad_norm": 0.40038228034973145, "learning_rate": 2.8819439920641276e-05, "loss": 0.1232, "step": 28372 }, { "epoch": 0.5060642813826561, "grad_norm": 0.27214571833610535, "learning_rate": 2.881790167812355e-05, "loss": 0.1674, "step": 28373 }, { "epoch": 0.5060821175043698, "grad_norm": 0.2790907025337219, "learning_rate": 2.881636342080632e-05, "loss": 0.1524, "step": 28374 }, { "epoch": 0.5060999536260835, "grad_norm": 0.41377803683280945, "learning_rate": 2.881482514869554e-05, "loss": 0.1551, "step": 28375 }, { "epoch": 0.5061177897477972, "grad_norm": 0.23858848214149475, "learning_rate": 2.8813286861797178e-05, "loss": 0.1251, "step": 28376 }, { "epoch": 0.5061356258695109, "grad_norm": 0.3751448392868042, "learning_rate": 2.8811748560117208e-05, "loss": 0.1869, "step": 28377 }, { "epoch": 0.5061534619912246, "grad_norm": 0.27639347314834595, "learning_rate": 2.881021024366158e-05, "loss": 0.1687, "step": 28378 }, { "epoch": 0.5061712981129384, "grad_norm": 0.25463035702705383, "learning_rate": 2.8808671912436262e-05, "loss": 0.0999, "step": 28379 }, { "epoch": 0.5061891342346521, "grad_norm": 0.32845792174339294, "learning_rate": 2.8807133566447213e-05, "loss": 0.1824, "step": 28380 }, { "epoch": 0.5062069703563657, "grad_norm": 0.2344057559967041, "learning_rate": 2.880559520570041e-05, "loss": 0.151, "step": 28381 }, { "epoch": 0.5062248064780794, "grad_norm": 0.38982972502708435, "learning_rate": 2.8804056830201793e-05, "loss": 0.1802, "step": 28382 }, { "epoch": 0.5062426425997931, "grad_norm": 0.2142881602048874, "learning_rate": 2.8802518439957342e-05, "loss": 0.1841, "step": 28383 }, { "epoch": 0.5062604787215068, "grad_norm": 0.3108970522880554, "learning_rate": 2.8800980034973014e-05, "loss": 0.1515, "step": 28384 }, { "epoch": 0.5062783148432205, "grad_norm": 0.26968348026275635, "learning_rate": 2.879944161525478e-05, "loss": 0.1831, "step": 28385 }, { "epoch": 0.5062961509649342, "grad_norm": 0.2866678535938263, "learning_rate": 2.8797903180808594e-05, "loss": 0.1477, "step": 28386 }, { "epoch": 0.5063139870866479, "grad_norm": 0.35315167903900146, "learning_rate": 2.8796364731640428e-05, "loss": 0.1428, "step": 28387 }, { "epoch": 0.5063318232083616, "grad_norm": 0.3027825355529785, "learning_rate": 2.879482626775624e-05, "loss": 0.1612, "step": 28388 }, { "epoch": 0.5063496593300753, "grad_norm": 0.36291801929473877, "learning_rate": 2.8793287789161995e-05, "loss": 0.1158, "step": 28389 }, { "epoch": 0.5063674954517889, "grad_norm": 0.21735769510269165, "learning_rate": 2.8791749295863646e-05, "loss": 0.1392, "step": 28390 }, { "epoch": 0.5063853315735026, "grad_norm": 0.2526581585407257, "learning_rate": 2.879021078786718e-05, "loss": 0.098, "step": 28391 }, { "epoch": 0.5064031676952163, "grad_norm": 0.35680288076400757, "learning_rate": 2.878867226517854e-05, "loss": 0.1956, "step": 28392 }, { "epoch": 0.50642100381693, "grad_norm": 0.2951757311820984, "learning_rate": 2.87871337278037e-05, "loss": 0.1527, "step": 28393 }, { "epoch": 0.5064388399386437, "grad_norm": 0.2806277573108673, "learning_rate": 2.8785595175748624e-05, "loss": 0.1279, "step": 28394 }, { "epoch": 0.5064566760603575, "grad_norm": 0.21698053181171417, "learning_rate": 2.878405660901927e-05, "loss": 0.1361, "step": 28395 }, { "epoch": 0.5064745121820712, "grad_norm": 0.25057244300842285, "learning_rate": 2.878251802762161e-05, "loss": 0.1388, "step": 28396 }, { "epoch": 0.5064923483037849, "grad_norm": 0.2653912901878357, "learning_rate": 2.8780979431561593e-05, "loss": 0.1745, "step": 28397 }, { "epoch": 0.5065101844254986, "grad_norm": 0.2689267694950104, "learning_rate": 2.877944082084521e-05, "loss": 0.1215, "step": 28398 }, { "epoch": 0.5065280205472122, "grad_norm": 0.36103591322898865, "learning_rate": 2.8777902195478396e-05, "loss": 0.1203, "step": 28399 }, { "epoch": 0.5065458566689259, "grad_norm": 0.2123267501592636, "learning_rate": 2.877636355546714e-05, "loss": 0.1156, "step": 28400 }, { "epoch": 0.5065636927906396, "grad_norm": 0.30385056138038635, "learning_rate": 2.8774824900817388e-05, "loss": 0.1828, "step": 28401 }, { "epoch": 0.5065815289123533, "grad_norm": 0.2396973818540573, "learning_rate": 2.8773286231535107e-05, "loss": 0.1337, "step": 28402 }, { "epoch": 0.506599365034067, "grad_norm": 0.33189648389816284, "learning_rate": 2.877174754762627e-05, "loss": 0.1545, "step": 28403 }, { "epoch": 0.5066172011557807, "grad_norm": 0.23831751942634583, "learning_rate": 2.877020884909683e-05, "loss": 0.1211, "step": 28404 }, { "epoch": 0.5066350372774944, "grad_norm": 0.36035892367362976, "learning_rate": 2.8768670135952768e-05, "loss": 0.1438, "step": 28405 }, { "epoch": 0.5066528733992081, "grad_norm": 0.2645803689956665, "learning_rate": 2.8767131408200033e-05, "loss": 0.0785, "step": 28406 }, { "epoch": 0.5066707095209217, "grad_norm": 0.24531646072864532, "learning_rate": 2.8765592665844603e-05, "loss": 0.1409, "step": 28407 }, { "epoch": 0.5066885456426354, "grad_norm": 0.2863783836364746, "learning_rate": 2.876405390889243e-05, "loss": 0.171, "step": 28408 }, { "epoch": 0.5067063817643491, "grad_norm": 0.31837233901023865, "learning_rate": 2.876251513734948e-05, "loss": 0.1294, "step": 28409 }, { "epoch": 0.5067242178860628, "grad_norm": 0.22746196389198303, "learning_rate": 2.876097635122173e-05, "loss": 0.1222, "step": 28410 }, { "epoch": 0.5067420540077765, "grad_norm": 0.26692891120910645, "learning_rate": 2.8759437550515128e-05, "loss": 0.1436, "step": 28411 }, { "epoch": 0.5067598901294903, "grad_norm": 0.266908198595047, "learning_rate": 2.875789873523565e-05, "loss": 0.0977, "step": 28412 }, { "epoch": 0.506777726251204, "grad_norm": 0.3461291193962097, "learning_rate": 2.8756359905389264e-05, "loss": 0.1501, "step": 28413 }, { "epoch": 0.5067955623729177, "grad_norm": 0.2428554892539978, "learning_rate": 2.875482106098193e-05, "loss": 0.121, "step": 28414 }, { "epoch": 0.5068133984946314, "grad_norm": 0.2869994044303894, "learning_rate": 2.8753282202019603e-05, "loss": 0.1528, "step": 28415 }, { "epoch": 0.506831234616345, "grad_norm": 0.21481558680534363, "learning_rate": 2.875174332850827e-05, "loss": 0.1349, "step": 28416 }, { "epoch": 0.5068490707380587, "grad_norm": 0.3269020915031433, "learning_rate": 2.8750204440453872e-05, "loss": 0.1684, "step": 28417 }, { "epoch": 0.5068669068597724, "grad_norm": 0.32559534907341003, "learning_rate": 2.874866553786239e-05, "loss": 0.1526, "step": 28418 }, { "epoch": 0.5068847429814861, "grad_norm": 0.2766841650009155, "learning_rate": 2.874712662073979e-05, "loss": 0.1298, "step": 28419 }, { "epoch": 0.5069025791031998, "grad_norm": 0.2073928266763687, "learning_rate": 2.874558768909203e-05, "loss": 0.179, "step": 28420 }, { "epoch": 0.5069204152249135, "grad_norm": 0.33086246252059937, "learning_rate": 2.874404874292508e-05, "loss": 0.1165, "step": 28421 }, { "epoch": 0.5069382513466272, "grad_norm": 0.2925296425819397, "learning_rate": 2.8742509782244904e-05, "loss": 0.1392, "step": 28422 }, { "epoch": 0.5069560874683409, "grad_norm": 0.2516690194606781, "learning_rate": 2.8740970807057467e-05, "loss": 0.1243, "step": 28423 }, { "epoch": 0.5069739235900546, "grad_norm": 0.2913403809070587, "learning_rate": 2.8739431817368728e-05, "loss": 0.1606, "step": 28424 }, { "epoch": 0.5069917597117682, "grad_norm": 0.26513007283210754, "learning_rate": 2.8737892813184663e-05, "loss": 0.1404, "step": 28425 }, { "epoch": 0.5070095958334819, "grad_norm": 0.3299475908279419, "learning_rate": 2.8736353794511246e-05, "loss": 0.145, "step": 28426 }, { "epoch": 0.5070274319551956, "grad_norm": 0.2627675533294678, "learning_rate": 2.8734814761354423e-05, "loss": 0.1394, "step": 28427 }, { "epoch": 0.5070452680769093, "grad_norm": 0.2725169360637665, "learning_rate": 2.8733275713720166e-05, "loss": 0.1437, "step": 28428 }, { "epoch": 0.5070631041986231, "grad_norm": 0.22264286875724792, "learning_rate": 2.873173665161445e-05, "loss": 0.0541, "step": 28429 }, { "epoch": 0.5070809403203368, "grad_norm": 0.2680964171886444, "learning_rate": 2.873019757504322e-05, "loss": 0.1445, "step": 28430 }, { "epoch": 0.5070987764420505, "grad_norm": 0.3315582573413849, "learning_rate": 2.8728658484012465e-05, "loss": 0.1693, "step": 28431 }, { "epoch": 0.5071166125637642, "grad_norm": 0.26628589630126953, "learning_rate": 2.8727119378528138e-05, "loss": 0.1336, "step": 28432 }, { "epoch": 0.5071344486854779, "grad_norm": 0.38926175236701965, "learning_rate": 2.8725580258596218e-05, "loss": 0.1417, "step": 28433 }, { "epoch": 0.5071522848071915, "grad_norm": 0.2617027759552002, "learning_rate": 2.8724041124222657e-05, "loss": 0.2018, "step": 28434 }, { "epoch": 0.5071701209289052, "grad_norm": 0.3515075743198395, "learning_rate": 2.8722501975413422e-05, "loss": 0.1508, "step": 28435 }, { "epoch": 0.5071879570506189, "grad_norm": 0.23065081238746643, "learning_rate": 2.872096281217449e-05, "loss": 0.1546, "step": 28436 }, { "epoch": 0.5072057931723326, "grad_norm": 0.27275601029396057, "learning_rate": 2.8719423634511823e-05, "loss": 0.0949, "step": 28437 }, { "epoch": 0.5072236292940463, "grad_norm": 0.32661938667297363, "learning_rate": 2.8717884442431374e-05, "loss": 0.1606, "step": 28438 }, { "epoch": 0.50724146541576, "grad_norm": 0.4000622034072876, "learning_rate": 2.8716345235939128e-05, "loss": 0.1797, "step": 28439 }, { "epoch": 0.5072593015374737, "grad_norm": 0.23305268585681915, "learning_rate": 2.8714806015041047e-05, "loss": 0.1224, "step": 28440 }, { "epoch": 0.5072771376591874, "grad_norm": 0.2751278877258301, "learning_rate": 2.8713266779743087e-05, "loss": 0.1696, "step": 28441 }, { "epoch": 0.507294973780901, "grad_norm": 0.28718727827072144, "learning_rate": 2.871172753005123e-05, "loss": 0.2082, "step": 28442 }, { "epoch": 0.5073128099026147, "grad_norm": 0.19972676038742065, "learning_rate": 2.8710188265971434e-05, "loss": 0.1356, "step": 28443 }, { "epoch": 0.5073306460243284, "grad_norm": 0.32930871844291687, "learning_rate": 2.870864898750966e-05, "loss": 0.1624, "step": 28444 }, { "epoch": 0.5073484821460421, "grad_norm": 0.24793067574501038, "learning_rate": 2.8707109694671886e-05, "loss": 0.1065, "step": 28445 }, { "epoch": 0.5073663182677559, "grad_norm": 0.2668672800064087, "learning_rate": 2.8705570387464074e-05, "loss": 0.1249, "step": 28446 }, { "epoch": 0.5073841543894696, "grad_norm": 0.3631262183189392, "learning_rate": 2.8704031065892194e-05, "loss": 0.1614, "step": 28447 }, { "epoch": 0.5074019905111833, "grad_norm": 0.32281559705734253, "learning_rate": 2.870249172996221e-05, "loss": 0.0852, "step": 28448 }, { "epoch": 0.507419826632897, "grad_norm": 0.3494997024536133, "learning_rate": 2.8700952379680086e-05, "loss": 0.1527, "step": 28449 }, { "epoch": 0.5074376627546107, "grad_norm": 0.23304596543312073, "learning_rate": 2.8699413015051796e-05, "loss": 0.1141, "step": 28450 }, { "epoch": 0.5074554988763244, "grad_norm": 0.2574032247066498, "learning_rate": 2.86978736360833e-05, "loss": 0.1534, "step": 28451 }, { "epoch": 0.507473334998038, "grad_norm": 0.18970376253128052, "learning_rate": 2.869633424278057e-05, "loss": 0.1376, "step": 28452 }, { "epoch": 0.5074911711197517, "grad_norm": 0.2882007360458374, "learning_rate": 2.8694794835149575e-05, "loss": 0.1419, "step": 28453 }, { "epoch": 0.5075090072414654, "grad_norm": 0.28692206740379333, "learning_rate": 2.8693255413196274e-05, "loss": 0.1327, "step": 28454 }, { "epoch": 0.5075268433631791, "grad_norm": 0.26517432928085327, "learning_rate": 2.8691715976926642e-05, "loss": 0.082, "step": 28455 }, { "epoch": 0.5075446794848928, "grad_norm": 0.28659650683403015, "learning_rate": 2.869017652634664e-05, "loss": 0.1863, "step": 28456 }, { "epoch": 0.5075625156066065, "grad_norm": 0.2675664722919464, "learning_rate": 2.868863706146225e-05, "loss": 0.1419, "step": 28457 }, { "epoch": 0.5075803517283202, "grad_norm": 0.21762660145759583, "learning_rate": 2.8687097582279417e-05, "loss": 0.1289, "step": 28458 }, { "epoch": 0.5075981878500339, "grad_norm": 0.23660603165626526, "learning_rate": 2.868555808880412e-05, "loss": 0.1544, "step": 28459 }, { "epoch": 0.5076160239717475, "grad_norm": 0.2530672550201416, "learning_rate": 2.8684018581042333e-05, "loss": 0.1611, "step": 28460 }, { "epoch": 0.5076338600934612, "grad_norm": 0.2534952163696289, "learning_rate": 2.8682479059000017e-05, "loss": 0.1222, "step": 28461 }, { "epoch": 0.5076516962151749, "grad_norm": 0.24667608737945557, "learning_rate": 2.8680939522683136e-05, "loss": 0.1207, "step": 28462 }, { "epoch": 0.5076695323368887, "grad_norm": 0.3288152515888214, "learning_rate": 2.867939997209767e-05, "loss": 0.1734, "step": 28463 }, { "epoch": 0.5076873684586024, "grad_norm": 0.2570081651210785, "learning_rate": 2.8677860407249574e-05, "loss": 0.1242, "step": 28464 }, { "epoch": 0.5077052045803161, "grad_norm": 0.2850131690502167, "learning_rate": 2.867632082814482e-05, "loss": 0.1193, "step": 28465 }, { "epoch": 0.5077230407020298, "grad_norm": 0.23317448794841766, "learning_rate": 2.8674781234789378e-05, "loss": 0.1543, "step": 28466 }, { "epoch": 0.5077408768237435, "grad_norm": 0.21266146004199982, "learning_rate": 2.8673241627189212e-05, "loss": 0.1105, "step": 28467 }, { "epoch": 0.5077587129454572, "grad_norm": 0.25855812430381775, "learning_rate": 2.8671702005350298e-05, "loss": 0.1176, "step": 28468 }, { "epoch": 0.5077765490671708, "grad_norm": 0.20435065031051636, "learning_rate": 2.867016236927859e-05, "loss": 0.1333, "step": 28469 }, { "epoch": 0.5077943851888845, "grad_norm": 0.35112375020980835, "learning_rate": 2.8668622718980077e-05, "loss": 0.207, "step": 28470 }, { "epoch": 0.5078122213105982, "grad_norm": 0.40474754571914673, "learning_rate": 2.8667083054460707e-05, "loss": 0.202, "step": 28471 }, { "epoch": 0.5078300574323119, "grad_norm": 0.2701167166233063, "learning_rate": 2.8665543375726454e-05, "loss": 0.1236, "step": 28472 }, { "epoch": 0.5078478935540256, "grad_norm": 0.22002114355564117, "learning_rate": 2.8664003682783298e-05, "loss": 0.0965, "step": 28473 }, { "epoch": 0.5078657296757393, "grad_norm": 0.2836028039455414, "learning_rate": 2.8662463975637195e-05, "loss": 0.1512, "step": 28474 }, { "epoch": 0.507883565797453, "grad_norm": 0.22661170363426208, "learning_rate": 2.8660924254294115e-05, "loss": 0.1311, "step": 28475 }, { "epoch": 0.5079014019191667, "grad_norm": 0.3529321253299713, "learning_rate": 2.865938451876003e-05, "loss": 0.1428, "step": 28476 }, { "epoch": 0.5079192380408803, "grad_norm": 0.20662051439285278, "learning_rate": 2.8657844769040904e-05, "loss": 0.115, "step": 28477 }, { "epoch": 0.507937074162594, "grad_norm": 0.29631802439689636, "learning_rate": 2.865630500514271e-05, "loss": 0.1876, "step": 28478 }, { "epoch": 0.5079549102843077, "grad_norm": 0.22083105146884918, "learning_rate": 2.8654765227071413e-05, "loss": 0.1469, "step": 28479 }, { "epoch": 0.5079727464060215, "grad_norm": 0.24177899956703186, "learning_rate": 2.865322543483298e-05, "loss": 0.1275, "step": 28480 }, { "epoch": 0.5079905825277352, "grad_norm": 0.26579251885414124, "learning_rate": 2.8651685628433396e-05, "loss": 0.1149, "step": 28481 }, { "epoch": 0.5080084186494489, "grad_norm": 0.3118237555027008, "learning_rate": 2.865014580787861e-05, "loss": 0.1271, "step": 28482 }, { "epoch": 0.5080262547711626, "grad_norm": 0.3080024719238281, "learning_rate": 2.8648605973174597e-05, "loss": 0.1481, "step": 28483 }, { "epoch": 0.5080440908928763, "grad_norm": 0.14927025139331818, "learning_rate": 2.8647066124327326e-05, "loss": 0.0791, "step": 28484 }, { "epoch": 0.50806192701459, "grad_norm": 0.2248726338148117, "learning_rate": 2.864552626134277e-05, "loss": 0.1025, "step": 28485 }, { "epoch": 0.5080797631363037, "grad_norm": 0.3614726662635803, "learning_rate": 2.8643986384226895e-05, "loss": 0.1782, "step": 28486 }, { "epoch": 0.5080975992580173, "grad_norm": 0.3290412127971649, "learning_rate": 2.8642446492985665e-05, "loss": 0.1325, "step": 28487 }, { "epoch": 0.508115435379731, "grad_norm": 0.3148520886898041, "learning_rate": 2.864090658762506e-05, "loss": 0.1227, "step": 28488 }, { "epoch": 0.5081332715014447, "grad_norm": 0.25307032465934753, "learning_rate": 2.8639366668151047e-05, "loss": 0.1439, "step": 28489 }, { "epoch": 0.5081511076231584, "grad_norm": 0.3114222288131714, "learning_rate": 2.8637826734569584e-05, "loss": 0.1466, "step": 28490 }, { "epoch": 0.5081689437448721, "grad_norm": 0.19577671587467194, "learning_rate": 2.8636286786886653e-05, "loss": 0.0834, "step": 28491 }, { "epoch": 0.5081867798665858, "grad_norm": 0.2320423424243927, "learning_rate": 2.8634746825108216e-05, "loss": 0.1019, "step": 28492 }, { "epoch": 0.5082046159882995, "grad_norm": 0.24921290576457977, "learning_rate": 2.8633206849240246e-05, "loss": 0.1161, "step": 28493 }, { "epoch": 0.5082224521100132, "grad_norm": 0.24708881974220276, "learning_rate": 2.8631666859288707e-05, "loss": 0.1491, "step": 28494 }, { "epoch": 0.5082402882317268, "grad_norm": 0.2295462042093277, "learning_rate": 2.8630126855259575e-05, "loss": 0.0968, "step": 28495 }, { "epoch": 0.5082581243534405, "grad_norm": 0.2319304198026657, "learning_rate": 2.862858683715882e-05, "loss": 0.1322, "step": 28496 }, { "epoch": 0.5082759604751543, "grad_norm": 0.30353572964668274, "learning_rate": 2.8627046804992406e-05, "loss": 0.1499, "step": 28497 }, { "epoch": 0.508293796596868, "grad_norm": 0.20552609860897064, "learning_rate": 2.862550675876632e-05, "loss": 0.0737, "step": 28498 }, { "epoch": 0.5083116327185817, "grad_norm": 0.2148098349571228, "learning_rate": 2.86239666984865e-05, "loss": 0.1245, "step": 28499 }, { "epoch": 0.5083294688402954, "grad_norm": 0.2352498173713684, "learning_rate": 2.8622426624158938e-05, "loss": 0.1187, "step": 28500 }, { "epoch": 0.5083473049620091, "grad_norm": 0.34014391899108887, "learning_rate": 2.8620886535789597e-05, "loss": 0.1764, "step": 28501 }, { "epoch": 0.5083651410837228, "grad_norm": 0.24722465872764587, "learning_rate": 2.861934643338446e-05, "loss": 0.1969, "step": 28502 }, { "epoch": 0.5083829772054365, "grad_norm": 0.2576411962509155, "learning_rate": 2.8617806316949475e-05, "loss": 0.1372, "step": 28503 }, { "epoch": 0.5084008133271501, "grad_norm": 0.2918015420436859, "learning_rate": 2.8616266186490627e-05, "loss": 0.1298, "step": 28504 }, { "epoch": 0.5084186494488638, "grad_norm": 0.3275103271007538, "learning_rate": 2.8614726042013885e-05, "loss": 0.1697, "step": 28505 }, { "epoch": 0.5084364855705775, "grad_norm": 0.30394643545150757, "learning_rate": 2.8613185883525212e-05, "loss": 0.1108, "step": 28506 }, { "epoch": 0.5084543216922912, "grad_norm": 0.22448478639125824, "learning_rate": 2.861164571103058e-05, "loss": 0.1437, "step": 28507 }, { "epoch": 0.5084721578140049, "grad_norm": 0.30676203966140747, "learning_rate": 2.8610105524535967e-05, "loss": 0.1606, "step": 28508 }, { "epoch": 0.5084899939357186, "grad_norm": 0.2928142845630646, "learning_rate": 2.860856532404734e-05, "loss": 0.1578, "step": 28509 }, { "epoch": 0.5085078300574323, "grad_norm": 0.23866738379001617, "learning_rate": 2.860702510957066e-05, "loss": 0.1044, "step": 28510 }, { "epoch": 0.508525666179146, "grad_norm": 0.4757208228111267, "learning_rate": 2.8605484881111917e-05, "loss": 0.1826, "step": 28511 }, { "epoch": 0.5085435023008597, "grad_norm": 0.33099836111068726, "learning_rate": 2.860394463867706e-05, "loss": 0.1494, "step": 28512 }, { "epoch": 0.5085613384225735, "grad_norm": 0.30314525961875916, "learning_rate": 2.8602404382272075e-05, "loss": 0.1648, "step": 28513 }, { "epoch": 0.5085791745442871, "grad_norm": 0.2715233564376831, "learning_rate": 2.8600864111902913e-05, "loss": 0.1461, "step": 28514 }, { "epoch": 0.5085970106660008, "grad_norm": 0.2651790380477905, "learning_rate": 2.859932382757557e-05, "loss": 0.1876, "step": 28515 }, { "epoch": 0.5086148467877145, "grad_norm": 0.2851487398147583, "learning_rate": 2.8597783529295997e-05, "loss": 0.1122, "step": 28516 }, { "epoch": 0.5086326829094282, "grad_norm": 0.3177490532398224, "learning_rate": 2.859624321707018e-05, "loss": 0.1848, "step": 28517 }, { "epoch": 0.5086505190311419, "grad_norm": 0.3167870342731476, "learning_rate": 2.8594702890904078e-05, "loss": 0.1535, "step": 28518 }, { "epoch": 0.5086683551528556, "grad_norm": 0.3049619495868683, "learning_rate": 2.859316255080367e-05, "loss": 0.1518, "step": 28519 }, { "epoch": 0.5086861912745693, "grad_norm": 0.22853955626487732, "learning_rate": 2.8591622196774925e-05, "loss": 0.1153, "step": 28520 }, { "epoch": 0.508704027396283, "grad_norm": 0.22894343733787537, "learning_rate": 2.85900818288238e-05, "loss": 0.112, "step": 28521 }, { "epoch": 0.5087218635179966, "grad_norm": 0.23513081669807434, "learning_rate": 2.8588541446956286e-05, "loss": 0.1555, "step": 28522 }, { "epoch": 0.5087396996397103, "grad_norm": 0.25258374214172363, "learning_rate": 2.8587001051178343e-05, "loss": 0.14, "step": 28523 }, { "epoch": 0.508757535761424, "grad_norm": 0.4361419081687927, "learning_rate": 2.8585460641495947e-05, "loss": 0.1317, "step": 28524 }, { "epoch": 0.5087753718831377, "grad_norm": 0.1856044977903366, "learning_rate": 2.8583920217915066e-05, "loss": 0.0932, "step": 28525 }, { "epoch": 0.5087932080048514, "grad_norm": 0.28256869316101074, "learning_rate": 2.8582379780441675e-05, "loss": 0.1364, "step": 28526 }, { "epoch": 0.5088110441265651, "grad_norm": 0.2961440980434418, "learning_rate": 2.8580839329081742e-05, "loss": 0.1078, "step": 28527 }, { "epoch": 0.5088288802482788, "grad_norm": 0.21577990055084229, "learning_rate": 2.8579298863841236e-05, "loss": 0.1299, "step": 28528 }, { "epoch": 0.5088467163699925, "grad_norm": 0.2719804644584656, "learning_rate": 2.8577758384726133e-05, "loss": 0.1086, "step": 28529 }, { "epoch": 0.5088645524917063, "grad_norm": 0.3241913914680481, "learning_rate": 2.8576217891742408e-05, "loss": 0.1579, "step": 28530 }, { "epoch": 0.50888238861342, "grad_norm": 0.19262385368347168, "learning_rate": 2.8574677384896016e-05, "loss": 0.0771, "step": 28531 }, { "epoch": 0.5089002247351336, "grad_norm": 0.33867642283439636, "learning_rate": 2.857313686419295e-05, "loss": 0.1581, "step": 28532 }, { "epoch": 0.5089180608568473, "grad_norm": 0.2797696590423584, "learning_rate": 2.8571596329639173e-05, "loss": 0.158, "step": 28533 }, { "epoch": 0.508935896978561, "grad_norm": 0.23965167999267578, "learning_rate": 2.8570055781240645e-05, "loss": 0.1528, "step": 28534 }, { "epoch": 0.5089537331002747, "grad_norm": 0.3141854703426361, "learning_rate": 2.856851521900335e-05, "loss": 0.1523, "step": 28535 }, { "epoch": 0.5089715692219884, "grad_norm": 0.2644658088684082, "learning_rate": 2.856697464293326e-05, "loss": 0.1391, "step": 28536 }, { "epoch": 0.5089894053437021, "grad_norm": 0.2711368203163147, "learning_rate": 2.8565434053036344e-05, "loss": 0.1473, "step": 28537 }, { "epoch": 0.5090072414654158, "grad_norm": 0.23995690047740936, "learning_rate": 2.8563893449318575e-05, "loss": 0.1157, "step": 28538 }, { "epoch": 0.5090250775871294, "grad_norm": 0.35455048084259033, "learning_rate": 2.8562352831785925e-05, "loss": 0.1987, "step": 28539 }, { "epoch": 0.5090429137088431, "grad_norm": 0.24381007254123688, "learning_rate": 2.856081220044436e-05, "loss": 0.0913, "step": 28540 }, { "epoch": 0.5090607498305568, "grad_norm": 0.3457053303718567, "learning_rate": 2.8559271555299865e-05, "loss": 0.1, "step": 28541 }, { "epoch": 0.5090785859522705, "grad_norm": 0.22499744594097137, "learning_rate": 2.8557730896358398e-05, "loss": 0.1109, "step": 28542 }, { "epoch": 0.5090964220739842, "grad_norm": 0.2553752064704895, "learning_rate": 2.8556190223625933e-05, "loss": 0.1445, "step": 28543 }, { "epoch": 0.5091142581956979, "grad_norm": 0.26085275411605835, "learning_rate": 2.855464953710845e-05, "loss": 0.1452, "step": 28544 }, { "epoch": 0.5091320943174116, "grad_norm": 0.3246316611766815, "learning_rate": 2.8553108836811927e-05, "loss": 0.1073, "step": 28545 }, { "epoch": 0.5091499304391253, "grad_norm": 0.26541852951049805, "learning_rate": 2.8551568122742323e-05, "loss": 0.1714, "step": 28546 }, { "epoch": 0.5091677665608391, "grad_norm": 0.25668826699256897, "learning_rate": 2.8550027394905608e-05, "loss": 0.1036, "step": 28547 }, { "epoch": 0.5091856026825528, "grad_norm": 0.3039669394493103, "learning_rate": 2.854848665330776e-05, "loss": 0.11, "step": 28548 }, { "epoch": 0.5092034388042664, "grad_norm": 0.2453691065311432, "learning_rate": 2.8546945897954758e-05, "loss": 0.075, "step": 28549 }, { "epoch": 0.5092212749259801, "grad_norm": 0.48145225644111633, "learning_rate": 2.8545405128852566e-05, "loss": 0.1648, "step": 28550 }, { "epoch": 0.5092391110476938, "grad_norm": 0.3121007978916168, "learning_rate": 2.854386434600716e-05, "loss": 0.135, "step": 28551 }, { "epoch": 0.5092569471694075, "grad_norm": 0.23640026152133942, "learning_rate": 2.854232354942451e-05, "loss": 0.1614, "step": 28552 }, { "epoch": 0.5092747832911212, "grad_norm": 0.29564833641052246, "learning_rate": 2.8540782739110593e-05, "loss": 0.1972, "step": 28553 }, { "epoch": 0.5092926194128349, "grad_norm": 0.2820846140384674, "learning_rate": 2.853924191507138e-05, "loss": 0.1741, "step": 28554 }, { "epoch": 0.5093104555345486, "grad_norm": 0.20693929493427277, "learning_rate": 2.853770107731284e-05, "loss": 0.1018, "step": 28555 }, { "epoch": 0.5093282916562623, "grad_norm": 0.20320770144462585, "learning_rate": 2.8536160225840946e-05, "loss": 0.1558, "step": 28556 }, { "epoch": 0.5093461277779759, "grad_norm": 0.2440221905708313, "learning_rate": 2.8534619360661674e-05, "loss": 0.1045, "step": 28557 }, { "epoch": 0.5093639638996896, "grad_norm": 0.3163812756538391, "learning_rate": 2.8533078481781007e-05, "loss": 0.1478, "step": 28558 }, { "epoch": 0.5093818000214033, "grad_norm": 0.2869386076927185, "learning_rate": 2.8531537589204904e-05, "loss": 0.1386, "step": 28559 }, { "epoch": 0.509399636143117, "grad_norm": 0.22747382521629333, "learning_rate": 2.8529996682939337e-05, "loss": 0.1514, "step": 28560 }, { "epoch": 0.5094174722648307, "grad_norm": 0.21826334297657013, "learning_rate": 2.8528455762990287e-05, "loss": 0.1249, "step": 28561 }, { "epoch": 0.5094353083865444, "grad_norm": 0.258172869682312, "learning_rate": 2.8526914829363716e-05, "loss": 0.1497, "step": 28562 }, { "epoch": 0.5094531445082581, "grad_norm": 0.361729234457016, "learning_rate": 2.852537388206561e-05, "loss": 0.1704, "step": 28563 }, { "epoch": 0.5094709806299719, "grad_norm": 0.24234500527381897, "learning_rate": 2.8523832921101933e-05, "loss": 0.1657, "step": 28564 }, { "epoch": 0.5094888167516856, "grad_norm": 0.23297803103923798, "learning_rate": 2.8522291946478673e-05, "loss": 0.1299, "step": 28565 }, { "epoch": 0.5095066528733992, "grad_norm": 0.2698739171028137, "learning_rate": 2.852075095820178e-05, "loss": 0.1123, "step": 28566 }, { "epoch": 0.5095244889951129, "grad_norm": 0.26719093322753906, "learning_rate": 2.8519209956277254e-05, "loss": 0.1588, "step": 28567 }, { "epoch": 0.5095423251168266, "grad_norm": 0.27653738856315613, "learning_rate": 2.8517668940711046e-05, "loss": 0.0995, "step": 28568 }, { "epoch": 0.5095601612385403, "grad_norm": 0.21178923547267914, "learning_rate": 2.8516127911509143e-05, "loss": 0.127, "step": 28569 }, { "epoch": 0.509577997360254, "grad_norm": 0.22715769708156586, "learning_rate": 2.8514586868677507e-05, "loss": 0.0907, "step": 28570 }, { "epoch": 0.5095958334819677, "grad_norm": 0.3355378806591034, "learning_rate": 2.8513045812222122e-05, "loss": 0.1417, "step": 28571 }, { "epoch": 0.5096136696036814, "grad_norm": 0.3025701642036438, "learning_rate": 2.8511504742148958e-05, "loss": 0.159, "step": 28572 }, { "epoch": 0.5096315057253951, "grad_norm": 0.2599545121192932, "learning_rate": 2.850996365846399e-05, "loss": 0.1723, "step": 28573 }, { "epoch": 0.5096493418471087, "grad_norm": 0.26544317603111267, "learning_rate": 2.850842256117319e-05, "loss": 0.1622, "step": 28574 }, { "epoch": 0.5096671779688224, "grad_norm": 0.21109437942504883, "learning_rate": 2.850688145028253e-05, "loss": 0.1271, "step": 28575 }, { "epoch": 0.5096850140905361, "grad_norm": 0.260470986366272, "learning_rate": 2.850534032579799e-05, "loss": 0.1763, "step": 28576 }, { "epoch": 0.5097028502122498, "grad_norm": 0.2698030471801758, "learning_rate": 2.8503799187725534e-05, "loss": 0.1021, "step": 28577 }, { "epoch": 0.5097206863339635, "grad_norm": 0.4732128381729126, "learning_rate": 2.850225803607115e-05, "loss": 0.1681, "step": 28578 }, { "epoch": 0.5097385224556772, "grad_norm": 0.22712358832359314, "learning_rate": 2.85007168708408e-05, "loss": 0.1406, "step": 28579 }, { "epoch": 0.5097563585773909, "grad_norm": 0.19389115273952484, "learning_rate": 2.8499175692040465e-05, "loss": 0.1336, "step": 28580 }, { "epoch": 0.5097741946991047, "grad_norm": 0.28978896141052246, "learning_rate": 2.8497634499676112e-05, "loss": 0.1558, "step": 28581 }, { "epoch": 0.5097920308208184, "grad_norm": 0.21501219272613525, "learning_rate": 2.8496093293753727e-05, "loss": 0.1477, "step": 28582 }, { "epoch": 0.509809866942532, "grad_norm": 0.25573790073394775, "learning_rate": 2.849455207427927e-05, "loss": 0.1353, "step": 28583 }, { "epoch": 0.5098277030642457, "grad_norm": 0.21876104176044464, "learning_rate": 2.849301084125872e-05, "loss": 0.1711, "step": 28584 }, { "epoch": 0.5098455391859594, "grad_norm": 0.34798502922058105, "learning_rate": 2.8491469594698063e-05, "loss": 0.17, "step": 28585 }, { "epoch": 0.5098633753076731, "grad_norm": 0.22214873135089874, "learning_rate": 2.8489928334603255e-05, "loss": 0.1234, "step": 28586 }, { "epoch": 0.5098812114293868, "grad_norm": 0.2880497872829437, "learning_rate": 2.8488387060980288e-05, "loss": 0.1117, "step": 28587 }, { "epoch": 0.5098990475511005, "grad_norm": 0.2621387243270874, "learning_rate": 2.848684577383512e-05, "loss": 0.1633, "step": 28588 }, { "epoch": 0.5099168836728142, "grad_norm": 0.22223816812038422, "learning_rate": 2.848530447317374e-05, "loss": 0.1015, "step": 28589 }, { "epoch": 0.5099347197945279, "grad_norm": 0.3039596974849701, "learning_rate": 2.8483763159002113e-05, "loss": 0.096, "step": 28590 }, { "epoch": 0.5099525559162416, "grad_norm": 0.25319209694862366, "learning_rate": 2.8482221831326213e-05, "loss": 0.1389, "step": 28591 }, { "epoch": 0.5099703920379552, "grad_norm": 0.25262385606765747, "learning_rate": 2.848068049015202e-05, "loss": 0.1647, "step": 28592 }, { "epoch": 0.5099882281596689, "grad_norm": 0.19870199263095856, "learning_rate": 2.847913913548551e-05, "loss": 0.101, "step": 28593 }, { "epoch": 0.5100060642813826, "grad_norm": 0.27417927980422974, "learning_rate": 2.8477597767332654e-05, "loss": 0.1921, "step": 28594 }, { "epoch": 0.5100239004030963, "grad_norm": 0.4326969087123871, "learning_rate": 2.847605638569943e-05, "loss": 0.1727, "step": 28595 }, { "epoch": 0.51004173652481, "grad_norm": 0.2687114477157593, "learning_rate": 2.8474514990591806e-05, "loss": 0.2068, "step": 28596 }, { "epoch": 0.5100595726465237, "grad_norm": 0.29581889510154724, "learning_rate": 2.8472973582015772e-05, "loss": 0.1286, "step": 28597 }, { "epoch": 0.5100774087682375, "grad_norm": 0.19792883098125458, "learning_rate": 2.847143215997728e-05, "loss": 0.1387, "step": 28598 }, { "epoch": 0.5100952448899512, "grad_norm": 0.2532748579978943, "learning_rate": 2.8469890724482322e-05, "loss": 0.1024, "step": 28599 }, { "epoch": 0.5101130810116649, "grad_norm": 0.2700733542442322, "learning_rate": 2.8468349275536877e-05, "loss": 0.146, "step": 28600 }, { "epoch": 0.5101309171333785, "grad_norm": 0.26082083582878113, "learning_rate": 2.8466807813146902e-05, "loss": 0.164, "step": 28601 }, { "epoch": 0.5101487532550922, "grad_norm": 0.26076796650886536, "learning_rate": 2.8465266337318386e-05, "loss": 0.1446, "step": 28602 }, { "epoch": 0.5101665893768059, "grad_norm": 0.2745343744754791, "learning_rate": 2.84637248480573e-05, "loss": 0.1486, "step": 28603 }, { "epoch": 0.5101844254985196, "grad_norm": 0.21207405626773834, "learning_rate": 2.846218334536962e-05, "loss": 0.1316, "step": 28604 }, { "epoch": 0.5102022616202333, "grad_norm": 0.3733825385570526, "learning_rate": 2.846064182926132e-05, "loss": 0.1324, "step": 28605 }, { "epoch": 0.510220097741947, "grad_norm": 0.195739284157753, "learning_rate": 2.8459100299738384e-05, "loss": 0.1162, "step": 28606 }, { "epoch": 0.5102379338636607, "grad_norm": 0.24634745717048645, "learning_rate": 2.8457558756806773e-05, "loss": 0.1546, "step": 28607 }, { "epoch": 0.5102557699853744, "grad_norm": 0.31158387660980225, "learning_rate": 2.8456017200472478e-05, "loss": 0.1332, "step": 28608 }, { "epoch": 0.510273606107088, "grad_norm": 0.3077595829963684, "learning_rate": 2.8454475630741463e-05, "loss": 0.1154, "step": 28609 }, { "epoch": 0.5102914422288017, "grad_norm": 0.27323028445243835, "learning_rate": 2.8452934047619707e-05, "loss": 0.1267, "step": 28610 }, { "epoch": 0.5103092783505154, "grad_norm": 0.34198784828186035, "learning_rate": 2.8451392451113185e-05, "loss": 0.191, "step": 28611 }, { "epoch": 0.5103271144722291, "grad_norm": 0.2741488218307495, "learning_rate": 2.844985084122787e-05, "loss": 0.1378, "step": 28612 }, { "epoch": 0.5103449505939428, "grad_norm": 0.2992853820323944, "learning_rate": 2.8448309217969748e-05, "loss": 0.1613, "step": 28613 }, { "epoch": 0.5103627867156566, "grad_norm": 0.21999169886112213, "learning_rate": 2.8446767581344787e-05, "loss": 0.1076, "step": 28614 }, { "epoch": 0.5103806228373703, "grad_norm": 0.20979440212249756, "learning_rate": 2.8445225931358965e-05, "loss": 0.1378, "step": 28615 }, { "epoch": 0.510398458959084, "grad_norm": 0.18849465250968933, "learning_rate": 2.8443684268018256e-05, "loss": 0.1507, "step": 28616 }, { "epoch": 0.5104162950807977, "grad_norm": 0.32718199491500854, "learning_rate": 2.844214259132864e-05, "loss": 0.1533, "step": 28617 }, { "epoch": 0.5104341312025114, "grad_norm": 0.1834608018398285, "learning_rate": 2.8440600901296087e-05, "loss": 0.1264, "step": 28618 }, { "epoch": 0.510451967324225, "grad_norm": 0.24632766842842102, "learning_rate": 2.8439059197926576e-05, "loss": 0.1537, "step": 28619 }, { "epoch": 0.5104698034459387, "grad_norm": 0.38008156418800354, "learning_rate": 2.8437517481226085e-05, "loss": 0.174, "step": 28620 }, { "epoch": 0.5104876395676524, "grad_norm": 0.24725519120693207, "learning_rate": 2.843597575120059e-05, "loss": 0.1926, "step": 28621 }, { "epoch": 0.5105054756893661, "grad_norm": 0.3012906610965729, "learning_rate": 2.8434434007856064e-05, "loss": 0.1471, "step": 28622 }, { "epoch": 0.5105233118110798, "grad_norm": 0.22625456750392914, "learning_rate": 2.843289225119849e-05, "loss": 0.1292, "step": 28623 }, { "epoch": 0.5105411479327935, "grad_norm": 0.2392953783273697, "learning_rate": 2.8431350481233837e-05, "loss": 0.1287, "step": 28624 }, { "epoch": 0.5105589840545072, "grad_norm": 0.298268586397171, "learning_rate": 2.842980869796808e-05, "loss": 0.1281, "step": 28625 }, { "epoch": 0.5105768201762209, "grad_norm": 0.49562084674835205, "learning_rate": 2.8428266901407208e-05, "loss": 0.158, "step": 28626 }, { "epoch": 0.5105946562979345, "grad_norm": 0.2313910722732544, "learning_rate": 2.8426725091557182e-05, "loss": 0.1679, "step": 28627 }, { "epoch": 0.5106124924196482, "grad_norm": 0.2302023321390152, "learning_rate": 2.8425183268423993e-05, "loss": 0.1354, "step": 28628 }, { "epoch": 0.5106303285413619, "grad_norm": 0.28523245453834534, "learning_rate": 2.8423641432013602e-05, "loss": 0.1808, "step": 28629 }, { "epoch": 0.5106481646630756, "grad_norm": 0.3322940766811371, "learning_rate": 2.8422099582332008e-05, "loss": 0.1447, "step": 28630 }, { "epoch": 0.5106660007847894, "grad_norm": 0.3432726263999939, "learning_rate": 2.842055771938516e-05, "loss": 0.2508, "step": 28631 }, { "epoch": 0.5106838369065031, "grad_norm": 0.2665051519870758, "learning_rate": 2.8419015843179054e-05, "loss": 0.1281, "step": 28632 }, { "epoch": 0.5107016730282168, "grad_norm": 0.2627442479133606, "learning_rate": 2.841747395371966e-05, "loss": 0.1217, "step": 28633 }, { "epoch": 0.5107195091499305, "grad_norm": 0.29333555698394775, "learning_rate": 2.841593205101296e-05, "loss": 0.1636, "step": 28634 }, { "epoch": 0.5107373452716442, "grad_norm": 0.2493095099925995, "learning_rate": 2.8414390135064922e-05, "loss": 0.1143, "step": 28635 }, { "epoch": 0.5107551813933578, "grad_norm": 0.3318060636520386, "learning_rate": 2.8412848205881537e-05, "loss": 0.2112, "step": 28636 }, { "epoch": 0.5107730175150715, "grad_norm": 0.27241435647010803, "learning_rate": 2.8411306263468775e-05, "loss": 0.113, "step": 28637 }, { "epoch": 0.5107908536367852, "grad_norm": 0.39097660779953003, "learning_rate": 2.8409764307832605e-05, "loss": 0.1383, "step": 28638 }, { "epoch": 0.5108086897584989, "grad_norm": 0.290276437997818, "learning_rate": 2.8408222338979008e-05, "loss": 0.1646, "step": 28639 }, { "epoch": 0.5108265258802126, "grad_norm": 0.23668363690376282, "learning_rate": 2.8406680356913967e-05, "loss": 0.1713, "step": 28640 }, { "epoch": 0.5108443620019263, "grad_norm": 0.23128637671470642, "learning_rate": 2.840513836164346e-05, "loss": 0.1455, "step": 28641 }, { "epoch": 0.51086219812364, "grad_norm": 0.23550868034362793, "learning_rate": 2.8403596353173456e-05, "loss": 0.1782, "step": 28642 }, { "epoch": 0.5108800342453537, "grad_norm": 0.17445312440395355, "learning_rate": 2.8402054331509943e-05, "loss": 0.1261, "step": 28643 }, { "epoch": 0.5108978703670674, "grad_norm": 0.20496228337287903, "learning_rate": 2.8400512296658887e-05, "loss": 0.1285, "step": 28644 }, { "epoch": 0.510915706488781, "grad_norm": 0.3230728507041931, "learning_rate": 2.8398970248626272e-05, "loss": 0.1989, "step": 28645 }, { "epoch": 0.5109335426104947, "grad_norm": 0.26608991622924805, "learning_rate": 2.8397428187418074e-05, "loss": 0.1009, "step": 28646 }, { "epoch": 0.5109513787322084, "grad_norm": 0.26584088802337646, "learning_rate": 2.8395886113040272e-05, "loss": 0.143, "step": 28647 }, { "epoch": 0.5109692148539222, "grad_norm": 0.30108442902565, "learning_rate": 2.8394344025498836e-05, "loss": 0.1051, "step": 28648 }, { "epoch": 0.5109870509756359, "grad_norm": 0.3000594675540924, "learning_rate": 2.839280192479976e-05, "loss": 0.1581, "step": 28649 }, { "epoch": 0.5110048870973496, "grad_norm": 0.2545219957828522, "learning_rate": 2.839125981094901e-05, "loss": 0.1622, "step": 28650 }, { "epoch": 0.5110227232190633, "grad_norm": 0.23320001363754272, "learning_rate": 2.8389717683952566e-05, "loss": 0.1001, "step": 28651 }, { "epoch": 0.511040559340777, "grad_norm": 0.2879813611507416, "learning_rate": 2.8388175543816405e-05, "loss": 0.087, "step": 28652 }, { "epoch": 0.5110583954624907, "grad_norm": 0.257871150970459, "learning_rate": 2.8386633390546493e-05, "loss": 0.1668, "step": 28653 }, { "epoch": 0.5110762315842043, "grad_norm": 0.28548383712768555, "learning_rate": 2.8385091224148834e-05, "loss": 0.1172, "step": 28654 }, { "epoch": 0.511094067705918, "grad_norm": 0.30526936054229736, "learning_rate": 2.8383549044629388e-05, "loss": 0.1627, "step": 28655 }, { "epoch": 0.5111119038276317, "grad_norm": 0.2569371163845062, "learning_rate": 2.838200685199414e-05, "loss": 0.1542, "step": 28656 }, { "epoch": 0.5111297399493454, "grad_norm": 0.24707630276679993, "learning_rate": 2.838046464624906e-05, "loss": 0.1723, "step": 28657 }, { "epoch": 0.5111475760710591, "grad_norm": 0.23386873304843903, "learning_rate": 2.837892242740014e-05, "loss": 0.1189, "step": 28658 }, { "epoch": 0.5111654121927728, "grad_norm": 0.23468729853630066, "learning_rate": 2.8377380195453346e-05, "loss": 0.1238, "step": 28659 }, { "epoch": 0.5111832483144865, "grad_norm": 0.21325302124023438, "learning_rate": 2.837583795041465e-05, "loss": 0.1123, "step": 28660 }, { "epoch": 0.5112010844362002, "grad_norm": 0.21917521953582764, "learning_rate": 2.8374295692290053e-05, "loss": 0.1037, "step": 28661 }, { "epoch": 0.5112189205579138, "grad_norm": 0.2470393180847168, "learning_rate": 2.8372753421085518e-05, "loss": 0.1534, "step": 28662 }, { "epoch": 0.5112367566796275, "grad_norm": 0.2687867283821106, "learning_rate": 2.8371211136807026e-05, "loss": 0.123, "step": 28663 }, { "epoch": 0.5112545928013412, "grad_norm": 0.3077491819858551, "learning_rate": 2.836966883946055e-05, "loss": 0.1182, "step": 28664 }, { "epoch": 0.511272428923055, "grad_norm": 0.1964159458875656, "learning_rate": 2.836812652905208e-05, "loss": 0.1368, "step": 28665 }, { "epoch": 0.5112902650447687, "grad_norm": 0.2795601487159729, "learning_rate": 2.8366584205587588e-05, "loss": 0.132, "step": 28666 }, { "epoch": 0.5113081011664824, "grad_norm": 0.22139166295528412, "learning_rate": 2.8365041869073045e-05, "loss": 0.1265, "step": 28667 }, { "epoch": 0.5113259372881961, "grad_norm": 0.30906009674072266, "learning_rate": 2.8363499519514442e-05, "loss": 0.0853, "step": 28668 }, { "epoch": 0.5113437734099098, "grad_norm": 0.2961386442184448, "learning_rate": 2.8361957156917756e-05, "loss": 0.1125, "step": 28669 }, { "epoch": 0.5113616095316235, "grad_norm": 0.27009424567222595, "learning_rate": 2.836041478128896e-05, "loss": 0.1513, "step": 28670 }, { "epoch": 0.5113794456533372, "grad_norm": 0.26191455125808716, "learning_rate": 2.835887239263404e-05, "loss": 0.1369, "step": 28671 }, { "epoch": 0.5113972817750508, "grad_norm": 0.2946007251739502, "learning_rate": 2.8357329990958963e-05, "loss": 0.1748, "step": 28672 }, { "epoch": 0.5114151178967645, "grad_norm": 0.2295503169298172, "learning_rate": 2.8355787576269723e-05, "loss": 0.1266, "step": 28673 }, { "epoch": 0.5114329540184782, "grad_norm": 0.4403449296951294, "learning_rate": 2.8354245148572283e-05, "loss": 0.1942, "step": 28674 }, { "epoch": 0.5114507901401919, "grad_norm": 0.17378412187099457, "learning_rate": 2.835270270787263e-05, "loss": 0.095, "step": 28675 }, { "epoch": 0.5114686262619056, "grad_norm": 0.29376813769340515, "learning_rate": 2.8351160254176757e-05, "loss": 0.1833, "step": 28676 }, { "epoch": 0.5114864623836193, "grad_norm": 0.28688934445381165, "learning_rate": 2.834961778749061e-05, "loss": 0.1553, "step": 28677 }, { "epoch": 0.511504298505333, "grad_norm": 0.2363552302122116, "learning_rate": 2.8348075307820205e-05, "loss": 0.1763, "step": 28678 }, { "epoch": 0.5115221346270467, "grad_norm": 0.29156407713890076, "learning_rate": 2.8346532815171496e-05, "loss": 0.1538, "step": 28679 }, { "epoch": 0.5115399707487603, "grad_norm": 0.19915670156478882, "learning_rate": 2.8344990309550467e-05, "loss": 0.1316, "step": 28680 }, { "epoch": 0.511557806870474, "grad_norm": 0.2654586434364319, "learning_rate": 2.8343447790963102e-05, "loss": 0.1589, "step": 28681 }, { "epoch": 0.5115756429921878, "grad_norm": 0.283134788274765, "learning_rate": 2.8341905259415384e-05, "loss": 0.1719, "step": 28682 }, { "epoch": 0.5115934791139015, "grad_norm": 0.2221931517124176, "learning_rate": 2.8340362714913283e-05, "loss": 0.1279, "step": 28683 }, { "epoch": 0.5116113152356152, "grad_norm": 0.23733840882778168, "learning_rate": 2.8338820157462786e-05, "loss": 0.1268, "step": 28684 }, { "epoch": 0.5116291513573289, "grad_norm": 0.23013168573379517, "learning_rate": 2.833727758706986e-05, "loss": 0.095, "step": 28685 }, { "epoch": 0.5116469874790426, "grad_norm": 0.24538858234882355, "learning_rate": 2.8335735003740504e-05, "loss": 0.1708, "step": 28686 }, { "epoch": 0.5116648236007563, "grad_norm": 0.27597978711128235, "learning_rate": 2.833419240748068e-05, "loss": 0.1391, "step": 28687 }, { "epoch": 0.51168265972247, "grad_norm": 0.2346007227897644, "learning_rate": 2.8332649798296373e-05, "loss": 0.1523, "step": 28688 }, { "epoch": 0.5117004958441836, "grad_norm": 0.19158942997455597, "learning_rate": 2.8331107176193573e-05, "loss": 0.1112, "step": 28689 }, { "epoch": 0.5117183319658973, "grad_norm": 0.24004830420017242, "learning_rate": 2.8329564541178243e-05, "loss": 0.1208, "step": 28690 }, { "epoch": 0.511736168087611, "grad_norm": 0.24585741758346558, "learning_rate": 2.832802189325638e-05, "loss": 0.1404, "step": 28691 }, { "epoch": 0.5117540042093247, "grad_norm": 0.2926566004753113, "learning_rate": 2.8326479232433945e-05, "loss": 0.1064, "step": 28692 }, { "epoch": 0.5117718403310384, "grad_norm": 0.25121474266052246, "learning_rate": 2.8324936558716934e-05, "loss": 0.1604, "step": 28693 }, { "epoch": 0.5117896764527521, "grad_norm": 0.3177691102027893, "learning_rate": 2.8323393872111314e-05, "loss": 0.1291, "step": 28694 }, { "epoch": 0.5118075125744658, "grad_norm": 0.2946058213710785, "learning_rate": 2.8321851172623075e-05, "loss": 0.1448, "step": 28695 }, { "epoch": 0.5118253486961795, "grad_norm": 0.28573042154312134, "learning_rate": 2.8320308460258194e-05, "loss": 0.1375, "step": 28696 }, { "epoch": 0.5118431848178931, "grad_norm": 0.3390047550201416, "learning_rate": 2.831876573502265e-05, "loss": 0.127, "step": 28697 }, { "epoch": 0.5118610209396068, "grad_norm": 0.1729322373867035, "learning_rate": 2.831722299692242e-05, "loss": 0.0932, "step": 28698 }, { "epoch": 0.5118788570613206, "grad_norm": 0.2633526921272278, "learning_rate": 2.83156802459635e-05, "loss": 0.1593, "step": 28699 }, { "epoch": 0.5118966931830343, "grad_norm": 0.3222138285636902, "learning_rate": 2.8314137482151847e-05, "loss": 0.1141, "step": 28700 }, { "epoch": 0.511914529304748, "grad_norm": 0.19847998023033142, "learning_rate": 2.8312594705493455e-05, "loss": 0.1185, "step": 28701 }, { "epoch": 0.5119323654264617, "grad_norm": 0.23680081963539124, "learning_rate": 2.83110519159943e-05, "loss": 0.1294, "step": 28702 }, { "epoch": 0.5119502015481754, "grad_norm": 0.26284313201904297, "learning_rate": 2.8309509113660365e-05, "loss": 0.1454, "step": 28703 }, { "epoch": 0.5119680376698891, "grad_norm": 0.2582609951496124, "learning_rate": 2.8307966298497634e-05, "loss": 0.1414, "step": 28704 }, { "epoch": 0.5119858737916028, "grad_norm": 0.2555851936340332, "learning_rate": 2.8306423470512078e-05, "loss": 0.1085, "step": 28705 }, { "epoch": 0.5120037099133165, "grad_norm": 0.2710530459880829, "learning_rate": 2.830488062970969e-05, "loss": 0.1287, "step": 28706 }, { "epoch": 0.5120215460350301, "grad_norm": 0.24250569939613342, "learning_rate": 2.830333777609644e-05, "loss": 0.1305, "step": 28707 }, { "epoch": 0.5120393821567438, "grad_norm": 0.373189240694046, "learning_rate": 2.8301794909678302e-05, "loss": 0.0808, "step": 28708 }, { "epoch": 0.5120572182784575, "grad_norm": 0.29538479447364807, "learning_rate": 2.8300252030461273e-05, "loss": 0.1275, "step": 28709 }, { "epoch": 0.5120750544001712, "grad_norm": 0.25075945258140564, "learning_rate": 2.8298709138451335e-05, "loss": 0.1134, "step": 28710 }, { "epoch": 0.5120928905218849, "grad_norm": 0.44663530588150024, "learning_rate": 2.829716623365445e-05, "loss": 0.1212, "step": 28711 }, { "epoch": 0.5121107266435986, "grad_norm": 0.3136363923549652, "learning_rate": 2.829562331607662e-05, "loss": 0.2078, "step": 28712 }, { "epoch": 0.5121285627653123, "grad_norm": 0.3370586931705475, "learning_rate": 2.8294080385723808e-05, "loss": 0.1434, "step": 28713 }, { "epoch": 0.512146398887026, "grad_norm": 0.24719923734664917, "learning_rate": 2.8292537442602008e-05, "loss": 0.1783, "step": 28714 }, { "epoch": 0.5121642350087398, "grad_norm": 0.17714382708072662, "learning_rate": 2.8290994486717194e-05, "loss": 0.1159, "step": 28715 }, { "epoch": 0.5121820711304534, "grad_norm": 0.27581316232681274, "learning_rate": 2.828945151807534e-05, "loss": 0.1695, "step": 28716 }, { "epoch": 0.5121999072521671, "grad_norm": 0.3174869120121002, "learning_rate": 2.828790853668245e-05, "loss": 0.2087, "step": 28717 }, { "epoch": 0.5122177433738808, "grad_norm": 0.22335317730903625, "learning_rate": 2.828636554254448e-05, "loss": 0.139, "step": 28718 }, { "epoch": 0.5122355794955945, "grad_norm": 0.5008512735366821, "learning_rate": 2.828482253566743e-05, "loss": 0.2166, "step": 28719 }, { "epoch": 0.5122534156173082, "grad_norm": 0.22603140771389008, "learning_rate": 2.8283279516057266e-05, "loss": 0.165, "step": 28720 }, { "epoch": 0.5122712517390219, "grad_norm": 0.22612769901752472, "learning_rate": 2.8281736483719983e-05, "loss": 0.1707, "step": 28721 }, { "epoch": 0.5122890878607356, "grad_norm": 0.3717956840991974, "learning_rate": 2.828019343866155e-05, "loss": 0.1535, "step": 28722 }, { "epoch": 0.5123069239824493, "grad_norm": 0.27614501118659973, "learning_rate": 2.8278650380887956e-05, "loss": 0.1075, "step": 28723 }, { "epoch": 0.512324760104163, "grad_norm": 0.22543823719024658, "learning_rate": 2.827710731040518e-05, "loss": 0.1443, "step": 28724 }, { "epoch": 0.5123425962258766, "grad_norm": 0.3627166748046875, "learning_rate": 2.827556422721921e-05, "loss": 0.0981, "step": 28725 }, { "epoch": 0.5123604323475903, "grad_norm": 0.277536541223526, "learning_rate": 2.8274021131336014e-05, "loss": 0.1323, "step": 28726 }, { "epoch": 0.512378268469304, "grad_norm": 0.3194122314453125, "learning_rate": 2.827247802276159e-05, "loss": 0.1264, "step": 28727 }, { "epoch": 0.5123961045910177, "grad_norm": 0.30672580003738403, "learning_rate": 2.8270934901501906e-05, "loss": 0.1662, "step": 28728 }, { "epoch": 0.5124139407127314, "grad_norm": 0.49692416191101074, "learning_rate": 2.8269391767562947e-05, "loss": 0.1486, "step": 28729 }, { "epoch": 0.5124317768344451, "grad_norm": 0.2641540467739105, "learning_rate": 2.8267848620950697e-05, "loss": 0.1231, "step": 28730 }, { "epoch": 0.5124496129561588, "grad_norm": 0.20069244503974915, "learning_rate": 2.8266305461671138e-05, "loss": 0.1016, "step": 28731 }, { "epoch": 0.5124674490778726, "grad_norm": 0.30080023407936096, "learning_rate": 2.8264762289730252e-05, "loss": 0.1103, "step": 28732 }, { "epoch": 0.5124852851995862, "grad_norm": 0.275672048330307, "learning_rate": 2.826321910513402e-05, "loss": 0.1384, "step": 28733 }, { "epoch": 0.5125031213212999, "grad_norm": 0.17145149409770966, "learning_rate": 2.8261675907888425e-05, "loss": 0.0755, "step": 28734 }, { "epoch": 0.5125209574430136, "grad_norm": 0.3689696788787842, "learning_rate": 2.8260132697999443e-05, "loss": 0.155, "step": 28735 }, { "epoch": 0.5125387935647273, "grad_norm": 0.24058884382247925, "learning_rate": 2.825858947547306e-05, "loss": 0.1661, "step": 28736 }, { "epoch": 0.512556629686441, "grad_norm": 0.3380197584629059, "learning_rate": 2.825704624031526e-05, "loss": 0.1562, "step": 28737 }, { "epoch": 0.5125744658081547, "grad_norm": 0.23685264587402344, "learning_rate": 2.8255502992532028e-05, "loss": 0.086, "step": 28738 }, { "epoch": 0.5125923019298684, "grad_norm": 0.28849929571151733, "learning_rate": 2.825395973212934e-05, "loss": 0.1409, "step": 28739 }, { "epoch": 0.5126101380515821, "grad_norm": 0.2545168995857239, "learning_rate": 2.825241645911318e-05, "loss": 0.1142, "step": 28740 }, { "epoch": 0.5126279741732958, "grad_norm": 0.20858325064182281, "learning_rate": 2.825087317348953e-05, "loss": 0.1255, "step": 28741 }, { "epoch": 0.5126458102950094, "grad_norm": 0.25887784361839294, "learning_rate": 2.8249329875264376e-05, "loss": 0.1615, "step": 28742 }, { "epoch": 0.5126636464167231, "grad_norm": 0.2657526135444641, "learning_rate": 2.824778656444369e-05, "loss": 0.1299, "step": 28743 }, { "epoch": 0.5126814825384368, "grad_norm": 0.32634103298187256, "learning_rate": 2.824624324103346e-05, "loss": 0.1199, "step": 28744 }, { "epoch": 0.5126993186601505, "grad_norm": 0.28554731607437134, "learning_rate": 2.824469990503968e-05, "loss": 0.1511, "step": 28745 }, { "epoch": 0.5127171547818642, "grad_norm": 0.2940383851528168, "learning_rate": 2.8243156556468314e-05, "loss": 0.1381, "step": 28746 }, { "epoch": 0.5127349909035779, "grad_norm": 0.24180209636688232, "learning_rate": 2.824161319532536e-05, "loss": 0.0842, "step": 28747 }, { "epoch": 0.5127528270252916, "grad_norm": 0.333478182554245, "learning_rate": 2.8240069821616788e-05, "loss": 0.1247, "step": 28748 }, { "epoch": 0.5127706631470054, "grad_norm": 0.26317405700683594, "learning_rate": 2.8238526435348594e-05, "loss": 0.1552, "step": 28749 }, { "epoch": 0.5127884992687191, "grad_norm": 0.25221842527389526, "learning_rate": 2.8236983036526742e-05, "loss": 0.1411, "step": 28750 }, { "epoch": 0.5128063353904327, "grad_norm": 0.3700776994228363, "learning_rate": 2.8235439625157235e-05, "loss": 0.1272, "step": 28751 }, { "epoch": 0.5128241715121464, "grad_norm": 0.21291691064834595, "learning_rate": 2.8233896201246036e-05, "loss": 0.1165, "step": 28752 }, { "epoch": 0.5128420076338601, "grad_norm": 0.24549849331378937, "learning_rate": 2.8232352764799153e-05, "loss": 0.1523, "step": 28753 }, { "epoch": 0.5128598437555738, "grad_norm": 0.22554031014442444, "learning_rate": 2.8230809315822542e-05, "loss": 0.1307, "step": 28754 }, { "epoch": 0.5128776798772875, "grad_norm": 0.3477083146572113, "learning_rate": 2.8229265854322206e-05, "loss": 0.1703, "step": 28755 }, { "epoch": 0.5128955159990012, "grad_norm": 0.2978888750076294, "learning_rate": 2.8227722380304116e-05, "loss": 0.1492, "step": 28756 }, { "epoch": 0.5129133521207149, "grad_norm": 0.19415195286273956, "learning_rate": 2.822617889377426e-05, "loss": 0.1371, "step": 28757 }, { "epoch": 0.5129311882424286, "grad_norm": 0.19340279698371887, "learning_rate": 2.8224635394738617e-05, "loss": 0.1135, "step": 28758 }, { "epoch": 0.5129490243641422, "grad_norm": 0.21840186417102814, "learning_rate": 2.8223091883203178e-05, "loss": 0.1527, "step": 28759 }, { "epoch": 0.5129668604858559, "grad_norm": 0.22684365510940552, "learning_rate": 2.8221548359173922e-05, "loss": 0.1246, "step": 28760 }, { "epoch": 0.5129846966075696, "grad_norm": 0.29964983463287354, "learning_rate": 2.822000482265683e-05, "loss": 0.1442, "step": 28761 }, { "epoch": 0.5130025327292833, "grad_norm": 0.258208304643631, "learning_rate": 2.821846127365789e-05, "loss": 0.1426, "step": 28762 }, { "epoch": 0.513020368850997, "grad_norm": 0.24436752498149872, "learning_rate": 2.8216917712183077e-05, "loss": 0.1582, "step": 28763 }, { "epoch": 0.5130382049727107, "grad_norm": 0.21674521267414093, "learning_rate": 2.8215374138238382e-05, "loss": 0.0975, "step": 28764 }, { "epoch": 0.5130560410944244, "grad_norm": 0.24736134707927704, "learning_rate": 2.8213830551829784e-05, "loss": 0.1507, "step": 28765 }, { "epoch": 0.5130738772161382, "grad_norm": 0.3155052661895752, "learning_rate": 2.8212286952963273e-05, "loss": 0.0944, "step": 28766 }, { "epoch": 0.5130917133378519, "grad_norm": 0.2506276071071625, "learning_rate": 2.821074334164483e-05, "loss": 0.1681, "step": 28767 }, { "epoch": 0.5131095494595656, "grad_norm": 0.28828078508377075, "learning_rate": 2.8209199717880435e-05, "loss": 0.1197, "step": 28768 }, { "epoch": 0.5131273855812792, "grad_norm": 0.29900696873664856, "learning_rate": 2.8207656081676078e-05, "loss": 0.1812, "step": 28769 }, { "epoch": 0.5131452217029929, "grad_norm": 0.39720383286476135, "learning_rate": 2.820611243303773e-05, "loss": 0.2044, "step": 28770 }, { "epoch": 0.5131630578247066, "grad_norm": 0.19839033484458923, "learning_rate": 2.8204568771971385e-05, "loss": 0.1195, "step": 28771 }, { "epoch": 0.5131808939464203, "grad_norm": 0.3253510594367981, "learning_rate": 2.8203025098483022e-05, "loss": 0.1982, "step": 28772 }, { "epoch": 0.513198730068134, "grad_norm": 0.28392675518989563, "learning_rate": 2.8201481412578634e-05, "loss": 0.1196, "step": 28773 }, { "epoch": 0.5132165661898477, "grad_norm": 0.2275567650794983, "learning_rate": 2.819993771426419e-05, "loss": 0.1427, "step": 28774 }, { "epoch": 0.5132344023115614, "grad_norm": 0.18309307098388672, "learning_rate": 2.819839400354569e-05, "loss": 0.1007, "step": 28775 }, { "epoch": 0.513252238433275, "grad_norm": 0.25341901183128357, "learning_rate": 2.8196850280429103e-05, "loss": 0.14, "step": 28776 }, { "epoch": 0.5132700745549887, "grad_norm": 0.2880527377128601, "learning_rate": 2.819530654492043e-05, "loss": 0.1697, "step": 28777 }, { "epoch": 0.5132879106767024, "grad_norm": 0.27188563346862793, "learning_rate": 2.819376279702564e-05, "loss": 0.1272, "step": 28778 }, { "epoch": 0.5133057467984161, "grad_norm": 0.3012101352214813, "learning_rate": 2.8192219036750716e-05, "loss": 0.1609, "step": 28779 }, { "epoch": 0.5133235829201298, "grad_norm": 0.3392595648765564, "learning_rate": 2.8190675264101657e-05, "loss": 0.1441, "step": 28780 }, { "epoch": 0.5133414190418435, "grad_norm": 0.25709715485572815, "learning_rate": 2.8189131479084436e-05, "loss": 0.1336, "step": 28781 }, { "epoch": 0.5133592551635572, "grad_norm": 0.26314452290534973, "learning_rate": 2.8187587681705047e-05, "loss": 0.0769, "step": 28782 }, { "epoch": 0.513377091285271, "grad_norm": 0.32637593150138855, "learning_rate": 2.818604387196946e-05, "loss": 0.1257, "step": 28783 }, { "epoch": 0.5133949274069847, "grad_norm": 0.2575835883617401, "learning_rate": 2.8184500049883662e-05, "loss": 0.1775, "step": 28784 }, { "epoch": 0.5134127635286984, "grad_norm": 0.2551773190498352, "learning_rate": 2.8182956215453642e-05, "loss": 0.0962, "step": 28785 }, { "epoch": 0.513430599650412, "grad_norm": 0.31901121139526367, "learning_rate": 2.8181412368685395e-05, "loss": 0.1381, "step": 28786 }, { "epoch": 0.5134484357721257, "grad_norm": 0.3186083137989044, "learning_rate": 2.8179868509584884e-05, "loss": 0.1079, "step": 28787 }, { "epoch": 0.5134662718938394, "grad_norm": 0.24258184432983398, "learning_rate": 2.8178324638158115e-05, "loss": 0.1428, "step": 28788 }, { "epoch": 0.5134841080155531, "grad_norm": 0.29395216703414917, "learning_rate": 2.8176780754411053e-05, "loss": 0.1395, "step": 28789 }, { "epoch": 0.5135019441372668, "grad_norm": 0.29596075415611267, "learning_rate": 2.8175236858349696e-05, "loss": 0.178, "step": 28790 }, { "epoch": 0.5135197802589805, "grad_norm": 0.33360156416893005, "learning_rate": 2.8173692949980023e-05, "loss": 0.2014, "step": 28791 }, { "epoch": 0.5135376163806942, "grad_norm": 0.3463611900806427, "learning_rate": 2.8172149029308016e-05, "loss": 0.1489, "step": 28792 }, { "epoch": 0.5135554525024079, "grad_norm": 0.2758581340312958, "learning_rate": 2.8170605096339665e-05, "loss": 0.1172, "step": 28793 }, { "epoch": 0.5135732886241215, "grad_norm": 0.2756361663341522, "learning_rate": 2.816906115108096e-05, "loss": 0.1612, "step": 28794 }, { "epoch": 0.5135911247458352, "grad_norm": 0.2590934634208679, "learning_rate": 2.8167517193537878e-05, "loss": 0.1609, "step": 28795 }, { "epoch": 0.5136089608675489, "grad_norm": 0.28928935527801514, "learning_rate": 2.81659732237164e-05, "loss": 0.159, "step": 28796 }, { "epoch": 0.5136267969892626, "grad_norm": 0.28954124450683594, "learning_rate": 2.8164429241622525e-05, "loss": 0.1501, "step": 28797 }, { "epoch": 0.5136446331109763, "grad_norm": 0.24776677787303925, "learning_rate": 2.8162885247262222e-05, "loss": 0.1319, "step": 28798 }, { "epoch": 0.51366246923269, "grad_norm": 0.20022942125797272, "learning_rate": 2.816134124064148e-05, "loss": 0.1087, "step": 28799 }, { "epoch": 0.5136803053544038, "grad_norm": 0.2850240170955658, "learning_rate": 2.8159797221766293e-05, "loss": 0.1373, "step": 28800 }, { "epoch": 0.5136981414761175, "grad_norm": 0.29663124680519104, "learning_rate": 2.8158253190642643e-05, "loss": 0.1874, "step": 28801 }, { "epoch": 0.5137159775978312, "grad_norm": 0.30229130387306213, "learning_rate": 2.8156709147276504e-05, "loss": 0.1656, "step": 28802 }, { "epoch": 0.5137338137195449, "grad_norm": 0.21825046837329865, "learning_rate": 2.815516509167388e-05, "loss": 0.1447, "step": 28803 }, { "epoch": 0.5137516498412585, "grad_norm": 0.1991797536611557, "learning_rate": 2.8153621023840744e-05, "loss": 0.1276, "step": 28804 }, { "epoch": 0.5137694859629722, "grad_norm": 0.2877190411090851, "learning_rate": 2.815207694378308e-05, "loss": 0.0983, "step": 28805 }, { "epoch": 0.5137873220846859, "grad_norm": 0.42595672607421875, "learning_rate": 2.815053285150688e-05, "loss": 0.1153, "step": 28806 }, { "epoch": 0.5138051582063996, "grad_norm": 0.35596680641174316, "learning_rate": 2.8148988747018124e-05, "loss": 0.1587, "step": 28807 }, { "epoch": 0.5138229943281133, "grad_norm": 0.30313313007354736, "learning_rate": 2.8147444630322805e-05, "loss": 0.1167, "step": 28808 }, { "epoch": 0.513840830449827, "grad_norm": 0.23832905292510986, "learning_rate": 2.8145900501426897e-05, "loss": 0.1077, "step": 28809 }, { "epoch": 0.5138586665715407, "grad_norm": 0.26534879207611084, "learning_rate": 2.81443563603364e-05, "loss": 0.1696, "step": 28810 }, { "epoch": 0.5138765026932544, "grad_norm": 0.20336323976516724, "learning_rate": 2.814281220705728e-05, "loss": 0.1211, "step": 28811 }, { "epoch": 0.513894338814968, "grad_norm": 0.27276936173439026, "learning_rate": 2.8141268041595542e-05, "loss": 0.1533, "step": 28812 }, { "epoch": 0.5139121749366817, "grad_norm": 0.24836784601211548, "learning_rate": 2.8139723863957157e-05, "loss": 0.0976, "step": 28813 }, { "epoch": 0.5139300110583954, "grad_norm": 0.20647704601287842, "learning_rate": 2.813817967414813e-05, "loss": 0.1285, "step": 28814 }, { "epoch": 0.5139478471801091, "grad_norm": 0.22772224247455597, "learning_rate": 2.8136635472174427e-05, "loss": 0.1249, "step": 28815 }, { "epoch": 0.5139656833018229, "grad_norm": 0.23963771760463715, "learning_rate": 2.8135091258042045e-05, "loss": 0.1704, "step": 28816 }, { "epoch": 0.5139835194235366, "grad_norm": 0.24223528802394867, "learning_rate": 2.8133547031756962e-05, "loss": 0.1176, "step": 28817 }, { "epoch": 0.5140013555452503, "grad_norm": 0.22984950244426727, "learning_rate": 2.8132002793325173e-05, "loss": 0.1379, "step": 28818 }, { "epoch": 0.514019191666964, "grad_norm": 0.2879546284675598, "learning_rate": 2.8130458542752657e-05, "loss": 0.1281, "step": 28819 }, { "epoch": 0.5140370277886777, "grad_norm": 0.2941230237483978, "learning_rate": 2.8128914280045405e-05, "loss": 0.1844, "step": 28820 }, { "epoch": 0.5140548639103913, "grad_norm": 0.23569253087043762, "learning_rate": 2.8127370005209397e-05, "loss": 0.1205, "step": 28821 }, { "epoch": 0.514072700032105, "grad_norm": 0.29706260561943054, "learning_rate": 2.8125825718250627e-05, "loss": 0.1731, "step": 28822 }, { "epoch": 0.5140905361538187, "grad_norm": 0.3536146581172943, "learning_rate": 2.8124281419175073e-05, "loss": 0.1269, "step": 28823 }, { "epoch": 0.5141083722755324, "grad_norm": 0.2389230877161026, "learning_rate": 2.8122737107988727e-05, "loss": 0.125, "step": 28824 }, { "epoch": 0.5141262083972461, "grad_norm": 0.33298254013061523, "learning_rate": 2.8121192784697576e-05, "loss": 0.1626, "step": 28825 }, { "epoch": 0.5141440445189598, "grad_norm": 0.3502786457538605, "learning_rate": 2.8119648449307596e-05, "loss": 0.1093, "step": 28826 }, { "epoch": 0.5141618806406735, "grad_norm": 0.18796348571777344, "learning_rate": 2.8118104101824784e-05, "loss": 0.1098, "step": 28827 }, { "epoch": 0.5141797167623872, "grad_norm": 0.23002715408802032, "learning_rate": 2.8116559742255123e-05, "loss": 0.1131, "step": 28828 }, { "epoch": 0.5141975528841009, "grad_norm": 0.24341636896133423, "learning_rate": 2.8115015370604608e-05, "loss": 0.1394, "step": 28829 }, { "epoch": 0.5142153890058145, "grad_norm": 0.365753173828125, "learning_rate": 2.8113470986879207e-05, "loss": 0.1331, "step": 28830 }, { "epoch": 0.5142332251275282, "grad_norm": 0.2563750147819519, "learning_rate": 2.8111926591084926e-05, "loss": 0.1522, "step": 28831 }, { "epoch": 0.5142510612492419, "grad_norm": 0.2598399519920349, "learning_rate": 2.8110382183227736e-05, "loss": 0.1143, "step": 28832 }, { "epoch": 0.5142688973709557, "grad_norm": 0.3067164123058319, "learning_rate": 2.810883776331364e-05, "loss": 0.1051, "step": 28833 }, { "epoch": 0.5142867334926694, "grad_norm": 0.20387589931488037, "learning_rate": 2.8107293331348605e-05, "loss": 0.1195, "step": 28834 }, { "epoch": 0.5143045696143831, "grad_norm": 0.21908961236476898, "learning_rate": 2.810574888733863e-05, "loss": 0.1221, "step": 28835 }, { "epoch": 0.5143224057360968, "grad_norm": 0.3255521357059479, "learning_rate": 2.81042044312897e-05, "loss": 0.174, "step": 28836 }, { "epoch": 0.5143402418578105, "grad_norm": 0.24000081419944763, "learning_rate": 2.81026599632078e-05, "loss": 0.1527, "step": 28837 }, { "epoch": 0.5143580779795242, "grad_norm": 0.24934221804141998, "learning_rate": 2.810111548309892e-05, "loss": 0.1142, "step": 28838 }, { "epoch": 0.5143759141012378, "grad_norm": 0.28087320923805237, "learning_rate": 2.809957099096905e-05, "loss": 0.153, "step": 28839 }, { "epoch": 0.5143937502229515, "grad_norm": 0.269821435213089, "learning_rate": 2.8098026486824165e-05, "loss": 0.1001, "step": 28840 }, { "epoch": 0.5144115863446652, "grad_norm": 0.24234291911125183, "learning_rate": 2.8096481970670256e-05, "loss": 0.161, "step": 28841 }, { "epoch": 0.5144294224663789, "grad_norm": 0.22775107622146606, "learning_rate": 2.8094937442513324e-05, "loss": 0.1224, "step": 28842 }, { "epoch": 0.5144472585880926, "grad_norm": 0.332675039768219, "learning_rate": 2.809339290235934e-05, "loss": 0.2071, "step": 28843 }, { "epoch": 0.5144650947098063, "grad_norm": 0.18648183345794678, "learning_rate": 2.80918483502143e-05, "loss": 0.1092, "step": 28844 }, { "epoch": 0.51448293083152, "grad_norm": 0.24979731440544128, "learning_rate": 2.8090303786084183e-05, "loss": 0.1822, "step": 28845 }, { "epoch": 0.5145007669532337, "grad_norm": 0.25101134181022644, "learning_rate": 2.8088759209974984e-05, "loss": 0.1212, "step": 28846 }, { "epoch": 0.5145186030749473, "grad_norm": 0.25630924105644226, "learning_rate": 2.8087214621892687e-05, "loss": 0.2035, "step": 28847 }, { "epoch": 0.514536439196661, "grad_norm": 0.23433223366737366, "learning_rate": 2.808567002184328e-05, "loss": 0.1194, "step": 28848 }, { "epoch": 0.5145542753183747, "grad_norm": 0.2651732265949249, "learning_rate": 2.808412540983275e-05, "loss": 0.1289, "step": 28849 }, { "epoch": 0.5145721114400885, "grad_norm": 0.29305675625801086, "learning_rate": 2.8082580785867085e-05, "loss": 0.2218, "step": 28850 }, { "epoch": 0.5145899475618022, "grad_norm": 0.24327361583709717, "learning_rate": 2.808103614995228e-05, "loss": 0.116, "step": 28851 }, { "epoch": 0.5146077836835159, "grad_norm": 0.3182009756565094, "learning_rate": 2.80794915020943e-05, "loss": 0.1883, "step": 28852 }, { "epoch": 0.5146256198052296, "grad_norm": 0.21386811137199402, "learning_rate": 2.8077946842299158e-05, "loss": 0.1179, "step": 28853 }, { "epoch": 0.5146434559269433, "grad_norm": 0.28595349192619324, "learning_rate": 2.8076402170572825e-05, "loss": 0.1424, "step": 28854 }, { "epoch": 0.514661292048657, "grad_norm": 0.3199631869792938, "learning_rate": 2.8074857486921298e-05, "loss": 0.1098, "step": 28855 }, { "epoch": 0.5146791281703706, "grad_norm": 0.21425190567970276, "learning_rate": 2.8073312791350554e-05, "loss": 0.0952, "step": 28856 }, { "epoch": 0.5146969642920843, "grad_norm": 0.20657610893249512, "learning_rate": 2.80717680838666e-05, "loss": 0.0961, "step": 28857 }, { "epoch": 0.514714800413798, "grad_norm": 0.23270902037620544, "learning_rate": 2.80702233644754e-05, "loss": 0.132, "step": 28858 }, { "epoch": 0.5147326365355117, "grad_norm": 0.29292598366737366, "learning_rate": 2.8068678633182967e-05, "loss": 0.1006, "step": 28859 }, { "epoch": 0.5147504726572254, "grad_norm": 0.36392742395401, "learning_rate": 2.8067133889995273e-05, "loss": 0.1672, "step": 28860 }, { "epoch": 0.5147683087789391, "grad_norm": 0.2515296936035156, "learning_rate": 2.8065589134918296e-05, "loss": 0.1099, "step": 28861 }, { "epoch": 0.5147861449006528, "grad_norm": 0.46453461050987244, "learning_rate": 2.806404436795805e-05, "loss": 0.1492, "step": 28862 }, { "epoch": 0.5148039810223665, "grad_norm": 0.28568825125694275, "learning_rate": 2.80624995891205e-05, "loss": 0.1408, "step": 28863 }, { "epoch": 0.5148218171440802, "grad_norm": 0.21573366224765778, "learning_rate": 2.8060954798411656e-05, "loss": 0.1344, "step": 28864 }, { "epoch": 0.5148396532657938, "grad_norm": 0.3369169235229492, "learning_rate": 2.8059409995837482e-05, "loss": 0.1965, "step": 28865 }, { "epoch": 0.5148574893875075, "grad_norm": 0.3836381733417511, "learning_rate": 2.8057865181403987e-05, "loss": 0.124, "step": 28866 }, { "epoch": 0.5148753255092213, "grad_norm": 0.2967154085636139, "learning_rate": 2.8056320355117143e-05, "loss": 0.1414, "step": 28867 }, { "epoch": 0.514893161630935, "grad_norm": 0.22564348578453064, "learning_rate": 2.8054775516982944e-05, "loss": 0.0991, "step": 28868 }, { "epoch": 0.5149109977526487, "grad_norm": 0.3808143734931946, "learning_rate": 2.8053230667007387e-05, "loss": 0.2282, "step": 28869 }, { "epoch": 0.5149288338743624, "grad_norm": 0.2590887248516083, "learning_rate": 2.8051685805196455e-05, "loss": 0.1223, "step": 28870 }, { "epoch": 0.5149466699960761, "grad_norm": 0.30746832489967346, "learning_rate": 2.805014093155613e-05, "loss": 0.1409, "step": 28871 }, { "epoch": 0.5149645061177898, "grad_norm": 0.2892451286315918, "learning_rate": 2.8048596046092406e-05, "loss": 0.1193, "step": 28872 }, { "epoch": 0.5149823422395035, "grad_norm": 0.2334245890378952, "learning_rate": 2.8047051148811275e-05, "loss": 0.0944, "step": 28873 }, { "epoch": 0.5150001783612171, "grad_norm": 0.22783909738063812, "learning_rate": 2.8045506239718717e-05, "loss": 0.1899, "step": 28874 }, { "epoch": 0.5150180144829308, "grad_norm": 0.21311908960342407, "learning_rate": 2.8043961318820722e-05, "loss": 0.1148, "step": 28875 }, { "epoch": 0.5150358506046445, "grad_norm": 0.268890380859375, "learning_rate": 2.8042416386123284e-05, "loss": 0.1612, "step": 28876 }, { "epoch": 0.5150536867263582, "grad_norm": 0.4682801365852356, "learning_rate": 2.8040871441632393e-05, "loss": 0.1884, "step": 28877 }, { "epoch": 0.5150715228480719, "grad_norm": 0.27963343262672424, "learning_rate": 2.8039326485354028e-05, "loss": 0.153, "step": 28878 }, { "epoch": 0.5150893589697856, "grad_norm": 0.21740885078907013, "learning_rate": 2.8037781517294194e-05, "loss": 0.084, "step": 28879 }, { "epoch": 0.5151071950914993, "grad_norm": 0.2583710849285126, "learning_rate": 2.803623653745886e-05, "loss": 0.1095, "step": 28880 }, { "epoch": 0.515125031213213, "grad_norm": 0.22081707417964935, "learning_rate": 2.803469154585403e-05, "loss": 0.1078, "step": 28881 }, { "epoch": 0.5151428673349266, "grad_norm": 0.22243289649486542, "learning_rate": 2.8033146542485683e-05, "loss": 0.1812, "step": 28882 }, { "epoch": 0.5151607034566403, "grad_norm": 0.2706349492073059, "learning_rate": 2.8031601527359813e-05, "loss": 0.1602, "step": 28883 }, { "epoch": 0.5151785395783541, "grad_norm": 0.22824732959270477, "learning_rate": 2.8030056500482404e-05, "loss": 0.1276, "step": 28884 }, { "epoch": 0.5151963757000678, "grad_norm": 0.186862051486969, "learning_rate": 2.8028511461859457e-05, "loss": 0.1105, "step": 28885 }, { "epoch": 0.5152142118217815, "grad_norm": 0.27251294255256653, "learning_rate": 2.802696641149696e-05, "loss": 0.1169, "step": 28886 }, { "epoch": 0.5152320479434952, "grad_norm": 0.3063884377479553, "learning_rate": 2.8025421349400883e-05, "loss": 0.1421, "step": 28887 }, { "epoch": 0.5152498840652089, "grad_norm": 0.22104158997535706, "learning_rate": 2.802387627557724e-05, "loss": 0.1263, "step": 28888 }, { "epoch": 0.5152677201869226, "grad_norm": 0.3139147162437439, "learning_rate": 2.8022331190031987e-05, "loss": 0.2088, "step": 28889 }, { "epoch": 0.5152855563086363, "grad_norm": 0.2928353250026703, "learning_rate": 2.802078609277115e-05, "loss": 0.1138, "step": 28890 }, { "epoch": 0.51530339243035, "grad_norm": 0.35080447793006897, "learning_rate": 2.8019240983800698e-05, "loss": 0.1461, "step": 28891 }, { "epoch": 0.5153212285520636, "grad_norm": 0.26810023188591003, "learning_rate": 2.8017695863126635e-05, "loss": 0.0917, "step": 28892 }, { "epoch": 0.5153390646737773, "grad_norm": 0.2373628467321396, "learning_rate": 2.8016150730754932e-05, "loss": 0.1108, "step": 28893 }, { "epoch": 0.515356900795491, "grad_norm": 0.3014053702354431, "learning_rate": 2.801460558669159e-05, "loss": 0.1879, "step": 28894 }, { "epoch": 0.5153747369172047, "grad_norm": 0.2637397348880768, "learning_rate": 2.801306043094259e-05, "loss": 0.1452, "step": 28895 }, { "epoch": 0.5153925730389184, "grad_norm": 0.28406721353530884, "learning_rate": 2.8011515263513927e-05, "loss": 0.1463, "step": 28896 }, { "epoch": 0.5154104091606321, "grad_norm": 0.24499158561229706, "learning_rate": 2.8009970084411596e-05, "loss": 0.1317, "step": 28897 }, { "epoch": 0.5154282452823458, "grad_norm": 0.30253899097442627, "learning_rate": 2.8008424893641582e-05, "loss": 0.1543, "step": 28898 }, { "epoch": 0.5154460814040595, "grad_norm": 0.47803977131843567, "learning_rate": 2.8006879691209877e-05, "loss": 0.1407, "step": 28899 }, { "epoch": 0.5154639175257731, "grad_norm": 0.24904556572437286, "learning_rate": 2.800533447712246e-05, "loss": 0.1447, "step": 28900 }, { "epoch": 0.5154817536474869, "grad_norm": 0.2349046766757965, "learning_rate": 2.8003789251385333e-05, "loss": 0.1567, "step": 28901 }, { "epoch": 0.5154995897692006, "grad_norm": 0.24299566447734833, "learning_rate": 2.8002244014004475e-05, "loss": 0.114, "step": 28902 }, { "epoch": 0.5155174258909143, "grad_norm": 0.2111862152814865, "learning_rate": 2.800069876498589e-05, "loss": 0.1045, "step": 28903 }, { "epoch": 0.515535262012628, "grad_norm": 0.2480131983757019, "learning_rate": 2.7999153504335552e-05, "loss": 0.0966, "step": 28904 }, { "epoch": 0.5155530981343417, "grad_norm": 0.29171517491340637, "learning_rate": 2.7997608232059468e-05, "loss": 0.2029, "step": 28905 }, { "epoch": 0.5155709342560554, "grad_norm": 0.25620126724243164, "learning_rate": 2.7996062948163616e-05, "loss": 0.1987, "step": 28906 }, { "epoch": 0.5155887703777691, "grad_norm": 0.26390090584754944, "learning_rate": 2.799451765265399e-05, "loss": 0.1601, "step": 28907 }, { "epoch": 0.5156066064994828, "grad_norm": 0.3143174350261688, "learning_rate": 2.7992972345536572e-05, "loss": 0.1274, "step": 28908 }, { "epoch": 0.5156244426211964, "grad_norm": 0.3383817672729492, "learning_rate": 2.799142702681737e-05, "loss": 0.1601, "step": 28909 }, { "epoch": 0.5156422787429101, "grad_norm": 0.3445188105106354, "learning_rate": 2.7989881696502357e-05, "loss": 0.1802, "step": 28910 }, { "epoch": 0.5156601148646238, "grad_norm": 0.22919434309005737, "learning_rate": 2.798833635459753e-05, "loss": 0.0902, "step": 28911 }, { "epoch": 0.5156779509863375, "grad_norm": 0.2137250304222107, "learning_rate": 2.7986791001108888e-05, "loss": 0.1091, "step": 28912 }, { "epoch": 0.5156957871080512, "grad_norm": 0.24877431988716125, "learning_rate": 2.79852456360424e-05, "loss": 0.158, "step": 28913 }, { "epoch": 0.5157136232297649, "grad_norm": 0.2536190152168274, "learning_rate": 2.798370025940408e-05, "loss": 0.1133, "step": 28914 }, { "epoch": 0.5157314593514786, "grad_norm": 0.2222224920988083, "learning_rate": 2.7982154871199894e-05, "loss": 0.1428, "step": 28915 }, { "epoch": 0.5157492954731923, "grad_norm": 0.33202672004699707, "learning_rate": 2.7980609471435854e-05, "loss": 0.1561, "step": 28916 }, { "epoch": 0.5157671315949061, "grad_norm": 0.46549445390701294, "learning_rate": 2.797906406011794e-05, "loss": 0.1488, "step": 28917 }, { "epoch": 0.5157849677166197, "grad_norm": 0.18265798687934875, "learning_rate": 2.7977518637252146e-05, "loss": 0.1164, "step": 28918 }, { "epoch": 0.5158028038383334, "grad_norm": 0.25831854343414307, "learning_rate": 2.7975973202844463e-05, "loss": 0.1553, "step": 28919 }, { "epoch": 0.5158206399600471, "grad_norm": 0.2652304470539093, "learning_rate": 2.7974427756900878e-05, "loss": 0.1064, "step": 28920 }, { "epoch": 0.5158384760817608, "grad_norm": 0.258872389793396, "learning_rate": 2.7972882299427382e-05, "loss": 0.1748, "step": 28921 }, { "epoch": 0.5158563122034745, "grad_norm": 0.26258233189582825, "learning_rate": 2.7971336830429973e-05, "loss": 0.1822, "step": 28922 }, { "epoch": 0.5158741483251882, "grad_norm": 0.27374476194381714, "learning_rate": 2.796979134991463e-05, "loss": 0.1309, "step": 28923 }, { "epoch": 0.5158919844469019, "grad_norm": 0.31991007924079895, "learning_rate": 2.7968245857887353e-05, "loss": 0.1804, "step": 28924 }, { "epoch": 0.5159098205686156, "grad_norm": 0.21791322529315948, "learning_rate": 2.796670035435413e-05, "loss": 0.1494, "step": 28925 }, { "epoch": 0.5159276566903293, "grad_norm": 0.28283265233039856, "learning_rate": 2.796515483932095e-05, "loss": 0.1455, "step": 28926 }, { "epoch": 0.5159454928120429, "grad_norm": 0.34426149725914, "learning_rate": 2.7963609312793813e-05, "loss": 0.1569, "step": 28927 }, { "epoch": 0.5159633289337566, "grad_norm": 0.27180472016334534, "learning_rate": 2.7962063774778696e-05, "loss": 0.1472, "step": 28928 }, { "epoch": 0.5159811650554703, "grad_norm": 0.29504522681236267, "learning_rate": 2.7960518225281603e-05, "loss": 0.1326, "step": 28929 }, { "epoch": 0.515999001177184, "grad_norm": 0.38925954699516296, "learning_rate": 2.7958972664308507e-05, "loss": 0.1284, "step": 28930 }, { "epoch": 0.5160168372988977, "grad_norm": 0.21360723674297333, "learning_rate": 2.7957427091865417e-05, "loss": 0.0769, "step": 28931 }, { "epoch": 0.5160346734206114, "grad_norm": 0.22349615395069122, "learning_rate": 2.7955881507958316e-05, "loss": 0.1205, "step": 28932 }, { "epoch": 0.5160525095423251, "grad_norm": 0.2895841598510742, "learning_rate": 2.7954335912593206e-05, "loss": 0.1502, "step": 28933 }, { "epoch": 0.5160703456640389, "grad_norm": 0.22992043197155, "learning_rate": 2.7952790305776062e-05, "loss": 0.1245, "step": 28934 }, { "epoch": 0.5160881817857526, "grad_norm": 0.3140040934085846, "learning_rate": 2.7951244687512884e-05, "loss": 0.1358, "step": 28935 }, { "epoch": 0.5161060179074662, "grad_norm": 0.3326644003391266, "learning_rate": 2.7949699057809665e-05, "loss": 0.1577, "step": 28936 }, { "epoch": 0.5161238540291799, "grad_norm": 0.3024000823497772, "learning_rate": 2.794815341667239e-05, "loss": 0.1246, "step": 28937 }, { "epoch": 0.5161416901508936, "grad_norm": 0.2273421734571457, "learning_rate": 2.7946607764107046e-05, "loss": 0.1294, "step": 28938 }, { "epoch": 0.5161595262726073, "grad_norm": 0.27051490545272827, "learning_rate": 2.7945062100119644e-05, "loss": 0.1231, "step": 28939 }, { "epoch": 0.516177362394321, "grad_norm": 0.2705560624599457, "learning_rate": 2.7943516424716172e-05, "loss": 0.1417, "step": 28940 }, { "epoch": 0.5161951985160347, "grad_norm": 0.21922354400157928, "learning_rate": 2.79419707379026e-05, "loss": 0.1094, "step": 28941 }, { "epoch": 0.5162130346377484, "grad_norm": 0.4108131527900696, "learning_rate": 2.7940425039684943e-05, "loss": 0.1551, "step": 28942 }, { "epoch": 0.5162308707594621, "grad_norm": 0.24285483360290527, "learning_rate": 2.7938879330069173e-05, "loss": 0.1516, "step": 28943 }, { "epoch": 0.5162487068811757, "grad_norm": 0.23992125689983368, "learning_rate": 2.793733360906129e-05, "loss": 0.1225, "step": 28944 }, { "epoch": 0.5162665430028894, "grad_norm": 0.27711063623428345, "learning_rate": 2.793578787666729e-05, "loss": 0.1758, "step": 28945 }, { "epoch": 0.5162843791246031, "grad_norm": 0.2801652252674103, "learning_rate": 2.7934242132893173e-05, "loss": 0.1871, "step": 28946 }, { "epoch": 0.5163022152463168, "grad_norm": 0.2829630374908447, "learning_rate": 2.793269637774491e-05, "loss": 0.1377, "step": 28947 }, { "epoch": 0.5163200513680305, "grad_norm": 0.28745269775390625, "learning_rate": 2.7931150611228507e-05, "loss": 0.154, "step": 28948 }, { "epoch": 0.5163378874897442, "grad_norm": 0.3312828242778778, "learning_rate": 2.792960483334995e-05, "loss": 0.1329, "step": 28949 }, { "epoch": 0.5163557236114579, "grad_norm": 0.27629029750823975, "learning_rate": 2.7928059044115233e-05, "loss": 0.1651, "step": 28950 }, { "epoch": 0.5163735597331717, "grad_norm": 0.22156387567520142, "learning_rate": 2.792651324353035e-05, "loss": 0.1333, "step": 28951 }, { "epoch": 0.5163913958548854, "grad_norm": 0.315434992313385, "learning_rate": 2.7924967431601285e-05, "loss": 0.1783, "step": 28952 }, { "epoch": 0.516409231976599, "grad_norm": 0.25512686371803284, "learning_rate": 2.7923421608334043e-05, "loss": 0.1461, "step": 28953 }, { "epoch": 0.5164270680983127, "grad_norm": 0.21157903969287872, "learning_rate": 2.79218757737346e-05, "loss": 0.099, "step": 28954 }, { "epoch": 0.5164449042200264, "grad_norm": 0.547171950340271, "learning_rate": 2.792032992780897e-05, "loss": 0.1832, "step": 28955 }, { "epoch": 0.5164627403417401, "grad_norm": 0.30515196919441223, "learning_rate": 2.791878407056312e-05, "loss": 0.1639, "step": 28956 }, { "epoch": 0.5164805764634538, "grad_norm": 0.3291718661785126, "learning_rate": 2.7917238202003065e-05, "loss": 0.1372, "step": 28957 }, { "epoch": 0.5164984125851675, "grad_norm": 0.2584816813468933, "learning_rate": 2.7915692322134777e-05, "loss": 0.1391, "step": 28958 }, { "epoch": 0.5165162487068812, "grad_norm": 0.27601930499076843, "learning_rate": 2.7914146430964265e-05, "loss": 0.1998, "step": 28959 }, { "epoch": 0.5165340848285949, "grad_norm": 0.20140008628368378, "learning_rate": 2.791260052849751e-05, "loss": 0.1309, "step": 28960 }, { "epoch": 0.5165519209503086, "grad_norm": 0.24996884167194366, "learning_rate": 2.7911054614740516e-05, "loss": 0.0593, "step": 28961 }, { "epoch": 0.5165697570720222, "grad_norm": 0.25987932085990906, "learning_rate": 2.7909508689699264e-05, "loss": 0.1437, "step": 28962 }, { "epoch": 0.5165875931937359, "grad_norm": 0.4900023937225342, "learning_rate": 2.7907962753379764e-05, "loss": 0.1584, "step": 28963 }, { "epoch": 0.5166054293154496, "grad_norm": 0.2832129895687103, "learning_rate": 2.7906416805787984e-05, "loss": 0.1665, "step": 28964 }, { "epoch": 0.5166232654371633, "grad_norm": 0.23083336651325226, "learning_rate": 2.790487084692993e-05, "loss": 0.1452, "step": 28965 }, { "epoch": 0.516641101558877, "grad_norm": 0.30606067180633545, "learning_rate": 2.7903324876811593e-05, "loss": 0.1336, "step": 28966 }, { "epoch": 0.5166589376805907, "grad_norm": 0.2836165428161621, "learning_rate": 2.7901778895438963e-05, "loss": 0.1383, "step": 28967 }, { "epoch": 0.5166767738023045, "grad_norm": 0.26837658882141113, "learning_rate": 2.790023290281804e-05, "loss": 0.1804, "step": 28968 }, { "epoch": 0.5166946099240182, "grad_norm": 0.29959067702293396, "learning_rate": 2.7898686898954813e-05, "loss": 0.1142, "step": 28969 }, { "epoch": 0.5167124460457319, "grad_norm": 0.17766804993152618, "learning_rate": 2.7897140883855276e-05, "loss": 0.1368, "step": 28970 }, { "epoch": 0.5167302821674455, "grad_norm": 0.2631548345088959, "learning_rate": 2.7895594857525413e-05, "loss": 0.1992, "step": 28971 }, { "epoch": 0.5167481182891592, "grad_norm": 0.21336770057678223, "learning_rate": 2.789404881997123e-05, "loss": 0.0953, "step": 28972 }, { "epoch": 0.5167659544108729, "grad_norm": 0.24261608719825745, "learning_rate": 2.7892502771198707e-05, "loss": 0.153, "step": 28973 }, { "epoch": 0.5167837905325866, "grad_norm": 0.25070300698280334, "learning_rate": 2.7890956711213854e-05, "loss": 0.1566, "step": 28974 }, { "epoch": 0.5168016266543003, "grad_norm": 0.1853751540184021, "learning_rate": 2.788941064002265e-05, "loss": 0.1083, "step": 28975 }, { "epoch": 0.516819462776014, "grad_norm": 0.2470073103904724, "learning_rate": 2.78878645576311e-05, "loss": 0.1427, "step": 28976 }, { "epoch": 0.5168372988977277, "grad_norm": 0.35444435477256775, "learning_rate": 2.7886318464045174e-05, "loss": 0.1034, "step": 28977 }, { "epoch": 0.5168551350194414, "grad_norm": 0.23380325734615326, "learning_rate": 2.7884772359270893e-05, "loss": 0.1285, "step": 28978 }, { "epoch": 0.516872971141155, "grad_norm": 0.22492867708206177, "learning_rate": 2.7883226243314238e-05, "loss": 0.1235, "step": 28979 }, { "epoch": 0.5168908072628687, "grad_norm": 0.2778223156929016, "learning_rate": 2.788168011618119e-05, "loss": 0.119, "step": 28980 }, { "epoch": 0.5169086433845824, "grad_norm": 0.22704172134399414, "learning_rate": 2.788013397787777e-05, "loss": 0.0989, "step": 28981 }, { "epoch": 0.5169264795062961, "grad_norm": 0.242509663105011, "learning_rate": 2.7878587828409948e-05, "loss": 0.1184, "step": 28982 }, { "epoch": 0.5169443156280098, "grad_norm": 0.2854163944721222, "learning_rate": 2.7877041667783726e-05, "loss": 0.1994, "step": 28983 }, { "epoch": 0.5169621517497235, "grad_norm": 0.22003023326396942, "learning_rate": 2.7875495496005093e-05, "loss": 0.137, "step": 28984 }, { "epoch": 0.5169799878714373, "grad_norm": 0.20889660716056824, "learning_rate": 2.7873949313080055e-05, "loss": 0.1318, "step": 28985 }, { "epoch": 0.516997823993151, "grad_norm": 0.2641153335571289, "learning_rate": 2.787240311901459e-05, "loss": 0.1394, "step": 28986 }, { "epoch": 0.5170156601148647, "grad_norm": 0.23657356202602386, "learning_rate": 2.7870856913814703e-05, "loss": 0.0823, "step": 28987 }, { "epoch": 0.5170334962365783, "grad_norm": 0.27231404185295105, "learning_rate": 2.786931069748638e-05, "loss": 0.1608, "step": 28988 }, { "epoch": 0.517051332358292, "grad_norm": 0.31724461913108826, "learning_rate": 2.7867764470035617e-05, "loss": 0.1265, "step": 28989 }, { "epoch": 0.5170691684800057, "grad_norm": 0.27262791991233826, "learning_rate": 2.786621823146841e-05, "loss": 0.1516, "step": 28990 }, { "epoch": 0.5170870046017194, "grad_norm": 0.18548011779785156, "learning_rate": 2.786467198179075e-05, "loss": 0.0942, "step": 28991 }, { "epoch": 0.5171048407234331, "grad_norm": 0.25160813331604004, "learning_rate": 2.7863125721008637e-05, "loss": 0.1057, "step": 28992 }, { "epoch": 0.5171226768451468, "grad_norm": 0.20386798679828644, "learning_rate": 2.786157944912805e-05, "loss": 0.099, "step": 28993 }, { "epoch": 0.5171405129668605, "grad_norm": 0.28697773814201355, "learning_rate": 2.7860033166155004e-05, "loss": 0.1494, "step": 28994 }, { "epoch": 0.5171583490885742, "grad_norm": 0.3194122910499573, "learning_rate": 2.785848687209547e-05, "loss": 0.1452, "step": 28995 }, { "epoch": 0.5171761852102879, "grad_norm": 0.2933448553085327, "learning_rate": 2.7856940566955463e-05, "loss": 0.1454, "step": 28996 }, { "epoch": 0.5171940213320015, "grad_norm": 0.2207578867673874, "learning_rate": 2.785539425074096e-05, "loss": 0.1257, "step": 28997 }, { "epoch": 0.5172118574537152, "grad_norm": 0.258308470249176, "learning_rate": 2.7853847923457975e-05, "loss": 0.1435, "step": 28998 }, { "epoch": 0.5172296935754289, "grad_norm": 0.20282821357250214, "learning_rate": 2.7852301585112478e-05, "loss": 0.1141, "step": 28999 }, { "epoch": 0.5172475296971426, "grad_norm": 0.2658439874649048, "learning_rate": 2.7850755235710473e-05, "loss": 0.1725, "step": 29000 }, { "epoch": 0.5172475296971426, "eval_loss": 0.13551442325115204, "eval_runtime": 106.861, "eval_samples_per_second": 9.583, "eval_steps_per_second": 1.6, "step": 29000 }, { "epoch": 0.5172653658188563, "grad_norm": 0.24678544700145721, "learning_rate": 2.7849208875257965e-05, "loss": 0.141, "step": 29001 }, { "epoch": 0.5172832019405701, "grad_norm": 0.3237248957157135, "learning_rate": 2.7847662503760937e-05, "loss": 0.1341, "step": 29002 }, { "epoch": 0.5173010380622838, "grad_norm": 0.21365462243556976, "learning_rate": 2.7846116121225385e-05, "loss": 0.1237, "step": 29003 }, { "epoch": 0.5173188741839975, "grad_norm": 0.3641394376754761, "learning_rate": 2.7844569727657304e-05, "loss": 0.1521, "step": 29004 }, { "epoch": 0.5173367103057112, "grad_norm": 0.49921318888664246, "learning_rate": 2.784302332306269e-05, "loss": 0.178, "step": 29005 }, { "epoch": 0.5173545464274248, "grad_norm": 0.23799487948417664, "learning_rate": 2.7841476907447534e-05, "loss": 0.1186, "step": 29006 }, { "epoch": 0.5173723825491385, "grad_norm": 0.27667132019996643, "learning_rate": 2.783993048081783e-05, "loss": 0.1007, "step": 29007 }, { "epoch": 0.5173902186708522, "grad_norm": 0.29321256279945374, "learning_rate": 2.7838384043179572e-05, "loss": 0.1526, "step": 29008 }, { "epoch": 0.5174080547925659, "grad_norm": 0.2686637043952942, "learning_rate": 2.783683759453876e-05, "loss": 0.1477, "step": 29009 }, { "epoch": 0.5174258909142796, "grad_norm": 0.36251071095466614, "learning_rate": 2.7835291134901387e-05, "loss": 0.1421, "step": 29010 }, { "epoch": 0.5174437270359933, "grad_norm": 0.39520522952079773, "learning_rate": 2.7833744664273454e-05, "loss": 0.1732, "step": 29011 }, { "epoch": 0.517461563157707, "grad_norm": 0.2549844980239868, "learning_rate": 2.7832198182660936e-05, "loss": 0.154, "step": 29012 }, { "epoch": 0.5174793992794207, "grad_norm": 0.21941344439983368, "learning_rate": 2.7830651690069848e-05, "loss": 0.1374, "step": 29013 }, { "epoch": 0.5174972354011343, "grad_norm": 0.2945975065231323, "learning_rate": 2.7829105186506167e-05, "loss": 0.1344, "step": 29014 }, { "epoch": 0.517515071522848, "grad_norm": 0.25093361735343933, "learning_rate": 2.7827558671975902e-05, "loss": 0.0928, "step": 29015 }, { "epoch": 0.5175329076445617, "grad_norm": 0.2622643709182739, "learning_rate": 2.782601214648504e-05, "loss": 0.1334, "step": 29016 }, { "epoch": 0.5175507437662754, "grad_norm": 0.30701369047164917, "learning_rate": 2.7824465610039585e-05, "loss": 0.1423, "step": 29017 }, { "epoch": 0.5175685798879892, "grad_norm": 0.2798667252063751, "learning_rate": 2.7822919062645526e-05, "loss": 0.1538, "step": 29018 }, { "epoch": 0.5175864160097029, "grad_norm": 0.3005349636077881, "learning_rate": 2.7821372504308852e-05, "loss": 0.1775, "step": 29019 }, { "epoch": 0.5176042521314166, "grad_norm": 0.2491803914308548, "learning_rate": 2.7819825935035567e-05, "loss": 0.0998, "step": 29020 }, { "epoch": 0.5176220882531303, "grad_norm": 0.3449251651763916, "learning_rate": 2.7818279354831656e-05, "loss": 0.147, "step": 29021 }, { "epoch": 0.517639924374844, "grad_norm": 0.33878713846206665, "learning_rate": 2.781673276370313e-05, "loss": 0.1641, "step": 29022 }, { "epoch": 0.5176577604965577, "grad_norm": 0.23291771113872528, "learning_rate": 2.781518616165597e-05, "loss": 0.141, "step": 29023 }, { "epoch": 0.5176755966182713, "grad_norm": 0.24511079490184784, "learning_rate": 2.781363954869618e-05, "loss": 0.1458, "step": 29024 }, { "epoch": 0.517693432739985, "grad_norm": 0.26316049695014954, "learning_rate": 2.7812092924829748e-05, "loss": 0.131, "step": 29025 }, { "epoch": 0.5177112688616987, "grad_norm": 0.24760164320468903, "learning_rate": 2.781054629006268e-05, "loss": 0.081, "step": 29026 }, { "epoch": 0.5177291049834124, "grad_norm": 0.3154909610748291, "learning_rate": 2.780899964440095e-05, "loss": 0.1492, "step": 29027 }, { "epoch": 0.5177469411051261, "grad_norm": 0.33660760521888733, "learning_rate": 2.7807452987850575e-05, "loss": 0.1145, "step": 29028 }, { "epoch": 0.5177647772268398, "grad_norm": 0.1884072870016098, "learning_rate": 2.780590632041754e-05, "loss": 0.1326, "step": 29029 }, { "epoch": 0.5177826133485535, "grad_norm": 0.2741774618625641, "learning_rate": 2.7804359642107846e-05, "loss": 0.0649, "step": 29030 }, { "epoch": 0.5178004494702672, "grad_norm": 0.2877926528453827, "learning_rate": 2.7802812952927492e-05, "loss": 0.1806, "step": 29031 }, { "epoch": 0.5178182855919808, "grad_norm": 0.20064689218997955, "learning_rate": 2.7801266252882457e-05, "loss": 0.1136, "step": 29032 }, { "epoch": 0.5178361217136945, "grad_norm": 0.32631775736808777, "learning_rate": 2.7799719541978754e-05, "loss": 0.125, "step": 29033 }, { "epoch": 0.5178539578354082, "grad_norm": 0.30685681104660034, "learning_rate": 2.7798172820222363e-05, "loss": 0.2023, "step": 29034 }, { "epoch": 0.517871793957122, "grad_norm": 0.25200074911117554, "learning_rate": 2.779662608761929e-05, "loss": 0.1331, "step": 29035 }, { "epoch": 0.5178896300788357, "grad_norm": 0.2657131254673004, "learning_rate": 2.7795079344175524e-05, "loss": 0.1312, "step": 29036 }, { "epoch": 0.5179074662005494, "grad_norm": 0.19913235306739807, "learning_rate": 2.7793532589897077e-05, "loss": 0.0629, "step": 29037 }, { "epoch": 0.5179253023222631, "grad_norm": 0.25144246220588684, "learning_rate": 2.7791985824789923e-05, "loss": 0.1778, "step": 29038 }, { "epoch": 0.5179431384439768, "grad_norm": 0.29484477639198303, "learning_rate": 2.7790439048860073e-05, "loss": 0.1781, "step": 29039 }, { "epoch": 0.5179609745656905, "grad_norm": 0.33371952176094055, "learning_rate": 2.7788892262113514e-05, "loss": 0.1563, "step": 29040 }, { "epoch": 0.5179788106874041, "grad_norm": 0.19438757002353668, "learning_rate": 2.7787345464556247e-05, "loss": 0.122, "step": 29041 }, { "epoch": 0.5179966468091178, "grad_norm": 0.20084218680858612, "learning_rate": 2.7785798656194258e-05, "loss": 0.1433, "step": 29042 }, { "epoch": 0.5180144829308315, "grad_norm": 0.27779170870780945, "learning_rate": 2.7784251837033564e-05, "loss": 0.1443, "step": 29043 }, { "epoch": 0.5180323190525452, "grad_norm": 0.2793269157409668, "learning_rate": 2.7782705007080146e-05, "loss": 0.1246, "step": 29044 }, { "epoch": 0.5180501551742589, "grad_norm": 0.28280022740364075, "learning_rate": 2.7781158166339997e-05, "loss": 0.1882, "step": 29045 }, { "epoch": 0.5180679912959726, "grad_norm": 0.262104868888855, "learning_rate": 2.7779611314819125e-05, "loss": 0.17, "step": 29046 }, { "epoch": 0.5180858274176863, "grad_norm": 0.30261287093162537, "learning_rate": 2.7778064452523512e-05, "loss": 0.1248, "step": 29047 }, { "epoch": 0.5181036635394, "grad_norm": 0.2441805750131607, "learning_rate": 2.777651757945916e-05, "loss": 0.1328, "step": 29048 }, { "epoch": 0.5181214996611136, "grad_norm": 0.3091019093990326, "learning_rate": 2.7774970695632075e-05, "loss": 0.1663, "step": 29049 }, { "epoch": 0.5181393357828273, "grad_norm": 0.255980908870697, "learning_rate": 2.7773423801048244e-05, "loss": 0.1682, "step": 29050 }, { "epoch": 0.518157171904541, "grad_norm": 0.28301292657852173, "learning_rate": 2.777187689571366e-05, "loss": 0.1742, "step": 29051 }, { "epoch": 0.5181750080262548, "grad_norm": 0.23590096831321716, "learning_rate": 2.777032997963433e-05, "loss": 0.1414, "step": 29052 }, { "epoch": 0.5181928441479685, "grad_norm": 0.26343271136283875, "learning_rate": 2.776878305281624e-05, "loss": 0.147, "step": 29053 }, { "epoch": 0.5182106802696822, "grad_norm": 0.21160916984081268, "learning_rate": 2.7767236115265395e-05, "loss": 0.1092, "step": 29054 }, { "epoch": 0.5182285163913959, "grad_norm": 0.25611087679862976, "learning_rate": 2.776568916698778e-05, "loss": 0.143, "step": 29055 }, { "epoch": 0.5182463525131096, "grad_norm": 0.19979433715343475, "learning_rate": 2.7764142207989396e-05, "loss": 0.122, "step": 29056 }, { "epoch": 0.5182641886348233, "grad_norm": 0.288221538066864, "learning_rate": 2.776259523827625e-05, "loss": 0.1439, "step": 29057 }, { "epoch": 0.518282024756537, "grad_norm": 0.2494753897190094, "learning_rate": 2.7761048257854327e-05, "loss": 0.1135, "step": 29058 }, { "epoch": 0.5182998608782506, "grad_norm": 0.27413010597229004, "learning_rate": 2.7759501266729632e-05, "loss": 0.1447, "step": 29059 }, { "epoch": 0.5183176969999643, "grad_norm": 0.2546256184577942, "learning_rate": 2.7757954264908153e-05, "loss": 0.1294, "step": 29060 }, { "epoch": 0.518335533121678, "grad_norm": 0.23120713233947754, "learning_rate": 2.7756407252395893e-05, "loss": 0.0793, "step": 29061 }, { "epoch": 0.5183533692433917, "grad_norm": 0.23434863984584808, "learning_rate": 2.7754860229198843e-05, "loss": 0.1116, "step": 29062 }, { "epoch": 0.5183712053651054, "grad_norm": 0.21776700019836426, "learning_rate": 2.7753313195323005e-05, "loss": 0.1217, "step": 29063 }, { "epoch": 0.5183890414868191, "grad_norm": 0.261416494846344, "learning_rate": 2.7751766150774373e-05, "loss": 0.1134, "step": 29064 }, { "epoch": 0.5184068776085328, "grad_norm": 0.23069989681243896, "learning_rate": 2.775021909555895e-05, "loss": 0.1155, "step": 29065 }, { "epoch": 0.5184247137302465, "grad_norm": 0.30875062942504883, "learning_rate": 2.7748672029682722e-05, "loss": 0.1332, "step": 29066 }, { "epoch": 0.5184425498519601, "grad_norm": 0.23155012726783752, "learning_rate": 2.7747124953151694e-05, "loss": 0.1438, "step": 29067 }, { "epoch": 0.5184603859736738, "grad_norm": 0.20740477740764618, "learning_rate": 2.774557786597186e-05, "loss": 0.1139, "step": 29068 }, { "epoch": 0.5184782220953876, "grad_norm": 0.2374761551618576, "learning_rate": 2.7744030768149222e-05, "loss": 0.1396, "step": 29069 }, { "epoch": 0.5184960582171013, "grad_norm": 0.23060667514801025, "learning_rate": 2.7742483659689765e-05, "loss": 0.0981, "step": 29070 }, { "epoch": 0.518513894338815, "grad_norm": 0.27698296308517456, "learning_rate": 2.77409365405995e-05, "loss": 0.0668, "step": 29071 }, { "epoch": 0.5185317304605287, "grad_norm": 0.3301070034503937, "learning_rate": 2.773938941088442e-05, "loss": 0.103, "step": 29072 }, { "epoch": 0.5185495665822424, "grad_norm": 0.3216072618961334, "learning_rate": 2.773784227055051e-05, "loss": 0.1423, "step": 29073 }, { "epoch": 0.5185674027039561, "grad_norm": 0.22094807028770447, "learning_rate": 2.7736295119603794e-05, "loss": 0.1941, "step": 29074 }, { "epoch": 0.5185852388256698, "grad_norm": 0.2325967699289322, "learning_rate": 2.7734747958050238e-05, "loss": 0.0998, "step": 29075 }, { "epoch": 0.5186030749473834, "grad_norm": 0.22630323469638824, "learning_rate": 2.773320078589586e-05, "loss": 0.1234, "step": 29076 }, { "epoch": 0.5186209110690971, "grad_norm": 0.25922587513923645, "learning_rate": 2.773165360314665e-05, "loss": 0.1545, "step": 29077 }, { "epoch": 0.5186387471908108, "grad_norm": 0.23307378590106964, "learning_rate": 2.7730106409808615e-05, "loss": 0.1202, "step": 29078 }, { "epoch": 0.5186565833125245, "grad_norm": 0.2333347648382187, "learning_rate": 2.7728559205887735e-05, "loss": 0.1586, "step": 29079 }, { "epoch": 0.5186744194342382, "grad_norm": 0.3560292720794678, "learning_rate": 2.7727011991390022e-05, "loss": 0.1561, "step": 29080 }, { "epoch": 0.5186922555559519, "grad_norm": 0.2181674987077713, "learning_rate": 2.7725464766321467e-05, "loss": 0.138, "step": 29081 }, { "epoch": 0.5187100916776656, "grad_norm": 0.2910957932472229, "learning_rate": 2.7723917530688077e-05, "loss": 0.1424, "step": 29082 }, { "epoch": 0.5187279277993793, "grad_norm": 0.23398922383785248, "learning_rate": 2.772237028449583e-05, "loss": 0.1344, "step": 29083 }, { "epoch": 0.518745763921093, "grad_norm": 0.23091459274291992, "learning_rate": 2.7720823027750738e-05, "loss": 0.1199, "step": 29084 }, { "epoch": 0.5187636000428066, "grad_norm": 0.2950792610645294, "learning_rate": 2.7719275760458803e-05, "loss": 0.1691, "step": 29085 }, { "epoch": 0.5187814361645204, "grad_norm": 0.2599247395992279, "learning_rate": 2.771772848262601e-05, "loss": 0.1803, "step": 29086 }, { "epoch": 0.5187992722862341, "grad_norm": 0.23546212911605835, "learning_rate": 2.771618119425837e-05, "loss": 0.1322, "step": 29087 }, { "epoch": 0.5188171084079478, "grad_norm": 0.28911495208740234, "learning_rate": 2.7714633895361863e-05, "loss": 0.1238, "step": 29088 }, { "epoch": 0.5188349445296615, "grad_norm": 0.20387057960033417, "learning_rate": 2.7713086585942504e-05, "loss": 0.0752, "step": 29089 }, { "epoch": 0.5188527806513752, "grad_norm": 0.27801382541656494, "learning_rate": 2.771153926600628e-05, "loss": 0.0968, "step": 29090 }, { "epoch": 0.5188706167730889, "grad_norm": 0.2624642848968506, "learning_rate": 2.7709991935559192e-05, "loss": 0.1015, "step": 29091 }, { "epoch": 0.5188884528948026, "grad_norm": 0.25566697120666504, "learning_rate": 2.7708444594607247e-05, "loss": 0.1366, "step": 29092 }, { "epoch": 0.5189062890165163, "grad_norm": 0.2571468949317932, "learning_rate": 2.7706897243156433e-05, "loss": 0.1173, "step": 29093 }, { "epoch": 0.5189241251382299, "grad_norm": 0.32615068554878235, "learning_rate": 2.7705349881212744e-05, "loss": 0.0906, "step": 29094 }, { "epoch": 0.5189419612599436, "grad_norm": 0.30232304334640503, "learning_rate": 2.7703802508782194e-05, "loss": 0.1591, "step": 29095 }, { "epoch": 0.5189597973816573, "grad_norm": 0.2673504948616028, "learning_rate": 2.7702255125870767e-05, "loss": 0.1273, "step": 29096 }, { "epoch": 0.518977633503371, "grad_norm": 0.24881090223789215, "learning_rate": 2.770070773248446e-05, "loss": 0.1444, "step": 29097 }, { "epoch": 0.5189954696250847, "grad_norm": 0.2517539858818054, "learning_rate": 2.7699160328629286e-05, "loss": 0.1363, "step": 29098 }, { "epoch": 0.5190133057467984, "grad_norm": 0.2921959459781647, "learning_rate": 2.7697612914311233e-05, "loss": 0.2051, "step": 29099 }, { "epoch": 0.5190311418685121, "grad_norm": 0.2753399610519409, "learning_rate": 2.76960654895363e-05, "loss": 0.1304, "step": 29100 }, { "epoch": 0.5190489779902258, "grad_norm": 0.2771969139575958, "learning_rate": 2.7694518054310482e-05, "loss": 0.1829, "step": 29101 }, { "epoch": 0.5190668141119394, "grad_norm": 0.29123929142951965, "learning_rate": 2.7692970608639785e-05, "loss": 0.1062, "step": 29102 }, { "epoch": 0.5190846502336532, "grad_norm": 0.25604504346847534, "learning_rate": 2.7691423152530203e-05, "loss": 0.1249, "step": 29103 }, { "epoch": 0.5191024863553669, "grad_norm": 0.22162814438343048, "learning_rate": 2.768987568598773e-05, "loss": 0.1399, "step": 29104 }, { "epoch": 0.5191203224770806, "grad_norm": 0.18678194284439087, "learning_rate": 2.7688328209018377e-05, "loss": 0.1101, "step": 29105 }, { "epoch": 0.5191381585987943, "grad_norm": 0.217605322599411, "learning_rate": 2.7686780721628136e-05, "loss": 0.1188, "step": 29106 }, { "epoch": 0.519155994720508, "grad_norm": 0.20708641409873962, "learning_rate": 2.7685233223823e-05, "loss": 0.0879, "step": 29107 }, { "epoch": 0.5191738308422217, "grad_norm": 0.493600994348526, "learning_rate": 2.7683685715608975e-05, "loss": 0.1455, "step": 29108 }, { "epoch": 0.5191916669639354, "grad_norm": 0.3135494589805603, "learning_rate": 2.7682138196992065e-05, "loss": 0.1341, "step": 29109 }, { "epoch": 0.5192095030856491, "grad_norm": 0.3242650330066681, "learning_rate": 2.7680590667978246e-05, "loss": 0.1608, "step": 29110 }, { "epoch": 0.5192273392073627, "grad_norm": 0.2908034026622772, "learning_rate": 2.7679043128573538e-05, "loss": 0.1196, "step": 29111 }, { "epoch": 0.5192451753290764, "grad_norm": 0.24959696829319, "learning_rate": 2.767749557878393e-05, "loss": 0.079, "step": 29112 }, { "epoch": 0.5192630114507901, "grad_norm": 0.248470738530159, "learning_rate": 2.7675948018615435e-05, "loss": 0.1372, "step": 29113 }, { "epoch": 0.5192808475725038, "grad_norm": 0.3410813808441162, "learning_rate": 2.7674400448074033e-05, "loss": 0.1531, "step": 29114 }, { "epoch": 0.5192986836942175, "grad_norm": 0.18837669491767883, "learning_rate": 2.7672852867165734e-05, "loss": 0.1322, "step": 29115 }, { "epoch": 0.5193165198159312, "grad_norm": 0.34150800108909607, "learning_rate": 2.767130527589653e-05, "loss": 0.1526, "step": 29116 }, { "epoch": 0.5193343559376449, "grad_norm": 0.24078314006328583, "learning_rate": 2.7669757674272436e-05, "loss": 0.1193, "step": 29117 }, { "epoch": 0.5193521920593586, "grad_norm": 0.3139224052429199, "learning_rate": 2.766821006229942e-05, "loss": 0.1463, "step": 29118 }, { "epoch": 0.5193700281810724, "grad_norm": 0.30436229705810547, "learning_rate": 2.766666243998351e-05, "loss": 0.1565, "step": 29119 }, { "epoch": 0.519387864302786, "grad_norm": 0.20879687368869781, "learning_rate": 2.7665114807330696e-05, "loss": 0.1221, "step": 29120 }, { "epoch": 0.5194057004244997, "grad_norm": 0.2561609447002411, "learning_rate": 2.766356716434698e-05, "loss": 0.1266, "step": 29121 }, { "epoch": 0.5194235365462134, "grad_norm": 0.1767602264881134, "learning_rate": 2.766201951103835e-05, "loss": 0.0932, "step": 29122 }, { "epoch": 0.5194413726679271, "grad_norm": 0.37322792410850525, "learning_rate": 2.7660471847410817e-05, "loss": 0.1719, "step": 29123 }, { "epoch": 0.5194592087896408, "grad_norm": 0.28258487582206726, "learning_rate": 2.765892417347038e-05, "loss": 0.1231, "step": 29124 }, { "epoch": 0.5194770449113545, "grad_norm": 0.2252558022737503, "learning_rate": 2.7657376489223023e-05, "loss": 0.1084, "step": 29125 }, { "epoch": 0.5194948810330682, "grad_norm": 0.36280569434165955, "learning_rate": 2.765582879467477e-05, "loss": 0.1446, "step": 29126 }, { "epoch": 0.5195127171547819, "grad_norm": 0.25010618567466736, "learning_rate": 2.7654281089831603e-05, "loss": 0.1232, "step": 29127 }, { "epoch": 0.5195305532764956, "grad_norm": 0.3785548210144043, "learning_rate": 2.7652733374699523e-05, "loss": 0.2013, "step": 29128 }, { "epoch": 0.5195483893982092, "grad_norm": 0.23961438238620758, "learning_rate": 2.7651185649284533e-05, "loss": 0.0947, "step": 29129 }, { "epoch": 0.5195662255199229, "grad_norm": 0.24644187092781067, "learning_rate": 2.7649637913592642e-05, "loss": 0.1389, "step": 29130 }, { "epoch": 0.5195840616416366, "grad_norm": 0.34896811842918396, "learning_rate": 2.7648090167629826e-05, "loss": 0.1084, "step": 29131 }, { "epoch": 0.5196018977633503, "grad_norm": 0.30557531118392944, "learning_rate": 2.76465424114021e-05, "loss": 0.1599, "step": 29132 }, { "epoch": 0.519619733885064, "grad_norm": 0.2294258177280426, "learning_rate": 2.7644994644915457e-05, "loss": 0.1322, "step": 29133 }, { "epoch": 0.5196375700067777, "grad_norm": 0.34130120277404785, "learning_rate": 2.7643446868175916e-05, "loss": 0.2066, "step": 29134 }, { "epoch": 0.5196554061284914, "grad_norm": 0.2521969676017761, "learning_rate": 2.7641899081189447e-05, "loss": 0.1307, "step": 29135 }, { "epoch": 0.5196732422502052, "grad_norm": 0.3334324061870575, "learning_rate": 2.7640351283962073e-05, "loss": 0.1601, "step": 29136 }, { "epoch": 0.5196910783719189, "grad_norm": 0.23689663410186768, "learning_rate": 2.7638803476499793e-05, "loss": 0.1399, "step": 29137 }, { "epoch": 0.5197089144936325, "grad_norm": 0.31018728017807007, "learning_rate": 2.7637255658808586e-05, "loss": 0.14, "step": 29138 }, { "epoch": 0.5197267506153462, "grad_norm": 0.29798370599746704, "learning_rate": 2.7635707830894465e-05, "loss": 0.0937, "step": 29139 }, { "epoch": 0.5197445867370599, "grad_norm": 0.23557382822036743, "learning_rate": 2.763415999276343e-05, "loss": 0.1255, "step": 29140 }, { "epoch": 0.5197624228587736, "grad_norm": 0.2728688716888428, "learning_rate": 2.763261214442149e-05, "loss": 0.1028, "step": 29141 }, { "epoch": 0.5197802589804873, "grad_norm": 0.24097716808319092, "learning_rate": 2.763106428587463e-05, "loss": 0.1255, "step": 29142 }, { "epoch": 0.519798095102201, "grad_norm": 0.2832989990711212, "learning_rate": 2.762951641712886e-05, "loss": 0.1287, "step": 29143 }, { "epoch": 0.5198159312239147, "grad_norm": 0.2786170542240143, "learning_rate": 2.7627968538190175e-05, "loss": 0.0946, "step": 29144 }, { "epoch": 0.5198337673456284, "grad_norm": 0.3092043697834015, "learning_rate": 2.7626420649064578e-05, "loss": 0.1662, "step": 29145 }, { "epoch": 0.519851603467342, "grad_norm": 0.365028440952301, "learning_rate": 2.7624872749758058e-05, "loss": 0.1975, "step": 29146 }, { "epoch": 0.5198694395890557, "grad_norm": 0.3091890513896942, "learning_rate": 2.7623324840276632e-05, "loss": 0.1636, "step": 29147 }, { "epoch": 0.5198872757107694, "grad_norm": 0.2252136915922165, "learning_rate": 2.7621776920626285e-05, "loss": 0.1453, "step": 29148 }, { "epoch": 0.5199051118324831, "grad_norm": 0.25473907589912415, "learning_rate": 2.7620228990813036e-05, "loss": 0.1226, "step": 29149 }, { "epoch": 0.5199229479541968, "grad_norm": 0.2998908460140228, "learning_rate": 2.7618681050842877e-05, "loss": 0.1069, "step": 29150 }, { "epoch": 0.5199407840759105, "grad_norm": 0.18929672241210938, "learning_rate": 2.7617133100721793e-05, "loss": 0.1073, "step": 29151 }, { "epoch": 0.5199586201976242, "grad_norm": 0.2511807382106781, "learning_rate": 2.7615585140455802e-05, "loss": 0.1323, "step": 29152 }, { "epoch": 0.519976456319338, "grad_norm": 0.22078782320022583, "learning_rate": 2.76140371700509e-05, "loss": 0.1434, "step": 29153 }, { "epoch": 0.5199942924410517, "grad_norm": 0.16528822481632233, "learning_rate": 2.7612489189513092e-05, "loss": 0.0767, "step": 29154 }, { "epoch": 0.5200121285627654, "grad_norm": 0.206413134932518, "learning_rate": 2.7610941198848366e-05, "loss": 0.1483, "step": 29155 }, { "epoch": 0.520029964684479, "grad_norm": 0.30228081345558167, "learning_rate": 2.760939319806274e-05, "loss": 0.1735, "step": 29156 }, { "epoch": 0.5200478008061927, "grad_norm": 0.24667786061763763, "learning_rate": 2.7607845187162196e-05, "loss": 0.1105, "step": 29157 }, { "epoch": 0.5200656369279064, "grad_norm": 0.3133133053779602, "learning_rate": 2.7606297166152745e-05, "loss": 0.1391, "step": 29158 }, { "epoch": 0.5200834730496201, "grad_norm": 0.26266568899154663, "learning_rate": 2.7604749135040387e-05, "loss": 0.1392, "step": 29159 }, { "epoch": 0.5201013091713338, "grad_norm": 0.2189272940158844, "learning_rate": 2.7603201093831115e-05, "loss": 0.1236, "step": 29160 }, { "epoch": 0.5201191452930475, "grad_norm": 0.31974124908447266, "learning_rate": 2.7601653042530946e-05, "loss": 0.1515, "step": 29161 }, { "epoch": 0.5201369814147612, "grad_norm": 0.3024396002292633, "learning_rate": 2.760010498114587e-05, "loss": 0.1081, "step": 29162 }, { "epoch": 0.5201548175364749, "grad_norm": 0.2115083634853363, "learning_rate": 2.759855690968189e-05, "loss": 0.1417, "step": 29163 }, { "epoch": 0.5201726536581885, "grad_norm": 0.30354738235473633, "learning_rate": 2.7597008828145e-05, "loss": 0.1179, "step": 29164 }, { "epoch": 0.5201904897799022, "grad_norm": 0.18906204402446747, "learning_rate": 2.7595460736541214e-05, "loss": 0.1044, "step": 29165 }, { "epoch": 0.5202083259016159, "grad_norm": 0.46158620715141296, "learning_rate": 2.7593912634876517e-05, "loss": 0.1332, "step": 29166 }, { "epoch": 0.5202261620233296, "grad_norm": 0.2531507909297943, "learning_rate": 2.7592364523156922e-05, "loss": 0.1007, "step": 29167 }, { "epoch": 0.5202439981450433, "grad_norm": 0.35190489888191223, "learning_rate": 2.7590816401388426e-05, "loss": 0.1051, "step": 29168 }, { "epoch": 0.520261834266757, "grad_norm": 0.29423987865448, "learning_rate": 2.758926826957704e-05, "loss": 0.181, "step": 29169 }, { "epoch": 0.5202796703884708, "grad_norm": 0.2975088953971863, "learning_rate": 2.7587720127728745e-05, "loss": 0.1165, "step": 29170 }, { "epoch": 0.5202975065101845, "grad_norm": 0.2691587209701538, "learning_rate": 2.7586171975849556e-05, "loss": 0.1373, "step": 29171 }, { "epoch": 0.5203153426318982, "grad_norm": 0.2392454892396927, "learning_rate": 2.7584623813945472e-05, "loss": 0.1274, "step": 29172 }, { "epoch": 0.5203331787536118, "grad_norm": 0.20323362946510315, "learning_rate": 2.75830756420225e-05, "loss": 0.1225, "step": 29173 }, { "epoch": 0.5203510148753255, "grad_norm": 0.21179316937923431, "learning_rate": 2.7581527460086614e-05, "loss": 0.1458, "step": 29174 }, { "epoch": 0.5203688509970392, "grad_norm": 0.22890907526016235, "learning_rate": 2.7579979268143858e-05, "loss": 0.1143, "step": 29175 }, { "epoch": 0.5203866871187529, "grad_norm": 0.20152054727077484, "learning_rate": 2.7578431066200204e-05, "loss": 0.1288, "step": 29176 }, { "epoch": 0.5204045232404666, "grad_norm": 0.35340768098831177, "learning_rate": 2.757688285426166e-05, "loss": 0.1303, "step": 29177 }, { "epoch": 0.5204223593621803, "grad_norm": 0.2403675615787506, "learning_rate": 2.7575334632334225e-05, "loss": 0.0759, "step": 29178 }, { "epoch": 0.520440195483894, "grad_norm": 0.30929702520370483, "learning_rate": 2.7573786400423908e-05, "loss": 0.1185, "step": 29179 }, { "epoch": 0.5204580316056077, "grad_norm": 0.2516457736492157, "learning_rate": 2.75722381585367e-05, "loss": 0.148, "step": 29180 }, { "epoch": 0.5204758677273214, "grad_norm": 0.31668320298194885, "learning_rate": 2.7570689906678615e-05, "loss": 0.1023, "step": 29181 }, { "epoch": 0.520493703849035, "grad_norm": 0.24320733547210693, "learning_rate": 2.7569141644855645e-05, "loss": 0.1016, "step": 29182 }, { "epoch": 0.5205115399707487, "grad_norm": 0.2117556780576706, "learning_rate": 2.7567593373073796e-05, "loss": 0.0875, "step": 29183 }, { "epoch": 0.5205293760924624, "grad_norm": 0.3125084638595581, "learning_rate": 2.7566045091339073e-05, "loss": 0.1164, "step": 29184 }, { "epoch": 0.5205472122141761, "grad_norm": 0.2813771963119507, "learning_rate": 2.7564496799657462e-05, "loss": 0.1099, "step": 29185 }, { "epoch": 0.5205650483358898, "grad_norm": 0.3593194782733917, "learning_rate": 2.7562948498034985e-05, "loss": 0.2107, "step": 29186 }, { "epoch": 0.5205828844576036, "grad_norm": 0.24496445059776306, "learning_rate": 2.7561400186477627e-05, "loss": 0.1343, "step": 29187 }, { "epoch": 0.5206007205793173, "grad_norm": 0.26318058371543884, "learning_rate": 2.7559851864991397e-05, "loss": 0.158, "step": 29188 }, { "epoch": 0.520618556701031, "grad_norm": 0.5840705633163452, "learning_rate": 2.7558303533582308e-05, "loss": 0.2025, "step": 29189 }, { "epoch": 0.5206363928227447, "grad_norm": 0.19877250492572784, "learning_rate": 2.755675519225634e-05, "loss": 0.1418, "step": 29190 }, { "epoch": 0.5206542289444583, "grad_norm": 0.21138867735862732, "learning_rate": 2.7555206841019515e-05, "loss": 0.1066, "step": 29191 }, { "epoch": 0.520672065066172, "grad_norm": 0.34063977003097534, "learning_rate": 2.7553658479877814e-05, "loss": 0.1652, "step": 29192 }, { "epoch": 0.5206899011878857, "grad_norm": 0.24009549617767334, "learning_rate": 2.7552110108837263e-05, "loss": 0.133, "step": 29193 }, { "epoch": 0.5207077373095994, "grad_norm": 0.242417111992836, "learning_rate": 2.7550561727903845e-05, "loss": 0.1395, "step": 29194 }, { "epoch": 0.5207255734313131, "grad_norm": 0.21727785468101501, "learning_rate": 2.7549013337083567e-05, "loss": 0.1326, "step": 29195 }, { "epoch": 0.5207434095530268, "grad_norm": 0.2895382344722748, "learning_rate": 2.7547464936382433e-05, "loss": 0.1272, "step": 29196 }, { "epoch": 0.5207612456747405, "grad_norm": 0.31297650933265686, "learning_rate": 2.7545916525806446e-05, "loss": 0.1758, "step": 29197 }, { "epoch": 0.5207790817964542, "grad_norm": 0.3134779930114746, "learning_rate": 2.754436810536161e-05, "loss": 0.186, "step": 29198 }, { "epoch": 0.5207969179181678, "grad_norm": 0.2492859810590744, "learning_rate": 2.7542819675053927e-05, "loss": 0.1513, "step": 29199 }, { "epoch": 0.5208147540398815, "grad_norm": 0.26196470856666565, "learning_rate": 2.7541271234889393e-05, "loss": 0.0971, "step": 29200 }, { "epoch": 0.5208325901615952, "grad_norm": 0.2854453921318054, "learning_rate": 2.753972278487401e-05, "loss": 0.1331, "step": 29201 }, { "epoch": 0.5208504262833089, "grad_norm": 0.24749097228050232, "learning_rate": 2.7538174325013788e-05, "loss": 0.0945, "step": 29202 }, { "epoch": 0.5208682624050226, "grad_norm": 0.2798328101634979, "learning_rate": 2.7536625855314725e-05, "loss": 0.1631, "step": 29203 }, { "epoch": 0.5208860985267364, "grad_norm": 0.3242731988430023, "learning_rate": 2.753507737578283e-05, "loss": 0.1241, "step": 29204 }, { "epoch": 0.5209039346484501, "grad_norm": 0.37805265188217163, "learning_rate": 2.7533528886424092e-05, "loss": 0.0872, "step": 29205 }, { "epoch": 0.5209217707701638, "grad_norm": 0.5689181089401245, "learning_rate": 2.7531980387244526e-05, "loss": 0.1625, "step": 29206 }, { "epoch": 0.5209396068918775, "grad_norm": 0.341597318649292, "learning_rate": 2.7530431878250124e-05, "loss": 0.1528, "step": 29207 }, { "epoch": 0.5209574430135911, "grad_norm": 0.21227316558361053, "learning_rate": 2.7528883359446895e-05, "loss": 0.1148, "step": 29208 }, { "epoch": 0.5209752791353048, "grad_norm": 0.2937573790550232, "learning_rate": 2.7527334830840838e-05, "loss": 0.1443, "step": 29209 }, { "epoch": 0.5209931152570185, "grad_norm": 0.23038238286972046, "learning_rate": 2.7525786292437967e-05, "loss": 0.1435, "step": 29210 }, { "epoch": 0.5210109513787322, "grad_norm": 0.22587664425373077, "learning_rate": 2.752423774424427e-05, "loss": 0.1457, "step": 29211 }, { "epoch": 0.5210287875004459, "grad_norm": 0.24479340016841888, "learning_rate": 2.752268918626576e-05, "loss": 0.1733, "step": 29212 }, { "epoch": 0.5210466236221596, "grad_norm": 0.23688150942325592, "learning_rate": 2.7521140618508434e-05, "loss": 0.1631, "step": 29213 }, { "epoch": 0.5210644597438733, "grad_norm": 0.245303213596344, "learning_rate": 2.7519592040978297e-05, "loss": 0.172, "step": 29214 }, { "epoch": 0.521082295865587, "grad_norm": 0.2524663805961609, "learning_rate": 2.7518043453681347e-05, "loss": 0.1346, "step": 29215 }, { "epoch": 0.5211001319873007, "grad_norm": 0.3133960962295532, "learning_rate": 2.7516494856623593e-05, "loss": 0.1394, "step": 29216 }, { "epoch": 0.5211179681090143, "grad_norm": 0.24628432095050812, "learning_rate": 2.7514946249811035e-05, "loss": 0.1725, "step": 29217 }, { "epoch": 0.521135804230728, "grad_norm": 0.240559920668602, "learning_rate": 2.7513397633249677e-05, "loss": 0.1251, "step": 29218 }, { "epoch": 0.5211536403524417, "grad_norm": 0.3326775133609772, "learning_rate": 2.751184900694552e-05, "loss": 0.1541, "step": 29219 }, { "epoch": 0.5211714764741555, "grad_norm": 0.2751973569393158, "learning_rate": 2.7510300370904574e-05, "loss": 0.1813, "step": 29220 }, { "epoch": 0.5211893125958692, "grad_norm": 0.2630683481693268, "learning_rate": 2.7508751725132837e-05, "loss": 0.1645, "step": 29221 }, { "epoch": 0.5212071487175829, "grad_norm": 0.36335527896881104, "learning_rate": 2.7507203069636305e-05, "loss": 0.1944, "step": 29222 }, { "epoch": 0.5212249848392966, "grad_norm": 0.31136658787727356, "learning_rate": 2.7505654404420987e-05, "loss": 0.168, "step": 29223 }, { "epoch": 0.5212428209610103, "grad_norm": 0.24197950959205627, "learning_rate": 2.750410572949289e-05, "loss": 0.0985, "step": 29224 }, { "epoch": 0.521260657082724, "grad_norm": 0.2367485612630844, "learning_rate": 2.7502557044858017e-05, "loss": 0.1347, "step": 29225 }, { "epoch": 0.5212784932044376, "grad_norm": 0.2614814341068268, "learning_rate": 2.7501008350522368e-05, "loss": 0.1297, "step": 29226 }, { "epoch": 0.5212963293261513, "grad_norm": 0.22772814333438873, "learning_rate": 2.749945964649195e-05, "loss": 0.1097, "step": 29227 }, { "epoch": 0.521314165447865, "grad_norm": 0.28534895181655884, "learning_rate": 2.7497910932772763e-05, "loss": 0.1612, "step": 29228 }, { "epoch": 0.5213320015695787, "grad_norm": 0.28451213240623474, "learning_rate": 2.7496362209370796e-05, "loss": 0.131, "step": 29229 }, { "epoch": 0.5213498376912924, "grad_norm": 0.35889771580696106, "learning_rate": 2.749481347629208e-05, "loss": 0.1324, "step": 29230 }, { "epoch": 0.5213676738130061, "grad_norm": 0.4010239839553833, "learning_rate": 2.7493264733542602e-05, "loss": 0.1401, "step": 29231 }, { "epoch": 0.5213855099347198, "grad_norm": 0.24921678006649017, "learning_rate": 2.7491715981128374e-05, "loss": 0.161, "step": 29232 }, { "epoch": 0.5214033460564335, "grad_norm": 0.21963492035865784, "learning_rate": 2.7490167219055385e-05, "loss": 0.1274, "step": 29233 }, { "epoch": 0.5214211821781471, "grad_norm": 0.25317490100860596, "learning_rate": 2.7488618447329655e-05, "loss": 0.1928, "step": 29234 }, { "epoch": 0.5214390182998608, "grad_norm": 0.31491875648498535, "learning_rate": 2.7487069665957176e-05, "loss": 0.1343, "step": 29235 }, { "epoch": 0.5214568544215745, "grad_norm": 0.262983500957489, "learning_rate": 2.748552087494396e-05, "loss": 0.1497, "step": 29236 }, { "epoch": 0.5214746905432883, "grad_norm": 0.22808369994163513, "learning_rate": 2.7483972074296e-05, "loss": 0.1837, "step": 29237 }, { "epoch": 0.521492526665002, "grad_norm": 0.2774639129638672, "learning_rate": 2.7482423264019315e-05, "loss": 0.1462, "step": 29238 }, { "epoch": 0.5215103627867157, "grad_norm": 0.2729647755622864, "learning_rate": 2.748087444411989e-05, "loss": 0.1498, "step": 29239 }, { "epoch": 0.5215281989084294, "grad_norm": 0.259782999753952, "learning_rate": 2.747932561460375e-05, "loss": 0.1505, "step": 29240 }, { "epoch": 0.5215460350301431, "grad_norm": 0.20661120116710663, "learning_rate": 2.747777677547689e-05, "loss": 0.1396, "step": 29241 }, { "epoch": 0.5215638711518568, "grad_norm": 0.47408100962638855, "learning_rate": 2.7476227926745297e-05, "loss": 0.1596, "step": 29242 }, { "epoch": 0.5215817072735704, "grad_norm": 0.21687044203281403, "learning_rate": 2.7474679068414992e-05, "loss": 0.1039, "step": 29243 }, { "epoch": 0.5215995433952841, "grad_norm": 0.21080181002616882, "learning_rate": 2.7473130200491982e-05, "loss": 0.0841, "step": 29244 }, { "epoch": 0.5216173795169978, "grad_norm": 0.23617619276046753, "learning_rate": 2.7471581322982266e-05, "loss": 0.1861, "step": 29245 }, { "epoch": 0.5216352156387115, "grad_norm": 0.2647800147533417, "learning_rate": 2.747003243589184e-05, "loss": 0.1325, "step": 29246 }, { "epoch": 0.5216530517604252, "grad_norm": 0.3150482475757599, "learning_rate": 2.7468483539226724e-05, "loss": 0.1759, "step": 29247 }, { "epoch": 0.5216708878821389, "grad_norm": 0.2943536341190338, "learning_rate": 2.7466934632992908e-05, "loss": 0.1252, "step": 29248 }, { "epoch": 0.5216887240038526, "grad_norm": 0.3113054931163788, "learning_rate": 2.7465385717196402e-05, "loss": 0.1267, "step": 29249 }, { "epoch": 0.5217065601255663, "grad_norm": 0.26860058307647705, "learning_rate": 2.7463836791843206e-05, "loss": 0.1281, "step": 29250 }, { "epoch": 0.52172439624728, "grad_norm": 0.20125220715999603, "learning_rate": 2.7462287856939334e-05, "loss": 0.0949, "step": 29251 }, { "epoch": 0.5217422323689936, "grad_norm": 0.2599751353263855, "learning_rate": 2.7460738912490776e-05, "loss": 0.116, "step": 29252 }, { "epoch": 0.5217600684907073, "grad_norm": 0.3939947485923767, "learning_rate": 2.745918995850355e-05, "loss": 0.2015, "step": 29253 }, { "epoch": 0.5217779046124211, "grad_norm": 0.20629280805587769, "learning_rate": 2.745764099498366e-05, "loss": 0.1276, "step": 29254 }, { "epoch": 0.5217957407341348, "grad_norm": 0.22894804179668427, "learning_rate": 2.7456092021937097e-05, "loss": 0.1373, "step": 29255 }, { "epoch": 0.5218135768558485, "grad_norm": 0.37440380454063416, "learning_rate": 2.7454543039369868e-05, "loss": 0.1045, "step": 29256 }, { "epoch": 0.5218314129775622, "grad_norm": 0.2669689357280731, "learning_rate": 2.745299404728799e-05, "loss": 0.1817, "step": 29257 }, { "epoch": 0.5218492490992759, "grad_norm": 0.2596376836299896, "learning_rate": 2.745144504569746e-05, "loss": 0.1442, "step": 29258 }, { "epoch": 0.5218670852209896, "grad_norm": 0.2961863577365875, "learning_rate": 2.7449896034604276e-05, "loss": 0.1603, "step": 29259 }, { "epoch": 0.5218849213427033, "grad_norm": 0.24457305669784546, "learning_rate": 2.744834701401446e-05, "loss": 0.1184, "step": 29260 }, { "epoch": 0.5219027574644169, "grad_norm": 0.29331278800964355, "learning_rate": 2.744679798393399e-05, "loss": 0.1251, "step": 29261 }, { "epoch": 0.5219205935861306, "grad_norm": 0.4266640245914459, "learning_rate": 2.7445248944368902e-05, "loss": 0.1462, "step": 29262 }, { "epoch": 0.5219384297078443, "grad_norm": 0.2676289677619934, "learning_rate": 2.7443699895325176e-05, "loss": 0.1904, "step": 29263 }, { "epoch": 0.521956265829558, "grad_norm": 0.2730562686920166, "learning_rate": 2.7442150836808822e-05, "loss": 0.1253, "step": 29264 }, { "epoch": 0.5219741019512717, "grad_norm": 0.27988651394844055, "learning_rate": 2.7440601768825852e-05, "loss": 0.1588, "step": 29265 }, { "epoch": 0.5219919380729854, "grad_norm": 0.3188876211643219, "learning_rate": 2.7439052691382273e-05, "loss": 0.1372, "step": 29266 }, { "epoch": 0.5220097741946991, "grad_norm": 0.25485074520111084, "learning_rate": 2.7437503604484078e-05, "loss": 0.1114, "step": 29267 }, { "epoch": 0.5220276103164128, "grad_norm": 0.35971009731292725, "learning_rate": 2.7435954508137275e-05, "loss": 0.1477, "step": 29268 }, { "epoch": 0.5220454464381264, "grad_norm": 0.25260525941848755, "learning_rate": 2.7434405402347873e-05, "loss": 0.1092, "step": 29269 }, { "epoch": 0.5220632825598401, "grad_norm": 0.26016655564308167, "learning_rate": 2.743285628712187e-05, "loss": 0.165, "step": 29270 }, { "epoch": 0.5220811186815539, "grad_norm": 0.2793441712856293, "learning_rate": 2.7431307162465282e-05, "loss": 0.2104, "step": 29271 }, { "epoch": 0.5220989548032676, "grad_norm": 0.29178282618522644, "learning_rate": 2.74297580283841e-05, "loss": 0.1044, "step": 29272 }, { "epoch": 0.5221167909249813, "grad_norm": 0.32044875621795654, "learning_rate": 2.7428208884884347e-05, "loss": 0.1868, "step": 29273 }, { "epoch": 0.522134627046695, "grad_norm": 0.2564692497253418, "learning_rate": 2.742665973197201e-05, "loss": 0.1178, "step": 29274 }, { "epoch": 0.5221524631684087, "grad_norm": 0.22253145277500153, "learning_rate": 2.7425110569653106e-05, "loss": 0.1343, "step": 29275 }, { "epoch": 0.5221702992901224, "grad_norm": 0.2613027095794678, "learning_rate": 2.7423561397933635e-05, "loss": 0.1427, "step": 29276 }, { "epoch": 0.5221881354118361, "grad_norm": 0.22599023580551147, "learning_rate": 2.7422012216819603e-05, "loss": 0.1203, "step": 29277 }, { "epoch": 0.5222059715335498, "grad_norm": 0.30845287442207336, "learning_rate": 2.742046302631701e-05, "loss": 0.1195, "step": 29278 }, { "epoch": 0.5222238076552634, "grad_norm": 0.2735668420791626, "learning_rate": 2.7418913826431874e-05, "loss": 0.143, "step": 29279 }, { "epoch": 0.5222416437769771, "grad_norm": 0.2763333320617676, "learning_rate": 2.7417364617170195e-05, "loss": 0.1242, "step": 29280 }, { "epoch": 0.5222594798986908, "grad_norm": 0.26139646768569946, "learning_rate": 2.7415815398537964e-05, "loss": 0.142, "step": 29281 }, { "epoch": 0.5222773160204045, "grad_norm": 0.32071706652641296, "learning_rate": 2.741426617054121e-05, "loss": 0.1671, "step": 29282 }, { "epoch": 0.5222951521421182, "grad_norm": 0.2634124457836151, "learning_rate": 2.7412716933185918e-05, "loss": 0.1413, "step": 29283 }, { "epoch": 0.5223129882638319, "grad_norm": 0.25278130173683167, "learning_rate": 2.7411167686478102e-05, "loss": 0.1116, "step": 29284 }, { "epoch": 0.5223308243855456, "grad_norm": 0.25004836916923523, "learning_rate": 2.740961843042377e-05, "loss": 0.1082, "step": 29285 }, { "epoch": 0.5223486605072593, "grad_norm": 0.29337209463119507, "learning_rate": 2.7408069165028928e-05, "loss": 0.1598, "step": 29286 }, { "epoch": 0.5223664966289729, "grad_norm": 0.19883732497692108, "learning_rate": 2.740651989029957e-05, "loss": 0.1237, "step": 29287 }, { "epoch": 0.5223843327506867, "grad_norm": 0.37855368852615356, "learning_rate": 2.7404970606241715e-05, "loss": 0.1433, "step": 29288 }, { "epoch": 0.5224021688724004, "grad_norm": 0.3001745343208313, "learning_rate": 2.740342131286136e-05, "loss": 0.0821, "step": 29289 }, { "epoch": 0.5224200049941141, "grad_norm": 0.35412755608558655, "learning_rate": 2.740187201016452e-05, "loss": 0.1582, "step": 29290 }, { "epoch": 0.5224378411158278, "grad_norm": 0.25114208459854126, "learning_rate": 2.740032269815719e-05, "loss": 0.1417, "step": 29291 }, { "epoch": 0.5224556772375415, "grad_norm": 0.2741473913192749, "learning_rate": 2.7398773376845384e-05, "loss": 0.1382, "step": 29292 }, { "epoch": 0.5224735133592552, "grad_norm": 0.25901567935943604, "learning_rate": 2.73972240462351e-05, "loss": 0.1593, "step": 29293 }, { "epoch": 0.5224913494809689, "grad_norm": 0.23876120150089264, "learning_rate": 2.7395674706332347e-05, "loss": 0.1332, "step": 29294 }, { "epoch": 0.5225091856026826, "grad_norm": 0.30361801385879517, "learning_rate": 2.7394125357143135e-05, "loss": 0.1273, "step": 29295 }, { "epoch": 0.5225270217243962, "grad_norm": 0.2836470603942871, "learning_rate": 2.7392575998673465e-05, "loss": 0.1423, "step": 29296 }, { "epoch": 0.5225448578461099, "grad_norm": 0.2937440276145935, "learning_rate": 2.7391026630929344e-05, "loss": 0.1517, "step": 29297 }, { "epoch": 0.5225626939678236, "grad_norm": 0.28235161304473877, "learning_rate": 2.7389477253916777e-05, "loss": 0.1778, "step": 29298 }, { "epoch": 0.5225805300895373, "grad_norm": 0.31325188279151917, "learning_rate": 2.738792786764177e-05, "loss": 0.1573, "step": 29299 }, { "epoch": 0.522598366211251, "grad_norm": 0.22566524147987366, "learning_rate": 2.738637847211033e-05, "loss": 0.1324, "step": 29300 }, { "epoch": 0.5226162023329647, "grad_norm": 0.3627447187900543, "learning_rate": 2.7384829067328465e-05, "loss": 0.1744, "step": 29301 }, { "epoch": 0.5226340384546784, "grad_norm": 0.2041521668434143, "learning_rate": 2.7383279653302175e-05, "loss": 0.1194, "step": 29302 }, { "epoch": 0.5226518745763921, "grad_norm": 0.2611396312713623, "learning_rate": 2.7381730230037477e-05, "loss": 0.1443, "step": 29303 }, { "epoch": 0.5226697106981057, "grad_norm": 0.23618021607398987, "learning_rate": 2.7380180797540363e-05, "loss": 0.1517, "step": 29304 }, { "epoch": 0.5226875468198195, "grad_norm": 0.429051011800766, "learning_rate": 2.7378631355816854e-05, "loss": 0.1712, "step": 29305 }, { "epoch": 0.5227053829415332, "grad_norm": 0.19206973910331726, "learning_rate": 2.7377081904872938e-05, "loss": 0.1587, "step": 29306 }, { "epoch": 0.5227232190632469, "grad_norm": 0.3254934549331665, "learning_rate": 2.7375532444714635e-05, "loss": 0.1174, "step": 29307 }, { "epoch": 0.5227410551849606, "grad_norm": 0.2245730310678482, "learning_rate": 2.737398297534795e-05, "loss": 0.181, "step": 29308 }, { "epoch": 0.5227588913066743, "grad_norm": 0.24527154862880707, "learning_rate": 2.7372433496778887e-05, "loss": 0.1603, "step": 29309 }, { "epoch": 0.522776727428388, "grad_norm": 0.2554319202899933, "learning_rate": 2.7370884009013452e-05, "loss": 0.1258, "step": 29310 }, { "epoch": 0.5227945635501017, "grad_norm": 0.25624948740005493, "learning_rate": 2.7369334512057653e-05, "loss": 0.1187, "step": 29311 }, { "epoch": 0.5228123996718154, "grad_norm": 0.3218866288661957, "learning_rate": 2.736778500591749e-05, "loss": 0.1471, "step": 29312 }, { "epoch": 0.522830235793529, "grad_norm": 0.2931661009788513, "learning_rate": 2.736623549059898e-05, "loss": 0.1467, "step": 29313 }, { "epoch": 0.5228480719152427, "grad_norm": 0.284283310174942, "learning_rate": 2.7364685966108122e-05, "loss": 0.1063, "step": 29314 }, { "epoch": 0.5228659080369564, "grad_norm": 0.23836104571819305, "learning_rate": 2.736313643245092e-05, "loss": 0.1161, "step": 29315 }, { "epoch": 0.5228837441586701, "grad_norm": 0.3303387761116028, "learning_rate": 2.7361586889633396e-05, "loss": 0.1341, "step": 29316 }, { "epoch": 0.5229015802803838, "grad_norm": 0.2390783131122589, "learning_rate": 2.7360037337661536e-05, "loss": 0.1362, "step": 29317 }, { "epoch": 0.5229194164020975, "grad_norm": 0.21096543967723846, "learning_rate": 2.7358487776541365e-05, "loss": 0.095, "step": 29318 }, { "epoch": 0.5229372525238112, "grad_norm": 0.2093539834022522, "learning_rate": 2.735693820627887e-05, "loss": 0.1408, "step": 29319 }, { "epoch": 0.5229550886455249, "grad_norm": 0.30725032091140747, "learning_rate": 2.7355388626880075e-05, "loss": 0.1338, "step": 29320 }, { "epoch": 0.5229729247672386, "grad_norm": 0.1974681168794632, "learning_rate": 2.735383903835098e-05, "loss": 0.0951, "step": 29321 }, { "epoch": 0.5229907608889524, "grad_norm": 0.36075061559677124, "learning_rate": 2.7352289440697587e-05, "loss": 0.146, "step": 29322 }, { "epoch": 0.523008597010666, "grad_norm": 0.22667114436626434, "learning_rate": 2.7350739833925914e-05, "loss": 0.1223, "step": 29323 }, { "epoch": 0.5230264331323797, "grad_norm": 0.2949930429458618, "learning_rate": 2.734919021804196e-05, "loss": 0.1318, "step": 29324 }, { "epoch": 0.5230442692540934, "grad_norm": 0.45312586426734924, "learning_rate": 2.7347640593051732e-05, "loss": 0.1577, "step": 29325 }, { "epoch": 0.5230621053758071, "grad_norm": 0.18610809743404388, "learning_rate": 2.7346090958961234e-05, "loss": 0.127, "step": 29326 }, { "epoch": 0.5230799414975208, "grad_norm": 0.3281126618385315, "learning_rate": 2.7344541315776482e-05, "loss": 0.1535, "step": 29327 }, { "epoch": 0.5230977776192345, "grad_norm": 0.23575715720653534, "learning_rate": 2.7342991663503477e-05, "loss": 0.1605, "step": 29328 }, { "epoch": 0.5231156137409482, "grad_norm": 0.25598421692848206, "learning_rate": 2.7341442002148225e-05, "loss": 0.1259, "step": 29329 }, { "epoch": 0.5231334498626619, "grad_norm": 0.3533547818660736, "learning_rate": 2.7339892331716737e-05, "loss": 0.1431, "step": 29330 }, { "epoch": 0.5231512859843755, "grad_norm": 0.31133317947387695, "learning_rate": 2.733834265221502e-05, "loss": 0.1339, "step": 29331 }, { "epoch": 0.5231691221060892, "grad_norm": 0.25769299268722534, "learning_rate": 2.7336792963649076e-05, "loss": 0.116, "step": 29332 }, { "epoch": 0.5231869582278029, "grad_norm": 0.20682445168495178, "learning_rate": 2.7335243266024917e-05, "loss": 0.139, "step": 29333 }, { "epoch": 0.5232047943495166, "grad_norm": 0.3065132200717926, "learning_rate": 2.7333693559348554e-05, "loss": 0.1417, "step": 29334 }, { "epoch": 0.5232226304712303, "grad_norm": 0.3109782934188843, "learning_rate": 2.733214384362598e-05, "loss": 0.1765, "step": 29335 }, { "epoch": 0.523240466592944, "grad_norm": 0.23374749720096588, "learning_rate": 2.7330594118863218e-05, "loss": 0.1415, "step": 29336 }, { "epoch": 0.5232583027146577, "grad_norm": 0.29835987091064453, "learning_rate": 2.7329044385066266e-05, "loss": 0.1742, "step": 29337 }, { "epoch": 0.5232761388363715, "grad_norm": 0.25371772050857544, "learning_rate": 2.7327494642241132e-05, "loss": 0.1149, "step": 29338 }, { "epoch": 0.5232939749580852, "grad_norm": 0.2961961627006531, "learning_rate": 2.7325944890393823e-05, "loss": 0.1162, "step": 29339 }, { "epoch": 0.5233118110797988, "grad_norm": 0.23892581462860107, "learning_rate": 2.732439512953035e-05, "loss": 0.1237, "step": 29340 }, { "epoch": 0.5233296472015125, "grad_norm": 0.21712057292461395, "learning_rate": 2.7322845359656717e-05, "loss": 0.1122, "step": 29341 }, { "epoch": 0.5233474833232262, "grad_norm": 0.21730738878250122, "learning_rate": 2.7321295580778937e-05, "loss": 0.113, "step": 29342 }, { "epoch": 0.5233653194449399, "grad_norm": 0.30356866121292114, "learning_rate": 2.731974579290301e-05, "loss": 0.1537, "step": 29343 }, { "epoch": 0.5233831555666536, "grad_norm": 0.25511807203292847, "learning_rate": 2.7318195996034952e-05, "loss": 0.1031, "step": 29344 }, { "epoch": 0.5234009916883673, "grad_norm": 0.2114313542842865, "learning_rate": 2.731664619018076e-05, "loss": 0.1112, "step": 29345 }, { "epoch": 0.523418827810081, "grad_norm": 0.2681681215763092, "learning_rate": 2.7315096375346456e-05, "loss": 0.1144, "step": 29346 }, { "epoch": 0.5234366639317947, "grad_norm": 0.30722877383232117, "learning_rate": 2.7313546551538027e-05, "loss": 0.1638, "step": 29347 }, { "epoch": 0.5234545000535084, "grad_norm": 0.3154377043247223, "learning_rate": 2.7311996718761494e-05, "loss": 0.1363, "step": 29348 }, { "epoch": 0.523472336175222, "grad_norm": 0.1898418515920639, "learning_rate": 2.731044687702287e-05, "loss": 0.1374, "step": 29349 }, { "epoch": 0.5234901722969357, "grad_norm": 0.32936128973960876, "learning_rate": 2.7308897026328156e-05, "loss": 0.1515, "step": 29350 }, { "epoch": 0.5235080084186494, "grad_norm": 0.21824435889720917, "learning_rate": 2.7307347166683362e-05, "loss": 0.1338, "step": 29351 }, { "epoch": 0.5235258445403631, "grad_norm": 0.2480928599834442, "learning_rate": 2.7305797298094483e-05, "loss": 0.1383, "step": 29352 }, { "epoch": 0.5235436806620768, "grad_norm": 0.27887973189353943, "learning_rate": 2.7304247420567546e-05, "loss": 0.1274, "step": 29353 }, { "epoch": 0.5235615167837905, "grad_norm": 0.26893147826194763, "learning_rate": 2.7302697534108545e-05, "loss": 0.1282, "step": 29354 }, { "epoch": 0.5235793529055043, "grad_norm": 0.23445633053779602, "learning_rate": 2.730114763872349e-05, "loss": 0.1175, "step": 29355 }, { "epoch": 0.523597189027218, "grad_norm": 0.22287671267986298, "learning_rate": 2.7299597734418396e-05, "loss": 0.1091, "step": 29356 }, { "epoch": 0.5236150251489317, "grad_norm": 0.3193022906780243, "learning_rate": 2.729804782119927e-05, "loss": 0.1483, "step": 29357 }, { "epoch": 0.5236328612706453, "grad_norm": 0.2530090808868408, "learning_rate": 2.7296497899072114e-05, "loss": 0.1366, "step": 29358 }, { "epoch": 0.523650697392359, "grad_norm": 0.2959394156932831, "learning_rate": 2.7294947968042944e-05, "loss": 0.1365, "step": 29359 }, { "epoch": 0.5236685335140727, "grad_norm": 0.24657073616981506, "learning_rate": 2.7293398028117757e-05, "loss": 0.0942, "step": 29360 }, { "epoch": 0.5236863696357864, "grad_norm": 0.2343170940876007, "learning_rate": 2.729184807930256e-05, "loss": 0.1508, "step": 29361 }, { "epoch": 0.5237042057575001, "grad_norm": 0.27657824754714966, "learning_rate": 2.7290298121603387e-05, "loss": 0.1351, "step": 29362 }, { "epoch": 0.5237220418792138, "grad_norm": 0.38412174582481384, "learning_rate": 2.7288748155026213e-05, "loss": 0.1497, "step": 29363 }, { "epoch": 0.5237398780009275, "grad_norm": 0.227009579539299, "learning_rate": 2.7287198179577066e-05, "loss": 0.139, "step": 29364 }, { "epoch": 0.5237577141226412, "grad_norm": 0.2263375222682953, "learning_rate": 2.7285648195261948e-05, "loss": 0.1453, "step": 29365 }, { "epoch": 0.5237755502443548, "grad_norm": 0.24828296899795532, "learning_rate": 2.7284098202086872e-05, "loss": 0.1319, "step": 29366 }, { "epoch": 0.5237933863660685, "grad_norm": 0.2947194278240204, "learning_rate": 2.7282548200057835e-05, "loss": 0.1392, "step": 29367 }, { "epoch": 0.5238112224877822, "grad_norm": 0.2659969627857208, "learning_rate": 2.7280998189180858e-05, "loss": 0.1533, "step": 29368 }, { "epoch": 0.5238290586094959, "grad_norm": 0.3143240511417389, "learning_rate": 2.7279448169461934e-05, "loss": 0.098, "step": 29369 }, { "epoch": 0.5238468947312096, "grad_norm": 0.29897889494895935, "learning_rate": 2.727789814090709e-05, "loss": 0.1136, "step": 29370 }, { "epoch": 0.5238647308529233, "grad_norm": 0.25768452882766724, "learning_rate": 2.727634810352233e-05, "loss": 0.1933, "step": 29371 }, { "epoch": 0.5238825669746371, "grad_norm": 0.23185980319976807, "learning_rate": 2.7274798057313654e-05, "loss": 0.1158, "step": 29372 }, { "epoch": 0.5239004030963508, "grad_norm": 0.24120916426181793, "learning_rate": 2.7273248002287083e-05, "loss": 0.1081, "step": 29373 }, { "epoch": 0.5239182392180645, "grad_norm": 0.24477148056030273, "learning_rate": 2.727169793844861e-05, "loss": 0.1545, "step": 29374 }, { "epoch": 0.5239360753397782, "grad_norm": 0.21501465141773224, "learning_rate": 2.7270147865804248e-05, "loss": 0.1414, "step": 29375 }, { "epoch": 0.5239539114614918, "grad_norm": 0.261867880821228, "learning_rate": 2.7268597784360007e-05, "loss": 0.2095, "step": 29376 }, { "epoch": 0.5239717475832055, "grad_norm": 0.2604585587978363, "learning_rate": 2.7267047694121905e-05, "loss": 0.1726, "step": 29377 }, { "epoch": 0.5239895837049192, "grad_norm": 0.24295467138290405, "learning_rate": 2.726549759509594e-05, "loss": 0.1615, "step": 29378 }, { "epoch": 0.5240074198266329, "grad_norm": 0.27478599548339844, "learning_rate": 2.7263947487288128e-05, "loss": 0.1132, "step": 29379 }, { "epoch": 0.5240252559483466, "grad_norm": 0.21894565224647522, "learning_rate": 2.7262397370704468e-05, "loss": 0.1587, "step": 29380 }, { "epoch": 0.5240430920700603, "grad_norm": 0.28471311926841736, "learning_rate": 2.7260847245350978e-05, "loss": 0.1028, "step": 29381 }, { "epoch": 0.524060928191774, "grad_norm": 0.22251319885253906, "learning_rate": 2.725929711123366e-05, "loss": 0.1533, "step": 29382 }, { "epoch": 0.5240787643134877, "grad_norm": 0.23817679286003113, "learning_rate": 2.725774696835853e-05, "loss": 0.1807, "step": 29383 }, { "epoch": 0.5240966004352013, "grad_norm": 0.2621144652366638, "learning_rate": 2.7256196816731588e-05, "loss": 0.1495, "step": 29384 }, { "epoch": 0.524114436556915, "grad_norm": 0.21168941259384155, "learning_rate": 2.7254646656358852e-05, "loss": 0.1189, "step": 29385 }, { "epoch": 0.5241322726786287, "grad_norm": 0.30611488223075867, "learning_rate": 2.7253096487246332e-05, "loss": 0.169, "step": 29386 }, { "epoch": 0.5241501088003424, "grad_norm": 0.22719596326351166, "learning_rate": 2.725154630940002e-05, "loss": 0.1107, "step": 29387 }, { "epoch": 0.5241679449220561, "grad_norm": 0.4431948661804199, "learning_rate": 2.7249996122825944e-05, "loss": 0.1026, "step": 29388 }, { "epoch": 0.5241857810437699, "grad_norm": 0.3256751298904419, "learning_rate": 2.7248445927530098e-05, "loss": 0.1285, "step": 29389 }, { "epoch": 0.5242036171654836, "grad_norm": 0.3353295624256134, "learning_rate": 2.7246895723518513e-05, "loss": 0.1366, "step": 29390 }, { "epoch": 0.5242214532871973, "grad_norm": 0.32560133934020996, "learning_rate": 2.7245345510797172e-05, "loss": 0.1232, "step": 29391 }, { "epoch": 0.524239289408911, "grad_norm": 0.3134133219718933, "learning_rate": 2.7243795289372104e-05, "loss": 0.173, "step": 29392 }, { "epoch": 0.5242571255306246, "grad_norm": 0.31444886326789856, "learning_rate": 2.7242245059249305e-05, "loss": 0.1311, "step": 29393 }, { "epoch": 0.5242749616523383, "grad_norm": 0.1758672595024109, "learning_rate": 2.7240694820434793e-05, "loss": 0.126, "step": 29394 }, { "epoch": 0.524292797774052, "grad_norm": 0.28313782811164856, "learning_rate": 2.723914457293457e-05, "loss": 0.1766, "step": 29395 }, { "epoch": 0.5243106338957657, "grad_norm": 0.2278400957584381, "learning_rate": 2.723759431675465e-05, "loss": 0.1617, "step": 29396 }, { "epoch": 0.5243284700174794, "grad_norm": 0.2927832007408142, "learning_rate": 2.7236044051901044e-05, "loss": 0.187, "step": 29397 }, { "epoch": 0.5243463061391931, "grad_norm": 0.24268564581871033, "learning_rate": 2.723449377837976e-05, "loss": 0.1541, "step": 29398 }, { "epoch": 0.5243641422609068, "grad_norm": 0.304359495639801, "learning_rate": 2.7232943496196806e-05, "loss": 0.1118, "step": 29399 }, { "epoch": 0.5243819783826205, "grad_norm": 0.27364951372146606, "learning_rate": 2.723139320535819e-05, "loss": 0.1522, "step": 29400 }, { "epoch": 0.5243998145043341, "grad_norm": 0.2424466460943222, "learning_rate": 2.722984290586993e-05, "loss": 0.1116, "step": 29401 }, { "epoch": 0.5244176506260478, "grad_norm": 0.278877854347229, "learning_rate": 2.722829259773802e-05, "loss": 0.1135, "step": 29402 }, { "epoch": 0.5244354867477615, "grad_norm": 0.21040643751621246, "learning_rate": 2.7226742280968475e-05, "loss": 0.1299, "step": 29403 }, { "epoch": 0.5244533228694752, "grad_norm": 0.401019811630249, "learning_rate": 2.7225191955567314e-05, "loss": 0.1865, "step": 29404 }, { "epoch": 0.5244711589911889, "grad_norm": 0.3226037621498108, "learning_rate": 2.7223641621540542e-05, "loss": 0.1821, "step": 29405 }, { "epoch": 0.5244889951129027, "grad_norm": 0.2710559368133545, "learning_rate": 2.7222091278894163e-05, "loss": 0.17, "step": 29406 }, { "epoch": 0.5245068312346164, "grad_norm": 0.22361299395561218, "learning_rate": 2.7220540927634198e-05, "loss": 0.1389, "step": 29407 }, { "epoch": 0.5245246673563301, "grad_norm": 0.2204882651567459, "learning_rate": 2.721899056776664e-05, "loss": 0.1559, "step": 29408 }, { "epoch": 0.5245425034780438, "grad_norm": 0.361446738243103, "learning_rate": 2.7217440199297516e-05, "loss": 0.1443, "step": 29409 }, { "epoch": 0.5245603395997575, "grad_norm": 0.23979291319847107, "learning_rate": 2.7215889822232817e-05, "loss": 0.1441, "step": 29410 }, { "epoch": 0.5245781757214711, "grad_norm": 0.28126832842826843, "learning_rate": 2.7214339436578573e-05, "loss": 0.1148, "step": 29411 }, { "epoch": 0.5245960118431848, "grad_norm": 0.24983297288417816, "learning_rate": 2.7212789042340786e-05, "loss": 0.1452, "step": 29412 }, { "epoch": 0.5246138479648985, "grad_norm": 0.2227584719657898, "learning_rate": 2.721123863952546e-05, "loss": 0.1237, "step": 29413 }, { "epoch": 0.5246316840866122, "grad_norm": 0.24605344235897064, "learning_rate": 2.7209688228138612e-05, "loss": 0.1661, "step": 29414 }, { "epoch": 0.5246495202083259, "grad_norm": 0.20384852588176727, "learning_rate": 2.7208137808186247e-05, "loss": 0.1141, "step": 29415 }, { "epoch": 0.5246673563300396, "grad_norm": 0.26670730113983154, "learning_rate": 2.7206587379674376e-05, "loss": 0.151, "step": 29416 }, { "epoch": 0.5246851924517533, "grad_norm": 0.2451811134815216, "learning_rate": 2.7205036942609007e-05, "loss": 0.1256, "step": 29417 }, { "epoch": 0.524703028573467, "grad_norm": 0.2369154691696167, "learning_rate": 2.720348649699616e-05, "loss": 0.1511, "step": 29418 }, { "epoch": 0.5247208646951806, "grad_norm": 0.2820444107055664, "learning_rate": 2.7201936042841835e-05, "loss": 0.1515, "step": 29419 }, { "epoch": 0.5247387008168943, "grad_norm": 0.22452563047409058, "learning_rate": 2.7200385580152056e-05, "loss": 0.1305, "step": 29420 }, { "epoch": 0.524756536938608, "grad_norm": 0.2157796174287796, "learning_rate": 2.7198835108932808e-05, "loss": 0.1389, "step": 29421 }, { "epoch": 0.5247743730603217, "grad_norm": 0.26535478234291077, "learning_rate": 2.719728462919012e-05, "loss": 0.1293, "step": 29422 }, { "epoch": 0.5247922091820355, "grad_norm": 0.22747619450092316, "learning_rate": 2.719573414093e-05, "loss": 0.1243, "step": 29423 }, { "epoch": 0.5248100453037492, "grad_norm": 0.2745887041091919, "learning_rate": 2.7194183644158454e-05, "loss": 0.1299, "step": 29424 }, { "epoch": 0.5248278814254629, "grad_norm": 0.28680193424224854, "learning_rate": 2.7192633138881495e-05, "loss": 0.1361, "step": 29425 }, { "epoch": 0.5248457175471766, "grad_norm": 0.25914618372917175, "learning_rate": 2.7191082625105137e-05, "loss": 0.1092, "step": 29426 }, { "epoch": 0.5248635536688903, "grad_norm": 0.24438418447971344, "learning_rate": 2.718953210283538e-05, "loss": 0.1247, "step": 29427 }, { "epoch": 0.524881389790604, "grad_norm": 0.30035993456840515, "learning_rate": 2.7187981572078242e-05, "loss": 0.1604, "step": 29428 }, { "epoch": 0.5248992259123176, "grad_norm": 0.2815878093242645, "learning_rate": 2.718643103283973e-05, "loss": 0.0614, "step": 29429 }, { "epoch": 0.5249170620340313, "grad_norm": 0.2685695290565491, "learning_rate": 2.718488048512586e-05, "loss": 0.1562, "step": 29430 }, { "epoch": 0.524934898155745, "grad_norm": 0.27362924814224243, "learning_rate": 2.7183329928942636e-05, "loss": 0.114, "step": 29431 }, { "epoch": 0.5249527342774587, "grad_norm": 0.2638143002986908, "learning_rate": 2.7181779364296067e-05, "loss": 0.1283, "step": 29432 }, { "epoch": 0.5249705703991724, "grad_norm": 0.23861396312713623, "learning_rate": 2.7180228791192176e-05, "loss": 0.1443, "step": 29433 }, { "epoch": 0.5249884065208861, "grad_norm": 0.27368634939193726, "learning_rate": 2.717867820963696e-05, "loss": 0.1901, "step": 29434 }, { "epoch": 0.5250062426425998, "grad_norm": 0.29530012607574463, "learning_rate": 2.7177127619636437e-05, "loss": 0.1261, "step": 29435 }, { "epoch": 0.5250240787643135, "grad_norm": 0.36335819959640503, "learning_rate": 2.7175577021196615e-05, "loss": 0.2225, "step": 29436 }, { "epoch": 0.5250419148860271, "grad_norm": 0.3202860653400421, "learning_rate": 2.71740264143235e-05, "loss": 0.1513, "step": 29437 }, { "epoch": 0.5250597510077408, "grad_norm": 0.32527410984039307, "learning_rate": 2.7172475799023116e-05, "loss": 0.1164, "step": 29438 }, { "epoch": 0.5250775871294546, "grad_norm": 0.24065177142620087, "learning_rate": 2.717092517530146e-05, "loss": 0.1396, "step": 29439 }, { "epoch": 0.5250954232511683, "grad_norm": 0.1808624565601349, "learning_rate": 2.7169374543164556e-05, "loss": 0.101, "step": 29440 }, { "epoch": 0.525113259372882, "grad_norm": 0.27624067664146423, "learning_rate": 2.71678239026184e-05, "loss": 0.1357, "step": 29441 }, { "epoch": 0.5251310954945957, "grad_norm": 0.25203272700309753, "learning_rate": 2.716627325366901e-05, "loss": 0.1475, "step": 29442 }, { "epoch": 0.5251489316163094, "grad_norm": 0.2955394685268402, "learning_rate": 2.7164722596322394e-05, "loss": 0.1507, "step": 29443 }, { "epoch": 0.5251667677380231, "grad_norm": 0.27341705560684204, "learning_rate": 2.716317193058457e-05, "loss": 0.1288, "step": 29444 }, { "epoch": 0.5251846038597368, "grad_norm": 0.34787026047706604, "learning_rate": 2.716162125646154e-05, "loss": 0.121, "step": 29445 }, { "epoch": 0.5252024399814504, "grad_norm": 0.3075961172580719, "learning_rate": 2.716007057395933e-05, "loss": 0.1456, "step": 29446 }, { "epoch": 0.5252202761031641, "grad_norm": 0.29321229457855225, "learning_rate": 2.7158519883083926e-05, "loss": 0.1201, "step": 29447 }, { "epoch": 0.5252381122248778, "grad_norm": 0.311604768037796, "learning_rate": 2.715696918384137e-05, "loss": 0.1686, "step": 29448 }, { "epoch": 0.5252559483465915, "grad_norm": 0.287415474653244, "learning_rate": 2.7155418476237644e-05, "loss": 0.1175, "step": 29449 }, { "epoch": 0.5252737844683052, "grad_norm": 0.2872602939605713, "learning_rate": 2.715386776027878e-05, "loss": 0.1229, "step": 29450 }, { "epoch": 0.5252916205900189, "grad_norm": 0.2948637902736664, "learning_rate": 2.715231703597077e-05, "loss": 0.1046, "step": 29451 }, { "epoch": 0.5253094567117326, "grad_norm": 0.33960095047950745, "learning_rate": 2.7150766303319637e-05, "loss": 0.1757, "step": 29452 }, { "epoch": 0.5253272928334463, "grad_norm": 0.3231659531593323, "learning_rate": 2.71492155623314e-05, "loss": 0.2059, "step": 29453 }, { "epoch": 0.52534512895516, "grad_norm": 0.2295851707458496, "learning_rate": 2.714766481301205e-05, "loss": 0.1708, "step": 29454 }, { "epoch": 0.5253629650768736, "grad_norm": 0.3205930292606354, "learning_rate": 2.7146114055367622e-05, "loss": 0.1164, "step": 29455 }, { "epoch": 0.5253808011985874, "grad_norm": 0.2763197422027588, "learning_rate": 2.7144563289404107e-05, "loss": 0.1028, "step": 29456 }, { "epoch": 0.5253986373203011, "grad_norm": 0.37040671706199646, "learning_rate": 2.714301251512753e-05, "loss": 0.1638, "step": 29457 }, { "epoch": 0.5254164734420148, "grad_norm": 0.23512087762355804, "learning_rate": 2.714146173254389e-05, "loss": 0.1503, "step": 29458 }, { "epoch": 0.5254343095637285, "grad_norm": 0.3356296420097351, "learning_rate": 2.7139910941659204e-05, "loss": 0.1714, "step": 29459 }, { "epoch": 0.5254521456854422, "grad_norm": 0.3051658868789673, "learning_rate": 2.713836014247948e-05, "loss": 0.1644, "step": 29460 }, { "epoch": 0.5254699818071559, "grad_norm": 0.48958054184913635, "learning_rate": 2.713680933501074e-05, "loss": 0.1532, "step": 29461 }, { "epoch": 0.5254878179288696, "grad_norm": 0.21316632628440857, "learning_rate": 2.7135258519258992e-05, "loss": 0.1452, "step": 29462 }, { "epoch": 0.5255056540505832, "grad_norm": 0.3810008764266968, "learning_rate": 2.7133707695230243e-05, "loss": 0.1792, "step": 29463 }, { "epoch": 0.5255234901722969, "grad_norm": 0.30944982171058655, "learning_rate": 2.7132156862930503e-05, "loss": 0.1735, "step": 29464 }, { "epoch": 0.5255413262940106, "grad_norm": 0.25096839666366577, "learning_rate": 2.713060602236579e-05, "loss": 0.1322, "step": 29465 }, { "epoch": 0.5255591624157243, "grad_norm": 0.30388572812080383, "learning_rate": 2.712905517354211e-05, "loss": 0.171, "step": 29466 }, { "epoch": 0.525576998537438, "grad_norm": 0.2303423285484314, "learning_rate": 2.7127504316465473e-05, "loss": 0.1428, "step": 29467 }, { "epoch": 0.5255948346591517, "grad_norm": 0.18653607368469238, "learning_rate": 2.7125953451141907e-05, "loss": 0.1105, "step": 29468 }, { "epoch": 0.5256126707808654, "grad_norm": 0.3784129023551941, "learning_rate": 2.7124402577577397e-05, "loss": 0.124, "step": 29469 }, { "epoch": 0.5256305069025791, "grad_norm": 0.2851652204990387, "learning_rate": 2.712285169577798e-05, "loss": 0.1557, "step": 29470 }, { "epoch": 0.5256483430242928, "grad_norm": 0.3392007350921631, "learning_rate": 2.712130080574965e-05, "loss": 0.1505, "step": 29471 }, { "epoch": 0.5256661791460064, "grad_norm": 0.24897627532482147, "learning_rate": 2.711974990749842e-05, "loss": 0.1158, "step": 29472 }, { "epoch": 0.5256840152677202, "grad_norm": 0.5230845212936401, "learning_rate": 2.7118199001030315e-05, "loss": 0.2621, "step": 29473 }, { "epoch": 0.5257018513894339, "grad_norm": 0.23026961088180542, "learning_rate": 2.7116648086351338e-05, "loss": 0.1139, "step": 29474 }, { "epoch": 0.5257196875111476, "grad_norm": 0.2702396810054779, "learning_rate": 2.7115097163467502e-05, "loss": 0.1738, "step": 29475 }, { "epoch": 0.5257375236328613, "grad_norm": 0.2927078902721405, "learning_rate": 2.7113546232384824e-05, "loss": 0.1282, "step": 29476 }, { "epoch": 0.525755359754575, "grad_norm": 0.2175307720899582, "learning_rate": 2.7111995293109304e-05, "loss": 0.1177, "step": 29477 }, { "epoch": 0.5257731958762887, "grad_norm": 0.2614126205444336, "learning_rate": 2.7110444345646964e-05, "loss": 0.124, "step": 29478 }, { "epoch": 0.5257910319980024, "grad_norm": 0.28760653734207153, "learning_rate": 2.710889339000381e-05, "loss": 0.1105, "step": 29479 }, { "epoch": 0.525808868119716, "grad_norm": 0.24923793971538544, "learning_rate": 2.710734242618585e-05, "loss": 0.1531, "step": 29480 }, { "epoch": 0.5258267042414297, "grad_norm": 0.3242799639701843, "learning_rate": 2.7105791454199113e-05, "loss": 0.1365, "step": 29481 }, { "epoch": 0.5258445403631434, "grad_norm": 0.3814876675605774, "learning_rate": 2.71042404740496e-05, "loss": 0.1704, "step": 29482 }, { "epoch": 0.5258623764848571, "grad_norm": 0.25780394673347473, "learning_rate": 2.7102689485743326e-05, "loss": 0.1415, "step": 29483 }, { "epoch": 0.5258802126065708, "grad_norm": 0.3115689754486084, "learning_rate": 2.7101138489286293e-05, "loss": 0.1374, "step": 29484 }, { "epoch": 0.5258980487282845, "grad_norm": 0.3063698709011078, "learning_rate": 2.709958748468453e-05, "loss": 0.1437, "step": 29485 }, { "epoch": 0.5259158848499982, "grad_norm": 0.23593035340309143, "learning_rate": 2.709803647194404e-05, "loss": 0.1417, "step": 29486 }, { "epoch": 0.5259337209717119, "grad_norm": 0.2669139802455902, "learning_rate": 2.709648545107083e-05, "loss": 0.1469, "step": 29487 }, { "epoch": 0.5259515570934256, "grad_norm": 0.25483858585357666, "learning_rate": 2.7094934422070917e-05, "loss": 0.1639, "step": 29488 }, { "epoch": 0.5259693932151392, "grad_norm": 0.262802392244339, "learning_rate": 2.7093383384950322e-05, "loss": 0.1661, "step": 29489 }, { "epoch": 0.525987229336853, "grad_norm": 0.19983015954494476, "learning_rate": 2.709183233971505e-05, "loss": 0.1162, "step": 29490 }, { "epoch": 0.5260050654585667, "grad_norm": 0.25494176149368286, "learning_rate": 2.709028128637111e-05, "loss": 0.0973, "step": 29491 }, { "epoch": 0.5260229015802804, "grad_norm": 0.23474548757076263, "learning_rate": 2.708873022492452e-05, "loss": 0.123, "step": 29492 }, { "epoch": 0.5260407377019941, "grad_norm": 0.24141724407672882, "learning_rate": 2.7087179155381282e-05, "loss": 0.1155, "step": 29493 }, { "epoch": 0.5260585738237078, "grad_norm": 0.29320254921913147, "learning_rate": 2.7085628077747427e-05, "loss": 0.1415, "step": 29494 }, { "epoch": 0.5260764099454215, "grad_norm": 0.28299564123153687, "learning_rate": 2.708407699202895e-05, "loss": 0.1262, "step": 29495 }, { "epoch": 0.5260942460671352, "grad_norm": 0.7342714071273804, "learning_rate": 2.708252589823188e-05, "loss": 0.1037, "step": 29496 }, { "epoch": 0.5261120821888489, "grad_norm": 0.45992082357406616, "learning_rate": 2.7080974796362213e-05, "loss": 0.1468, "step": 29497 }, { "epoch": 0.5261299183105625, "grad_norm": 0.4210277795791626, "learning_rate": 2.7079423686425976e-05, "loss": 0.1196, "step": 29498 }, { "epoch": 0.5261477544322762, "grad_norm": 0.21317805349826813, "learning_rate": 2.7077872568429164e-05, "loss": 0.1207, "step": 29499 }, { "epoch": 0.5261655905539899, "grad_norm": 0.2553720772266388, "learning_rate": 2.7076321442377805e-05, "loss": 0.1382, "step": 29500 }, { "epoch": 0.5261834266757036, "grad_norm": 0.3327461779117584, "learning_rate": 2.7074770308277904e-05, "loss": 0.1278, "step": 29501 }, { "epoch": 0.5262012627974173, "grad_norm": 0.22602009773254395, "learning_rate": 2.7073219166135483e-05, "loss": 0.1458, "step": 29502 }, { "epoch": 0.526219098919131, "grad_norm": 0.2794208228588104, "learning_rate": 2.707166801595655e-05, "loss": 0.0882, "step": 29503 }, { "epoch": 0.5262369350408447, "grad_norm": 0.4029618203639984, "learning_rate": 2.707011685774711e-05, "loss": 0.2117, "step": 29504 }, { "epoch": 0.5262547711625584, "grad_norm": 0.48170581459999084, "learning_rate": 2.7068565691513186e-05, "loss": 0.1854, "step": 29505 }, { "epoch": 0.526272607284272, "grad_norm": 0.2870563566684723, "learning_rate": 2.706701451726078e-05, "loss": 0.1438, "step": 29506 }, { "epoch": 0.5262904434059859, "grad_norm": 0.20260033011436462, "learning_rate": 2.7065463334995916e-05, "loss": 0.1208, "step": 29507 }, { "epoch": 0.5263082795276995, "grad_norm": 0.2535092532634735, "learning_rate": 2.70639121447246e-05, "loss": 0.158, "step": 29508 }, { "epoch": 0.5263261156494132, "grad_norm": 0.2354477047920227, "learning_rate": 2.7062360946452858e-05, "loss": 0.1688, "step": 29509 }, { "epoch": 0.5263439517711269, "grad_norm": 0.2548867166042328, "learning_rate": 2.706080974018668e-05, "loss": 0.1388, "step": 29510 }, { "epoch": 0.5263617878928406, "grad_norm": 0.2875618636608124, "learning_rate": 2.7059258525932103e-05, "loss": 0.15, "step": 29511 }, { "epoch": 0.5263796240145543, "grad_norm": 0.22793486714363098, "learning_rate": 2.7057707303695118e-05, "loss": 0.1571, "step": 29512 }, { "epoch": 0.526397460136268, "grad_norm": 0.2482042759656906, "learning_rate": 2.7056156073481758e-05, "loss": 0.149, "step": 29513 }, { "epoch": 0.5264152962579817, "grad_norm": 0.2522822618484497, "learning_rate": 2.7054604835298015e-05, "loss": 0.1271, "step": 29514 }, { "epoch": 0.5264331323796954, "grad_norm": 0.27462905645370483, "learning_rate": 2.7053053589149923e-05, "loss": 0.1818, "step": 29515 }, { "epoch": 0.526450968501409, "grad_norm": 0.2821716070175171, "learning_rate": 2.7051502335043488e-05, "loss": 0.2011, "step": 29516 }, { "epoch": 0.5264688046231227, "grad_norm": 0.3535119891166687, "learning_rate": 2.704995107298472e-05, "loss": 0.1497, "step": 29517 }, { "epoch": 0.5264866407448364, "grad_norm": 0.2884179651737213, "learning_rate": 2.7048399802979635e-05, "loss": 0.1137, "step": 29518 }, { "epoch": 0.5265044768665501, "grad_norm": 0.2683907449245453, "learning_rate": 2.704684852503424e-05, "loss": 0.1806, "step": 29519 }, { "epoch": 0.5265223129882638, "grad_norm": 0.3006495535373688, "learning_rate": 2.704529723915455e-05, "loss": 0.1311, "step": 29520 }, { "epoch": 0.5265401491099775, "grad_norm": 0.3350886106491089, "learning_rate": 2.7043745945346587e-05, "loss": 0.1889, "step": 29521 }, { "epoch": 0.5265579852316912, "grad_norm": 0.263200968503952, "learning_rate": 2.704219464361636e-05, "loss": 0.1609, "step": 29522 }, { "epoch": 0.5265758213534049, "grad_norm": 0.2262687087059021, "learning_rate": 2.704064333396988e-05, "loss": 0.1655, "step": 29523 }, { "epoch": 0.5265936574751187, "grad_norm": 0.38764479756355286, "learning_rate": 2.703909201641316e-05, "loss": 0.1276, "step": 29524 }, { "epoch": 0.5266114935968323, "grad_norm": 0.2452140897512436, "learning_rate": 2.703754069095221e-05, "loss": 0.1555, "step": 29525 }, { "epoch": 0.526629329718546, "grad_norm": 0.2584201395511627, "learning_rate": 2.703598935759306e-05, "loss": 0.1059, "step": 29526 }, { "epoch": 0.5266471658402597, "grad_norm": 0.31932732462882996, "learning_rate": 2.7034438016341706e-05, "loss": 0.1245, "step": 29527 }, { "epoch": 0.5266650019619734, "grad_norm": 0.26007887721061707, "learning_rate": 2.7032886667204165e-05, "loss": 0.1917, "step": 29528 }, { "epoch": 0.5266828380836871, "grad_norm": 0.2648650109767914, "learning_rate": 2.7031335310186456e-05, "loss": 0.1272, "step": 29529 }, { "epoch": 0.5267006742054008, "grad_norm": 0.2847784757614136, "learning_rate": 2.7029783945294597e-05, "loss": 0.1792, "step": 29530 }, { "epoch": 0.5267185103271145, "grad_norm": 0.29020044207572937, "learning_rate": 2.702823257253459e-05, "loss": 0.1502, "step": 29531 }, { "epoch": 0.5267363464488282, "grad_norm": 0.3325560390949249, "learning_rate": 2.7026681191912446e-05, "loss": 0.1535, "step": 29532 }, { "epoch": 0.5267541825705419, "grad_norm": 0.21549859642982483, "learning_rate": 2.7025129803434192e-05, "loss": 0.1081, "step": 29533 }, { "epoch": 0.5267720186922555, "grad_norm": 0.2609044313430786, "learning_rate": 2.7023578407105833e-05, "loss": 0.1439, "step": 29534 }, { "epoch": 0.5267898548139692, "grad_norm": 0.26960626244544983, "learning_rate": 2.702202700293338e-05, "loss": 0.1535, "step": 29535 }, { "epoch": 0.5268076909356829, "grad_norm": 0.2711268365383148, "learning_rate": 2.702047559092286e-05, "loss": 0.171, "step": 29536 }, { "epoch": 0.5268255270573966, "grad_norm": 0.21222415566444397, "learning_rate": 2.7018924171080278e-05, "loss": 0.0975, "step": 29537 }, { "epoch": 0.5268433631791103, "grad_norm": 0.28257322311401367, "learning_rate": 2.7017372743411646e-05, "loss": 0.1564, "step": 29538 }, { "epoch": 0.526861199300824, "grad_norm": 0.25108975172042847, "learning_rate": 2.7015821307922982e-05, "loss": 0.1491, "step": 29539 }, { "epoch": 0.5268790354225378, "grad_norm": 0.29895803332328796, "learning_rate": 2.70142698646203e-05, "loss": 0.1501, "step": 29540 }, { "epoch": 0.5268968715442515, "grad_norm": 0.1883089244365692, "learning_rate": 2.701271841350961e-05, "loss": 0.1175, "step": 29541 }, { "epoch": 0.5269147076659652, "grad_norm": 0.21324759721755981, "learning_rate": 2.7011166954596923e-05, "loss": 0.1199, "step": 29542 }, { "epoch": 0.5269325437876788, "grad_norm": 0.26009321212768555, "learning_rate": 2.7009615487888268e-05, "loss": 0.134, "step": 29543 }, { "epoch": 0.5269503799093925, "grad_norm": 0.31335973739624023, "learning_rate": 2.7008064013389644e-05, "loss": 0.1513, "step": 29544 }, { "epoch": 0.5269682160311062, "grad_norm": 0.31468507647514343, "learning_rate": 2.700651253110707e-05, "loss": 0.1181, "step": 29545 }, { "epoch": 0.5269860521528199, "grad_norm": 0.24568194150924683, "learning_rate": 2.7004961041046566e-05, "loss": 0.1637, "step": 29546 }, { "epoch": 0.5270038882745336, "grad_norm": 0.3065956234931946, "learning_rate": 2.7003409543214136e-05, "loss": 0.1175, "step": 29547 }, { "epoch": 0.5270217243962473, "grad_norm": 0.26989251375198364, "learning_rate": 2.7001858037615796e-05, "loss": 0.1209, "step": 29548 }, { "epoch": 0.527039560517961, "grad_norm": 0.3446468114852905, "learning_rate": 2.7000306524257564e-05, "loss": 0.1534, "step": 29549 }, { "epoch": 0.5270573966396747, "grad_norm": 0.27404987812042236, "learning_rate": 2.699875500314546e-05, "loss": 0.1546, "step": 29550 }, { "epoch": 0.5270752327613883, "grad_norm": 0.28950831294059753, "learning_rate": 2.699720347428548e-05, "loss": 0.1027, "step": 29551 }, { "epoch": 0.527093068883102, "grad_norm": 0.15191729366779327, "learning_rate": 2.699565193768366e-05, "loss": 0.1116, "step": 29552 }, { "epoch": 0.5271109050048157, "grad_norm": 0.2926084101200104, "learning_rate": 2.6994100393345993e-05, "loss": 0.1555, "step": 29553 }, { "epoch": 0.5271287411265294, "grad_norm": 0.3048332929611206, "learning_rate": 2.699254884127852e-05, "loss": 0.1286, "step": 29554 }, { "epoch": 0.5271465772482431, "grad_norm": 0.23489142954349518, "learning_rate": 2.6990997281487224e-05, "loss": 0.1303, "step": 29555 }, { "epoch": 0.5271644133699568, "grad_norm": 0.2898871600627899, "learning_rate": 2.698944571397814e-05, "loss": 0.1194, "step": 29556 }, { "epoch": 0.5271822494916706, "grad_norm": 0.26908111572265625, "learning_rate": 2.698789413875728e-05, "loss": 0.1384, "step": 29557 }, { "epoch": 0.5272000856133843, "grad_norm": 0.22471243143081665, "learning_rate": 2.698634255583065e-05, "loss": 0.1266, "step": 29558 }, { "epoch": 0.527217921735098, "grad_norm": 0.22601579129695892, "learning_rate": 2.698479096520428e-05, "loss": 0.1509, "step": 29559 }, { "epoch": 0.5272357578568116, "grad_norm": 0.23397766053676605, "learning_rate": 2.6983239366884165e-05, "loss": 0.1265, "step": 29560 }, { "epoch": 0.5272535939785253, "grad_norm": 0.22585737705230713, "learning_rate": 2.698168776087634e-05, "loss": 0.1266, "step": 29561 }, { "epoch": 0.527271430100239, "grad_norm": 0.2820310592651367, "learning_rate": 2.6980136147186796e-05, "loss": 0.1338, "step": 29562 }, { "epoch": 0.5272892662219527, "grad_norm": 0.23659257590770721, "learning_rate": 2.6978584525821566e-05, "loss": 0.1454, "step": 29563 }, { "epoch": 0.5273071023436664, "grad_norm": 0.2303420454263687, "learning_rate": 2.6977032896786657e-05, "loss": 0.1094, "step": 29564 }, { "epoch": 0.5273249384653801, "grad_norm": 0.22334200143814087, "learning_rate": 2.697548126008809e-05, "loss": 0.0577, "step": 29565 }, { "epoch": 0.5273427745870938, "grad_norm": 0.21660497784614563, "learning_rate": 2.6973929615731873e-05, "loss": 0.0816, "step": 29566 }, { "epoch": 0.5273606107088075, "grad_norm": 0.38114750385284424, "learning_rate": 2.6972377963724028e-05, "loss": 0.1306, "step": 29567 }, { "epoch": 0.5273784468305212, "grad_norm": 0.2799783945083618, "learning_rate": 2.6970826304070558e-05, "loss": 0.1357, "step": 29568 }, { "epoch": 0.5273962829522348, "grad_norm": 0.23243758082389832, "learning_rate": 2.696927463677748e-05, "loss": 0.1711, "step": 29569 }, { "epoch": 0.5274141190739485, "grad_norm": 0.23562775552272797, "learning_rate": 2.696772296185082e-05, "loss": 0.1615, "step": 29570 }, { "epoch": 0.5274319551956622, "grad_norm": 0.20844227075576782, "learning_rate": 2.696617127929659e-05, "loss": 0.1214, "step": 29571 }, { "epoch": 0.5274497913173759, "grad_norm": 0.23749417066574097, "learning_rate": 2.6964619589120798e-05, "loss": 0.1121, "step": 29572 }, { "epoch": 0.5274676274390896, "grad_norm": 0.27115121483802795, "learning_rate": 2.696306789132946e-05, "loss": 0.1387, "step": 29573 }, { "epoch": 0.5274854635608034, "grad_norm": 0.2638593912124634, "learning_rate": 2.6961516185928597e-05, "loss": 0.1229, "step": 29574 }, { "epoch": 0.5275032996825171, "grad_norm": 0.31775814294815063, "learning_rate": 2.6959964472924215e-05, "loss": 0.0988, "step": 29575 }, { "epoch": 0.5275211358042308, "grad_norm": 0.3245813846588135, "learning_rate": 2.6958412752322333e-05, "loss": 0.1409, "step": 29576 }, { "epoch": 0.5275389719259445, "grad_norm": 0.24844306707382202, "learning_rate": 2.6956861024128966e-05, "loss": 0.178, "step": 29577 }, { "epoch": 0.5275568080476581, "grad_norm": 0.23932389914989471, "learning_rate": 2.6955309288350135e-05, "loss": 0.1535, "step": 29578 }, { "epoch": 0.5275746441693718, "grad_norm": 0.3015996515750885, "learning_rate": 2.695375754499185e-05, "loss": 0.1651, "step": 29579 }, { "epoch": 0.5275924802910855, "grad_norm": 0.22868657112121582, "learning_rate": 2.695220579406012e-05, "loss": 0.1318, "step": 29580 }, { "epoch": 0.5276103164127992, "grad_norm": 0.2838252782821655, "learning_rate": 2.6950654035560967e-05, "loss": 0.1814, "step": 29581 }, { "epoch": 0.5276281525345129, "grad_norm": 0.2079337239265442, "learning_rate": 2.6949102269500413e-05, "loss": 0.1209, "step": 29582 }, { "epoch": 0.5276459886562266, "grad_norm": 0.29225945472717285, "learning_rate": 2.694755049588446e-05, "loss": 0.1612, "step": 29583 }, { "epoch": 0.5276638247779403, "grad_norm": 0.40615150332450867, "learning_rate": 2.6945998714719127e-05, "loss": 0.1316, "step": 29584 }, { "epoch": 0.527681660899654, "grad_norm": 0.2622811794281006, "learning_rate": 2.6944446926010436e-05, "loss": 0.1351, "step": 29585 }, { "epoch": 0.5276994970213676, "grad_norm": 0.23110252618789673, "learning_rate": 2.694289512976439e-05, "loss": 0.1074, "step": 29586 }, { "epoch": 0.5277173331430813, "grad_norm": 0.2519962191581726, "learning_rate": 2.6941343325987016e-05, "loss": 0.1509, "step": 29587 }, { "epoch": 0.527735169264795, "grad_norm": 0.335615873336792, "learning_rate": 2.6939791514684325e-05, "loss": 0.1742, "step": 29588 }, { "epoch": 0.5277530053865087, "grad_norm": 0.24000419676303864, "learning_rate": 2.6938239695862332e-05, "loss": 0.1727, "step": 29589 }, { "epoch": 0.5277708415082224, "grad_norm": 0.24141176044940948, "learning_rate": 2.6936687869527056e-05, "loss": 0.1193, "step": 29590 }, { "epoch": 0.5277886776299362, "grad_norm": 0.40963640809059143, "learning_rate": 2.69351360356845e-05, "loss": 0.1632, "step": 29591 }, { "epoch": 0.5278065137516499, "grad_norm": 0.27701905369758606, "learning_rate": 2.693358419434069e-05, "loss": 0.1825, "step": 29592 }, { "epoch": 0.5278243498733636, "grad_norm": 0.23306919634342194, "learning_rate": 2.6932032345501646e-05, "loss": 0.1179, "step": 29593 }, { "epoch": 0.5278421859950773, "grad_norm": 0.22983503341674805, "learning_rate": 2.693048048917337e-05, "loss": 0.1632, "step": 29594 }, { "epoch": 0.527860022116791, "grad_norm": 0.26778826117515564, "learning_rate": 2.6928928625361893e-05, "loss": 0.0902, "step": 29595 }, { "epoch": 0.5278778582385046, "grad_norm": 0.2940341532230377, "learning_rate": 2.692737675407322e-05, "loss": 0.1856, "step": 29596 }, { "epoch": 0.5278956943602183, "grad_norm": 0.29812338948249817, "learning_rate": 2.692582487531336e-05, "loss": 0.1616, "step": 29597 }, { "epoch": 0.527913530481932, "grad_norm": 0.19200968742370605, "learning_rate": 2.692427298908835e-05, "loss": 0.0886, "step": 29598 }, { "epoch": 0.5279313666036457, "grad_norm": 0.28245770931243896, "learning_rate": 2.6922721095404187e-05, "loss": 0.1792, "step": 29599 }, { "epoch": 0.5279492027253594, "grad_norm": 0.23266640305519104, "learning_rate": 2.69211691942669e-05, "loss": 0.1049, "step": 29600 }, { "epoch": 0.5279670388470731, "grad_norm": 0.2918422222137451, "learning_rate": 2.6919617285682487e-05, "loss": 0.1096, "step": 29601 }, { "epoch": 0.5279848749687868, "grad_norm": 0.3453911244869232, "learning_rate": 2.6918065369656982e-05, "loss": 0.1528, "step": 29602 }, { "epoch": 0.5280027110905005, "grad_norm": 0.24862413108348846, "learning_rate": 2.691651344619639e-05, "loss": 0.1857, "step": 29603 }, { "epoch": 0.5280205472122141, "grad_norm": 0.28764235973358154, "learning_rate": 2.6914961515306726e-05, "loss": 0.1483, "step": 29604 }, { "epoch": 0.5280383833339278, "grad_norm": 0.2671939730644226, "learning_rate": 2.6913409576994016e-05, "loss": 0.1891, "step": 29605 }, { "epoch": 0.5280562194556415, "grad_norm": 0.4348195493221283, "learning_rate": 2.691185763126427e-05, "loss": 0.13, "step": 29606 }, { "epoch": 0.5280740555773552, "grad_norm": 0.32455480098724365, "learning_rate": 2.6910305678123498e-05, "loss": 0.1296, "step": 29607 }, { "epoch": 0.528091891699069, "grad_norm": 0.38393643498420715, "learning_rate": 2.6908753717577728e-05, "loss": 0.1668, "step": 29608 }, { "epoch": 0.5281097278207827, "grad_norm": 0.2736952006816864, "learning_rate": 2.6907201749632966e-05, "loss": 0.1148, "step": 29609 }, { "epoch": 0.5281275639424964, "grad_norm": 0.34408512711524963, "learning_rate": 2.6905649774295232e-05, "loss": 0.1225, "step": 29610 }, { "epoch": 0.5281454000642101, "grad_norm": 0.2795182168483734, "learning_rate": 2.6904097791570538e-05, "loss": 0.1444, "step": 29611 }, { "epoch": 0.5281632361859238, "grad_norm": 0.24202504754066467, "learning_rate": 2.69025458014649e-05, "loss": 0.1344, "step": 29612 }, { "epoch": 0.5281810723076374, "grad_norm": 0.48582082986831665, "learning_rate": 2.6900993803984347e-05, "loss": 0.1911, "step": 29613 }, { "epoch": 0.5281989084293511, "grad_norm": 0.28955477476119995, "learning_rate": 2.6899441799134885e-05, "loss": 0.1472, "step": 29614 }, { "epoch": 0.5282167445510648, "grad_norm": 0.3031627833843231, "learning_rate": 2.6897889786922524e-05, "loss": 0.1929, "step": 29615 }, { "epoch": 0.5282345806727785, "grad_norm": 0.24173744022846222, "learning_rate": 2.689633776735329e-05, "loss": 0.1285, "step": 29616 }, { "epoch": 0.5282524167944922, "grad_norm": 0.23135331273078918, "learning_rate": 2.6894785740433198e-05, "loss": 0.1228, "step": 29617 }, { "epoch": 0.5282702529162059, "grad_norm": 0.2616306245326996, "learning_rate": 2.6893233706168253e-05, "loss": 0.1783, "step": 29618 }, { "epoch": 0.5282880890379196, "grad_norm": 0.23648065328598022, "learning_rate": 2.689168166456449e-05, "loss": 0.0934, "step": 29619 }, { "epoch": 0.5283059251596333, "grad_norm": 0.24911677837371826, "learning_rate": 2.689012961562791e-05, "loss": 0.104, "step": 29620 }, { "epoch": 0.528323761281347, "grad_norm": 0.2668570578098297, "learning_rate": 2.6888577559364542e-05, "loss": 0.1233, "step": 29621 }, { "epoch": 0.5283415974030606, "grad_norm": 0.3501882553100586, "learning_rate": 2.6887025495780392e-05, "loss": 0.1567, "step": 29622 }, { "epoch": 0.5283594335247743, "grad_norm": 0.22387148439884186, "learning_rate": 2.6885473424881475e-05, "loss": 0.1439, "step": 29623 }, { "epoch": 0.528377269646488, "grad_norm": 0.25209546089172363, "learning_rate": 2.6883921346673813e-05, "loss": 0.1373, "step": 29624 }, { "epoch": 0.5283951057682018, "grad_norm": 0.25188687443733215, "learning_rate": 2.688236926116342e-05, "loss": 0.1448, "step": 29625 }, { "epoch": 0.5284129418899155, "grad_norm": 0.3165016770362854, "learning_rate": 2.688081716835632e-05, "loss": 0.1273, "step": 29626 }, { "epoch": 0.5284307780116292, "grad_norm": 0.2478540688753128, "learning_rate": 2.687926506825852e-05, "loss": 0.184, "step": 29627 }, { "epoch": 0.5284486141333429, "grad_norm": 0.3281519114971161, "learning_rate": 2.687771296087604e-05, "loss": 0.1659, "step": 29628 }, { "epoch": 0.5284664502550566, "grad_norm": 0.23523716628551483, "learning_rate": 2.6876160846214892e-05, "loss": 0.1309, "step": 29629 }, { "epoch": 0.5284842863767703, "grad_norm": 0.4224299192428589, "learning_rate": 2.6874608724281102e-05, "loss": 0.1256, "step": 29630 }, { "epoch": 0.5285021224984839, "grad_norm": 0.33960893750190735, "learning_rate": 2.6873056595080674e-05, "loss": 0.1542, "step": 29631 }, { "epoch": 0.5285199586201976, "grad_norm": 0.16312552988529205, "learning_rate": 2.687150445861964e-05, "loss": 0.0935, "step": 29632 }, { "epoch": 0.5285377947419113, "grad_norm": 0.2274068146944046, "learning_rate": 2.6869952314904e-05, "loss": 0.119, "step": 29633 }, { "epoch": 0.528555630863625, "grad_norm": 0.2133219838142395, "learning_rate": 2.686840016393979e-05, "loss": 0.1243, "step": 29634 }, { "epoch": 0.5285734669853387, "grad_norm": 0.27035537362098694, "learning_rate": 2.686684800573301e-05, "loss": 0.1155, "step": 29635 }, { "epoch": 0.5285913031070524, "grad_norm": 0.2978620231151581, "learning_rate": 2.686529584028968e-05, "loss": 0.104, "step": 29636 }, { "epoch": 0.5286091392287661, "grad_norm": 0.37502193450927734, "learning_rate": 2.686374366761582e-05, "loss": 0.1485, "step": 29637 }, { "epoch": 0.5286269753504798, "grad_norm": 0.28957220911979675, "learning_rate": 2.6862191487717448e-05, "loss": 0.13, "step": 29638 }, { "epoch": 0.5286448114721934, "grad_norm": 0.2958044409751892, "learning_rate": 2.6860639300600577e-05, "loss": 0.1565, "step": 29639 }, { "epoch": 0.5286626475939071, "grad_norm": 0.2503563463687897, "learning_rate": 2.685908710627122e-05, "loss": 0.1419, "step": 29640 }, { "epoch": 0.5286804837156209, "grad_norm": 0.2666504681110382, "learning_rate": 2.685753490473541e-05, "loss": 0.1417, "step": 29641 }, { "epoch": 0.5286983198373346, "grad_norm": 0.33652621507644653, "learning_rate": 2.6855982695999142e-05, "loss": 0.141, "step": 29642 }, { "epoch": 0.5287161559590483, "grad_norm": 0.2587040960788727, "learning_rate": 2.6854430480068456e-05, "loss": 0.1308, "step": 29643 }, { "epoch": 0.528733992080762, "grad_norm": 0.31061822175979614, "learning_rate": 2.685287825694935e-05, "loss": 0.1143, "step": 29644 }, { "epoch": 0.5287518282024757, "grad_norm": 0.3000825345516205, "learning_rate": 2.685132602664785e-05, "loss": 0.1805, "step": 29645 }, { "epoch": 0.5287696643241894, "grad_norm": 0.1863195300102234, "learning_rate": 2.6849773789169963e-05, "loss": 0.0964, "step": 29646 }, { "epoch": 0.5287875004459031, "grad_norm": 0.23746301233768463, "learning_rate": 2.6848221544521722e-05, "loss": 0.1174, "step": 29647 }, { "epoch": 0.5288053365676167, "grad_norm": 0.23783206939697266, "learning_rate": 2.6846669292709135e-05, "loss": 0.1421, "step": 29648 }, { "epoch": 0.5288231726893304, "grad_norm": 0.25436627864837646, "learning_rate": 2.684511703373822e-05, "loss": 0.1211, "step": 29649 }, { "epoch": 0.5288410088110441, "grad_norm": 0.2817104160785675, "learning_rate": 2.6843564767615e-05, "loss": 0.1881, "step": 29650 }, { "epoch": 0.5288588449327578, "grad_norm": 0.30766186118125916, "learning_rate": 2.6842012494345475e-05, "loss": 0.1358, "step": 29651 }, { "epoch": 0.5288766810544715, "grad_norm": 0.20351248979568481, "learning_rate": 2.6840460213935676e-05, "loss": 0.1352, "step": 29652 }, { "epoch": 0.5288945171761852, "grad_norm": 0.21055904030799866, "learning_rate": 2.6838907926391614e-05, "loss": 0.1063, "step": 29653 }, { "epoch": 0.5289123532978989, "grad_norm": 0.35401269793510437, "learning_rate": 2.6837355631719325e-05, "loss": 0.2398, "step": 29654 }, { "epoch": 0.5289301894196126, "grad_norm": 0.2958665192127228, "learning_rate": 2.6835803329924798e-05, "loss": 0.1446, "step": 29655 }, { "epoch": 0.5289480255413262, "grad_norm": 0.27006688714027405, "learning_rate": 2.683425102101407e-05, "loss": 0.161, "step": 29656 }, { "epoch": 0.5289658616630399, "grad_norm": 0.20635706186294556, "learning_rate": 2.683269870499314e-05, "loss": 0.096, "step": 29657 }, { "epoch": 0.5289836977847537, "grad_norm": 0.22154195606708527, "learning_rate": 2.6831146381868054e-05, "loss": 0.1421, "step": 29658 }, { "epoch": 0.5290015339064674, "grad_norm": 0.23413850367069244, "learning_rate": 2.6829594051644803e-05, "loss": 0.1423, "step": 29659 }, { "epoch": 0.5290193700281811, "grad_norm": 0.24983985722064972, "learning_rate": 2.682804171432941e-05, "loss": 0.1313, "step": 29660 }, { "epoch": 0.5290372061498948, "grad_norm": 0.33375123143196106, "learning_rate": 2.68264893699279e-05, "loss": 0.2191, "step": 29661 }, { "epoch": 0.5290550422716085, "grad_norm": 0.27024921774864197, "learning_rate": 2.6824937018446283e-05, "loss": 0.1562, "step": 29662 }, { "epoch": 0.5290728783933222, "grad_norm": 0.25165677070617676, "learning_rate": 2.6823384659890587e-05, "loss": 0.0682, "step": 29663 }, { "epoch": 0.5290907145150359, "grad_norm": 0.2871391773223877, "learning_rate": 2.6821832294266817e-05, "loss": 0.1407, "step": 29664 }, { "epoch": 0.5291085506367496, "grad_norm": 0.2757948637008667, "learning_rate": 2.6820279921581e-05, "loss": 0.1626, "step": 29665 }, { "epoch": 0.5291263867584632, "grad_norm": 0.2632501423358917, "learning_rate": 2.6818727541839145e-05, "loss": 0.1607, "step": 29666 }, { "epoch": 0.5291442228801769, "grad_norm": 0.3199211359024048, "learning_rate": 2.681717515504727e-05, "loss": 0.1263, "step": 29667 }, { "epoch": 0.5291620590018906, "grad_norm": 0.3743148446083069, "learning_rate": 2.68156227612114e-05, "loss": 0.1301, "step": 29668 }, { "epoch": 0.5291798951236043, "grad_norm": 0.2938719689846039, "learning_rate": 2.6814070360337556e-05, "loss": 0.146, "step": 29669 }, { "epoch": 0.529197731245318, "grad_norm": 0.28218135237693787, "learning_rate": 2.6812517952431738e-05, "loss": 0.139, "step": 29670 }, { "epoch": 0.5292155673670317, "grad_norm": 0.31690269708633423, "learning_rate": 2.6810965537499982e-05, "loss": 0.1806, "step": 29671 }, { "epoch": 0.5292334034887454, "grad_norm": 0.22488749027252197, "learning_rate": 2.6809413115548298e-05, "loss": 0.1303, "step": 29672 }, { "epoch": 0.5292512396104591, "grad_norm": 0.2351931631565094, "learning_rate": 2.6807860686582696e-05, "loss": 0.1121, "step": 29673 }, { "epoch": 0.5292690757321727, "grad_norm": 0.23707011342048645, "learning_rate": 2.6806308250609212e-05, "loss": 0.178, "step": 29674 }, { "epoch": 0.5292869118538865, "grad_norm": 0.26332613825798035, "learning_rate": 2.6804755807633848e-05, "loss": 0.1472, "step": 29675 }, { "epoch": 0.5293047479756002, "grad_norm": 0.2231779396533966, "learning_rate": 2.6803203357662633e-05, "loss": 0.0987, "step": 29676 }, { "epoch": 0.5293225840973139, "grad_norm": 0.24613699316978455, "learning_rate": 2.680165090070157e-05, "loss": 0.0976, "step": 29677 }, { "epoch": 0.5293404202190276, "grad_norm": 0.20408323407173157, "learning_rate": 2.6800098436756692e-05, "loss": 0.1107, "step": 29678 }, { "epoch": 0.5293582563407413, "grad_norm": 0.24747255444526672, "learning_rate": 2.6798545965834005e-05, "loss": 0.1432, "step": 29679 }, { "epoch": 0.529376092462455, "grad_norm": 0.2198696732521057, "learning_rate": 2.679699348793953e-05, "loss": 0.131, "step": 29680 }, { "epoch": 0.5293939285841687, "grad_norm": 0.2480219304561615, "learning_rate": 2.6795441003079297e-05, "loss": 0.188, "step": 29681 }, { "epoch": 0.5294117647058824, "grad_norm": 0.22994859516620636, "learning_rate": 2.6793888511259314e-05, "loss": 0.1038, "step": 29682 }, { "epoch": 0.529429600827596, "grad_norm": 0.29792824387550354, "learning_rate": 2.679233601248559e-05, "loss": 0.1203, "step": 29683 }, { "epoch": 0.5294474369493097, "grad_norm": 0.21882866322994232, "learning_rate": 2.6790783506764167e-05, "loss": 0.1758, "step": 29684 }, { "epoch": 0.5294652730710234, "grad_norm": 0.275244802236557, "learning_rate": 2.6789230994101035e-05, "loss": 0.1348, "step": 29685 }, { "epoch": 0.5294831091927371, "grad_norm": 0.28200778365135193, "learning_rate": 2.6787678474502237e-05, "loss": 0.1247, "step": 29686 }, { "epoch": 0.5295009453144508, "grad_norm": 0.3244102895259857, "learning_rate": 2.678612594797377e-05, "loss": 0.1275, "step": 29687 }, { "epoch": 0.5295187814361645, "grad_norm": 0.31253206729888916, "learning_rate": 2.6784573414521662e-05, "loss": 0.1195, "step": 29688 }, { "epoch": 0.5295366175578782, "grad_norm": 0.21333463490009308, "learning_rate": 2.6783020874151943e-05, "loss": 0.1227, "step": 29689 }, { "epoch": 0.5295544536795919, "grad_norm": 0.29453352093696594, "learning_rate": 2.6781468326870607e-05, "loss": 0.1571, "step": 29690 }, { "epoch": 0.5295722898013056, "grad_norm": 0.3719899654388428, "learning_rate": 2.6779915772683694e-05, "loss": 0.1878, "step": 29691 }, { "epoch": 0.5295901259230194, "grad_norm": 0.20396637916564941, "learning_rate": 2.6778363211597202e-05, "loss": 0.0886, "step": 29692 }, { "epoch": 0.529607962044733, "grad_norm": 0.26895245909690857, "learning_rate": 2.6776810643617173e-05, "loss": 0.1078, "step": 29693 }, { "epoch": 0.5296257981664467, "grad_norm": 0.2998652458190918, "learning_rate": 2.6775258068749598e-05, "loss": 0.1118, "step": 29694 }, { "epoch": 0.5296436342881604, "grad_norm": 0.3036497235298157, "learning_rate": 2.6773705487000517e-05, "loss": 0.1487, "step": 29695 }, { "epoch": 0.5296614704098741, "grad_norm": 0.2659032642841339, "learning_rate": 2.6772152898375934e-05, "loss": 0.1517, "step": 29696 }, { "epoch": 0.5296793065315878, "grad_norm": 0.2947097420692444, "learning_rate": 2.6770600302881886e-05, "loss": 0.1667, "step": 29697 }, { "epoch": 0.5296971426533015, "grad_norm": 0.3916162848472595, "learning_rate": 2.6769047700524375e-05, "loss": 0.1186, "step": 29698 }, { "epoch": 0.5297149787750152, "grad_norm": 0.3474830687046051, "learning_rate": 2.6767495091309424e-05, "loss": 0.1478, "step": 29699 }, { "epoch": 0.5297328148967289, "grad_norm": 0.32286831736564636, "learning_rate": 2.676594247524305e-05, "loss": 0.1396, "step": 29700 }, { "epoch": 0.5297506510184425, "grad_norm": 0.43405163288116455, "learning_rate": 2.6764389852331275e-05, "loss": 0.1474, "step": 29701 }, { "epoch": 0.5297684871401562, "grad_norm": 0.2987808287143707, "learning_rate": 2.676283722258011e-05, "loss": 0.1519, "step": 29702 }, { "epoch": 0.5297863232618699, "grad_norm": 0.2750794291496277, "learning_rate": 2.6761284585995584e-05, "loss": 0.175, "step": 29703 }, { "epoch": 0.5298041593835836, "grad_norm": 0.238461434841156, "learning_rate": 2.6759731942583714e-05, "loss": 0.1357, "step": 29704 }, { "epoch": 0.5298219955052973, "grad_norm": 0.3430945873260498, "learning_rate": 2.675817929235051e-05, "loss": 0.1359, "step": 29705 }, { "epoch": 0.529839831627011, "grad_norm": 0.2710132300853729, "learning_rate": 2.6756626635302e-05, "loss": 0.1154, "step": 29706 }, { "epoch": 0.5298576677487247, "grad_norm": 0.2584246098995209, "learning_rate": 2.6755073971444195e-05, "loss": 0.0837, "step": 29707 }, { "epoch": 0.5298755038704384, "grad_norm": 0.20218966901302338, "learning_rate": 2.6753521300783112e-05, "loss": 0.046, "step": 29708 }, { "epoch": 0.5298933399921522, "grad_norm": 0.3276652693748474, "learning_rate": 2.675196862332478e-05, "loss": 0.1813, "step": 29709 }, { "epoch": 0.5299111761138658, "grad_norm": 0.6983379125595093, "learning_rate": 2.6750415939075218e-05, "loss": 0.1238, "step": 29710 }, { "epoch": 0.5299290122355795, "grad_norm": 0.24598878622055054, "learning_rate": 2.674886324804043e-05, "loss": 0.1885, "step": 29711 }, { "epoch": 0.5299468483572932, "grad_norm": 0.2559693157672882, "learning_rate": 2.674731055022645e-05, "loss": 0.1202, "step": 29712 }, { "epoch": 0.5299646844790069, "grad_norm": 0.24974419176578522, "learning_rate": 2.6745757845639297e-05, "loss": 0.1501, "step": 29713 }, { "epoch": 0.5299825206007206, "grad_norm": 0.2565857768058777, "learning_rate": 2.6744205134284972e-05, "loss": 0.0881, "step": 29714 }, { "epoch": 0.5300003567224343, "grad_norm": 0.25720903277397156, "learning_rate": 2.6742652416169506e-05, "loss": 0.1274, "step": 29715 }, { "epoch": 0.530018192844148, "grad_norm": 0.22872160375118256, "learning_rate": 2.6741099691298914e-05, "loss": 0.0985, "step": 29716 }, { "epoch": 0.5300360289658617, "grad_norm": 0.25498610734939575, "learning_rate": 2.6739546959679228e-05, "loss": 0.0875, "step": 29717 }, { "epoch": 0.5300538650875753, "grad_norm": 0.34247514605522156, "learning_rate": 2.6737994221316452e-05, "loss": 0.1209, "step": 29718 }, { "epoch": 0.530071701209289, "grad_norm": 0.24290986359119415, "learning_rate": 2.6736441476216616e-05, "loss": 0.1383, "step": 29719 }, { "epoch": 0.5300895373310027, "grad_norm": 0.28888919949531555, "learning_rate": 2.6734888724385732e-05, "loss": 0.1683, "step": 29720 }, { "epoch": 0.5301073734527164, "grad_norm": 0.30773797631263733, "learning_rate": 2.673333596582982e-05, "loss": 0.1082, "step": 29721 }, { "epoch": 0.5301252095744301, "grad_norm": 0.2061411738395691, "learning_rate": 2.6731783200554895e-05, "loss": 0.1251, "step": 29722 }, { "epoch": 0.5301430456961438, "grad_norm": 0.3899582624435425, "learning_rate": 2.6730230428566978e-05, "loss": 0.1771, "step": 29723 }, { "epoch": 0.5301608818178575, "grad_norm": 0.2877986431121826, "learning_rate": 2.672867764987209e-05, "loss": 0.1336, "step": 29724 }, { "epoch": 0.5301787179395712, "grad_norm": 0.24312570691108704, "learning_rate": 2.6727124864476255e-05, "loss": 0.1526, "step": 29725 }, { "epoch": 0.530196554061285, "grad_norm": 0.27368050813674927, "learning_rate": 2.6725572072385485e-05, "loss": 0.0723, "step": 29726 }, { "epoch": 0.5302143901829987, "grad_norm": 0.227010577917099, "learning_rate": 2.672401927360581e-05, "loss": 0.0941, "step": 29727 }, { "epoch": 0.5302322263047123, "grad_norm": 0.3303409516811371, "learning_rate": 2.6722466468143232e-05, "loss": 0.1767, "step": 29728 }, { "epoch": 0.530250062426426, "grad_norm": 0.21052126586437225, "learning_rate": 2.6720913656003778e-05, "loss": 0.1243, "step": 29729 }, { "epoch": 0.5302678985481397, "grad_norm": 0.2581946551799774, "learning_rate": 2.6719360837193475e-05, "loss": 0.1482, "step": 29730 }, { "epoch": 0.5302857346698534, "grad_norm": 0.22533701360225677, "learning_rate": 2.6717808011718327e-05, "loss": 0.1331, "step": 29731 }, { "epoch": 0.5303035707915671, "grad_norm": 0.26006996631622314, "learning_rate": 2.6716255179584372e-05, "loss": 0.1714, "step": 29732 }, { "epoch": 0.5303214069132808, "grad_norm": 0.27289044857025146, "learning_rate": 2.671470234079761e-05, "loss": 0.1056, "step": 29733 }, { "epoch": 0.5303392430349945, "grad_norm": 0.37725868821144104, "learning_rate": 2.6713149495364076e-05, "loss": 0.1461, "step": 29734 }, { "epoch": 0.5303570791567082, "grad_norm": 0.2387055903673172, "learning_rate": 2.6711596643289783e-05, "loss": 0.1111, "step": 29735 }, { "epoch": 0.5303749152784218, "grad_norm": 0.28241604566574097, "learning_rate": 2.671004378458074e-05, "loss": 0.174, "step": 29736 }, { "epoch": 0.5303927514001355, "grad_norm": 0.25692611932754517, "learning_rate": 2.6708490919242983e-05, "loss": 0.1295, "step": 29737 }, { "epoch": 0.5304105875218492, "grad_norm": 0.24634061753749847, "learning_rate": 2.6706938047282532e-05, "loss": 0.1471, "step": 29738 }, { "epoch": 0.5304284236435629, "grad_norm": 0.22360451519489288, "learning_rate": 2.6705385168705388e-05, "loss": 0.1389, "step": 29739 }, { "epoch": 0.5304462597652766, "grad_norm": 0.26613274216651917, "learning_rate": 2.6703832283517595e-05, "loss": 0.12, "step": 29740 }, { "epoch": 0.5304640958869903, "grad_norm": 0.32375869154930115, "learning_rate": 2.6702279391725155e-05, "loss": 0.1344, "step": 29741 }, { "epoch": 0.5304819320087041, "grad_norm": 0.2760258913040161, "learning_rate": 2.6700726493334087e-05, "loss": 0.1294, "step": 29742 }, { "epoch": 0.5304997681304178, "grad_norm": 0.3204035758972168, "learning_rate": 2.6699173588350416e-05, "loss": 0.1836, "step": 29743 }, { "epoch": 0.5305176042521315, "grad_norm": 0.28693145513534546, "learning_rate": 2.669762067678016e-05, "loss": 0.1686, "step": 29744 }, { "epoch": 0.5305354403738451, "grad_norm": 0.35571402311325073, "learning_rate": 2.669606775862935e-05, "loss": 0.1634, "step": 29745 }, { "epoch": 0.5305532764955588, "grad_norm": 0.19635014235973358, "learning_rate": 2.6694514833903982e-05, "loss": 0.0426, "step": 29746 }, { "epoch": 0.5305711126172725, "grad_norm": 0.5483512282371521, "learning_rate": 2.6692961902610103e-05, "loss": 0.2172, "step": 29747 }, { "epoch": 0.5305889487389862, "grad_norm": 0.20793403685092926, "learning_rate": 2.669140896475371e-05, "loss": 0.0944, "step": 29748 }, { "epoch": 0.5306067848606999, "grad_norm": 0.2743823826313019, "learning_rate": 2.6689856020340835e-05, "loss": 0.1246, "step": 29749 }, { "epoch": 0.5306246209824136, "grad_norm": 0.27652284502983093, "learning_rate": 2.6688303069377492e-05, "loss": 0.1256, "step": 29750 }, { "epoch": 0.5306424571041273, "grad_norm": 0.24813875555992126, "learning_rate": 2.66867501118697e-05, "loss": 0.1316, "step": 29751 }, { "epoch": 0.530660293225841, "grad_norm": 0.19636662304401398, "learning_rate": 2.6685197147823487e-05, "loss": 0.1251, "step": 29752 }, { "epoch": 0.5306781293475547, "grad_norm": 0.3265813887119293, "learning_rate": 2.668364417724487e-05, "loss": 0.1243, "step": 29753 }, { "epoch": 0.5306959654692683, "grad_norm": 0.28732502460479736, "learning_rate": 2.668209120013987e-05, "loss": 0.1251, "step": 29754 }, { "epoch": 0.530713801590982, "grad_norm": 0.2857404351234436, "learning_rate": 2.6680538216514493e-05, "loss": 0.1169, "step": 29755 }, { "epoch": 0.5307316377126957, "grad_norm": 0.2612438201904297, "learning_rate": 2.6678985226374775e-05, "loss": 0.0855, "step": 29756 }, { "epoch": 0.5307494738344094, "grad_norm": 0.338833212852478, "learning_rate": 2.6677432229726723e-05, "loss": 0.1922, "step": 29757 }, { "epoch": 0.5307673099561231, "grad_norm": 0.1790684461593628, "learning_rate": 2.667587922657638e-05, "loss": 0.1214, "step": 29758 }, { "epoch": 0.5307851460778369, "grad_norm": 0.28192517161369324, "learning_rate": 2.6674326216929736e-05, "loss": 0.1728, "step": 29759 }, { "epoch": 0.5308029821995506, "grad_norm": 0.2998497784137726, "learning_rate": 2.6672773200792832e-05, "loss": 0.1297, "step": 29760 }, { "epoch": 0.5308208183212643, "grad_norm": 0.16805927455425262, "learning_rate": 2.667122017817168e-05, "loss": 0.1043, "step": 29761 }, { "epoch": 0.530838654442978, "grad_norm": 0.2797226011753082, "learning_rate": 2.6669667149072303e-05, "loss": 0.1465, "step": 29762 }, { "epoch": 0.5308564905646916, "grad_norm": 0.27864915132522583, "learning_rate": 2.6668114113500715e-05, "loss": 0.1243, "step": 29763 }, { "epoch": 0.5308743266864053, "grad_norm": 0.2488710731267929, "learning_rate": 2.6666561071462943e-05, "loss": 0.1477, "step": 29764 }, { "epoch": 0.530892162808119, "grad_norm": 0.25625959038734436, "learning_rate": 2.6665008022965e-05, "loss": 0.1583, "step": 29765 }, { "epoch": 0.5309099989298327, "grad_norm": 0.24668513238430023, "learning_rate": 2.666345496801292e-05, "loss": 0.1245, "step": 29766 }, { "epoch": 0.5309278350515464, "grad_norm": 0.3247588872909546, "learning_rate": 2.6661901906612712e-05, "loss": 0.0717, "step": 29767 }, { "epoch": 0.5309456711732601, "grad_norm": 0.24523504078388214, "learning_rate": 2.6660348838770392e-05, "loss": 0.1394, "step": 29768 }, { "epoch": 0.5309635072949738, "grad_norm": 0.3074474334716797, "learning_rate": 2.6658795764491995e-05, "loss": 0.1392, "step": 29769 }, { "epoch": 0.5309813434166875, "grad_norm": 0.24126280844211578, "learning_rate": 2.6657242683783523e-05, "loss": 0.1548, "step": 29770 }, { "epoch": 0.5309991795384011, "grad_norm": 0.21336881816387177, "learning_rate": 2.6655689596651014e-05, "loss": 0.0555, "step": 29771 }, { "epoch": 0.5310170156601148, "grad_norm": 0.23083241283893585, "learning_rate": 2.665413650310047e-05, "loss": 0.1052, "step": 29772 }, { "epoch": 0.5310348517818285, "grad_norm": 0.42378973960876465, "learning_rate": 2.665258340313793e-05, "loss": 0.1737, "step": 29773 }, { "epoch": 0.5310526879035422, "grad_norm": 0.2683773338794708, "learning_rate": 2.6651030296769402e-05, "loss": 0.0945, "step": 29774 }, { "epoch": 0.5310705240252559, "grad_norm": 0.25914040207862854, "learning_rate": 2.664947718400092e-05, "loss": 0.1695, "step": 29775 }, { "epoch": 0.5310883601469697, "grad_norm": 0.2754868268966675, "learning_rate": 2.6647924064838487e-05, "loss": 0.1908, "step": 29776 }, { "epoch": 0.5311061962686834, "grad_norm": 0.277584046125412, "learning_rate": 2.6646370939288128e-05, "loss": 0.1157, "step": 29777 }, { "epoch": 0.5311240323903971, "grad_norm": 0.2447315752506256, "learning_rate": 2.6644817807355867e-05, "loss": 0.1698, "step": 29778 }, { "epoch": 0.5311418685121108, "grad_norm": 0.25382477045059204, "learning_rate": 2.6643264669047728e-05, "loss": 0.1165, "step": 29779 }, { "epoch": 0.5311597046338244, "grad_norm": 0.22990615665912628, "learning_rate": 2.6641711524369735e-05, "loss": 0.1301, "step": 29780 }, { "epoch": 0.5311775407555381, "grad_norm": 0.27463415265083313, "learning_rate": 2.664015837332789e-05, "loss": 0.122, "step": 29781 }, { "epoch": 0.5311953768772518, "grad_norm": 0.28555572032928467, "learning_rate": 2.663860521592823e-05, "loss": 0.1084, "step": 29782 }, { "epoch": 0.5312132129989655, "grad_norm": 0.26648399233818054, "learning_rate": 2.6637052052176764e-05, "loss": 0.1537, "step": 29783 }, { "epoch": 0.5312310491206792, "grad_norm": 0.35596129298210144, "learning_rate": 2.6635498882079522e-05, "loss": 0.1812, "step": 29784 }, { "epoch": 0.5312488852423929, "grad_norm": 0.3660792112350464, "learning_rate": 2.663394570564252e-05, "loss": 0.2194, "step": 29785 }, { "epoch": 0.5312667213641066, "grad_norm": 0.35417160391807556, "learning_rate": 2.6632392522871786e-05, "loss": 0.1558, "step": 29786 }, { "epoch": 0.5312845574858203, "grad_norm": 0.2802301347255707, "learning_rate": 2.663083933377333e-05, "loss": 0.1853, "step": 29787 }, { "epoch": 0.531302393607534, "grad_norm": 0.20734675228595734, "learning_rate": 2.6629286138353184e-05, "loss": 0.1531, "step": 29788 }, { "epoch": 0.5313202297292476, "grad_norm": 0.28117266297340393, "learning_rate": 2.662773293661735e-05, "loss": 0.1478, "step": 29789 }, { "epoch": 0.5313380658509613, "grad_norm": 0.27478399872779846, "learning_rate": 2.662617972857187e-05, "loss": 0.1511, "step": 29790 }, { "epoch": 0.531355901972675, "grad_norm": 0.22104164958000183, "learning_rate": 2.662462651422275e-05, "loss": 0.138, "step": 29791 }, { "epoch": 0.5313737380943887, "grad_norm": 0.23585538566112518, "learning_rate": 2.6623073293576018e-05, "loss": 0.1246, "step": 29792 }, { "epoch": 0.5313915742161025, "grad_norm": 0.22908110916614532, "learning_rate": 2.6621520066637702e-05, "loss": 0.1173, "step": 29793 }, { "epoch": 0.5314094103378162, "grad_norm": 0.29057371616363525, "learning_rate": 2.6619966833413802e-05, "loss": 0.1129, "step": 29794 }, { "epoch": 0.5314272464595299, "grad_norm": 0.23905280232429504, "learning_rate": 2.6618413593910363e-05, "loss": 0.1638, "step": 29795 }, { "epoch": 0.5314450825812436, "grad_norm": 0.2564665675163269, "learning_rate": 2.6616860348133384e-05, "loss": 0.0953, "step": 29796 }, { "epoch": 0.5314629187029573, "grad_norm": 0.2837539613246918, "learning_rate": 2.6615307096088903e-05, "loss": 0.1605, "step": 29797 }, { "epoch": 0.5314807548246709, "grad_norm": 0.29691603779792786, "learning_rate": 2.6613753837782928e-05, "loss": 0.0925, "step": 29798 }, { "epoch": 0.5314985909463846, "grad_norm": 0.2663421332836151, "learning_rate": 2.6612200573221486e-05, "loss": 0.1285, "step": 29799 }, { "epoch": 0.5315164270680983, "grad_norm": 0.3599540889263153, "learning_rate": 2.6610647302410597e-05, "loss": 0.1512, "step": 29800 }, { "epoch": 0.531534263189812, "grad_norm": 0.23450668156147003, "learning_rate": 2.6609094025356286e-05, "loss": 0.1182, "step": 29801 }, { "epoch": 0.5315520993115257, "grad_norm": 0.2937523126602173, "learning_rate": 2.6607540742064567e-05, "loss": 0.1358, "step": 29802 }, { "epoch": 0.5315699354332394, "grad_norm": 0.22504177689552307, "learning_rate": 2.6605987452541474e-05, "loss": 0.1122, "step": 29803 }, { "epoch": 0.5315877715549531, "grad_norm": 0.2189985066652298, "learning_rate": 2.6604434156793014e-05, "loss": 0.0763, "step": 29804 }, { "epoch": 0.5316056076766668, "grad_norm": 0.2387552559375763, "learning_rate": 2.6602880854825208e-05, "loss": 0.0898, "step": 29805 }, { "epoch": 0.5316234437983804, "grad_norm": 0.23725664615631104, "learning_rate": 2.6601327546644083e-05, "loss": 0.1453, "step": 29806 }, { "epoch": 0.5316412799200941, "grad_norm": 0.2280396670103073, "learning_rate": 2.6599774232255663e-05, "loss": 0.1296, "step": 29807 }, { "epoch": 0.5316591160418078, "grad_norm": 0.3512893319129944, "learning_rate": 2.6598220911665973e-05, "loss": 0.0848, "step": 29808 }, { "epoch": 0.5316769521635215, "grad_norm": 0.26400449872016907, "learning_rate": 2.6596667584881012e-05, "loss": 0.1198, "step": 29809 }, { "epoch": 0.5316947882852353, "grad_norm": 0.368965744972229, "learning_rate": 2.6595114251906827e-05, "loss": 0.1685, "step": 29810 }, { "epoch": 0.531712624406949, "grad_norm": 0.25596120953559875, "learning_rate": 2.6593560912749422e-05, "loss": 0.1499, "step": 29811 }, { "epoch": 0.5317304605286627, "grad_norm": 0.3207376301288605, "learning_rate": 2.6592007567414823e-05, "loss": 0.1718, "step": 29812 }, { "epoch": 0.5317482966503764, "grad_norm": 0.2451477348804474, "learning_rate": 2.6590454215909054e-05, "loss": 0.1451, "step": 29813 }, { "epoch": 0.5317661327720901, "grad_norm": 0.24433328211307526, "learning_rate": 2.658890085823814e-05, "loss": 0.1233, "step": 29814 }, { "epoch": 0.5317839688938037, "grad_norm": 0.21517139673233032, "learning_rate": 2.6587347494408094e-05, "loss": 0.1412, "step": 29815 }, { "epoch": 0.5318018050155174, "grad_norm": 0.2929937243461609, "learning_rate": 2.6585794124424944e-05, "loss": 0.1371, "step": 29816 }, { "epoch": 0.5318196411372311, "grad_norm": 0.3339700400829315, "learning_rate": 2.6584240748294704e-05, "loss": 0.1404, "step": 29817 }, { "epoch": 0.5318374772589448, "grad_norm": 0.2703548073768616, "learning_rate": 2.6582687366023407e-05, "loss": 0.1635, "step": 29818 }, { "epoch": 0.5318553133806585, "grad_norm": 0.21247000992298126, "learning_rate": 2.6581133977617058e-05, "loss": 0.1018, "step": 29819 }, { "epoch": 0.5318731495023722, "grad_norm": 0.27769261598587036, "learning_rate": 2.6579580583081686e-05, "loss": 0.1979, "step": 29820 }, { "epoch": 0.5318909856240859, "grad_norm": 0.283643513917923, "learning_rate": 2.6578027182423327e-05, "loss": 0.1044, "step": 29821 }, { "epoch": 0.5319088217457996, "grad_norm": 0.2439984828233719, "learning_rate": 2.657647377564798e-05, "loss": 0.1181, "step": 29822 }, { "epoch": 0.5319266578675133, "grad_norm": 0.27115002274513245, "learning_rate": 2.657492036276168e-05, "loss": 0.0853, "step": 29823 }, { "epoch": 0.5319444939892269, "grad_norm": 0.31924304366111755, "learning_rate": 2.6573366943770438e-05, "loss": 0.1259, "step": 29824 }, { "epoch": 0.5319623301109406, "grad_norm": 0.24381551146507263, "learning_rate": 2.6571813518680294e-05, "loss": 0.1525, "step": 29825 }, { "epoch": 0.5319801662326543, "grad_norm": 0.2539726197719574, "learning_rate": 2.6570260087497246e-05, "loss": 0.092, "step": 29826 }, { "epoch": 0.5319980023543681, "grad_norm": 0.31162208318710327, "learning_rate": 2.656870665022733e-05, "loss": 0.1345, "step": 29827 }, { "epoch": 0.5320158384760818, "grad_norm": 0.2673042416572571, "learning_rate": 2.6567153206876566e-05, "loss": 0.17, "step": 29828 }, { "epoch": 0.5320336745977955, "grad_norm": 0.287284255027771, "learning_rate": 2.6565599757450982e-05, "loss": 0.1497, "step": 29829 }, { "epoch": 0.5320515107195092, "grad_norm": 0.23218514025211334, "learning_rate": 2.6564046301956584e-05, "loss": 0.1215, "step": 29830 }, { "epoch": 0.5320693468412229, "grad_norm": 0.23794060945510864, "learning_rate": 2.6562492840399412e-05, "loss": 0.1534, "step": 29831 }, { "epoch": 0.5320871829629366, "grad_norm": 0.2640847861766815, "learning_rate": 2.6560939372785467e-05, "loss": 0.1588, "step": 29832 }, { "epoch": 0.5321050190846502, "grad_norm": 0.2858348786830902, "learning_rate": 2.6559385899120785e-05, "loss": 0.1343, "step": 29833 }, { "epoch": 0.5321228552063639, "grad_norm": 0.17319445312023163, "learning_rate": 2.655783241941139e-05, "loss": 0.08, "step": 29834 }, { "epoch": 0.5321406913280776, "grad_norm": 0.4091813564300537, "learning_rate": 2.655627893366329e-05, "loss": 0.1332, "step": 29835 }, { "epoch": 0.5321585274497913, "grad_norm": 0.22048220038414001, "learning_rate": 2.6554725441882526e-05, "loss": 0.1042, "step": 29836 }, { "epoch": 0.532176363571505, "grad_norm": 0.27298569679260254, "learning_rate": 2.65531719440751e-05, "loss": 0.124, "step": 29837 }, { "epoch": 0.5321941996932187, "grad_norm": 0.3180537521839142, "learning_rate": 2.655161844024705e-05, "loss": 0.1959, "step": 29838 }, { "epoch": 0.5322120358149324, "grad_norm": 0.3155593276023865, "learning_rate": 2.6550064930404384e-05, "loss": 0.16, "step": 29839 }, { "epoch": 0.5322298719366461, "grad_norm": 0.2793496251106262, "learning_rate": 2.6548511414553136e-05, "loss": 0.1217, "step": 29840 }, { "epoch": 0.5322477080583597, "grad_norm": 0.2504490613937378, "learning_rate": 2.6546957892699313e-05, "loss": 0.143, "step": 29841 }, { "epoch": 0.5322655441800734, "grad_norm": 0.21099653840065002, "learning_rate": 2.6545404364848965e-05, "loss": 0.1174, "step": 29842 }, { "epoch": 0.5322833803017872, "grad_norm": 0.26392391324043274, "learning_rate": 2.654385083100808e-05, "loss": 0.1004, "step": 29843 }, { "epoch": 0.5323012164235009, "grad_norm": 0.23668253421783447, "learning_rate": 2.6542297291182704e-05, "loss": 0.1493, "step": 29844 }, { "epoch": 0.5323190525452146, "grad_norm": 0.2799191474914551, "learning_rate": 2.6540743745378855e-05, "loss": 0.1354, "step": 29845 }, { "epoch": 0.5323368886669283, "grad_norm": 0.23976179957389832, "learning_rate": 2.653919019360254e-05, "loss": 0.1821, "step": 29846 }, { "epoch": 0.532354724788642, "grad_norm": 0.28765764832496643, "learning_rate": 2.6537636635859797e-05, "loss": 0.0989, "step": 29847 }, { "epoch": 0.5323725609103557, "grad_norm": 0.35809677839279175, "learning_rate": 2.653608307215664e-05, "loss": 0.1578, "step": 29848 }, { "epoch": 0.5323903970320694, "grad_norm": 0.44214388728141785, "learning_rate": 2.6534529502499105e-05, "loss": 0.1761, "step": 29849 }, { "epoch": 0.532408233153783, "grad_norm": 0.38853806257247925, "learning_rate": 2.6532975926893195e-05, "loss": 0.1776, "step": 29850 }, { "epoch": 0.5324260692754967, "grad_norm": 0.2627245783805847, "learning_rate": 2.6531422345344943e-05, "loss": 0.144, "step": 29851 }, { "epoch": 0.5324439053972104, "grad_norm": 0.26276645064353943, "learning_rate": 2.6529868757860365e-05, "loss": 0.1406, "step": 29852 }, { "epoch": 0.5324617415189241, "grad_norm": 0.27102428674697876, "learning_rate": 2.6528315164445493e-05, "loss": 0.199, "step": 29853 }, { "epoch": 0.5324795776406378, "grad_norm": 0.21290412545204163, "learning_rate": 2.652676156510634e-05, "loss": 0.1238, "step": 29854 }, { "epoch": 0.5324974137623515, "grad_norm": 0.18232671916484833, "learning_rate": 2.6525207959848934e-05, "loss": 0.1155, "step": 29855 }, { "epoch": 0.5325152498840652, "grad_norm": 0.2418159693479538, "learning_rate": 2.6523654348679296e-05, "loss": 0.1393, "step": 29856 }, { "epoch": 0.5325330860057789, "grad_norm": 0.30120325088500977, "learning_rate": 2.6522100731603446e-05, "loss": 0.1769, "step": 29857 }, { "epoch": 0.5325509221274926, "grad_norm": 0.48727932572364807, "learning_rate": 2.6520547108627408e-05, "loss": 0.1995, "step": 29858 }, { "epoch": 0.5325687582492062, "grad_norm": 0.22200995683670044, "learning_rate": 2.6518993479757204e-05, "loss": 0.1206, "step": 29859 }, { "epoch": 0.53258659437092, "grad_norm": 0.42962202429771423, "learning_rate": 2.6517439844998853e-05, "loss": 0.161, "step": 29860 }, { "epoch": 0.5326044304926337, "grad_norm": 0.3077550232410431, "learning_rate": 2.6515886204358387e-05, "loss": 0.1597, "step": 29861 }, { "epoch": 0.5326222666143474, "grad_norm": 0.2222198098897934, "learning_rate": 2.651433255784182e-05, "loss": 0.1206, "step": 29862 }, { "epoch": 0.5326401027360611, "grad_norm": 0.21353916823863983, "learning_rate": 2.6512778905455176e-05, "loss": 0.1198, "step": 29863 }, { "epoch": 0.5326579388577748, "grad_norm": 0.3128848373889923, "learning_rate": 2.6511225247204485e-05, "loss": 0.1392, "step": 29864 }, { "epoch": 0.5326757749794885, "grad_norm": 0.2974700629711151, "learning_rate": 2.650967158309575e-05, "loss": 0.1377, "step": 29865 }, { "epoch": 0.5326936111012022, "grad_norm": 0.2117987871170044, "learning_rate": 2.6508117913135023e-05, "loss": 0.1397, "step": 29866 }, { "epoch": 0.5327114472229159, "grad_norm": 0.27953919768333435, "learning_rate": 2.6506564237328297e-05, "loss": 0.1442, "step": 29867 }, { "epoch": 0.5327292833446295, "grad_norm": 0.18889255821704865, "learning_rate": 2.6505010555681608e-05, "loss": 0.139, "step": 29868 }, { "epoch": 0.5327471194663432, "grad_norm": 0.2776961028575897, "learning_rate": 2.6503456868200983e-05, "loss": 0.146, "step": 29869 }, { "epoch": 0.5327649555880569, "grad_norm": 0.2660003900527954, "learning_rate": 2.650190317489244e-05, "loss": 0.153, "step": 29870 }, { "epoch": 0.5327827917097706, "grad_norm": 0.25715041160583496, "learning_rate": 2.6500349475762003e-05, "loss": 0.1067, "step": 29871 }, { "epoch": 0.5328006278314843, "grad_norm": 0.21787290275096893, "learning_rate": 2.6498795770815694e-05, "loss": 0.0863, "step": 29872 }, { "epoch": 0.532818463953198, "grad_norm": 0.2636333405971527, "learning_rate": 2.6497242060059534e-05, "loss": 0.0907, "step": 29873 }, { "epoch": 0.5328363000749117, "grad_norm": 0.2915153503417969, "learning_rate": 2.6495688343499543e-05, "loss": 0.121, "step": 29874 }, { "epoch": 0.5328541361966254, "grad_norm": 0.2586997449398041, "learning_rate": 2.6494134621141743e-05, "loss": 0.1219, "step": 29875 }, { "epoch": 0.532871972318339, "grad_norm": 0.2664002478122711, "learning_rate": 2.6492580892992165e-05, "loss": 0.1605, "step": 29876 }, { "epoch": 0.5328898084400528, "grad_norm": 0.29478463530540466, "learning_rate": 2.6491027159056834e-05, "loss": 0.1458, "step": 29877 }, { "epoch": 0.5329076445617665, "grad_norm": 0.20806017518043518, "learning_rate": 2.648947341934176e-05, "loss": 0.1218, "step": 29878 }, { "epoch": 0.5329254806834802, "grad_norm": 0.2276647537946701, "learning_rate": 2.6487919673852978e-05, "loss": 0.1051, "step": 29879 }, { "epoch": 0.5329433168051939, "grad_norm": 0.31864044070243835, "learning_rate": 2.64863659225965e-05, "loss": 0.181, "step": 29880 }, { "epoch": 0.5329611529269076, "grad_norm": 0.24166466295719147, "learning_rate": 2.648481216557836e-05, "loss": 0.1445, "step": 29881 }, { "epoch": 0.5329789890486213, "grad_norm": 0.2853158116340637, "learning_rate": 2.6483258402804566e-05, "loss": 0.108, "step": 29882 }, { "epoch": 0.532996825170335, "grad_norm": 0.32880693674087524, "learning_rate": 2.6481704634281163e-05, "loss": 0.1699, "step": 29883 }, { "epoch": 0.5330146612920487, "grad_norm": 0.24686409533023834, "learning_rate": 2.648015086001415e-05, "loss": 0.1173, "step": 29884 }, { "epoch": 0.5330324974137624, "grad_norm": 0.3073842525482178, "learning_rate": 2.647859708000957e-05, "loss": 0.1895, "step": 29885 }, { "epoch": 0.533050333535476, "grad_norm": 0.2646964490413666, "learning_rate": 2.6477043294273434e-05, "loss": 0.1197, "step": 29886 }, { "epoch": 0.5330681696571897, "grad_norm": 0.3958754241466522, "learning_rate": 2.6475489502811762e-05, "loss": 0.1571, "step": 29887 }, { "epoch": 0.5330860057789034, "grad_norm": 0.24821855127811432, "learning_rate": 2.6473935705630588e-05, "loss": 0.1514, "step": 29888 }, { "epoch": 0.5331038419006171, "grad_norm": 0.24609972536563873, "learning_rate": 2.6472381902735927e-05, "loss": 0.1458, "step": 29889 }, { "epoch": 0.5331216780223308, "grad_norm": 0.25449633598327637, "learning_rate": 2.6470828094133808e-05, "loss": 0.1124, "step": 29890 }, { "epoch": 0.5331395141440445, "grad_norm": 0.27728673815727234, "learning_rate": 2.6469274279830253e-05, "loss": 0.0901, "step": 29891 }, { "epoch": 0.5331573502657582, "grad_norm": 0.3672589063644409, "learning_rate": 2.6467720459831285e-05, "loss": 0.1397, "step": 29892 }, { "epoch": 0.5331751863874719, "grad_norm": 0.2867465317249298, "learning_rate": 2.646616663414292e-05, "loss": 0.1947, "step": 29893 }, { "epoch": 0.5331930225091857, "grad_norm": 0.40466591715812683, "learning_rate": 2.6464612802771193e-05, "loss": 0.1654, "step": 29894 }, { "epoch": 0.5332108586308993, "grad_norm": 0.2633300721645355, "learning_rate": 2.6463058965722113e-05, "loss": 0.1259, "step": 29895 }, { "epoch": 0.533228694752613, "grad_norm": 0.17879962921142578, "learning_rate": 2.646150512300171e-05, "loss": 0.1153, "step": 29896 }, { "epoch": 0.5332465308743267, "grad_norm": 0.2622550427913666, "learning_rate": 2.645995127461602e-05, "loss": 0.1032, "step": 29897 }, { "epoch": 0.5332643669960404, "grad_norm": 0.3151096701622009, "learning_rate": 2.6458397420571048e-05, "loss": 0.1808, "step": 29898 }, { "epoch": 0.5332822031177541, "grad_norm": 0.2530626058578491, "learning_rate": 2.6456843560872825e-05, "loss": 0.1445, "step": 29899 }, { "epoch": 0.5333000392394678, "grad_norm": 0.2448253482580185, "learning_rate": 2.645528969552737e-05, "loss": 0.1322, "step": 29900 }, { "epoch": 0.5333178753611815, "grad_norm": 0.2782707214355469, "learning_rate": 2.6453735824540715e-05, "loss": 0.1103, "step": 29901 }, { "epoch": 0.5333357114828952, "grad_norm": 0.265190988779068, "learning_rate": 2.645218194791887e-05, "loss": 0.1508, "step": 29902 }, { "epoch": 0.5333535476046088, "grad_norm": 0.35700690746307373, "learning_rate": 2.645062806566787e-05, "loss": 0.1444, "step": 29903 }, { "epoch": 0.5333713837263225, "grad_norm": 0.3671668469905853, "learning_rate": 2.644907417779373e-05, "loss": 0.1281, "step": 29904 }, { "epoch": 0.5333892198480362, "grad_norm": 0.25174978375434875, "learning_rate": 2.644752028430249e-05, "loss": 0.171, "step": 29905 }, { "epoch": 0.5334070559697499, "grad_norm": 0.3751481771469116, "learning_rate": 2.644596638520015e-05, "loss": 0.149, "step": 29906 }, { "epoch": 0.5334248920914636, "grad_norm": 0.312002032995224, "learning_rate": 2.644441248049275e-05, "loss": 0.1372, "step": 29907 }, { "epoch": 0.5334427282131773, "grad_norm": 0.2236240953207016, "learning_rate": 2.644285857018631e-05, "loss": 0.1352, "step": 29908 }, { "epoch": 0.533460564334891, "grad_norm": 0.3119545578956604, "learning_rate": 2.6441304654286847e-05, "loss": 0.2075, "step": 29909 }, { "epoch": 0.5334784004566047, "grad_norm": 0.3627402186393738, "learning_rate": 2.6439750732800394e-05, "loss": 0.1621, "step": 29910 }, { "epoch": 0.5334962365783185, "grad_norm": 0.2859326899051666, "learning_rate": 2.643819680573297e-05, "loss": 0.1618, "step": 29911 }, { "epoch": 0.5335140727000321, "grad_norm": 0.22822651267051697, "learning_rate": 2.6436642873090593e-05, "loss": 0.1191, "step": 29912 }, { "epoch": 0.5335319088217458, "grad_norm": 0.42542552947998047, "learning_rate": 2.6435088934879298e-05, "loss": 0.2156, "step": 29913 }, { "epoch": 0.5335497449434595, "grad_norm": 0.1848410815000534, "learning_rate": 2.6433534991105103e-05, "loss": 0.118, "step": 29914 }, { "epoch": 0.5335675810651732, "grad_norm": 0.24543717503547668, "learning_rate": 2.6431981041774023e-05, "loss": 0.1256, "step": 29915 }, { "epoch": 0.5335854171868869, "grad_norm": 0.3678569197654724, "learning_rate": 2.6430427086892096e-05, "loss": 0.2001, "step": 29916 }, { "epoch": 0.5336032533086006, "grad_norm": 0.24542857706546783, "learning_rate": 2.6428873126465336e-05, "loss": 0.1202, "step": 29917 }, { "epoch": 0.5336210894303143, "grad_norm": 0.2463257908821106, "learning_rate": 2.6427319160499776e-05, "loss": 0.1619, "step": 29918 }, { "epoch": 0.533638925552028, "grad_norm": 0.4380140006542206, "learning_rate": 2.6425765189001427e-05, "loss": 0.1066, "step": 29919 }, { "epoch": 0.5336567616737417, "grad_norm": 0.3083948791027069, "learning_rate": 2.642421121197633e-05, "loss": 0.1328, "step": 29920 }, { "epoch": 0.5336745977954553, "grad_norm": 0.29016420245170593, "learning_rate": 2.6422657229430488e-05, "loss": 0.1385, "step": 29921 }, { "epoch": 0.533692433917169, "grad_norm": 0.20196279883384705, "learning_rate": 2.6421103241369945e-05, "loss": 0.0941, "step": 29922 }, { "epoch": 0.5337102700388827, "grad_norm": 0.21342800557613373, "learning_rate": 2.6419549247800702e-05, "loss": 0.0794, "step": 29923 }, { "epoch": 0.5337281061605964, "grad_norm": 0.22464700043201447, "learning_rate": 2.64179952487288e-05, "loss": 0.1438, "step": 29924 }, { "epoch": 0.5337459422823101, "grad_norm": 0.2330685704946518, "learning_rate": 2.6416441244160268e-05, "loss": 0.0566, "step": 29925 }, { "epoch": 0.5337637784040238, "grad_norm": 0.2709701657295227, "learning_rate": 2.641488723410111e-05, "loss": 0.113, "step": 29926 }, { "epoch": 0.5337816145257375, "grad_norm": 0.28197982907295227, "learning_rate": 2.6413333218557367e-05, "loss": 0.1463, "step": 29927 }, { "epoch": 0.5337994506474513, "grad_norm": 0.27952587604522705, "learning_rate": 2.6411779197535052e-05, "loss": 0.149, "step": 29928 }, { "epoch": 0.533817286769165, "grad_norm": 0.2955726385116577, "learning_rate": 2.64102251710402e-05, "loss": 0.1669, "step": 29929 }, { "epoch": 0.5338351228908786, "grad_norm": 0.2965660095214844, "learning_rate": 2.6408671139078823e-05, "loss": 0.1007, "step": 29930 }, { "epoch": 0.5338529590125923, "grad_norm": 0.25237980484962463, "learning_rate": 2.6407117101656947e-05, "loss": 0.1413, "step": 29931 }, { "epoch": 0.533870795134306, "grad_norm": 0.31322088837623596, "learning_rate": 2.6405563058780596e-05, "loss": 0.1752, "step": 29932 }, { "epoch": 0.5338886312560197, "grad_norm": 0.4370499551296234, "learning_rate": 2.640400901045581e-05, "loss": 0.1133, "step": 29933 }, { "epoch": 0.5339064673777334, "grad_norm": 0.26866334676742554, "learning_rate": 2.6402454956688593e-05, "loss": 0.0732, "step": 29934 }, { "epoch": 0.5339243034994471, "grad_norm": 0.2273993194103241, "learning_rate": 2.640090089748498e-05, "loss": 0.1192, "step": 29935 }, { "epoch": 0.5339421396211608, "grad_norm": 0.22413517534732819, "learning_rate": 2.6399346832850985e-05, "loss": 0.1164, "step": 29936 }, { "epoch": 0.5339599757428745, "grad_norm": 0.2783486247062683, "learning_rate": 2.639779276279264e-05, "loss": 0.1615, "step": 29937 }, { "epoch": 0.5339778118645881, "grad_norm": 0.34598222374916077, "learning_rate": 2.639623868731597e-05, "loss": 0.1844, "step": 29938 }, { "epoch": 0.5339956479863018, "grad_norm": 0.3014484941959381, "learning_rate": 2.6394684606426994e-05, "loss": 0.1181, "step": 29939 }, { "epoch": 0.5340134841080155, "grad_norm": 0.26582470536231995, "learning_rate": 2.6393130520131744e-05, "loss": 0.0887, "step": 29940 }, { "epoch": 0.5340313202297292, "grad_norm": 0.21539784967899323, "learning_rate": 2.639157642843623e-05, "loss": 0.1153, "step": 29941 }, { "epoch": 0.5340491563514429, "grad_norm": 0.27531206607818604, "learning_rate": 2.63900223313465e-05, "loss": 0.1273, "step": 29942 }, { "epoch": 0.5340669924731566, "grad_norm": 0.25267964601516724, "learning_rate": 2.6388468228868545e-05, "loss": 0.0781, "step": 29943 }, { "epoch": 0.5340848285948704, "grad_norm": 0.21789827942848206, "learning_rate": 2.6386914121008416e-05, "loss": 0.1112, "step": 29944 }, { "epoch": 0.5341026647165841, "grad_norm": 0.2215350717306137, "learning_rate": 2.638536000777213e-05, "loss": 0.1445, "step": 29945 }, { "epoch": 0.5341205008382978, "grad_norm": 0.36032167077064514, "learning_rate": 2.6383805889165715e-05, "loss": 0.1018, "step": 29946 }, { "epoch": 0.5341383369600115, "grad_norm": 0.30838868021965027, "learning_rate": 2.6382251765195178e-05, "loss": 0.1778, "step": 29947 }, { "epoch": 0.5341561730817251, "grad_norm": 0.23818707466125488, "learning_rate": 2.6380697635866568e-05, "loss": 0.1178, "step": 29948 }, { "epoch": 0.5341740092034388, "grad_norm": 0.2256084531545639, "learning_rate": 2.637914350118589e-05, "loss": 0.1509, "step": 29949 }, { "epoch": 0.5341918453251525, "grad_norm": 0.26305562257766724, "learning_rate": 2.6377589361159178e-05, "loss": 0.0861, "step": 29950 }, { "epoch": 0.5342096814468662, "grad_norm": 0.26467767357826233, "learning_rate": 2.637603521579245e-05, "loss": 0.1338, "step": 29951 }, { "epoch": 0.5342275175685799, "grad_norm": 0.3854021430015564, "learning_rate": 2.637448106509174e-05, "loss": 0.1433, "step": 29952 }, { "epoch": 0.5342453536902936, "grad_norm": 0.22443512082099915, "learning_rate": 2.637292690906306e-05, "loss": 0.1563, "step": 29953 }, { "epoch": 0.5342631898120073, "grad_norm": 0.26604029536247253, "learning_rate": 2.637137274771245e-05, "loss": 0.1044, "step": 29954 }, { "epoch": 0.534281025933721, "grad_norm": 0.2421019822359085, "learning_rate": 2.636981858104592e-05, "loss": 0.1617, "step": 29955 }, { "epoch": 0.5342988620554346, "grad_norm": 0.27613556385040283, "learning_rate": 2.6368264409069498e-05, "loss": 0.1303, "step": 29956 }, { "epoch": 0.5343166981771483, "grad_norm": 0.26005810499191284, "learning_rate": 2.6366710231789214e-05, "loss": 0.1662, "step": 29957 }, { "epoch": 0.534334534298862, "grad_norm": 0.23246777057647705, "learning_rate": 2.6365156049211082e-05, "loss": 0.1385, "step": 29958 }, { "epoch": 0.5343523704205757, "grad_norm": 0.2797565162181854, "learning_rate": 2.6363601861341142e-05, "loss": 0.1426, "step": 29959 }, { "epoch": 0.5343702065422894, "grad_norm": 0.2667892873287201, "learning_rate": 2.6362047668185408e-05, "loss": 0.1468, "step": 29960 }, { "epoch": 0.5343880426640032, "grad_norm": 0.3596689701080322, "learning_rate": 2.6360493469749907e-05, "loss": 0.1453, "step": 29961 }, { "epoch": 0.5344058787857169, "grad_norm": 0.22444729506969452, "learning_rate": 2.635893926604066e-05, "loss": 0.0812, "step": 29962 }, { "epoch": 0.5344237149074306, "grad_norm": 0.21742577850818634, "learning_rate": 2.6357385057063703e-05, "loss": 0.1207, "step": 29963 }, { "epoch": 0.5344415510291443, "grad_norm": 0.3289489150047302, "learning_rate": 2.6355830842825048e-05, "loss": 0.1125, "step": 29964 }, { "epoch": 0.534459387150858, "grad_norm": 0.4211141765117645, "learning_rate": 2.635427662333072e-05, "loss": 0.1103, "step": 29965 }, { "epoch": 0.5344772232725716, "grad_norm": 0.28083640336990356, "learning_rate": 2.6352722398586755e-05, "loss": 0.1161, "step": 29966 }, { "epoch": 0.5344950593942853, "grad_norm": 0.24796617031097412, "learning_rate": 2.6351168168599166e-05, "loss": 0.1748, "step": 29967 }, { "epoch": 0.534512895515999, "grad_norm": 0.38269057869911194, "learning_rate": 2.634961393337399e-05, "loss": 0.1394, "step": 29968 }, { "epoch": 0.5345307316377127, "grad_norm": 0.26489534974098206, "learning_rate": 2.6348059692917238e-05, "loss": 0.1319, "step": 29969 }, { "epoch": 0.5345485677594264, "grad_norm": 0.36083126068115234, "learning_rate": 2.6346505447234944e-05, "loss": 0.1525, "step": 29970 }, { "epoch": 0.5345664038811401, "grad_norm": 0.28692829608917236, "learning_rate": 2.6344951196333122e-05, "loss": 0.1578, "step": 29971 }, { "epoch": 0.5345842400028538, "grad_norm": 0.20260398089885712, "learning_rate": 2.634339694021781e-05, "loss": 0.1012, "step": 29972 }, { "epoch": 0.5346020761245674, "grad_norm": 0.21179082989692688, "learning_rate": 2.6341842678895027e-05, "loss": 0.0928, "step": 29973 }, { "epoch": 0.5346199122462811, "grad_norm": 0.24858523905277252, "learning_rate": 2.63402884123708e-05, "loss": 0.1533, "step": 29974 }, { "epoch": 0.5346377483679948, "grad_norm": 0.2500287890434265, "learning_rate": 2.633873414065115e-05, "loss": 0.1609, "step": 29975 }, { "epoch": 0.5346555844897085, "grad_norm": 0.29444488883018494, "learning_rate": 2.633717986374211e-05, "loss": 0.0953, "step": 29976 }, { "epoch": 0.5346734206114222, "grad_norm": 0.17601536214351654, "learning_rate": 2.63356255816497e-05, "loss": 0.0706, "step": 29977 }, { "epoch": 0.534691256733136, "grad_norm": 0.20715098083019257, "learning_rate": 2.6334071294379938e-05, "loss": 0.153, "step": 29978 }, { "epoch": 0.5347090928548497, "grad_norm": 0.331758052110672, "learning_rate": 2.633251700193885e-05, "loss": 0.1597, "step": 29979 }, { "epoch": 0.5347269289765634, "grad_norm": 0.2710334062576294, "learning_rate": 2.6330962704332468e-05, "loss": 0.1184, "step": 29980 }, { "epoch": 0.5347447650982771, "grad_norm": 0.3113744556903839, "learning_rate": 2.6329408401566825e-05, "loss": 0.1223, "step": 29981 }, { "epoch": 0.5347626012199908, "grad_norm": 0.25887617468833923, "learning_rate": 2.6327854093647925e-05, "loss": 0.1047, "step": 29982 }, { "epoch": 0.5347804373417044, "grad_norm": 0.22559833526611328, "learning_rate": 2.632629978058181e-05, "loss": 0.1396, "step": 29983 }, { "epoch": 0.5347982734634181, "grad_norm": 0.227296382188797, "learning_rate": 2.6324745462374495e-05, "loss": 0.1488, "step": 29984 }, { "epoch": 0.5348161095851318, "grad_norm": 0.2206769436597824, "learning_rate": 2.632319113903201e-05, "loss": 0.1124, "step": 29985 }, { "epoch": 0.5348339457068455, "grad_norm": 0.22993215918540955, "learning_rate": 2.6321636810560373e-05, "loss": 0.1447, "step": 29986 }, { "epoch": 0.5348517818285592, "grad_norm": 0.24782823026180267, "learning_rate": 2.6320082476965628e-05, "loss": 0.1504, "step": 29987 }, { "epoch": 0.5348696179502729, "grad_norm": 0.40900227427482605, "learning_rate": 2.6318528138253777e-05, "loss": 0.1561, "step": 29988 }, { "epoch": 0.5348874540719866, "grad_norm": 0.29222583770751953, "learning_rate": 2.6316973794430865e-05, "loss": 0.1042, "step": 29989 }, { "epoch": 0.5349052901937003, "grad_norm": 0.2259809374809265, "learning_rate": 2.6315419445502902e-05, "loss": 0.1326, "step": 29990 }, { "epoch": 0.5349231263154139, "grad_norm": 0.30716872215270996, "learning_rate": 2.631386509147592e-05, "loss": 0.1155, "step": 29991 }, { "epoch": 0.5349409624371276, "grad_norm": 0.2817372977733612, "learning_rate": 2.6312310732355944e-05, "loss": 0.1516, "step": 29992 }, { "epoch": 0.5349587985588413, "grad_norm": 0.25902578234672546, "learning_rate": 2.631075636814899e-05, "loss": 0.1536, "step": 29993 }, { "epoch": 0.534976634680555, "grad_norm": 0.2492329627275467, "learning_rate": 2.6309201998861104e-05, "loss": 0.167, "step": 29994 }, { "epoch": 0.5349944708022688, "grad_norm": 0.2663809657096863, "learning_rate": 2.6307647624498294e-05, "loss": 0.1438, "step": 29995 }, { "epoch": 0.5350123069239825, "grad_norm": 0.24601788818836212, "learning_rate": 2.6306093245066594e-05, "loss": 0.1316, "step": 29996 }, { "epoch": 0.5350301430456962, "grad_norm": 0.286398708820343, "learning_rate": 2.6304538860572016e-05, "loss": 0.1861, "step": 29997 }, { "epoch": 0.5350479791674099, "grad_norm": 0.29543009400367737, "learning_rate": 2.6302984471020604e-05, "loss": 0.1404, "step": 29998 }, { "epoch": 0.5350658152891236, "grad_norm": 0.25596728920936584, "learning_rate": 2.6301430076418365e-05, "loss": 0.1137, "step": 29999 }, { "epoch": 0.5350836514108372, "grad_norm": 0.2510005533695221, "learning_rate": 2.629987567677134e-05, "loss": 0.1072, "step": 30000 }, { "epoch": 0.5350836514108372, "eval_loss": 0.13253282010555267, "eval_runtime": 106.3992, "eval_samples_per_second": 9.624, "eval_steps_per_second": 1.607, "step": 30000 }, { "epoch": 0.5351014875325509, "grad_norm": 0.23676490783691406, "learning_rate": 2.6298321272085542e-05, "loss": 0.1496, "step": 30001 }, { "epoch": 0.5351193236542646, "grad_norm": 0.30027639865875244, "learning_rate": 2.629676686236701e-05, "loss": 0.1888, "step": 30002 }, { "epoch": 0.5351371597759783, "grad_norm": 0.21820507943630219, "learning_rate": 2.6295212447621766e-05, "loss": 0.1165, "step": 30003 }, { "epoch": 0.535154995897692, "grad_norm": 0.3790316879749298, "learning_rate": 2.6293658027855817e-05, "loss": 0.1541, "step": 30004 }, { "epoch": 0.5351728320194057, "grad_norm": 0.2574770748615265, "learning_rate": 2.6292103603075214e-05, "loss": 0.1156, "step": 30005 }, { "epoch": 0.5351906681411194, "grad_norm": 0.2600179612636566, "learning_rate": 2.6290549173285966e-05, "loss": 0.1073, "step": 30006 }, { "epoch": 0.5352085042628331, "grad_norm": 0.23078328371047974, "learning_rate": 2.62889947384941e-05, "loss": 0.1648, "step": 30007 }, { "epoch": 0.5352263403845468, "grad_norm": 0.32244226336479187, "learning_rate": 2.6287440298705645e-05, "loss": 0.113, "step": 30008 }, { "epoch": 0.5352441765062604, "grad_norm": 0.37785470485687256, "learning_rate": 2.628588585392664e-05, "loss": 0.1938, "step": 30009 }, { "epoch": 0.5352620126279741, "grad_norm": 0.19090965390205383, "learning_rate": 2.6284331404163082e-05, "loss": 0.1313, "step": 30010 }, { "epoch": 0.5352798487496878, "grad_norm": 0.2262149155139923, "learning_rate": 2.6282776949421022e-05, "loss": 0.1318, "step": 30011 }, { "epoch": 0.5352976848714016, "grad_norm": 0.251688152551651, "learning_rate": 2.628122248970647e-05, "loss": 0.1035, "step": 30012 }, { "epoch": 0.5353155209931153, "grad_norm": 0.2728239893913269, "learning_rate": 2.627966802502546e-05, "loss": 0.186, "step": 30013 }, { "epoch": 0.535333357114829, "grad_norm": 0.3314443528652191, "learning_rate": 2.627811355538401e-05, "loss": 0.1941, "step": 30014 }, { "epoch": 0.5353511932365427, "grad_norm": 0.2868655323982239, "learning_rate": 2.627655908078816e-05, "loss": 0.114, "step": 30015 }, { "epoch": 0.5353690293582564, "grad_norm": 0.2744075059890747, "learning_rate": 2.6275004601243918e-05, "loss": 0.1816, "step": 30016 }, { "epoch": 0.53538686547997, "grad_norm": 0.2588185966014862, "learning_rate": 2.627345011675732e-05, "loss": 0.1857, "step": 30017 }, { "epoch": 0.5354047016016837, "grad_norm": 0.20969374477863312, "learning_rate": 2.627189562733439e-05, "loss": 0.1199, "step": 30018 }, { "epoch": 0.5354225377233974, "grad_norm": 0.28712162375450134, "learning_rate": 2.627034113298115e-05, "loss": 0.1394, "step": 30019 }, { "epoch": 0.5354403738451111, "grad_norm": 0.24129994213581085, "learning_rate": 2.6268786633703634e-05, "loss": 0.1095, "step": 30020 }, { "epoch": 0.5354582099668248, "grad_norm": 0.29542097449302673, "learning_rate": 2.6267232129507857e-05, "loss": 0.1032, "step": 30021 }, { "epoch": 0.5354760460885385, "grad_norm": 0.2944839298725128, "learning_rate": 2.626567762039986e-05, "loss": 0.1575, "step": 30022 }, { "epoch": 0.5354938822102522, "grad_norm": 0.306369811296463, "learning_rate": 2.6264123106385652e-05, "loss": 0.1733, "step": 30023 }, { "epoch": 0.5355117183319659, "grad_norm": 0.20029355585575104, "learning_rate": 2.6262568587471272e-05, "loss": 0.1501, "step": 30024 }, { "epoch": 0.5355295544536796, "grad_norm": 0.3794811964035034, "learning_rate": 2.6261014063662732e-05, "loss": 0.1384, "step": 30025 }, { "epoch": 0.5355473905753932, "grad_norm": 0.27813950181007385, "learning_rate": 2.6259459534966073e-05, "loss": 0.1379, "step": 30026 }, { "epoch": 0.5355652266971069, "grad_norm": 0.17722640931606293, "learning_rate": 2.6257905001387306e-05, "loss": 0.1379, "step": 30027 }, { "epoch": 0.5355830628188206, "grad_norm": 0.4119080603122711, "learning_rate": 2.6256350462932472e-05, "loss": 0.2011, "step": 30028 }, { "epoch": 0.5356008989405344, "grad_norm": 0.22568555176258087, "learning_rate": 2.625479591960759e-05, "loss": 0.1827, "step": 30029 }, { "epoch": 0.5356187350622481, "grad_norm": 0.2339983731508255, "learning_rate": 2.625324137141868e-05, "loss": 0.0693, "step": 30030 }, { "epoch": 0.5356365711839618, "grad_norm": 0.19217249751091003, "learning_rate": 2.6251686818371778e-05, "loss": 0.0946, "step": 30031 }, { "epoch": 0.5356544073056755, "grad_norm": 0.30612075328826904, "learning_rate": 2.6250132260472903e-05, "loss": 0.168, "step": 30032 }, { "epoch": 0.5356722434273892, "grad_norm": 0.30206209421157837, "learning_rate": 2.6248577697728088e-05, "loss": 0.1534, "step": 30033 }, { "epoch": 0.5356900795491029, "grad_norm": 0.2501528859138489, "learning_rate": 2.624702313014335e-05, "loss": 0.1379, "step": 30034 }, { "epoch": 0.5357079156708165, "grad_norm": 0.19195348024368286, "learning_rate": 2.6245468557724718e-05, "loss": 0.116, "step": 30035 }, { "epoch": 0.5357257517925302, "grad_norm": 0.2196555882692337, "learning_rate": 2.6243913980478218e-05, "loss": 0.1494, "step": 30036 }, { "epoch": 0.5357435879142439, "grad_norm": 0.25943294167518616, "learning_rate": 2.6242359398409888e-05, "loss": 0.1237, "step": 30037 }, { "epoch": 0.5357614240359576, "grad_norm": 0.2966499626636505, "learning_rate": 2.624080481152573e-05, "loss": 0.1281, "step": 30038 }, { "epoch": 0.5357792601576713, "grad_norm": 0.19807657599449158, "learning_rate": 2.62392502198318e-05, "loss": 0.1278, "step": 30039 }, { "epoch": 0.535797096279385, "grad_norm": 0.2528213560581207, "learning_rate": 2.6237695623334098e-05, "loss": 0.1604, "step": 30040 }, { "epoch": 0.5358149324010987, "grad_norm": 0.22685837745666504, "learning_rate": 2.623614102203866e-05, "loss": 0.1341, "step": 30041 }, { "epoch": 0.5358327685228124, "grad_norm": 0.30094918608665466, "learning_rate": 2.6234586415951517e-05, "loss": 0.1883, "step": 30042 }, { "epoch": 0.535850604644526, "grad_norm": 0.27206870913505554, "learning_rate": 2.623303180507869e-05, "loss": 0.0841, "step": 30043 }, { "epoch": 0.5358684407662397, "grad_norm": 0.2474784404039383, "learning_rate": 2.6231477189426208e-05, "loss": 0.1116, "step": 30044 }, { "epoch": 0.5358862768879535, "grad_norm": 0.20598961412906647, "learning_rate": 2.6229922569000086e-05, "loss": 0.1273, "step": 30045 }, { "epoch": 0.5359041130096672, "grad_norm": 0.22235436737537384, "learning_rate": 2.622836794380637e-05, "loss": 0.1575, "step": 30046 }, { "epoch": 0.5359219491313809, "grad_norm": 0.17955812811851501, "learning_rate": 2.6226813313851067e-05, "loss": 0.1078, "step": 30047 }, { "epoch": 0.5359397852530946, "grad_norm": 0.23530493676662445, "learning_rate": 2.6225258679140212e-05, "loss": 0.1173, "step": 30048 }, { "epoch": 0.5359576213748083, "grad_norm": 0.2665049731731415, "learning_rate": 2.622370403967983e-05, "loss": 0.1466, "step": 30049 }, { "epoch": 0.535975457496522, "grad_norm": 0.3786855936050415, "learning_rate": 2.6222149395475958e-05, "loss": 0.1668, "step": 30050 }, { "epoch": 0.5359932936182357, "grad_norm": 0.323861688375473, "learning_rate": 2.6220594746534604e-05, "loss": 0.1634, "step": 30051 }, { "epoch": 0.5360111297399494, "grad_norm": 0.3055436909198761, "learning_rate": 2.621904009286181e-05, "loss": 0.1199, "step": 30052 }, { "epoch": 0.536028965861663, "grad_norm": 0.2409042865037918, "learning_rate": 2.6217485434463595e-05, "loss": 0.0958, "step": 30053 }, { "epoch": 0.5360468019833767, "grad_norm": 0.3947082757949829, "learning_rate": 2.6215930771345986e-05, "loss": 0.1385, "step": 30054 }, { "epoch": 0.5360646381050904, "grad_norm": 0.2209540456533432, "learning_rate": 2.6214376103515004e-05, "loss": 0.1126, "step": 30055 }, { "epoch": 0.5360824742268041, "grad_norm": 0.4303555488586426, "learning_rate": 2.621282143097668e-05, "loss": 0.1537, "step": 30056 }, { "epoch": 0.5361003103485178, "grad_norm": 0.2560977041721344, "learning_rate": 2.621126675373705e-05, "loss": 0.1225, "step": 30057 }, { "epoch": 0.5361181464702315, "grad_norm": 0.3105206787586212, "learning_rate": 2.6209712071802123e-05, "loss": 0.1543, "step": 30058 }, { "epoch": 0.5361359825919452, "grad_norm": 0.2338428646326065, "learning_rate": 2.620815738517794e-05, "loss": 0.1353, "step": 30059 }, { "epoch": 0.5361538187136589, "grad_norm": 0.34728971123695374, "learning_rate": 2.6206602693870518e-05, "loss": 0.1193, "step": 30060 }, { "epoch": 0.5361716548353725, "grad_norm": 0.24602587521076202, "learning_rate": 2.620504799788589e-05, "loss": 0.1236, "step": 30061 }, { "epoch": 0.5361894909570863, "grad_norm": 0.2876373827457428, "learning_rate": 2.620349329723008e-05, "loss": 0.1391, "step": 30062 }, { "epoch": 0.5362073270788, "grad_norm": 0.27557122707366943, "learning_rate": 2.620193859190911e-05, "loss": 0.1289, "step": 30063 }, { "epoch": 0.5362251632005137, "grad_norm": 0.22649092972278595, "learning_rate": 2.6200383881929008e-05, "loss": 0.1093, "step": 30064 }, { "epoch": 0.5362429993222274, "grad_norm": 0.28372225165367126, "learning_rate": 2.6198829167295814e-05, "loss": 0.2103, "step": 30065 }, { "epoch": 0.5362608354439411, "grad_norm": 0.2724907696247101, "learning_rate": 2.6197274448015536e-05, "loss": 0.1299, "step": 30066 }, { "epoch": 0.5362786715656548, "grad_norm": 0.19539503753185272, "learning_rate": 2.6195719724094215e-05, "loss": 0.1357, "step": 30067 }, { "epoch": 0.5362965076873685, "grad_norm": 0.2573840022087097, "learning_rate": 2.6194164995537866e-05, "loss": 0.1668, "step": 30068 }, { "epoch": 0.5363143438090822, "grad_norm": 0.3429783582687378, "learning_rate": 2.6192610262352522e-05, "loss": 0.1078, "step": 30069 }, { "epoch": 0.5363321799307958, "grad_norm": 0.314705491065979, "learning_rate": 2.6191055524544212e-05, "loss": 0.1762, "step": 30070 }, { "epoch": 0.5363500160525095, "grad_norm": 0.24936902523040771, "learning_rate": 2.618950078211896e-05, "loss": 0.1112, "step": 30071 }, { "epoch": 0.5363678521742232, "grad_norm": 0.4061737358570099, "learning_rate": 2.618794603508279e-05, "loss": 0.1711, "step": 30072 }, { "epoch": 0.5363856882959369, "grad_norm": 0.26980453729629517, "learning_rate": 2.618639128344173e-05, "loss": 0.1328, "step": 30073 }, { "epoch": 0.5364035244176506, "grad_norm": 0.2761056423187256, "learning_rate": 2.6184836527201813e-05, "loss": 0.1074, "step": 30074 }, { "epoch": 0.5364213605393643, "grad_norm": 0.34335461258888245, "learning_rate": 2.618328176636905e-05, "loss": 0.1143, "step": 30075 }, { "epoch": 0.536439196661078, "grad_norm": 0.23472779989242554, "learning_rate": 2.618172700094948e-05, "loss": 0.0967, "step": 30076 }, { "epoch": 0.5364570327827917, "grad_norm": 0.36837902665138245, "learning_rate": 2.618017223094913e-05, "loss": 0.144, "step": 30077 }, { "epoch": 0.5364748689045054, "grad_norm": 0.30010440945625305, "learning_rate": 2.6178617456374027e-05, "loss": 0.1903, "step": 30078 }, { "epoch": 0.5364927050262192, "grad_norm": 0.20942270755767822, "learning_rate": 2.6177062677230197e-05, "loss": 0.0799, "step": 30079 }, { "epoch": 0.5365105411479328, "grad_norm": 0.31642064452171326, "learning_rate": 2.6175507893523665e-05, "loss": 0.1384, "step": 30080 }, { "epoch": 0.5365283772696465, "grad_norm": 0.23298269510269165, "learning_rate": 2.617395310526046e-05, "loss": 0.1309, "step": 30081 }, { "epoch": 0.5365462133913602, "grad_norm": 0.2235119491815567, "learning_rate": 2.61723983124466e-05, "loss": 0.1052, "step": 30082 }, { "epoch": 0.5365640495130739, "grad_norm": 0.22040526568889618, "learning_rate": 2.6170843515088123e-05, "loss": 0.133, "step": 30083 }, { "epoch": 0.5365818856347876, "grad_norm": 0.2211795300245285, "learning_rate": 2.6169288713191053e-05, "loss": 0.1595, "step": 30084 }, { "epoch": 0.5365997217565013, "grad_norm": 0.24885423481464386, "learning_rate": 2.6167733906761415e-05, "loss": 0.0994, "step": 30085 }, { "epoch": 0.536617557878215, "grad_norm": 0.34601157903671265, "learning_rate": 2.6166179095805236e-05, "loss": 0.117, "step": 30086 }, { "epoch": 0.5366353939999287, "grad_norm": 0.2754441499710083, "learning_rate": 2.6164624280328548e-05, "loss": 0.1403, "step": 30087 }, { "epoch": 0.5366532301216423, "grad_norm": 0.23783928155899048, "learning_rate": 2.616306946033737e-05, "loss": 0.1268, "step": 30088 }, { "epoch": 0.536671066243356, "grad_norm": 0.25563597679138184, "learning_rate": 2.6161514635837742e-05, "loss": 0.1221, "step": 30089 }, { "epoch": 0.5366889023650697, "grad_norm": 0.285347044467926, "learning_rate": 2.6159959806835665e-05, "loss": 0.1017, "step": 30090 }, { "epoch": 0.5367067384867834, "grad_norm": 0.20604771375656128, "learning_rate": 2.6158404973337196e-05, "loss": 0.1583, "step": 30091 }, { "epoch": 0.5367245746084971, "grad_norm": 0.33926403522491455, "learning_rate": 2.6156850135348343e-05, "loss": 0.1479, "step": 30092 }, { "epoch": 0.5367424107302108, "grad_norm": 0.22118711471557617, "learning_rate": 2.615529529287515e-05, "loss": 0.1572, "step": 30093 }, { "epoch": 0.5367602468519245, "grad_norm": 0.2924410104751587, "learning_rate": 2.6153740445923625e-05, "loss": 0.1297, "step": 30094 }, { "epoch": 0.5367780829736382, "grad_norm": 0.24608610570430756, "learning_rate": 2.6152185594499807e-05, "loss": 0.1712, "step": 30095 }, { "epoch": 0.536795919095352, "grad_norm": 0.18926946818828583, "learning_rate": 2.615063073860971e-05, "loss": 0.1252, "step": 30096 }, { "epoch": 0.5368137552170656, "grad_norm": 0.2684064507484436, "learning_rate": 2.6149075878259378e-05, "loss": 0.1373, "step": 30097 }, { "epoch": 0.5368315913387793, "grad_norm": 0.2129097431898117, "learning_rate": 2.6147521013454835e-05, "loss": 0.1461, "step": 30098 }, { "epoch": 0.536849427460493, "grad_norm": 0.2931802570819855, "learning_rate": 2.61459661442021e-05, "loss": 0.1609, "step": 30099 }, { "epoch": 0.5368672635822067, "grad_norm": 0.2953733801841736, "learning_rate": 2.614441127050721e-05, "loss": 0.1512, "step": 30100 }, { "epoch": 0.5368850997039204, "grad_norm": 0.2706487774848938, "learning_rate": 2.6142856392376174e-05, "loss": 0.0953, "step": 30101 }, { "epoch": 0.5369029358256341, "grad_norm": 0.2619917690753937, "learning_rate": 2.6141301509815042e-05, "loss": 0.1596, "step": 30102 }, { "epoch": 0.5369207719473478, "grad_norm": 0.17842665314674377, "learning_rate": 2.613974662282983e-05, "loss": 0.1196, "step": 30103 }, { "epoch": 0.5369386080690615, "grad_norm": 0.309428870677948, "learning_rate": 2.613819173142656e-05, "loss": 0.1672, "step": 30104 }, { "epoch": 0.5369564441907752, "grad_norm": 0.22137954831123352, "learning_rate": 2.6136636835611265e-05, "loss": 0.0996, "step": 30105 }, { "epoch": 0.5369742803124888, "grad_norm": 0.2813457250595093, "learning_rate": 2.6135081935389988e-05, "loss": 0.0876, "step": 30106 }, { "epoch": 0.5369921164342025, "grad_norm": 0.17232254147529602, "learning_rate": 2.6133527030768733e-05, "loss": 0.12, "step": 30107 }, { "epoch": 0.5370099525559162, "grad_norm": 0.3035275936126709, "learning_rate": 2.6131972121753536e-05, "loss": 0.1718, "step": 30108 }, { "epoch": 0.5370277886776299, "grad_norm": 0.30437931418418884, "learning_rate": 2.6130417208350423e-05, "loss": 0.2239, "step": 30109 }, { "epoch": 0.5370456247993436, "grad_norm": 0.2418203055858612, "learning_rate": 2.612886229056542e-05, "loss": 0.1273, "step": 30110 }, { "epoch": 0.5370634609210573, "grad_norm": 0.2719059884548187, "learning_rate": 2.612730736840456e-05, "loss": 0.1128, "step": 30111 }, { "epoch": 0.537081297042771, "grad_norm": 0.22249765694141388, "learning_rate": 2.612575244187386e-05, "loss": 0.0664, "step": 30112 }, { "epoch": 0.5370991331644848, "grad_norm": 0.28104168176651, "learning_rate": 2.6124197510979366e-05, "loss": 0.1348, "step": 30113 }, { "epoch": 0.5371169692861985, "grad_norm": 0.3130894601345062, "learning_rate": 2.6122642575727086e-05, "loss": 0.2115, "step": 30114 }, { "epoch": 0.5371348054079121, "grad_norm": 0.4007646143436432, "learning_rate": 2.6121087636123066e-05, "loss": 0.1529, "step": 30115 }, { "epoch": 0.5371526415296258, "grad_norm": 0.24136139452457428, "learning_rate": 2.611953269217331e-05, "loss": 0.1712, "step": 30116 }, { "epoch": 0.5371704776513395, "grad_norm": 0.25347164273262024, "learning_rate": 2.6117977743883864e-05, "loss": 0.1384, "step": 30117 }, { "epoch": 0.5371883137730532, "grad_norm": 0.20396308600902557, "learning_rate": 2.6116422791260747e-05, "loss": 0.1432, "step": 30118 }, { "epoch": 0.5372061498947669, "grad_norm": 0.2601173520088196, "learning_rate": 2.6114867834309997e-05, "loss": 0.1458, "step": 30119 }, { "epoch": 0.5372239860164806, "grad_norm": 0.23643594980239868, "learning_rate": 2.6113312873037634e-05, "loss": 0.1273, "step": 30120 }, { "epoch": 0.5372418221381943, "grad_norm": 0.4889412820339203, "learning_rate": 2.6111757907449678e-05, "loss": 0.1214, "step": 30121 }, { "epoch": 0.537259658259908, "grad_norm": 0.28934311866760254, "learning_rate": 2.6110202937552175e-05, "loss": 0.1431, "step": 30122 }, { "epoch": 0.5372774943816216, "grad_norm": 0.31065303087234497, "learning_rate": 2.6108647963351135e-05, "loss": 0.1707, "step": 30123 }, { "epoch": 0.5372953305033353, "grad_norm": 0.2623595595359802, "learning_rate": 2.6107092984852588e-05, "loss": 0.1369, "step": 30124 }, { "epoch": 0.537313166625049, "grad_norm": 0.26367178559303284, "learning_rate": 2.610553800206257e-05, "loss": 0.1536, "step": 30125 }, { "epoch": 0.5373310027467627, "grad_norm": 0.23547498881816864, "learning_rate": 2.610398301498711e-05, "loss": 0.1333, "step": 30126 }, { "epoch": 0.5373488388684764, "grad_norm": 0.2615172564983368, "learning_rate": 2.6102428023632225e-05, "loss": 0.1648, "step": 30127 }, { "epoch": 0.5373666749901901, "grad_norm": 0.23980942368507385, "learning_rate": 2.610087302800395e-05, "loss": 0.1201, "step": 30128 }, { "epoch": 0.5373845111119038, "grad_norm": 0.2491067349910736, "learning_rate": 2.609931802810831e-05, "loss": 0.1029, "step": 30129 }, { "epoch": 0.5374023472336176, "grad_norm": 0.17991548776626587, "learning_rate": 2.609776302395134e-05, "loss": 0.0941, "step": 30130 }, { "epoch": 0.5374201833553313, "grad_norm": 0.22439929842948914, "learning_rate": 2.6096208015539054e-05, "loss": 0.126, "step": 30131 }, { "epoch": 0.537438019477045, "grad_norm": 0.2057967334985733, "learning_rate": 2.609465300287749e-05, "loss": 0.1166, "step": 30132 }, { "epoch": 0.5374558555987586, "grad_norm": 0.3045075833797455, "learning_rate": 2.609309798597267e-05, "loss": 0.188, "step": 30133 }, { "epoch": 0.5374736917204723, "grad_norm": 0.25291216373443604, "learning_rate": 2.609154296483063e-05, "loss": 0.1608, "step": 30134 }, { "epoch": 0.537491527842186, "grad_norm": 0.2918686270713806, "learning_rate": 2.6089987939457388e-05, "loss": 0.1831, "step": 30135 }, { "epoch": 0.5375093639638997, "grad_norm": 0.30761343240737915, "learning_rate": 2.608843290985898e-05, "loss": 0.1665, "step": 30136 }, { "epoch": 0.5375272000856134, "grad_norm": 0.25711649656295776, "learning_rate": 2.6086877876041427e-05, "loss": 0.1112, "step": 30137 }, { "epoch": 0.5375450362073271, "grad_norm": 0.25996163487434387, "learning_rate": 2.608532283801076e-05, "loss": 0.1641, "step": 30138 }, { "epoch": 0.5375628723290408, "grad_norm": 0.2545626163482666, "learning_rate": 2.6083767795773006e-05, "loss": 0.1102, "step": 30139 }, { "epoch": 0.5375807084507545, "grad_norm": 0.2902357578277588, "learning_rate": 2.608221274933419e-05, "loss": 0.1323, "step": 30140 }, { "epoch": 0.5375985445724681, "grad_norm": 0.2788262367248535, "learning_rate": 2.6080657698700356e-05, "loss": 0.1411, "step": 30141 }, { "epoch": 0.5376163806941818, "grad_norm": 0.25920966267585754, "learning_rate": 2.6079102643877507e-05, "loss": 0.1828, "step": 30142 }, { "epoch": 0.5376342168158955, "grad_norm": 0.290206640958786, "learning_rate": 2.607754758487169e-05, "loss": 0.112, "step": 30143 }, { "epoch": 0.5376520529376092, "grad_norm": 0.20573769509792328, "learning_rate": 2.6075992521688924e-05, "loss": 0.1547, "step": 30144 }, { "epoch": 0.5376698890593229, "grad_norm": 0.26334241032600403, "learning_rate": 2.607443745433524e-05, "loss": 0.1506, "step": 30145 }, { "epoch": 0.5376877251810367, "grad_norm": 0.3441622257232666, "learning_rate": 2.607288238281666e-05, "loss": 0.1253, "step": 30146 }, { "epoch": 0.5377055613027504, "grad_norm": 0.31098997592926025, "learning_rate": 2.607132730713923e-05, "loss": 0.1946, "step": 30147 }, { "epoch": 0.5377233974244641, "grad_norm": 0.22055085003376007, "learning_rate": 2.6069772227308958e-05, "loss": 0.1064, "step": 30148 }, { "epoch": 0.5377412335461778, "grad_norm": 0.26129046082496643, "learning_rate": 2.606821714333188e-05, "loss": 0.0877, "step": 30149 }, { "epoch": 0.5377590696678914, "grad_norm": 0.28059816360473633, "learning_rate": 2.6066662055214024e-05, "loss": 0.1255, "step": 30150 }, { "epoch": 0.5377769057896051, "grad_norm": 0.3769588768482208, "learning_rate": 2.6065106962961416e-05, "loss": 0.169, "step": 30151 }, { "epoch": 0.5377947419113188, "grad_norm": 0.27034151554107666, "learning_rate": 2.6063551866580084e-05, "loss": 0.1637, "step": 30152 }, { "epoch": 0.5378125780330325, "grad_norm": 0.3192272186279297, "learning_rate": 2.6061996766076057e-05, "loss": 0.1272, "step": 30153 }, { "epoch": 0.5378304141547462, "grad_norm": 0.28916579484939575, "learning_rate": 2.606044166145537e-05, "loss": 0.1584, "step": 30154 }, { "epoch": 0.5378482502764599, "grad_norm": 0.3682737648487091, "learning_rate": 2.605888655272404e-05, "loss": 0.1519, "step": 30155 }, { "epoch": 0.5378660863981736, "grad_norm": 0.20910081267356873, "learning_rate": 2.6057331439888106e-05, "loss": 0.1095, "step": 30156 }, { "epoch": 0.5378839225198873, "grad_norm": 0.23869255185127258, "learning_rate": 2.6055776322953585e-05, "loss": 0.1462, "step": 30157 }, { "epoch": 0.537901758641601, "grad_norm": 0.2382553219795227, "learning_rate": 2.605422120192651e-05, "loss": 0.1099, "step": 30158 }, { "epoch": 0.5379195947633146, "grad_norm": 0.214219331741333, "learning_rate": 2.605266607681291e-05, "loss": 0.1353, "step": 30159 }, { "epoch": 0.5379374308850283, "grad_norm": 0.28148606419563293, "learning_rate": 2.605111094761881e-05, "loss": 0.1112, "step": 30160 }, { "epoch": 0.537955267006742, "grad_norm": 0.22239606082439423, "learning_rate": 2.6049555814350248e-05, "loss": 0.0909, "step": 30161 }, { "epoch": 0.5379731031284557, "grad_norm": 0.26567742228507996, "learning_rate": 2.604800067701324e-05, "loss": 0.1038, "step": 30162 }, { "epoch": 0.5379909392501695, "grad_norm": 0.19751587510108948, "learning_rate": 2.6046445535613822e-05, "loss": 0.1043, "step": 30163 }, { "epoch": 0.5380087753718832, "grad_norm": 0.2616390883922577, "learning_rate": 2.6044890390158017e-05, "loss": 0.1221, "step": 30164 }, { "epoch": 0.5380266114935969, "grad_norm": 0.2807294726371765, "learning_rate": 2.6043335240651863e-05, "loss": 0.1379, "step": 30165 }, { "epoch": 0.5380444476153106, "grad_norm": 0.31500375270843506, "learning_rate": 2.604178008710137e-05, "loss": 0.1216, "step": 30166 }, { "epoch": 0.5380622837370242, "grad_norm": 0.28421783447265625, "learning_rate": 2.604022492951258e-05, "loss": 0.1119, "step": 30167 }, { "epoch": 0.5380801198587379, "grad_norm": 0.22175948321819305, "learning_rate": 2.6038669767891522e-05, "loss": 0.1153, "step": 30168 }, { "epoch": 0.5380979559804516, "grad_norm": 0.2739332318305969, "learning_rate": 2.6037114602244224e-05, "loss": 0.0976, "step": 30169 }, { "epoch": 0.5381157921021653, "grad_norm": 0.23701195418834686, "learning_rate": 2.6035559432576707e-05, "loss": 0.1702, "step": 30170 }, { "epoch": 0.538133628223879, "grad_norm": 0.2555486857891083, "learning_rate": 2.6034004258895006e-05, "loss": 0.1654, "step": 30171 }, { "epoch": 0.5381514643455927, "grad_norm": 0.328472763299942, "learning_rate": 2.6032449081205146e-05, "loss": 0.0959, "step": 30172 }, { "epoch": 0.5381693004673064, "grad_norm": 0.29574477672576904, "learning_rate": 2.6030893899513153e-05, "loss": 0.1528, "step": 30173 }, { "epoch": 0.5381871365890201, "grad_norm": 0.2431219220161438, "learning_rate": 2.602933871382507e-05, "loss": 0.1446, "step": 30174 }, { "epoch": 0.5382049727107338, "grad_norm": 0.19734881818294525, "learning_rate": 2.6027783524146903e-05, "loss": 0.1229, "step": 30175 }, { "epoch": 0.5382228088324474, "grad_norm": 0.3010072112083435, "learning_rate": 2.6026228330484702e-05, "loss": 0.1618, "step": 30176 }, { "epoch": 0.5382406449541611, "grad_norm": 0.21486851572990417, "learning_rate": 2.6024673132844478e-05, "loss": 0.1431, "step": 30177 }, { "epoch": 0.5382584810758748, "grad_norm": 0.2521578073501587, "learning_rate": 2.6023117931232276e-05, "loss": 0.1456, "step": 30178 }, { "epoch": 0.5382763171975885, "grad_norm": 0.2575049102306366, "learning_rate": 2.602156272565411e-05, "loss": 0.1444, "step": 30179 }, { "epoch": 0.5382941533193023, "grad_norm": 0.30389899015426636, "learning_rate": 2.6020007516116007e-05, "loss": 0.1386, "step": 30180 }, { "epoch": 0.538311989441016, "grad_norm": 0.26248690485954285, "learning_rate": 2.601845230262401e-05, "loss": 0.0853, "step": 30181 }, { "epoch": 0.5383298255627297, "grad_norm": 0.23721909523010254, "learning_rate": 2.6016897085184145e-05, "loss": 0.1285, "step": 30182 }, { "epoch": 0.5383476616844434, "grad_norm": 0.3314650356769562, "learning_rate": 2.601534186380242e-05, "loss": 0.1629, "step": 30183 }, { "epoch": 0.5383654978061571, "grad_norm": 0.27695146203041077, "learning_rate": 2.60137866384849e-05, "loss": 0.1539, "step": 30184 }, { "epoch": 0.5383833339278707, "grad_norm": 0.2582625448703766, "learning_rate": 2.6012231409237575e-05, "loss": 0.1689, "step": 30185 }, { "epoch": 0.5384011700495844, "grad_norm": 0.3072316348552704, "learning_rate": 2.6010676176066507e-05, "loss": 0.146, "step": 30186 }, { "epoch": 0.5384190061712981, "grad_norm": 0.24565082788467407, "learning_rate": 2.6009120938977692e-05, "loss": 0.1432, "step": 30187 }, { "epoch": 0.5384368422930118, "grad_norm": 0.2465829700231552, "learning_rate": 2.6007565697977187e-05, "loss": 0.1235, "step": 30188 }, { "epoch": 0.5384546784147255, "grad_norm": 0.345730721950531, "learning_rate": 2.600601045307101e-05, "loss": 0.207, "step": 30189 }, { "epoch": 0.5384725145364392, "grad_norm": 0.6362784504890442, "learning_rate": 2.6004455204265182e-05, "loss": 0.2045, "step": 30190 }, { "epoch": 0.5384903506581529, "grad_norm": 0.2542247474193573, "learning_rate": 2.6002899951565746e-05, "loss": 0.1379, "step": 30191 }, { "epoch": 0.5385081867798666, "grad_norm": 0.2850508987903595, "learning_rate": 2.6001344694978717e-05, "loss": 0.153, "step": 30192 }, { "epoch": 0.5385260229015802, "grad_norm": 0.3003283143043518, "learning_rate": 2.5999789434510136e-05, "loss": 0.0988, "step": 30193 }, { "epoch": 0.5385438590232939, "grad_norm": 0.22373245656490326, "learning_rate": 2.5998234170166013e-05, "loss": 0.1382, "step": 30194 }, { "epoch": 0.5385616951450076, "grad_norm": 0.29682183265686035, "learning_rate": 2.5996678901952403e-05, "loss": 0.1302, "step": 30195 }, { "epoch": 0.5385795312667213, "grad_norm": 0.22597390413284302, "learning_rate": 2.5995123629875317e-05, "loss": 0.1404, "step": 30196 }, { "epoch": 0.5385973673884351, "grad_norm": 0.3263636827468872, "learning_rate": 2.5993568353940795e-05, "loss": 0.1298, "step": 30197 }, { "epoch": 0.5386152035101488, "grad_norm": 0.23312269151210785, "learning_rate": 2.5992013074154848e-05, "loss": 0.1563, "step": 30198 }, { "epoch": 0.5386330396318625, "grad_norm": 0.23365218937397003, "learning_rate": 2.5990457790523527e-05, "loss": 0.1451, "step": 30199 }, { "epoch": 0.5386508757535762, "grad_norm": 0.24646709859371185, "learning_rate": 2.5988902503052837e-05, "loss": 0.1618, "step": 30200 }, { "epoch": 0.5386687118752899, "grad_norm": 0.2120083123445511, "learning_rate": 2.5987347211748824e-05, "loss": 0.1298, "step": 30201 }, { "epoch": 0.5386865479970036, "grad_norm": 0.23787184059619904, "learning_rate": 2.598579191661752e-05, "loss": 0.1307, "step": 30202 }, { "epoch": 0.5387043841187172, "grad_norm": 0.26815640926361084, "learning_rate": 2.5984236617664936e-05, "loss": 0.1277, "step": 30203 }, { "epoch": 0.5387222202404309, "grad_norm": 0.3354211151599884, "learning_rate": 2.5982681314897118e-05, "loss": 0.1802, "step": 30204 }, { "epoch": 0.5387400563621446, "grad_norm": 0.21570003032684326, "learning_rate": 2.5981126008320078e-05, "loss": 0.0878, "step": 30205 }, { "epoch": 0.5387578924838583, "grad_norm": 0.3057674169540405, "learning_rate": 2.5979570697939864e-05, "loss": 0.1484, "step": 30206 }, { "epoch": 0.538775728605572, "grad_norm": 0.14569173753261566, "learning_rate": 2.5978015383762495e-05, "loss": 0.0774, "step": 30207 }, { "epoch": 0.5387935647272857, "grad_norm": 0.20691975951194763, "learning_rate": 2.5976460065793994e-05, "loss": 0.1085, "step": 30208 }, { "epoch": 0.5388114008489994, "grad_norm": 0.18781836330890656, "learning_rate": 2.5974904744040397e-05, "loss": 0.0624, "step": 30209 }, { "epoch": 0.538829236970713, "grad_norm": 0.3202458620071411, "learning_rate": 2.5973349418507742e-05, "loss": 0.1566, "step": 30210 }, { "epoch": 0.5388470730924267, "grad_norm": 0.2658301293849945, "learning_rate": 2.597179408920204e-05, "loss": 0.1645, "step": 30211 }, { "epoch": 0.5388649092141404, "grad_norm": 0.2889425754547119, "learning_rate": 2.5970238756129332e-05, "loss": 0.1679, "step": 30212 }, { "epoch": 0.5388827453358541, "grad_norm": 0.2121240496635437, "learning_rate": 2.596868341929565e-05, "loss": 0.1353, "step": 30213 }, { "epoch": 0.5389005814575679, "grad_norm": 0.25801408290863037, "learning_rate": 2.5967128078707004e-05, "loss": 0.1484, "step": 30214 }, { "epoch": 0.5389184175792816, "grad_norm": 0.2459481954574585, "learning_rate": 2.5965572734369438e-05, "loss": 0.1263, "step": 30215 }, { "epoch": 0.5389362537009953, "grad_norm": 0.3302626609802246, "learning_rate": 2.596401738628898e-05, "loss": 0.1191, "step": 30216 }, { "epoch": 0.538954089822709, "grad_norm": 0.31630364060401917, "learning_rate": 2.5962462034471663e-05, "loss": 0.1298, "step": 30217 }, { "epoch": 0.5389719259444227, "grad_norm": 0.2410750389099121, "learning_rate": 2.59609066789235e-05, "loss": 0.0959, "step": 30218 }, { "epoch": 0.5389897620661364, "grad_norm": 0.3196088969707489, "learning_rate": 2.5959351319650543e-05, "loss": 0.1573, "step": 30219 }, { "epoch": 0.53900759818785, "grad_norm": 0.2422807663679123, "learning_rate": 2.5957795956658804e-05, "loss": 0.133, "step": 30220 }, { "epoch": 0.5390254343095637, "grad_norm": 0.3216957151889801, "learning_rate": 2.5956240589954318e-05, "loss": 0.1602, "step": 30221 }, { "epoch": 0.5390432704312774, "grad_norm": 0.34926286339759827, "learning_rate": 2.5954685219543106e-05, "loss": 0.1891, "step": 30222 }, { "epoch": 0.5390611065529911, "grad_norm": 0.43251731991767883, "learning_rate": 2.595312984543121e-05, "loss": 0.1514, "step": 30223 }, { "epoch": 0.5390789426747048, "grad_norm": 0.25458455085754395, "learning_rate": 2.595157446762465e-05, "loss": 0.143, "step": 30224 }, { "epoch": 0.5390967787964185, "grad_norm": 0.28090301156044006, "learning_rate": 2.5950019086129467e-05, "loss": 0.1347, "step": 30225 }, { "epoch": 0.5391146149181322, "grad_norm": 0.32432064414024353, "learning_rate": 2.594846370095168e-05, "loss": 0.1852, "step": 30226 }, { "epoch": 0.5391324510398459, "grad_norm": 0.20416459441184998, "learning_rate": 2.594690831209732e-05, "loss": 0.112, "step": 30227 }, { "epoch": 0.5391502871615595, "grad_norm": 0.17926311492919922, "learning_rate": 2.5945352919572408e-05, "loss": 0.0929, "step": 30228 }, { "epoch": 0.5391681232832732, "grad_norm": 0.28817903995513916, "learning_rate": 2.5943797523382985e-05, "loss": 0.1671, "step": 30229 }, { "epoch": 0.5391859594049869, "grad_norm": 0.3736797869205475, "learning_rate": 2.5942242123535082e-05, "loss": 0.2002, "step": 30230 }, { "epoch": 0.5392037955267007, "grad_norm": 0.28220483660697937, "learning_rate": 2.5940686720034718e-05, "loss": 0.151, "step": 30231 }, { "epoch": 0.5392216316484144, "grad_norm": 0.2664179801940918, "learning_rate": 2.593913131288793e-05, "loss": 0.1667, "step": 30232 }, { "epoch": 0.5392394677701281, "grad_norm": 0.20628675818443298, "learning_rate": 2.5937575902100747e-05, "loss": 0.123, "step": 30233 }, { "epoch": 0.5392573038918418, "grad_norm": 0.22482234239578247, "learning_rate": 2.593602048767919e-05, "loss": 0.078, "step": 30234 }, { "epoch": 0.5392751400135555, "grad_norm": 0.24880845844745636, "learning_rate": 2.5934465069629298e-05, "loss": 0.1608, "step": 30235 }, { "epoch": 0.5392929761352692, "grad_norm": 0.26692065596580505, "learning_rate": 2.5932909647957092e-05, "loss": 0.1364, "step": 30236 }, { "epoch": 0.5393108122569829, "grad_norm": 0.22033682465553284, "learning_rate": 2.593135422266861e-05, "loss": 0.1082, "step": 30237 }, { "epoch": 0.5393286483786965, "grad_norm": 0.3624100983142853, "learning_rate": 2.5929798793769884e-05, "loss": 0.127, "step": 30238 }, { "epoch": 0.5393464845004102, "grad_norm": 0.2656458020210266, "learning_rate": 2.592824336126693e-05, "loss": 0.1301, "step": 30239 }, { "epoch": 0.5393643206221239, "grad_norm": 0.29909950494766235, "learning_rate": 2.5926687925165782e-05, "loss": 0.1415, "step": 30240 }, { "epoch": 0.5393821567438376, "grad_norm": 0.3570648431777954, "learning_rate": 2.5925132485472476e-05, "loss": 0.2211, "step": 30241 }, { "epoch": 0.5393999928655513, "grad_norm": 0.23444794118404388, "learning_rate": 2.5923577042193033e-05, "loss": 0.0894, "step": 30242 }, { "epoch": 0.539417828987265, "grad_norm": 0.2331296056509018, "learning_rate": 2.5922021595333486e-05, "loss": 0.096, "step": 30243 }, { "epoch": 0.5394356651089787, "grad_norm": 0.41303956508636475, "learning_rate": 2.5920466144899864e-05, "loss": 0.1378, "step": 30244 }, { "epoch": 0.5394535012306924, "grad_norm": 0.2473756968975067, "learning_rate": 2.5918910690898206e-05, "loss": 0.1428, "step": 30245 }, { "epoch": 0.539471337352406, "grad_norm": 0.3279547393321991, "learning_rate": 2.591735523333452e-05, "loss": 0.1161, "step": 30246 }, { "epoch": 0.5394891734741197, "grad_norm": 0.2827765643596649, "learning_rate": 2.591579977221486e-05, "loss": 0.1355, "step": 30247 }, { "epoch": 0.5395070095958335, "grad_norm": 0.20733055472373962, "learning_rate": 2.591424430754524e-05, "loss": 0.1476, "step": 30248 }, { "epoch": 0.5395248457175472, "grad_norm": 0.28367963433265686, "learning_rate": 2.591268883933169e-05, "loss": 0.1334, "step": 30249 }, { "epoch": 0.5395426818392609, "grad_norm": 0.2542972266674042, "learning_rate": 2.5911133367580244e-05, "loss": 0.1188, "step": 30250 }, { "epoch": 0.5395605179609746, "grad_norm": 0.2309126853942871, "learning_rate": 2.590957789229693e-05, "loss": 0.1272, "step": 30251 }, { "epoch": 0.5395783540826883, "grad_norm": 0.25583431124687195, "learning_rate": 2.5908022413487787e-05, "loss": 0.1716, "step": 30252 }, { "epoch": 0.539596190204402, "grad_norm": 0.20859479904174805, "learning_rate": 2.5906466931158823e-05, "loss": 0.108, "step": 30253 }, { "epoch": 0.5396140263261157, "grad_norm": 0.24227586388587952, "learning_rate": 2.5904911445316086e-05, "loss": 0.1567, "step": 30254 }, { "epoch": 0.5396318624478293, "grad_norm": 0.2867540717124939, "learning_rate": 2.5903355955965603e-05, "loss": 0.1571, "step": 30255 }, { "epoch": 0.539649698569543, "grad_norm": 0.2770766317844391, "learning_rate": 2.590180046311339e-05, "loss": 0.1291, "step": 30256 }, { "epoch": 0.5396675346912567, "grad_norm": 0.2663654088973999, "learning_rate": 2.590024496676549e-05, "loss": 0.165, "step": 30257 }, { "epoch": 0.5396853708129704, "grad_norm": 0.2213406264781952, "learning_rate": 2.589868946692794e-05, "loss": 0.137, "step": 30258 }, { "epoch": 0.5397032069346841, "grad_norm": 0.2811971604824066, "learning_rate": 2.5897133963606744e-05, "loss": 0.1488, "step": 30259 }, { "epoch": 0.5397210430563978, "grad_norm": 0.2854959964752197, "learning_rate": 2.589557845680796e-05, "loss": 0.14, "step": 30260 }, { "epoch": 0.5397388791781115, "grad_norm": 0.26762497425079346, "learning_rate": 2.58940229465376e-05, "loss": 0.1595, "step": 30261 }, { "epoch": 0.5397567152998252, "grad_norm": 0.2501121163368225, "learning_rate": 2.58924674328017e-05, "loss": 0.1329, "step": 30262 }, { "epoch": 0.5397745514215389, "grad_norm": 0.2629779279232025, "learning_rate": 2.589091191560628e-05, "loss": 0.136, "step": 30263 }, { "epoch": 0.5397923875432526, "grad_norm": 0.29275083541870117, "learning_rate": 2.588935639495738e-05, "loss": 0.1453, "step": 30264 }, { "epoch": 0.5398102236649663, "grad_norm": 0.28022679686546326, "learning_rate": 2.588780087086104e-05, "loss": 0.1353, "step": 30265 }, { "epoch": 0.53982805978668, "grad_norm": 0.2810835838317871, "learning_rate": 2.5886245343323263e-05, "loss": 0.1593, "step": 30266 }, { "epoch": 0.5398458959083937, "grad_norm": 0.3558344841003418, "learning_rate": 2.58846898123501e-05, "loss": 0.0792, "step": 30267 }, { "epoch": 0.5398637320301074, "grad_norm": 0.2560634911060333, "learning_rate": 2.588313427794757e-05, "loss": 0.1374, "step": 30268 }, { "epoch": 0.5398815681518211, "grad_norm": 0.26828914880752563, "learning_rate": 2.588157874012171e-05, "loss": 0.141, "step": 30269 }, { "epoch": 0.5398994042735348, "grad_norm": 0.23489776253700256, "learning_rate": 2.5880023198878544e-05, "loss": 0.199, "step": 30270 }, { "epoch": 0.5399172403952485, "grad_norm": 0.2857406735420227, "learning_rate": 2.5878467654224104e-05, "loss": 0.1525, "step": 30271 }, { "epoch": 0.5399350765169622, "grad_norm": 0.25609347224235535, "learning_rate": 2.5876912106164415e-05, "loss": 0.1443, "step": 30272 }, { "epoch": 0.5399529126386758, "grad_norm": 0.26739031076431274, "learning_rate": 2.5875356554705522e-05, "loss": 0.1295, "step": 30273 }, { "epoch": 0.5399707487603895, "grad_norm": 0.3170407712459564, "learning_rate": 2.587380099985344e-05, "loss": 0.1599, "step": 30274 }, { "epoch": 0.5399885848821032, "grad_norm": 0.2341240644454956, "learning_rate": 2.5872245441614202e-05, "loss": 0.1277, "step": 30275 }, { "epoch": 0.5400064210038169, "grad_norm": 0.213741272687912, "learning_rate": 2.587068987999384e-05, "loss": 0.121, "step": 30276 }, { "epoch": 0.5400242571255306, "grad_norm": 0.26799917221069336, "learning_rate": 2.5869134314998382e-05, "loss": 0.1149, "step": 30277 }, { "epoch": 0.5400420932472443, "grad_norm": 0.2628558278083801, "learning_rate": 2.5867578746633864e-05, "loss": 0.0884, "step": 30278 }, { "epoch": 0.540059929368958, "grad_norm": 0.2583215832710266, "learning_rate": 2.5866023174906306e-05, "loss": 0.0893, "step": 30279 }, { "epoch": 0.5400777654906717, "grad_norm": 0.2964015603065491, "learning_rate": 2.5864467599821753e-05, "loss": 0.1313, "step": 30280 }, { "epoch": 0.5400956016123855, "grad_norm": 0.39377182722091675, "learning_rate": 2.586291202138621e-05, "loss": 0.1669, "step": 30281 }, { "epoch": 0.5401134377340991, "grad_norm": 0.19919002056121826, "learning_rate": 2.586135643960573e-05, "loss": 0.1247, "step": 30282 }, { "epoch": 0.5401312738558128, "grad_norm": 0.2302965670824051, "learning_rate": 2.585980085448634e-05, "loss": 0.1231, "step": 30283 }, { "epoch": 0.5401491099775265, "grad_norm": 0.24823573231697083, "learning_rate": 2.5858245266034054e-05, "loss": 0.0878, "step": 30284 }, { "epoch": 0.5401669460992402, "grad_norm": 0.2385394126176834, "learning_rate": 2.585668967425492e-05, "loss": 0.1705, "step": 30285 }, { "epoch": 0.5401847822209539, "grad_norm": 0.2751266360282898, "learning_rate": 2.5855134079154957e-05, "loss": 0.1358, "step": 30286 }, { "epoch": 0.5402026183426676, "grad_norm": 0.3314450681209564, "learning_rate": 2.5853578480740203e-05, "loss": 0.1688, "step": 30287 }, { "epoch": 0.5402204544643813, "grad_norm": 0.23528346419334412, "learning_rate": 2.5852022879016684e-05, "loss": 0.1569, "step": 30288 }, { "epoch": 0.540238290586095, "grad_norm": 0.3035106658935547, "learning_rate": 2.5850467273990424e-05, "loss": 0.1463, "step": 30289 }, { "epoch": 0.5402561267078086, "grad_norm": 0.4092877209186554, "learning_rate": 2.5848911665667468e-05, "loss": 0.1503, "step": 30290 }, { "epoch": 0.5402739628295223, "grad_norm": 0.23853865265846252, "learning_rate": 2.584735605405383e-05, "loss": 0.1328, "step": 30291 }, { "epoch": 0.540291798951236, "grad_norm": 0.2787911891937256, "learning_rate": 2.5845800439155547e-05, "loss": 0.1624, "step": 30292 }, { "epoch": 0.5403096350729497, "grad_norm": 0.22847092151641846, "learning_rate": 2.5844244820978657e-05, "loss": 0.1847, "step": 30293 }, { "epoch": 0.5403274711946634, "grad_norm": 0.31058481335639954, "learning_rate": 2.5842689199529174e-05, "loss": 0.1144, "step": 30294 }, { "epoch": 0.5403453073163771, "grad_norm": 0.2743181586265564, "learning_rate": 2.5841133574813142e-05, "loss": 0.1191, "step": 30295 }, { "epoch": 0.5403631434380908, "grad_norm": 0.2613810896873474, "learning_rate": 2.5839577946836586e-05, "loss": 0.13, "step": 30296 }, { "epoch": 0.5403809795598045, "grad_norm": 0.27584362030029297, "learning_rate": 2.5838022315605536e-05, "loss": 0.1309, "step": 30297 }, { "epoch": 0.5403988156815183, "grad_norm": 0.30723848938941956, "learning_rate": 2.5836466681126016e-05, "loss": 0.1095, "step": 30298 }, { "epoch": 0.540416651803232, "grad_norm": 0.3005426824092865, "learning_rate": 2.5834911043404064e-05, "loss": 0.2067, "step": 30299 }, { "epoch": 0.5404344879249456, "grad_norm": 0.21849948167800903, "learning_rate": 2.583335540244571e-05, "loss": 0.066, "step": 30300 }, { "epoch": 0.5404523240466593, "grad_norm": 0.26733192801475525, "learning_rate": 2.5831799758256985e-05, "loss": 0.1004, "step": 30301 }, { "epoch": 0.540470160168373, "grad_norm": 0.33891624212265015, "learning_rate": 2.5830244110843916e-05, "loss": 0.1587, "step": 30302 }, { "epoch": 0.5404879962900867, "grad_norm": 0.3988088369369507, "learning_rate": 2.5828688460212536e-05, "loss": 0.1447, "step": 30303 }, { "epoch": 0.5405058324118004, "grad_norm": 0.3887338638305664, "learning_rate": 2.5827132806368864e-05, "loss": 0.1319, "step": 30304 }, { "epoch": 0.5405236685335141, "grad_norm": 0.22638940811157227, "learning_rate": 2.5825577149318948e-05, "loss": 0.1431, "step": 30305 }, { "epoch": 0.5405415046552278, "grad_norm": 0.21133919060230255, "learning_rate": 2.5824021489068807e-05, "loss": 0.1482, "step": 30306 }, { "epoch": 0.5405593407769415, "grad_norm": 0.25019216537475586, "learning_rate": 2.5822465825624477e-05, "loss": 0.1466, "step": 30307 }, { "epoch": 0.5405771768986551, "grad_norm": 0.26719704270362854, "learning_rate": 2.5820910158991983e-05, "loss": 0.1256, "step": 30308 }, { "epoch": 0.5405950130203688, "grad_norm": 0.2063877135515213, "learning_rate": 2.5819354489177356e-05, "loss": 0.1507, "step": 30309 }, { "epoch": 0.5406128491420825, "grad_norm": 0.29922378063201904, "learning_rate": 2.5817798816186633e-05, "loss": 0.1246, "step": 30310 }, { "epoch": 0.5406306852637962, "grad_norm": 0.2416669875383377, "learning_rate": 2.5816243140025835e-05, "loss": 0.1699, "step": 30311 }, { "epoch": 0.5406485213855099, "grad_norm": 0.23381966352462769, "learning_rate": 2.5814687460700993e-05, "loss": 0.0865, "step": 30312 }, { "epoch": 0.5406663575072236, "grad_norm": 0.2589220702648163, "learning_rate": 2.5813131778218148e-05, "loss": 0.1394, "step": 30313 }, { "epoch": 0.5406841936289373, "grad_norm": 0.2585050165653229, "learning_rate": 2.5811576092583323e-05, "loss": 0.1436, "step": 30314 }, { "epoch": 0.5407020297506511, "grad_norm": 0.2663988173007965, "learning_rate": 2.5810020403802544e-05, "loss": 0.1696, "step": 30315 }, { "epoch": 0.5407198658723648, "grad_norm": 0.2469911426305771, "learning_rate": 2.580846471188185e-05, "loss": 0.1426, "step": 30316 }, { "epoch": 0.5407377019940784, "grad_norm": 0.2628594636917114, "learning_rate": 2.5806909016827275e-05, "loss": 0.1712, "step": 30317 }, { "epoch": 0.5407555381157921, "grad_norm": 0.23110628128051758, "learning_rate": 2.5805353318644833e-05, "loss": 0.1249, "step": 30318 }, { "epoch": 0.5407733742375058, "grad_norm": 0.23697690665721893, "learning_rate": 2.5803797617340557e-05, "loss": 0.1552, "step": 30319 }, { "epoch": 0.5407912103592195, "grad_norm": 0.30695194005966187, "learning_rate": 2.580224191292049e-05, "loss": 0.182, "step": 30320 }, { "epoch": 0.5408090464809332, "grad_norm": 0.25207746028900146, "learning_rate": 2.580068620539066e-05, "loss": 0.1666, "step": 30321 }, { "epoch": 0.5408268826026469, "grad_norm": 0.290650337934494, "learning_rate": 2.579913049475709e-05, "loss": 0.1302, "step": 30322 }, { "epoch": 0.5408447187243606, "grad_norm": 0.2963530123233795, "learning_rate": 2.579757478102582e-05, "loss": 0.1491, "step": 30323 }, { "epoch": 0.5408625548460743, "grad_norm": 0.23589985072612762, "learning_rate": 2.5796019064202865e-05, "loss": 0.0971, "step": 30324 }, { "epoch": 0.540880390967788, "grad_norm": 0.3055668771266937, "learning_rate": 2.5794463344294272e-05, "loss": 0.1642, "step": 30325 }, { "epoch": 0.5408982270895016, "grad_norm": 0.2741827070713043, "learning_rate": 2.5792907621306057e-05, "loss": 0.185, "step": 30326 }, { "epoch": 0.5409160632112153, "grad_norm": 0.23917505145072937, "learning_rate": 2.5791351895244265e-05, "loss": 0.1275, "step": 30327 }, { "epoch": 0.540933899332929, "grad_norm": 0.2493560016155243, "learning_rate": 2.5789796166114915e-05, "loss": 0.1586, "step": 30328 }, { "epoch": 0.5409517354546427, "grad_norm": 0.238856241106987, "learning_rate": 2.5788240433924053e-05, "loss": 0.1515, "step": 30329 }, { "epoch": 0.5409695715763564, "grad_norm": 0.29860156774520874, "learning_rate": 2.5786684698677693e-05, "loss": 0.1131, "step": 30330 }, { "epoch": 0.5409874076980701, "grad_norm": 0.2356102019548416, "learning_rate": 2.578512896038187e-05, "loss": 0.1871, "step": 30331 }, { "epoch": 0.5410052438197839, "grad_norm": 0.22272637486457825, "learning_rate": 2.5783573219042613e-05, "loss": 0.1193, "step": 30332 }, { "epoch": 0.5410230799414976, "grad_norm": 0.22675439715385437, "learning_rate": 2.5782017474665955e-05, "loss": 0.1191, "step": 30333 }, { "epoch": 0.5410409160632113, "grad_norm": 0.2381807565689087, "learning_rate": 2.5780461727257936e-05, "loss": 0.1224, "step": 30334 }, { "epoch": 0.5410587521849249, "grad_norm": 0.182336688041687, "learning_rate": 2.577890597682457e-05, "loss": 0.095, "step": 30335 }, { "epoch": 0.5410765883066386, "grad_norm": 0.2791365087032318, "learning_rate": 2.57773502233719e-05, "loss": 0.1449, "step": 30336 }, { "epoch": 0.5410944244283523, "grad_norm": 0.21785160899162292, "learning_rate": 2.577579446690595e-05, "loss": 0.1061, "step": 30337 }, { "epoch": 0.541112260550066, "grad_norm": 0.20653729140758514, "learning_rate": 2.5774238707432758e-05, "loss": 0.1034, "step": 30338 }, { "epoch": 0.5411300966717797, "grad_norm": 0.37819990515708923, "learning_rate": 2.5772682944958337e-05, "loss": 0.2114, "step": 30339 }, { "epoch": 0.5411479327934934, "grad_norm": 0.20874670147895813, "learning_rate": 2.5771127179488736e-05, "loss": 0.1244, "step": 30340 }, { "epoch": 0.5411657689152071, "grad_norm": 0.31306561827659607, "learning_rate": 2.576957141102998e-05, "loss": 0.0999, "step": 30341 }, { "epoch": 0.5411836050369208, "grad_norm": 0.26542919874191284, "learning_rate": 2.5768015639588106e-05, "loss": 0.1362, "step": 30342 }, { "epoch": 0.5412014411586344, "grad_norm": 0.21382597088813782, "learning_rate": 2.5766459865169128e-05, "loss": 0.1231, "step": 30343 }, { "epoch": 0.5412192772803481, "grad_norm": 0.32082098722457886, "learning_rate": 2.5764904087779097e-05, "loss": 0.1159, "step": 30344 }, { "epoch": 0.5412371134020618, "grad_norm": 0.29864999651908875, "learning_rate": 2.5763348307424028e-05, "loss": 0.1295, "step": 30345 }, { "epoch": 0.5412549495237755, "grad_norm": 0.1638377606868744, "learning_rate": 2.5761792524109957e-05, "loss": 0.0988, "step": 30346 }, { "epoch": 0.5412727856454892, "grad_norm": 0.25024187564849854, "learning_rate": 2.576023673784291e-05, "loss": 0.1444, "step": 30347 }, { "epoch": 0.5412906217672029, "grad_norm": 0.33483538031578064, "learning_rate": 2.5758680948628928e-05, "loss": 0.1113, "step": 30348 }, { "epoch": 0.5413084578889167, "grad_norm": 0.31227901577949524, "learning_rate": 2.575712515647404e-05, "loss": 0.1559, "step": 30349 }, { "epoch": 0.5413262940106304, "grad_norm": 0.2563287019729614, "learning_rate": 2.5755569361384262e-05, "loss": 0.1664, "step": 30350 }, { "epoch": 0.5413441301323441, "grad_norm": 0.3479805886745453, "learning_rate": 2.5754013563365652e-05, "loss": 0.1112, "step": 30351 }, { "epoch": 0.5413619662540577, "grad_norm": 0.24797408282756805, "learning_rate": 2.5752457762424216e-05, "loss": 0.1281, "step": 30352 }, { "epoch": 0.5413798023757714, "grad_norm": 0.2663181722164154, "learning_rate": 2.5750901958565994e-05, "loss": 0.163, "step": 30353 }, { "epoch": 0.5413976384974851, "grad_norm": 0.25221049785614014, "learning_rate": 2.5749346151797012e-05, "loss": 0.1359, "step": 30354 }, { "epoch": 0.5414154746191988, "grad_norm": 0.21352344751358032, "learning_rate": 2.5747790342123313e-05, "loss": 0.1281, "step": 30355 }, { "epoch": 0.5414333107409125, "grad_norm": 0.2809578478336334, "learning_rate": 2.5746234529550918e-05, "loss": 0.1336, "step": 30356 }, { "epoch": 0.5414511468626262, "grad_norm": 0.2282564342021942, "learning_rate": 2.5744678714085862e-05, "loss": 0.0828, "step": 30357 }, { "epoch": 0.5414689829843399, "grad_norm": 0.2017303854227066, "learning_rate": 2.5743122895734178e-05, "loss": 0.1083, "step": 30358 }, { "epoch": 0.5414868191060536, "grad_norm": 0.26987993717193604, "learning_rate": 2.5741567074501888e-05, "loss": 0.2113, "step": 30359 }, { "epoch": 0.5415046552277673, "grad_norm": 0.2673798203468323, "learning_rate": 2.5740011250395026e-05, "loss": 0.1316, "step": 30360 }, { "epoch": 0.5415224913494809, "grad_norm": 0.29162001609802246, "learning_rate": 2.5738455423419622e-05, "loss": 0.1559, "step": 30361 }, { "epoch": 0.5415403274711946, "grad_norm": 0.2928520441055298, "learning_rate": 2.5736899593581714e-05, "loss": 0.1369, "step": 30362 }, { "epoch": 0.5415581635929083, "grad_norm": 0.24529993534088135, "learning_rate": 2.573534376088733e-05, "loss": 0.1252, "step": 30363 }, { "epoch": 0.541575999714622, "grad_norm": 0.31121334433555603, "learning_rate": 2.57337879253425e-05, "loss": 0.1689, "step": 30364 }, { "epoch": 0.5415938358363358, "grad_norm": 0.24328245222568512, "learning_rate": 2.573223208695325e-05, "loss": 0.1449, "step": 30365 }, { "epoch": 0.5416116719580495, "grad_norm": 0.25867602229118347, "learning_rate": 2.5730676245725622e-05, "loss": 0.16, "step": 30366 }, { "epoch": 0.5416295080797632, "grad_norm": 0.36305147409439087, "learning_rate": 2.5729120401665635e-05, "loss": 0.1427, "step": 30367 }, { "epoch": 0.5416473442014769, "grad_norm": 0.3836963176727295, "learning_rate": 2.5727564554779325e-05, "loss": 0.2233, "step": 30368 }, { "epoch": 0.5416651803231906, "grad_norm": 0.2705952227115631, "learning_rate": 2.5726008705072725e-05, "loss": 0.1498, "step": 30369 }, { "epoch": 0.5416830164449042, "grad_norm": 0.25841236114501953, "learning_rate": 2.5724452852551868e-05, "loss": 0.1668, "step": 30370 }, { "epoch": 0.5417008525666179, "grad_norm": 0.2395922690629959, "learning_rate": 2.5722896997222785e-05, "loss": 0.1035, "step": 30371 }, { "epoch": 0.5417186886883316, "grad_norm": 0.3051241338253021, "learning_rate": 2.5721341139091493e-05, "loss": 0.1668, "step": 30372 }, { "epoch": 0.5417365248100453, "grad_norm": 0.2469150722026825, "learning_rate": 2.5719785278164044e-05, "loss": 0.084, "step": 30373 }, { "epoch": 0.541754360931759, "grad_norm": 0.27553534507751465, "learning_rate": 2.5718229414446448e-05, "loss": 0.1475, "step": 30374 }, { "epoch": 0.5417721970534727, "grad_norm": 0.24740992486476898, "learning_rate": 2.5716673547944753e-05, "loss": 0.1371, "step": 30375 }, { "epoch": 0.5417900331751864, "grad_norm": 0.2842337191104889, "learning_rate": 2.571511767866498e-05, "loss": 0.1437, "step": 30376 }, { "epoch": 0.5418078692969001, "grad_norm": 0.23383453488349915, "learning_rate": 2.571356180661317e-05, "loss": 0.1091, "step": 30377 }, { "epoch": 0.5418257054186137, "grad_norm": 0.2744031846523285, "learning_rate": 2.5712005931795342e-05, "loss": 0.1176, "step": 30378 }, { "epoch": 0.5418435415403274, "grad_norm": 0.24337030947208405, "learning_rate": 2.5710450054217532e-05, "loss": 0.0945, "step": 30379 }, { "epoch": 0.5418613776620411, "grad_norm": 0.24219100177288055, "learning_rate": 2.570889417388578e-05, "loss": 0.1219, "step": 30380 }, { "epoch": 0.5418792137837548, "grad_norm": 0.26978981494903564, "learning_rate": 2.57073382908061e-05, "loss": 0.1516, "step": 30381 }, { "epoch": 0.5418970499054686, "grad_norm": 0.22270672023296356, "learning_rate": 2.5705782404984536e-05, "loss": 0.1334, "step": 30382 }, { "epoch": 0.5419148860271823, "grad_norm": 0.3178529739379883, "learning_rate": 2.5704226516427123e-05, "loss": 0.1762, "step": 30383 }, { "epoch": 0.541932722148896, "grad_norm": 0.4411005973815918, "learning_rate": 2.5702670625139878e-05, "loss": 0.1183, "step": 30384 }, { "epoch": 0.5419505582706097, "grad_norm": 0.34993645548820496, "learning_rate": 2.570111473112884e-05, "loss": 0.1347, "step": 30385 }, { "epoch": 0.5419683943923234, "grad_norm": 0.22318151593208313, "learning_rate": 2.569955883440004e-05, "loss": 0.1056, "step": 30386 }, { "epoch": 0.541986230514037, "grad_norm": 0.28538092970848083, "learning_rate": 2.5698002934959503e-05, "loss": 0.1775, "step": 30387 }, { "epoch": 0.5420040666357507, "grad_norm": 0.24118423461914062, "learning_rate": 2.569644703281327e-05, "loss": 0.1358, "step": 30388 }, { "epoch": 0.5420219027574644, "grad_norm": 0.3231845498085022, "learning_rate": 2.569489112796736e-05, "loss": 0.1363, "step": 30389 }, { "epoch": 0.5420397388791781, "grad_norm": 0.27154645323753357, "learning_rate": 2.5693335220427828e-05, "loss": 0.103, "step": 30390 }, { "epoch": 0.5420575750008918, "grad_norm": 0.37285539507865906, "learning_rate": 2.5691779310200676e-05, "loss": 0.1349, "step": 30391 }, { "epoch": 0.5420754111226055, "grad_norm": 0.2843606770038605, "learning_rate": 2.5690223397291957e-05, "loss": 0.1644, "step": 30392 }, { "epoch": 0.5420932472443192, "grad_norm": 0.29346659779548645, "learning_rate": 2.568866748170769e-05, "loss": 0.1086, "step": 30393 }, { "epoch": 0.5421110833660329, "grad_norm": 0.23756791651248932, "learning_rate": 2.5687111563453913e-05, "loss": 0.1246, "step": 30394 }, { "epoch": 0.5421289194877466, "grad_norm": 0.308624267578125, "learning_rate": 2.5685555642536646e-05, "loss": 0.1112, "step": 30395 }, { "epoch": 0.5421467556094602, "grad_norm": 0.25421419739723206, "learning_rate": 2.5683999718961932e-05, "loss": 0.1963, "step": 30396 }, { "epoch": 0.5421645917311739, "grad_norm": 0.22110804915428162, "learning_rate": 2.5682443792735805e-05, "loss": 0.1281, "step": 30397 }, { "epoch": 0.5421824278528876, "grad_norm": 0.2372094690799713, "learning_rate": 2.5680887863864283e-05, "loss": 0.1336, "step": 30398 }, { "epoch": 0.5422002639746014, "grad_norm": 0.226642906665802, "learning_rate": 2.5679331932353412e-05, "loss": 0.1442, "step": 30399 }, { "epoch": 0.5422181000963151, "grad_norm": 0.23188887536525726, "learning_rate": 2.5677775998209207e-05, "loss": 0.1348, "step": 30400 }, { "epoch": 0.5422359362180288, "grad_norm": 0.30838584899902344, "learning_rate": 2.5676220061437717e-05, "loss": 0.1631, "step": 30401 }, { "epoch": 0.5422537723397425, "grad_norm": 0.2719489336013794, "learning_rate": 2.5674664122044957e-05, "loss": 0.1007, "step": 30402 }, { "epoch": 0.5422716084614562, "grad_norm": 0.46597820520401, "learning_rate": 2.5673108180036963e-05, "loss": 0.1256, "step": 30403 }, { "epoch": 0.5422894445831699, "grad_norm": 0.25430217385292053, "learning_rate": 2.5671552235419776e-05, "loss": 0.1366, "step": 30404 }, { "epoch": 0.5423072807048835, "grad_norm": 0.340923547744751, "learning_rate": 2.5669996288199422e-05, "loss": 0.1385, "step": 30405 }, { "epoch": 0.5423251168265972, "grad_norm": 0.2916175425052643, "learning_rate": 2.5668440338381926e-05, "loss": 0.1117, "step": 30406 }, { "epoch": 0.5423429529483109, "grad_norm": 0.32697227597236633, "learning_rate": 2.5666884385973327e-05, "loss": 0.1037, "step": 30407 }, { "epoch": 0.5423607890700246, "grad_norm": 0.33345985412597656, "learning_rate": 2.566532843097965e-05, "loss": 0.1764, "step": 30408 }, { "epoch": 0.5423786251917383, "grad_norm": 0.21775665879249573, "learning_rate": 2.5663772473406934e-05, "loss": 0.1266, "step": 30409 }, { "epoch": 0.542396461313452, "grad_norm": 0.318934828042984, "learning_rate": 2.566221651326121e-05, "loss": 0.1442, "step": 30410 }, { "epoch": 0.5424142974351657, "grad_norm": 0.47422051429748535, "learning_rate": 2.56606605505485e-05, "loss": 0.1671, "step": 30411 }, { "epoch": 0.5424321335568794, "grad_norm": 0.2716493308544159, "learning_rate": 2.5659104585274844e-05, "loss": 0.14, "step": 30412 }, { "epoch": 0.542449969678593, "grad_norm": 0.3493393063545227, "learning_rate": 2.565754861744627e-05, "loss": 0.0959, "step": 30413 }, { "epoch": 0.5424678058003067, "grad_norm": 0.3317636251449585, "learning_rate": 2.5655992647068815e-05, "loss": 0.1289, "step": 30414 }, { "epoch": 0.5424856419220204, "grad_norm": 0.21200741827487946, "learning_rate": 2.5654436674148503e-05, "loss": 0.1255, "step": 30415 }, { "epoch": 0.5425034780437342, "grad_norm": 0.35558727383613586, "learning_rate": 2.565288069869136e-05, "loss": 0.1129, "step": 30416 }, { "epoch": 0.5425213141654479, "grad_norm": 0.30525141954421997, "learning_rate": 2.5651324720703433e-05, "loss": 0.1611, "step": 30417 }, { "epoch": 0.5425391502871616, "grad_norm": 0.25791317224502563, "learning_rate": 2.5649768740190755e-05, "loss": 0.1654, "step": 30418 }, { "epoch": 0.5425569864088753, "grad_norm": 0.24684809148311615, "learning_rate": 2.5648212757159335e-05, "loss": 0.1391, "step": 30419 }, { "epoch": 0.542574822530589, "grad_norm": 0.2556397318840027, "learning_rate": 2.564665677161523e-05, "loss": 0.1189, "step": 30420 }, { "epoch": 0.5425926586523027, "grad_norm": 0.22299419343471527, "learning_rate": 2.5645100783564453e-05, "loss": 0.0947, "step": 30421 }, { "epoch": 0.5426104947740163, "grad_norm": 0.22191090881824493, "learning_rate": 2.5643544793013048e-05, "loss": 0.1475, "step": 30422 }, { "epoch": 0.54262833089573, "grad_norm": 0.2882049083709717, "learning_rate": 2.5641988799967033e-05, "loss": 0.1277, "step": 30423 }, { "epoch": 0.5426461670174437, "grad_norm": 0.29855647683143616, "learning_rate": 2.5640432804432445e-05, "loss": 0.1474, "step": 30424 }, { "epoch": 0.5426640031391574, "grad_norm": 0.19883571565151215, "learning_rate": 2.563887680641533e-05, "loss": 0.0946, "step": 30425 }, { "epoch": 0.5426818392608711, "grad_norm": 0.2799416482448578, "learning_rate": 2.56373208059217e-05, "loss": 0.0912, "step": 30426 }, { "epoch": 0.5426996753825848, "grad_norm": 0.45761919021606445, "learning_rate": 2.5635764802957602e-05, "loss": 0.1379, "step": 30427 }, { "epoch": 0.5427175115042985, "grad_norm": 0.24477513134479523, "learning_rate": 2.5634208797529053e-05, "loss": 0.1285, "step": 30428 }, { "epoch": 0.5427353476260122, "grad_norm": 0.3262573778629303, "learning_rate": 2.56326527896421e-05, "loss": 0.1673, "step": 30429 }, { "epoch": 0.5427531837477259, "grad_norm": 0.3951232135295868, "learning_rate": 2.5631096779302748e-05, "loss": 0.2156, "step": 30430 }, { "epoch": 0.5427710198694395, "grad_norm": 0.1818213015794754, "learning_rate": 2.5629540766517063e-05, "loss": 0.1523, "step": 30431 }, { "epoch": 0.5427888559911532, "grad_norm": 0.2641490697860718, "learning_rate": 2.5627984751291057e-05, "loss": 0.1106, "step": 30432 }, { "epoch": 0.542806692112867, "grad_norm": 0.24945363402366638, "learning_rate": 2.5626428733630762e-05, "loss": 0.1194, "step": 30433 }, { "epoch": 0.5428245282345807, "grad_norm": 0.2434176206588745, "learning_rate": 2.562487271354222e-05, "loss": 0.1712, "step": 30434 }, { "epoch": 0.5428423643562944, "grad_norm": 0.23235736787319183, "learning_rate": 2.562331669103145e-05, "loss": 0.1177, "step": 30435 }, { "epoch": 0.5428602004780081, "grad_norm": 0.28597357869148254, "learning_rate": 2.5621760666104487e-05, "loss": 0.1569, "step": 30436 }, { "epoch": 0.5428780365997218, "grad_norm": 0.2535983622074127, "learning_rate": 2.562020463876737e-05, "loss": 0.1095, "step": 30437 }, { "epoch": 0.5428958727214355, "grad_norm": 0.32386693358421326, "learning_rate": 2.5618648609026125e-05, "loss": 0.0916, "step": 30438 }, { "epoch": 0.5429137088431492, "grad_norm": 0.19447185099124908, "learning_rate": 2.5617092576886782e-05, "loss": 0.1437, "step": 30439 }, { "epoch": 0.5429315449648628, "grad_norm": 0.22546543180942535, "learning_rate": 2.561553654235538e-05, "loss": 0.1348, "step": 30440 }, { "epoch": 0.5429493810865765, "grad_norm": 0.25543931126594543, "learning_rate": 2.561398050543794e-05, "loss": 0.1329, "step": 30441 }, { "epoch": 0.5429672172082902, "grad_norm": 0.29706892371177673, "learning_rate": 2.5612424466140504e-05, "loss": 0.1356, "step": 30442 }, { "epoch": 0.5429850533300039, "grad_norm": 0.2604389488697052, "learning_rate": 2.56108684244691e-05, "loss": 0.1773, "step": 30443 }, { "epoch": 0.5430028894517176, "grad_norm": 0.30576810240745544, "learning_rate": 2.560931238042975e-05, "loss": 0.124, "step": 30444 }, { "epoch": 0.5430207255734313, "grad_norm": 0.25967666506767273, "learning_rate": 2.56077563340285e-05, "loss": 0.1527, "step": 30445 }, { "epoch": 0.543038561695145, "grad_norm": 0.2925667464733124, "learning_rate": 2.5606200285271375e-05, "loss": 0.1214, "step": 30446 }, { "epoch": 0.5430563978168587, "grad_norm": 0.3637361228466034, "learning_rate": 2.560464423416441e-05, "loss": 0.1348, "step": 30447 }, { "epoch": 0.5430742339385723, "grad_norm": 0.23158197104930878, "learning_rate": 2.5603088180713642e-05, "loss": 0.1017, "step": 30448 }, { "epoch": 0.543092070060286, "grad_norm": 0.26995033025741577, "learning_rate": 2.5601532124925094e-05, "loss": 0.1462, "step": 30449 }, { "epoch": 0.5431099061819998, "grad_norm": 0.26165539026260376, "learning_rate": 2.5599976066804787e-05, "loss": 0.1205, "step": 30450 }, { "epoch": 0.5431277423037135, "grad_norm": 0.25607991218566895, "learning_rate": 2.5598420006358775e-05, "loss": 0.146, "step": 30451 }, { "epoch": 0.5431455784254272, "grad_norm": 0.2899633049964905, "learning_rate": 2.5596863943593075e-05, "loss": 0.1241, "step": 30452 }, { "epoch": 0.5431634145471409, "grad_norm": 0.23187774419784546, "learning_rate": 2.559530787851373e-05, "loss": 0.1261, "step": 30453 }, { "epoch": 0.5431812506688546, "grad_norm": 0.256250262260437, "learning_rate": 2.5593751811126765e-05, "loss": 0.1463, "step": 30454 }, { "epoch": 0.5431990867905683, "grad_norm": 0.23451587557792664, "learning_rate": 2.5592195741438213e-05, "loss": 0.1119, "step": 30455 }, { "epoch": 0.543216922912282, "grad_norm": 0.32399609684944153, "learning_rate": 2.55906396694541e-05, "loss": 0.156, "step": 30456 }, { "epoch": 0.5432347590339957, "grad_norm": 0.2855326235294342, "learning_rate": 2.558908359518047e-05, "loss": 0.1214, "step": 30457 }, { "epoch": 0.5432525951557093, "grad_norm": 0.2582947611808777, "learning_rate": 2.5587527518623345e-05, "loss": 0.1254, "step": 30458 }, { "epoch": 0.543270431277423, "grad_norm": 0.2899855077266693, "learning_rate": 2.558597143978877e-05, "loss": 0.1353, "step": 30459 }, { "epoch": 0.5432882673991367, "grad_norm": 0.22167515754699707, "learning_rate": 2.5584415358682755e-05, "loss": 0.1203, "step": 30460 }, { "epoch": 0.5433061035208504, "grad_norm": 0.2114895135164261, "learning_rate": 2.558285927531135e-05, "loss": 0.1255, "step": 30461 }, { "epoch": 0.5433239396425641, "grad_norm": 0.32345715165138245, "learning_rate": 2.5581303189680584e-05, "loss": 0.1191, "step": 30462 }, { "epoch": 0.5433417757642778, "grad_norm": 0.28123369812965393, "learning_rate": 2.557974710179648e-05, "loss": 0.1924, "step": 30463 }, { "epoch": 0.5433596118859915, "grad_norm": 0.46767711639404297, "learning_rate": 2.5578191011665077e-05, "loss": 0.1753, "step": 30464 }, { "epoch": 0.5433774480077052, "grad_norm": 0.2891663610935211, "learning_rate": 2.5576634919292408e-05, "loss": 0.1981, "step": 30465 }, { "epoch": 0.543395284129419, "grad_norm": 0.3217829465866089, "learning_rate": 2.55750788246845e-05, "loss": 0.0849, "step": 30466 }, { "epoch": 0.5434131202511326, "grad_norm": 0.1882012039422989, "learning_rate": 2.5573522727847394e-05, "loss": 0.1173, "step": 30467 }, { "epoch": 0.5434309563728463, "grad_norm": 0.34905749559402466, "learning_rate": 2.5571966628787114e-05, "loss": 0.1452, "step": 30468 }, { "epoch": 0.54344879249456, "grad_norm": 0.21599042415618896, "learning_rate": 2.5570410527509686e-05, "loss": 0.1405, "step": 30469 }, { "epoch": 0.5434666286162737, "grad_norm": 0.28630906343460083, "learning_rate": 2.556885442402116e-05, "loss": 0.1624, "step": 30470 }, { "epoch": 0.5434844647379874, "grad_norm": 0.2142532616853714, "learning_rate": 2.5567298318327548e-05, "loss": 0.1713, "step": 30471 }, { "epoch": 0.5435023008597011, "grad_norm": 0.29085907340049744, "learning_rate": 2.5565742210434897e-05, "loss": 0.1854, "step": 30472 }, { "epoch": 0.5435201369814148, "grad_norm": 0.21413438022136688, "learning_rate": 2.556418610034923e-05, "loss": 0.1107, "step": 30473 }, { "epoch": 0.5435379731031285, "grad_norm": 0.18788836896419525, "learning_rate": 2.5562629988076592e-05, "loss": 0.1287, "step": 30474 }, { "epoch": 0.5435558092248421, "grad_norm": 0.2793657183647156, "learning_rate": 2.5561073873623003e-05, "loss": 0.1074, "step": 30475 }, { "epoch": 0.5435736453465558, "grad_norm": 0.319446325302124, "learning_rate": 2.5559517756994495e-05, "loss": 0.1811, "step": 30476 }, { "epoch": 0.5435914814682695, "grad_norm": 0.298800528049469, "learning_rate": 2.5557961638197104e-05, "loss": 0.1785, "step": 30477 }, { "epoch": 0.5436093175899832, "grad_norm": 0.2088286280632019, "learning_rate": 2.555640551723686e-05, "loss": 0.1208, "step": 30478 }, { "epoch": 0.5436271537116969, "grad_norm": 0.3150528073310852, "learning_rate": 2.5554849394119794e-05, "loss": 0.099, "step": 30479 }, { "epoch": 0.5436449898334106, "grad_norm": 0.20364587008953094, "learning_rate": 2.5553293268851943e-05, "loss": 0.1305, "step": 30480 }, { "epoch": 0.5436628259551243, "grad_norm": 0.33581048250198364, "learning_rate": 2.5551737141439336e-05, "loss": 0.1764, "step": 30481 }, { "epoch": 0.543680662076838, "grad_norm": 0.23047466576099396, "learning_rate": 2.5550181011888002e-05, "loss": 0.123, "step": 30482 }, { "epoch": 0.5436984981985518, "grad_norm": 0.3257729709148407, "learning_rate": 2.554862488020398e-05, "loss": 0.1458, "step": 30483 }, { "epoch": 0.5437163343202654, "grad_norm": 0.24259547889232635, "learning_rate": 2.5547068746393297e-05, "loss": 0.1067, "step": 30484 }, { "epoch": 0.5437341704419791, "grad_norm": 0.2064991295337677, "learning_rate": 2.5545512610461986e-05, "loss": 0.1204, "step": 30485 }, { "epoch": 0.5437520065636928, "grad_norm": 0.19359250366687775, "learning_rate": 2.5543956472416076e-05, "loss": 0.0974, "step": 30486 }, { "epoch": 0.5437698426854065, "grad_norm": 0.2196130007505417, "learning_rate": 2.554240033226161e-05, "loss": 0.1534, "step": 30487 }, { "epoch": 0.5437876788071202, "grad_norm": 0.2541401982307434, "learning_rate": 2.5540844190004616e-05, "loss": 0.1384, "step": 30488 }, { "epoch": 0.5438055149288339, "grad_norm": 0.2838333547115326, "learning_rate": 2.5539288045651117e-05, "loss": 0.1543, "step": 30489 }, { "epoch": 0.5438233510505476, "grad_norm": 0.2721298635005951, "learning_rate": 2.5537731899207157e-05, "loss": 0.1838, "step": 30490 }, { "epoch": 0.5438411871722613, "grad_norm": 0.27650579810142517, "learning_rate": 2.5536175750678754e-05, "loss": 0.137, "step": 30491 }, { "epoch": 0.543859023293975, "grad_norm": 0.2977495789527893, "learning_rate": 2.5534619600071946e-05, "loss": 0.1746, "step": 30492 }, { "epoch": 0.5438768594156886, "grad_norm": 0.2611187994480133, "learning_rate": 2.5533063447392774e-05, "loss": 0.1421, "step": 30493 }, { "epoch": 0.5438946955374023, "grad_norm": 0.2547118067741394, "learning_rate": 2.5531507292647268e-05, "loss": 0.1623, "step": 30494 }, { "epoch": 0.543912531659116, "grad_norm": 0.324241042137146, "learning_rate": 2.5529951135841453e-05, "loss": 0.1912, "step": 30495 }, { "epoch": 0.5439303677808297, "grad_norm": 0.2348569631576538, "learning_rate": 2.5528394976981362e-05, "loss": 0.1077, "step": 30496 }, { "epoch": 0.5439482039025434, "grad_norm": 0.20454934239387512, "learning_rate": 2.5526838816073027e-05, "loss": 0.1546, "step": 30497 }, { "epoch": 0.5439660400242571, "grad_norm": 0.2143382579088211, "learning_rate": 2.5525282653122496e-05, "loss": 0.0956, "step": 30498 }, { "epoch": 0.5439838761459708, "grad_norm": 0.2045070379972458, "learning_rate": 2.5523726488135776e-05, "loss": 0.1146, "step": 30499 }, { "epoch": 0.5440017122676846, "grad_norm": 0.21712088584899902, "learning_rate": 2.552217032111891e-05, "loss": 0.1074, "step": 30500 }, { "epoch": 0.5440195483893983, "grad_norm": 0.2839733064174652, "learning_rate": 2.5520614152077938e-05, "loss": 0.126, "step": 30501 }, { "epoch": 0.5440373845111119, "grad_norm": 0.30920034646987915, "learning_rate": 2.551905798101888e-05, "loss": 0.1601, "step": 30502 }, { "epoch": 0.5440552206328256, "grad_norm": 0.37711143493652344, "learning_rate": 2.551750180794778e-05, "loss": 0.1334, "step": 30503 }, { "epoch": 0.5440730567545393, "grad_norm": 0.425048828125, "learning_rate": 2.551594563287066e-05, "loss": 0.1727, "step": 30504 }, { "epoch": 0.544090892876253, "grad_norm": 0.20313811302185059, "learning_rate": 2.5514389455793563e-05, "loss": 0.1036, "step": 30505 }, { "epoch": 0.5441087289979667, "grad_norm": 0.2686541974544525, "learning_rate": 2.5512833276722507e-05, "loss": 0.1677, "step": 30506 }, { "epoch": 0.5441265651196804, "grad_norm": 0.2547666132450104, "learning_rate": 2.5511277095663533e-05, "loss": 0.1215, "step": 30507 }, { "epoch": 0.5441444012413941, "grad_norm": 0.2346441149711609, "learning_rate": 2.550972091262267e-05, "loss": 0.123, "step": 30508 }, { "epoch": 0.5441622373631078, "grad_norm": 0.22550863027572632, "learning_rate": 2.550816472760596e-05, "loss": 0.1442, "step": 30509 }, { "epoch": 0.5441800734848214, "grad_norm": 0.20683231949806213, "learning_rate": 2.5506608540619425e-05, "loss": 0.1489, "step": 30510 }, { "epoch": 0.5441979096065351, "grad_norm": 0.2775627672672272, "learning_rate": 2.5505052351669102e-05, "loss": 0.1667, "step": 30511 }, { "epoch": 0.5442157457282488, "grad_norm": 0.3498181104660034, "learning_rate": 2.550349616076102e-05, "loss": 0.122, "step": 30512 }, { "epoch": 0.5442335818499625, "grad_norm": 0.2168666273355484, "learning_rate": 2.5501939967901212e-05, "loss": 0.1081, "step": 30513 }, { "epoch": 0.5442514179716762, "grad_norm": 0.2527812421321869, "learning_rate": 2.5500383773095717e-05, "loss": 0.109, "step": 30514 }, { "epoch": 0.5442692540933899, "grad_norm": 0.2178495228290558, "learning_rate": 2.549882757635055e-05, "loss": 0.1202, "step": 30515 }, { "epoch": 0.5442870902151036, "grad_norm": 0.2771246135234833, "learning_rate": 2.5497271377671765e-05, "loss": 0.1298, "step": 30516 }, { "epoch": 0.5443049263368174, "grad_norm": 0.31199607253074646, "learning_rate": 2.5495715177065378e-05, "loss": 0.1631, "step": 30517 }, { "epoch": 0.5443227624585311, "grad_norm": 0.36720263957977295, "learning_rate": 2.5494158974537436e-05, "loss": 0.1154, "step": 30518 }, { "epoch": 0.5443405985802447, "grad_norm": 0.2538905143737793, "learning_rate": 2.5492602770093956e-05, "loss": 0.1551, "step": 30519 }, { "epoch": 0.5443584347019584, "grad_norm": 0.3278164863586426, "learning_rate": 2.549104656374098e-05, "loss": 0.1336, "step": 30520 }, { "epoch": 0.5443762708236721, "grad_norm": 0.3136005699634552, "learning_rate": 2.548949035548453e-05, "loss": 0.1388, "step": 30521 }, { "epoch": 0.5443941069453858, "grad_norm": 0.22819246351718903, "learning_rate": 2.548793414533066e-05, "loss": 0.106, "step": 30522 }, { "epoch": 0.5444119430670995, "grad_norm": 0.28130069375038147, "learning_rate": 2.548637793328538e-05, "loss": 0.1155, "step": 30523 }, { "epoch": 0.5444297791888132, "grad_norm": 0.25665944814682007, "learning_rate": 2.5484821719354736e-05, "loss": 0.1307, "step": 30524 }, { "epoch": 0.5444476153105269, "grad_norm": 0.3511230945587158, "learning_rate": 2.548326550354475e-05, "loss": 0.1413, "step": 30525 }, { "epoch": 0.5444654514322406, "grad_norm": 0.24509581923484802, "learning_rate": 2.548170928586147e-05, "loss": 0.1617, "step": 30526 }, { "epoch": 0.5444832875539543, "grad_norm": 0.22838634252548218, "learning_rate": 2.5480153066310908e-05, "loss": 0.114, "step": 30527 }, { "epoch": 0.5445011236756679, "grad_norm": 0.22285029292106628, "learning_rate": 2.547859684489911e-05, "loss": 0.1572, "step": 30528 }, { "epoch": 0.5445189597973816, "grad_norm": 0.27975815534591675, "learning_rate": 2.547704062163211e-05, "loss": 0.1258, "step": 30529 }, { "epoch": 0.5445367959190953, "grad_norm": 0.24844714999198914, "learning_rate": 2.547548439651593e-05, "loss": 0.1418, "step": 30530 }, { "epoch": 0.544554632040809, "grad_norm": 0.292751282453537, "learning_rate": 2.547392816955661e-05, "loss": 0.1609, "step": 30531 }, { "epoch": 0.5445724681625227, "grad_norm": 0.344444215297699, "learning_rate": 2.5472371940760183e-05, "loss": 0.0961, "step": 30532 }, { "epoch": 0.5445903042842364, "grad_norm": 0.2207648754119873, "learning_rate": 2.5470815710132678e-05, "loss": 0.1639, "step": 30533 }, { "epoch": 0.5446081404059502, "grad_norm": 0.28195247054100037, "learning_rate": 2.546925947768012e-05, "loss": 0.1045, "step": 30534 }, { "epoch": 0.5446259765276639, "grad_norm": 0.3072606325149536, "learning_rate": 2.546770324340856e-05, "loss": 0.1046, "step": 30535 }, { "epoch": 0.5446438126493776, "grad_norm": 0.3027358651161194, "learning_rate": 2.5466147007324015e-05, "loss": 0.1138, "step": 30536 }, { "epoch": 0.5446616487710912, "grad_norm": 0.5015240907669067, "learning_rate": 2.5464590769432535e-05, "loss": 0.184, "step": 30537 }, { "epoch": 0.5446794848928049, "grad_norm": 0.22844772040843964, "learning_rate": 2.5463034529740127e-05, "loss": 0.1228, "step": 30538 }, { "epoch": 0.5446973210145186, "grad_norm": 0.31912747025489807, "learning_rate": 2.5461478288252844e-05, "loss": 0.1394, "step": 30539 }, { "epoch": 0.5447151571362323, "grad_norm": 0.25199249386787415, "learning_rate": 2.5459922044976708e-05, "loss": 0.1538, "step": 30540 }, { "epoch": 0.544732993257946, "grad_norm": 0.31932389736175537, "learning_rate": 2.5458365799917754e-05, "loss": 0.1663, "step": 30541 }, { "epoch": 0.5447508293796597, "grad_norm": 0.26404711604118347, "learning_rate": 2.545680955308203e-05, "loss": 0.1406, "step": 30542 }, { "epoch": 0.5447686655013734, "grad_norm": 0.2169080525636673, "learning_rate": 2.545525330447554e-05, "loss": 0.1146, "step": 30543 }, { "epoch": 0.5447865016230871, "grad_norm": 0.2407020628452301, "learning_rate": 2.5453697054104336e-05, "loss": 0.1274, "step": 30544 }, { "epoch": 0.5448043377448007, "grad_norm": 0.3030557930469513, "learning_rate": 2.5452140801974446e-05, "loss": 0.1283, "step": 30545 }, { "epoch": 0.5448221738665144, "grad_norm": 0.3027314841747284, "learning_rate": 2.54505845480919e-05, "loss": 0.2079, "step": 30546 }, { "epoch": 0.5448400099882281, "grad_norm": 0.1790473610162735, "learning_rate": 2.544902829246273e-05, "loss": 0.1592, "step": 30547 }, { "epoch": 0.5448578461099418, "grad_norm": 0.2134745568037033, "learning_rate": 2.544747203509297e-05, "loss": 0.1289, "step": 30548 }, { "epoch": 0.5448756822316555, "grad_norm": 0.23085126280784607, "learning_rate": 2.544591577598866e-05, "loss": 0.2047, "step": 30549 }, { "epoch": 0.5448935183533692, "grad_norm": 0.24027085304260254, "learning_rate": 2.544435951515583e-05, "loss": 0.1223, "step": 30550 }, { "epoch": 0.544911354475083, "grad_norm": 0.23477111756801605, "learning_rate": 2.54428032526005e-05, "loss": 0.1369, "step": 30551 }, { "epoch": 0.5449291905967967, "grad_norm": 0.25696438550949097, "learning_rate": 2.544124698832872e-05, "loss": 0.1588, "step": 30552 }, { "epoch": 0.5449470267185104, "grad_norm": 0.20204702019691467, "learning_rate": 2.5439690722346505e-05, "loss": 0.133, "step": 30553 }, { "epoch": 0.544964862840224, "grad_norm": 0.3292404115200043, "learning_rate": 2.5438134454659906e-05, "loss": 0.1143, "step": 30554 }, { "epoch": 0.5449826989619377, "grad_norm": 0.22995255887508392, "learning_rate": 2.5436578185274938e-05, "loss": 0.1485, "step": 30555 }, { "epoch": 0.5450005350836514, "grad_norm": 0.20696400105953217, "learning_rate": 2.5435021914197643e-05, "loss": 0.117, "step": 30556 }, { "epoch": 0.5450183712053651, "grad_norm": 0.2578725814819336, "learning_rate": 2.5433465641434064e-05, "loss": 0.1561, "step": 30557 }, { "epoch": 0.5450362073270788, "grad_norm": 0.2952697277069092, "learning_rate": 2.543190936699021e-05, "loss": 0.0982, "step": 30558 }, { "epoch": 0.5450540434487925, "grad_norm": 0.21113072335720062, "learning_rate": 2.5430353090872132e-05, "loss": 0.1454, "step": 30559 }, { "epoch": 0.5450718795705062, "grad_norm": 0.2743690013885498, "learning_rate": 2.5428796813085854e-05, "loss": 0.1378, "step": 30560 }, { "epoch": 0.5450897156922199, "grad_norm": 0.2805323600769043, "learning_rate": 2.542724053363742e-05, "loss": 0.2072, "step": 30561 }, { "epoch": 0.5451075518139336, "grad_norm": 0.27272406220436096, "learning_rate": 2.542568425253284e-05, "loss": 0.1526, "step": 30562 }, { "epoch": 0.5451253879356472, "grad_norm": 0.22355371713638306, "learning_rate": 2.5424127969778165e-05, "loss": 0.1136, "step": 30563 }, { "epoch": 0.5451432240573609, "grad_norm": 0.2965892553329468, "learning_rate": 2.5422571685379427e-05, "loss": 0.1696, "step": 30564 }, { "epoch": 0.5451610601790746, "grad_norm": 0.6076902151107788, "learning_rate": 2.542101539934266e-05, "loss": 0.1566, "step": 30565 }, { "epoch": 0.5451788963007883, "grad_norm": 0.26456040143966675, "learning_rate": 2.541945911167388e-05, "loss": 0.1716, "step": 30566 }, { "epoch": 0.5451967324225021, "grad_norm": 0.2619446814060211, "learning_rate": 2.541790282237914e-05, "loss": 0.1405, "step": 30567 }, { "epoch": 0.5452145685442158, "grad_norm": 0.27856355905532837, "learning_rate": 2.5416346531464465e-05, "loss": 0.1589, "step": 30568 }, { "epoch": 0.5452324046659295, "grad_norm": 0.4120332598686218, "learning_rate": 2.541479023893588e-05, "loss": 0.1287, "step": 30569 }, { "epoch": 0.5452502407876432, "grad_norm": 0.29790472984313965, "learning_rate": 2.5413233944799438e-05, "loss": 0.1311, "step": 30570 }, { "epoch": 0.5452680769093569, "grad_norm": 0.2873964011669159, "learning_rate": 2.5411677649061144e-05, "loss": 0.1255, "step": 30571 }, { "epoch": 0.5452859130310705, "grad_norm": 0.26232606172561646, "learning_rate": 2.541012135172705e-05, "loss": 0.1438, "step": 30572 }, { "epoch": 0.5453037491527842, "grad_norm": 0.2812102735042572, "learning_rate": 2.5408565052803184e-05, "loss": 0.1747, "step": 30573 }, { "epoch": 0.5453215852744979, "grad_norm": 0.2334454357624054, "learning_rate": 2.5407008752295586e-05, "loss": 0.1511, "step": 30574 }, { "epoch": 0.5453394213962116, "grad_norm": 0.303078830242157, "learning_rate": 2.540545245021027e-05, "loss": 0.1181, "step": 30575 }, { "epoch": 0.5453572575179253, "grad_norm": 0.2523733079433441, "learning_rate": 2.5403896146553286e-05, "loss": 0.1707, "step": 30576 }, { "epoch": 0.545375093639639, "grad_norm": 0.27870509028434753, "learning_rate": 2.540233984133066e-05, "loss": 0.1617, "step": 30577 }, { "epoch": 0.5453929297613527, "grad_norm": 0.28360575437545776, "learning_rate": 2.5400783534548428e-05, "loss": 0.0939, "step": 30578 }, { "epoch": 0.5454107658830664, "grad_norm": 0.2450939416885376, "learning_rate": 2.5399227226212613e-05, "loss": 0.1697, "step": 30579 }, { "epoch": 0.54542860200478, "grad_norm": 0.270203173160553, "learning_rate": 2.539767091632927e-05, "loss": 0.1369, "step": 30580 }, { "epoch": 0.5454464381264937, "grad_norm": 0.22776833176612854, "learning_rate": 2.539611460490441e-05, "loss": 0.138, "step": 30581 }, { "epoch": 0.5454642742482074, "grad_norm": 0.23696552217006683, "learning_rate": 2.5394558291944072e-05, "loss": 0.1793, "step": 30582 }, { "epoch": 0.5454821103699211, "grad_norm": 0.19679997861385345, "learning_rate": 2.5393001977454285e-05, "loss": 0.0973, "step": 30583 }, { "epoch": 0.5454999464916349, "grad_norm": 0.19741256535053253, "learning_rate": 2.5391445661441088e-05, "loss": 0.124, "step": 30584 }, { "epoch": 0.5455177826133486, "grad_norm": 0.20444637537002563, "learning_rate": 2.5389889343910517e-05, "loss": 0.0823, "step": 30585 }, { "epoch": 0.5455356187350623, "grad_norm": 0.3365132808685303, "learning_rate": 2.5388333024868595e-05, "loss": 0.1782, "step": 30586 }, { "epoch": 0.545553454856776, "grad_norm": 0.5505183339118958, "learning_rate": 2.538677670432137e-05, "loss": 0.2799, "step": 30587 }, { "epoch": 0.5455712909784897, "grad_norm": 0.2781968116760254, "learning_rate": 2.5385220382274853e-05, "loss": 0.177, "step": 30588 }, { "epoch": 0.5455891271002034, "grad_norm": 0.2326662242412567, "learning_rate": 2.538366405873509e-05, "loss": 0.1446, "step": 30589 }, { "epoch": 0.545606963221917, "grad_norm": 0.1863633692264557, "learning_rate": 2.5382107733708116e-05, "loss": 0.0767, "step": 30590 }, { "epoch": 0.5456247993436307, "grad_norm": 0.17594373226165771, "learning_rate": 2.5380551407199964e-05, "loss": 0.1099, "step": 30591 }, { "epoch": 0.5456426354653444, "grad_norm": 0.20853294432163239, "learning_rate": 2.537899507921666e-05, "loss": 0.1262, "step": 30592 }, { "epoch": 0.5456604715870581, "grad_norm": 0.3568669259548187, "learning_rate": 2.5377438749764243e-05, "loss": 0.175, "step": 30593 }, { "epoch": 0.5456783077087718, "grad_norm": 0.2863282263278961, "learning_rate": 2.5375882418848746e-05, "loss": 0.1643, "step": 30594 }, { "epoch": 0.5456961438304855, "grad_norm": 0.30819424986839294, "learning_rate": 2.5374326086476192e-05, "loss": 0.1427, "step": 30595 }, { "epoch": 0.5457139799521992, "grad_norm": 0.26100361347198486, "learning_rate": 2.537276975265262e-05, "loss": 0.1662, "step": 30596 }, { "epoch": 0.5457318160739129, "grad_norm": 0.2631935179233551, "learning_rate": 2.5371213417384064e-05, "loss": 0.121, "step": 30597 }, { "epoch": 0.5457496521956265, "grad_norm": 0.240121990442276, "learning_rate": 2.5369657080676562e-05, "loss": 0.1514, "step": 30598 }, { "epoch": 0.5457674883173402, "grad_norm": 0.22561949491500854, "learning_rate": 2.5368100742536134e-05, "loss": 0.1455, "step": 30599 }, { "epoch": 0.5457853244390539, "grad_norm": 0.2451285719871521, "learning_rate": 2.536654440296883e-05, "loss": 0.1061, "step": 30600 }, { "epoch": 0.5458031605607677, "grad_norm": 0.31559446454048157, "learning_rate": 2.5364988061980666e-05, "loss": 0.1468, "step": 30601 }, { "epoch": 0.5458209966824814, "grad_norm": 0.2675754427909851, "learning_rate": 2.5363431719577684e-05, "loss": 0.1178, "step": 30602 }, { "epoch": 0.5458388328041951, "grad_norm": 0.24018284678459167, "learning_rate": 2.5361875375765913e-05, "loss": 0.1207, "step": 30603 }, { "epoch": 0.5458566689259088, "grad_norm": 0.2545557916164398, "learning_rate": 2.5360319030551388e-05, "loss": 0.1284, "step": 30604 }, { "epoch": 0.5458745050476225, "grad_norm": 0.2843206226825714, "learning_rate": 2.5358762683940146e-05, "loss": 0.1541, "step": 30605 }, { "epoch": 0.5458923411693362, "grad_norm": 0.31861063838005066, "learning_rate": 2.5357206335938215e-05, "loss": 0.0971, "step": 30606 }, { "epoch": 0.5459101772910498, "grad_norm": 0.20014692842960358, "learning_rate": 2.5355649986551633e-05, "loss": 0.0905, "step": 30607 }, { "epoch": 0.5459280134127635, "grad_norm": 0.21438264846801758, "learning_rate": 2.535409363578642e-05, "loss": 0.089, "step": 30608 }, { "epoch": 0.5459458495344772, "grad_norm": 0.21107247471809387, "learning_rate": 2.5352537283648626e-05, "loss": 0.1327, "step": 30609 }, { "epoch": 0.5459636856561909, "grad_norm": 0.26052507758140564, "learning_rate": 2.5350980930144268e-05, "loss": 0.1621, "step": 30610 }, { "epoch": 0.5459815217779046, "grad_norm": 0.24816282093524933, "learning_rate": 2.534942457527939e-05, "loss": 0.1395, "step": 30611 }, { "epoch": 0.5459993578996183, "grad_norm": 0.30408936738967896, "learning_rate": 2.5347868219060017e-05, "loss": 0.147, "step": 30612 }, { "epoch": 0.546017194021332, "grad_norm": 0.2077506184577942, "learning_rate": 2.5346311861492194e-05, "loss": 0.0985, "step": 30613 }, { "epoch": 0.5460350301430457, "grad_norm": 0.3313791751861572, "learning_rate": 2.534475550258194e-05, "loss": 0.1689, "step": 30614 }, { "epoch": 0.5460528662647594, "grad_norm": 0.22992636263370514, "learning_rate": 2.5343199142335307e-05, "loss": 0.0933, "step": 30615 }, { "epoch": 0.546070702386473, "grad_norm": 0.2696564197540283, "learning_rate": 2.53416427807583e-05, "loss": 0.1682, "step": 30616 }, { "epoch": 0.5460885385081867, "grad_norm": 0.3920508325099945, "learning_rate": 2.5340086417856972e-05, "loss": 0.1667, "step": 30617 }, { "epoch": 0.5461063746299005, "grad_norm": 0.2885618805885315, "learning_rate": 2.5338530053637355e-05, "loss": 0.0963, "step": 30618 }, { "epoch": 0.5461242107516142, "grad_norm": 0.28976091742515564, "learning_rate": 2.533697368810548e-05, "loss": 0.1497, "step": 30619 }, { "epoch": 0.5461420468733279, "grad_norm": 0.2502298057079315, "learning_rate": 2.533541732126738e-05, "loss": 0.1282, "step": 30620 }, { "epoch": 0.5461598829950416, "grad_norm": 0.2730318009853363, "learning_rate": 2.5333860953129084e-05, "loss": 0.1285, "step": 30621 }, { "epoch": 0.5461777191167553, "grad_norm": 0.22559292614459991, "learning_rate": 2.533230458369663e-05, "loss": 0.124, "step": 30622 }, { "epoch": 0.546195555238469, "grad_norm": 0.3198586702346802, "learning_rate": 2.5330748212976042e-05, "loss": 0.133, "step": 30623 }, { "epoch": 0.5462133913601827, "grad_norm": 0.23066243529319763, "learning_rate": 2.532919184097336e-05, "loss": 0.151, "step": 30624 }, { "epoch": 0.5462312274818963, "grad_norm": 0.3152429461479187, "learning_rate": 2.5327635467694616e-05, "loss": 0.1003, "step": 30625 }, { "epoch": 0.54624906360361, "grad_norm": 0.25440070033073425, "learning_rate": 2.5326079093145856e-05, "loss": 0.1183, "step": 30626 }, { "epoch": 0.5462668997253237, "grad_norm": 0.2893313765525818, "learning_rate": 2.532452271733309e-05, "loss": 0.146, "step": 30627 }, { "epoch": 0.5462847358470374, "grad_norm": 0.27915453910827637, "learning_rate": 2.5322966340262366e-05, "loss": 0.1599, "step": 30628 }, { "epoch": 0.5463025719687511, "grad_norm": 0.28688129782676697, "learning_rate": 2.5321409961939708e-05, "loss": 0.1266, "step": 30629 }, { "epoch": 0.5463204080904648, "grad_norm": 0.22225314378738403, "learning_rate": 2.531985358237116e-05, "loss": 0.1673, "step": 30630 }, { "epoch": 0.5463382442121785, "grad_norm": 0.6175563335418701, "learning_rate": 2.5318297201562745e-05, "loss": 0.1635, "step": 30631 }, { "epoch": 0.5463560803338922, "grad_norm": 0.3103273808956146, "learning_rate": 2.5316740819520494e-05, "loss": 0.1645, "step": 30632 }, { "epoch": 0.5463739164556058, "grad_norm": 0.2526172697544098, "learning_rate": 2.531518443625046e-05, "loss": 0.1423, "step": 30633 }, { "epoch": 0.5463917525773195, "grad_norm": 0.41474178433418274, "learning_rate": 2.5313628051758654e-05, "loss": 0.1432, "step": 30634 }, { "epoch": 0.5464095886990333, "grad_norm": 0.23666517436504364, "learning_rate": 2.5312071666051122e-05, "loss": 0.1184, "step": 30635 }, { "epoch": 0.546427424820747, "grad_norm": 0.24965380132198334, "learning_rate": 2.531051527913389e-05, "loss": 0.0966, "step": 30636 }, { "epoch": 0.5464452609424607, "grad_norm": 0.2909402847290039, "learning_rate": 2.5308958891012995e-05, "loss": 0.166, "step": 30637 }, { "epoch": 0.5464630970641744, "grad_norm": 0.2088889479637146, "learning_rate": 2.5307402501694467e-05, "loss": 0.0941, "step": 30638 }, { "epoch": 0.5464809331858881, "grad_norm": 0.24103598296642303, "learning_rate": 2.5305846111184333e-05, "loss": 0.1212, "step": 30639 }, { "epoch": 0.5464987693076018, "grad_norm": 0.29101163148880005, "learning_rate": 2.5304289719488638e-05, "loss": 0.1218, "step": 30640 }, { "epoch": 0.5465166054293155, "grad_norm": 0.2213044911623001, "learning_rate": 2.530273332661342e-05, "loss": 0.1155, "step": 30641 }, { "epoch": 0.5465344415510291, "grad_norm": 0.2774907350540161, "learning_rate": 2.5301176932564696e-05, "loss": 0.1533, "step": 30642 }, { "epoch": 0.5465522776727428, "grad_norm": 0.32976096868515015, "learning_rate": 2.5299620537348512e-05, "loss": 0.1608, "step": 30643 }, { "epoch": 0.5465701137944565, "grad_norm": 0.26635968685150146, "learning_rate": 2.5298064140970883e-05, "loss": 0.0899, "step": 30644 }, { "epoch": 0.5465879499161702, "grad_norm": 0.23259654641151428, "learning_rate": 2.5296507743437863e-05, "loss": 0.1139, "step": 30645 }, { "epoch": 0.5466057860378839, "grad_norm": 0.23877407610416412, "learning_rate": 2.529495134475548e-05, "loss": 0.1381, "step": 30646 }, { "epoch": 0.5466236221595976, "grad_norm": 0.29260697960853577, "learning_rate": 2.5293394944929754e-05, "loss": 0.1563, "step": 30647 }, { "epoch": 0.5466414582813113, "grad_norm": 0.21915209293365479, "learning_rate": 2.529183854396674e-05, "loss": 0.1013, "step": 30648 }, { "epoch": 0.546659294403025, "grad_norm": 0.4737933874130249, "learning_rate": 2.529028214187245e-05, "loss": 0.1373, "step": 30649 }, { "epoch": 0.5466771305247387, "grad_norm": 0.2935916781425476, "learning_rate": 2.528872573865293e-05, "loss": 0.1555, "step": 30650 }, { "epoch": 0.5466949666464523, "grad_norm": 0.2658814787864685, "learning_rate": 2.528716933431421e-05, "loss": 0.1459, "step": 30651 }, { "epoch": 0.5467128027681661, "grad_norm": 0.22968004643917084, "learning_rate": 2.5285612928862317e-05, "loss": 0.1294, "step": 30652 }, { "epoch": 0.5467306388898798, "grad_norm": 0.3674127757549286, "learning_rate": 2.528405652230329e-05, "loss": 0.0873, "step": 30653 }, { "epoch": 0.5467484750115935, "grad_norm": 0.19489099085330963, "learning_rate": 2.5282500114643166e-05, "loss": 0.0921, "step": 30654 }, { "epoch": 0.5467663111333072, "grad_norm": 0.2663986086845398, "learning_rate": 2.5280943705887973e-05, "loss": 0.0878, "step": 30655 }, { "epoch": 0.5467841472550209, "grad_norm": 0.2378002554178238, "learning_rate": 2.527938729604375e-05, "loss": 0.1579, "step": 30656 }, { "epoch": 0.5468019833767346, "grad_norm": 0.3102874159812927, "learning_rate": 2.5277830885116517e-05, "loss": 0.1256, "step": 30657 }, { "epoch": 0.5468198194984483, "grad_norm": 0.25617241859436035, "learning_rate": 2.5276274473112322e-05, "loss": 0.1455, "step": 30658 }, { "epoch": 0.546837655620162, "grad_norm": 0.25545215606689453, "learning_rate": 2.5274718060037183e-05, "loss": 0.1232, "step": 30659 }, { "epoch": 0.5468554917418756, "grad_norm": 0.34630346298217773, "learning_rate": 2.5273161645897148e-05, "loss": 0.1841, "step": 30660 }, { "epoch": 0.5468733278635893, "grad_norm": 0.32550060749053955, "learning_rate": 2.5271605230698247e-05, "loss": 0.14, "step": 30661 }, { "epoch": 0.546891163985303, "grad_norm": 0.24155452847480774, "learning_rate": 2.52700488144465e-05, "loss": 0.1294, "step": 30662 }, { "epoch": 0.5469090001070167, "grad_norm": 0.3801244795322418, "learning_rate": 2.5268492397147964e-05, "loss": 0.1058, "step": 30663 }, { "epoch": 0.5469268362287304, "grad_norm": 0.26613855361938477, "learning_rate": 2.5266935978808647e-05, "loss": 0.1782, "step": 30664 }, { "epoch": 0.5469446723504441, "grad_norm": 0.22232182323932648, "learning_rate": 2.5265379559434605e-05, "loss": 0.0668, "step": 30665 }, { "epoch": 0.5469625084721578, "grad_norm": 0.2696954607963562, "learning_rate": 2.5263823139031846e-05, "loss": 0.1174, "step": 30666 }, { "epoch": 0.5469803445938715, "grad_norm": 0.25743570923805237, "learning_rate": 2.5262266717606432e-05, "loss": 0.1738, "step": 30667 }, { "epoch": 0.5469981807155853, "grad_norm": 0.25451594591140747, "learning_rate": 2.526071029516437e-05, "loss": 0.0917, "step": 30668 }, { "epoch": 0.547016016837299, "grad_norm": 0.2859238386154175, "learning_rate": 2.5259153871711715e-05, "loss": 0.1411, "step": 30669 }, { "epoch": 0.5470338529590126, "grad_norm": 0.312207967042923, "learning_rate": 2.5257597447254483e-05, "loss": 0.1664, "step": 30670 }, { "epoch": 0.5470516890807263, "grad_norm": 0.2456086426973343, "learning_rate": 2.5256041021798714e-05, "loss": 0.123, "step": 30671 }, { "epoch": 0.54706952520244, "grad_norm": 0.34301429986953735, "learning_rate": 2.5254484595350446e-05, "loss": 0.1661, "step": 30672 }, { "epoch": 0.5470873613241537, "grad_norm": 0.33208051323890686, "learning_rate": 2.5252928167915703e-05, "loss": 0.1969, "step": 30673 }, { "epoch": 0.5471051974458674, "grad_norm": 0.38960370421409607, "learning_rate": 2.525137173950053e-05, "loss": 0.1444, "step": 30674 }, { "epoch": 0.5471230335675811, "grad_norm": 0.2728498876094818, "learning_rate": 2.5249815310110947e-05, "loss": 0.2019, "step": 30675 }, { "epoch": 0.5471408696892948, "grad_norm": 0.3494815230369568, "learning_rate": 2.5248258879753002e-05, "loss": 0.0997, "step": 30676 }, { "epoch": 0.5471587058110085, "grad_norm": 0.2780648469924927, "learning_rate": 2.524670244843271e-05, "loss": 0.1554, "step": 30677 }, { "epoch": 0.5471765419327221, "grad_norm": 0.3877607583999634, "learning_rate": 2.524514601615612e-05, "loss": 0.1757, "step": 30678 }, { "epoch": 0.5471943780544358, "grad_norm": 0.3019520342350006, "learning_rate": 2.5243589582929257e-05, "loss": 0.1369, "step": 30679 }, { "epoch": 0.5472122141761495, "grad_norm": 0.2377205193042755, "learning_rate": 2.5242033148758152e-05, "loss": 0.1284, "step": 30680 }, { "epoch": 0.5472300502978632, "grad_norm": 0.2232901155948639, "learning_rate": 2.5240476713648847e-05, "loss": 0.1111, "step": 30681 }, { "epoch": 0.5472478864195769, "grad_norm": 0.29449349641799927, "learning_rate": 2.5238920277607376e-05, "loss": 0.099, "step": 30682 }, { "epoch": 0.5472657225412906, "grad_norm": 0.25763311982154846, "learning_rate": 2.523736384063976e-05, "loss": 0.1411, "step": 30683 }, { "epoch": 0.5472835586630043, "grad_norm": 0.28363871574401855, "learning_rate": 2.5235807402752043e-05, "loss": 0.1052, "step": 30684 }, { "epoch": 0.5473013947847181, "grad_norm": 0.19277070462703705, "learning_rate": 2.5234250963950263e-05, "loss": 0.0842, "step": 30685 }, { "epoch": 0.5473192309064318, "grad_norm": 0.25599876046180725, "learning_rate": 2.523269452424043e-05, "loss": 0.1174, "step": 30686 }, { "epoch": 0.5473370670281454, "grad_norm": 0.22644832730293274, "learning_rate": 2.5231138083628603e-05, "loss": 0.1631, "step": 30687 }, { "epoch": 0.5473549031498591, "grad_norm": 0.2773655951023102, "learning_rate": 2.5229581642120798e-05, "loss": 0.118, "step": 30688 }, { "epoch": 0.5473727392715728, "grad_norm": 0.25694385170936584, "learning_rate": 2.522802519972306e-05, "loss": 0.1185, "step": 30689 }, { "epoch": 0.5473905753932865, "grad_norm": 0.26030248403549194, "learning_rate": 2.5226468756441417e-05, "loss": 0.1688, "step": 30690 }, { "epoch": 0.5474084115150002, "grad_norm": 0.18752585351467133, "learning_rate": 2.5224912312281907e-05, "loss": 0.1214, "step": 30691 }, { "epoch": 0.5474262476367139, "grad_norm": 0.2060348093509674, "learning_rate": 2.5223355867250553e-05, "loss": 0.1245, "step": 30692 }, { "epoch": 0.5474440837584276, "grad_norm": 0.2473519891500473, "learning_rate": 2.5221799421353392e-05, "loss": 0.1694, "step": 30693 }, { "epoch": 0.5474619198801413, "grad_norm": 0.34526124596595764, "learning_rate": 2.5220242974596463e-05, "loss": 0.141, "step": 30694 }, { "epoch": 0.547479756001855, "grad_norm": 0.2736712396144867, "learning_rate": 2.52186865269858e-05, "loss": 0.1525, "step": 30695 }, { "epoch": 0.5474975921235686, "grad_norm": 0.3155522346496582, "learning_rate": 2.521713007852743e-05, "loss": 0.1077, "step": 30696 }, { "epoch": 0.5475154282452823, "grad_norm": 0.2977176308631897, "learning_rate": 2.521557362922739e-05, "loss": 0.1787, "step": 30697 }, { "epoch": 0.547533264366996, "grad_norm": 0.24261996150016785, "learning_rate": 2.5214017179091716e-05, "loss": 0.1573, "step": 30698 }, { "epoch": 0.5475511004887097, "grad_norm": 0.35513001680374146, "learning_rate": 2.5212460728126432e-05, "loss": 0.1152, "step": 30699 }, { "epoch": 0.5475689366104234, "grad_norm": 0.24917559325695038, "learning_rate": 2.521090427633757e-05, "loss": 0.1271, "step": 30700 }, { "epoch": 0.5475867727321371, "grad_norm": 0.24163921177387238, "learning_rate": 2.5209347823731177e-05, "loss": 0.0997, "step": 30701 }, { "epoch": 0.5476046088538509, "grad_norm": 0.2742210626602173, "learning_rate": 2.5207791370313282e-05, "loss": 0.1415, "step": 30702 }, { "epoch": 0.5476224449755646, "grad_norm": 0.22208933532238007, "learning_rate": 2.5206234916089916e-05, "loss": 0.1119, "step": 30703 }, { "epoch": 0.5476402810972782, "grad_norm": 0.3882651627063751, "learning_rate": 2.520467846106711e-05, "loss": 0.184, "step": 30704 }, { "epoch": 0.5476581172189919, "grad_norm": 0.24553602933883667, "learning_rate": 2.5203122005250902e-05, "loss": 0.096, "step": 30705 }, { "epoch": 0.5476759533407056, "grad_norm": 0.43698596954345703, "learning_rate": 2.520156554864732e-05, "loss": 0.1092, "step": 30706 }, { "epoch": 0.5476937894624193, "grad_norm": 0.28606539964675903, "learning_rate": 2.52000090912624e-05, "loss": 0.1197, "step": 30707 }, { "epoch": 0.547711625584133, "grad_norm": 0.20325832068920135, "learning_rate": 2.5198452633102177e-05, "loss": 0.1456, "step": 30708 }, { "epoch": 0.5477294617058467, "grad_norm": 0.30555295944213867, "learning_rate": 2.519689617417268e-05, "loss": 0.1244, "step": 30709 }, { "epoch": 0.5477472978275604, "grad_norm": 0.2689393162727356, "learning_rate": 2.5195339714479947e-05, "loss": 0.1296, "step": 30710 }, { "epoch": 0.5477651339492741, "grad_norm": 0.24193690717220306, "learning_rate": 2.519378325403002e-05, "loss": 0.1298, "step": 30711 }, { "epoch": 0.5477829700709878, "grad_norm": 0.2649729549884796, "learning_rate": 2.519222679282891e-05, "loss": 0.1412, "step": 30712 }, { "epoch": 0.5478008061927014, "grad_norm": 0.20728227496147156, "learning_rate": 2.5190670330882664e-05, "loss": 0.0884, "step": 30713 }, { "epoch": 0.5478186423144151, "grad_norm": 0.32075342535972595, "learning_rate": 2.5189113868197317e-05, "loss": 0.1501, "step": 30714 }, { "epoch": 0.5478364784361288, "grad_norm": 0.2802213430404663, "learning_rate": 2.5187557404778893e-05, "loss": 0.2015, "step": 30715 }, { "epoch": 0.5478543145578425, "grad_norm": 0.33109137415885925, "learning_rate": 2.5186000940633438e-05, "loss": 0.1622, "step": 30716 }, { "epoch": 0.5478721506795562, "grad_norm": 0.19377684593200684, "learning_rate": 2.5184444475766984e-05, "loss": 0.1096, "step": 30717 }, { "epoch": 0.5478899868012699, "grad_norm": 0.18932682275772095, "learning_rate": 2.5182888010185547e-05, "loss": 0.0919, "step": 30718 }, { "epoch": 0.5479078229229837, "grad_norm": 0.20289833843708038, "learning_rate": 2.518133154389518e-05, "loss": 0.157, "step": 30719 }, { "epoch": 0.5479256590446974, "grad_norm": 0.2738206386566162, "learning_rate": 2.517977507690191e-05, "loss": 0.1547, "step": 30720 }, { "epoch": 0.547943495166411, "grad_norm": 0.23485751450061798, "learning_rate": 2.517821860921177e-05, "loss": 0.1245, "step": 30721 }, { "epoch": 0.5479613312881247, "grad_norm": 0.2354004979133606, "learning_rate": 2.5176662140830786e-05, "loss": 0.1383, "step": 30722 }, { "epoch": 0.5479791674098384, "grad_norm": 0.3201492130756378, "learning_rate": 2.5175105671765004e-05, "loss": 0.1083, "step": 30723 }, { "epoch": 0.5479970035315521, "grad_norm": 0.23176920413970947, "learning_rate": 2.5173549202020458e-05, "loss": 0.141, "step": 30724 }, { "epoch": 0.5480148396532658, "grad_norm": 0.2387719750404358, "learning_rate": 2.5171992731603167e-05, "loss": 0.0748, "step": 30725 }, { "epoch": 0.5480326757749795, "grad_norm": 0.2699023485183716, "learning_rate": 2.5170436260519176e-05, "loss": 0.1609, "step": 30726 }, { "epoch": 0.5480505118966932, "grad_norm": 0.3339270055294037, "learning_rate": 2.5168879788774514e-05, "loss": 0.1164, "step": 30727 }, { "epoch": 0.5480683480184069, "grad_norm": 0.29143330454826355, "learning_rate": 2.5167323316375213e-05, "loss": 0.1372, "step": 30728 }, { "epoch": 0.5480861841401206, "grad_norm": 0.21815279126167297, "learning_rate": 2.516576684332731e-05, "loss": 0.1109, "step": 30729 }, { "epoch": 0.5481040202618342, "grad_norm": 0.36182475090026855, "learning_rate": 2.5164210369636843e-05, "loss": 0.13, "step": 30730 }, { "epoch": 0.5481218563835479, "grad_norm": 0.26641643047332764, "learning_rate": 2.5162653895309834e-05, "loss": 0.0967, "step": 30731 }, { "epoch": 0.5481396925052616, "grad_norm": 0.2650129199028015, "learning_rate": 2.5161097420352325e-05, "loss": 0.1242, "step": 30732 }, { "epoch": 0.5481575286269753, "grad_norm": 0.2204132229089737, "learning_rate": 2.5159540944770342e-05, "loss": 0.142, "step": 30733 }, { "epoch": 0.548175364748689, "grad_norm": 0.28929421305656433, "learning_rate": 2.5157984468569934e-05, "loss": 0.1066, "step": 30734 }, { "epoch": 0.5481932008704027, "grad_norm": 0.24537834525108337, "learning_rate": 2.5156427991757114e-05, "loss": 0.105, "step": 30735 }, { "epoch": 0.5482110369921165, "grad_norm": 0.18724027276039124, "learning_rate": 2.5154871514337924e-05, "loss": 0.1089, "step": 30736 }, { "epoch": 0.5482288731138302, "grad_norm": 0.2552861273288727, "learning_rate": 2.5153315036318404e-05, "loss": 0.117, "step": 30737 }, { "epoch": 0.5482467092355439, "grad_norm": 0.30879995226860046, "learning_rate": 2.5151758557704586e-05, "loss": 0.156, "step": 30738 }, { "epoch": 0.5482645453572575, "grad_norm": 0.2633190155029297, "learning_rate": 2.5150202078502492e-05, "loss": 0.1008, "step": 30739 }, { "epoch": 0.5482823814789712, "grad_norm": 0.2739719748497009, "learning_rate": 2.514864559871817e-05, "loss": 0.1465, "step": 30740 }, { "epoch": 0.5483002176006849, "grad_norm": 0.27851369976997375, "learning_rate": 2.5147089118357643e-05, "loss": 0.1116, "step": 30741 }, { "epoch": 0.5483180537223986, "grad_norm": 0.2841547131538391, "learning_rate": 2.514553263742694e-05, "loss": 0.1486, "step": 30742 }, { "epoch": 0.5483358898441123, "grad_norm": 0.3047911524772644, "learning_rate": 2.5143976155932107e-05, "loss": 0.166, "step": 30743 }, { "epoch": 0.548353725965826, "grad_norm": 0.2939079999923706, "learning_rate": 2.514241967387917e-05, "loss": 0.1203, "step": 30744 }, { "epoch": 0.5483715620875397, "grad_norm": 0.19341906905174255, "learning_rate": 2.5140863191274172e-05, "loss": 0.092, "step": 30745 }, { "epoch": 0.5483893982092534, "grad_norm": 0.3536452054977417, "learning_rate": 2.5139306708123133e-05, "loss": 0.147, "step": 30746 }, { "epoch": 0.548407234330967, "grad_norm": 0.31168386340141296, "learning_rate": 2.5137750224432105e-05, "loss": 0.1296, "step": 30747 }, { "epoch": 0.5484250704526807, "grad_norm": 0.34788137674331665, "learning_rate": 2.51361937402071e-05, "loss": 0.1281, "step": 30748 }, { "epoch": 0.5484429065743944, "grad_norm": 0.29051393270492554, "learning_rate": 2.513463725545416e-05, "loss": 0.1491, "step": 30749 }, { "epoch": 0.5484607426961081, "grad_norm": 0.24871081113815308, "learning_rate": 2.5133080770179317e-05, "loss": 0.1408, "step": 30750 }, { "epoch": 0.5484785788178218, "grad_norm": 0.25557181239128113, "learning_rate": 2.5131524284388612e-05, "loss": 0.1102, "step": 30751 }, { "epoch": 0.5484964149395355, "grad_norm": 0.23606055974960327, "learning_rate": 2.5129967798088078e-05, "loss": 0.1409, "step": 30752 }, { "epoch": 0.5485142510612493, "grad_norm": 0.2911280691623688, "learning_rate": 2.512841131128374e-05, "loss": 0.1273, "step": 30753 }, { "epoch": 0.548532087182963, "grad_norm": 0.25190410017967224, "learning_rate": 2.512685482398164e-05, "loss": 0.1291, "step": 30754 }, { "epoch": 0.5485499233046767, "grad_norm": 0.2589171528816223, "learning_rate": 2.5125298336187796e-05, "loss": 0.1163, "step": 30755 }, { "epoch": 0.5485677594263904, "grad_norm": 0.2403724640607834, "learning_rate": 2.5123741847908254e-05, "loss": 0.1048, "step": 30756 }, { "epoch": 0.548585595548104, "grad_norm": 0.24996334314346313, "learning_rate": 2.5122185359149048e-05, "loss": 0.1581, "step": 30757 }, { "epoch": 0.5486034316698177, "grad_norm": 0.28113648295402527, "learning_rate": 2.5120628869916214e-05, "loss": 0.1182, "step": 30758 }, { "epoch": 0.5486212677915314, "grad_norm": 0.3192913234233856, "learning_rate": 2.5119072380215775e-05, "loss": 0.1248, "step": 30759 }, { "epoch": 0.5486391039132451, "grad_norm": 0.3085576891899109, "learning_rate": 2.5117515890053777e-05, "loss": 0.1608, "step": 30760 }, { "epoch": 0.5486569400349588, "grad_norm": 0.29562899470329285, "learning_rate": 2.5115959399436244e-05, "loss": 0.1293, "step": 30761 }, { "epoch": 0.5486747761566725, "grad_norm": 0.2797335088253021, "learning_rate": 2.5114402908369212e-05, "loss": 0.1248, "step": 30762 }, { "epoch": 0.5486926122783862, "grad_norm": 0.31900107860565186, "learning_rate": 2.5112846416858714e-05, "loss": 0.089, "step": 30763 }, { "epoch": 0.5487104484000999, "grad_norm": 0.35078883171081543, "learning_rate": 2.511128992491078e-05, "loss": 0.2087, "step": 30764 }, { "epoch": 0.5487282845218135, "grad_norm": 0.8601611852645874, "learning_rate": 2.5109733432531457e-05, "loss": 0.1888, "step": 30765 }, { "epoch": 0.5487461206435272, "grad_norm": 0.26061251759529114, "learning_rate": 2.5108176939726763e-05, "loss": 0.1492, "step": 30766 }, { "epoch": 0.5487639567652409, "grad_norm": 0.32434362173080444, "learning_rate": 2.5106620446502745e-05, "loss": 0.1008, "step": 30767 }, { "epoch": 0.5487817928869546, "grad_norm": 0.2760969400405884, "learning_rate": 2.510506395286542e-05, "loss": 0.1325, "step": 30768 }, { "epoch": 0.5487996290086684, "grad_norm": 0.2676783502101898, "learning_rate": 2.5103507458820834e-05, "loss": 0.1376, "step": 30769 }, { "epoch": 0.5488174651303821, "grad_norm": 0.26040154695510864, "learning_rate": 2.5101950964375014e-05, "loss": 0.1168, "step": 30770 }, { "epoch": 0.5488353012520958, "grad_norm": 0.2696743309497833, "learning_rate": 2.5100394469534006e-05, "loss": 0.1805, "step": 30771 }, { "epoch": 0.5488531373738095, "grad_norm": 0.28089988231658936, "learning_rate": 2.5098837974303824e-05, "loss": 0.1477, "step": 30772 }, { "epoch": 0.5488709734955232, "grad_norm": 0.31052690744400024, "learning_rate": 2.5097281478690522e-05, "loss": 0.1641, "step": 30773 }, { "epoch": 0.5488888096172369, "grad_norm": 0.30707675218582153, "learning_rate": 2.5095724982700114e-05, "loss": 0.0983, "step": 30774 }, { "epoch": 0.5489066457389505, "grad_norm": 0.3314061164855957, "learning_rate": 2.5094168486338648e-05, "loss": 0.138, "step": 30775 }, { "epoch": 0.5489244818606642, "grad_norm": 0.33650079369544983, "learning_rate": 2.509261198961215e-05, "loss": 0.1796, "step": 30776 }, { "epoch": 0.5489423179823779, "grad_norm": 0.3104059398174286, "learning_rate": 2.5091055492526655e-05, "loss": 0.1741, "step": 30777 }, { "epoch": 0.5489601541040916, "grad_norm": 0.33527112007141113, "learning_rate": 2.5089498995088207e-05, "loss": 0.1393, "step": 30778 }, { "epoch": 0.5489779902258053, "grad_norm": 0.31261470913887024, "learning_rate": 2.5087942497302817e-05, "loss": 0.138, "step": 30779 }, { "epoch": 0.548995826347519, "grad_norm": 0.27892300486564636, "learning_rate": 2.5086385999176544e-05, "loss": 0.1118, "step": 30780 }, { "epoch": 0.5490136624692327, "grad_norm": 0.30686426162719727, "learning_rate": 2.5084829500715402e-05, "loss": 0.1776, "step": 30781 }, { "epoch": 0.5490314985909464, "grad_norm": 0.23882944881916046, "learning_rate": 2.5083273001925435e-05, "loss": 0.1385, "step": 30782 }, { "epoch": 0.54904933471266, "grad_norm": 0.3119361996650696, "learning_rate": 2.508171650281267e-05, "loss": 0.0854, "step": 30783 }, { "epoch": 0.5490671708343737, "grad_norm": 0.2971063554286957, "learning_rate": 2.508016000338314e-05, "loss": 0.1361, "step": 30784 }, { "epoch": 0.5490850069560874, "grad_norm": 0.2854631543159485, "learning_rate": 2.5078603503642882e-05, "loss": 0.2034, "step": 30785 }, { "epoch": 0.5491028430778012, "grad_norm": 0.253529816865921, "learning_rate": 2.5077047003597938e-05, "loss": 0.1278, "step": 30786 }, { "epoch": 0.5491206791995149, "grad_norm": 0.3247106373310089, "learning_rate": 2.507549050325433e-05, "loss": 0.1938, "step": 30787 }, { "epoch": 0.5491385153212286, "grad_norm": 0.2657196819782257, "learning_rate": 2.5073934002618094e-05, "loss": 0.1302, "step": 30788 }, { "epoch": 0.5491563514429423, "grad_norm": 0.21578383445739746, "learning_rate": 2.507237750169526e-05, "loss": 0.1387, "step": 30789 }, { "epoch": 0.549174187564656, "grad_norm": 0.34705445170402527, "learning_rate": 2.5070821000491873e-05, "loss": 0.172, "step": 30790 }, { "epoch": 0.5491920236863697, "grad_norm": 0.20326213538646698, "learning_rate": 2.5069264499013956e-05, "loss": 0.1314, "step": 30791 }, { "epoch": 0.5492098598080833, "grad_norm": 0.2628207206726074, "learning_rate": 2.5067707997267542e-05, "loss": 0.1713, "step": 30792 }, { "epoch": 0.549227695929797, "grad_norm": 0.23542331159114838, "learning_rate": 2.5066151495258677e-05, "loss": 0.1561, "step": 30793 }, { "epoch": 0.5492455320515107, "grad_norm": 0.2329222708940506, "learning_rate": 2.5064594992993378e-05, "loss": 0.1186, "step": 30794 }, { "epoch": 0.5492633681732244, "grad_norm": 0.3014250695705414, "learning_rate": 2.5063038490477692e-05, "loss": 0.1092, "step": 30795 }, { "epoch": 0.5492812042949381, "grad_norm": 0.24184347689151764, "learning_rate": 2.5061481987717644e-05, "loss": 0.1218, "step": 30796 }, { "epoch": 0.5492990404166518, "grad_norm": 0.26139211654663086, "learning_rate": 2.5059925484719275e-05, "loss": 0.129, "step": 30797 }, { "epoch": 0.5493168765383655, "grad_norm": 0.241389662027359, "learning_rate": 2.50583689814886e-05, "loss": 0.0675, "step": 30798 }, { "epoch": 0.5493347126600792, "grad_norm": 0.2729874849319458, "learning_rate": 2.5056812478031683e-05, "loss": 0.1503, "step": 30799 }, { "epoch": 0.5493525487817928, "grad_norm": 0.26856231689453125, "learning_rate": 2.5055255974354536e-05, "loss": 0.1005, "step": 30800 }, { "epoch": 0.5493703849035065, "grad_norm": 0.26102638244628906, "learning_rate": 2.5053699470463198e-05, "loss": 0.1703, "step": 30801 }, { "epoch": 0.5493882210252202, "grad_norm": 0.3327820897102356, "learning_rate": 2.50521429663637e-05, "loss": 0.1842, "step": 30802 }, { "epoch": 0.549406057146934, "grad_norm": 0.3525446057319641, "learning_rate": 2.5050586462062082e-05, "loss": 0.1633, "step": 30803 }, { "epoch": 0.5494238932686477, "grad_norm": 0.19760239124298096, "learning_rate": 2.504902995756437e-05, "loss": 0.1205, "step": 30804 }, { "epoch": 0.5494417293903614, "grad_norm": 0.24560704827308655, "learning_rate": 2.5047473452876597e-05, "loss": 0.1142, "step": 30805 }, { "epoch": 0.5494595655120751, "grad_norm": 0.1904924213886261, "learning_rate": 2.504591694800481e-05, "loss": 0.1179, "step": 30806 }, { "epoch": 0.5494774016337888, "grad_norm": 0.25329336524009705, "learning_rate": 2.5044360442955023e-05, "loss": 0.1206, "step": 30807 }, { "epoch": 0.5494952377555025, "grad_norm": 0.2768438160419464, "learning_rate": 2.5042803937733288e-05, "loss": 0.1526, "step": 30808 }, { "epoch": 0.5495130738772162, "grad_norm": 0.2582390606403351, "learning_rate": 2.504124743234563e-05, "loss": 0.1073, "step": 30809 }, { "epoch": 0.5495309099989298, "grad_norm": 0.295486718416214, "learning_rate": 2.503969092679808e-05, "loss": 0.1422, "step": 30810 }, { "epoch": 0.5495487461206435, "grad_norm": 0.363986611366272, "learning_rate": 2.503813442109667e-05, "loss": 0.1586, "step": 30811 }, { "epoch": 0.5495665822423572, "grad_norm": 0.25413811206817627, "learning_rate": 2.503657791524744e-05, "loss": 0.1312, "step": 30812 }, { "epoch": 0.5495844183640709, "grad_norm": 0.3368874192237854, "learning_rate": 2.503502140925642e-05, "loss": 0.13, "step": 30813 }, { "epoch": 0.5496022544857846, "grad_norm": 0.2822263836860657, "learning_rate": 2.5033464903129654e-05, "loss": 0.1492, "step": 30814 }, { "epoch": 0.5496200906074983, "grad_norm": 0.2521396279335022, "learning_rate": 2.503190839687316e-05, "loss": 0.1483, "step": 30815 }, { "epoch": 0.549637926729212, "grad_norm": 0.27061811089515686, "learning_rate": 2.503035189049298e-05, "loss": 0.1386, "step": 30816 }, { "epoch": 0.5496557628509257, "grad_norm": 0.35903432965278625, "learning_rate": 2.5028795383995146e-05, "loss": 0.1612, "step": 30817 }, { "epoch": 0.5496735989726393, "grad_norm": 0.2776688039302826, "learning_rate": 2.502723887738569e-05, "loss": 0.1653, "step": 30818 }, { "epoch": 0.549691435094353, "grad_norm": 0.2492254227399826, "learning_rate": 2.5025682370670644e-05, "loss": 0.1731, "step": 30819 }, { "epoch": 0.5497092712160668, "grad_norm": 0.25500962138175964, "learning_rate": 2.502412586385604e-05, "loss": 0.1182, "step": 30820 }, { "epoch": 0.5497271073377805, "grad_norm": 0.2514731287956238, "learning_rate": 2.502256935694793e-05, "loss": 0.0896, "step": 30821 }, { "epoch": 0.5497449434594942, "grad_norm": 0.2468254715204239, "learning_rate": 2.502101284995232e-05, "loss": 0.1253, "step": 30822 }, { "epoch": 0.5497627795812079, "grad_norm": 0.292104035615921, "learning_rate": 2.501945634287527e-05, "loss": 0.1338, "step": 30823 }, { "epoch": 0.5497806157029216, "grad_norm": 0.26707273721694946, "learning_rate": 2.5017899835722784e-05, "loss": 0.1173, "step": 30824 }, { "epoch": 0.5497984518246353, "grad_norm": 0.32471519708633423, "learning_rate": 2.5016343328500923e-05, "loss": 0.1128, "step": 30825 }, { "epoch": 0.549816287946349, "grad_norm": 0.20818696916103363, "learning_rate": 2.5014786821215703e-05, "loss": 0.129, "step": 30826 }, { "epoch": 0.5498341240680626, "grad_norm": 0.28682759404182434, "learning_rate": 2.5013230313873176e-05, "loss": 0.2049, "step": 30827 }, { "epoch": 0.5498519601897763, "grad_norm": 0.25129520893096924, "learning_rate": 2.5011673806479354e-05, "loss": 0.1282, "step": 30828 }, { "epoch": 0.54986979631149, "grad_norm": 0.23705129325389862, "learning_rate": 2.5010117299040286e-05, "loss": 0.1204, "step": 30829 }, { "epoch": 0.5498876324332037, "grad_norm": 0.2183847725391388, "learning_rate": 2.5008560791561997e-05, "loss": 0.0953, "step": 30830 }, { "epoch": 0.5499054685549174, "grad_norm": 0.21628688275814056, "learning_rate": 2.5007004284050524e-05, "loss": 0.1185, "step": 30831 }, { "epoch": 0.5499233046766311, "grad_norm": 0.4334314465522766, "learning_rate": 2.5005447776511893e-05, "loss": 0.1468, "step": 30832 }, { "epoch": 0.5499411407983448, "grad_norm": 0.30514198541641235, "learning_rate": 2.500389126895215e-05, "loss": 0.1456, "step": 30833 }, { "epoch": 0.5499589769200585, "grad_norm": 0.2574711740016937, "learning_rate": 2.5002334761377326e-05, "loss": 0.119, "step": 30834 }, { "epoch": 0.5499768130417722, "grad_norm": 0.23198111355304718, "learning_rate": 2.500077825379345e-05, "loss": 0.1455, "step": 30835 }, { "epoch": 0.5499946491634858, "grad_norm": 0.2556779682636261, "learning_rate": 2.4999221746206557e-05, "loss": 0.0968, "step": 30836 }, { "epoch": 0.5500124852851996, "grad_norm": 0.22976148128509521, "learning_rate": 2.4997665238622683e-05, "loss": 0.1397, "step": 30837 }, { "epoch": 0.5500303214069133, "grad_norm": 0.2004426270723343, "learning_rate": 2.4996108731047853e-05, "loss": 0.0765, "step": 30838 }, { "epoch": 0.550048157528627, "grad_norm": 0.263600617647171, "learning_rate": 2.499455222348811e-05, "loss": 0.1204, "step": 30839 }, { "epoch": 0.5500659936503407, "grad_norm": 0.27715352177619934, "learning_rate": 2.499299571594948e-05, "loss": 0.1507, "step": 30840 }, { "epoch": 0.5500838297720544, "grad_norm": 0.25137487053871155, "learning_rate": 2.4991439208438013e-05, "loss": 0.1484, "step": 30841 }, { "epoch": 0.5501016658937681, "grad_norm": 0.28624987602233887, "learning_rate": 2.4989882700959717e-05, "loss": 0.1232, "step": 30842 }, { "epoch": 0.5501195020154818, "grad_norm": 0.3630903959274292, "learning_rate": 2.498832619352065e-05, "loss": 0.1041, "step": 30843 }, { "epoch": 0.5501373381371955, "grad_norm": 0.31580740213394165, "learning_rate": 2.4986769686126833e-05, "loss": 0.113, "step": 30844 }, { "epoch": 0.5501551742589091, "grad_norm": 0.39485299587249756, "learning_rate": 2.49852131787843e-05, "loss": 0.114, "step": 30845 }, { "epoch": 0.5501730103806228, "grad_norm": 0.3004516065120697, "learning_rate": 2.4983656671499083e-05, "loss": 0.1784, "step": 30846 }, { "epoch": 0.5501908465023365, "grad_norm": 0.24809834361076355, "learning_rate": 2.498210016427721e-05, "loss": 0.1296, "step": 30847 }, { "epoch": 0.5502086826240502, "grad_norm": 0.28357893228530884, "learning_rate": 2.498054365712474e-05, "loss": 0.0939, "step": 30848 }, { "epoch": 0.5502265187457639, "grad_norm": 0.18697531521320343, "learning_rate": 2.4978987150047682e-05, "loss": 0.1068, "step": 30849 }, { "epoch": 0.5502443548674776, "grad_norm": 0.34882503747940063, "learning_rate": 2.4977430643052083e-05, "loss": 0.1418, "step": 30850 }, { "epoch": 0.5502621909891913, "grad_norm": 0.26318806409835815, "learning_rate": 2.4975874136143956e-05, "loss": 0.1161, "step": 30851 }, { "epoch": 0.550280027110905, "grad_norm": 0.3377297818660736, "learning_rate": 2.4974317629329365e-05, "loss": 0.1343, "step": 30852 }, { "epoch": 0.5502978632326186, "grad_norm": 0.26560068130493164, "learning_rate": 2.4972761122614317e-05, "loss": 0.1038, "step": 30853 }, { "epoch": 0.5503156993543324, "grad_norm": 0.2568463087081909, "learning_rate": 2.4971204616004863e-05, "loss": 0.1563, "step": 30854 }, { "epoch": 0.5503335354760461, "grad_norm": 0.22217004001140594, "learning_rate": 2.4969648109507022e-05, "loss": 0.1093, "step": 30855 }, { "epoch": 0.5503513715977598, "grad_norm": 0.3076801300048828, "learning_rate": 2.4968091603126843e-05, "loss": 0.1781, "step": 30856 }, { "epoch": 0.5503692077194735, "grad_norm": 0.17820556461811066, "learning_rate": 2.4966535096870352e-05, "loss": 0.1029, "step": 30857 }, { "epoch": 0.5503870438411872, "grad_norm": 0.2968607544898987, "learning_rate": 2.4964978590743585e-05, "loss": 0.1403, "step": 30858 }, { "epoch": 0.5504048799629009, "grad_norm": 0.2898006737232208, "learning_rate": 2.4963422084752564e-05, "loss": 0.1071, "step": 30859 }, { "epoch": 0.5504227160846146, "grad_norm": 0.3475673496723175, "learning_rate": 2.4961865578903336e-05, "loss": 0.1497, "step": 30860 }, { "epoch": 0.5504405522063283, "grad_norm": 0.282784104347229, "learning_rate": 2.4960309073201926e-05, "loss": 0.1019, "step": 30861 }, { "epoch": 0.550458388328042, "grad_norm": 0.29330962896347046, "learning_rate": 2.4958752567654377e-05, "loss": 0.1623, "step": 30862 }, { "epoch": 0.5504762244497556, "grad_norm": 0.20964418351650238, "learning_rate": 2.495719606226672e-05, "loss": 0.1089, "step": 30863 }, { "epoch": 0.5504940605714693, "grad_norm": 0.20337319374084473, "learning_rate": 2.4955639557044976e-05, "loss": 0.1145, "step": 30864 }, { "epoch": 0.550511896693183, "grad_norm": 0.26945674419403076, "learning_rate": 2.4954083051995198e-05, "loss": 0.1341, "step": 30865 }, { "epoch": 0.5505297328148967, "grad_norm": 0.20108583569526672, "learning_rate": 2.495252654712341e-05, "loss": 0.1156, "step": 30866 }, { "epoch": 0.5505475689366104, "grad_norm": 0.20346830785274506, "learning_rate": 2.4950970042435636e-05, "loss": 0.1458, "step": 30867 }, { "epoch": 0.5505654050583241, "grad_norm": 0.24866527318954468, "learning_rate": 2.4949413537937924e-05, "loss": 0.0829, "step": 30868 }, { "epoch": 0.5505832411800378, "grad_norm": 0.29681286215782166, "learning_rate": 2.4947857033636303e-05, "loss": 0.1154, "step": 30869 }, { "epoch": 0.5506010773017516, "grad_norm": 0.20020225644111633, "learning_rate": 2.4946300529536808e-05, "loss": 0.0829, "step": 30870 }, { "epoch": 0.5506189134234653, "grad_norm": 0.3476930260658264, "learning_rate": 2.4944744025645473e-05, "loss": 0.1363, "step": 30871 }, { "epoch": 0.5506367495451789, "grad_norm": 0.2925347089767456, "learning_rate": 2.494318752196833e-05, "loss": 0.1111, "step": 30872 }, { "epoch": 0.5506545856668926, "grad_norm": 0.262098491191864, "learning_rate": 2.4941631018511397e-05, "loss": 0.1433, "step": 30873 }, { "epoch": 0.5506724217886063, "grad_norm": 0.29183250665664673, "learning_rate": 2.494007451528073e-05, "loss": 0.1548, "step": 30874 }, { "epoch": 0.55069025791032, "grad_norm": 0.2617546021938324, "learning_rate": 2.493851801228236e-05, "loss": 0.1716, "step": 30875 }, { "epoch": 0.5507080940320337, "grad_norm": 0.332061231136322, "learning_rate": 2.493696150952232e-05, "loss": 0.1518, "step": 30876 }, { "epoch": 0.5507259301537474, "grad_norm": 0.3245917856693268, "learning_rate": 2.493540500700662e-05, "loss": 0.1575, "step": 30877 }, { "epoch": 0.5507437662754611, "grad_norm": 0.3222822844982147, "learning_rate": 2.493384850474133e-05, "loss": 0.0951, "step": 30878 }, { "epoch": 0.5507616023971748, "grad_norm": 0.21622516214847565, "learning_rate": 2.4932292002732464e-05, "loss": 0.1492, "step": 30879 }, { "epoch": 0.5507794385188884, "grad_norm": 0.22095699608325958, "learning_rate": 2.4930735500986054e-05, "loss": 0.1349, "step": 30880 }, { "epoch": 0.5507972746406021, "grad_norm": 0.24120375514030457, "learning_rate": 2.4929178999508132e-05, "loss": 0.078, "step": 30881 }, { "epoch": 0.5508151107623158, "grad_norm": 0.2538900375366211, "learning_rate": 2.4927622498304738e-05, "loss": 0.1186, "step": 30882 }, { "epoch": 0.5508329468840295, "grad_norm": 0.28807610273361206, "learning_rate": 2.492606599738191e-05, "loss": 0.1998, "step": 30883 }, { "epoch": 0.5508507830057432, "grad_norm": 0.2732655704021454, "learning_rate": 2.492450949674568e-05, "loss": 0.1356, "step": 30884 }, { "epoch": 0.5508686191274569, "grad_norm": 0.2930537760257721, "learning_rate": 2.4922952996402075e-05, "loss": 0.1536, "step": 30885 }, { "epoch": 0.5508864552491706, "grad_norm": 0.3630143702030182, "learning_rate": 2.492139649635712e-05, "loss": 0.1656, "step": 30886 }, { "epoch": 0.5509042913708844, "grad_norm": 0.28624990582466125, "learning_rate": 2.4919839996616867e-05, "loss": 0.1649, "step": 30887 }, { "epoch": 0.5509221274925981, "grad_norm": 0.3337860405445099, "learning_rate": 2.491828349718734e-05, "loss": 0.0846, "step": 30888 }, { "epoch": 0.5509399636143117, "grad_norm": 0.24200084805488586, "learning_rate": 2.4916726998074578e-05, "loss": 0.1079, "step": 30889 }, { "epoch": 0.5509577997360254, "grad_norm": 0.30300071835517883, "learning_rate": 2.49151704992846e-05, "loss": 0.1623, "step": 30890 }, { "epoch": 0.5509756358577391, "grad_norm": 0.23886799812316895, "learning_rate": 2.491361400082346e-05, "loss": 0.1253, "step": 30891 }, { "epoch": 0.5509934719794528, "grad_norm": 0.2942934036254883, "learning_rate": 2.4912057502697185e-05, "loss": 0.1886, "step": 30892 }, { "epoch": 0.5510113081011665, "grad_norm": 0.24329522252082825, "learning_rate": 2.4910501004911803e-05, "loss": 0.1352, "step": 30893 }, { "epoch": 0.5510291442228802, "grad_norm": 0.2849752902984619, "learning_rate": 2.490894450747334e-05, "loss": 0.11, "step": 30894 }, { "epoch": 0.5510469803445939, "grad_norm": 0.26910942792892456, "learning_rate": 2.490738801038785e-05, "loss": 0.15, "step": 30895 }, { "epoch": 0.5510648164663076, "grad_norm": 0.2877151668071747, "learning_rate": 2.4905831513661355e-05, "loss": 0.1739, "step": 30896 }, { "epoch": 0.5510826525880212, "grad_norm": 0.31718170642852783, "learning_rate": 2.4904275017299892e-05, "loss": 0.1838, "step": 30897 }, { "epoch": 0.5511004887097349, "grad_norm": 0.20561911165714264, "learning_rate": 2.490271852130949e-05, "loss": 0.1015, "step": 30898 }, { "epoch": 0.5511183248314486, "grad_norm": 0.24936097860336304, "learning_rate": 2.4901162025696175e-05, "loss": 0.1286, "step": 30899 }, { "epoch": 0.5511361609531623, "grad_norm": 0.31895044445991516, "learning_rate": 2.4899605530466003e-05, "loss": 0.0918, "step": 30900 }, { "epoch": 0.551153997074876, "grad_norm": 0.2934606969356537, "learning_rate": 2.489804903562499e-05, "loss": 0.1563, "step": 30901 }, { "epoch": 0.5511718331965897, "grad_norm": 0.24587039649486542, "learning_rate": 2.4896492541179175e-05, "loss": 0.1147, "step": 30902 }, { "epoch": 0.5511896693183034, "grad_norm": 0.2652030885219574, "learning_rate": 2.489493604713458e-05, "loss": 0.1504, "step": 30903 }, { "epoch": 0.5512075054400172, "grad_norm": 0.24747587740421295, "learning_rate": 2.4893379553497264e-05, "loss": 0.1701, "step": 30904 }, { "epoch": 0.5512253415617309, "grad_norm": 0.31529778242111206, "learning_rate": 2.4891823060273242e-05, "loss": 0.0831, "step": 30905 }, { "epoch": 0.5512431776834446, "grad_norm": 0.22627204656600952, "learning_rate": 2.4890266567468552e-05, "loss": 0.1134, "step": 30906 }, { "epoch": 0.5512610138051582, "grad_norm": 0.23765310645103455, "learning_rate": 2.488871007508922e-05, "loss": 0.1426, "step": 30907 }, { "epoch": 0.5512788499268719, "grad_norm": 0.26153743267059326, "learning_rate": 2.4887153583141292e-05, "loss": 0.1237, "step": 30908 }, { "epoch": 0.5512966860485856, "grad_norm": 0.22858697175979614, "learning_rate": 2.488559709163079e-05, "loss": 0.1556, "step": 30909 }, { "epoch": 0.5513145221702993, "grad_norm": 0.29707035422325134, "learning_rate": 2.488404060056376e-05, "loss": 0.1259, "step": 30910 }, { "epoch": 0.551332358292013, "grad_norm": 0.2790110409259796, "learning_rate": 2.4882484109946232e-05, "loss": 0.1631, "step": 30911 }, { "epoch": 0.5513501944137267, "grad_norm": 0.22752730548381805, "learning_rate": 2.488092761978422e-05, "loss": 0.1577, "step": 30912 }, { "epoch": 0.5513680305354404, "grad_norm": 0.2974559962749481, "learning_rate": 2.4879371130083788e-05, "loss": 0.141, "step": 30913 }, { "epoch": 0.5513858666571541, "grad_norm": 0.22686950862407684, "learning_rate": 2.4877814640850958e-05, "loss": 0.1464, "step": 30914 }, { "epoch": 0.5514037027788677, "grad_norm": 0.36017489433288574, "learning_rate": 2.487625815209175e-05, "loss": 0.1381, "step": 30915 }, { "epoch": 0.5514215389005814, "grad_norm": 0.3199823498725891, "learning_rate": 2.487470166381221e-05, "loss": 0.1342, "step": 30916 }, { "epoch": 0.5514393750222951, "grad_norm": 0.3009137213230133, "learning_rate": 2.487314517601837e-05, "loss": 0.1347, "step": 30917 }, { "epoch": 0.5514572111440088, "grad_norm": 0.29534175992012024, "learning_rate": 2.4871588688716267e-05, "loss": 0.1324, "step": 30918 }, { "epoch": 0.5514750472657225, "grad_norm": 0.2668975591659546, "learning_rate": 2.487003220191193e-05, "loss": 0.1025, "step": 30919 }, { "epoch": 0.5514928833874362, "grad_norm": 0.26687297224998474, "learning_rate": 2.4868475715611383e-05, "loss": 0.1317, "step": 30920 }, { "epoch": 0.55151071950915, "grad_norm": 0.2760293185710907, "learning_rate": 2.4866919229820682e-05, "loss": 0.1999, "step": 30921 }, { "epoch": 0.5515285556308637, "grad_norm": 0.33544641733169556, "learning_rate": 2.4865362744545847e-05, "loss": 0.1301, "step": 30922 }, { "epoch": 0.5515463917525774, "grad_norm": 0.9272142648696899, "learning_rate": 2.4863806259792906e-05, "loss": 0.2047, "step": 30923 }, { "epoch": 0.551564227874291, "grad_norm": 0.26097604632377625, "learning_rate": 2.4862249775567908e-05, "loss": 0.1181, "step": 30924 }, { "epoch": 0.5515820639960047, "grad_norm": 0.24084815382957458, "learning_rate": 2.4860693291876862e-05, "loss": 0.0997, "step": 30925 }, { "epoch": 0.5515999001177184, "grad_norm": 0.2701628506183624, "learning_rate": 2.485913680872583e-05, "loss": 0.1293, "step": 30926 }, { "epoch": 0.5516177362394321, "grad_norm": 0.18621596693992615, "learning_rate": 2.4857580326120833e-05, "loss": 0.1203, "step": 30927 }, { "epoch": 0.5516355723611458, "grad_norm": 0.3313795328140259, "learning_rate": 2.4856023844067905e-05, "loss": 0.1261, "step": 30928 }, { "epoch": 0.5516534084828595, "grad_norm": 0.2614094913005829, "learning_rate": 2.4854467362573063e-05, "loss": 0.1586, "step": 30929 }, { "epoch": 0.5516712446045732, "grad_norm": 0.19842149317264557, "learning_rate": 2.4852910881642366e-05, "loss": 0.1114, "step": 30930 }, { "epoch": 0.5516890807262869, "grad_norm": 0.21920661628246307, "learning_rate": 2.485135440128184e-05, "loss": 0.1026, "step": 30931 }, { "epoch": 0.5517069168480006, "grad_norm": 0.49742481112480164, "learning_rate": 2.4849797921497517e-05, "loss": 0.1146, "step": 30932 }, { "epoch": 0.5517247529697142, "grad_norm": 0.36548441648483276, "learning_rate": 2.4848241442295417e-05, "loss": 0.1393, "step": 30933 }, { "epoch": 0.5517425890914279, "grad_norm": 0.285293847322464, "learning_rate": 2.4846684963681595e-05, "loss": 0.1337, "step": 30934 }, { "epoch": 0.5517604252131416, "grad_norm": 0.3216269314289093, "learning_rate": 2.484512848566208e-05, "loss": 0.183, "step": 30935 }, { "epoch": 0.5517782613348553, "grad_norm": 0.19250985980033875, "learning_rate": 2.484357200824289e-05, "loss": 0.1174, "step": 30936 }, { "epoch": 0.551796097456569, "grad_norm": 0.20033006370067596, "learning_rate": 2.4842015531430075e-05, "loss": 0.1139, "step": 30937 }, { "epoch": 0.5518139335782828, "grad_norm": 0.2517346143722534, "learning_rate": 2.4840459055229653e-05, "loss": 0.1462, "step": 30938 }, { "epoch": 0.5518317696999965, "grad_norm": 0.2926196753978729, "learning_rate": 2.483890257964768e-05, "loss": 0.1243, "step": 30939 }, { "epoch": 0.5518496058217102, "grad_norm": 0.27074941992759705, "learning_rate": 2.4837346104690175e-05, "loss": 0.1201, "step": 30940 }, { "epoch": 0.5518674419434239, "grad_norm": 0.33330586552619934, "learning_rate": 2.483578963036317e-05, "loss": 0.1612, "step": 30941 }, { "epoch": 0.5518852780651375, "grad_norm": 0.27145063877105713, "learning_rate": 2.483423315667269e-05, "loss": 0.1292, "step": 30942 }, { "epoch": 0.5519031141868512, "grad_norm": 0.2024078220129013, "learning_rate": 2.4832676683624792e-05, "loss": 0.1356, "step": 30943 }, { "epoch": 0.5519209503085649, "grad_norm": 0.2600943446159363, "learning_rate": 2.4831120211225492e-05, "loss": 0.1573, "step": 30944 }, { "epoch": 0.5519387864302786, "grad_norm": 0.29306846857070923, "learning_rate": 2.4829563739480836e-05, "loss": 0.1176, "step": 30945 }, { "epoch": 0.5519566225519923, "grad_norm": 0.21002456545829773, "learning_rate": 2.4828007268396832e-05, "loss": 0.1033, "step": 30946 }, { "epoch": 0.551974458673706, "grad_norm": 0.22434687614440918, "learning_rate": 2.4826450797979548e-05, "loss": 0.1242, "step": 30947 }, { "epoch": 0.5519922947954197, "grad_norm": 0.20229731500148773, "learning_rate": 2.4824894328234998e-05, "loss": 0.0977, "step": 30948 }, { "epoch": 0.5520101309171334, "grad_norm": 0.2914944589138031, "learning_rate": 2.482333785916922e-05, "loss": 0.1592, "step": 30949 }, { "epoch": 0.552027967038847, "grad_norm": 0.19878222048282623, "learning_rate": 2.482178139078824e-05, "loss": 0.1131, "step": 30950 }, { "epoch": 0.5520458031605607, "grad_norm": 0.3126569092273712, "learning_rate": 2.482022492309809e-05, "loss": 0.18, "step": 30951 }, { "epoch": 0.5520636392822744, "grad_norm": 0.2392844557762146, "learning_rate": 2.4818668456104822e-05, "loss": 0.1201, "step": 30952 }, { "epoch": 0.5520814754039881, "grad_norm": 0.19470766186714172, "learning_rate": 2.4817111989814455e-05, "loss": 0.1282, "step": 30953 }, { "epoch": 0.5520993115257018, "grad_norm": 0.3420150578022003, "learning_rate": 2.481555552423303e-05, "loss": 0.1997, "step": 30954 }, { "epoch": 0.5521171476474156, "grad_norm": 0.2529189884662628, "learning_rate": 2.4813999059366565e-05, "loss": 0.1412, "step": 30955 }, { "epoch": 0.5521349837691293, "grad_norm": 0.20657360553741455, "learning_rate": 2.4812442595221112e-05, "loss": 0.1562, "step": 30956 }, { "epoch": 0.552152819890843, "grad_norm": 0.23202857375144958, "learning_rate": 2.481088613180269e-05, "loss": 0.1508, "step": 30957 }, { "epoch": 0.5521706560125567, "grad_norm": 0.2111150026321411, "learning_rate": 2.4809329669117342e-05, "loss": 0.1196, "step": 30958 }, { "epoch": 0.5521884921342703, "grad_norm": 0.1910131722688675, "learning_rate": 2.4807773207171092e-05, "loss": 0.0794, "step": 30959 }, { "epoch": 0.552206328255984, "grad_norm": 0.22991007566452026, "learning_rate": 2.4806216745969987e-05, "loss": 0.1388, "step": 30960 }, { "epoch": 0.5522241643776977, "grad_norm": 0.24920505285263062, "learning_rate": 2.4804660285520055e-05, "loss": 0.1181, "step": 30961 }, { "epoch": 0.5522420004994114, "grad_norm": 0.1558464765548706, "learning_rate": 2.4803103825827326e-05, "loss": 0.0724, "step": 30962 }, { "epoch": 0.5522598366211251, "grad_norm": 0.22510498762130737, "learning_rate": 2.4801547366897832e-05, "loss": 0.1168, "step": 30963 }, { "epoch": 0.5522776727428388, "grad_norm": 0.23172415792942047, "learning_rate": 2.4799990908737606e-05, "loss": 0.1096, "step": 30964 }, { "epoch": 0.5522955088645525, "grad_norm": 0.29675814509391785, "learning_rate": 2.4798434451352683e-05, "loss": 0.139, "step": 30965 }, { "epoch": 0.5523133449862662, "grad_norm": 0.31687185168266296, "learning_rate": 2.4796877994749107e-05, "loss": 0.1614, "step": 30966 }, { "epoch": 0.5523311811079799, "grad_norm": 0.22057831287384033, "learning_rate": 2.47953215389329e-05, "loss": 0.1121, "step": 30967 }, { "epoch": 0.5523490172296935, "grad_norm": 0.19451525807380676, "learning_rate": 2.4793765083910086e-05, "loss": 0.107, "step": 30968 }, { "epoch": 0.5523668533514072, "grad_norm": 0.3405996263027191, "learning_rate": 2.479220862968672e-05, "loss": 0.1305, "step": 30969 }, { "epoch": 0.5523846894731209, "grad_norm": 0.24071982502937317, "learning_rate": 2.479065217626883e-05, "loss": 0.1372, "step": 30970 }, { "epoch": 0.5524025255948347, "grad_norm": 0.2571866512298584, "learning_rate": 2.478909572366243e-05, "loss": 0.1239, "step": 30971 }, { "epoch": 0.5524203617165484, "grad_norm": 0.37197786569595337, "learning_rate": 2.4787539271873574e-05, "loss": 0.1413, "step": 30972 }, { "epoch": 0.5524381978382621, "grad_norm": 0.22080372273921967, "learning_rate": 2.478598282090829e-05, "loss": 0.1394, "step": 30973 }, { "epoch": 0.5524560339599758, "grad_norm": 0.2480211853981018, "learning_rate": 2.4784426370772613e-05, "loss": 0.1485, "step": 30974 }, { "epoch": 0.5524738700816895, "grad_norm": 0.21872583031654358, "learning_rate": 2.4782869921472576e-05, "loss": 0.1341, "step": 30975 }, { "epoch": 0.5524917062034032, "grad_norm": 0.19811439514160156, "learning_rate": 2.4781313473014208e-05, "loss": 0.0775, "step": 30976 }, { "epoch": 0.5525095423251168, "grad_norm": 0.2877408266067505, "learning_rate": 2.4779757025403536e-05, "loss": 0.1397, "step": 30977 }, { "epoch": 0.5525273784468305, "grad_norm": 0.5443234443664551, "learning_rate": 2.4778200578646613e-05, "loss": 0.2372, "step": 30978 }, { "epoch": 0.5525452145685442, "grad_norm": 0.17202936112880707, "learning_rate": 2.4776644132749456e-05, "loss": 0.0774, "step": 30979 }, { "epoch": 0.5525630506902579, "grad_norm": 0.22290056943893433, "learning_rate": 2.4775087687718105e-05, "loss": 0.1076, "step": 30980 }, { "epoch": 0.5525808868119716, "grad_norm": 0.2118106335401535, "learning_rate": 2.4773531243558585e-05, "loss": 0.1274, "step": 30981 }, { "epoch": 0.5525987229336853, "grad_norm": 0.23682989180088043, "learning_rate": 2.4771974800276944e-05, "loss": 0.1237, "step": 30982 }, { "epoch": 0.552616559055399, "grad_norm": 0.38061296939849854, "learning_rate": 2.4770418357879208e-05, "loss": 0.1766, "step": 30983 }, { "epoch": 0.5526343951771127, "grad_norm": 0.2852959930896759, "learning_rate": 2.4768861916371406e-05, "loss": 0.1723, "step": 30984 }, { "epoch": 0.5526522312988263, "grad_norm": 0.2268906533718109, "learning_rate": 2.476730547575957e-05, "loss": 0.0862, "step": 30985 }, { "epoch": 0.55267006742054, "grad_norm": 0.24655452370643616, "learning_rate": 2.4765749036049746e-05, "loss": 0.1429, "step": 30986 }, { "epoch": 0.5526879035422537, "grad_norm": 0.35229599475860596, "learning_rate": 2.476419259724796e-05, "loss": 0.1565, "step": 30987 }, { "epoch": 0.5527057396639675, "grad_norm": 0.23618173599243164, "learning_rate": 2.4762636159360248e-05, "loss": 0.1337, "step": 30988 }, { "epoch": 0.5527235757856812, "grad_norm": 0.25198864936828613, "learning_rate": 2.4761079722392637e-05, "loss": 0.1131, "step": 30989 }, { "epoch": 0.5527414119073949, "grad_norm": 0.26145991683006287, "learning_rate": 2.4759523286351155e-05, "loss": 0.11, "step": 30990 }, { "epoch": 0.5527592480291086, "grad_norm": 0.4384521245956421, "learning_rate": 2.4757966851241853e-05, "loss": 0.2093, "step": 30991 }, { "epoch": 0.5527770841508223, "grad_norm": 0.339275985956192, "learning_rate": 2.4756410417070752e-05, "loss": 0.2122, "step": 30992 }, { "epoch": 0.552794920272536, "grad_norm": 0.26871371269226074, "learning_rate": 2.4754853983843893e-05, "loss": 0.1298, "step": 30993 }, { "epoch": 0.5528127563942496, "grad_norm": 0.21961291134357452, "learning_rate": 2.4753297551567293e-05, "loss": 0.1165, "step": 30994 }, { "epoch": 0.5528305925159633, "grad_norm": 0.26469185948371887, "learning_rate": 2.4751741120247007e-05, "loss": 0.1484, "step": 30995 }, { "epoch": 0.552848428637677, "grad_norm": 0.32913029193878174, "learning_rate": 2.475018468988906e-05, "loss": 0.1705, "step": 30996 }, { "epoch": 0.5528662647593907, "grad_norm": 0.2835606038570404, "learning_rate": 2.4748628260499483e-05, "loss": 0.1428, "step": 30997 }, { "epoch": 0.5528841008811044, "grad_norm": 0.22081094980239868, "learning_rate": 2.4747071832084296e-05, "loss": 0.112, "step": 30998 }, { "epoch": 0.5529019370028181, "grad_norm": 0.27278465032577515, "learning_rate": 2.474551540464956e-05, "loss": 0.0811, "step": 30999 }, { "epoch": 0.5529197731245318, "grad_norm": 0.28471705317497253, "learning_rate": 2.474395897820129e-05, "loss": 0.1299, "step": 31000 }, { "epoch": 0.5529197731245318, "eval_loss": 0.13027334213256836, "eval_runtime": 108.7216, "eval_samples_per_second": 9.419, "eval_steps_per_second": 1.573, "step": 31000 }, { "epoch": 0.5529376092462455, "grad_norm": 0.29438138008117676, "learning_rate": 2.4742402552745527e-05, "loss": 0.1509, "step": 31001 }, { "epoch": 0.5529554453679592, "grad_norm": 0.2521360516548157, "learning_rate": 2.4740846128288298e-05, "loss": 0.1508, "step": 31002 }, { "epoch": 0.5529732814896728, "grad_norm": 0.30396267771720886, "learning_rate": 2.473928970483563e-05, "loss": 0.1261, "step": 31003 }, { "epoch": 0.5529911176113865, "grad_norm": 0.3621559739112854, "learning_rate": 2.4737733282393577e-05, "loss": 0.1691, "step": 31004 }, { "epoch": 0.5530089537331003, "grad_norm": 0.37630075216293335, "learning_rate": 2.473617686096816e-05, "loss": 0.086, "step": 31005 }, { "epoch": 0.553026789854814, "grad_norm": 0.3095848560333252, "learning_rate": 2.4734620440565407e-05, "loss": 0.105, "step": 31006 }, { "epoch": 0.5530446259765277, "grad_norm": 0.22440184652805328, "learning_rate": 2.4733064021191352e-05, "loss": 0.1235, "step": 31007 }, { "epoch": 0.5530624620982414, "grad_norm": 0.24018505215644836, "learning_rate": 2.4731507602852042e-05, "loss": 0.1143, "step": 31008 }, { "epoch": 0.5530802982199551, "grad_norm": 0.2785538136959076, "learning_rate": 2.4729951185553502e-05, "loss": 0.1196, "step": 31009 }, { "epoch": 0.5530981343416688, "grad_norm": 0.28385409712791443, "learning_rate": 2.4728394769301766e-05, "loss": 0.1839, "step": 31010 }, { "epoch": 0.5531159704633825, "grad_norm": 0.23382726311683655, "learning_rate": 2.4726838354102855e-05, "loss": 0.1021, "step": 31011 }, { "epoch": 0.5531338065850961, "grad_norm": 0.32426387071609497, "learning_rate": 2.4725281939962822e-05, "loss": 0.0833, "step": 31012 }, { "epoch": 0.5531516427068098, "grad_norm": 0.2664237320423126, "learning_rate": 2.4723725526887687e-05, "loss": 0.107, "step": 31013 }, { "epoch": 0.5531694788285235, "grad_norm": 0.3490878939628601, "learning_rate": 2.472216911488349e-05, "loss": 0.166, "step": 31014 }, { "epoch": 0.5531873149502372, "grad_norm": 0.3135441839694977, "learning_rate": 2.4720612703956263e-05, "loss": 0.1486, "step": 31015 }, { "epoch": 0.5532051510719509, "grad_norm": 0.21774740517139435, "learning_rate": 2.4719056294112026e-05, "loss": 0.1288, "step": 31016 }, { "epoch": 0.5532229871936646, "grad_norm": 0.20806358754634857, "learning_rate": 2.4717499885356837e-05, "loss": 0.1106, "step": 31017 }, { "epoch": 0.5532408233153783, "grad_norm": 0.27105849981307983, "learning_rate": 2.4715943477696715e-05, "loss": 0.1412, "step": 31018 }, { "epoch": 0.553258659437092, "grad_norm": 0.21384112536907196, "learning_rate": 2.471438707113769e-05, "loss": 0.1027, "step": 31019 }, { "epoch": 0.5532764955588056, "grad_norm": 0.25188562273979187, "learning_rate": 2.47128306656858e-05, "loss": 0.1219, "step": 31020 }, { "epoch": 0.5532943316805193, "grad_norm": 0.2596520781517029, "learning_rate": 2.4711274261347073e-05, "loss": 0.1066, "step": 31021 }, { "epoch": 0.5533121678022331, "grad_norm": 0.31644824147224426, "learning_rate": 2.4709717858127556e-05, "loss": 0.1848, "step": 31022 }, { "epoch": 0.5533300039239468, "grad_norm": 0.24661748111248016, "learning_rate": 2.470816145603327e-05, "loss": 0.1632, "step": 31023 }, { "epoch": 0.5533478400456605, "grad_norm": 0.3832049071788788, "learning_rate": 2.4706605055070242e-05, "loss": 0.135, "step": 31024 }, { "epoch": 0.5533656761673742, "grad_norm": 0.23447707295417786, "learning_rate": 2.4705048655244525e-05, "loss": 0.1169, "step": 31025 }, { "epoch": 0.5533835122890879, "grad_norm": 0.2833011746406555, "learning_rate": 2.4703492256562142e-05, "loss": 0.1259, "step": 31026 }, { "epoch": 0.5534013484108016, "grad_norm": 0.22857952117919922, "learning_rate": 2.470193585902912e-05, "loss": 0.1924, "step": 31027 }, { "epoch": 0.5534191845325153, "grad_norm": 0.3064655065536499, "learning_rate": 2.47003794626515e-05, "loss": 0.2353, "step": 31028 }, { "epoch": 0.553437020654229, "grad_norm": 0.15799081325531006, "learning_rate": 2.4698823067435303e-05, "loss": 0.0725, "step": 31029 }, { "epoch": 0.5534548567759426, "grad_norm": 0.2504464089870453, "learning_rate": 2.4697266673386584e-05, "loss": 0.1401, "step": 31030 }, { "epoch": 0.5534726928976563, "grad_norm": 0.22667838633060455, "learning_rate": 2.4695710280511365e-05, "loss": 0.1101, "step": 31031 }, { "epoch": 0.55349052901937, "grad_norm": 0.27978646755218506, "learning_rate": 2.4694153888815676e-05, "loss": 0.1506, "step": 31032 }, { "epoch": 0.5535083651410837, "grad_norm": 0.2594587206840515, "learning_rate": 2.4692597498305542e-05, "loss": 0.0991, "step": 31033 }, { "epoch": 0.5535262012627974, "grad_norm": 0.27300968766212463, "learning_rate": 2.469104110898701e-05, "loss": 0.1958, "step": 31034 }, { "epoch": 0.5535440373845111, "grad_norm": 0.26017341017723083, "learning_rate": 2.4689484720866117e-05, "loss": 0.1194, "step": 31035 }, { "epoch": 0.5535618735062248, "grad_norm": 0.25154468417167664, "learning_rate": 2.468792833394889e-05, "loss": 0.1448, "step": 31036 }, { "epoch": 0.5535797096279385, "grad_norm": 0.347650408744812, "learning_rate": 2.4686371948241345e-05, "loss": 0.1506, "step": 31037 }, { "epoch": 0.5535975457496521, "grad_norm": 0.27731940150260925, "learning_rate": 2.4684815563749543e-05, "loss": 0.1662, "step": 31038 }, { "epoch": 0.5536153818713659, "grad_norm": 0.21744950115680695, "learning_rate": 2.4683259180479508e-05, "loss": 0.1078, "step": 31039 }, { "epoch": 0.5536332179930796, "grad_norm": 0.22725103795528412, "learning_rate": 2.468170279843726e-05, "loss": 0.1215, "step": 31040 }, { "epoch": 0.5536510541147933, "grad_norm": 0.22972755134105682, "learning_rate": 2.4680146417628853e-05, "loss": 0.1357, "step": 31041 }, { "epoch": 0.553668890236507, "grad_norm": 0.298816055059433, "learning_rate": 2.467859003806029e-05, "loss": 0.0911, "step": 31042 }, { "epoch": 0.5536867263582207, "grad_norm": 0.24556376039981842, "learning_rate": 2.4677033659737643e-05, "loss": 0.1329, "step": 31043 }, { "epoch": 0.5537045624799344, "grad_norm": 0.3007456064224243, "learning_rate": 2.4675477282666917e-05, "loss": 0.1314, "step": 31044 }, { "epoch": 0.5537223986016481, "grad_norm": 0.23909218609333038, "learning_rate": 2.467392090685416e-05, "loss": 0.1282, "step": 31045 }, { "epoch": 0.5537402347233618, "grad_norm": 0.26538655161857605, "learning_rate": 2.467236453230538e-05, "loss": 0.1578, "step": 31046 }, { "epoch": 0.5537580708450754, "grad_norm": 0.3117910325527191, "learning_rate": 2.4670808159026644e-05, "loss": 0.1287, "step": 31047 }, { "epoch": 0.5537759069667891, "grad_norm": 0.344798743724823, "learning_rate": 2.4669251787023963e-05, "loss": 0.1803, "step": 31048 }, { "epoch": 0.5537937430885028, "grad_norm": 0.3242945671081543, "learning_rate": 2.4667695416303382e-05, "loss": 0.1382, "step": 31049 }, { "epoch": 0.5538115792102165, "grad_norm": 0.27023184299468994, "learning_rate": 2.4666139046870918e-05, "loss": 0.1517, "step": 31050 }, { "epoch": 0.5538294153319302, "grad_norm": 0.1973017156124115, "learning_rate": 2.4664582678732622e-05, "loss": 0.1107, "step": 31051 }, { "epoch": 0.5538472514536439, "grad_norm": 0.2021416574716568, "learning_rate": 2.4663026311894525e-05, "loss": 0.1338, "step": 31052 }, { "epoch": 0.5538650875753576, "grad_norm": 0.1999203860759735, "learning_rate": 2.466146994636265e-05, "loss": 0.0912, "step": 31053 }, { "epoch": 0.5538829236970713, "grad_norm": 0.30637460947036743, "learning_rate": 2.465991358214303e-05, "loss": 0.158, "step": 31054 }, { "epoch": 0.553900759818785, "grad_norm": 0.2713467478752136, "learning_rate": 2.4658357219241702e-05, "loss": 0.1261, "step": 31055 }, { "epoch": 0.5539185959404987, "grad_norm": 0.384473979473114, "learning_rate": 2.46568008576647e-05, "loss": 0.1749, "step": 31056 }, { "epoch": 0.5539364320622124, "grad_norm": 0.261934369802475, "learning_rate": 2.4655244497418063e-05, "loss": 0.1295, "step": 31057 }, { "epoch": 0.5539542681839261, "grad_norm": 0.3581433892250061, "learning_rate": 2.4653688138507815e-05, "loss": 0.1174, "step": 31058 }, { "epoch": 0.5539721043056398, "grad_norm": 0.24308906495571136, "learning_rate": 2.465213178093998e-05, "loss": 0.1407, "step": 31059 }, { "epoch": 0.5539899404273535, "grad_norm": 0.23988047242164612, "learning_rate": 2.4650575424720618e-05, "loss": 0.1307, "step": 31060 }, { "epoch": 0.5540077765490672, "grad_norm": 0.3088117241859436, "learning_rate": 2.4649019069855735e-05, "loss": 0.1466, "step": 31061 }, { "epoch": 0.5540256126707809, "grad_norm": 0.33846238255500793, "learning_rate": 2.4647462716351386e-05, "loss": 0.1324, "step": 31062 }, { "epoch": 0.5540434487924946, "grad_norm": 0.19548974931240082, "learning_rate": 2.464590636421358e-05, "loss": 0.1199, "step": 31063 }, { "epoch": 0.5540612849142083, "grad_norm": 0.21766163408756256, "learning_rate": 2.4644350013448373e-05, "loss": 0.133, "step": 31064 }, { "epoch": 0.5540791210359219, "grad_norm": 0.19711840152740479, "learning_rate": 2.464279366406179e-05, "loss": 0.0668, "step": 31065 }, { "epoch": 0.5540969571576356, "grad_norm": 0.36362069845199585, "learning_rate": 2.4641237316059863e-05, "loss": 0.107, "step": 31066 }, { "epoch": 0.5541147932793493, "grad_norm": 0.3641241490840912, "learning_rate": 2.4639680969448618e-05, "loss": 0.1598, "step": 31067 }, { "epoch": 0.554132629401063, "grad_norm": 0.16779862344264984, "learning_rate": 2.463812462423409e-05, "loss": 0.0708, "step": 31068 }, { "epoch": 0.5541504655227767, "grad_norm": 0.324424147605896, "learning_rate": 2.463656828042232e-05, "loss": 0.1874, "step": 31069 }, { "epoch": 0.5541683016444904, "grad_norm": 0.18128371238708496, "learning_rate": 2.4635011938019343e-05, "loss": 0.0839, "step": 31070 }, { "epoch": 0.5541861377662041, "grad_norm": 0.25565630197525024, "learning_rate": 2.4633455597031184e-05, "loss": 0.0935, "step": 31071 }, { "epoch": 0.5542039738879178, "grad_norm": 0.27423450350761414, "learning_rate": 2.4631899257463865e-05, "loss": 0.0831, "step": 31072 }, { "epoch": 0.5542218100096316, "grad_norm": 0.22155149281024933, "learning_rate": 2.4630342919323447e-05, "loss": 0.0737, "step": 31073 }, { "epoch": 0.5542396461313452, "grad_norm": 0.26568421721458435, "learning_rate": 2.462878658261594e-05, "loss": 0.1175, "step": 31074 }, { "epoch": 0.5542574822530589, "grad_norm": 0.2958373427391052, "learning_rate": 2.4627230247347386e-05, "loss": 0.1271, "step": 31075 }, { "epoch": 0.5542753183747726, "grad_norm": 0.25283315777778625, "learning_rate": 2.4625673913523814e-05, "loss": 0.1073, "step": 31076 }, { "epoch": 0.5542931544964863, "grad_norm": 0.19242306053638458, "learning_rate": 2.462411758115126e-05, "loss": 0.087, "step": 31077 }, { "epoch": 0.5543109906182, "grad_norm": 0.2242615669965744, "learning_rate": 2.4622561250235763e-05, "loss": 0.1313, "step": 31078 }, { "epoch": 0.5543288267399137, "grad_norm": 0.28264737129211426, "learning_rate": 2.4621004920783345e-05, "loss": 0.1041, "step": 31079 }, { "epoch": 0.5543466628616274, "grad_norm": 0.2663622498512268, "learning_rate": 2.4619448592800045e-05, "loss": 0.0854, "step": 31080 }, { "epoch": 0.5543644989833411, "grad_norm": 0.31599247455596924, "learning_rate": 2.461789226629188e-05, "loss": 0.1229, "step": 31081 }, { "epoch": 0.5543823351050547, "grad_norm": 0.24944135546684265, "learning_rate": 2.461633594126491e-05, "loss": 0.1177, "step": 31082 }, { "epoch": 0.5544001712267684, "grad_norm": 0.25315147638320923, "learning_rate": 2.461477961772515e-05, "loss": 0.107, "step": 31083 }, { "epoch": 0.5544180073484821, "grad_norm": 0.2580139935016632, "learning_rate": 2.4613223295678642e-05, "loss": 0.1386, "step": 31084 }, { "epoch": 0.5544358434701958, "grad_norm": 0.2543202340602875, "learning_rate": 2.4611666975131404e-05, "loss": 0.1045, "step": 31085 }, { "epoch": 0.5544536795919095, "grad_norm": 0.28528475761413574, "learning_rate": 2.461011065608949e-05, "loss": 0.1359, "step": 31086 }, { "epoch": 0.5544715157136232, "grad_norm": 0.2835729718208313, "learning_rate": 2.460855433855892e-05, "loss": 0.1519, "step": 31087 }, { "epoch": 0.5544893518353369, "grad_norm": 0.2815497815608978, "learning_rate": 2.460699802254572e-05, "loss": 0.1274, "step": 31088 }, { "epoch": 0.5545071879570507, "grad_norm": 0.3152524530887604, "learning_rate": 2.4605441708055937e-05, "loss": 0.1628, "step": 31089 }, { "epoch": 0.5545250240787644, "grad_norm": 0.28345367312431335, "learning_rate": 2.4603885395095597e-05, "loss": 0.1422, "step": 31090 }, { "epoch": 0.554542860200478, "grad_norm": 0.45184335112571716, "learning_rate": 2.4602329083670737e-05, "loss": 0.1416, "step": 31091 }, { "epoch": 0.5545606963221917, "grad_norm": 0.24651899933815002, "learning_rate": 2.460077277378739e-05, "loss": 0.1417, "step": 31092 }, { "epoch": 0.5545785324439054, "grad_norm": 0.2692287564277649, "learning_rate": 2.4599216465451584e-05, "loss": 0.0912, "step": 31093 }, { "epoch": 0.5545963685656191, "grad_norm": 0.26124873757362366, "learning_rate": 2.4597660158669343e-05, "loss": 0.173, "step": 31094 }, { "epoch": 0.5546142046873328, "grad_norm": 0.28615447878837585, "learning_rate": 2.459610385344672e-05, "loss": 0.1263, "step": 31095 }, { "epoch": 0.5546320408090465, "grad_norm": 0.29463401436805725, "learning_rate": 2.4594547549789735e-05, "loss": 0.162, "step": 31096 }, { "epoch": 0.5546498769307602, "grad_norm": 0.2976105511188507, "learning_rate": 2.4592991247704426e-05, "loss": 0.1113, "step": 31097 }, { "epoch": 0.5546677130524739, "grad_norm": 0.2727324068546295, "learning_rate": 2.4591434947196815e-05, "loss": 0.1031, "step": 31098 }, { "epoch": 0.5546855491741876, "grad_norm": 0.28413137793540955, "learning_rate": 2.4589878648272952e-05, "loss": 0.1278, "step": 31099 }, { "epoch": 0.5547033852959012, "grad_norm": 0.28571563959121704, "learning_rate": 2.4588322350938865e-05, "loss": 0.1431, "step": 31100 }, { "epoch": 0.5547212214176149, "grad_norm": 0.25055447220802307, "learning_rate": 2.4586766055200578e-05, "loss": 0.1598, "step": 31101 }, { "epoch": 0.5547390575393286, "grad_norm": 0.22316960990428925, "learning_rate": 2.458520976106412e-05, "loss": 0.1122, "step": 31102 }, { "epoch": 0.5547568936610423, "grad_norm": 0.26231351494789124, "learning_rate": 2.458365346853554e-05, "loss": 0.1289, "step": 31103 }, { "epoch": 0.554774729782756, "grad_norm": 0.294583261013031, "learning_rate": 2.4582097177620862e-05, "loss": 0.1476, "step": 31104 }, { "epoch": 0.5547925659044697, "grad_norm": 0.20654277503490448, "learning_rate": 2.4580540888326124e-05, "loss": 0.1304, "step": 31105 }, { "epoch": 0.5548104020261835, "grad_norm": 0.28445523977279663, "learning_rate": 2.4578984600657354e-05, "loss": 0.1336, "step": 31106 }, { "epoch": 0.5548282381478972, "grad_norm": 0.2922994792461395, "learning_rate": 2.4577428314620572e-05, "loss": 0.1049, "step": 31107 }, { "epoch": 0.5548460742696109, "grad_norm": 0.22151844203472137, "learning_rate": 2.4575872030221837e-05, "loss": 0.1275, "step": 31108 }, { "epoch": 0.5548639103913245, "grad_norm": 0.22371706366539001, "learning_rate": 2.457431574746717e-05, "loss": 0.1166, "step": 31109 }, { "epoch": 0.5548817465130382, "grad_norm": 0.2557823359966278, "learning_rate": 2.4572759466362593e-05, "loss": 0.1667, "step": 31110 }, { "epoch": 0.5548995826347519, "grad_norm": 0.23507322371006012, "learning_rate": 2.4571203186914145e-05, "loss": 0.1384, "step": 31111 }, { "epoch": 0.5549174187564656, "grad_norm": 0.2234814167022705, "learning_rate": 2.456964690912787e-05, "loss": 0.118, "step": 31112 }, { "epoch": 0.5549352548781793, "grad_norm": 0.21652117371559143, "learning_rate": 2.4568090633009797e-05, "loss": 0.1365, "step": 31113 }, { "epoch": 0.554953090999893, "grad_norm": 0.22298663854599, "learning_rate": 2.456653435856595e-05, "loss": 0.0743, "step": 31114 }, { "epoch": 0.5549709271216067, "grad_norm": 0.20688222348690033, "learning_rate": 2.4564978085802353e-05, "loss": 0.0949, "step": 31115 }, { "epoch": 0.5549887632433204, "grad_norm": 0.29829275608062744, "learning_rate": 2.4563421814725064e-05, "loss": 0.1537, "step": 31116 }, { "epoch": 0.555006599365034, "grad_norm": 0.2555026113986969, "learning_rate": 2.45618655453401e-05, "loss": 0.1734, "step": 31117 }, { "epoch": 0.5550244354867477, "grad_norm": 0.49102213978767395, "learning_rate": 2.45603092776535e-05, "loss": 0.1202, "step": 31118 }, { "epoch": 0.5550422716084614, "grad_norm": 0.2527959942817688, "learning_rate": 2.455875301167129e-05, "loss": 0.1524, "step": 31119 }, { "epoch": 0.5550601077301751, "grad_norm": 0.27550408244132996, "learning_rate": 2.45571967473995e-05, "loss": 0.1602, "step": 31120 }, { "epoch": 0.5550779438518888, "grad_norm": 0.2326967865228653, "learning_rate": 2.455564048484418e-05, "loss": 0.1114, "step": 31121 }, { "epoch": 0.5550957799736025, "grad_norm": 0.23051267862319946, "learning_rate": 2.4554084224011346e-05, "loss": 0.1149, "step": 31122 }, { "epoch": 0.5551136160953163, "grad_norm": 0.23868350684642792, "learning_rate": 2.455252796490703e-05, "loss": 0.1022, "step": 31123 }, { "epoch": 0.55513145221703, "grad_norm": 0.22512511909008026, "learning_rate": 2.4550971707537273e-05, "loss": 0.1071, "step": 31124 }, { "epoch": 0.5551492883387437, "grad_norm": 0.27983564138412476, "learning_rate": 2.4549415451908107e-05, "loss": 0.1112, "step": 31125 }, { "epoch": 0.5551671244604574, "grad_norm": 0.23378156125545502, "learning_rate": 2.4547859198025563e-05, "loss": 0.126, "step": 31126 }, { "epoch": 0.555184960582171, "grad_norm": 0.237847700715065, "learning_rate": 2.4546302945895673e-05, "loss": 0.105, "step": 31127 }, { "epoch": 0.5552027967038847, "grad_norm": 0.45227423310279846, "learning_rate": 2.4544746695524464e-05, "loss": 0.1279, "step": 31128 }, { "epoch": 0.5552206328255984, "grad_norm": 0.26082584261894226, "learning_rate": 2.454319044691798e-05, "loss": 0.1119, "step": 31129 }, { "epoch": 0.5552384689473121, "grad_norm": 0.303725004196167, "learning_rate": 2.454163420008225e-05, "loss": 0.2165, "step": 31130 }, { "epoch": 0.5552563050690258, "grad_norm": 0.2925204336643219, "learning_rate": 2.4540077955023298e-05, "loss": 0.1454, "step": 31131 }, { "epoch": 0.5552741411907395, "grad_norm": 0.22085171937942505, "learning_rate": 2.4538521711747166e-05, "loss": 0.1023, "step": 31132 }, { "epoch": 0.5552919773124532, "grad_norm": 0.33117154240608215, "learning_rate": 2.4536965470259875e-05, "loss": 0.1185, "step": 31133 }, { "epoch": 0.5553098134341669, "grad_norm": 0.25302746891975403, "learning_rate": 2.4535409230567474e-05, "loss": 0.1658, "step": 31134 }, { "epoch": 0.5553276495558805, "grad_norm": 0.2912423014640808, "learning_rate": 2.453385299267599e-05, "loss": 0.127, "step": 31135 }, { "epoch": 0.5553454856775942, "grad_norm": 0.34991511702537537, "learning_rate": 2.453229675659145e-05, "loss": 0.1013, "step": 31136 }, { "epoch": 0.5553633217993079, "grad_norm": 0.2708688974380493, "learning_rate": 2.453074052231988e-05, "loss": 0.1276, "step": 31137 }, { "epoch": 0.5553811579210216, "grad_norm": 0.2998257577419281, "learning_rate": 2.452918428986733e-05, "loss": 0.1706, "step": 31138 }, { "epoch": 0.5553989940427353, "grad_norm": 0.24800308048725128, "learning_rate": 2.4527628059239826e-05, "loss": 0.1166, "step": 31139 }, { "epoch": 0.5554168301644491, "grad_norm": 0.21276402473449707, "learning_rate": 2.45260718304434e-05, "loss": 0.088, "step": 31140 }, { "epoch": 0.5554346662861628, "grad_norm": 0.19047676026821136, "learning_rate": 2.4524515603484073e-05, "loss": 0.0666, "step": 31141 }, { "epoch": 0.5554525024078765, "grad_norm": 0.24123834073543549, "learning_rate": 2.4522959378367896e-05, "loss": 0.2002, "step": 31142 }, { "epoch": 0.5554703385295902, "grad_norm": 0.19772756099700928, "learning_rate": 2.4521403155100898e-05, "loss": 0.1002, "step": 31143 }, { "epoch": 0.5554881746513038, "grad_norm": 0.31100645661354065, "learning_rate": 2.4519846933689098e-05, "loss": 0.1728, "step": 31144 }, { "epoch": 0.5555060107730175, "grad_norm": 0.15906788408756256, "learning_rate": 2.451829071413854e-05, "loss": 0.0853, "step": 31145 }, { "epoch": 0.5555238468947312, "grad_norm": 0.2505277991294861, "learning_rate": 2.4516734496455248e-05, "loss": 0.1264, "step": 31146 }, { "epoch": 0.5555416830164449, "grad_norm": 0.3088262677192688, "learning_rate": 2.4515178280645267e-05, "loss": 0.1652, "step": 31147 }, { "epoch": 0.5555595191381586, "grad_norm": 0.3293002247810364, "learning_rate": 2.4513622066714624e-05, "loss": 0.1787, "step": 31148 }, { "epoch": 0.5555773552598723, "grad_norm": 0.2221067100763321, "learning_rate": 2.4512065854669352e-05, "loss": 0.0803, "step": 31149 }, { "epoch": 0.555595191381586, "grad_norm": 0.24624836444854736, "learning_rate": 2.4510509644515467e-05, "loss": 0.1207, "step": 31150 }, { "epoch": 0.5556130275032997, "grad_norm": 0.250946044921875, "learning_rate": 2.4508953436259027e-05, "loss": 0.1309, "step": 31151 }, { "epoch": 0.5556308636250133, "grad_norm": 0.26991331577301025, "learning_rate": 2.450739722990605e-05, "loss": 0.1474, "step": 31152 }, { "epoch": 0.555648699746727, "grad_norm": 0.24678856134414673, "learning_rate": 2.4505841025462577e-05, "loss": 0.1641, "step": 31153 }, { "epoch": 0.5556665358684407, "grad_norm": 0.2615037262439728, "learning_rate": 2.450428482293462e-05, "loss": 0.1134, "step": 31154 }, { "epoch": 0.5556843719901544, "grad_norm": 0.2891719937324524, "learning_rate": 2.450272862232824e-05, "loss": 0.1386, "step": 31155 }, { "epoch": 0.5557022081118681, "grad_norm": 0.22479921579360962, "learning_rate": 2.4501172423649453e-05, "loss": 0.0839, "step": 31156 }, { "epoch": 0.5557200442335819, "grad_norm": 0.27169883251190186, "learning_rate": 2.44996162269043e-05, "loss": 0.1376, "step": 31157 }, { "epoch": 0.5557378803552956, "grad_norm": 0.2793879806995392, "learning_rate": 2.4498060032098797e-05, "loss": 0.1455, "step": 31158 }, { "epoch": 0.5557557164770093, "grad_norm": 0.30989184975624084, "learning_rate": 2.4496503839238987e-05, "loss": 0.1301, "step": 31159 }, { "epoch": 0.555773552598723, "grad_norm": 0.26302963495254517, "learning_rate": 2.44949476483309e-05, "loss": 0.1291, "step": 31160 }, { "epoch": 0.5557913887204367, "grad_norm": 0.2543509006500244, "learning_rate": 2.449339145938058e-05, "loss": 0.1653, "step": 31161 }, { "epoch": 0.5558092248421503, "grad_norm": 0.32371747493743896, "learning_rate": 2.449183527239405e-05, "loss": 0.0999, "step": 31162 }, { "epoch": 0.555827060963864, "grad_norm": 0.3102806508541107, "learning_rate": 2.449027908737733e-05, "loss": 0.1152, "step": 31163 }, { "epoch": 0.5558448970855777, "grad_norm": 0.2877858877182007, "learning_rate": 2.4488722904336473e-05, "loss": 0.1497, "step": 31164 }, { "epoch": 0.5558627332072914, "grad_norm": 0.2872796356678009, "learning_rate": 2.44871667232775e-05, "loss": 0.1274, "step": 31165 }, { "epoch": 0.5558805693290051, "grad_norm": 0.299836665391922, "learning_rate": 2.448561054420645e-05, "loss": 0.1083, "step": 31166 }, { "epoch": 0.5558984054507188, "grad_norm": 0.2736889123916626, "learning_rate": 2.448405436712934e-05, "loss": 0.1796, "step": 31167 }, { "epoch": 0.5559162415724325, "grad_norm": 0.2884184420108795, "learning_rate": 2.4482498192052225e-05, "loss": 0.1343, "step": 31168 }, { "epoch": 0.5559340776941462, "grad_norm": 0.2364308089017868, "learning_rate": 2.4480942018981124e-05, "loss": 0.1339, "step": 31169 }, { "epoch": 0.5559519138158598, "grad_norm": 0.25484520196914673, "learning_rate": 2.4479385847922075e-05, "loss": 0.1896, "step": 31170 }, { "epoch": 0.5559697499375735, "grad_norm": 0.3305186331272125, "learning_rate": 2.4477829678881095e-05, "loss": 0.1364, "step": 31171 }, { "epoch": 0.5559875860592872, "grad_norm": 0.26606112718582153, "learning_rate": 2.447627351186423e-05, "loss": 0.1602, "step": 31172 }, { "epoch": 0.5560054221810009, "grad_norm": 0.2041306495666504, "learning_rate": 2.4474717346877513e-05, "loss": 0.1123, "step": 31173 }, { "epoch": 0.5560232583027147, "grad_norm": 0.3697538375854492, "learning_rate": 2.4473161183926975e-05, "loss": 0.1457, "step": 31174 }, { "epoch": 0.5560410944244284, "grad_norm": 0.22401094436645508, "learning_rate": 2.4471605023018647e-05, "loss": 0.0702, "step": 31175 }, { "epoch": 0.5560589305461421, "grad_norm": 0.24855849146842957, "learning_rate": 2.447004886415855e-05, "loss": 0.1422, "step": 31176 }, { "epoch": 0.5560767666678558, "grad_norm": 0.3434732258319855, "learning_rate": 2.4468492707352738e-05, "loss": 0.1493, "step": 31177 }, { "epoch": 0.5560946027895695, "grad_norm": 0.26988792419433594, "learning_rate": 2.4466936552607232e-05, "loss": 0.1295, "step": 31178 }, { "epoch": 0.5561124389112831, "grad_norm": 0.2510938346385956, "learning_rate": 2.4465380399928056e-05, "loss": 0.1657, "step": 31179 }, { "epoch": 0.5561302750329968, "grad_norm": 0.30789700150489807, "learning_rate": 2.446382424932125e-05, "loss": 0.1356, "step": 31180 }, { "epoch": 0.5561481111547105, "grad_norm": 0.20323596894741058, "learning_rate": 2.4462268100792852e-05, "loss": 0.1174, "step": 31181 }, { "epoch": 0.5561659472764242, "grad_norm": 0.2767643630504608, "learning_rate": 2.446071195434889e-05, "loss": 0.1892, "step": 31182 }, { "epoch": 0.5561837833981379, "grad_norm": 0.3257175087928772, "learning_rate": 2.4459155809995393e-05, "loss": 0.1606, "step": 31183 }, { "epoch": 0.5562016195198516, "grad_norm": 0.1835612803697586, "learning_rate": 2.44575996677384e-05, "loss": 0.1435, "step": 31184 }, { "epoch": 0.5562194556415653, "grad_norm": 0.27789485454559326, "learning_rate": 2.4456043527583923e-05, "loss": 0.1353, "step": 31185 }, { "epoch": 0.556237291763279, "grad_norm": 0.2492380142211914, "learning_rate": 2.445448738953802e-05, "loss": 0.0676, "step": 31186 }, { "epoch": 0.5562551278849927, "grad_norm": 0.25265613198280334, "learning_rate": 2.445293125360671e-05, "loss": 0.136, "step": 31187 }, { "epoch": 0.5562729640067063, "grad_norm": 0.46785983443260193, "learning_rate": 2.445137511979603e-05, "loss": 0.119, "step": 31188 }, { "epoch": 0.55629080012842, "grad_norm": 0.2458163946866989, "learning_rate": 2.4449818988112e-05, "loss": 0.1703, "step": 31189 }, { "epoch": 0.5563086362501338, "grad_norm": 0.3483573794364929, "learning_rate": 2.4448262858560673e-05, "loss": 0.1786, "step": 31190 }, { "epoch": 0.5563264723718475, "grad_norm": 0.19383332133293152, "learning_rate": 2.4446706731148066e-05, "loss": 0.0777, "step": 31191 }, { "epoch": 0.5563443084935612, "grad_norm": 0.2786974608898163, "learning_rate": 2.444515060588021e-05, "loss": 0.0997, "step": 31192 }, { "epoch": 0.5563621446152749, "grad_norm": 0.24330143630504608, "learning_rate": 2.4443594482763146e-05, "loss": 0.1582, "step": 31193 }, { "epoch": 0.5563799807369886, "grad_norm": 0.3224984407424927, "learning_rate": 2.4442038361802898e-05, "loss": 0.1418, "step": 31194 }, { "epoch": 0.5563978168587023, "grad_norm": 0.22933678328990936, "learning_rate": 2.444048224300551e-05, "loss": 0.1498, "step": 31195 }, { "epoch": 0.556415652980416, "grad_norm": 0.36137765645980835, "learning_rate": 2.4438926126377006e-05, "loss": 0.191, "step": 31196 }, { "epoch": 0.5564334891021296, "grad_norm": 0.19844384491443634, "learning_rate": 2.4437370011923417e-05, "loss": 0.1402, "step": 31197 }, { "epoch": 0.5564513252238433, "grad_norm": 0.3075028955936432, "learning_rate": 2.4435813899650766e-05, "loss": 0.1688, "step": 31198 }, { "epoch": 0.556469161345557, "grad_norm": 0.25540030002593994, "learning_rate": 2.4434257789565106e-05, "loss": 0.1169, "step": 31199 }, { "epoch": 0.5564869974672707, "grad_norm": 0.329569935798645, "learning_rate": 2.4432701681672455e-05, "loss": 0.1199, "step": 31200 }, { "epoch": 0.5565048335889844, "grad_norm": 0.22274599969387054, "learning_rate": 2.443114557597885e-05, "loss": 0.1077, "step": 31201 }, { "epoch": 0.5565226697106981, "grad_norm": 0.2922258973121643, "learning_rate": 2.4429589472490313e-05, "loss": 0.1027, "step": 31202 }, { "epoch": 0.5565405058324118, "grad_norm": 0.20232409238815308, "learning_rate": 2.4428033371212895e-05, "loss": 0.1056, "step": 31203 }, { "epoch": 0.5565583419541255, "grad_norm": 0.4353424608707428, "learning_rate": 2.4426477272152615e-05, "loss": 0.1437, "step": 31204 }, { "epoch": 0.5565761780758391, "grad_norm": 0.42640116810798645, "learning_rate": 2.4424921175315506e-05, "loss": 0.1189, "step": 31205 }, { "epoch": 0.5565940141975528, "grad_norm": 0.2993132472038269, "learning_rate": 2.4423365080707595e-05, "loss": 0.1731, "step": 31206 }, { "epoch": 0.5566118503192666, "grad_norm": 0.2619737386703491, "learning_rate": 2.4421808988334925e-05, "loss": 0.1102, "step": 31207 }, { "epoch": 0.5566296864409803, "grad_norm": 0.2965092658996582, "learning_rate": 2.4420252898203525e-05, "loss": 0.1467, "step": 31208 }, { "epoch": 0.556647522562694, "grad_norm": 0.1895340383052826, "learning_rate": 2.4418696810319425e-05, "loss": 0.1009, "step": 31209 }, { "epoch": 0.5566653586844077, "grad_norm": 0.29556336998939514, "learning_rate": 2.441714072468866e-05, "loss": 0.1726, "step": 31210 }, { "epoch": 0.5566831948061214, "grad_norm": 0.2298395186662674, "learning_rate": 2.4415584641317247e-05, "loss": 0.1851, "step": 31211 }, { "epoch": 0.5567010309278351, "grad_norm": 0.3290596008300781, "learning_rate": 2.4414028560211237e-05, "loss": 0.1886, "step": 31212 }, { "epoch": 0.5567188670495488, "grad_norm": 0.246909499168396, "learning_rate": 2.4412472481376658e-05, "loss": 0.1674, "step": 31213 }, { "epoch": 0.5567367031712624, "grad_norm": 0.22312992811203003, "learning_rate": 2.4410916404819533e-05, "loss": 0.108, "step": 31214 }, { "epoch": 0.5567545392929761, "grad_norm": 0.2371286302804947, "learning_rate": 2.4409360330545894e-05, "loss": 0.2067, "step": 31215 }, { "epoch": 0.5567723754146898, "grad_norm": 0.20558395981788635, "learning_rate": 2.440780425856179e-05, "loss": 0.0875, "step": 31216 }, { "epoch": 0.5567902115364035, "grad_norm": 0.22025008499622345, "learning_rate": 2.440624818887324e-05, "loss": 0.1176, "step": 31217 }, { "epoch": 0.5568080476581172, "grad_norm": 0.29906705021858215, "learning_rate": 2.4404692121486278e-05, "loss": 0.128, "step": 31218 }, { "epoch": 0.5568258837798309, "grad_norm": 0.26025450229644775, "learning_rate": 2.4403136056406924e-05, "loss": 0.0914, "step": 31219 }, { "epoch": 0.5568437199015446, "grad_norm": 0.23228274285793304, "learning_rate": 2.440157999364123e-05, "loss": 0.1302, "step": 31220 }, { "epoch": 0.5568615560232583, "grad_norm": 0.33225101232528687, "learning_rate": 2.4400023933195215e-05, "loss": 0.11, "step": 31221 }, { "epoch": 0.556879392144972, "grad_norm": 0.24343401193618774, "learning_rate": 2.439846787507492e-05, "loss": 0.0844, "step": 31222 }, { "epoch": 0.5568972282666856, "grad_norm": 0.2668202519416809, "learning_rate": 2.439691181928637e-05, "loss": 0.1144, "step": 31223 }, { "epoch": 0.5569150643883994, "grad_norm": 0.4529821574687958, "learning_rate": 2.439535576583559e-05, "loss": 0.146, "step": 31224 }, { "epoch": 0.5569329005101131, "grad_norm": 0.2705513536930084, "learning_rate": 2.4393799714728628e-05, "loss": 0.1577, "step": 31225 }, { "epoch": 0.5569507366318268, "grad_norm": 0.24749356508255005, "learning_rate": 2.439224366597151e-05, "loss": 0.1397, "step": 31226 }, { "epoch": 0.5569685727535405, "grad_norm": 0.2518272399902344, "learning_rate": 2.4390687619570257e-05, "loss": 0.144, "step": 31227 }, { "epoch": 0.5569864088752542, "grad_norm": 0.27374425530433655, "learning_rate": 2.438913157553091e-05, "loss": 0.1283, "step": 31228 }, { "epoch": 0.5570042449969679, "grad_norm": 0.2797556519508362, "learning_rate": 2.4387575533859498e-05, "loss": 0.1165, "step": 31229 }, { "epoch": 0.5570220811186816, "grad_norm": 0.229637011885643, "learning_rate": 2.4386019494562065e-05, "loss": 0.1549, "step": 31230 }, { "epoch": 0.5570399172403953, "grad_norm": 0.2515673339366913, "learning_rate": 2.438446345764463e-05, "loss": 0.1498, "step": 31231 }, { "epoch": 0.5570577533621089, "grad_norm": 0.34347423911094666, "learning_rate": 2.4382907423113217e-05, "loss": 0.0765, "step": 31232 }, { "epoch": 0.5570755894838226, "grad_norm": 0.3063320815563202, "learning_rate": 2.4381351390973877e-05, "loss": 0.1888, "step": 31233 }, { "epoch": 0.5570934256055363, "grad_norm": 0.22773747146129608, "learning_rate": 2.4379795361232636e-05, "loss": 0.1485, "step": 31234 }, { "epoch": 0.55711126172725, "grad_norm": 0.2152549773454666, "learning_rate": 2.4378239333895515e-05, "loss": 0.098, "step": 31235 }, { "epoch": 0.5571290978489637, "grad_norm": 0.23356224596500397, "learning_rate": 2.437668330896856e-05, "loss": 0.1388, "step": 31236 }, { "epoch": 0.5571469339706774, "grad_norm": 0.22563858330249786, "learning_rate": 2.437512728645778e-05, "loss": 0.1139, "step": 31237 }, { "epoch": 0.5571647700923911, "grad_norm": 0.457992285490036, "learning_rate": 2.437357126636924e-05, "loss": 0.1092, "step": 31238 }, { "epoch": 0.5571826062141048, "grad_norm": 0.31593573093414307, "learning_rate": 2.4372015248708952e-05, "loss": 0.1503, "step": 31239 }, { "epoch": 0.5572004423358184, "grad_norm": 0.2338608354330063, "learning_rate": 2.437045923348295e-05, "loss": 0.0909, "step": 31240 }, { "epoch": 0.5572182784575322, "grad_norm": 0.243311807513237, "learning_rate": 2.436890322069725e-05, "loss": 0.1218, "step": 31241 }, { "epoch": 0.5572361145792459, "grad_norm": 0.2441393882036209, "learning_rate": 2.4367347210357907e-05, "loss": 0.1595, "step": 31242 }, { "epoch": 0.5572539507009596, "grad_norm": 0.22858485579490662, "learning_rate": 2.4365791202470953e-05, "loss": 0.1423, "step": 31243 }, { "epoch": 0.5572717868226733, "grad_norm": 0.30238595604896545, "learning_rate": 2.436423519704241e-05, "loss": 0.1768, "step": 31244 }, { "epoch": 0.557289622944387, "grad_norm": 0.2489360272884369, "learning_rate": 2.4362679194078297e-05, "loss": 0.1327, "step": 31245 }, { "epoch": 0.5573074590661007, "grad_norm": 0.3165079951286316, "learning_rate": 2.4361123193584673e-05, "loss": 0.1728, "step": 31246 }, { "epoch": 0.5573252951878144, "grad_norm": 0.26704007387161255, "learning_rate": 2.4359567195567557e-05, "loss": 0.098, "step": 31247 }, { "epoch": 0.5573431313095281, "grad_norm": 0.24416470527648926, "learning_rate": 2.4358011200032973e-05, "loss": 0.1203, "step": 31248 }, { "epoch": 0.5573609674312417, "grad_norm": 0.27771514654159546, "learning_rate": 2.435645520698696e-05, "loss": 0.1227, "step": 31249 }, { "epoch": 0.5573788035529554, "grad_norm": 0.28032785654067993, "learning_rate": 2.435489921643555e-05, "loss": 0.1299, "step": 31250 }, { "epoch": 0.5573966396746691, "grad_norm": 0.2338113933801651, "learning_rate": 2.4353343228384776e-05, "loss": 0.1119, "step": 31251 }, { "epoch": 0.5574144757963828, "grad_norm": 0.21300457417964935, "learning_rate": 2.435178724284067e-05, "loss": 0.1066, "step": 31252 }, { "epoch": 0.5574323119180965, "grad_norm": 0.31338635087013245, "learning_rate": 2.4350231259809258e-05, "loss": 0.1526, "step": 31253 }, { "epoch": 0.5574501480398102, "grad_norm": 0.35606101155281067, "learning_rate": 2.4348675279296566e-05, "loss": 0.1415, "step": 31254 }, { "epoch": 0.5574679841615239, "grad_norm": 0.24667878448963165, "learning_rate": 2.434711930130864e-05, "loss": 0.1318, "step": 31255 }, { "epoch": 0.5574858202832376, "grad_norm": 0.19740404188632965, "learning_rate": 2.4345563325851503e-05, "loss": 0.0701, "step": 31256 }, { "epoch": 0.5575036564049513, "grad_norm": 0.3051196038722992, "learning_rate": 2.4344007352931194e-05, "loss": 0.1237, "step": 31257 }, { "epoch": 0.557521492526665, "grad_norm": 0.23537857830524445, "learning_rate": 2.434245138255373e-05, "loss": 0.1538, "step": 31258 }, { "epoch": 0.5575393286483787, "grad_norm": 0.2209349423646927, "learning_rate": 2.4340895414725158e-05, "loss": 0.1217, "step": 31259 }, { "epoch": 0.5575571647700924, "grad_norm": 0.25894731283187866, "learning_rate": 2.4339339449451504e-05, "loss": 0.1648, "step": 31260 }, { "epoch": 0.5575750008918061, "grad_norm": 0.22068540751934052, "learning_rate": 2.43377834867388e-05, "loss": 0.0931, "step": 31261 }, { "epoch": 0.5575928370135198, "grad_norm": 0.40496519207954407, "learning_rate": 2.4336227526593065e-05, "loss": 0.1509, "step": 31262 }, { "epoch": 0.5576106731352335, "grad_norm": 0.2762773633003235, "learning_rate": 2.4334671569020352e-05, "loss": 0.1244, "step": 31263 }, { "epoch": 0.5576285092569472, "grad_norm": 0.2728966772556305, "learning_rate": 2.4333115614026676e-05, "loss": 0.1939, "step": 31264 }, { "epoch": 0.5576463453786609, "grad_norm": 0.3698302209377289, "learning_rate": 2.433155966161808e-05, "loss": 0.1973, "step": 31265 }, { "epoch": 0.5576641815003746, "grad_norm": 0.26120084524154663, "learning_rate": 2.433000371180059e-05, "loss": 0.089, "step": 31266 }, { "epoch": 0.5576820176220882, "grad_norm": 0.28453847765922546, "learning_rate": 2.4328447764580227e-05, "loss": 0.1935, "step": 31267 }, { "epoch": 0.5576998537438019, "grad_norm": 0.2795289158821106, "learning_rate": 2.432689181996304e-05, "loss": 0.1126, "step": 31268 }, { "epoch": 0.5577176898655156, "grad_norm": 0.2534651458263397, "learning_rate": 2.432533587795505e-05, "loss": 0.1066, "step": 31269 }, { "epoch": 0.5577355259872293, "grad_norm": 0.31744736433029175, "learning_rate": 2.4323779938562292e-05, "loss": 0.1334, "step": 31270 }, { "epoch": 0.557753362108943, "grad_norm": 0.3402377963066101, "learning_rate": 2.432222400179079e-05, "loss": 0.1963, "step": 31271 }, { "epoch": 0.5577711982306567, "grad_norm": 0.25750455260276794, "learning_rate": 2.4320668067646594e-05, "loss": 0.1652, "step": 31272 }, { "epoch": 0.5577890343523704, "grad_norm": 0.2741274833679199, "learning_rate": 2.4319112136135723e-05, "loss": 0.1066, "step": 31273 }, { "epoch": 0.5578068704740841, "grad_norm": 0.25941941142082214, "learning_rate": 2.4317556207264204e-05, "loss": 0.1234, "step": 31274 }, { "epoch": 0.5578247065957979, "grad_norm": 0.25675323605537415, "learning_rate": 2.4316000281038063e-05, "loss": 0.1555, "step": 31275 }, { "epoch": 0.5578425427175115, "grad_norm": 0.26543018221855164, "learning_rate": 2.4314444357463356e-05, "loss": 0.0824, "step": 31276 }, { "epoch": 0.5578603788392252, "grad_norm": 0.3261551856994629, "learning_rate": 2.4312888436546093e-05, "loss": 0.1137, "step": 31277 }, { "epoch": 0.5578782149609389, "grad_norm": 0.4933760166168213, "learning_rate": 2.4311332518292315e-05, "loss": 0.1287, "step": 31278 }, { "epoch": 0.5578960510826526, "grad_norm": 0.2757829427719116, "learning_rate": 2.4309776602708055e-05, "loss": 0.1117, "step": 31279 }, { "epoch": 0.5579138872043663, "grad_norm": 0.27446433901786804, "learning_rate": 2.4308220689799323e-05, "loss": 0.0958, "step": 31280 }, { "epoch": 0.55793172332608, "grad_norm": 0.3416633903980255, "learning_rate": 2.4306664779572177e-05, "loss": 0.1447, "step": 31281 }, { "epoch": 0.5579495594477937, "grad_norm": 0.21480360627174377, "learning_rate": 2.430510887203264e-05, "loss": 0.1091, "step": 31282 }, { "epoch": 0.5579673955695074, "grad_norm": 0.2571437954902649, "learning_rate": 2.4303552967186737e-05, "loss": 0.1308, "step": 31283 }, { "epoch": 0.557985231691221, "grad_norm": 0.33606624603271484, "learning_rate": 2.43019970650405e-05, "loss": 0.1223, "step": 31284 }, { "epoch": 0.5580030678129347, "grad_norm": 0.22661249339580536, "learning_rate": 2.4300441165599967e-05, "loss": 0.0671, "step": 31285 }, { "epoch": 0.5580209039346484, "grad_norm": 0.2513117492198944, "learning_rate": 2.429888526887117e-05, "loss": 0.1551, "step": 31286 }, { "epoch": 0.5580387400563621, "grad_norm": 0.249809131026268, "learning_rate": 2.4297329374860134e-05, "loss": 0.1236, "step": 31287 }, { "epoch": 0.5580565761780758, "grad_norm": 0.2533338963985443, "learning_rate": 2.429577348357288e-05, "loss": 0.1206, "step": 31288 }, { "epoch": 0.5580744122997895, "grad_norm": 0.27693861722946167, "learning_rate": 2.4294217595015463e-05, "loss": 0.1198, "step": 31289 }, { "epoch": 0.5580922484215032, "grad_norm": 0.23216712474822998, "learning_rate": 2.4292661709193906e-05, "loss": 0.16, "step": 31290 }, { "epoch": 0.558110084543217, "grad_norm": 0.25312209129333496, "learning_rate": 2.429110582611423e-05, "loss": 0.1396, "step": 31291 }, { "epoch": 0.5581279206649307, "grad_norm": 0.23938021063804626, "learning_rate": 2.4289549945782473e-05, "loss": 0.1877, "step": 31292 }, { "epoch": 0.5581457567866444, "grad_norm": 0.3467380404472351, "learning_rate": 2.428799406820466e-05, "loss": 0.2085, "step": 31293 }, { "epoch": 0.558163592908358, "grad_norm": 0.2804919183254242, "learning_rate": 2.4286438193386837e-05, "loss": 0.1895, "step": 31294 }, { "epoch": 0.5581814290300717, "grad_norm": 0.3616204857826233, "learning_rate": 2.428488232133503e-05, "loss": 0.0976, "step": 31295 }, { "epoch": 0.5581992651517854, "grad_norm": 0.21757498383522034, "learning_rate": 2.4283326452055256e-05, "loss": 0.0909, "step": 31296 }, { "epoch": 0.5582171012734991, "grad_norm": 0.36979496479034424, "learning_rate": 2.4281770585553558e-05, "loss": 0.1721, "step": 31297 }, { "epoch": 0.5582349373952128, "grad_norm": 0.2841634154319763, "learning_rate": 2.4280214721835965e-05, "loss": 0.1395, "step": 31298 }, { "epoch": 0.5582527735169265, "grad_norm": 0.23487070202827454, "learning_rate": 2.4278658860908513e-05, "loss": 0.1255, "step": 31299 }, { "epoch": 0.5582706096386402, "grad_norm": 0.16677440702915192, "learning_rate": 2.4277103002777228e-05, "loss": 0.0967, "step": 31300 }, { "epoch": 0.5582884457603539, "grad_norm": 0.22451193630695343, "learning_rate": 2.427554714744813e-05, "loss": 0.1233, "step": 31301 }, { "epoch": 0.5583062818820675, "grad_norm": 0.2914694845676422, "learning_rate": 2.4273991294927274e-05, "loss": 0.1579, "step": 31302 }, { "epoch": 0.5583241180037812, "grad_norm": 0.20527410507202148, "learning_rate": 2.4272435445220677e-05, "loss": 0.1005, "step": 31303 }, { "epoch": 0.5583419541254949, "grad_norm": 0.18512022495269775, "learning_rate": 2.427087959833437e-05, "loss": 0.1056, "step": 31304 }, { "epoch": 0.5583597902472086, "grad_norm": 0.24566857516765594, "learning_rate": 2.4269323754274387e-05, "loss": 0.1247, "step": 31305 }, { "epoch": 0.5583776263689223, "grad_norm": 0.25929155945777893, "learning_rate": 2.426776791304675e-05, "loss": 0.1729, "step": 31306 }, { "epoch": 0.558395462490636, "grad_norm": 0.2857249677181244, "learning_rate": 2.4266212074657505e-05, "loss": 0.174, "step": 31307 }, { "epoch": 0.5584132986123498, "grad_norm": 0.2965015769004822, "learning_rate": 2.4264656239112676e-05, "loss": 0.1511, "step": 31308 }, { "epoch": 0.5584311347340635, "grad_norm": 0.29612934589385986, "learning_rate": 2.4263100406418295e-05, "loss": 0.1008, "step": 31309 }, { "epoch": 0.5584489708557772, "grad_norm": 0.33067581057548523, "learning_rate": 2.4261544576580377e-05, "loss": 0.1457, "step": 31310 }, { "epoch": 0.5584668069774908, "grad_norm": 0.21364837884902954, "learning_rate": 2.4259988749604984e-05, "loss": 0.1031, "step": 31311 }, { "epoch": 0.5584846430992045, "grad_norm": 0.35552164912223816, "learning_rate": 2.425843292549812e-05, "loss": 0.163, "step": 31312 }, { "epoch": 0.5585024792209182, "grad_norm": 0.2339756190776825, "learning_rate": 2.425687710426583e-05, "loss": 0.0841, "step": 31313 }, { "epoch": 0.5585203153426319, "grad_norm": 0.4499208331108093, "learning_rate": 2.4255321285914137e-05, "loss": 0.1466, "step": 31314 }, { "epoch": 0.5585381514643456, "grad_norm": 0.29159289598464966, "learning_rate": 2.4253765470449084e-05, "loss": 0.1015, "step": 31315 }, { "epoch": 0.5585559875860593, "grad_norm": 0.22079280018806458, "learning_rate": 2.425220965787669e-05, "loss": 0.1227, "step": 31316 }, { "epoch": 0.558573823707773, "grad_norm": 0.2803367078304291, "learning_rate": 2.4250653848202994e-05, "loss": 0.1132, "step": 31317 }, { "epoch": 0.5585916598294867, "grad_norm": 0.2252812683582306, "learning_rate": 2.4249098041434015e-05, "loss": 0.1458, "step": 31318 }, { "epoch": 0.5586094959512004, "grad_norm": 0.31765860319137573, "learning_rate": 2.4247542237575784e-05, "loss": 0.1242, "step": 31319 }, { "epoch": 0.558627332072914, "grad_norm": 0.23371125757694244, "learning_rate": 2.4245986436634354e-05, "loss": 0.1356, "step": 31320 }, { "epoch": 0.5586451681946277, "grad_norm": 0.21731913089752197, "learning_rate": 2.424443063861574e-05, "loss": 0.1373, "step": 31321 }, { "epoch": 0.5586630043163414, "grad_norm": 0.24007317423820496, "learning_rate": 2.4242874843525973e-05, "loss": 0.1632, "step": 31322 }, { "epoch": 0.5586808404380551, "grad_norm": 0.2753141522407532, "learning_rate": 2.4241319051371074e-05, "loss": 0.0983, "step": 31323 }, { "epoch": 0.5586986765597688, "grad_norm": 0.23878073692321777, "learning_rate": 2.4239763262157094e-05, "loss": 0.13, "step": 31324 }, { "epoch": 0.5587165126814826, "grad_norm": 0.31503576040267944, "learning_rate": 2.4238207475890052e-05, "loss": 0.1439, "step": 31325 }, { "epoch": 0.5587343488031963, "grad_norm": 0.3762138783931732, "learning_rate": 2.4236651692575985e-05, "loss": 0.0864, "step": 31326 }, { "epoch": 0.55875218492491, "grad_norm": 0.25332483649253845, "learning_rate": 2.4235095912220905e-05, "loss": 0.0681, "step": 31327 }, { "epoch": 0.5587700210466237, "grad_norm": 0.27977555990219116, "learning_rate": 2.4233540134830874e-05, "loss": 0.1156, "step": 31328 }, { "epoch": 0.5587878571683373, "grad_norm": 0.19785727560520172, "learning_rate": 2.4231984360411903e-05, "loss": 0.0633, "step": 31329 }, { "epoch": 0.558805693290051, "grad_norm": 0.2541104555130005, "learning_rate": 2.4230428588970026e-05, "loss": 0.1187, "step": 31330 }, { "epoch": 0.5588235294117647, "grad_norm": 0.2536443769931793, "learning_rate": 2.422887282051127e-05, "loss": 0.1072, "step": 31331 }, { "epoch": 0.5588413655334784, "grad_norm": 0.23140481114387512, "learning_rate": 2.422731705504167e-05, "loss": 0.1097, "step": 31332 }, { "epoch": 0.5588592016551921, "grad_norm": 0.26262009143829346, "learning_rate": 2.422576129256725e-05, "loss": 0.1303, "step": 31333 }, { "epoch": 0.5588770377769058, "grad_norm": 0.3466799557209015, "learning_rate": 2.4224205533094058e-05, "loss": 0.1591, "step": 31334 }, { "epoch": 0.5588948738986195, "grad_norm": 0.3482324481010437, "learning_rate": 2.422264977662811e-05, "loss": 0.2553, "step": 31335 }, { "epoch": 0.5589127100203332, "grad_norm": 0.27339667081832886, "learning_rate": 2.422109402317543e-05, "loss": 0.1135, "step": 31336 }, { "epoch": 0.5589305461420468, "grad_norm": 0.23287396132946014, "learning_rate": 2.421953827274207e-05, "loss": 0.1449, "step": 31337 }, { "epoch": 0.5589483822637605, "grad_norm": 0.3708248436450958, "learning_rate": 2.4217982525334047e-05, "loss": 0.1823, "step": 31338 }, { "epoch": 0.5589662183854742, "grad_norm": 0.24411886930465698, "learning_rate": 2.4216426780957393e-05, "loss": 0.1332, "step": 31339 }, { "epoch": 0.5589840545071879, "grad_norm": 0.3579823076725006, "learning_rate": 2.4214871039618138e-05, "loss": 0.1483, "step": 31340 }, { "epoch": 0.5590018906289016, "grad_norm": 0.3297712206840515, "learning_rate": 2.4213315301322313e-05, "loss": 0.1554, "step": 31341 }, { "epoch": 0.5590197267506154, "grad_norm": 0.3117685914039612, "learning_rate": 2.4211759566075953e-05, "loss": 0.1232, "step": 31342 }, { "epoch": 0.5590375628723291, "grad_norm": 0.24371536076068878, "learning_rate": 2.4210203833885087e-05, "loss": 0.1491, "step": 31343 }, { "epoch": 0.5590553989940428, "grad_norm": 0.3649060130119324, "learning_rate": 2.4208648104755745e-05, "loss": 0.141, "step": 31344 }, { "epoch": 0.5590732351157565, "grad_norm": 0.23793701827526093, "learning_rate": 2.4207092378693942e-05, "loss": 0.1024, "step": 31345 }, { "epoch": 0.5590910712374701, "grad_norm": 0.22641438245773315, "learning_rate": 2.4205536655705734e-05, "loss": 0.1109, "step": 31346 }, { "epoch": 0.5591089073591838, "grad_norm": 0.2940894067287445, "learning_rate": 2.420398093579714e-05, "loss": 0.1167, "step": 31347 }, { "epoch": 0.5591267434808975, "grad_norm": 0.2984673082828522, "learning_rate": 2.420242521897419e-05, "loss": 0.1558, "step": 31348 }, { "epoch": 0.5591445796026112, "grad_norm": 0.27737459540367126, "learning_rate": 2.420086950524291e-05, "loss": 0.1169, "step": 31349 }, { "epoch": 0.5591624157243249, "grad_norm": 0.2794012129306793, "learning_rate": 2.4199313794609342e-05, "loss": 0.1392, "step": 31350 }, { "epoch": 0.5591802518460386, "grad_norm": 0.2727005183696747, "learning_rate": 2.4197758087079513e-05, "loss": 0.1399, "step": 31351 }, { "epoch": 0.5591980879677523, "grad_norm": 0.24773304164409637, "learning_rate": 2.4196202382659446e-05, "loss": 0.1556, "step": 31352 }, { "epoch": 0.559215924089466, "grad_norm": 0.2942865490913391, "learning_rate": 2.4194646681355176e-05, "loss": 0.1276, "step": 31353 }, { "epoch": 0.5592337602111797, "grad_norm": 0.34158968925476074, "learning_rate": 2.419309098317273e-05, "loss": 0.1646, "step": 31354 }, { "epoch": 0.5592515963328933, "grad_norm": 0.27357548475265503, "learning_rate": 2.419153528811815e-05, "loss": 0.1754, "step": 31355 }, { "epoch": 0.559269432454607, "grad_norm": 0.2923235595226288, "learning_rate": 2.418997959619746e-05, "loss": 0.1142, "step": 31356 }, { "epoch": 0.5592872685763207, "grad_norm": 0.268451988697052, "learning_rate": 2.418842390741669e-05, "loss": 0.1326, "step": 31357 }, { "epoch": 0.5593051046980344, "grad_norm": 0.20715366303920746, "learning_rate": 2.418686822178185e-05, "loss": 0.1287, "step": 31358 }, { "epoch": 0.5593229408197482, "grad_norm": 0.22980226576328278, "learning_rate": 2.418531253929901e-05, "loss": 0.1162, "step": 31359 }, { "epoch": 0.5593407769414619, "grad_norm": 0.3117606043815613, "learning_rate": 2.418375685997417e-05, "loss": 0.1826, "step": 31360 }, { "epoch": 0.5593586130631756, "grad_norm": 0.24659809470176697, "learning_rate": 2.4182201183813376e-05, "loss": 0.1456, "step": 31361 }, { "epoch": 0.5593764491848893, "grad_norm": 0.21749085187911987, "learning_rate": 2.4180645510822643e-05, "loss": 0.1277, "step": 31362 }, { "epoch": 0.559394285306603, "grad_norm": 0.3348606824874878, "learning_rate": 2.4179089841008022e-05, "loss": 0.1296, "step": 31363 }, { "epoch": 0.5594121214283166, "grad_norm": 0.24284961819648743, "learning_rate": 2.4177534174375532e-05, "loss": 0.1354, "step": 31364 }, { "epoch": 0.5594299575500303, "grad_norm": 0.2766849100589752, "learning_rate": 2.4175978510931202e-05, "loss": 0.1027, "step": 31365 }, { "epoch": 0.559447793671744, "grad_norm": 0.2365037202835083, "learning_rate": 2.4174422850681054e-05, "loss": 0.1203, "step": 31366 }, { "epoch": 0.5594656297934577, "grad_norm": 0.2361527979373932, "learning_rate": 2.4172867193631142e-05, "loss": 0.1325, "step": 31367 }, { "epoch": 0.5594834659151714, "grad_norm": 0.28245800733566284, "learning_rate": 2.4171311539787474e-05, "loss": 0.1538, "step": 31368 }, { "epoch": 0.5595013020368851, "grad_norm": 0.2114911824464798, "learning_rate": 2.4169755889156093e-05, "loss": 0.0824, "step": 31369 }, { "epoch": 0.5595191381585988, "grad_norm": 0.27884936332702637, "learning_rate": 2.4168200241743024e-05, "loss": 0.1024, "step": 31370 }, { "epoch": 0.5595369742803125, "grad_norm": 0.32638683915138245, "learning_rate": 2.416664459755429e-05, "loss": 0.126, "step": 31371 }, { "epoch": 0.5595548104020261, "grad_norm": 0.25983357429504395, "learning_rate": 2.416508895659594e-05, "loss": 0.0625, "step": 31372 }, { "epoch": 0.5595726465237398, "grad_norm": 0.29169797897338867, "learning_rate": 2.416353331887399e-05, "loss": 0.132, "step": 31373 }, { "epoch": 0.5595904826454535, "grad_norm": 0.3536311984062195, "learning_rate": 2.4161977684394477e-05, "loss": 0.1278, "step": 31374 }, { "epoch": 0.5596083187671672, "grad_norm": 0.2661953866481781, "learning_rate": 2.4160422053163417e-05, "loss": 0.1349, "step": 31375 }, { "epoch": 0.559626154888881, "grad_norm": 0.32716691493988037, "learning_rate": 2.415886642518686e-05, "loss": 0.1213, "step": 31376 }, { "epoch": 0.5596439910105947, "grad_norm": 0.2951219379901886, "learning_rate": 2.4157310800470832e-05, "loss": 0.154, "step": 31377 }, { "epoch": 0.5596618271323084, "grad_norm": 0.3086981773376465, "learning_rate": 2.4155755179021356e-05, "loss": 0.1987, "step": 31378 }, { "epoch": 0.5596796632540221, "grad_norm": 0.2586064040660858, "learning_rate": 2.4154199560844455e-05, "loss": 0.1098, "step": 31379 }, { "epoch": 0.5596974993757358, "grad_norm": 0.2792659401893616, "learning_rate": 2.4152643945946173e-05, "loss": 0.1649, "step": 31380 }, { "epoch": 0.5597153354974495, "grad_norm": 0.27620840072631836, "learning_rate": 2.4151088334332538e-05, "loss": 0.207, "step": 31381 }, { "epoch": 0.5597331716191631, "grad_norm": 0.27956655621528625, "learning_rate": 2.414953272600958e-05, "loss": 0.1389, "step": 31382 }, { "epoch": 0.5597510077408768, "grad_norm": 0.39406895637512207, "learning_rate": 2.414797712098333e-05, "loss": 0.1383, "step": 31383 }, { "epoch": 0.5597688438625905, "grad_norm": 0.2809605002403259, "learning_rate": 2.41464215192598e-05, "loss": 0.1417, "step": 31384 }, { "epoch": 0.5597866799843042, "grad_norm": 0.32095375657081604, "learning_rate": 2.4144865920845046e-05, "loss": 0.1161, "step": 31385 }, { "epoch": 0.5598045161060179, "grad_norm": 0.22089307010173798, "learning_rate": 2.4143310325745087e-05, "loss": 0.1239, "step": 31386 }, { "epoch": 0.5598223522277316, "grad_norm": 0.25564903020858765, "learning_rate": 2.4141754733965952e-05, "loss": 0.0696, "step": 31387 }, { "epoch": 0.5598401883494453, "grad_norm": 0.2967927157878876, "learning_rate": 2.4140199145513667e-05, "loss": 0.189, "step": 31388 }, { "epoch": 0.559858024471159, "grad_norm": 0.33322104811668396, "learning_rate": 2.413864356039427e-05, "loss": 0.1818, "step": 31389 }, { "epoch": 0.5598758605928726, "grad_norm": 0.24041935801506042, "learning_rate": 2.4137087978613794e-05, "loss": 0.1629, "step": 31390 }, { "epoch": 0.5598936967145863, "grad_norm": 0.23456811904907227, "learning_rate": 2.4135532400178263e-05, "loss": 0.1234, "step": 31391 }, { "epoch": 0.5599115328363001, "grad_norm": 0.2907894253730774, "learning_rate": 2.4133976825093693e-05, "loss": 0.1741, "step": 31392 }, { "epoch": 0.5599293689580138, "grad_norm": 0.22135528922080994, "learning_rate": 2.4132421253366142e-05, "loss": 0.1577, "step": 31393 }, { "epoch": 0.5599472050797275, "grad_norm": 0.33190566301345825, "learning_rate": 2.4130865685001623e-05, "loss": 0.1944, "step": 31394 }, { "epoch": 0.5599650412014412, "grad_norm": 0.28703323006629944, "learning_rate": 2.4129310120006165e-05, "loss": 0.1479, "step": 31395 }, { "epoch": 0.5599828773231549, "grad_norm": 0.28238335251808167, "learning_rate": 2.4127754558385807e-05, "loss": 0.1392, "step": 31396 }, { "epoch": 0.5600007134448686, "grad_norm": 0.25788432359695435, "learning_rate": 2.4126199000146564e-05, "loss": 0.1412, "step": 31397 }, { "epoch": 0.5600185495665823, "grad_norm": 0.21524713933467865, "learning_rate": 2.4124643445294484e-05, "loss": 0.1134, "step": 31398 }, { "epoch": 0.560036385688296, "grad_norm": 0.22623580694198608, "learning_rate": 2.4123087893835587e-05, "loss": 0.1156, "step": 31399 }, { "epoch": 0.5600542218100096, "grad_norm": 0.27302345633506775, "learning_rate": 2.412153234577591e-05, "loss": 0.1778, "step": 31400 }, { "epoch": 0.5600720579317233, "grad_norm": 0.3543337881565094, "learning_rate": 2.4119976801121465e-05, "loss": 0.1561, "step": 31401 }, { "epoch": 0.560089894053437, "grad_norm": 0.3550248444080353, "learning_rate": 2.4118421259878294e-05, "loss": 0.1934, "step": 31402 }, { "epoch": 0.5601077301751507, "grad_norm": 0.17590175569057465, "learning_rate": 2.4116865722052433e-05, "loss": 0.1129, "step": 31403 }, { "epoch": 0.5601255662968644, "grad_norm": 0.3290773630142212, "learning_rate": 2.4115310187649907e-05, "loss": 0.1012, "step": 31404 }, { "epoch": 0.5601434024185781, "grad_norm": 0.4822506308555603, "learning_rate": 2.4113754656676736e-05, "loss": 0.1741, "step": 31405 }, { "epoch": 0.5601612385402918, "grad_norm": 0.2730311155319214, "learning_rate": 2.4112199129138968e-05, "loss": 0.145, "step": 31406 }, { "epoch": 0.5601790746620054, "grad_norm": 0.30690997838974, "learning_rate": 2.411064360504262e-05, "loss": 0.1346, "step": 31407 }, { "epoch": 0.5601969107837191, "grad_norm": 0.2378779798746109, "learning_rate": 2.4109088084393724e-05, "loss": 0.125, "step": 31408 }, { "epoch": 0.5602147469054329, "grad_norm": 0.32127776741981506, "learning_rate": 2.4107532567198313e-05, "loss": 0.1233, "step": 31409 }, { "epoch": 0.5602325830271466, "grad_norm": 0.2980946898460388, "learning_rate": 2.4105977053462402e-05, "loss": 0.1342, "step": 31410 }, { "epoch": 0.5602504191488603, "grad_norm": 0.1978059560060501, "learning_rate": 2.4104421543192044e-05, "loss": 0.1377, "step": 31411 }, { "epoch": 0.560268255270574, "grad_norm": 0.2657274901866913, "learning_rate": 2.4102866036393258e-05, "loss": 0.1315, "step": 31412 }, { "epoch": 0.5602860913922877, "grad_norm": 0.43789440393447876, "learning_rate": 2.4101310533072074e-05, "loss": 0.1327, "step": 31413 }, { "epoch": 0.5603039275140014, "grad_norm": 0.32028093934059143, "learning_rate": 2.409975503323451e-05, "loss": 0.1276, "step": 31414 }, { "epoch": 0.5603217636357151, "grad_norm": 0.2650982141494751, "learning_rate": 2.4098199536886616e-05, "loss": 0.1556, "step": 31415 }, { "epoch": 0.5603395997574288, "grad_norm": 0.29290321469306946, "learning_rate": 2.4096644044034406e-05, "loss": 0.1159, "step": 31416 }, { "epoch": 0.5603574358791424, "grad_norm": 0.26071709394454956, "learning_rate": 2.409508855468392e-05, "loss": 0.0988, "step": 31417 }, { "epoch": 0.5603752720008561, "grad_norm": 0.24011465907096863, "learning_rate": 2.4093533068841176e-05, "loss": 0.1082, "step": 31418 }, { "epoch": 0.5603931081225698, "grad_norm": 0.21061640977859497, "learning_rate": 2.4091977586512223e-05, "loss": 0.117, "step": 31419 }, { "epoch": 0.5604109442442835, "grad_norm": 0.3192479610443115, "learning_rate": 2.4090422107703074e-05, "loss": 0.1421, "step": 31420 }, { "epoch": 0.5604287803659972, "grad_norm": 0.24465888738632202, "learning_rate": 2.408886663241976e-05, "loss": 0.1264, "step": 31421 }, { "epoch": 0.5604466164877109, "grad_norm": 0.22927770018577576, "learning_rate": 2.4087311160668315e-05, "loss": 0.1111, "step": 31422 }, { "epoch": 0.5604644526094246, "grad_norm": 0.36364415287971497, "learning_rate": 2.4085755692454763e-05, "loss": 0.1195, "step": 31423 }, { "epoch": 0.5604822887311383, "grad_norm": 0.20706453919410706, "learning_rate": 2.4084200227785143e-05, "loss": 0.1032, "step": 31424 }, { "epoch": 0.5605001248528519, "grad_norm": 0.2741907238960266, "learning_rate": 2.408264476666548e-05, "loss": 0.1141, "step": 31425 }, { "epoch": 0.5605179609745657, "grad_norm": 0.23388290405273438, "learning_rate": 2.4081089309101806e-05, "loss": 0.1257, "step": 31426 }, { "epoch": 0.5605357970962794, "grad_norm": 0.3187490403652191, "learning_rate": 2.4079533855100132e-05, "loss": 0.1578, "step": 31427 }, { "epoch": 0.5605536332179931, "grad_norm": 0.28279832005500793, "learning_rate": 2.4077978404666516e-05, "loss": 0.1487, "step": 31428 }, { "epoch": 0.5605714693397068, "grad_norm": 0.313147634267807, "learning_rate": 2.407642295780697e-05, "loss": 0.1286, "step": 31429 }, { "epoch": 0.5605893054614205, "grad_norm": 0.31667324900627136, "learning_rate": 2.4074867514527533e-05, "loss": 0.0985, "step": 31430 }, { "epoch": 0.5606071415831342, "grad_norm": 0.24084815382957458, "learning_rate": 2.4073312074834217e-05, "loss": 0.0968, "step": 31431 }, { "epoch": 0.5606249777048479, "grad_norm": 0.2430635690689087, "learning_rate": 2.4071756638733077e-05, "loss": 0.1148, "step": 31432 }, { "epoch": 0.5606428138265616, "grad_norm": 0.3483937978744507, "learning_rate": 2.4070201206230125e-05, "loss": 0.14, "step": 31433 }, { "epoch": 0.5606606499482752, "grad_norm": 0.3065212368965149, "learning_rate": 2.4068645777331394e-05, "loss": 0.2544, "step": 31434 }, { "epoch": 0.5606784860699889, "grad_norm": 0.28280800580978394, "learning_rate": 2.406709035204291e-05, "loss": 0.1354, "step": 31435 }, { "epoch": 0.5606963221917026, "grad_norm": 0.5980995297431946, "learning_rate": 2.4065534930370704e-05, "loss": 0.1478, "step": 31436 }, { "epoch": 0.5607141583134163, "grad_norm": 0.2971732020378113, "learning_rate": 2.4063979512320812e-05, "loss": 0.1532, "step": 31437 }, { "epoch": 0.56073199443513, "grad_norm": 0.30833563208580017, "learning_rate": 2.4062424097899263e-05, "loss": 0.1498, "step": 31438 }, { "epoch": 0.5607498305568437, "grad_norm": 0.37537726759910583, "learning_rate": 2.4060868687112078e-05, "loss": 0.2028, "step": 31439 }, { "epoch": 0.5607676666785574, "grad_norm": 0.2662407457828522, "learning_rate": 2.405931327996528e-05, "loss": 0.1564, "step": 31440 }, { "epoch": 0.5607855028002711, "grad_norm": 0.25998353958129883, "learning_rate": 2.4057757876464924e-05, "loss": 0.188, "step": 31441 }, { "epoch": 0.5608033389219848, "grad_norm": 0.23177604377269745, "learning_rate": 2.405620247661702e-05, "loss": 0.1339, "step": 31442 }, { "epoch": 0.5608211750436985, "grad_norm": 0.31647083163261414, "learning_rate": 2.4054647080427598e-05, "loss": 0.1516, "step": 31443 }, { "epoch": 0.5608390111654122, "grad_norm": 0.38324934244155884, "learning_rate": 2.4053091687902687e-05, "loss": 0.1683, "step": 31444 }, { "epoch": 0.5608568472871259, "grad_norm": 0.18483775854110718, "learning_rate": 2.4051536299048323e-05, "loss": 0.1005, "step": 31445 }, { "epoch": 0.5608746834088396, "grad_norm": 0.2281084954738617, "learning_rate": 2.404998091387054e-05, "loss": 0.102, "step": 31446 }, { "epoch": 0.5608925195305533, "grad_norm": 0.2561064064502716, "learning_rate": 2.4048425532375352e-05, "loss": 0.1334, "step": 31447 }, { "epoch": 0.560910355652267, "grad_norm": 0.3684125542640686, "learning_rate": 2.4046870154568798e-05, "loss": 0.1451, "step": 31448 }, { "epoch": 0.5609281917739807, "grad_norm": 0.29495108127593994, "learning_rate": 2.4045314780456896e-05, "loss": 0.2184, "step": 31449 }, { "epoch": 0.5609460278956944, "grad_norm": 0.26864001154899597, "learning_rate": 2.4043759410045688e-05, "loss": 0.1198, "step": 31450 }, { "epoch": 0.560963864017408, "grad_norm": 0.28136900067329407, "learning_rate": 2.40422040433412e-05, "loss": 0.1467, "step": 31451 }, { "epoch": 0.5609817001391217, "grad_norm": 0.2807161808013916, "learning_rate": 2.4040648680349467e-05, "loss": 0.1331, "step": 31452 }, { "epoch": 0.5609995362608354, "grad_norm": 0.1994100958108902, "learning_rate": 2.4039093321076494e-05, "loss": 0.1138, "step": 31453 }, { "epoch": 0.5610173723825491, "grad_norm": 0.22997941076755524, "learning_rate": 2.4037537965528343e-05, "loss": 0.1614, "step": 31454 }, { "epoch": 0.5610352085042628, "grad_norm": 0.23174940049648285, "learning_rate": 2.4035982613711026e-05, "loss": 0.1259, "step": 31455 }, { "epoch": 0.5610530446259765, "grad_norm": 0.3028358221054077, "learning_rate": 2.4034427265630564e-05, "loss": 0.0863, "step": 31456 }, { "epoch": 0.5610708807476902, "grad_norm": 0.31821393966674805, "learning_rate": 2.4032871921293002e-05, "loss": 0.1826, "step": 31457 }, { "epoch": 0.5610887168694039, "grad_norm": 0.2550991475582123, "learning_rate": 2.4031316580704357e-05, "loss": 0.0723, "step": 31458 }, { "epoch": 0.5611065529911176, "grad_norm": 0.29452019929885864, "learning_rate": 2.402976124387067e-05, "loss": 0.1164, "step": 31459 }, { "epoch": 0.5611243891128314, "grad_norm": 0.28820693492889404, "learning_rate": 2.4028205910797965e-05, "loss": 0.098, "step": 31460 }, { "epoch": 0.561142225234545, "grad_norm": 0.22527167201042175, "learning_rate": 2.402665058149227e-05, "loss": 0.1627, "step": 31461 }, { "epoch": 0.5611600613562587, "grad_norm": 0.29793596267700195, "learning_rate": 2.4025095255959602e-05, "loss": 0.1407, "step": 31462 }, { "epoch": 0.5611778974779724, "grad_norm": 0.2924523651599884, "learning_rate": 2.4023539934206012e-05, "loss": 0.1459, "step": 31463 }, { "epoch": 0.5611957335996861, "grad_norm": 0.23874494433403015, "learning_rate": 2.402198461623751e-05, "loss": 0.1313, "step": 31464 }, { "epoch": 0.5612135697213998, "grad_norm": 0.35290271043777466, "learning_rate": 2.4020429302060142e-05, "loss": 0.1292, "step": 31465 }, { "epoch": 0.5612314058431135, "grad_norm": 0.29797959327697754, "learning_rate": 2.4018873991679917e-05, "loss": 0.1872, "step": 31466 }, { "epoch": 0.5612492419648272, "grad_norm": 0.28676173090934753, "learning_rate": 2.4017318685102888e-05, "loss": 0.1154, "step": 31467 }, { "epoch": 0.5612670780865409, "grad_norm": 0.26434823870658875, "learning_rate": 2.401576338233507e-05, "loss": 0.1503, "step": 31468 }, { "epoch": 0.5612849142082545, "grad_norm": 0.3844640254974365, "learning_rate": 2.4014208083382496e-05, "loss": 0.1533, "step": 31469 }, { "epoch": 0.5613027503299682, "grad_norm": 0.28696200251579285, "learning_rate": 2.4012652788251175e-05, "loss": 0.2161, "step": 31470 }, { "epoch": 0.5613205864516819, "grad_norm": 0.2041654735803604, "learning_rate": 2.4011097496947166e-05, "loss": 0.1288, "step": 31471 }, { "epoch": 0.5613384225733956, "grad_norm": 0.26276299357414246, "learning_rate": 2.400954220947648e-05, "loss": 0.1454, "step": 31472 }, { "epoch": 0.5613562586951093, "grad_norm": 0.25883156061172485, "learning_rate": 2.4007986925845154e-05, "loss": 0.1309, "step": 31473 }, { "epoch": 0.561374094816823, "grad_norm": 0.33456939458847046, "learning_rate": 2.4006431646059218e-05, "loss": 0.1275, "step": 31474 }, { "epoch": 0.5613919309385367, "grad_norm": 0.23295243084430695, "learning_rate": 2.4004876370124682e-05, "loss": 0.1196, "step": 31475 }, { "epoch": 0.5614097670602504, "grad_norm": 0.2399265170097351, "learning_rate": 2.40033210980476e-05, "loss": 0.085, "step": 31476 }, { "epoch": 0.5614276031819642, "grad_norm": 0.23919454216957092, "learning_rate": 2.400176582983399e-05, "loss": 0.1595, "step": 31477 }, { "epoch": 0.5614454393036779, "grad_norm": 0.33691704273223877, "learning_rate": 2.4000210565489877e-05, "loss": 0.145, "step": 31478 }, { "epoch": 0.5614632754253915, "grad_norm": 0.23280148208141327, "learning_rate": 2.3998655305021282e-05, "loss": 0.1136, "step": 31479 }, { "epoch": 0.5614811115471052, "grad_norm": 0.26939666271209717, "learning_rate": 2.399710004843426e-05, "loss": 0.1611, "step": 31480 }, { "epoch": 0.5614989476688189, "grad_norm": 0.2814178466796875, "learning_rate": 2.3995544795734824e-05, "loss": 0.1318, "step": 31481 }, { "epoch": 0.5615167837905326, "grad_norm": 0.25734108686447144, "learning_rate": 2.3993989546929002e-05, "loss": 0.1375, "step": 31482 }, { "epoch": 0.5615346199122463, "grad_norm": 0.248263418674469, "learning_rate": 2.3992434302022815e-05, "loss": 0.1053, "step": 31483 }, { "epoch": 0.56155245603396, "grad_norm": 0.25484323501586914, "learning_rate": 2.399087906102231e-05, "loss": 0.1065, "step": 31484 }, { "epoch": 0.5615702921556737, "grad_norm": 0.29617932438850403, "learning_rate": 2.3989323823933502e-05, "loss": 0.1417, "step": 31485 }, { "epoch": 0.5615881282773874, "grad_norm": 0.25645482540130615, "learning_rate": 2.3987768590762428e-05, "loss": 0.1698, "step": 31486 }, { "epoch": 0.561605964399101, "grad_norm": 0.24056769907474518, "learning_rate": 2.3986213361515115e-05, "loss": 0.1102, "step": 31487 }, { "epoch": 0.5616238005208147, "grad_norm": 0.2023090273141861, "learning_rate": 2.3984658136197575e-05, "loss": 0.1153, "step": 31488 }, { "epoch": 0.5616416366425284, "grad_norm": 0.41713064908981323, "learning_rate": 2.3983102914815864e-05, "loss": 0.1225, "step": 31489 }, { "epoch": 0.5616594727642421, "grad_norm": 0.2384224534034729, "learning_rate": 2.3981547697375997e-05, "loss": 0.0912, "step": 31490 }, { "epoch": 0.5616773088859558, "grad_norm": 0.2911020815372467, "learning_rate": 2.3979992483884e-05, "loss": 0.1037, "step": 31491 }, { "epoch": 0.5616951450076695, "grad_norm": 0.26076075434684753, "learning_rate": 2.39784372743459e-05, "loss": 0.1607, "step": 31492 }, { "epoch": 0.5617129811293833, "grad_norm": 0.19151878356933594, "learning_rate": 2.397688206876773e-05, "loss": 0.1202, "step": 31493 }, { "epoch": 0.561730817251097, "grad_norm": 0.3104279935359955, "learning_rate": 2.3975326867155525e-05, "loss": 0.1364, "step": 31494 }, { "epoch": 0.5617486533728107, "grad_norm": 0.2121114730834961, "learning_rate": 2.3973771669515307e-05, "loss": 0.1585, "step": 31495 }, { "epoch": 0.5617664894945243, "grad_norm": 0.20801205933094025, "learning_rate": 2.3972216475853092e-05, "loss": 0.0916, "step": 31496 }, { "epoch": 0.561784325616238, "grad_norm": 0.29379209876060486, "learning_rate": 2.3970661286174937e-05, "loss": 0.1195, "step": 31497 }, { "epoch": 0.5618021617379517, "grad_norm": 0.286279559135437, "learning_rate": 2.3969106100486853e-05, "loss": 0.1321, "step": 31498 }, { "epoch": 0.5618199978596654, "grad_norm": 0.2575676739215851, "learning_rate": 2.396755091879486e-05, "loss": 0.0872, "step": 31499 }, { "epoch": 0.5618378339813791, "grad_norm": 0.36126649379730225, "learning_rate": 2.3965995741105006e-05, "loss": 0.1446, "step": 31500 }, { "epoch": 0.5618556701030928, "grad_norm": 0.27827003598213196, "learning_rate": 2.3964440567423295e-05, "loss": 0.1698, "step": 31501 }, { "epoch": 0.5618735062248065, "grad_norm": 0.23590479791164398, "learning_rate": 2.3962885397755782e-05, "loss": 0.1479, "step": 31502 }, { "epoch": 0.5618913423465202, "grad_norm": 0.2628158926963806, "learning_rate": 2.3961330232108487e-05, "loss": 0.14, "step": 31503 }, { "epoch": 0.5619091784682338, "grad_norm": 0.4789707362651825, "learning_rate": 2.395977507048743e-05, "loss": 0.2047, "step": 31504 }, { "epoch": 0.5619270145899475, "grad_norm": 0.2957553565502167, "learning_rate": 2.3958219912898635e-05, "loss": 0.1629, "step": 31505 }, { "epoch": 0.5619448507116612, "grad_norm": 0.23507602512836456, "learning_rate": 2.3956664759348146e-05, "loss": 0.1538, "step": 31506 }, { "epoch": 0.5619626868333749, "grad_norm": 0.2927339971065521, "learning_rate": 2.395510960984199e-05, "loss": 0.1163, "step": 31507 }, { "epoch": 0.5619805229550886, "grad_norm": 0.17802639305591583, "learning_rate": 2.3953554464386187e-05, "loss": 0.126, "step": 31508 }, { "epoch": 0.5619983590768023, "grad_norm": 0.2092919945716858, "learning_rate": 2.3951999322986762e-05, "loss": 0.1244, "step": 31509 }, { "epoch": 0.5620161951985161, "grad_norm": 0.44034096598625183, "learning_rate": 2.3950444185649758e-05, "loss": 0.1617, "step": 31510 }, { "epoch": 0.5620340313202298, "grad_norm": 0.29422372579574585, "learning_rate": 2.3948889052381195e-05, "loss": 0.12, "step": 31511 }, { "epoch": 0.5620518674419435, "grad_norm": 0.2691257894039154, "learning_rate": 2.3947333923187095e-05, "loss": 0.0886, "step": 31512 }, { "epoch": 0.5620697035636572, "grad_norm": 0.2979081869125366, "learning_rate": 2.39457787980735e-05, "loss": 0.0941, "step": 31513 }, { "epoch": 0.5620875396853708, "grad_norm": 0.33796852827072144, "learning_rate": 2.3944223677046418e-05, "loss": 0.12, "step": 31514 }, { "epoch": 0.5621053758070845, "grad_norm": 0.26629310846328735, "learning_rate": 2.39426685601119e-05, "loss": 0.1534, "step": 31515 }, { "epoch": 0.5621232119287982, "grad_norm": 0.25867897272109985, "learning_rate": 2.3941113447275964e-05, "loss": 0.1296, "step": 31516 }, { "epoch": 0.5621410480505119, "grad_norm": 0.2649631202220917, "learning_rate": 2.393955833854464e-05, "loss": 0.1467, "step": 31517 }, { "epoch": 0.5621588841722256, "grad_norm": 0.28785979747772217, "learning_rate": 2.3938003233923942e-05, "loss": 0.1556, "step": 31518 }, { "epoch": 0.5621767202939393, "grad_norm": 0.34189853072166443, "learning_rate": 2.393644813341992e-05, "loss": 0.2038, "step": 31519 }, { "epoch": 0.562194556415653, "grad_norm": 0.33835071325302124, "learning_rate": 2.393489303703859e-05, "loss": 0.1185, "step": 31520 }, { "epoch": 0.5622123925373667, "grad_norm": 0.2385687381029129, "learning_rate": 2.3933337944785982e-05, "loss": 0.1691, "step": 31521 }, { "epoch": 0.5622302286590803, "grad_norm": 0.18524384498596191, "learning_rate": 2.393178285666812e-05, "loss": 0.0813, "step": 31522 }, { "epoch": 0.562248064780794, "grad_norm": 0.19806908071041107, "learning_rate": 2.3930227772691045e-05, "loss": 0.1207, "step": 31523 }, { "epoch": 0.5622659009025077, "grad_norm": 0.32618603110313416, "learning_rate": 2.3928672692860777e-05, "loss": 0.1575, "step": 31524 }, { "epoch": 0.5622837370242214, "grad_norm": 0.3744017779827118, "learning_rate": 2.3927117617183344e-05, "loss": 0.1242, "step": 31525 }, { "epoch": 0.5623015731459351, "grad_norm": 0.2930123507976532, "learning_rate": 2.3925562545664763e-05, "loss": 0.1431, "step": 31526 }, { "epoch": 0.5623194092676489, "grad_norm": 0.24760590493679047, "learning_rate": 2.3924007478311075e-05, "loss": 0.1592, "step": 31527 }, { "epoch": 0.5623372453893626, "grad_norm": 0.22133322060108185, "learning_rate": 2.3922452415128312e-05, "loss": 0.1122, "step": 31528 }, { "epoch": 0.5623550815110763, "grad_norm": 0.34430113434791565, "learning_rate": 2.39208973561225e-05, "loss": 0.1653, "step": 31529 }, { "epoch": 0.56237291763279, "grad_norm": 0.23589089512825012, "learning_rate": 2.3919342301299656e-05, "loss": 0.1209, "step": 31530 }, { "epoch": 0.5623907537545036, "grad_norm": 0.21339066326618195, "learning_rate": 2.391778725066581e-05, "loss": 0.149, "step": 31531 }, { "epoch": 0.5624085898762173, "grad_norm": 0.3414429724216461, "learning_rate": 2.3916232204227e-05, "loss": 0.1262, "step": 31532 }, { "epoch": 0.562426425997931, "grad_norm": 0.2576408088207245, "learning_rate": 2.3914677161989248e-05, "loss": 0.1664, "step": 31533 }, { "epoch": 0.5624442621196447, "grad_norm": 0.32501357793807983, "learning_rate": 2.3913122123958583e-05, "loss": 0.1715, "step": 31534 }, { "epoch": 0.5624620982413584, "grad_norm": 0.22479450702667236, "learning_rate": 2.3911567090141022e-05, "loss": 0.0836, "step": 31535 }, { "epoch": 0.5624799343630721, "grad_norm": 0.20873993635177612, "learning_rate": 2.3910012060542618e-05, "loss": 0.1213, "step": 31536 }, { "epoch": 0.5624977704847858, "grad_norm": 0.23867380619049072, "learning_rate": 2.390845703516938e-05, "loss": 0.1517, "step": 31537 }, { "epoch": 0.5625156066064995, "grad_norm": 0.25853532552719116, "learning_rate": 2.390690201402734e-05, "loss": 0.1644, "step": 31538 }, { "epoch": 0.5625334427282132, "grad_norm": 0.22269342839717865, "learning_rate": 2.390534699712252e-05, "loss": 0.0977, "step": 31539 }, { "epoch": 0.5625512788499268, "grad_norm": 0.2442581206560135, "learning_rate": 2.390379198446095e-05, "loss": 0.1287, "step": 31540 }, { "epoch": 0.5625691149716405, "grad_norm": 0.21370096504688263, "learning_rate": 2.3902236976048664e-05, "loss": 0.1322, "step": 31541 }, { "epoch": 0.5625869510933542, "grad_norm": 0.25343915820121765, "learning_rate": 2.3900681971891693e-05, "loss": 0.1053, "step": 31542 }, { "epoch": 0.5626047872150679, "grad_norm": 0.1930512934923172, "learning_rate": 2.3899126971996058e-05, "loss": 0.1081, "step": 31543 }, { "epoch": 0.5626226233367817, "grad_norm": 0.2629535496234894, "learning_rate": 2.3897571976367774e-05, "loss": 0.1135, "step": 31544 }, { "epoch": 0.5626404594584954, "grad_norm": 0.2897069752216339, "learning_rate": 2.3896016985012897e-05, "loss": 0.1237, "step": 31545 }, { "epoch": 0.5626582955802091, "grad_norm": 0.25566011667251587, "learning_rate": 2.3894461997937433e-05, "loss": 0.1522, "step": 31546 }, { "epoch": 0.5626761317019228, "grad_norm": 0.35920479893684387, "learning_rate": 2.3892907015147418e-05, "loss": 0.1527, "step": 31547 }, { "epoch": 0.5626939678236365, "grad_norm": 0.21773487329483032, "learning_rate": 2.3891352036648874e-05, "loss": 0.0874, "step": 31548 }, { "epoch": 0.5627118039453501, "grad_norm": 0.36515769362449646, "learning_rate": 2.388979706244783e-05, "loss": 0.1415, "step": 31549 }, { "epoch": 0.5627296400670638, "grad_norm": 0.24259336292743683, "learning_rate": 2.3888242092550324e-05, "loss": 0.1734, "step": 31550 }, { "epoch": 0.5627474761887775, "grad_norm": 0.2675774395465851, "learning_rate": 2.3886687126962376e-05, "loss": 0.1233, "step": 31551 }, { "epoch": 0.5627653123104912, "grad_norm": 0.21952860057353973, "learning_rate": 2.3885132165690012e-05, "loss": 0.1465, "step": 31552 }, { "epoch": 0.5627831484322049, "grad_norm": 0.2538287341594696, "learning_rate": 2.3883577208739252e-05, "loss": 0.1548, "step": 31553 }, { "epoch": 0.5628009845539186, "grad_norm": 0.23038217425346375, "learning_rate": 2.3882022256116142e-05, "loss": 0.1288, "step": 31554 }, { "epoch": 0.5628188206756323, "grad_norm": 0.3215599060058594, "learning_rate": 2.3880467307826697e-05, "loss": 0.1397, "step": 31555 }, { "epoch": 0.562836656797346, "grad_norm": 0.24070176482200623, "learning_rate": 2.3878912363876947e-05, "loss": 0.1671, "step": 31556 }, { "epoch": 0.5628544929190596, "grad_norm": 0.33674654364585876, "learning_rate": 2.3877357424272913e-05, "loss": 0.1607, "step": 31557 }, { "epoch": 0.5628723290407733, "grad_norm": 0.27031153440475464, "learning_rate": 2.387580248902064e-05, "loss": 0.1013, "step": 31558 }, { "epoch": 0.562890165162487, "grad_norm": 0.22360815107822418, "learning_rate": 2.3874247558126144e-05, "loss": 0.1084, "step": 31559 }, { "epoch": 0.5629080012842007, "grad_norm": 0.28280171751976013, "learning_rate": 2.3872692631595447e-05, "loss": 0.158, "step": 31560 }, { "epoch": 0.5629258374059145, "grad_norm": 0.2862938344478607, "learning_rate": 2.3871137709434587e-05, "loss": 0.1525, "step": 31561 }, { "epoch": 0.5629436735276282, "grad_norm": 0.21458853781223297, "learning_rate": 2.3869582791649583e-05, "loss": 0.1382, "step": 31562 }, { "epoch": 0.5629615096493419, "grad_norm": 0.26677051186561584, "learning_rate": 2.3868027878246474e-05, "loss": 0.1548, "step": 31563 }, { "epoch": 0.5629793457710556, "grad_norm": 0.21544599533081055, "learning_rate": 2.3866472969231276e-05, "loss": 0.1286, "step": 31564 }, { "epoch": 0.5629971818927693, "grad_norm": 0.30368542671203613, "learning_rate": 2.3864918064610025e-05, "loss": 0.1498, "step": 31565 }, { "epoch": 0.563015018014483, "grad_norm": 0.26172858476638794, "learning_rate": 2.386336316438873e-05, "loss": 0.1019, "step": 31566 }, { "epoch": 0.5630328541361966, "grad_norm": 0.25865015387535095, "learning_rate": 2.3861808268573447e-05, "loss": 0.1318, "step": 31567 }, { "epoch": 0.5630506902579103, "grad_norm": 0.28514692187309265, "learning_rate": 2.386025337717018e-05, "loss": 0.1161, "step": 31568 }, { "epoch": 0.563068526379624, "grad_norm": 0.29225876927375793, "learning_rate": 2.3858698490184967e-05, "loss": 0.1199, "step": 31569 }, { "epoch": 0.5630863625013377, "grad_norm": 0.3115609884262085, "learning_rate": 2.3857143607623825e-05, "loss": 0.1212, "step": 31570 }, { "epoch": 0.5631041986230514, "grad_norm": 0.288664847612381, "learning_rate": 2.38555887294928e-05, "loss": 0.169, "step": 31571 }, { "epoch": 0.5631220347447651, "grad_norm": 0.26004230976104736, "learning_rate": 2.3854033855797908e-05, "loss": 0.0906, "step": 31572 }, { "epoch": 0.5631398708664788, "grad_norm": 0.3064267039299011, "learning_rate": 2.3852478986545178e-05, "loss": 0.1581, "step": 31573 }, { "epoch": 0.5631577069881925, "grad_norm": 0.3240392208099365, "learning_rate": 2.385092412174062e-05, "loss": 0.1467, "step": 31574 }, { "epoch": 0.5631755431099061, "grad_norm": 0.5359348058700562, "learning_rate": 2.3849369261390293e-05, "loss": 0.1329, "step": 31575 }, { "epoch": 0.5631933792316198, "grad_norm": 0.2657002806663513, "learning_rate": 2.3847814405500202e-05, "loss": 0.1194, "step": 31576 }, { "epoch": 0.5632112153533335, "grad_norm": 0.30079349875450134, "learning_rate": 2.3846259554076384e-05, "loss": 0.1212, "step": 31577 }, { "epoch": 0.5632290514750473, "grad_norm": 0.3471679091453552, "learning_rate": 2.3844704707124863e-05, "loss": 0.1431, "step": 31578 }, { "epoch": 0.563246887596761, "grad_norm": 0.2554193139076233, "learning_rate": 2.3843149864651656e-05, "loss": 0.1475, "step": 31579 }, { "epoch": 0.5632647237184747, "grad_norm": 0.25658100843429565, "learning_rate": 2.3841595026662807e-05, "loss": 0.1241, "step": 31580 }, { "epoch": 0.5632825598401884, "grad_norm": 0.28506821393966675, "learning_rate": 2.384004019316434e-05, "loss": 0.1535, "step": 31581 }, { "epoch": 0.5633003959619021, "grad_norm": 0.30916842818260193, "learning_rate": 2.383848536416227e-05, "loss": 0.1059, "step": 31582 }, { "epoch": 0.5633182320836158, "grad_norm": 0.26445603370666504, "learning_rate": 2.3836930539662628e-05, "loss": 0.1342, "step": 31583 }, { "epoch": 0.5633360682053294, "grad_norm": 0.2803240120410919, "learning_rate": 2.3835375719671454e-05, "loss": 0.1491, "step": 31584 }, { "epoch": 0.5633539043270431, "grad_norm": 0.16185447573661804, "learning_rate": 2.3833820904194766e-05, "loss": 0.1203, "step": 31585 }, { "epoch": 0.5633717404487568, "grad_norm": 0.23894047737121582, "learning_rate": 2.3832266093238594e-05, "loss": 0.1365, "step": 31586 }, { "epoch": 0.5633895765704705, "grad_norm": 0.3779670298099518, "learning_rate": 2.383071128680895e-05, "loss": 0.1468, "step": 31587 }, { "epoch": 0.5634074126921842, "grad_norm": 0.46665042638778687, "learning_rate": 2.3829156484911883e-05, "loss": 0.1008, "step": 31588 }, { "epoch": 0.5634252488138979, "grad_norm": 0.34893810749053955, "learning_rate": 2.3827601687553402e-05, "loss": 0.1506, "step": 31589 }, { "epoch": 0.5634430849356116, "grad_norm": 0.3829846978187561, "learning_rate": 2.382604689473955e-05, "loss": 0.0985, "step": 31590 }, { "epoch": 0.5634609210573253, "grad_norm": 0.320425808429718, "learning_rate": 2.3824492106476345e-05, "loss": 0.1113, "step": 31591 }, { "epoch": 0.563478757179039, "grad_norm": 0.24195371568202972, "learning_rate": 2.3822937322769805e-05, "loss": 0.0892, "step": 31592 }, { "epoch": 0.5634965933007526, "grad_norm": 0.24868498742580414, "learning_rate": 2.3821382543625975e-05, "loss": 0.1047, "step": 31593 }, { "epoch": 0.5635144294224664, "grad_norm": 0.33224624395370483, "learning_rate": 2.3819827769050875e-05, "loss": 0.0948, "step": 31594 }, { "epoch": 0.5635322655441801, "grad_norm": 0.24193434417247772, "learning_rate": 2.3818272999050525e-05, "loss": 0.1465, "step": 31595 }, { "epoch": 0.5635501016658938, "grad_norm": 0.36690574884414673, "learning_rate": 2.3816718233630956e-05, "loss": 0.1723, "step": 31596 }, { "epoch": 0.5635679377876075, "grad_norm": 0.25640392303466797, "learning_rate": 2.3815163472798196e-05, "loss": 0.1293, "step": 31597 }, { "epoch": 0.5635857739093212, "grad_norm": 0.35884809494018555, "learning_rate": 2.381360871655828e-05, "loss": 0.1376, "step": 31598 }, { "epoch": 0.5636036100310349, "grad_norm": 0.25123217701911926, "learning_rate": 2.3812053964917223e-05, "loss": 0.0984, "step": 31599 }, { "epoch": 0.5636214461527486, "grad_norm": 0.31737473607063293, "learning_rate": 2.3810499217881044e-05, "loss": 0.1674, "step": 31600 }, { "epoch": 0.5636392822744622, "grad_norm": 0.3481954038143158, "learning_rate": 2.380894447545579e-05, "loss": 0.191, "step": 31601 }, { "epoch": 0.5636571183961759, "grad_norm": 0.24566833674907684, "learning_rate": 2.3807389737647483e-05, "loss": 0.1392, "step": 31602 }, { "epoch": 0.5636749545178896, "grad_norm": 0.24603791534900665, "learning_rate": 2.3805835004462136e-05, "loss": 0.1113, "step": 31603 }, { "epoch": 0.5636927906396033, "grad_norm": 0.22214391827583313, "learning_rate": 2.3804280275905794e-05, "loss": 0.1416, "step": 31604 }, { "epoch": 0.563710626761317, "grad_norm": 0.32133549451828003, "learning_rate": 2.3802725551984463e-05, "loss": 0.1509, "step": 31605 }, { "epoch": 0.5637284628830307, "grad_norm": 0.2267196774482727, "learning_rate": 2.3801170832704192e-05, "loss": 0.0849, "step": 31606 }, { "epoch": 0.5637462990047444, "grad_norm": 0.32285070419311523, "learning_rate": 2.3799616118070994e-05, "loss": 0.0792, "step": 31607 }, { "epoch": 0.5637641351264581, "grad_norm": 0.2434544563293457, "learning_rate": 2.37980614080909e-05, "loss": 0.1143, "step": 31608 }, { "epoch": 0.5637819712481718, "grad_norm": 0.23506079614162445, "learning_rate": 2.3796506702769926e-05, "loss": 0.0958, "step": 31609 }, { "epoch": 0.5637998073698854, "grad_norm": 0.22474223375320435, "learning_rate": 2.3794952002114113e-05, "loss": 0.1651, "step": 31610 }, { "epoch": 0.5638176434915992, "grad_norm": 0.21540921926498413, "learning_rate": 2.3793397306129488e-05, "loss": 0.1069, "step": 31611 }, { "epoch": 0.5638354796133129, "grad_norm": 0.22561131417751312, "learning_rate": 2.379184261482207e-05, "loss": 0.1322, "step": 31612 }, { "epoch": 0.5638533157350266, "grad_norm": 0.3039524257183075, "learning_rate": 2.3790287928197876e-05, "loss": 0.1484, "step": 31613 }, { "epoch": 0.5638711518567403, "grad_norm": 0.24317194521427155, "learning_rate": 2.3788733246262956e-05, "loss": 0.179, "step": 31614 }, { "epoch": 0.563888987978454, "grad_norm": 0.36901363730430603, "learning_rate": 2.3787178569023325e-05, "loss": 0.1516, "step": 31615 }, { "epoch": 0.5639068241001677, "grad_norm": 0.2813870906829834, "learning_rate": 2.3785623896485002e-05, "loss": 0.1315, "step": 31616 }, { "epoch": 0.5639246602218814, "grad_norm": 0.249116912484169, "learning_rate": 2.3784069228654027e-05, "loss": 0.1714, "step": 31617 }, { "epoch": 0.5639424963435951, "grad_norm": 0.35946303606033325, "learning_rate": 2.3782514565536407e-05, "loss": 0.1597, "step": 31618 }, { "epoch": 0.5639603324653087, "grad_norm": 0.27216583490371704, "learning_rate": 2.378095990713819e-05, "loss": 0.1367, "step": 31619 }, { "epoch": 0.5639781685870224, "grad_norm": 0.20986083149909973, "learning_rate": 2.37794052534654e-05, "loss": 0.1279, "step": 31620 }, { "epoch": 0.5639960047087361, "grad_norm": 0.3030647933483124, "learning_rate": 2.377785060452405e-05, "loss": 0.1728, "step": 31621 }, { "epoch": 0.5640138408304498, "grad_norm": 0.29480674862861633, "learning_rate": 2.3776295960320166e-05, "loss": 0.1521, "step": 31622 }, { "epoch": 0.5640316769521635, "grad_norm": 0.2918318808078766, "learning_rate": 2.377474132085979e-05, "loss": 0.107, "step": 31623 }, { "epoch": 0.5640495130738772, "grad_norm": 0.3118656277656555, "learning_rate": 2.3773186686148942e-05, "loss": 0.1272, "step": 31624 }, { "epoch": 0.5640673491955909, "grad_norm": 0.272695928812027, "learning_rate": 2.3771632056193645e-05, "loss": 0.1707, "step": 31625 }, { "epoch": 0.5640851853173046, "grad_norm": 0.245751291513443, "learning_rate": 2.3770077430999917e-05, "loss": 0.1334, "step": 31626 }, { "epoch": 0.5641030214390182, "grad_norm": 0.3448629379272461, "learning_rate": 2.37685228105738e-05, "loss": 0.1723, "step": 31627 }, { "epoch": 0.564120857560732, "grad_norm": 0.19837304949760437, "learning_rate": 2.376696819492132e-05, "loss": 0.1538, "step": 31628 }, { "epoch": 0.5641386936824457, "grad_norm": 0.2494966685771942, "learning_rate": 2.3765413584048492e-05, "loss": 0.1038, "step": 31629 }, { "epoch": 0.5641565298041594, "grad_norm": 0.228762686252594, "learning_rate": 2.3763858977961344e-05, "loss": 0.1606, "step": 31630 }, { "epoch": 0.5641743659258731, "grad_norm": 0.20529478788375854, "learning_rate": 2.3762304376665908e-05, "loss": 0.0634, "step": 31631 }, { "epoch": 0.5641922020475868, "grad_norm": 0.3570258617401123, "learning_rate": 2.3760749780168205e-05, "loss": 0.1452, "step": 31632 }, { "epoch": 0.5642100381693005, "grad_norm": 0.2126232236623764, "learning_rate": 2.375919518847427e-05, "loss": 0.1472, "step": 31633 }, { "epoch": 0.5642278742910142, "grad_norm": 0.2873368561267853, "learning_rate": 2.3757640601590125e-05, "loss": 0.1112, "step": 31634 }, { "epoch": 0.5642457104127279, "grad_norm": 0.26218634843826294, "learning_rate": 2.375608601952178e-05, "loss": 0.0989, "step": 31635 }, { "epoch": 0.5642635465344416, "grad_norm": 0.23815672099590302, "learning_rate": 2.375453144227529e-05, "loss": 0.0932, "step": 31636 }, { "epoch": 0.5642813826561552, "grad_norm": 0.402576744556427, "learning_rate": 2.375297686985666e-05, "loss": 0.1609, "step": 31637 }, { "epoch": 0.5642992187778689, "grad_norm": 0.21550101041793823, "learning_rate": 2.3751422302271925e-05, "loss": 0.0999, "step": 31638 }, { "epoch": 0.5643170548995826, "grad_norm": 0.30787888169288635, "learning_rate": 2.37498677395271e-05, "loss": 0.1269, "step": 31639 }, { "epoch": 0.5643348910212963, "grad_norm": 0.26787036657333374, "learning_rate": 2.3748313181628228e-05, "loss": 0.1833, "step": 31640 }, { "epoch": 0.56435272714301, "grad_norm": 0.33512693643569946, "learning_rate": 2.3746758628581327e-05, "loss": 0.1576, "step": 31641 }, { "epoch": 0.5643705632647237, "grad_norm": 0.2764591872692108, "learning_rate": 2.374520408039242e-05, "loss": 0.107, "step": 31642 }, { "epoch": 0.5643883993864374, "grad_norm": 0.28388968110084534, "learning_rate": 2.374364953706753e-05, "loss": 0.1359, "step": 31643 }, { "epoch": 0.564406235508151, "grad_norm": 0.266609251499176, "learning_rate": 2.3742094998612696e-05, "loss": 0.112, "step": 31644 }, { "epoch": 0.5644240716298649, "grad_norm": 0.2514941394329071, "learning_rate": 2.3740540465033933e-05, "loss": 0.1063, "step": 31645 }, { "epoch": 0.5644419077515785, "grad_norm": 0.5056187510490417, "learning_rate": 2.3738985936337274e-05, "loss": 0.1778, "step": 31646 }, { "epoch": 0.5644597438732922, "grad_norm": 0.3707565665245056, "learning_rate": 2.373743141252874e-05, "loss": 0.1366, "step": 31647 }, { "epoch": 0.5644775799950059, "grad_norm": 0.3683274984359741, "learning_rate": 2.3735876893614347e-05, "loss": 0.1302, "step": 31648 }, { "epoch": 0.5644954161167196, "grad_norm": 0.2075379192829132, "learning_rate": 2.3734322379600147e-05, "loss": 0.1054, "step": 31649 }, { "epoch": 0.5645132522384333, "grad_norm": 0.24443985521793365, "learning_rate": 2.3732767870492145e-05, "loss": 0.1584, "step": 31650 }, { "epoch": 0.564531088360147, "grad_norm": 0.26057642698287964, "learning_rate": 2.3731213366296372e-05, "loss": 0.1289, "step": 31651 }, { "epoch": 0.5645489244818607, "grad_norm": 0.21860824525356293, "learning_rate": 2.372965886701885e-05, "loss": 0.1378, "step": 31652 }, { "epoch": 0.5645667606035744, "grad_norm": 0.2887742221355438, "learning_rate": 2.372810437266561e-05, "loss": 0.1795, "step": 31653 }, { "epoch": 0.564584596725288, "grad_norm": 0.23349636793136597, "learning_rate": 2.3726549883242685e-05, "loss": 0.1284, "step": 31654 }, { "epoch": 0.5646024328470017, "grad_norm": 0.2939760982990265, "learning_rate": 2.3724995398756088e-05, "loss": 0.1302, "step": 31655 }, { "epoch": 0.5646202689687154, "grad_norm": 0.3866825997829437, "learning_rate": 2.3723440919211843e-05, "loss": 0.1186, "step": 31656 }, { "epoch": 0.5646381050904291, "grad_norm": 0.30090954899787903, "learning_rate": 2.372188644461599e-05, "loss": 0.1266, "step": 31657 }, { "epoch": 0.5646559412121428, "grad_norm": 0.22703874111175537, "learning_rate": 2.3720331974974545e-05, "loss": 0.1107, "step": 31658 }, { "epoch": 0.5646737773338565, "grad_norm": 0.2614491879940033, "learning_rate": 2.3718777510293533e-05, "loss": 0.1144, "step": 31659 }, { "epoch": 0.5646916134555702, "grad_norm": 0.23779863119125366, "learning_rate": 2.3717223050578987e-05, "loss": 0.1522, "step": 31660 }, { "epoch": 0.5647094495772839, "grad_norm": 0.24018393456935883, "learning_rate": 2.3715668595836914e-05, "loss": 0.1078, "step": 31661 }, { "epoch": 0.5647272856989977, "grad_norm": 0.3777134120464325, "learning_rate": 2.3714114146073368e-05, "loss": 0.1733, "step": 31662 }, { "epoch": 0.5647451218207113, "grad_norm": 0.251590758562088, "learning_rate": 2.3712559701294358e-05, "loss": 0.1868, "step": 31663 }, { "epoch": 0.564762957942425, "grad_norm": 0.31605085730552673, "learning_rate": 2.3711005261505902e-05, "loss": 0.1329, "step": 31664 }, { "epoch": 0.5647807940641387, "grad_norm": 0.24037417769432068, "learning_rate": 2.370945082671404e-05, "loss": 0.1477, "step": 31665 }, { "epoch": 0.5647986301858524, "grad_norm": 0.3526400327682495, "learning_rate": 2.3707896396924792e-05, "loss": 0.165, "step": 31666 }, { "epoch": 0.5648164663075661, "grad_norm": 0.1747768372297287, "learning_rate": 2.3706341972144185e-05, "loss": 0.1146, "step": 31667 }, { "epoch": 0.5648343024292798, "grad_norm": 0.30727043747901917, "learning_rate": 2.370478755237825e-05, "loss": 0.1341, "step": 31668 }, { "epoch": 0.5648521385509935, "grad_norm": 0.23793411254882812, "learning_rate": 2.3703233137632988e-05, "loss": 0.0998, "step": 31669 }, { "epoch": 0.5648699746727072, "grad_norm": 0.18020571768283844, "learning_rate": 2.3701678727914457e-05, "loss": 0.0935, "step": 31670 }, { "epoch": 0.5648878107944209, "grad_norm": 0.2109021544456482, "learning_rate": 2.3700124323228666e-05, "loss": 0.1395, "step": 31671 }, { "epoch": 0.5649056469161345, "grad_norm": 0.24290896952152252, "learning_rate": 2.3698569923581638e-05, "loss": 0.1086, "step": 31672 }, { "epoch": 0.5649234830378482, "grad_norm": 0.32073161005973816, "learning_rate": 2.369701552897941e-05, "loss": 0.1675, "step": 31673 }, { "epoch": 0.5649413191595619, "grad_norm": 0.27767741680145264, "learning_rate": 2.3695461139427986e-05, "loss": 0.1662, "step": 31674 }, { "epoch": 0.5649591552812756, "grad_norm": 0.22632794082164764, "learning_rate": 2.3693906754933415e-05, "loss": 0.1301, "step": 31675 }, { "epoch": 0.5649769914029893, "grad_norm": 0.23737747967243195, "learning_rate": 2.3692352375501715e-05, "loss": 0.1311, "step": 31676 }, { "epoch": 0.564994827524703, "grad_norm": 0.2506340444087982, "learning_rate": 2.3690798001138905e-05, "loss": 0.1596, "step": 31677 }, { "epoch": 0.5650126636464167, "grad_norm": 0.30249613523483276, "learning_rate": 2.3689243631851008e-05, "loss": 0.1343, "step": 31678 }, { "epoch": 0.5650304997681305, "grad_norm": 0.36731263995170593, "learning_rate": 2.3687689267644065e-05, "loss": 0.1945, "step": 31679 }, { "epoch": 0.5650483358898442, "grad_norm": 0.2557585835456848, "learning_rate": 2.3686134908524086e-05, "loss": 0.1438, "step": 31680 }, { "epoch": 0.5650661720115578, "grad_norm": 0.3712798058986664, "learning_rate": 2.3684580554497104e-05, "loss": 0.1038, "step": 31681 }, { "epoch": 0.5650840081332715, "grad_norm": 0.20703443884849548, "learning_rate": 2.3683026205569138e-05, "loss": 0.1525, "step": 31682 }, { "epoch": 0.5651018442549852, "grad_norm": 0.29868629574775696, "learning_rate": 2.3681471861746222e-05, "loss": 0.1479, "step": 31683 }, { "epoch": 0.5651196803766989, "grad_norm": 0.2785952389240265, "learning_rate": 2.3679917523034378e-05, "loss": 0.1336, "step": 31684 }, { "epoch": 0.5651375164984126, "grad_norm": 0.26928675174713135, "learning_rate": 2.3678363189439632e-05, "loss": 0.1377, "step": 31685 }, { "epoch": 0.5651553526201263, "grad_norm": 0.3255443572998047, "learning_rate": 2.3676808860967996e-05, "loss": 0.14, "step": 31686 }, { "epoch": 0.56517318874184, "grad_norm": 0.395115464925766, "learning_rate": 2.3675254537625507e-05, "loss": 0.075, "step": 31687 }, { "epoch": 0.5651910248635537, "grad_norm": 0.3141167163848877, "learning_rate": 2.3673700219418196e-05, "loss": 0.1156, "step": 31688 }, { "epoch": 0.5652088609852673, "grad_norm": 0.25743556022644043, "learning_rate": 2.3672145906352084e-05, "loss": 0.1249, "step": 31689 }, { "epoch": 0.565226697106981, "grad_norm": 0.29879993200302124, "learning_rate": 2.367059159843319e-05, "loss": 0.1274, "step": 31690 }, { "epoch": 0.5652445332286947, "grad_norm": 0.2265031486749649, "learning_rate": 2.366903729566753e-05, "loss": 0.1053, "step": 31691 }, { "epoch": 0.5652623693504084, "grad_norm": 0.1926048845052719, "learning_rate": 2.3667482998061155e-05, "loss": 0.1019, "step": 31692 }, { "epoch": 0.5652802054721221, "grad_norm": 0.20879815518856049, "learning_rate": 2.366592870562007e-05, "loss": 0.1017, "step": 31693 }, { "epoch": 0.5652980415938358, "grad_norm": 0.23162026703357697, "learning_rate": 2.3664374418350313e-05, "loss": 0.1075, "step": 31694 }, { "epoch": 0.5653158777155496, "grad_norm": 0.26945286989212036, "learning_rate": 2.3662820136257892e-05, "loss": 0.1505, "step": 31695 }, { "epoch": 0.5653337138372633, "grad_norm": 0.20930252969264984, "learning_rate": 2.366126585934885e-05, "loss": 0.0911, "step": 31696 }, { "epoch": 0.565351549958977, "grad_norm": 0.30685189366340637, "learning_rate": 2.3659711587629202e-05, "loss": 0.0946, "step": 31697 }, { "epoch": 0.5653693860806907, "grad_norm": 0.2428974211215973, "learning_rate": 2.3658157321104978e-05, "loss": 0.1724, "step": 31698 }, { "epoch": 0.5653872222024043, "grad_norm": 0.2702333629131317, "learning_rate": 2.3656603059782194e-05, "loss": 0.1277, "step": 31699 }, { "epoch": 0.565405058324118, "grad_norm": 0.23337192833423615, "learning_rate": 2.365504880366688e-05, "loss": 0.1267, "step": 31700 }, { "epoch": 0.5654228944458317, "grad_norm": 0.2820528447628021, "learning_rate": 2.365349455276506e-05, "loss": 0.161, "step": 31701 }, { "epoch": 0.5654407305675454, "grad_norm": 0.22095970809459686, "learning_rate": 2.3651940307082768e-05, "loss": 0.0814, "step": 31702 }, { "epoch": 0.5654585666892591, "grad_norm": 0.29226115345954895, "learning_rate": 2.365038606662602e-05, "loss": 0.1553, "step": 31703 }, { "epoch": 0.5654764028109728, "grad_norm": 0.25860917568206787, "learning_rate": 2.364883183140083e-05, "loss": 0.2047, "step": 31704 }, { "epoch": 0.5654942389326865, "grad_norm": 0.3201715350151062, "learning_rate": 2.3647277601413247e-05, "loss": 0.1221, "step": 31705 }, { "epoch": 0.5655120750544002, "grad_norm": 0.28690049052238464, "learning_rate": 2.3645723376669286e-05, "loss": 0.1056, "step": 31706 }, { "epoch": 0.5655299111761138, "grad_norm": 0.2842472791671753, "learning_rate": 2.3644169157174958e-05, "loss": 0.1755, "step": 31707 }, { "epoch": 0.5655477472978275, "grad_norm": 0.35904672741889954, "learning_rate": 2.36426149429363e-05, "loss": 0.1602, "step": 31708 }, { "epoch": 0.5655655834195412, "grad_norm": 0.27771344780921936, "learning_rate": 2.3641060733959335e-05, "loss": 0.1644, "step": 31709 }, { "epoch": 0.5655834195412549, "grad_norm": 0.277072936296463, "learning_rate": 2.3639506530250095e-05, "loss": 0.1714, "step": 31710 }, { "epoch": 0.5656012556629686, "grad_norm": 0.2892456650733948, "learning_rate": 2.3637952331814598e-05, "loss": 0.1027, "step": 31711 }, { "epoch": 0.5656190917846824, "grad_norm": 0.23468153178691864, "learning_rate": 2.3636398138658867e-05, "loss": 0.1374, "step": 31712 }, { "epoch": 0.5656369279063961, "grad_norm": 0.21396775543689728, "learning_rate": 2.3634843950788917e-05, "loss": 0.1246, "step": 31713 }, { "epoch": 0.5656547640281098, "grad_norm": 0.25983208417892456, "learning_rate": 2.3633289768210788e-05, "loss": 0.1649, "step": 31714 }, { "epoch": 0.5656726001498235, "grad_norm": 0.508179247379303, "learning_rate": 2.3631735590930508e-05, "loss": 0.1637, "step": 31715 }, { "epoch": 0.5656904362715371, "grad_norm": 0.22736430168151855, "learning_rate": 2.363018141895409e-05, "loss": 0.1183, "step": 31716 }, { "epoch": 0.5657082723932508, "grad_norm": 0.40757668018341064, "learning_rate": 2.3628627252287554e-05, "loss": 0.1033, "step": 31717 }, { "epoch": 0.5657261085149645, "grad_norm": 0.27163437008857727, "learning_rate": 2.3627073090936945e-05, "loss": 0.1532, "step": 31718 }, { "epoch": 0.5657439446366782, "grad_norm": 0.2641112804412842, "learning_rate": 2.362551893490827e-05, "loss": 0.1104, "step": 31719 }, { "epoch": 0.5657617807583919, "grad_norm": 0.294924259185791, "learning_rate": 2.3623964784207556e-05, "loss": 0.1575, "step": 31720 }, { "epoch": 0.5657796168801056, "grad_norm": 0.2325085550546646, "learning_rate": 2.3622410638840828e-05, "loss": 0.1337, "step": 31721 }, { "epoch": 0.5657974530018193, "grad_norm": 0.3161733150482178, "learning_rate": 2.362085649881411e-05, "loss": 0.1484, "step": 31722 }, { "epoch": 0.565815289123533, "grad_norm": 0.3015326261520386, "learning_rate": 2.3619302364133438e-05, "loss": 0.136, "step": 31723 }, { "epoch": 0.5658331252452466, "grad_norm": 0.2192542999982834, "learning_rate": 2.3617748234804825e-05, "loss": 0.14, "step": 31724 }, { "epoch": 0.5658509613669603, "grad_norm": 0.3219265043735504, "learning_rate": 2.36161941108343e-05, "loss": 0.1201, "step": 31725 }, { "epoch": 0.565868797488674, "grad_norm": 0.2728806734085083, "learning_rate": 2.361463999222787e-05, "loss": 0.1396, "step": 31726 }, { "epoch": 0.5658866336103877, "grad_norm": 0.25127914547920227, "learning_rate": 2.3613085878991587e-05, "loss": 0.1148, "step": 31727 }, { "epoch": 0.5659044697321014, "grad_norm": 0.23592594265937805, "learning_rate": 2.3611531771131457e-05, "loss": 0.125, "step": 31728 }, { "epoch": 0.5659223058538152, "grad_norm": 0.24304333329200745, "learning_rate": 2.3609977668653514e-05, "loss": 0.1695, "step": 31729 }, { "epoch": 0.5659401419755289, "grad_norm": 0.2560407519340515, "learning_rate": 2.3608423571563767e-05, "loss": 0.1232, "step": 31730 }, { "epoch": 0.5659579780972426, "grad_norm": 0.19703637063503265, "learning_rate": 2.360686947986826e-05, "loss": 0.1334, "step": 31731 }, { "epoch": 0.5659758142189563, "grad_norm": 0.3853825628757477, "learning_rate": 2.360531539357301e-05, "loss": 0.1288, "step": 31732 }, { "epoch": 0.56599365034067, "grad_norm": 0.22643814980983734, "learning_rate": 2.3603761312684038e-05, "loss": 0.1141, "step": 31733 }, { "epoch": 0.5660114864623836, "grad_norm": 0.2896316945552826, "learning_rate": 2.360220723720736e-05, "loss": 0.2069, "step": 31734 }, { "epoch": 0.5660293225840973, "grad_norm": 0.2831781208515167, "learning_rate": 2.360065316714902e-05, "loss": 0.1198, "step": 31735 }, { "epoch": 0.566047158705811, "grad_norm": 0.2595368027687073, "learning_rate": 2.3599099102515025e-05, "loss": 0.106, "step": 31736 }, { "epoch": 0.5660649948275247, "grad_norm": 0.2445223331451416, "learning_rate": 2.3597545043311416e-05, "loss": 0.1361, "step": 31737 }, { "epoch": 0.5660828309492384, "grad_norm": 0.3353172242641449, "learning_rate": 2.35959909895442e-05, "loss": 0.119, "step": 31738 }, { "epoch": 0.5661006670709521, "grad_norm": 0.24241621792316437, "learning_rate": 2.35944369412194e-05, "loss": 0.108, "step": 31739 }, { "epoch": 0.5661185031926658, "grad_norm": 0.2791469693183899, "learning_rate": 2.3592882898343062e-05, "loss": 0.1495, "step": 31740 }, { "epoch": 0.5661363393143795, "grad_norm": 0.18953026831150055, "learning_rate": 2.3591328860921186e-05, "loss": 0.1259, "step": 31741 }, { "epoch": 0.5661541754360931, "grad_norm": 0.24994948506355286, "learning_rate": 2.3589774828959813e-05, "loss": 0.1407, "step": 31742 }, { "epoch": 0.5661720115578068, "grad_norm": 0.3137910068035126, "learning_rate": 2.3588220802464947e-05, "loss": 0.1007, "step": 31743 }, { "epoch": 0.5661898476795205, "grad_norm": 0.2321649044752121, "learning_rate": 2.358666678144264e-05, "loss": 0.1315, "step": 31744 }, { "epoch": 0.5662076838012342, "grad_norm": 0.23086529970169067, "learning_rate": 2.3585112765898895e-05, "loss": 0.1085, "step": 31745 }, { "epoch": 0.566225519922948, "grad_norm": 0.29069772362709045, "learning_rate": 2.3583558755839744e-05, "loss": 0.1281, "step": 31746 }, { "epoch": 0.5662433560446617, "grad_norm": 0.18334349989891052, "learning_rate": 2.3582004751271197e-05, "loss": 0.092, "step": 31747 }, { "epoch": 0.5662611921663754, "grad_norm": 0.3791230022907257, "learning_rate": 2.35804507521993e-05, "loss": 0.2191, "step": 31748 }, { "epoch": 0.5662790282880891, "grad_norm": 0.2948589324951172, "learning_rate": 2.3578896758630065e-05, "loss": 0.1546, "step": 31749 }, { "epoch": 0.5662968644098028, "grad_norm": 0.29607513546943665, "learning_rate": 2.3577342770569518e-05, "loss": 0.151, "step": 31750 }, { "epoch": 0.5663147005315164, "grad_norm": 0.27405473589897156, "learning_rate": 2.3575788788023685e-05, "loss": 0.1376, "step": 31751 }, { "epoch": 0.5663325366532301, "grad_norm": 0.19494296610355377, "learning_rate": 2.357423481099857e-05, "loss": 0.1041, "step": 31752 }, { "epoch": 0.5663503727749438, "grad_norm": 0.2988523840904236, "learning_rate": 2.357268083950023e-05, "loss": 0.1439, "step": 31753 }, { "epoch": 0.5663682088966575, "grad_norm": 0.2279803305864334, "learning_rate": 2.357112687353467e-05, "loss": 0.1446, "step": 31754 }, { "epoch": 0.5663860450183712, "grad_norm": 0.18937614560127258, "learning_rate": 2.3569572913107913e-05, "loss": 0.1139, "step": 31755 }, { "epoch": 0.5664038811400849, "grad_norm": 0.30287057161331177, "learning_rate": 2.3568018958225982e-05, "loss": 0.1598, "step": 31756 }, { "epoch": 0.5664217172617986, "grad_norm": 0.26082295179367065, "learning_rate": 2.3566465008894903e-05, "loss": 0.1332, "step": 31757 }, { "epoch": 0.5664395533835123, "grad_norm": 0.38175928592681885, "learning_rate": 2.3564911065120708e-05, "loss": 0.149, "step": 31758 }, { "epoch": 0.566457389505226, "grad_norm": 0.36254653334617615, "learning_rate": 2.3563357126909413e-05, "loss": 0.1751, "step": 31759 }, { "epoch": 0.5664752256269396, "grad_norm": 0.2453150451183319, "learning_rate": 2.356180319426703e-05, "loss": 0.116, "step": 31760 }, { "epoch": 0.5664930617486533, "grad_norm": 0.2619331479072571, "learning_rate": 2.3560249267199612e-05, "loss": 0.1479, "step": 31761 }, { "epoch": 0.566510897870367, "grad_norm": 0.23770898580551147, "learning_rate": 2.355869534571316e-05, "loss": 0.1466, "step": 31762 }, { "epoch": 0.5665287339920808, "grad_norm": 0.28270354866981506, "learning_rate": 2.3557141429813693e-05, "loss": 0.1443, "step": 31763 }, { "epoch": 0.5665465701137945, "grad_norm": 0.2974907159805298, "learning_rate": 2.3555587519507258e-05, "loss": 0.1177, "step": 31764 }, { "epoch": 0.5665644062355082, "grad_norm": 0.22263066470623016, "learning_rate": 2.3554033614799846e-05, "loss": 0.1477, "step": 31765 }, { "epoch": 0.5665822423572219, "grad_norm": 0.22978141903877258, "learning_rate": 2.355247971569752e-05, "loss": 0.1351, "step": 31766 }, { "epoch": 0.5666000784789356, "grad_norm": 0.3551715612411499, "learning_rate": 2.3550925822206273e-05, "loss": 0.1021, "step": 31767 }, { "epoch": 0.5666179146006493, "grad_norm": 0.31905874609947205, "learning_rate": 2.3549371934332136e-05, "loss": 0.125, "step": 31768 }, { "epoch": 0.5666357507223629, "grad_norm": 0.2632167637348175, "learning_rate": 2.3547818052081133e-05, "loss": 0.1566, "step": 31769 }, { "epoch": 0.5666535868440766, "grad_norm": 0.2459852248430252, "learning_rate": 2.3546264175459294e-05, "loss": 0.1493, "step": 31770 }, { "epoch": 0.5666714229657903, "grad_norm": 0.24205629527568817, "learning_rate": 2.3544710304472636e-05, "loss": 0.1165, "step": 31771 }, { "epoch": 0.566689259087504, "grad_norm": 0.2550499141216278, "learning_rate": 2.3543156439127187e-05, "loss": 0.1261, "step": 31772 }, { "epoch": 0.5667070952092177, "grad_norm": 0.25032225251197815, "learning_rate": 2.3541602579428955e-05, "loss": 0.1405, "step": 31773 }, { "epoch": 0.5667249313309314, "grad_norm": 0.24248740077018738, "learning_rate": 2.3540048725383986e-05, "loss": 0.1423, "step": 31774 }, { "epoch": 0.5667427674526451, "grad_norm": 0.20904380083084106, "learning_rate": 2.3538494876998295e-05, "loss": 0.1286, "step": 31775 }, { "epoch": 0.5667606035743588, "grad_norm": 0.2057378888130188, "learning_rate": 2.3536941034277892e-05, "loss": 0.1282, "step": 31776 }, { "epoch": 0.5667784396960724, "grad_norm": 0.41894152760505676, "learning_rate": 2.353538719722882e-05, "loss": 0.1706, "step": 31777 }, { "epoch": 0.5667962758177861, "grad_norm": 0.2069336622953415, "learning_rate": 2.353383336585708e-05, "loss": 0.1119, "step": 31778 }, { "epoch": 0.5668141119394998, "grad_norm": 0.2987953722476959, "learning_rate": 2.3532279540168724e-05, "loss": 0.1285, "step": 31779 }, { "epoch": 0.5668319480612136, "grad_norm": 0.29955628514289856, "learning_rate": 2.3530725720169753e-05, "loss": 0.151, "step": 31780 }, { "epoch": 0.5668497841829273, "grad_norm": 0.2698342800140381, "learning_rate": 2.3529171905866198e-05, "loss": 0.1455, "step": 31781 }, { "epoch": 0.566867620304641, "grad_norm": 0.20955033600330353, "learning_rate": 2.3527618097264072e-05, "loss": 0.1261, "step": 31782 }, { "epoch": 0.5668854564263547, "grad_norm": 0.3072643280029297, "learning_rate": 2.3526064294369418e-05, "loss": 0.1743, "step": 31783 }, { "epoch": 0.5669032925480684, "grad_norm": 0.3178693652153015, "learning_rate": 2.352451049718824e-05, "loss": 0.0975, "step": 31784 }, { "epoch": 0.5669211286697821, "grad_norm": 0.27388060092926025, "learning_rate": 2.3522956705726575e-05, "loss": 0.1122, "step": 31785 }, { "epoch": 0.5669389647914957, "grad_norm": 0.2072443813085556, "learning_rate": 2.3521402919990433e-05, "loss": 0.0983, "step": 31786 }, { "epoch": 0.5669568009132094, "grad_norm": 0.275480717420578, "learning_rate": 2.351984913998585e-05, "loss": 0.0596, "step": 31787 }, { "epoch": 0.5669746370349231, "grad_norm": 0.3250078558921814, "learning_rate": 2.3518295365718846e-05, "loss": 0.1457, "step": 31788 }, { "epoch": 0.5669924731566368, "grad_norm": 0.2418910712003708, "learning_rate": 2.3516741597195436e-05, "loss": 0.125, "step": 31789 }, { "epoch": 0.5670103092783505, "grad_norm": 0.2838447690010071, "learning_rate": 2.351518783442165e-05, "loss": 0.1399, "step": 31790 }, { "epoch": 0.5670281454000642, "grad_norm": 0.2699092626571655, "learning_rate": 2.3513634077403498e-05, "loss": 0.1128, "step": 31791 }, { "epoch": 0.5670459815217779, "grad_norm": 0.5057222247123718, "learning_rate": 2.3512080326147028e-05, "loss": 0.137, "step": 31792 }, { "epoch": 0.5670638176434916, "grad_norm": 0.22933563590049744, "learning_rate": 2.3510526580658245e-05, "loss": 0.1345, "step": 31793 }, { "epoch": 0.5670816537652053, "grad_norm": 0.20409952104091644, "learning_rate": 2.3508972840943178e-05, "loss": 0.0944, "step": 31794 }, { "epoch": 0.5670994898869189, "grad_norm": 0.35851675271987915, "learning_rate": 2.3507419107007834e-05, "loss": 0.1124, "step": 31795 }, { "epoch": 0.5671173260086327, "grad_norm": 0.25478753447532654, "learning_rate": 2.350586537885826e-05, "loss": 0.193, "step": 31796 }, { "epoch": 0.5671351621303464, "grad_norm": 0.34525060653686523, "learning_rate": 2.3504311656500466e-05, "loss": 0.1628, "step": 31797 }, { "epoch": 0.5671529982520601, "grad_norm": 0.24626538157463074, "learning_rate": 2.3502757939940478e-05, "loss": 0.1331, "step": 31798 }, { "epoch": 0.5671708343737738, "grad_norm": 0.27038154006004333, "learning_rate": 2.350120422918431e-05, "loss": 0.1437, "step": 31799 }, { "epoch": 0.5671886704954875, "grad_norm": 0.36471810936927795, "learning_rate": 2.3499650524238e-05, "loss": 0.0896, "step": 31800 }, { "epoch": 0.5672065066172012, "grad_norm": 0.20968887209892273, "learning_rate": 2.3498096825107564e-05, "loss": 0.1239, "step": 31801 }, { "epoch": 0.5672243427389149, "grad_norm": 0.22653204202651978, "learning_rate": 2.3496543131799023e-05, "loss": 0.1195, "step": 31802 }, { "epoch": 0.5672421788606286, "grad_norm": 0.36928629875183105, "learning_rate": 2.3494989444318398e-05, "loss": 0.1745, "step": 31803 }, { "epoch": 0.5672600149823422, "grad_norm": 0.3669426441192627, "learning_rate": 2.3493435762671708e-05, "loss": 0.1337, "step": 31804 }, { "epoch": 0.5672778511040559, "grad_norm": 0.293701708316803, "learning_rate": 2.3491882086864982e-05, "loss": 0.1636, "step": 31805 }, { "epoch": 0.5672956872257696, "grad_norm": 0.24530857801437378, "learning_rate": 2.349032841690425e-05, "loss": 0.1976, "step": 31806 }, { "epoch": 0.5673135233474833, "grad_norm": 0.304094135761261, "learning_rate": 2.3488774752795528e-05, "loss": 0.1476, "step": 31807 }, { "epoch": 0.567331359469197, "grad_norm": 0.2270730435848236, "learning_rate": 2.3487221094544823e-05, "loss": 0.13, "step": 31808 }, { "epoch": 0.5673491955909107, "grad_norm": 0.26939326524734497, "learning_rate": 2.3485667442158186e-05, "loss": 0.1602, "step": 31809 }, { "epoch": 0.5673670317126244, "grad_norm": 0.21226643025875092, "learning_rate": 2.3484113795641622e-05, "loss": 0.0952, "step": 31810 }, { "epoch": 0.5673848678343381, "grad_norm": 0.268289178609848, "learning_rate": 2.3482560155001153e-05, "loss": 0.1723, "step": 31811 }, { "epoch": 0.5674027039560517, "grad_norm": 0.2653530538082123, "learning_rate": 2.3481006520242802e-05, "loss": 0.1076, "step": 31812 }, { "epoch": 0.5674205400777655, "grad_norm": 0.22172777354717255, "learning_rate": 2.3479452891372594e-05, "loss": 0.0963, "step": 31813 }, { "epoch": 0.5674383761994792, "grad_norm": 0.20208679139614105, "learning_rate": 2.347789926839656e-05, "loss": 0.0771, "step": 31814 }, { "epoch": 0.5674562123211929, "grad_norm": 0.21570773422718048, "learning_rate": 2.3476345651320713e-05, "loss": 0.1293, "step": 31815 }, { "epoch": 0.5674740484429066, "grad_norm": 0.24860739707946777, "learning_rate": 2.347479204015108e-05, "loss": 0.1052, "step": 31816 }, { "epoch": 0.5674918845646203, "grad_norm": 0.31034550070762634, "learning_rate": 2.3473238434893662e-05, "loss": 0.1394, "step": 31817 }, { "epoch": 0.567509720686334, "grad_norm": 0.2236240953207016, "learning_rate": 2.347168483555451e-05, "loss": 0.1212, "step": 31818 }, { "epoch": 0.5675275568080477, "grad_norm": 0.46751800179481506, "learning_rate": 2.3470131242139637e-05, "loss": 0.1669, "step": 31819 }, { "epoch": 0.5675453929297614, "grad_norm": 0.30457568168640137, "learning_rate": 2.3468577654655066e-05, "loss": 0.0997, "step": 31820 }, { "epoch": 0.567563229051475, "grad_norm": 0.26304295659065247, "learning_rate": 2.3467024073106807e-05, "loss": 0.1565, "step": 31821 }, { "epoch": 0.5675810651731887, "grad_norm": 0.3105275630950928, "learning_rate": 2.34654704975009e-05, "loss": 0.1089, "step": 31822 }, { "epoch": 0.5675989012949024, "grad_norm": 0.23342853784561157, "learning_rate": 2.3463916927843364e-05, "loss": 0.1311, "step": 31823 }, { "epoch": 0.5676167374166161, "grad_norm": 0.2506275475025177, "learning_rate": 2.346236336414021e-05, "loss": 0.1197, "step": 31824 }, { "epoch": 0.5676345735383298, "grad_norm": 0.29939284920692444, "learning_rate": 2.3460809806397462e-05, "loss": 0.1589, "step": 31825 }, { "epoch": 0.5676524096600435, "grad_norm": 0.3076852560043335, "learning_rate": 2.345925625462115e-05, "loss": 0.1414, "step": 31826 }, { "epoch": 0.5676702457817572, "grad_norm": 0.35731741786003113, "learning_rate": 2.34577027088173e-05, "loss": 0.2079, "step": 31827 }, { "epoch": 0.5676880819034709, "grad_norm": 0.2032073438167572, "learning_rate": 2.3456149168991924e-05, "loss": 0.1229, "step": 31828 }, { "epoch": 0.5677059180251846, "grad_norm": 0.18575747311115265, "learning_rate": 2.345459563515105e-05, "loss": 0.1095, "step": 31829 }, { "epoch": 0.5677237541468984, "grad_norm": 0.2545779049396515, "learning_rate": 2.3453042107300682e-05, "loss": 0.1396, "step": 31830 }, { "epoch": 0.567741590268612, "grad_norm": 0.25183358788490295, "learning_rate": 2.345148858544687e-05, "loss": 0.1648, "step": 31831 }, { "epoch": 0.5677594263903257, "grad_norm": 0.35085731744766235, "learning_rate": 2.3449935069595618e-05, "loss": 0.1143, "step": 31832 }, { "epoch": 0.5677772625120394, "grad_norm": 0.28099489212036133, "learning_rate": 2.344838155975296e-05, "loss": 0.1137, "step": 31833 }, { "epoch": 0.5677950986337531, "grad_norm": 0.27445825934410095, "learning_rate": 2.34468280559249e-05, "loss": 0.0834, "step": 31834 }, { "epoch": 0.5678129347554668, "grad_norm": 0.2711659073829651, "learning_rate": 2.344527455811748e-05, "loss": 0.1134, "step": 31835 }, { "epoch": 0.5678307708771805, "grad_norm": 0.34612905979156494, "learning_rate": 2.344372106633671e-05, "loss": 0.1334, "step": 31836 }, { "epoch": 0.5678486069988942, "grad_norm": 0.18164853751659393, "learning_rate": 2.3442167580588624e-05, "loss": 0.0841, "step": 31837 }, { "epoch": 0.5678664431206079, "grad_norm": 0.29229211807250977, "learning_rate": 2.3440614100879217e-05, "loss": 0.1462, "step": 31838 }, { "epoch": 0.5678842792423215, "grad_norm": 0.28702273964881897, "learning_rate": 2.3439060627214536e-05, "loss": 0.2186, "step": 31839 }, { "epoch": 0.5679021153640352, "grad_norm": 0.2673148810863495, "learning_rate": 2.3437507159600597e-05, "loss": 0.1317, "step": 31840 }, { "epoch": 0.5679199514857489, "grad_norm": 0.2983056604862213, "learning_rate": 2.3435953698043418e-05, "loss": 0.1997, "step": 31841 }, { "epoch": 0.5679377876074626, "grad_norm": 0.3157918453216553, "learning_rate": 2.343440024254903e-05, "loss": 0.1529, "step": 31842 }, { "epoch": 0.5679556237291763, "grad_norm": 0.21640507876873016, "learning_rate": 2.343284679312343e-05, "loss": 0.145, "step": 31843 }, { "epoch": 0.56797345985089, "grad_norm": 0.23252245783805847, "learning_rate": 2.3431293349772672e-05, "loss": 0.1157, "step": 31844 }, { "epoch": 0.5679912959726037, "grad_norm": 0.28265056014060974, "learning_rate": 2.3429739912502757e-05, "loss": 0.1789, "step": 31845 }, { "epoch": 0.5680091320943174, "grad_norm": 0.2644653618335724, "learning_rate": 2.342818648131972e-05, "loss": 0.139, "step": 31846 }, { "epoch": 0.5680269682160312, "grad_norm": 0.1959562748670578, "learning_rate": 2.342663305622956e-05, "loss": 0.0951, "step": 31847 }, { "epoch": 0.5680448043377448, "grad_norm": 0.24504858255386353, "learning_rate": 2.3425079637238326e-05, "loss": 0.1548, "step": 31848 }, { "epoch": 0.5680626404594585, "grad_norm": 0.21574705839157104, "learning_rate": 2.3423526224352026e-05, "loss": 0.1129, "step": 31849 }, { "epoch": 0.5680804765811722, "grad_norm": 0.2897961437702179, "learning_rate": 2.3421972817576686e-05, "loss": 0.0994, "step": 31850 }, { "epoch": 0.5680983127028859, "grad_norm": 0.23364970088005066, "learning_rate": 2.342041941691831e-05, "loss": 0.1182, "step": 31851 }, { "epoch": 0.5681161488245996, "grad_norm": 0.4226117432117462, "learning_rate": 2.3418866022382948e-05, "loss": 0.1823, "step": 31852 }, { "epoch": 0.5681339849463133, "grad_norm": 0.239666149020195, "learning_rate": 2.34173126339766e-05, "loss": 0.1228, "step": 31853 }, { "epoch": 0.568151821068027, "grad_norm": 0.3042081892490387, "learning_rate": 2.34157592517053e-05, "loss": 0.1316, "step": 31854 }, { "epoch": 0.5681696571897407, "grad_norm": 0.24170371890068054, "learning_rate": 2.3414205875575068e-05, "loss": 0.1067, "step": 31855 }, { "epoch": 0.5681874933114544, "grad_norm": 0.2811267077922821, "learning_rate": 2.3412652505591905e-05, "loss": 0.1101, "step": 31856 }, { "epoch": 0.568205329433168, "grad_norm": 0.29492050409317017, "learning_rate": 2.3411099141761864e-05, "loss": 0.1344, "step": 31857 }, { "epoch": 0.5682231655548817, "grad_norm": 0.3043202757835388, "learning_rate": 2.3409545784090952e-05, "loss": 0.1036, "step": 31858 }, { "epoch": 0.5682410016765954, "grad_norm": 0.27893373370170593, "learning_rate": 2.3407992432585183e-05, "loss": 0.1489, "step": 31859 }, { "epoch": 0.5682588377983091, "grad_norm": 0.23858894407749176, "learning_rate": 2.3406439087250584e-05, "loss": 0.1242, "step": 31860 }, { "epoch": 0.5682766739200228, "grad_norm": 0.31661051511764526, "learning_rate": 2.340488574809318e-05, "loss": 0.1698, "step": 31861 }, { "epoch": 0.5682945100417365, "grad_norm": 0.20748351514339447, "learning_rate": 2.3403332415118993e-05, "loss": 0.0778, "step": 31862 }, { "epoch": 0.5683123461634502, "grad_norm": 0.24186772108078003, "learning_rate": 2.3401779088334043e-05, "loss": 0.1236, "step": 31863 }, { "epoch": 0.568330182285164, "grad_norm": 0.3661872446537018, "learning_rate": 2.3400225767744336e-05, "loss": 0.1579, "step": 31864 }, { "epoch": 0.5683480184068777, "grad_norm": 0.3736518919467926, "learning_rate": 2.339867245335592e-05, "loss": 0.1422, "step": 31865 }, { "epoch": 0.5683658545285913, "grad_norm": 0.21076396107673645, "learning_rate": 2.33971191451748e-05, "loss": 0.0995, "step": 31866 }, { "epoch": 0.568383690650305, "grad_norm": 0.38257306814193726, "learning_rate": 2.3395565843206995e-05, "loss": 0.1414, "step": 31867 }, { "epoch": 0.5684015267720187, "grad_norm": 0.2380020171403885, "learning_rate": 2.339401254745854e-05, "loss": 0.1093, "step": 31868 }, { "epoch": 0.5684193628937324, "grad_norm": 0.3140031397342682, "learning_rate": 2.3392459257935432e-05, "loss": 0.1513, "step": 31869 }, { "epoch": 0.5684371990154461, "grad_norm": 0.3794325590133667, "learning_rate": 2.3390905974643716e-05, "loss": 0.2061, "step": 31870 }, { "epoch": 0.5684550351371598, "grad_norm": 0.3635885417461395, "learning_rate": 2.338935269758941e-05, "loss": 0.1426, "step": 31871 }, { "epoch": 0.5684728712588735, "grad_norm": 0.39303216338157654, "learning_rate": 2.338779942677852e-05, "loss": 0.167, "step": 31872 }, { "epoch": 0.5684907073805872, "grad_norm": 0.20632973313331604, "learning_rate": 2.3386246162217078e-05, "loss": 0.15, "step": 31873 }, { "epoch": 0.5685085435023008, "grad_norm": 0.22460660338401794, "learning_rate": 2.3384692903911103e-05, "loss": 0.1138, "step": 31874 }, { "epoch": 0.5685263796240145, "grad_norm": 0.21528342366218567, "learning_rate": 2.3383139651866622e-05, "loss": 0.1034, "step": 31875 }, { "epoch": 0.5685442157457282, "grad_norm": 0.2516711950302124, "learning_rate": 2.338158640608965e-05, "loss": 0.0816, "step": 31876 }, { "epoch": 0.5685620518674419, "grad_norm": 0.2805202305316925, "learning_rate": 2.3380033166586197e-05, "loss": 0.0803, "step": 31877 }, { "epoch": 0.5685798879891556, "grad_norm": 0.2692238390445709, "learning_rate": 2.3378479933362304e-05, "loss": 0.0922, "step": 31878 }, { "epoch": 0.5685977241108693, "grad_norm": 0.2968035042285919, "learning_rate": 2.3376926706423985e-05, "loss": 0.1776, "step": 31879 }, { "epoch": 0.568615560232583, "grad_norm": 0.2722340524196625, "learning_rate": 2.3375373485777254e-05, "loss": 0.1227, "step": 31880 }, { "epoch": 0.5686333963542968, "grad_norm": 0.23171938955783844, "learning_rate": 2.337382027142814e-05, "loss": 0.1121, "step": 31881 }, { "epoch": 0.5686512324760105, "grad_norm": 0.41601529717445374, "learning_rate": 2.3372267063382648e-05, "loss": 0.1537, "step": 31882 }, { "epoch": 0.5686690685977241, "grad_norm": 0.22648778557777405, "learning_rate": 2.3370713861646826e-05, "loss": 0.1703, "step": 31883 }, { "epoch": 0.5686869047194378, "grad_norm": 0.224659726023674, "learning_rate": 2.3369160666226677e-05, "loss": 0.1118, "step": 31884 }, { "epoch": 0.5687047408411515, "grad_norm": 0.2781287133693695, "learning_rate": 2.3367607477128227e-05, "loss": 0.16, "step": 31885 }, { "epoch": 0.5687225769628652, "grad_norm": 0.29262882471084595, "learning_rate": 2.3366054294357482e-05, "loss": 0.1595, "step": 31886 }, { "epoch": 0.5687404130845789, "grad_norm": 0.22359992563724518, "learning_rate": 2.3364501117920484e-05, "loss": 0.1064, "step": 31887 }, { "epoch": 0.5687582492062926, "grad_norm": 0.24614740908145905, "learning_rate": 2.3362947947823242e-05, "loss": 0.163, "step": 31888 }, { "epoch": 0.5687760853280063, "grad_norm": 0.2586507201194763, "learning_rate": 2.3361394784071782e-05, "loss": 0.1544, "step": 31889 }, { "epoch": 0.56879392144972, "grad_norm": 0.24406969547271729, "learning_rate": 2.3359841626672113e-05, "loss": 0.1329, "step": 31890 }, { "epoch": 0.5688117575714337, "grad_norm": 0.24728265404701233, "learning_rate": 2.3358288475630274e-05, "loss": 0.1359, "step": 31891 }, { "epoch": 0.5688295936931473, "grad_norm": 0.3328385353088379, "learning_rate": 2.3356735330952275e-05, "loss": 0.1788, "step": 31892 }, { "epoch": 0.568847429814861, "grad_norm": 0.29807713627815247, "learning_rate": 2.335518219264414e-05, "loss": 0.1841, "step": 31893 }, { "epoch": 0.5688652659365747, "grad_norm": 0.22984002530574799, "learning_rate": 2.3353629060711878e-05, "loss": 0.1167, "step": 31894 }, { "epoch": 0.5688831020582884, "grad_norm": 0.3549318015575409, "learning_rate": 2.3352075935161516e-05, "loss": 0.1444, "step": 31895 }, { "epoch": 0.5689009381800021, "grad_norm": 0.24268245697021484, "learning_rate": 2.3350522815999086e-05, "loss": 0.1241, "step": 31896 }, { "epoch": 0.5689187743017158, "grad_norm": 0.24521784484386444, "learning_rate": 2.33489697032306e-05, "loss": 0.0854, "step": 31897 }, { "epoch": 0.5689366104234296, "grad_norm": 0.2668326497077942, "learning_rate": 2.3347416596862078e-05, "loss": 0.0893, "step": 31898 }, { "epoch": 0.5689544465451433, "grad_norm": 0.20976288616657257, "learning_rate": 2.3345863496899527e-05, "loss": 0.1322, "step": 31899 }, { "epoch": 0.568972282666857, "grad_norm": 0.2963721752166748, "learning_rate": 2.3344310403348995e-05, "loss": 0.1077, "step": 31900 }, { "epoch": 0.5689901187885706, "grad_norm": 0.2996422350406647, "learning_rate": 2.3342757316216483e-05, "loss": 0.0912, "step": 31901 }, { "epoch": 0.5690079549102843, "grad_norm": 0.2290738970041275, "learning_rate": 2.3341204235508018e-05, "loss": 0.1502, "step": 31902 }, { "epoch": 0.569025791031998, "grad_norm": 0.2685301899909973, "learning_rate": 2.333965116122961e-05, "loss": 0.1368, "step": 31903 }, { "epoch": 0.5690436271537117, "grad_norm": 0.21077746152877808, "learning_rate": 2.3338098093387294e-05, "loss": 0.1316, "step": 31904 }, { "epoch": 0.5690614632754254, "grad_norm": 0.3262901306152344, "learning_rate": 2.3336545031987087e-05, "loss": 0.0929, "step": 31905 }, { "epoch": 0.5690792993971391, "grad_norm": 0.34701892733573914, "learning_rate": 2.3334991977035006e-05, "loss": 0.1654, "step": 31906 }, { "epoch": 0.5690971355188528, "grad_norm": 0.20195747911930084, "learning_rate": 2.3333438928537066e-05, "loss": 0.1275, "step": 31907 }, { "epoch": 0.5691149716405665, "grad_norm": 0.16705262660980225, "learning_rate": 2.333188588649929e-05, "loss": 0.1099, "step": 31908 }, { "epoch": 0.5691328077622801, "grad_norm": 0.2527308166027069, "learning_rate": 2.3330332850927703e-05, "loss": 0.1506, "step": 31909 }, { "epoch": 0.5691506438839938, "grad_norm": 0.2541621923446655, "learning_rate": 2.332877982182833e-05, "loss": 0.1472, "step": 31910 }, { "epoch": 0.5691684800057075, "grad_norm": 0.2527715265750885, "learning_rate": 2.3327226799207177e-05, "loss": 0.1368, "step": 31911 }, { "epoch": 0.5691863161274212, "grad_norm": 0.3632810711860657, "learning_rate": 2.3325673783070266e-05, "loss": 0.1062, "step": 31912 }, { "epoch": 0.5692041522491349, "grad_norm": 0.27961620688438416, "learning_rate": 2.332412077342363e-05, "loss": 0.1489, "step": 31913 }, { "epoch": 0.5692219883708487, "grad_norm": 0.22823116183280945, "learning_rate": 2.332256777027328e-05, "loss": 0.1038, "step": 31914 }, { "epoch": 0.5692398244925624, "grad_norm": 0.3571709096431732, "learning_rate": 2.3321014773625234e-05, "loss": 0.1755, "step": 31915 }, { "epoch": 0.5692576606142761, "grad_norm": 0.18647009134292603, "learning_rate": 2.331946178348551e-05, "loss": 0.1023, "step": 31916 }, { "epoch": 0.5692754967359898, "grad_norm": 0.2772287428379059, "learning_rate": 2.3317908799860135e-05, "loss": 0.0915, "step": 31917 }, { "epoch": 0.5692933328577034, "grad_norm": 0.25653931498527527, "learning_rate": 2.3316355822755136e-05, "loss": 0.1312, "step": 31918 }, { "epoch": 0.5693111689794171, "grad_norm": 0.24136759340763092, "learning_rate": 2.331480285217652e-05, "loss": 0.1246, "step": 31919 }, { "epoch": 0.5693290051011308, "grad_norm": 0.22177313268184662, "learning_rate": 2.3313249888130308e-05, "loss": 0.147, "step": 31920 }, { "epoch": 0.5693468412228445, "grad_norm": 0.21226876974105835, "learning_rate": 2.331169693062251e-05, "loss": 0.0793, "step": 31921 }, { "epoch": 0.5693646773445582, "grad_norm": 0.3017657995223999, "learning_rate": 2.3310143979659167e-05, "loss": 0.1554, "step": 31922 }, { "epoch": 0.5693825134662719, "grad_norm": 0.20766964554786682, "learning_rate": 2.3308591035246292e-05, "loss": 0.0575, "step": 31923 }, { "epoch": 0.5694003495879856, "grad_norm": 0.3533168435096741, "learning_rate": 2.3307038097389906e-05, "loss": 0.1226, "step": 31924 }, { "epoch": 0.5694181857096993, "grad_norm": 0.35080260038375854, "learning_rate": 2.3305485166096013e-05, "loss": 0.1395, "step": 31925 }, { "epoch": 0.569436021831413, "grad_norm": 0.4067175090312958, "learning_rate": 2.3303932241370658e-05, "loss": 0.117, "step": 31926 }, { "epoch": 0.5694538579531266, "grad_norm": 0.20107346773147583, "learning_rate": 2.3302379323219846e-05, "loss": 0.1126, "step": 31927 }, { "epoch": 0.5694716940748403, "grad_norm": 0.21098102629184723, "learning_rate": 2.330082641164959e-05, "loss": 0.1516, "step": 31928 }, { "epoch": 0.569489530196554, "grad_norm": 0.3123009204864502, "learning_rate": 2.329927350666592e-05, "loss": 0.132, "step": 31929 }, { "epoch": 0.5695073663182677, "grad_norm": 0.26454970240592957, "learning_rate": 2.329772060827485e-05, "loss": 0.123, "step": 31930 }, { "epoch": 0.5695252024399815, "grad_norm": 0.308457612991333, "learning_rate": 2.329616771648241e-05, "loss": 0.1505, "step": 31931 }, { "epoch": 0.5695430385616952, "grad_norm": 0.22915974259376526, "learning_rate": 2.3294614831294615e-05, "loss": 0.0846, "step": 31932 }, { "epoch": 0.5695608746834089, "grad_norm": 0.2821044921875, "learning_rate": 2.329306195271748e-05, "loss": 0.1022, "step": 31933 }, { "epoch": 0.5695787108051226, "grad_norm": 0.2712126672267914, "learning_rate": 2.3291509080757016e-05, "loss": 0.1705, "step": 31934 }, { "epoch": 0.5695965469268363, "grad_norm": 0.2786289155483246, "learning_rate": 2.328995621541926e-05, "loss": 0.1308, "step": 31935 }, { "epoch": 0.5696143830485499, "grad_norm": 0.22536014020442963, "learning_rate": 2.3288403356710227e-05, "loss": 0.1025, "step": 31936 }, { "epoch": 0.5696322191702636, "grad_norm": 0.22865964472293854, "learning_rate": 2.3286850504635933e-05, "loss": 0.1401, "step": 31937 }, { "epoch": 0.5696500552919773, "grad_norm": 0.22847989201545715, "learning_rate": 2.328529765920239e-05, "loss": 0.1056, "step": 31938 }, { "epoch": 0.569667891413691, "grad_norm": 0.24454420804977417, "learning_rate": 2.3283744820415634e-05, "loss": 0.1302, "step": 31939 }, { "epoch": 0.5696857275354047, "grad_norm": 0.2526075541973114, "learning_rate": 2.3282191988281676e-05, "loss": 0.0834, "step": 31940 }, { "epoch": 0.5697035636571184, "grad_norm": 0.38525134325027466, "learning_rate": 2.3280639162806538e-05, "loss": 0.1491, "step": 31941 }, { "epoch": 0.5697213997788321, "grad_norm": 0.48143163323402405, "learning_rate": 2.3279086343996225e-05, "loss": 0.1437, "step": 31942 }, { "epoch": 0.5697392359005458, "grad_norm": 0.31480303406715393, "learning_rate": 2.3277533531856777e-05, "loss": 0.1154, "step": 31943 }, { "epoch": 0.5697570720222594, "grad_norm": 0.320762038230896, "learning_rate": 2.32759807263942e-05, "loss": 0.2208, "step": 31944 }, { "epoch": 0.5697749081439731, "grad_norm": 0.25504228472709656, "learning_rate": 2.3274427927614518e-05, "loss": 0.1716, "step": 31945 }, { "epoch": 0.5697927442656868, "grad_norm": 0.1853378564119339, "learning_rate": 2.3272875135523754e-05, "loss": 0.0676, "step": 31946 }, { "epoch": 0.5698105803874005, "grad_norm": 0.23926015198230743, "learning_rate": 2.327132235012791e-05, "loss": 0.1285, "step": 31947 }, { "epoch": 0.5698284165091143, "grad_norm": 0.2526625692844391, "learning_rate": 2.3269769571433028e-05, "loss": 0.1028, "step": 31948 }, { "epoch": 0.569846252630828, "grad_norm": 0.32967737317085266, "learning_rate": 2.3268216799445114e-05, "loss": 0.1818, "step": 31949 }, { "epoch": 0.5698640887525417, "grad_norm": 0.2688600420951843, "learning_rate": 2.3266664034170194e-05, "loss": 0.1025, "step": 31950 }, { "epoch": 0.5698819248742554, "grad_norm": 0.3574877083301544, "learning_rate": 2.326511127561427e-05, "loss": 0.1226, "step": 31951 }, { "epoch": 0.5698997609959691, "grad_norm": 0.2183462381362915, "learning_rate": 2.3263558523783386e-05, "loss": 0.1615, "step": 31952 }, { "epoch": 0.5699175971176828, "grad_norm": 0.24285347759723663, "learning_rate": 2.326200577868355e-05, "loss": 0.132, "step": 31953 }, { "epoch": 0.5699354332393964, "grad_norm": 0.20640622079372406, "learning_rate": 2.3260453040320778e-05, "loss": 0.0971, "step": 31954 }, { "epoch": 0.5699532693611101, "grad_norm": 0.2130829393863678, "learning_rate": 2.325890030870108e-05, "loss": 0.1391, "step": 31955 }, { "epoch": 0.5699711054828238, "grad_norm": 0.277995228767395, "learning_rate": 2.32573475838305e-05, "loss": 0.1785, "step": 31956 }, { "epoch": 0.5699889416045375, "grad_norm": 0.2410154789686203, "learning_rate": 2.3255794865715037e-05, "loss": 0.1381, "step": 31957 }, { "epoch": 0.5700067777262512, "grad_norm": 0.21570318937301636, "learning_rate": 2.3254242154360716e-05, "loss": 0.1228, "step": 31958 }, { "epoch": 0.5700246138479649, "grad_norm": 0.3246050775051117, "learning_rate": 2.3252689449773558e-05, "loss": 0.1417, "step": 31959 }, { "epoch": 0.5700424499696786, "grad_norm": 0.20825032889842987, "learning_rate": 2.325113675195957e-05, "loss": 0.0807, "step": 31960 }, { "epoch": 0.5700602860913923, "grad_norm": 0.273908406496048, "learning_rate": 2.324958406092479e-05, "loss": 0.1563, "step": 31961 }, { "epoch": 0.5700781222131059, "grad_norm": 0.31246012449264526, "learning_rate": 2.3248031376675226e-05, "loss": 0.1729, "step": 31962 }, { "epoch": 0.5700959583348196, "grad_norm": 0.2307450920343399, "learning_rate": 2.324647869921689e-05, "loss": 0.0948, "step": 31963 }, { "epoch": 0.5701137944565333, "grad_norm": 0.234095498919487, "learning_rate": 2.3244926028555815e-05, "loss": 0.1093, "step": 31964 }, { "epoch": 0.5701316305782471, "grad_norm": 0.2592236399650574, "learning_rate": 2.3243373364698006e-05, "loss": 0.1204, "step": 31965 }, { "epoch": 0.5701494666999608, "grad_norm": 0.25265398621559143, "learning_rate": 2.3241820707649495e-05, "loss": 0.146, "step": 31966 }, { "epoch": 0.5701673028216745, "grad_norm": 0.3033604919910431, "learning_rate": 2.32402680574163e-05, "loss": 0.1488, "step": 31967 }, { "epoch": 0.5701851389433882, "grad_norm": 0.3133663237094879, "learning_rate": 2.3238715414004415e-05, "loss": 0.1684, "step": 31968 }, { "epoch": 0.5702029750651019, "grad_norm": 0.3297819197177887, "learning_rate": 2.323716277741989e-05, "loss": 0.1415, "step": 31969 }, { "epoch": 0.5702208111868156, "grad_norm": 0.2560020387172699, "learning_rate": 2.3235610147668735e-05, "loss": 0.0864, "step": 31970 }, { "epoch": 0.5702386473085292, "grad_norm": 0.23315803706645966, "learning_rate": 2.3234057524756956e-05, "loss": 0.1196, "step": 31971 }, { "epoch": 0.5702564834302429, "grad_norm": 0.32611286640167236, "learning_rate": 2.3232504908690585e-05, "loss": 0.151, "step": 31972 }, { "epoch": 0.5702743195519566, "grad_norm": 0.24620841443538666, "learning_rate": 2.3230952299475628e-05, "loss": 0.0744, "step": 31973 }, { "epoch": 0.5702921556736703, "grad_norm": 0.24318484961986542, "learning_rate": 2.3229399697118116e-05, "loss": 0.1477, "step": 31974 }, { "epoch": 0.570309991795384, "grad_norm": 0.24427008628845215, "learning_rate": 2.322784710162407e-05, "loss": 0.1613, "step": 31975 }, { "epoch": 0.5703278279170977, "grad_norm": 0.23330405354499817, "learning_rate": 2.3226294512999496e-05, "loss": 0.1493, "step": 31976 }, { "epoch": 0.5703456640388114, "grad_norm": 0.252483069896698, "learning_rate": 2.3224741931250404e-05, "loss": 0.1353, "step": 31977 }, { "epoch": 0.5703635001605251, "grad_norm": 0.3242998421192169, "learning_rate": 2.3223189356382836e-05, "loss": 0.1127, "step": 31978 }, { "epoch": 0.5703813362822387, "grad_norm": 0.2655237913131714, "learning_rate": 2.32216367884028e-05, "loss": 0.1298, "step": 31979 }, { "epoch": 0.5703991724039524, "grad_norm": 0.27564719319343567, "learning_rate": 2.322008422731632e-05, "loss": 0.1236, "step": 31980 }, { "epoch": 0.5704170085256661, "grad_norm": 0.29023295640945435, "learning_rate": 2.321853167312939e-05, "loss": 0.1997, "step": 31981 }, { "epoch": 0.5704348446473799, "grad_norm": 0.2064339965581894, "learning_rate": 2.3216979125848067e-05, "loss": 0.0732, "step": 31982 }, { "epoch": 0.5704526807690936, "grad_norm": 0.3277190625667572, "learning_rate": 2.321542658547834e-05, "loss": 0.1234, "step": 31983 }, { "epoch": 0.5704705168908073, "grad_norm": 0.2326844185590744, "learning_rate": 2.3213874052026234e-05, "loss": 0.1093, "step": 31984 }, { "epoch": 0.570488353012521, "grad_norm": 0.31463688611984253, "learning_rate": 2.3212321525497776e-05, "loss": 0.146, "step": 31985 }, { "epoch": 0.5705061891342347, "grad_norm": 0.217011496424675, "learning_rate": 2.3210769005898964e-05, "loss": 0.087, "step": 31986 }, { "epoch": 0.5705240252559484, "grad_norm": 0.36521267890930176, "learning_rate": 2.3209216493235842e-05, "loss": 0.1446, "step": 31987 }, { "epoch": 0.570541861377662, "grad_norm": 0.22679482400417328, "learning_rate": 2.3207663987514412e-05, "loss": 0.1876, "step": 31988 }, { "epoch": 0.5705596974993757, "grad_norm": 0.24948854744434357, "learning_rate": 2.3206111488740702e-05, "loss": 0.1131, "step": 31989 }, { "epoch": 0.5705775336210894, "grad_norm": 0.2821401059627533, "learning_rate": 2.3204558996920706e-05, "loss": 0.1105, "step": 31990 }, { "epoch": 0.5705953697428031, "grad_norm": 0.27452602982521057, "learning_rate": 2.320300651206047e-05, "loss": 0.1048, "step": 31991 }, { "epoch": 0.5706132058645168, "grad_norm": 0.2265673726797104, "learning_rate": 2.3201454034166e-05, "loss": 0.1382, "step": 31992 }, { "epoch": 0.5706310419862305, "grad_norm": 0.18851260840892792, "learning_rate": 2.319990156324332e-05, "loss": 0.1029, "step": 31993 }, { "epoch": 0.5706488781079442, "grad_norm": 0.3642235994338989, "learning_rate": 2.3198349099298433e-05, "loss": 0.1312, "step": 31994 }, { "epoch": 0.5706667142296579, "grad_norm": 0.31986698508262634, "learning_rate": 2.3196796642337376e-05, "loss": 0.1692, "step": 31995 }, { "epoch": 0.5706845503513716, "grad_norm": 0.3594389855861664, "learning_rate": 2.3195244192366158e-05, "loss": 0.2242, "step": 31996 }, { "epoch": 0.5707023864730852, "grad_norm": 0.3043067157268524, "learning_rate": 2.31936917493908e-05, "loss": 0.1267, "step": 31997 }, { "epoch": 0.5707202225947989, "grad_norm": 0.23148998618125916, "learning_rate": 2.3192139313417306e-05, "loss": 0.1303, "step": 31998 }, { "epoch": 0.5707380587165127, "grad_norm": 0.22920602560043335, "learning_rate": 2.3190586884451705e-05, "loss": 0.1424, "step": 31999 }, { "epoch": 0.5707558948382264, "grad_norm": 0.27284783124923706, "learning_rate": 2.318903446250002e-05, "loss": 0.1461, "step": 32000 }, { "epoch": 0.5707558948382264, "eval_loss": 0.1288762390613556, "eval_runtime": 107.6868, "eval_samples_per_second": 9.509, "eval_steps_per_second": 1.588, "step": 32000 }, { "epoch": 0.5707737309599401, "grad_norm": 0.24406161904335022, "learning_rate": 2.3187482047568264e-05, "loss": 0.1575, "step": 32001 }, { "epoch": 0.5707915670816538, "grad_norm": 0.2429829239845276, "learning_rate": 2.3185929639662457e-05, "loss": 0.0983, "step": 32002 }, { "epoch": 0.5708094032033675, "grad_norm": 0.29681476950645447, "learning_rate": 2.3184377238788598e-05, "loss": 0.1032, "step": 32003 }, { "epoch": 0.5708272393250812, "grad_norm": 0.33057114481925964, "learning_rate": 2.3182824844952733e-05, "loss": 0.1621, "step": 32004 }, { "epoch": 0.5708450754467949, "grad_norm": 0.2808226943016052, "learning_rate": 2.3181272458160864e-05, "loss": 0.1406, "step": 32005 }, { "epoch": 0.5708629115685085, "grad_norm": 0.2762940526008606, "learning_rate": 2.317972007841901e-05, "loss": 0.1391, "step": 32006 }, { "epoch": 0.5708807476902222, "grad_norm": 0.2615235447883606, "learning_rate": 2.3178167705733185e-05, "loss": 0.1292, "step": 32007 }, { "epoch": 0.5708985838119359, "grad_norm": 0.26972535252571106, "learning_rate": 2.3176615340109416e-05, "loss": 0.103, "step": 32008 }, { "epoch": 0.5709164199336496, "grad_norm": 0.26495981216430664, "learning_rate": 2.3175062981553723e-05, "loss": 0.1524, "step": 32009 }, { "epoch": 0.5709342560553633, "grad_norm": 0.22156774997711182, "learning_rate": 2.317351063007211e-05, "loss": 0.1419, "step": 32010 }, { "epoch": 0.570952092177077, "grad_norm": 0.33964696526527405, "learning_rate": 2.31719582856706e-05, "loss": 0.1563, "step": 32011 }, { "epoch": 0.5709699282987907, "grad_norm": 0.2056892216205597, "learning_rate": 2.3170405948355206e-05, "loss": 0.0784, "step": 32012 }, { "epoch": 0.5709877644205044, "grad_norm": 0.20111960172653198, "learning_rate": 2.3168853618131952e-05, "loss": 0.1085, "step": 32013 }, { "epoch": 0.571005600542218, "grad_norm": 0.24330471456050873, "learning_rate": 2.316730129500686e-05, "loss": 0.1232, "step": 32014 }, { "epoch": 0.5710234366639318, "grad_norm": 0.2663096487522125, "learning_rate": 2.3165748978985944e-05, "loss": 0.1221, "step": 32015 }, { "epoch": 0.5710412727856455, "grad_norm": 0.2402796596288681, "learning_rate": 2.31641966700752e-05, "loss": 0.0941, "step": 32016 }, { "epoch": 0.5710591089073592, "grad_norm": 0.2702605128288269, "learning_rate": 2.3162644368280684e-05, "loss": 0.1172, "step": 32017 }, { "epoch": 0.5710769450290729, "grad_norm": 0.2644036114215851, "learning_rate": 2.316109207360839e-05, "loss": 0.1294, "step": 32018 }, { "epoch": 0.5710947811507866, "grad_norm": 0.2989738881587982, "learning_rate": 2.315953978606433e-05, "loss": 0.1195, "step": 32019 }, { "epoch": 0.5711126172725003, "grad_norm": 0.1917884647846222, "learning_rate": 2.315798750565453e-05, "loss": 0.0947, "step": 32020 }, { "epoch": 0.571130453394214, "grad_norm": 0.27884235978126526, "learning_rate": 2.3156435232385007e-05, "loss": 0.1653, "step": 32021 }, { "epoch": 0.5711482895159277, "grad_norm": 0.2796356976032257, "learning_rate": 2.3154882966261784e-05, "loss": 0.0953, "step": 32022 }, { "epoch": 0.5711661256376414, "grad_norm": 0.32896968722343445, "learning_rate": 2.315333070729087e-05, "loss": 0.1703, "step": 32023 }, { "epoch": 0.571183961759355, "grad_norm": 0.24676883220672607, "learning_rate": 2.3151778455478287e-05, "loss": 0.1382, "step": 32024 }, { "epoch": 0.5712017978810687, "grad_norm": 0.23007610440254211, "learning_rate": 2.315022621083004e-05, "loss": 0.0919, "step": 32025 }, { "epoch": 0.5712196340027824, "grad_norm": 0.25032544136047363, "learning_rate": 2.3148673973352157e-05, "loss": 0.1297, "step": 32026 }, { "epoch": 0.5712374701244961, "grad_norm": 0.3636959493160248, "learning_rate": 2.314712174305066e-05, "loss": 0.1164, "step": 32027 }, { "epoch": 0.5712553062462098, "grad_norm": 0.272922545671463, "learning_rate": 2.3145569519931557e-05, "loss": 0.1306, "step": 32028 }, { "epoch": 0.5712731423679235, "grad_norm": 0.2876552641391754, "learning_rate": 2.3144017304000857e-05, "loss": 0.1617, "step": 32029 }, { "epoch": 0.5712909784896372, "grad_norm": 0.4710802137851715, "learning_rate": 2.3142465095264598e-05, "loss": 0.1255, "step": 32030 }, { "epoch": 0.5713088146113509, "grad_norm": 0.2886894643306732, "learning_rate": 2.3140912893728786e-05, "loss": 0.0996, "step": 32031 }, { "epoch": 0.5713266507330647, "grad_norm": 0.27970659732818604, "learning_rate": 2.3139360699399433e-05, "loss": 0.1217, "step": 32032 }, { "epoch": 0.5713444868547783, "grad_norm": 0.25713902711868286, "learning_rate": 2.3137808512282558e-05, "loss": 0.1298, "step": 32033 }, { "epoch": 0.571362322976492, "grad_norm": 0.24366679787635803, "learning_rate": 2.3136256332384182e-05, "loss": 0.1432, "step": 32034 }, { "epoch": 0.5713801590982057, "grad_norm": 0.3444467782974243, "learning_rate": 2.3134704159710327e-05, "loss": 0.1781, "step": 32035 }, { "epoch": 0.5713979952199194, "grad_norm": 0.2424277663230896, "learning_rate": 2.3133151994267003e-05, "loss": 0.119, "step": 32036 }, { "epoch": 0.5714158313416331, "grad_norm": 0.42974215745925903, "learning_rate": 2.3131599836060226e-05, "loss": 0.1411, "step": 32037 }, { "epoch": 0.5714336674633468, "grad_norm": 0.21793238818645477, "learning_rate": 2.3130047685095998e-05, "loss": 0.1341, "step": 32038 }, { "epoch": 0.5714515035850605, "grad_norm": 0.23208774626255035, "learning_rate": 2.312849554138037e-05, "loss": 0.1417, "step": 32039 }, { "epoch": 0.5714693397067742, "grad_norm": 0.29587915539741516, "learning_rate": 2.3126943404919328e-05, "loss": 0.0714, "step": 32040 }, { "epoch": 0.5714871758284878, "grad_norm": 0.5470567941665649, "learning_rate": 2.3125391275718907e-05, "loss": 0.1491, "step": 32041 }, { "epoch": 0.5715050119502015, "grad_norm": 0.25324326753616333, "learning_rate": 2.3123839153785107e-05, "loss": 0.1425, "step": 32042 }, { "epoch": 0.5715228480719152, "grad_norm": 0.3722260296344757, "learning_rate": 2.3122287039123966e-05, "loss": 0.1209, "step": 32043 }, { "epoch": 0.5715406841936289, "grad_norm": 0.2965531051158905, "learning_rate": 2.3120734931741487e-05, "loss": 0.1132, "step": 32044 }, { "epoch": 0.5715585203153426, "grad_norm": 0.2074170708656311, "learning_rate": 2.311918283164369e-05, "loss": 0.1377, "step": 32045 }, { "epoch": 0.5715763564370563, "grad_norm": 0.24770928919315338, "learning_rate": 2.311763073883658e-05, "loss": 0.1758, "step": 32046 }, { "epoch": 0.57159419255877, "grad_norm": 0.35495057702064514, "learning_rate": 2.3116078653326192e-05, "loss": 0.174, "step": 32047 }, { "epoch": 0.5716120286804837, "grad_norm": 0.2525014877319336, "learning_rate": 2.3114526575118527e-05, "loss": 0.1753, "step": 32048 }, { "epoch": 0.5716298648021975, "grad_norm": 0.20264236629009247, "learning_rate": 2.3112974504219617e-05, "loss": 0.1315, "step": 32049 }, { "epoch": 0.5716477009239112, "grad_norm": 0.3439137041568756, "learning_rate": 2.3111422440635457e-05, "loss": 0.1561, "step": 32050 }, { "epoch": 0.5716655370456248, "grad_norm": 0.24801819026470184, "learning_rate": 2.310987038437209e-05, "loss": 0.1004, "step": 32051 }, { "epoch": 0.5716833731673385, "grad_norm": 0.28539639711380005, "learning_rate": 2.3108318335435513e-05, "loss": 0.1909, "step": 32052 }, { "epoch": 0.5717012092890522, "grad_norm": 0.27709439396858215, "learning_rate": 2.3106766293831752e-05, "loss": 0.1241, "step": 32053 }, { "epoch": 0.5717190454107659, "grad_norm": 0.3075057566165924, "learning_rate": 2.310521425956681e-05, "loss": 0.1156, "step": 32054 }, { "epoch": 0.5717368815324796, "grad_norm": 0.5115877985954285, "learning_rate": 2.310366223264671e-05, "loss": 0.1382, "step": 32055 }, { "epoch": 0.5717547176541933, "grad_norm": 0.2903379499912262, "learning_rate": 2.3102110213077478e-05, "loss": 0.1106, "step": 32056 }, { "epoch": 0.571772553775907, "grad_norm": 0.24777671694755554, "learning_rate": 2.3100558200865124e-05, "loss": 0.0839, "step": 32057 }, { "epoch": 0.5717903898976207, "grad_norm": 0.20852568745613098, "learning_rate": 2.3099006196015662e-05, "loss": 0.1269, "step": 32058 }, { "epoch": 0.5718082260193343, "grad_norm": 0.31661632657051086, "learning_rate": 2.3097454198535098e-05, "loss": 0.1835, "step": 32059 }, { "epoch": 0.571826062141048, "grad_norm": 0.24231091141700745, "learning_rate": 2.3095902208429468e-05, "loss": 0.0581, "step": 32060 }, { "epoch": 0.5718438982627617, "grad_norm": 0.2693176567554474, "learning_rate": 2.3094350225704773e-05, "loss": 0.1361, "step": 32061 }, { "epoch": 0.5718617343844754, "grad_norm": 0.20122888684272766, "learning_rate": 2.3092798250367044e-05, "loss": 0.0929, "step": 32062 }, { "epoch": 0.5718795705061891, "grad_norm": 0.3333703577518463, "learning_rate": 2.3091246282422275e-05, "loss": 0.1147, "step": 32063 }, { "epoch": 0.5718974066279028, "grad_norm": 0.28907883167266846, "learning_rate": 2.3089694321876505e-05, "loss": 0.1281, "step": 32064 }, { "epoch": 0.5719152427496165, "grad_norm": 0.45795542001724243, "learning_rate": 2.3088142368735738e-05, "loss": 0.1485, "step": 32065 }, { "epoch": 0.5719330788713303, "grad_norm": 0.2489219754934311, "learning_rate": 2.3086590423005993e-05, "loss": 0.1095, "step": 32066 }, { "epoch": 0.571950914993044, "grad_norm": 0.2608923316001892, "learning_rate": 2.3085038484693276e-05, "loss": 0.1487, "step": 32067 }, { "epoch": 0.5719687511147576, "grad_norm": 0.2482975870370865, "learning_rate": 2.3083486553803617e-05, "loss": 0.1135, "step": 32068 }, { "epoch": 0.5719865872364713, "grad_norm": 0.22487591207027435, "learning_rate": 2.308193463034302e-05, "loss": 0.124, "step": 32069 }, { "epoch": 0.572004423358185, "grad_norm": 0.22936218976974487, "learning_rate": 2.308038271431752e-05, "loss": 0.1364, "step": 32070 }, { "epoch": 0.5720222594798987, "grad_norm": 0.28268882632255554, "learning_rate": 2.3078830805733114e-05, "loss": 0.165, "step": 32071 }, { "epoch": 0.5720400956016124, "grad_norm": 0.3238881528377533, "learning_rate": 2.3077278904595815e-05, "loss": 0.1552, "step": 32072 }, { "epoch": 0.5720579317233261, "grad_norm": 0.3021112382411957, "learning_rate": 2.3075727010911655e-05, "loss": 0.092, "step": 32073 }, { "epoch": 0.5720757678450398, "grad_norm": 0.31802070140838623, "learning_rate": 2.3074175124686643e-05, "loss": 0.1321, "step": 32074 }, { "epoch": 0.5720936039667535, "grad_norm": 0.27723920345306396, "learning_rate": 2.307262324592679e-05, "loss": 0.1191, "step": 32075 }, { "epoch": 0.5721114400884671, "grad_norm": 0.31892484426498413, "learning_rate": 2.3071071374638106e-05, "loss": 0.1319, "step": 32076 }, { "epoch": 0.5721292762101808, "grad_norm": 0.26982811093330383, "learning_rate": 2.306951951082663e-05, "loss": 0.1498, "step": 32077 }, { "epoch": 0.5721471123318945, "grad_norm": 0.20521079003810883, "learning_rate": 2.306796765449836e-05, "loss": 0.1022, "step": 32078 }, { "epoch": 0.5721649484536082, "grad_norm": 0.27309471368789673, "learning_rate": 2.3066415805659316e-05, "loss": 0.1624, "step": 32079 }, { "epoch": 0.5721827845753219, "grad_norm": 0.23783299326896667, "learning_rate": 2.306486396431551e-05, "loss": 0.0787, "step": 32080 }, { "epoch": 0.5722006206970356, "grad_norm": 0.34868064522743225, "learning_rate": 2.3063312130472953e-05, "loss": 0.1518, "step": 32081 }, { "epoch": 0.5722184568187493, "grad_norm": 0.23732613027095795, "learning_rate": 2.306176030413767e-05, "loss": 0.1222, "step": 32082 }, { "epoch": 0.5722362929404631, "grad_norm": 0.3081076145172119, "learning_rate": 2.306020848531568e-05, "loss": 0.1439, "step": 32083 }, { "epoch": 0.5722541290621768, "grad_norm": 0.18338099122047424, "learning_rate": 2.305865667401299e-05, "loss": 0.1427, "step": 32084 }, { "epoch": 0.5722719651838905, "grad_norm": 0.24480365216732025, "learning_rate": 2.3057104870235606e-05, "loss": 0.1032, "step": 32085 }, { "epoch": 0.5722898013056041, "grad_norm": 0.2501121759414673, "learning_rate": 2.305555307398957e-05, "loss": 0.1679, "step": 32086 }, { "epoch": 0.5723076374273178, "grad_norm": 0.32427743077278137, "learning_rate": 2.305400128528088e-05, "loss": 0.1343, "step": 32087 }, { "epoch": 0.5723254735490315, "grad_norm": 0.33027711510658264, "learning_rate": 2.3052449504115545e-05, "loss": 0.1109, "step": 32088 }, { "epoch": 0.5723433096707452, "grad_norm": 0.20214340090751648, "learning_rate": 2.305089773049959e-05, "loss": 0.1018, "step": 32089 }, { "epoch": 0.5723611457924589, "grad_norm": 0.25955909490585327, "learning_rate": 2.3049345964439028e-05, "loss": 0.0906, "step": 32090 }, { "epoch": 0.5723789819141726, "grad_norm": 0.24333368241786957, "learning_rate": 2.3047794205939883e-05, "loss": 0.1298, "step": 32091 }, { "epoch": 0.5723968180358863, "grad_norm": 0.19318167865276337, "learning_rate": 2.304624245500816e-05, "loss": 0.1212, "step": 32092 }, { "epoch": 0.5724146541576, "grad_norm": 0.24397949874401093, "learning_rate": 2.3044690711649875e-05, "loss": 0.1529, "step": 32093 }, { "epoch": 0.5724324902793136, "grad_norm": 0.24525824189186096, "learning_rate": 2.3043138975871033e-05, "loss": 0.1114, "step": 32094 }, { "epoch": 0.5724503264010273, "grad_norm": 0.2584737539291382, "learning_rate": 2.3041587247677673e-05, "loss": 0.0793, "step": 32095 }, { "epoch": 0.572468162522741, "grad_norm": 0.2366800457239151, "learning_rate": 2.3040035527075794e-05, "loss": 0.1644, "step": 32096 }, { "epoch": 0.5724859986444547, "grad_norm": 0.23789042234420776, "learning_rate": 2.3038483814071416e-05, "loss": 0.1215, "step": 32097 }, { "epoch": 0.5725038347661684, "grad_norm": 0.2214551717042923, "learning_rate": 2.3036932108670543e-05, "loss": 0.1402, "step": 32098 }, { "epoch": 0.5725216708878821, "grad_norm": 0.280853271484375, "learning_rate": 2.3035380410879208e-05, "loss": 0.1227, "step": 32099 }, { "epoch": 0.5725395070095959, "grad_norm": 0.3992501497268677, "learning_rate": 2.3033828720703417e-05, "loss": 0.0877, "step": 32100 }, { "epoch": 0.5725573431313096, "grad_norm": 0.24538998305797577, "learning_rate": 2.3032277038149185e-05, "loss": 0.1155, "step": 32101 }, { "epoch": 0.5725751792530233, "grad_norm": 0.27168336510658264, "learning_rate": 2.3030725363222518e-05, "loss": 0.0808, "step": 32102 }, { "epoch": 0.572593015374737, "grad_norm": 0.33248263597488403, "learning_rate": 2.3029173695929445e-05, "loss": 0.1536, "step": 32103 }, { "epoch": 0.5726108514964506, "grad_norm": 0.2076752632856369, "learning_rate": 2.302762203627598e-05, "loss": 0.1611, "step": 32104 }, { "epoch": 0.5726286876181643, "grad_norm": 0.21981672942638397, "learning_rate": 2.3026070384268132e-05, "loss": 0.1324, "step": 32105 }, { "epoch": 0.572646523739878, "grad_norm": 0.3746435046195984, "learning_rate": 2.302451873991192e-05, "loss": 0.0712, "step": 32106 }, { "epoch": 0.5726643598615917, "grad_norm": 0.23974384367465973, "learning_rate": 2.3022967103213346e-05, "loss": 0.1133, "step": 32107 }, { "epoch": 0.5726821959833054, "grad_norm": 0.3026624023914337, "learning_rate": 2.302141547417844e-05, "loss": 0.1432, "step": 32108 }, { "epoch": 0.5727000321050191, "grad_norm": 0.22384096682071686, "learning_rate": 2.3019863852813207e-05, "loss": 0.1138, "step": 32109 }, { "epoch": 0.5727178682267328, "grad_norm": 0.25508683919906616, "learning_rate": 2.3018312239123675e-05, "loss": 0.117, "step": 32110 }, { "epoch": 0.5727357043484465, "grad_norm": 0.694191575050354, "learning_rate": 2.3016760633115834e-05, "loss": 0.2424, "step": 32111 }, { "epoch": 0.5727535404701601, "grad_norm": 0.3212147653102875, "learning_rate": 2.3015209034795725e-05, "loss": 0.1275, "step": 32112 }, { "epoch": 0.5727713765918738, "grad_norm": 0.22423692047595978, "learning_rate": 2.301365744416935e-05, "loss": 0.1405, "step": 32113 }, { "epoch": 0.5727892127135875, "grad_norm": 0.2901543378829956, "learning_rate": 2.301210586124273e-05, "loss": 0.2205, "step": 32114 }, { "epoch": 0.5728070488353012, "grad_norm": 0.23535673320293427, "learning_rate": 2.301055428602186e-05, "loss": 0.1353, "step": 32115 }, { "epoch": 0.572824884957015, "grad_norm": 0.2565169930458069, "learning_rate": 2.300900271851278e-05, "loss": 0.1001, "step": 32116 }, { "epoch": 0.5728427210787287, "grad_norm": 0.23519715666770935, "learning_rate": 2.3007451158721488e-05, "loss": 0.0915, "step": 32117 }, { "epoch": 0.5728605572004424, "grad_norm": 0.20799244940280914, "learning_rate": 2.300589960665401e-05, "loss": 0.1578, "step": 32118 }, { "epoch": 0.5728783933221561, "grad_norm": 0.23612584173679352, "learning_rate": 2.300434806231635e-05, "loss": 0.1592, "step": 32119 }, { "epoch": 0.5728962294438698, "grad_norm": 0.27974092960357666, "learning_rate": 2.3002796525714517e-05, "loss": 0.1455, "step": 32120 }, { "epoch": 0.5729140655655834, "grad_norm": 0.20070381462574005, "learning_rate": 2.300124499685455e-05, "loss": 0.1346, "step": 32121 }, { "epoch": 0.5729319016872971, "grad_norm": 0.24519819021224976, "learning_rate": 2.299969347574244e-05, "loss": 0.0985, "step": 32122 }, { "epoch": 0.5729497378090108, "grad_norm": 0.260475218296051, "learning_rate": 2.299814196238421e-05, "loss": 0.1423, "step": 32123 }, { "epoch": 0.5729675739307245, "grad_norm": 0.27158546447753906, "learning_rate": 2.299659045678587e-05, "loss": 0.1368, "step": 32124 }, { "epoch": 0.5729854100524382, "grad_norm": 0.19083476066589355, "learning_rate": 2.2995038958953437e-05, "loss": 0.111, "step": 32125 }, { "epoch": 0.5730032461741519, "grad_norm": 0.2080836296081543, "learning_rate": 2.2993487468892934e-05, "loss": 0.1352, "step": 32126 }, { "epoch": 0.5730210822958656, "grad_norm": 0.23523162305355072, "learning_rate": 2.2991935986610365e-05, "loss": 0.1105, "step": 32127 }, { "epoch": 0.5730389184175793, "grad_norm": 0.2426353543996811, "learning_rate": 2.2990384512111735e-05, "loss": 0.1048, "step": 32128 }, { "epoch": 0.573056754539293, "grad_norm": 0.20853424072265625, "learning_rate": 2.2988833045403076e-05, "loss": 0.0951, "step": 32129 }, { "epoch": 0.5730745906610066, "grad_norm": 0.24359837174415588, "learning_rate": 2.29872815864904e-05, "loss": 0.1272, "step": 32130 }, { "epoch": 0.5730924267827203, "grad_norm": 0.25653213262557983, "learning_rate": 2.298573013537971e-05, "loss": 0.1057, "step": 32131 }, { "epoch": 0.573110262904434, "grad_norm": 0.292688250541687, "learning_rate": 2.2984178692077027e-05, "loss": 0.1614, "step": 32132 }, { "epoch": 0.5731280990261478, "grad_norm": 0.22703872621059418, "learning_rate": 2.2982627256588353e-05, "loss": 0.0945, "step": 32133 }, { "epoch": 0.5731459351478615, "grad_norm": 0.29195770621299744, "learning_rate": 2.2981075828919728e-05, "loss": 0.1472, "step": 32134 }, { "epoch": 0.5731637712695752, "grad_norm": 0.3011448383331299, "learning_rate": 2.2979524409077145e-05, "loss": 0.1406, "step": 32135 }, { "epoch": 0.5731816073912889, "grad_norm": 0.3198857009410858, "learning_rate": 2.2977972997066622e-05, "loss": 0.184, "step": 32136 }, { "epoch": 0.5731994435130026, "grad_norm": 0.21236436069011688, "learning_rate": 2.2976421592894172e-05, "loss": 0.0994, "step": 32137 }, { "epoch": 0.5732172796347162, "grad_norm": 0.2372962385416031, "learning_rate": 2.297487019656581e-05, "loss": 0.1371, "step": 32138 }, { "epoch": 0.5732351157564299, "grad_norm": 0.371499240398407, "learning_rate": 2.297331880808756e-05, "loss": 0.1349, "step": 32139 }, { "epoch": 0.5732529518781436, "grad_norm": 0.2742388844490051, "learning_rate": 2.2971767427465425e-05, "loss": 0.1859, "step": 32140 }, { "epoch": 0.5732707879998573, "grad_norm": 0.26087868213653564, "learning_rate": 2.2970216054705406e-05, "loss": 0.1091, "step": 32141 }, { "epoch": 0.573288624121571, "grad_norm": 0.2959029972553253, "learning_rate": 2.2968664689813543e-05, "loss": 0.1544, "step": 32142 }, { "epoch": 0.5733064602432847, "grad_norm": 0.26370272040367126, "learning_rate": 2.2967113332795838e-05, "loss": 0.1608, "step": 32143 }, { "epoch": 0.5733242963649984, "grad_norm": 0.36787140369415283, "learning_rate": 2.29655619836583e-05, "loss": 0.1148, "step": 32144 }, { "epoch": 0.5733421324867121, "grad_norm": 0.3151266872882843, "learning_rate": 2.2964010642406948e-05, "loss": 0.1205, "step": 32145 }, { "epoch": 0.5733599686084258, "grad_norm": 0.2222466766834259, "learning_rate": 2.2962459309047784e-05, "loss": 0.1555, "step": 32146 }, { "epoch": 0.5733778047301394, "grad_norm": 0.2472175806760788, "learning_rate": 2.2960907983586844e-05, "loss": 0.1094, "step": 32147 }, { "epoch": 0.5733956408518531, "grad_norm": 0.23866930603981018, "learning_rate": 2.295935666603013e-05, "loss": 0.1108, "step": 32148 }, { "epoch": 0.5734134769735668, "grad_norm": 0.21956735849380493, "learning_rate": 2.2957805356383654e-05, "loss": 0.1442, "step": 32149 }, { "epoch": 0.5734313130952806, "grad_norm": 0.21173205971717834, "learning_rate": 2.2956254054653415e-05, "loss": 0.102, "step": 32150 }, { "epoch": 0.5734491492169943, "grad_norm": 0.2393854707479477, "learning_rate": 2.2954702760845452e-05, "loss": 0.1332, "step": 32151 }, { "epoch": 0.573466985338708, "grad_norm": 0.22613154351711273, "learning_rate": 2.2953151474965768e-05, "loss": 0.1505, "step": 32152 }, { "epoch": 0.5734848214604217, "grad_norm": 0.311028391122818, "learning_rate": 2.2951600197020377e-05, "loss": 0.1001, "step": 32153 }, { "epoch": 0.5735026575821354, "grad_norm": 0.2462809830904007, "learning_rate": 2.295004892701528e-05, "loss": 0.1326, "step": 32154 }, { "epoch": 0.573520493703849, "grad_norm": 0.2720085680484772, "learning_rate": 2.2948497664956514e-05, "loss": 0.138, "step": 32155 }, { "epoch": 0.5735383298255627, "grad_norm": 0.32655903697013855, "learning_rate": 2.294694641085008e-05, "loss": 0.1129, "step": 32156 }, { "epoch": 0.5735561659472764, "grad_norm": 0.17895837128162384, "learning_rate": 2.294539516470199e-05, "loss": 0.1469, "step": 32157 }, { "epoch": 0.5735740020689901, "grad_norm": 0.2596011161804199, "learning_rate": 2.294384392651825e-05, "loss": 0.1651, "step": 32158 }, { "epoch": 0.5735918381907038, "grad_norm": 0.21749746799468994, "learning_rate": 2.2942292696304878e-05, "loss": 0.1599, "step": 32159 }, { "epoch": 0.5736096743124175, "grad_norm": 0.21060678362846375, "learning_rate": 2.2940741474067902e-05, "loss": 0.1095, "step": 32160 }, { "epoch": 0.5736275104341312, "grad_norm": 0.27059638500213623, "learning_rate": 2.2939190259813324e-05, "loss": 0.1457, "step": 32161 }, { "epoch": 0.5736453465558449, "grad_norm": 0.3091186583042145, "learning_rate": 2.2937639053547155e-05, "loss": 0.1242, "step": 32162 }, { "epoch": 0.5736631826775586, "grad_norm": 0.3126698136329651, "learning_rate": 2.2936087855275398e-05, "loss": 0.1473, "step": 32163 }, { "epoch": 0.5736810187992722, "grad_norm": 0.31462979316711426, "learning_rate": 2.293453666500409e-05, "loss": 0.1228, "step": 32164 }, { "epoch": 0.5736988549209859, "grad_norm": 0.362069308757782, "learning_rate": 2.2932985482739223e-05, "loss": 0.1807, "step": 32165 }, { "epoch": 0.5737166910426996, "grad_norm": 0.39632242918014526, "learning_rate": 2.2931434308486826e-05, "loss": 0.1493, "step": 32166 }, { "epoch": 0.5737345271644134, "grad_norm": 0.28127777576446533, "learning_rate": 2.292988314225289e-05, "loss": 0.1157, "step": 32167 }, { "epoch": 0.5737523632861271, "grad_norm": 0.23270058631896973, "learning_rate": 2.2928331984043454e-05, "loss": 0.1053, "step": 32168 }, { "epoch": 0.5737701994078408, "grad_norm": 0.23575817048549652, "learning_rate": 2.2926780833864523e-05, "loss": 0.1141, "step": 32169 }, { "epoch": 0.5737880355295545, "grad_norm": 0.23141250014305115, "learning_rate": 2.2925229691722102e-05, "loss": 0.1524, "step": 32170 }, { "epoch": 0.5738058716512682, "grad_norm": 0.24286416172981262, "learning_rate": 2.29236785576222e-05, "loss": 0.109, "step": 32171 }, { "epoch": 0.5738237077729819, "grad_norm": 0.22819821536540985, "learning_rate": 2.292212743157084e-05, "loss": 0.1425, "step": 32172 }, { "epoch": 0.5738415438946955, "grad_norm": 0.5288501977920532, "learning_rate": 2.2920576313574033e-05, "loss": 0.1163, "step": 32173 }, { "epoch": 0.5738593800164092, "grad_norm": 0.2385987788438797, "learning_rate": 2.2919025203637793e-05, "loss": 0.0881, "step": 32174 }, { "epoch": 0.5738772161381229, "grad_norm": 0.3185761868953705, "learning_rate": 2.291747410176813e-05, "loss": 0.1443, "step": 32175 }, { "epoch": 0.5738950522598366, "grad_norm": 0.20291152596473694, "learning_rate": 2.2915923007971046e-05, "loss": 0.1127, "step": 32176 }, { "epoch": 0.5739128883815503, "grad_norm": 0.2372930943965912, "learning_rate": 2.291437192225258e-05, "loss": 0.1167, "step": 32177 }, { "epoch": 0.573930724503264, "grad_norm": 0.2707439064979553, "learning_rate": 2.291282084461872e-05, "loss": 0.1334, "step": 32178 }, { "epoch": 0.5739485606249777, "grad_norm": 0.2781921625137329, "learning_rate": 2.291126977507549e-05, "loss": 0.1281, "step": 32179 }, { "epoch": 0.5739663967466914, "grad_norm": 0.21838612854480743, "learning_rate": 2.2909718713628888e-05, "loss": 0.1075, "step": 32180 }, { "epoch": 0.573984232868405, "grad_norm": 0.23490910232067108, "learning_rate": 2.2908167660284952e-05, "loss": 0.1509, "step": 32181 }, { "epoch": 0.5740020689901187, "grad_norm": 0.22514356672763824, "learning_rate": 2.290661661504968e-05, "loss": 0.1067, "step": 32182 }, { "epoch": 0.5740199051118324, "grad_norm": 0.25914818048477173, "learning_rate": 2.2905065577929085e-05, "loss": 0.1273, "step": 32183 }, { "epoch": 0.5740377412335462, "grad_norm": 0.25469735264778137, "learning_rate": 2.2903514548929185e-05, "loss": 0.1143, "step": 32184 }, { "epoch": 0.5740555773552599, "grad_norm": 0.23247644305229187, "learning_rate": 2.2901963528055966e-05, "loss": 0.1127, "step": 32185 }, { "epoch": 0.5740734134769736, "grad_norm": 0.3107673227787018, "learning_rate": 2.2900412515315473e-05, "loss": 0.1773, "step": 32186 }, { "epoch": 0.5740912495986873, "grad_norm": 0.27206945419311523, "learning_rate": 2.289886151071371e-05, "loss": 0.1132, "step": 32187 }, { "epoch": 0.574109085720401, "grad_norm": 0.41328123211860657, "learning_rate": 2.2897310514256687e-05, "loss": 0.1392, "step": 32188 }, { "epoch": 0.5741269218421147, "grad_norm": 0.3105325698852539, "learning_rate": 2.28957595259504e-05, "loss": 0.13, "step": 32189 }, { "epoch": 0.5741447579638284, "grad_norm": 0.22780832648277283, "learning_rate": 2.289420854580089e-05, "loss": 0.0973, "step": 32190 }, { "epoch": 0.574162594085542, "grad_norm": 0.2111360728740692, "learning_rate": 2.2892657573814153e-05, "loss": 0.0907, "step": 32191 }, { "epoch": 0.5741804302072557, "grad_norm": 0.24277439713478088, "learning_rate": 2.28911066099962e-05, "loss": 0.1429, "step": 32192 }, { "epoch": 0.5741982663289694, "grad_norm": 0.25361335277557373, "learning_rate": 2.2889555654353046e-05, "loss": 0.1214, "step": 32193 }, { "epoch": 0.5742161024506831, "grad_norm": 0.2720125615596771, "learning_rate": 2.2888004706890702e-05, "loss": 0.1039, "step": 32194 }, { "epoch": 0.5742339385723968, "grad_norm": 0.26404550671577454, "learning_rate": 2.2886453767615185e-05, "loss": 0.1523, "step": 32195 }, { "epoch": 0.5742517746941105, "grad_norm": 0.3610498905181885, "learning_rate": 2.2884902836532504e-05, "loss": 0.1361, "step": 32196 }, { "epoch": 0.5742696108158242, "grad_norm": 0.28425687551498413, "learning_rate": 2.288335191364867e-05, "loss": 0.1539, "step": 32197 }, { "epoch": 0.5742874469375379, "grad_norm": 0.23330342769622803, "learning_rate": 2.2881800998969687e-05, "loss": 0.1322, "step": 32198 }, { "epoch": 0.5743052830592515, "grad_norm": 0.2934582531452179, "learning_rate": 2.2880250092501583e-05, "loss": 0.149, "step": 32199 }, { "epoch": 0.5743231191809652, "grad_norm": 0.2155960649251938, "learning_rate": 2.287869919425036e-05, "loss": 0.1272, "step": 32200 }, { "epoch": 0.574340955302679, "grad_norm": 0.21304568648338318, "learning_rate": 2.2877148304222033e-05, "loss": 0.1113, "step": 32201 }, { "epoch": 0.5743587914243927, "grad_norm": 0.3859685957431793, "learning_rate": 2.2875597422422602e-05, "loss": 0.1098, "step": 32202 }, { "epoch": 0.5743766275461064, "grad_norm": 0.24139446020126343, "learning_rate": 2.2874046548858103e-05, "loss": 0.1257, "step": 32203 }, { "epoch": 0.5743944636678201, "grad_norm": 0.23132754862308502, "learning_rate": 2.287249568353453e-05, "loss": 0.0869, "step": 32204 }, { "epoch": 0.5744122997895338, "grad_norm": 0.23843498528003693, "learning_rate": 2.28709448264579e-05, "loss": 0.1416, "step": 32205 }, { "epoch": 0.5744301359112475, "grad_norm": 0.2811092138290405, "learning_rate": 2.2869393977634212e-05, "loss": 0.1327, "step": 32206 }, { "epoch": 0.5744479720329612, "grad_norm": 0.2144453227519989, "learning_rate": 2.28678431370695e-05, "loss": 0.0985, "step": 32207 }, { "epoch": 0.5744658081546749, "grad_norm": 0.33172857761383057, "learning_rate": 2.286629230476976e-05, "loss": 0.1485, "step": 32208 }, { "epoch": 0.5744836442763885, "grad_norm": 0.2953266203403473, "learning_rate": 2.2864741480741014e-05, "loss": 0.1626, "step": 32209 }, { "epoch": 0.5745014803981022, "grad_norm": 0.2817229926586151, "learning_rate": 2.2863190664989264e-05, "loss": 0.1411, "step": 32210 }, { "epoch": 0.5745193165198159, "grad_norm": 0.31147894263267517, "learning_rate": 2.2861639857520518e-05, "loss": 0.179, "step": 32211 }, { "epoch": 0.5745371526415296, "grad_norm": 0.3016244173049927, "learning_rate": 2.2860089058340802e-05, "loss": 0.1514, "step": 32212 }, { "epoch": 0.5745549887632433, "grad_norm": 0.3923414945602417, "learning_rate": 2.2858538267456117e-05, "loss": 0.1341, "step": 32213 }, { "epoch": 0.574572824884957, "grad_norm": 0.3515370488166809, "learning_rate": 2.2856987484872484e-05, "loss": 0.1453, "step": 32214 }, { "epoch": 0.5745906610066707, "grad_norm": 0.24974365532398224, "learning_rate": 2.2855436710595892e-05, "loss": 0.1069, "step": 32215 }, { "epoch": 0.5746084971283844, "grad_norm": 0.2985091805458069, "learning_rate": 2.2853885944632383e-05, "loss": 0.0915, "step": 32216 }, { "epoch": 0.5746263332500982, "grad_norm": 0.2817144989967346, "learning_rate": 2.285233518698795e-05, "loss": 0.1239, "step": 32217 }, { "epoch": 0.5746441693718118, "grad_norm": 0.3197835087776184, "learning_rate": 2.2850784437668613e-05, "loss": 0.1594, "step": 32218 }, { "epoch": 0.5746620054935255, "grad_norm": 0.3060835003852844, "learning_rate": 2.2849233696680362e-05, "loss": 0.1126, "step": 32219 }, { "epoch": 0.5746798416152392, "grad_norm": 0.2791560888290405, "learning_rate": 2.2847682964029236e-05, "loss": 0.185, "step": 32220 }, { "epoch": 0.5746976777369529, "grad_norm": 0.28000256419181824, "learning_rate": 2.284613223972123e-05, "loss": 0.1343, "step": 32221 }, { "epoch": 0.5747155138586666, "grad_norm": 0.25441911816596985, "learning_rate": 2.2844581523762365e-05, "loss": 0.155, "step": 32222 }, { "epoch": 0.5747333499803803, "grad_norm": 0.2840367555618286, "learning_rate": 2.2843030816158644e-05, "loss": 0.1878, "step": 32223 }, { "epoch": 0.574751186102094, "grad_norm": 0.3270607888698578, "learning_rate": 2.284148011691607e-05, "loss": 0.1382, "step": 32224 }, { "epoch": 0.5747690222238077, "grad_norm": 0.3001517653465271, "learning_rate": 2.283992942604068e-05, "loss": 0.0993, "step": 32225 }, { "epoch": 0.5747868583455213, "grad_norm": 0.37920400500297546, "learning_rate": 2.283837874353846e-05, "loss": 0.1394, "step": 32226 }, { "epoch": 0.574804694467235, "grad_norm": 0.21001584827899933, "learning_rate": 2.2836828069415435e-05, "loss": 0.128, "step": 32227 }, { "epoch": 0.5748225305889487, "grad_norm": 0.25147759914398193, "learning_rate": 2.283527740367761e-05, "loss": 0.1445, "step": 32228 }, { "epoch": 0.5748403667106624, "grad_norm": 0.26965656876564026, "learning_rate": 2.2833726746330995e-05, "loss": 0.1321, "step": 32229 }, { "epoch": 0.5748582028323761, "grad_norm": 0.24212568998336792, "learning_rate": 2.283217609738161e-05, "loss": 0.0776, "step": 32230 }, { "epoch": 0.5748760389540898, "grad_norm": 0.1760663092136383, "learning_rate": 2.2830625456835456e-05, "loss": 0.0613, "step": 32231 }, { "epoch": 0.5748938750758035, "grad_norm": 0.272725909948349, "learning_rate": 2.282907482469854e-05, "loss": 0.1953, "step": 32232 }, { "epoch": 0.5749117111975172, "grad_norm": 0.23237788677215576, "learning_rate": 2.2827524200976887e-05, "loss": 0.108, "step": 32233 }, { "epoch": 0.574929547319231, "grad_norm": 0.23438678681850433, "learning_rate": 2.2825973585676504e-05, "loss": 0.1062, "step": 32234 }, { "epoch": 0.5749473834409446, "grad_norm": 0.2420775592327118, "learning_rate": 2.282442297880339e-05, "loss": 0.1212, "step": 32235 }, { "epoch": 0.5749652195626583, "grad_norm": 0.26243868470191956, "learning_rate": 2.2822872380363572e-05, "loss": 0.116, "step": 32236 }, { "epoch": 0.574983055684372, "grad_norm": 0.2642301023006439, "learning_rate": 2.282132179036304e-05, "loss": 0.1473, "step": 32237 }, { "epoch": 0.5750008918060857, "grad_norm": 0.19997823238372803, "learning_rate": 2.281977120880783e-05, "loss": 0.1308, "step": 32238 }, { "epoch": 0.5750187279277994, "grad_norm": 0.2773137092590332, "learning_rate": 2.281822063570394e-05, "loss": 0.13, "step": 32239 }, { "epoch": 0.5750365640495131, "grad_norm": 0.418454647064209, "learning_rate": 2.2816670071057373e-05, "loss": 0.1025, "step": 32240 }, { "epoch": 0.5750544001712268, "grad_norm": 0.32250580191612244, "learning_rate": 2.2815119514874144e-05, "loss": 0.1277, "step": 32241 }, { "epoch": 0.5750722362929405, "grad_norm": 0.3185238838195801, "learning_rate": 2.281356896716027e-05, "loss": 0.1632, "step": 32242 }, { "epoch": 0.5750900724146542, "grad_norm": 0.24539116024971008, "learning_rate": 2.2812018427921767e-05, "loss": 0.2008, "step": 32243 }, { "epoch": 0.5751079085363678, "grad_norm": 0.24439223110675812, "learning_rate": 2.281046789716463e-05, "loss": 0.0915, "step": 32244 }, { "epoch": 0.5751257446580815, "grad_norm": 0.26436668634414673, "learning_rate": 2.2808917374894866e-05, "loss": 0.1539, "step": 32245 }, { "epoch": 0.5751435807797952, "grad_norm": 0.27153873443603516, "learning_rate": 2.280736686111851e-05, "loss": 0.1698, "step": 32246 }, { "epoch": 0.5751614169015089, "grad_norm": 0.26940545439720154, "learning_rate": 2.280581635584155e-05, "loss": 0.1216, "step": 32247 }, { "epoch": 0.5751792530232226, "grad_norm": 0.22699624300003052, "learning_rate": 2.2804265859070006e-05, "loss": 0.1065, "step": 32248 }, { "epoch": 0.5751970891449363, "grad_norm": 0.22343863546848297, "learning_rate": 2.2802715370809888e-05, "loss": 0.0993, "step": 32249 }, { "epoch": 0.57521492526665, "grad_norm": 0.27126502990722656, "learning_rate": 2.280116489106719e-05, "loss": 0.1248, "step": 32250 }, { "epoch": 0.5752327613883638, "grad_norm": 0.231038436293602, "learning_rate": 2.2799614419847953e-05, "loss": 0.1411, "step": 32251 }, { "epoch": 0.5752505975100775, "grad_norm": 0.2828325033187866, "learning_rate": 2.279806395715817e-05, "loss": 0.1709, "step": 32252 }, { "epoch": 0.5752684336317911, "grad_norm": 0.27043285965919495, "learning_rate": 2.2796513503003848e-05, "loss": 0.0826, "step": 32253 }, { "epoch": 0.5752862697535048, "grad_norm": 0.23741087317466736, "learning_rate": 2.279496305739099e-05, "loss": 0.1213, "step": 32254 }, { "epoch": 0.5753041058752185, "grad_norm": 0.2832827866077423, "learning_rate": 2.279341262032563e-05, "loss": 0.1685, "step": 32255 }, { "epoch": 0.5753219419969322, "grad_norm": 0.29893118143081665, "learning_rate": 2.279186219181376e-05, "loss": 0.1193, "step": 32256 }, { "epoch": 0.5753397781186459, "grad_norm": 0.28755274415016174, "learning_rate": 2.2790311771861397e-05, "loss": 0.1035, "step": 32257 }, { "epoch": 0.5753576142403596, "grad_norm": 0.22959105670452118, "learning_rate": 2.278876136047454e-05, "loss": 0.1221, "step": 32258 }, { "epoch": 0.5753754503620733, "grad_norm": 0.191476970911026, "learning_rate": 2.278721095765922e-05, "loss": 0.1004, "step": 32259 }, { "epoch": 0.575393286483787, "grad_norm": 0.49067234992980957, "learning_rate": 2.2785660563421432e-05, "loss": 0.1197, "step": 32260 }, { "epoch": 0.5754111226055006, "grad_norm": 0.30326080322265625, "learning_rate": 2.2784110177767186e-05, "loss": 0.1522, "step": 32261 }, { "epoch": 0.5754289587272143, "grad_norm": 0.20653213560581207, "learning_rate": 2.2782559800702494e-05, "loss": 0.0968, "step": 32262 }, { "epoch": 0.575446794848928, "grad_norm": 0.2553901672363281, "learning_rate": 2.278100943223336e-05, "loss": 0.1475, "step": 32263 }, { "epoch": 0.5754646309706417, "grad_norm": 0.21338555216789246, "learning_rate": 2.2779459072365808e-05, "loss": 0.1258, "step": 32264 }, { "epoch": 0.5754824670923554, "grad_norm": 0.28063151240348816, "learning_rate": 2.2777908721105843e-05, "loss": 0.1185, "step": 32265 }, { "epoch": 0.5755003032140691, "grad_norm": 0.3025532066822052, "learning_rate": 2.277635837845947e-05, "loss": 0.0916, "step": 32266 }, { "epoch": 0.5755181393357828, "grad_norm": 0.30818408727645874, "learning_rate": 2.2774808044432688e-05, "loss": 0.1837, "step": 32267 }, { "epoch": 0.5755359754574966, "grad_norm": 0.21512390673160553, "learning_rate": 2.277325771903153e-05, "loss": 0.1575, "step": 32268 }, { "epoch": 0.5755538115792103, "grad_norm": 0.31319689750671387, "learning_rate": 2.2771707402261988e-05, "loss": 0.1036, "step": 32269 }, { "epoch": 0.575571647700924, "grad_norm": 0.27915140986442566, "learning_rate": 2.2770157094130084e-05, "loss": 0.1317, "step": 32270 }, { "epoch": 0.5755894838226376, "grad_norm": 0.19175270199775696, "learning_rate": 2.276860679464181e-05, "loss": 0.0915, "step": 32271 }, { "epoch": 0.5756073199443513, "grad_norm": 0.2898296117782593, "learning_rate": 2.2767056503803197e-05, "loss": 0.1748, "step": 32272 }, { "epoch": 0.575625156066065, "grad_norm": 0.21256984770298004, "learning_rate": 2.2765506221620245e-05, "loss": 0.0868, "step": 32273 }, { "epoch": 0.5756429921877787, "grad_norm": 0.2054421603679657, "learning_rate": 2.2763955948098965e-05, "loss": 0.1103, "step": 32274 }, { "epoch": 0.5756608283094924, "grad_norm": 0.3292624056339264, "learning_rate": 2.2762405683245355e-05, "loss": 0.1189, "step": 32275 }, { "epoch": 0.5756786644312061, "grad_norm": 0.25626716017723083, "learning_rate": 2.2760855427065434e-05, "loss": 0.1148, "step": 32276 }, { "epoch": 0.5756965005529198, "grad_norm": 0.26967746019363403, "learning_rate": 2.2759305179565213e-05, "loss": 0.1372, "step": 32277 }, { "epoch": 0.5757143366746335, "grad_norm": 0.2222234308719635, "learning_rate": 2.2757754940750704e-05, "loss": 0.1239, "step": 32278 }, { "epoch": 0.5757321727963471, "grad_norm": 0.274392694234848, "learning_rate": 2.275620471062791e-05, "loss": 0.141, "step": 32279 }, { "epoch": 0.5757500089180608, "grad_norm": 0.289774090051651, "learning_rate": 2.275465448920283e-05, "loss": 0.1249, "step": 32280 }, { "epoch": 0.5757678450397745, "grad_norm": 0.24564850330352783, "learning_rate": 2.2753104276481496e-05, "loss": 0.1401, "step": 32281 }, { "epoch": 0.5757856811614882, "grad_norm": 0.29305174946784973, "learning_rate": 2.2751554072469904e-05, "loss": 0.1134, "step": 32282 }, { "epoch": 0.5758035172832019, "grad_norm": 0.2242419719696045, "learning_rate": 2.2750003877174065e-05, "loss": 0.1336, "step": 32283 }, { "epoch": 0.5758213534049156, "grad_norm": 0.2604919672012329, "learning_rate": 2.2748453690599977e-05, "loss": 0.0976, "step": 32284 }, { "epoch": 0.5758391895266294, "grad_norm": 0.2729603350162506, "learning_rate": 2.2746903512753677e-05, "loss": 0.1064, "step": 32285 }, { "epoch": 0.5758570256483431, "grad_norm": 0.2426808625459671, "learning_rate": 2.274535334364115e-05, "loss": 0.1417, "step": 32286 }, { "epoch": 0.5758748617700568, "grad_norm": 0.2500886023044586, "learning_rate": 2.2743803183268418e-05, "loss": 0.1016, "step": 32287 }, { "epoch": 0.5758926978917704, "grad_norm": 0.32663434743881226, "learning_rate": 2.2742253031641484e-05, "loss": 0.2067, "step": 32288 }, { "epoch": 0.5759105340134841, "grad_norm": 0.23015238344669342, "learning_rate": 2.274070288876634e-05, "loss": 0.1339, "step": 32289 }, { "epoch": 0.5759283701351978, "grad_norm": 0.3159938454627991, "learning_rate": 2.2739152754649025e-05, "loss": 0.1412, "step": 32290 }, { "epoch": 0.5759462062569115, "grad_norm": 0.24782168865203857, "learning_rate": 2.2737602629295535e-05, "loss": 0.1035, "step": 32291 }, { "epoch": 0.5759640423786252, "grad_norm": 0.24251790344715118, "learning_rate": 2.273605251271188e-05, "loss": 0.1304, "step": 32292 }, { "epoch": 0.5759818785003389, "grad_norm": 0.22524601221084595, "learning_rate": 2.273450240490406e-05, "loss": 0.0901, "step": 32293 }, { "epoch": 0.5759997146220526, "grad_norm": 0.29124411940574646, "learning_rate": 2.2732952305878098e-05, "loss": 0.1211, "step": 32294 }, { "epoch": 0.5760175507437663, "grad_norm": 0.24226750433444977, "learning_rate": 2.2731402215639995e-05, "loss": 0.1424, "step": 32295 }, { "epoch": 0.57603538686548, "grad_norm": 0.2564210593700409, "learning_rate": 2.2729852134195758e-05, "loss": 0.1443, "step": 32296 }, { "epoch": 0.5760532229871936, "grad_norm": 0.22292165458202362, "learning_rate": 2.27283020615514e-05, "loss": 0.094, "step": 32297 }, { "epoch": 0.5760710591089073, "grad_norm": 0.26974618434906006, "learning_rate": 2.2726751997712922e-05, "loss": 0.1365, "step": 32298 }, { "epoch": 0.576088895230621, "grad_norm": 0.2666942775249481, "learning_rate": 2.272520194268635e-05, "loss": 0.1485, "step": 32299 }, { "epoch": 0.5761067313523347, "grad_norm": 0.3342462182044983, "learning_rate": 2.2723651896477676e-05, "loss": 0.1473, "step": 32300 }, { "epoch": 0.5761245674740484, "grad_norm": 0.27080947160720825, "learning_rate": 2.2722101859092914e-05, "loss": 0.1132, "step": 32301 }, { "epoch": 0.5761424035957622, "grad_norm": 0.4054609537124634, "learning_rate": 2.2720551830538065e-05, "loss": 0.1572, "step": 32302 }, { "epoch": 0.5761602397174759, "grad_norm": 0.21025905013084412, "learning_rate": 2.271900181081915e-05, "loss": 0.1356, "step": 32303 }, { "epoch": 0.5761780758391896, "grad_norm": 0.2511407732963562, "learning_rate": 2.271745179994217e-05, "loss": 0.1376, "step": 32304 }, { "epoch": 0.5761959119609033, "grad_norm": 0.3078734576702118, "learning_rate": 2.271590179791314e-05, "loss": 0.1433, "step": 32305 }, { "epoch": 0.5762137480826169, "grad_norm": 0.2085607796907425, "learning_rate": 2.2714351804738054e-05, "loss": 0.1632, "step": 32306 }, { "epoch": 0.5762315842043306, "grad_norm": 0.3166585862636566, "learning_rate": 2.271280182042294e-05, "loss": 0.1412, "step": 32307 }, { "epoch": 0.5762494203260443, "grad_norm": 0.23929819464683533, "learning_rate": 2.2711251844973793e-05, "loss": 0.1149, "step": 32308 }, { "epoch": 0.576267256447758, "grad_norm": 0.26305484771728516, "learning_rate": 2.270970187839663e-05, "loss": 0.1546, "step": 32309 }, { "epoch": 0.5762850925694717, "grad_norm": 0.2060820460319519, "learning_rate": 2.2708151920697434e-05, "loss": 0.1282, "step": 32310 }, { "epoch": 0.5763029286911854, "grad_norm": 0.18784496188163757, "learning_rate": 2.2706601971882253e-05, "loss": 0.1005, "step": 32311 }, { "epoch": 0.5763207648128991, "grad_norm": 0.3025131821632385, "learning_rate": 2.2705052031957065e-05, "loss": 0.1328, "step": 32312 }, { "epoch": 0.5763386009346128, "grad_norm": 0.3148513734340668, "learning_rate": 2.270350210092789e-05, "loss": 0.1657, "step": 32313 }, { "epoch": 0.5763564370563264, "grad_norm": 0.2889856696128845, "learning_rate": 2.270195217880074e-05, "loss": 0.1316, "step": 32314 }, { "epoch": 0.5763742731780401, "grad_norm": 0.19084741175174713, "learning_rate": 2.2700402265581606e-05, "loss": 0.0864, "step": 32315 }, { "epoch": 0.5763921092997538, "grad_norm": 0.3317219913005829, "learning_rate": 2.2698852361276512e-05, "loss": 0.2156, "step": 32316 }, { "epoch": 0.5764099454214675, "grad_norm": 0.21589666604995728, "learning_rate": 2.269730246589146e-05, "loss": 0.0568, "step": 32317 }, { "epoch": 0.5764277815431813, "grad_norm": 0.2767912447452545, "learning_rate": 2.2695752579432467e-05, "loss": 0.1245, "step": 32318 }, { "epoch": 0.576445617664895, "grad_norm": 0.2305850386619568, "learning_rate": 2.2694202701905516e-05, "loss": 0.0695, "step": 32319 }, { "epoch": 0.5764634537866087, "grad_norm": 0.5760293006896973, "learning_rate": 2.2692652833316647e-05, "loss": 0.1479, "step": 32320 }, { "epoch": 0.5764812899083224, "grad_norm": 0.22271789610385895, "learning_rate": 2.2691102973671853e-05, "loss": 0.135, "step": 32321 }, { "epoch": 0.5764991260300361, "grad_norm": 0.34600135684013367, "learning_rate": 2.2689553122977138e-05, "loss": 0.1476, "step": 32322 }, { "epoch": 0.5765169621517497, "grad_norm": 0.2297000139951706, "learning_rate": 2.2688003281238505e-05, "loss": 0.1, "step": 32323 }, { "epoch": 0.5765347982734634, "grad_norm": 0.2916584610939026, "learning_rate": 2.268645344846198e-05, "loss": 0.2058, "step": 32324 }, { "epoch": 0.5765526343951771, "grad_norm": 0.23188558220863342, "learning_rate": 2.2684903624653553e-05, "loss": 0.1224, "step": 32325 }, { "epoch": 0.5765704705168908, "grad_norm": 0.2858402729034424, "learning_rate": 2.2683353809819244e-05, "loss": 0.1454, "step": 32326 }, { "epoch": 0.5765883066386045, "grad_norm": 0.27780696749687195, "learning_rate": 2.2681804003965057e-05, "loss": 0.1215, "step": 32327 }, { "epoch": 0.5766061427603182, "grad_norm": 0.245209738612175, "learning_rate": 2.2680254207096992e-05, "loss": 0.1607, "step": 32328 }, { "epoch": 0.5766239788820319, "grad_norm": 0.5074828267097473, "learning_rate": 2.2678704419221066e-05, "loss": 0.2376, "step": 32329 }, { "epoch": 0.5766418150037456, "grad_norm": 0.3205380439758301, "learning_rate": 2.267715464034329e-05, "loss": 0.1327, "step": 32330 }, { "epoch": 0.5766596511254592, "grad_norm": 0.23271895945072174, "learning_rate": 2.267560487046966e-05, "loss": 0.1457, "step": 32331 }, { "epoch": 0.5766774872471729, "grad_norm": 0.23441344499588013, "learning_rate": 2.2674055109606183e-05, "loss": 0.102, "step": 32332 }, { "epoch": 0.5766953233688866, "grad_norm": 0.23528803884983063, "learning_rate": 2.267250535775887e-05, "loss": 0.064, "step": 32333 }, { "epoch": 0.5767131594906003, "grad_norm": 0.34019744396209717, "learning_rate": 2.2670955614933743e-05, "loss": 0.1291, "step": 32334 }, { "epoch": 0.5767309956123141, "grad_norm": 0.24590329825878143, "learning_rate": 2.2669405881136795e-05, "loss": 0.1486, "step": 32335 }, { "epoch": 0.5767488317340278, "grad_norm": 0.3161316514015198, "learning_rate": 2.266785615637402e-05, "loss": 0.1303, "step": 32336 }, { "epoch": 0.5767666678557415, "grad_norm": 0.2143571823835373, "learning_rate": 2.2666306440651452e-05, "loss": 0.1343, "step": 32337 }, { "epoch": 0.5767845039774552, "grad_norm": 0.28213709592819214, "learning_rate": 2.2664756733975086e-05, "loss": 0.1254, "step": 32338 }, { "epoch": 0.5768023400991689, "grad_norm": 0.18796256184577942, "learning_rate": 2.2663207036350926e-05, "loss": 0.0971, "step": 32339 }, { "epoch": 0.5768201762208826, "grad_norm": 0.36780208349227905, "learning_rate": 2.266165734778499e-05, "loss": 0.1238, "step": 32340 }, { "epoch": 0.5768380123425962, "grad_norm": 0.30121201276779175, "learning_rate": 2.2660107668283262e-05, "loss": 0.1269, "step": 32341 }, { "epoch": 0.5768558484643099, "grad_norm": 0.46117663383483887, "learning_rate": 2.2658557997851777e-05, "loss": 0.1618, "step": 32342 }, { "epoch": 0.5768736845860236, "grad_norm": 0.27574294805526733, "learning_rate": 2.2657008336496532e-05, "loss": 0.1404, "step": 32343 }, { "epoch": 0.5768915207077373, "grad_norm": 0.24472962319850922, "learning_rate": 2.2655458684223523e-05, "loss": 0.1387, "step": 32344 }, { "epoch": 0.576909356829451, "grad_norm": 0.2560674250125885, "learning_rate": 2.265390904103877e-05, "loss": 0.0876, "step": 32345 }, { "epoch": 0.5769271929511647, "grad_norm": 0.23681677877902985, "learning_rate": 2.2652359406948273e-05, "loss": 0.1161, "step": 32346 }, { "epoch": 0.5769450290728784, "grad_norm": 0.27823352813720703, "learning_rate": 2.265080978195805e-05, "loss": 0.164, "step": 32347 }, { "epoch": 0.5769628651945921, "grad_norm": 0.22411201894283295, "learning_rate": 2.2649260166074095e-05, "loss": 0.1097, "step": 32348 }, { "epoch": 0.5769807013163057, "grad_norm": 0.2635782063007355, "learning_rate": 2.2647710559302412e-05, "loss": 0.1394, "step": 32349 }, { "epoch": 0.5769985374380194, "grad_norm": 0.24432113766670227, "learning_rate": 2.2646160961649028e-05, "loss": 0.1375, "step": 32350 }, { "epoch": 0.5770163735597331, "grad_norm": 0.18400879204273224, "learning_rate": 2.2644611373119934e-05, "loss": 0.1159, "step": 32351 }, { "epoch": 0.5770342096814469, "grad_norm": 0.22061029076576233, "learning_rate": 2.2643061793721134e-05, "loss": 0.1237, "step": 32352 }, { "epoch": 0.5770520458031606, "grad_norm": 0.25662875175476074, "learning_rate": 2.2641512223458648e-05, "loss": 0.1133, "step": 32353 }, { "epoch": 0.5770698819248743, "grad_norm": 0.20533181726932526, "learning_rate": 2.2639962662338466e-05, "loss": 0.0927, "step": 32354 }, { "epoch": 0.577087718046588, "grad_norm": 0.31295469403266907, "learning_rate": 2.263841311036661e-05, "loss": 0.1318, "step": 32355 }, { "epoch": 0.5771055541683017, "grad_norm": 0.2255227118730545, "learning_rate": 2.2636863567549082e-05, "loss": 0.1432, "step": 32356 }, { "epoch": 0.5771233902900154, "grad_norm": 0.17697836458683014, "learning_rate": 2.2635314033891887e-05, "loss": 0.1064, "step": 32357 }, { "epoch": 0.577141226411729, "grad_norm": 0.2235167920589447, "learning_rate": 2.2633764509401022e-05, "loss": 0.1375, "step": 32358 }, { "epoch": 0.5771590625334427, "grad_norm": 0.2680342495441437, "learning_rate": 2.2632214994082514e-05, "loss": 0.1587, "step": 32359 }, { "epoch": 0.5771768986551564, "grad_norm": 0.34540942311286926, "learning_rate": 2.2630665487942353e-05, "loss": 0.0837, "step": 32360 }, { "epoch": 0.5771947347768701, "grad_norm": 0.22852060198783875, "learning_rate": 2.2629115990986553e-05, "loss": 0.0948, "step": 32361 }, { "epoch": 0.5772125708985838, "grad_norm": 0.24823763966560364, "learning_rate": 2.2627566503221112e-05, "loss": 0.1024, "step": 32362 }, { "epoch": 0.5772304070202975, "grad_norm": 0.27845174074172974, "learning_rate": 2.2626017024652053e-05, "loss": 0.139, "step": 32363 }, { "epoch": 0.5772482431420112, "grad_norm": 0.2625521123409271, "learning_rate": 2.262446755528537e-05, "loss": 0.0898, "step": 32364 }, { "epoch": 0.5772660792637249, "grad_norm": 0.23189617693424225, "learning_rate": 2.262291809512707e-05, "loss": 0.1198, "step": 32365 }, { "epoch": 0.5772839153854386, "grad_norm": 0.1825329214334488, "learning_rate": 2.2621368644183158e-05, "loss": 0.1378, "step": 32366 }, { "epoch": 0.5773017515071522, "grad_norm": 0.2309417873620987, "learning_rate": 2.2619819202459636e-05, "loss": 0.1346, "step": 32367 }, { "epoch": 0.5773195876288659, "grad_norm": 0.22425667941570282, "learning_rate": 2.261826976996253e-05, "loss": 0.1211, "step": 32368 }, { "epoch": 0.5773374237505797, "grad_norm": 0.3045188784599304, "learning_rate": 2.261672034669783e-05, "loss": 0.1261, "step": 32369 }, { "epoch": 0.5773552598722934, "grad_norm": 0.34467121958732605, "learning_rate": 2.2615170932671544e-05, "loss": 0.1003, "step": 32370 }, { "epoch": 0.5773730959940071, "grad_norm": 0.1908007562160492, "learning_rate": 2.2613621527889673e-05, "loss": 0.1012, "step": 32371 }, { "epoch": 0.5773909321157208, "grad_norm": 0.2497616857290268, "learning_rate": 2.2612072132358236e-05, "loss": 0.1163, "step": 32372 }, { "epoch": 0.5774087682374345, "grad_norm": 0.3948986530303955, "learning_rate": 2.261052274608323e-05, "loss": 0.1301, "step": 32373 }, { "epoch": 0.5774266043591482, "grad_norm": 0.28548774123191833, "learning_rate": 2.2608973369070666e-05, "loss": 0.1267, "step": 32374 }, { "epoch": 0.5774444404808619, "grad_norm": 0.3578229546546936, "learning_rate": 2.2607424001326537e-05, "loss": 0.1558, "step": 32375 }, { "epoch": 0.5774622766025755, "grad_norm": 0.2500741183757782, "learning_rate": 2.260587464285687e-05, "loss": 0.1213, "step": 32376 }, { "epoch": 0.5774801127242892, "grad_norm": 0.3045952618122101, "learning_rate": 2.260432529366766e-05, "loss": 0.159, "step": 32377 }, { "epoch": 0.5774979488460029, "grad_norm": 0.2800423502922058, "learning_rate": 2.260277595376491e-05, "loss": 0.1178, "step": 32378 }, { "epoch": 0.5775157849677166, "grad_norm": 0.6150677800178528, "learning_rate": 2.2601226623154625e-05, "loss": 0.1378, "step": 32379 }, { "epoch": 0.5775336210894303, "grad_norm": 0.25808438658714294, "learning_rate": 2.2599677301842813e-05, "loss": 0.1337, "step": 32380 }, { "epoch": 0.577551457211144, "grad_norm": 0.28601396083831787, "learning_rate": 2.2598127989835482e-05, "loss": 0.1315, "step": 32381 }, { "epoch": 0.5775692933328577, "grad_norm": 0.22821418941020966, "learning_rate": 2.2596578687138642e-05, "loss": 0.0986, "step": 32382 }, { "epoch": 0.5775871294545714, "grad_norm": 0.23115041851997375, "learning_rate": 2.2595029393758294e-05, "loss": 0.0956, "step": 32383 }, { "epoch": 0.577604965576285, "grad_norm": 0.27500250935554504, "learning_rate": 2.259348010970043e-05, "loss": 0.1488, "step": 32384 }, { "epoch": 0.5776228016979987, "grad_norm": 0.3548430800437927, "learning_rate": 2.259193083497108e-05, "loss": 0.0995, "step": 32385 }, { "epoch": 0.5776406378197125, "grad_norm": 0.350884348154068, "learning_rate": 2.2590381569576237e-05, "loss": 0.1162, "step": 32386 }, { "epoch": 0.5776584739414262, "grad_norm": 0.2650716304779053, "learning_rate": 2.2588832313521904e-05, "loss": 0.1265, "step": 32387 }, { "epoch": 0.5776763100631399, "grad_norm": 0.22411689162254333, "learning_rate": 2.2587283066814088e-05, "loss": 0.0905, "step": 32388 }, { "epoch": 0.5776941461848536, "grad_norm": 0.32652562856674194, "learning_rate": 2.2585733829458796e-05, "loss": 0.1306, "step": 32389 }, { "epoch": 0.5777119823065673, "grad_norm": 0.29913076758384705, "learning_rate": 2.2584184601462042e-05, "loss": 0.1058, "step": 32390 }, { "epoch": 0.577729818428281, "grad_norm": 0.42833200097084045, "learning_rate": 2.258263538282982e-05, "loss": 0.1677, "step": 32391 }, { "epoch": 0.5777476545499947, "grad_norm": 0.31012651324272156, "learning_rate": 2.2581086173568138e-05, "loss": 0.148, "step": 32392 }, { "epoch": 0.5777654906717083, "grad_norm": 0.20969222486019135, "learning_rate": 2.2579536973682992e-05, "loss": 0.1604, "step": 32393 }, { "epoch": 0.577783326793422, "grad_norm": 0.2786845266819, "learning_rate": 2.2577987783180403e-05, "loss": 0.1268, "step": 32394 }, { "epoch": 0.5778011629151357, "grad_norm": 0.3049396574497223, "learning_rate": 2.257643860206637e-05, "loss": 0.1898, "step": 32395 }, { "epoch": 0.5778189990368494, "grad_norm": 0.23088280856609344, "learning_rate": 2.2574889430346903e-05, "loss": 0.1102, "step": 32396 }, { "epoch": 0.5778368351585631, "grad_norm": 0.24143581092357635, "learning_rate": 2.257334026802799e-05, "loss": 0.1312, "step": 32397 }, { "epoch": 0.5778546712802768, "grad_norm": 0.3001772165298462, "learning_rate": 2.257179111511566e-05, "loss": 0.1872, "step": 32398 }, { "epoch": 0.5778725074019905, "grad_norm": 0.23794269561767578, "learning_rate": 2.2570241971615903e-05, "loss": 0.1221, "step": 32399 }, { "epoch": 0.5778903435237042, "grad_norm": 0.20139265060424805, "learning_rate": 2.2568692837534724e-05, "loss": 0.1183, "step": 32400 }, { "epoch": 0.5779081796454179, "grad_norm": 0.28829342126846313, "learning_rate": 2.2567143712878135e-05, "loss": 0.145, "step": 32401 }, { "epoch": 0.5779260157671315, "grad_norm": 0.26882028579711914, "learning_rate": 2.2565594597652133e-05, "loss": 0.1549, "step": 32402 }, { "epoch": 0.5779438518888453, "grad_norm": 0.3388108015060425, "learning_rate": 2.256404549186273e-05, "loss": 0.1362, "step": 32403 }, { "epoch": 0.577961688010559, "grad_norm": 0.33914968371391296, "learning_rate": 2.2562496395515935e-05, "loss": 0.1498, "step": 32404 }, { "epoch": 0.5779795241322727, "grad_norm": 0.3868884742259979, "learning_rate": 2.256094730861774e-05, "loss": 0.1174, "step": 32405 }, { "epoch": 0.5779973602539864, "grad_norm": 0.2845574617385864, "learning_rate": 2.2559398231174147e-05, "loss": 0.1387, "step": 32406 }, { "epoch": 0.5780151963757001, "grad_norm": 0.25631916522979736, "learning_rate": 2.2557849163191184e-05, "loss": 0.106, "step": 32407 }, { "epoch": 0.5780330324974138, "grad_norm": 0.3559955358505249, "learning_rate": 2.255630010467483e-05, "loss": 0.1167, "step": 32408 }, { "epoch": 0.5780508686191275, "grad_norm": 0.2889615297317505, "learning_rate": 2.2554751055631107e-05, "loss": 0.1705, "step": 32409 }, { "epoch": 0.5780687047408412, "grad_norm": 0.32917043566703796, "learning_rate": 2.2553202016066004e-05, "loss": 0.1252, "step": 32410 }, { "epoch": 0.5780865408625548, "grad_norm": 0.26466673612594604, "learning_rate": 2.2551652985985546e-05, "loss": 0.1176, "step": 32411 }, { "epoch": 0.5781043769842685, "grad_norm": 0.26158544421195984, "learning_rate": 2.2550103965395726e-05, "loss": 0.1075, "step": 32412 }, { "epoch": 0.5781222131059822, "grad_norm": 0.22283294796943665, "learning_rate": 2.2548554954302552e-05, "loss": 0.0809, "step": 32413 }, { "epoch": 0.5781400492276959, "grad_norm": 0.30263715982437134, "learning_rate": 2.2547005952712013e-05, "loss": 0.1499, "step": 32414 }, { "epoch": 0.5781578853494096, "grad_norm": 0.2318008989095688, "learning_rate": 2.2545456960630134e-05, "loss": 0.1196, "step": 32415 }, { "epoch": 0.5781757214711233, "grad_norm": 0.28474387526512146, "learning_rate": 2.2543907978062912e-05, "loss": 0.1372, "step": 32416 }, { "epoch": 0.578193557592837, "grad_norm": 0.22017902135849, "learning_rate": 2.2542359005016353e-05, "loss": 0.1422, "step": 32417 }, { "epoch": 0.5782113937145507, "grad_norm": 0.27815932035446167, "learning_rate": 2.2540810041496456e-05, "loss": 0.071, "step": 32418 }, { "epoch": 0.5782292298362645, "grad_norm": 0.26125043630599976, "learning_rate": 2.2539261087509223e-05, "loss": 0.1602, "step": 32419 }, { "epoch": 0.5782470659579781, "grad_norm": 0.23184041678905487, "learning_rate": 2.2537712143060675e-05, "loss": 0.1161, "step": 32420 }, { "epoch": 0.5782649020796918, "grad_norm": 0.3446803689002991, "learning_rate": 2.2536163208156797e-05, "loss": 0.1358, "step": 32421 }, { "epoch": 0.5782827382014055, "grad_norm": 0.24228334426879883, "learning_rate": 2.2534614282803607e-05, "loss": 0.0953, "step": 32422 }, { "epoch": 0.5783005743231192, "grad_norm": 0.322874516248703, "learning_rate": 2.2533065367007094e-05, "loss": 0.1005, "step": 32423 }, { "epoch": 0.5783184104448329, "grad_norm": 0.3061692416667938, "learning_rate": 2.2531516460773282e-05, "loss": 0.1236, "step": 32424 }, { "epoch": 0.5783362465665466, "grad_norm": 0.24308371543884277, "learning_rate": 2.2529967564108164e-05, "loss": 0.1428, "step": 32425 }, { "epoch": 0.5783540826882603, "grad_norm": 0.2664937376976013, "learning_rate": 2.2528418677017746e-05, "loss": 0.1133, "step": 32426 }, { "epoch": 0.578371918809974, "grad_norm": 0.22126761078834534, "learning_rate": 2.252686979950802e-05, "loss": 0.1204, "step": 32427 }, { "epoch": 0.5783897549316876, "grad_norm": 0.25034287571907043, "learning_rate": 2.252532093158501e-05, "loss": 0.1152, "step": 32428 }, { "epoch": 0.5784075910534013, "grad_norm": 0.2439475655555725, "learning_rate": 2.252377207325471e-05, "loss": 0.1298, "step": 32429 }, { "epoch": 0.578425427175115, "grad_norm": 0.3364785611629486, "learning_rate": 2.2522223224523127e-05, "loss": 0.1418, "step": 32430 }, { "epoch": 0.5784432632968287, "grad_norm": 0.2142474353313446, "learning_rate": 2.252067438539626e-05, "loss": 0.1033, "step": 32431 }, { "epoch": 0.5784610994185424, "grad_norm": 0.30729854106903076, "learning_rate": 2.2519125555880108e-05, "loss": 0.0981, "step": 32432 }, { "epoch": 0.5784789355402561, "grad_norm": 0.285013347864151, "learning_rate": 2.251757673598069e-05, "loss": 0.1697, "step": 32433 }, { "epoch": 0.5784967716619698, "grad_norm": 0.23829345405101776, "learning_rate": 2.2516027925704005e-05, "loss": 0.1067, "step": 32434 }, { "epoch": 0.5785146077836835, "grad_norm": 0.23749589920043945, "learning_rate": 2.2514479125056048e-05, "loss": 0.0967, "step": 32435 }, { "epoch": 0.5785324439053973, "grad_norm": 0.23260420560836792, "learning_rate": 2.2512930334042826e-05, "loss": 0.1127, "step": 32436 }, { "epoch": 0.578550280027111, "grad_norm": 0.3212168216705322, "learning_rate": 2.2511381552670347e-05, "loss": 0.1618, "step": 32437 }, { "epoch": 0.5785681161488246, "grad_norm": 0.2994755208492279, "learning_rate": 2.2509832780944618e-05, "loss": 0.1751, "step": 32438 }, { "epoch": 0.5785859522705383, "grad_norm": 0.24331361055374146, "learning_rate": 2.2508284018871635e-05, "loss": 0.0889, "step": 32439 }, { "epoch": 0.578603788392252, "grad_norm": 0.2054992914199829, "learning_rate": 2.2506735266457397e-05, "loss": 0.1391, "step": 32440 }, { "epoch": 0.5786216245139657, "grad_norm": 0.29034626483917236, "learning_rate": 2.2505186523707923e-05, "loss": 0.121, "step": 32441 }, { "epoch": 0.5786394606356794, "grad_norm": 0.18746566772460938, "learning_rate": 2.250363779062921e-05, "loss": 0.1208, "step": 32442 }, { "epoch": 0.5786572967573931, "grad_norm": 0.20350977778434753, "learning_rate": 2.250208906722725e-05, "loss": 0.1132, "step": 32443 }, { "epoch": 0.5786751328791068, "grad_norm": 0.2571881115436554, "learning_rate": 2.250054035350806e-05, "loss": 0.1247, "step": 32444 }, { "epoch": 0.5786929690008205, "grad_norm": 0.33046942949295044, "learning_rate": 2.249899164947763e-05, "loss": 0.1379, "step": 32445 }, { "epoch": 0.5787108051225341, "grad_norm": 0.3338419795036316, "learning_rate": 2.2497442955141986e-05, "loss": 0.1815, "step": 32446 }, { "epoch": 0.5787286412442478, "grad_norm": 0.33364900946617126, "learning_rate": 2.2495894270507113e-05, "loss": 0.1732, "step": 32447 }, { "epoch": 0.5787464773659615, "grad_norm": 0.3296375870704651, "learning_rate": 2.2494345595579015e-05, "loss": 0.1019, "step": 32448 }, { "epoch": 0.5787643134876752, "grad_norm": 0.29567357897758484, "learning_rate": 2.2492796930363698e-05, "loss": 0.1187, "step": 32449 }, { "epoch": 0.5787821496093889, "grad_norm": 0.2151576429605484, "learning_rate": 2.249124827486717e-05, "loss": 0.1083, "step": 32450 }, { "epoch": 0.5787999857311026, "grad_norm": 0.33929628133773804, "learning_rate": 2.2489699629095432e-05, "loss": 0.1297, "step": 32451 }, { "epoch": 0.5788178218528163, "grad_norm": 0.2873328924179077, "learning_rate": 2.2488150993054484e-05, "loss": 0.1927, "step": 32452 }, { "epoch": 0.5788356579745301, "grad_norm": 0.30493971705436707, "learning_rate": 2.2486602366750322e-05, "loss": 0.1498, "step": 32453 }, { "epoch": 0.5788534940962438, "grad_norm": 0.23510156571865082, "learning_rate": 2.248505375018897e-05, "loss": 0.1286, "step": 32454 }, { "epoch": 0.5788713302179574, "grad_norm": 0.21999810636043549, "learning_rate": 2.2483505143376416e-05, "loss": 0.1391, "step": 32455 }, { "epoch": 0.5788891663396711, "grad_norm": 0.24669304490089417, "learning_rate": 2.248195654631866e-05, "loss": 0.1003, "step": 32456 }, { "epoch": 0.5789070024613848, "grad_norm": 0.29634609818458557, "learning_rate": 2.248040795902171e-05, "loss": 0.1001, "step": 32457 }, { "epoch": 0.5789248385830985, "grad_norm": 0.33521899580955505, "learning_rate": 2.2478859381491568e-05, "loss": 0.1302, "step": 32458 }, { "epoch": 0.5789426747048122, "grad_norm": 0.26556825637817383, "learning_rate": 2.2477310813734242e-05, "loss": 0.1692, "step": 32459 }, { "epoch": 0.5789605108265259, "grad_norm": 0.2117352932691574, "learning_rate": 2.2475762255755732e-05, "loss": 0.1016, "step": 32460 }, { "epoch": 0.5789783469482396, "grad_norm": 0.3086357116699219, "learning_rate": 2.247421370756204e-05, "loss": 0.1153, "step": 32461 }, { "epoch": 0.5789961830699533, "grad_norm": 0.2631135582923889, "learning_rate": 2.247266516915916e-05, "loss": 0.1228, "step": 32462 }, { "epoch": 0.579014019191667, "grad_norm": 0.18638308346271515, "learning_rate": 2.247111664055311e-05, "loss": 0.1271, "step": 32463 }, { "epoch": 0.5790318553133806, "grad_norm": 0.3414674997329712, "learning_rate": 2.246956812174988e-05, "loss": 0.1986, "step": 32464 }, { "epoch": 0.5790496914350943, "grad_norm": 0.17765916883945465, "learning_rate": 2.2468019612755487e-05, "loss": 0.1212, "step": 32465 }, { "epoch": 0.579067527556808, "grad_norm": 0.22507187724113464, "learning_rate": 2.246647111357591e-05, "loss": 0.1297, "step": 32466 }, { "epoch": 0.5790853636785217, "grad_norm": 0.26670798659324646, "learning_rate": 2.246492262421718e-05, "loss": 0.1125, "step": 32467 }, { "epoch": 0.5791031998002354, "grad_norm": 0.28797197341918945, "learning_rate": 2.2463374144685277e-05, "loss": 0.1079, "step": 32468 }, { "epoch": 0.5791210359219491, "grad_norm": 0.2610745131969452, "learning_rate": 2.246182567498622e-05, "loss": 0.158, "step": 32469 }, { "epoch": 0.5791388720436629, "grad_norm": 0.2122703343629837, "learning_rate": 2.2460277215125992e-05, "loss": 0.0729, "step": 32470 }, { "epoch": 0.5791567081653766, "grad_norm": 0.3137718737125397, "learning_rate": 2.245872876511061e-05, "loss": 0.1646, "step": 32471 }, { "epoch": 0.5791745442870903, "grad_norm": 0.2896655201911926, "learning_rate": 2.2457180324946078e-05, "loss": 0.1874, "step": 32472 }, { "epoch": 0.5791923804088039, "grad_norm": 0.33308401703834534, "learning_rate": 2.2455631894638395e-05, "loss": 0.1579, "step": 32473 }, { "epoch": 0.5792102165305176, "grad_norm": 0.2378171980381012, "learning_rate": 2.245408347419356e-05, "loss": 0.1393, "step": 32474 }, { "epoch": 0.5792280526522313, "grad_norm": 0.2899779975414276, "learning_rate": 2.245253506361757e-05, "loss": 0.1351, "step": 32475 }, { "epoch": 0.579245888773945, "grad_norm": 0.2345377653837204, "learning_rate": 2.245098666291644e-05, "loss": 0.1069, "step": 32476 }, { "epoch": 0.5792637248956587, "grad_norm": 0.2698560953140259, "learning_rate": 2.2449438272096164e-05, "loss": 0.133, "step": 32477 }, { "epoch": 0.5792815610173724, "grad_norm": 0.2843421697616577, "learning_rate": 2.244788989116275e-05, "loss": 0.1202, "step": 32478 }, { "epoch": 0.5792993971390861, "grad_norm": 0.27611538767814636, "learning_rate": 2.244634152012218e-05, "loss": 0.1557, "step": 32479 }, { "epoch": 0.5793172332607998, "grad_norm": 0.26356732845306396, "learning_rate": 2.244479315898049e-05, "loss": 0.1474, "step": 32480 }, { "epoch": 0.5793350693825134, "grad_norm": 0.28101345896720886, "learning_rate": 2.2443244807743665e-05, "loss": 0.1653, "step": 32481 }, { "epoch": 0.5793529055042271, "grad_norm": 0.28042471408843994, "learning_rate": 2.24416964664177e-05, "loss": 0.1305, "step": 32482 }, { "epoch": 0.5793707416259408, "grad_norm": 0.23569683730602264, "learning_rate": 2.24401481350086e-05, "loss": 0.1578, "step": 32483 }, { "epoch": 0.5793885777476545, "grad_norm": 0.34091052412986755, "learning_rate": 2.2438599813522375e-05, "loss": 0.173, "step": 32484 }, { "epoch": 0.5794064138693682, "grad_norm": 0.2379416525363922, "learning_rate": 2.243705150196502e-05, "loss": 0.1181, "step": 32485 }, { "epoch": 0.5794242499910819, "grad_norm": 0.2575030028820038, "learning_rate": 2.243550320034254e-05, "loss": 0.1356, "step": 32486 }, { "epoch": 0.5794420861127957, "grad_norm": 0.21261177957057953, "learning_rate": 2.2433954908660943e-05, "loss": 0.1235, "step": 32487 }, { "epoch": 0.5794599222345094, "grad_norm": 0.22560103237628937, "learning_rate": 2.2432406626926207e-05, "loss": 0.173, "step": 32488 }, { "epoch": 0.5794777583562231, "grad_norm": 0.24423782527446747, "learning_rate": 2.2430858355144358e-05, "loss": 0.1458, "step": 32489 }, { "epoch": 0.5794955944779367, "grad_norm": 0.3535788953304291, "learning_rate": 2.2429310093321394e-05, "loss": 0.1887, "step": 32490 }, { "epoch": 0.5795134305996504, "grad_norm": 0.2025614231824875, "learning_rate": 2.24277618414633e-05, "loss": 0.1213, "step": 32491 }, { "epoch": 0.5795312667213641, "grad_norm": 0.2531871795654297, "learning_rate": 2.2426213599576095e-05, "loss": 0.1716, "step": 32492 }, { "epoch": 0.5795491028430778, "grad_norm": 0.24019251763820648, "learning_rate": 2.2424665367665777e-05, "loss": 0.122, "step": 32493 }, { "epoch": 0.5795669389647915, "grad_norm": 0.31495600938796997, "learning_rate": 2.242311714573835e-05, "loss": 0.1664, "step": 32494 }, { "epoch": 0.5795847750865052, "grad_norm": 0.20914030075073242, "learning_rate": 2.2421568933799805e-05, "loss": 0.1135, "step": 32495 }, { "epoch": 0.5796026112082189, "grad_norm": 0.3683305084705353, "learning_rate": 2.2420020731856145e-05, "loss": 0.1826, "step": 32496 }, { "epoch": 0.5796204473299326, "grad_norm": 0.20640255510807037, "learning_rate": 2.2418472539913382e-05, "loss": 0.0893, "step": 32497 }, { "epoch": 0.5796382834516463, "grad_norm": 0.24882817268371582, "learning_rate": 2.241692435797751e-05, "loss": 0.1184, "step": 32498 }, { "epoch": 0.5796561195733599, "grad_norm": 0.3185776174068451, "learning_rate": 2.241537618605453e-05, "loss": 0.1087, "step": 32499 }, { "epoch": 0.5796739556950736, "grad_norm": 0.23715680837631226, "learning_rate": 2.2413828024150453e-05, "loss": 0.1367, "step": 32500 }, { "epoch": 0.5796917918167873, "grad_norm": 0.23971588909626007, "learning_rate": 2.2412279872271254e-05, "loss": 0.0904, "step": 32501 }, { "epoch": 0.579709627938501, "grad_norm": 0.27716606855392456, "learning_rate": 2.241073173042297e-05, "loss": 0.1338, "step": 32502 }, { "epoch": 0.5797274640602147, "grad_norm": 0.5109320878982544, "learning_rate": 2.2409183598611576e-05, "loss": 0.1947, "step": 32503 }, { "epoch": 0.5797453001819285, "grad_norm": 0.26904845237731934, "learning_rate": 2.240763547684308e-05, "loss": 0.0817, "step": 32504 }, { "epoch": 0.5797631363036422, "grad_norm": 0.24545560777187347, "learning_rate": 2.2406087365123486e-05, "loss": 0.1638, "step": 32505 }, { "epoch": 0.5797809724253559, "grad_norm": 0.49714016914367676, "learning_rate": 2.240453926345879e-05, "loss": 0.1396, "step": 32506 }, { "epoch": 0.5797988085470696, "grad_norm": 0.24684901535511017, "learning_rate": 2.2402991171855003e-05, "loss": 0.1294, "step": 32507 }, { "epoch": 0.5798166446687832, "grad_norm": 0.22937579452991486, "learning_rate": 2.2401443090318123e-05, "loss": 0.0887, "step": 32508 }, { "epoch": 0.5798344807904969, "grad_norm": 0.24646563827991486, "learning_rate": 2.2399895018854132e-05, "loss": 0.1634, "step": 32509 }, { "epoch": 0.5798523169122106, "grad_norm": 0.31046807765960693, "learning_rate": 2.2398346957469057e-05, "loss": 0.1536, "step": 32510 }, { "epoch": 0.5798701530339243, "grad_norm": 0.27808254957199097, "learning_rate": 2.2396798906168888e-05, "loss": 0.1182, "step": 32511 }, { "epoch": 0.579887989155638, "grad_norm": 0.27484357357025146, "learning_rate": 2.239525086495962e-05, "loss": 0.1193, "step": 32512 }, { "epoch": 0.5799058252773517, "grad_norm": 0.31249159574508667, "learning_rate": 2.2393702833847264e-05, "loss": 0.119, "step": 32513 }, { "epoch": 0.5799236613990654, "grad_norm": 0.20423823595046997, "learning_rate": 2.2392154812837807e-05, "loss": 0.1026, "step": 32514 }, { "epoch": 0.5799414975207791, "grad_norm": 0.27444329857826233, "learning_rate": 2.239060680193727e-05, "loss": 0.1281, "step": 32515 }, { "epoch": 0.5799593336424927, "grad_norm": 0.21621066331863403, "learning_rate": 2.238905880115164e-05, "loss": 0.0878, "step": 32516 }, { "epoch": 0.5799771697642064, "grad_norm": 0.4129605293273926, "learning_rate": 2.238751081048692e-05, "loss": 0.1234, "step": 32517 }, { "epoch": 0.5799950058859201, "grad_norm": 0.3110547363758087, "learning_rate": 2.23859628299491e-05, "loss": 0.185, "step": 32518 }, { "epoch": 0.5800128420076338, "grad_norm": 0.2133747637271881, "learning_rate": 2.2384414859544204e-05, "loss": 0.0705, "step": 32519 }, { "epoch": 0.5800306781293476, "grad_norm": 0.2495887279510498, "learning_rate": 2.238286689927821e-05, "loss": 0.1426, "step": 32520 }, { "epoch": 0.5800485142510613, "grad_norm": 0.31482580304145813, "learning_rate": 2.2381318949157136e-05, "loss": 0.1074, "step": 32521 }, { "epoch": 0.580066350372775, "grad_norm": 0.23461973667144775, "learning_rate": 2.2379771009186963e-05, "loss": 0.1594, "step": 32522 }, { "epoch": 0.5800841864944887, "grad_norm": 0.2245056927204132, "learning_rate": 2.237822307937371e-05, "loss": 0.162, "step": 32523 }, { "epoch": 0.5801020226162024, "grad_norm": 0.2365114986896515, "learning_rate": 2.2376675159723377e-05, "loss": 0.1594, "step": 32524 }, { "epoch": 0.580119858737916, "grad_norm": 0.24609144032001495, "learning_rate": 2.2375127250241945e-05, "loss": 0.1301, "step": 32525 }, { "epoch": 0.5801376948596297, "grad_norm": 0.22959747910499573, "learning_rate": 2.2373579350935438e-05, "loss": 0.1206, "step": 32526 }, { "epoch": 0.5801555309813434, "grad_norm": 0.24409672617912292, "learning_rate": 2.2372031461809827e-05, "loss": 0.1072, "step": 32527 }, { "epoch": 0.5801733671030571, "grad_norm": 0.27131932973861694, "learning_rate": 2.2370483582871146e-05, "loss": 0.1113, "step": 32528 }, { "epoch": 0.5801912032247708, "grad_norm": 0.25985661149024963, "learning_rate": 2.2368935714125374e-05, "loss": 0.127, "step": 32529 }, { "epoch": 0.5802090393464845, "grad_norm": 0.26481959223747253, "learning_rate": 2.236738785557852e-05, "loss": 0.1245, "step": 32530 }, { "epoch": 0.5802268754681982, "grad_norm": 0.28355562686920166, "learning_rate": 2.236584000723657e-05, "loss": 0.0727, "step": 32531 }, { "epoch": 0.5802447115899119, "grad_norm": 0.39319220185279846, "learning_rate": 2.236429216910554e-05, "loss": 0.1827, "step": 32532 }, { "epoch": 0.5802625477116256, "grad_norm": 0.27143603563308716, "learning_rate": 2.2362744341191423e-05, "loss": 0.1551, "step": 32533 }, { "epoch": 0.5802803838333392, "grad_norm": 0.2402048259973526, "learning_rate": 2.2361196523500223e-05, "loss": 0.1651, "step": 32534 }, { "epoch": 0.5802982199550529, "grad_norm": 0.23064440488815308, "learning_rate": 2.2359648716037926e-05, "loss": 0.1135, "step": 32535 }, { "epoch": 0.5803160560767666, "grad_norm": 0.2531764805316925, "learning_rate": 2.235810091881055e-05, "loss": 0.1158, "step": 32536 }, { "epoch": 0.5803338921984804, "grad_norm": 0.23505248129367828, "learning_rate": 2.2356553131824093e-05, "loss": 0.102, "step": 32537 }, { "epoch": 0.5803517283201941, "grad_norm": 0.25851181149482727, "learning_rate": 2.2355005355084545e-05, "loss": 0.1176, "step": 32538 }, { "epoch": 0.5803695644419078, "grad_norm": 0.22408215701580048, "learning_rate": 2.235345758859791e-05, "loss": 0.1076, "step": 32539 }, { "epoch": 0.5803874005636215, "grad_norm": 0.2597883641719818, "learning_rate": 2.2351909832370183e-05, "loss": 0.1359, "step": 32540 }, { "epoch": 0.5804052366853352, "grad_norm": 0.31105777621269226, "learning_rate": 2.2350362086407367e-05, "loss": 0.1094, "step": 32541 }, { "epoch": 0.5804230728070489, "grad_norm": 0.32001444697380066, "learning_rate": 2.234881435071547e-05, "loss": 0.1197, "step": 32542 }, { "epoch": 0.5804409089287625, "grad_norm": 0.21128469705581665, "learning_rate": 2.2347266625300483e-05, "loss": 0.1162, "step": 32543 }, { "epoch": 0.5804587450504762, "grad_norm": 0.33103376626968384, "learning_rate": 2.23457189101684e-05, "loss": 0.1001, "step": 32544 }, { "epoch": 0.5804765811721899, "grad_norm": 0.3422320783138275, "learning_rate": 2.2344171205325234e-05, "loss": 0.1577, "step": 32545 }, { "epoch": 0.5804944172939036, "grad_norm": 0.24828532338142395, "learning_rate": 2.234262351077698e-05, "loss": 0.1562, "step": 32546 }, { "epoch": 0.5805122534156173, "grad_norm": 0.22820478677749634, "learning_rate": 2.2341075826529627e-05, "loss": 0.1282, "step": 32547 }, { "epoch": 0.580530089537331, "grad_norm": 0.3037009537220001, "learning_rate": 2.233952815258918e-05, "loss": 0.1515, "step": 32548 }, { "epoch": 0.5805479256590447, "grad_norm": 0.18708738684654236, "learning_rate": 2.2337980488961648e-05, "loss": 0.123, "step": 32549 }, { "epoch": 0.5805657617807584, "grad_norm": 0.269703209400177, "learning_rate": 2.2336432835653025e-05, "loss": 0.1482, "step": 32550 }, { "epoch": 0.580583597902472, "grad_norm": 0.24714531004428864, "learning_rate": 2.233488519266931e-05, "loss": 0.0766, "step": 32551 }, { "epoch": 0.5806014340241857, "grad_norm": 0.2163441777229309, "learning_rate": 2.2333337560016497e-05, "loss": 0.1151, "step": 32552 }, { "epoch": 0.5806192701458994, "grad_norm": 0.21663017570972443, "learning_rate": 2.2331789937700582e-05, "loss": 0.1529, "step": 32553 }, { "epoch": 0.5806371062676132, "grad_norm": 0.27300775051116943, "learning_rate": 2.2330242325727574e-05, "loss": 0.1695, "step": 32554 }, { "epoch": 0.5806549423893269, "grad_norm": 0.20462629199028015, "learning_rate": 2.2328694724103475e-05, "loss": 0.1232, "step": 32555 }, { "epoch": 0.5806727785110406, "grad_norm": 0.2590431272983551, "learning_rate": 2.232714713283428e-05, "loss": 0.1313, "step": 32556 }, { "epoch": 0.5806906146327543, "grad_norm": 0.2840895354747772, "learning_rate": 2.232559955192597e-05, "loss": 0.1604, "step": 32557 }, { "epoch": 0.580708450754468, "grad_norm": 0.2970852851867676, "learning_rate": 2.232405198138457e-05, "loss": 0.1134, "step": 32558 }, { "epoch": 0.5807262868761817, "grad_norm": 0.31411030888557434, "learning_rate": 2.2322504421216074e-05, "loss": 0.1581, "step": 32559 }, { "epoch": 0.5807441229978954, "grad_norm": 0.28287172317504883, "learning_rate": 2.2320956871426468e-05, "loss": 0.1238, "step": 32560 }, { "epoch": 0.580761959119609, "grad_norm": 0.27553895115852356, "learning_rate": 2.231940933202176e-05, "loss": 0.0806, "step": 32561 }, { "epoch": 0.5807797952413227, "grad_norm": 0.2762424945831299, "learning_rate": 2.2317861803007944e-05, "loss": 0.11, "step": 32562 }, { "epoch": 0.5807976313630364, "grad_norm": 0.2315920889377594, "learning_rate": 2.231631428439103e-05, "loss": 0.105, "step": 32563 }, { "epoch": 0.5808154674847501, "grad_norm": 0.23663736879825592, "learning_rate": 2.2314766776177007e-05, "loss": 0.1547, "step": 32564 }, { "epoch": 0.5808333036064638, "grad_norm": 0.22240598499774933, "learning_rate": 2.2313219278371876e-05, "loss": 0.1086, "step": 32565 }, { "epoch": 0.5808511397281775, "grad_norm": 0.31067079305648804, "learning_rate": 2.2311671790981625e-05, "loss": 0.0923, "step": 32566 }, { "epoch": 0.5808689758498912, "grad_norm": 0.2335953712463379, "learning_rate": 2.2310124314012272e-05, "loss": 0.1251, "step": 32567 }, { "epoch": 0.5808868119716049, "grad_norm": 0.30275240540504456, "learning_rate": 2.2308576847469802e-05, "loss": 0.1347, "step": 32568 }, { "epoch": 0.5809046480933185, "grad_norm": 0.25733524560928345, "learning_rate": 2.2307029391360224e-05, "loss": 0.0968, "step": 32569 }, { "epoch": 0.5809224842150322, "grad_norm": 0.30112549662590027, "learning_rate": 2.2305481945689517e-05, "loss": 0.1442, "step": 32570 }, { "epoch": 0.580940320336746, "grad_norm": 0.2403465062379837, "learning_rate": 2.2303934510463706e-05, "loss": 0.1165, "step": 32571 }, { "epoch": 0.5809581564584597, "grad_norm": 0.24589656293392181, "learning_rate": 2.2302387085688776e-05, "loss": 0.1269, "step": 32572 }, { "epoch": 0.5809759925801734, "grad_norm": 0.23497413098812103, "learning_rate": 2.2300839671370723e-05, "loss": 0.1373, "step": 32573 }, { "epoch": 0.5809938287018871, "grad_norm": 0.21979612112045288, "learning_rate": 2.229929226751554e-05, "loss": 0.1223, "step": 32574 }, { "epoch": 0.5810116648236008, "grad_norm": 0.2341279536485672, "learning_rate": 2.2297744874129235e-05, "loss": 0.1635, "step": 32575 }, { "epoch": 0.5810295009453145, "grad_norm": 0.4703749716281891, "learning_rate": 2.229619749121781e-05, "loss": 0.1178, "step": 32576 }, { "epoch": 0.5810473370670282, "grad_norm": 0.2579607367515564, "learning_rate": 2.2294650118787262e-05, "loss": 0.1474, "step": 32577 }, { "epoch": 0.5810651731887418, "grad_norm": 0.3578774034976959, "learning_rate": 2.229310275684358e-05, "loss": 0.1239, "step": 32578 }, { "epoch": 0.5810830093104555, "grad_norm": 0.2348346710205078, "learning_rate": 2.2291555405392756e-05, "loss": 0.1656, "step": 32579 }, { "epoch": 0.5811008454321692, "grad_norm": 0.215261310338974, "learning_rate": 2.229000806444081e-05, "loss": 0.082, "step": 32580 }, { "epoch": 0.5811186815538829, "grad_norm": 0.2548074722290039, "learning_rate": 2.2288460733993723e-05, "loss": 0.1051, "step": 32581 }, { "epoch": 0.5811365176755966, "grad_norm": 0.31394532322883606, "learning_rate": 2.2286913414057505e-05, "loss": 0.0837, "step": 32582 }, { "epoch": 0.5811543537973103, "grad_norm": 0.19115255773067474, "learning_rate": 2.2285366104638136e-05, "loss": 0.0986, "step": 32583 }, { "epoch": 0.581172189919024, "grad_norm": 0.28062525391578674, "learning_rate": 2.2283818805741637e-05, "loss": 0.1476, "step": 32584 }, { "epoch": 0.5811900260407377, "grad_norm": 0.24052348732948303, "learning_rate": 2.2282271517373995e-05, "loss": 0.1201, "step": 32585 }, { "epoch": 0.5812078621624513, "grad_norm": 0.4481671154499054, "learning_rate": 2.228072423954121e-05, "loss": 0.2107, "step": 32586 }, { "epoch": 0.581225698284165, "grad_norm": 0.44187813997268677, "learning_rate": 2.227917697224926e-05, "loss": 0.1524, "step": 32587 }, { "epoch": 0.5812435344058788, "grad_norm": 0.27583783864974976, "learning_rate": 2.2277629715504175e-05, "loss": 0.176, "step": 32588 }, { "epoch": 0.5812613705275925, "grad_norm": 0.23410040140151978, "learning_rate": 2.2276082469311932e-05, "loss": 0.1377, "step": 32589 }, { "epoch": 0.5812792066493062, "grad_norm": 0.3143663704395294, "learning_rate": 2.2274535233678535e-05, "loss": 0.1447, "step": 32590 }, { "epoch": 0.5812970427710199, "grad_norm": 0.20452502369880676, "learning_rate": 2.2272988008609984e-05, "loss": 0.156, "step": 32591 }, { "epoch": 0.5813148788927336, "grad_norm": 0.23313471674919128, "learning_rate": 2.2271440794112264e-05, "loss": 0.1153, "step": 32592 }, { "epoch": 0.5813327150144473, "grad_norm": 0.2702648937702179, "learning_rate": 2.2269893590191395e-05, "loss": 0.1148, "step": 32593 }, { "epoch": 0.581350551136161, "grad_norm": 0.19469690322875977, "learning_rate": 2.2268346396853354e-05, "loss": 0.1192, "step": 32594 }, { "epoch": 0.5813683872578747, "grad_norm": 0.31677988171577454, "learning_rate": 2.2266799214104144e-05, "loss": 0.1142, "step": 32595 }, { "epoch": 0.5813862233795883, "grad_norm": 0.8086874485015869, "learning_rate": 2.2265252041949765e-05, "loss": 0.1796, "step": 32596 }, { "epoch": 0.581404059501302, "grad_norm": 0.2221411019563675, "learning_rate": 2.2263704880396212e-05, "loss": 0.0916, "step": 32597 }, { "epoch": 0.5814218956230157, "grad_norm": 0.27320221066474915, "learning_rate": 2.226215772944949e-05, "loss": 0.152, "step": 32598 }, { "epoch": 0.5814397317447294, "grad_norm": 0.2692977488040924, "learning_rate": 2.2260610589115595e-05, "loss": 0.0969, "step": 32599 }, { "epoch": 0.5814575678664431, "grad_norm": 0.2220585197210312, "learning_rate": 2.2259063459400503e-05, "loss": 0.1319, "step": 32600 }, { "epoch": 0.5814754039881568, "grad_norm": 0.26261645555496216, "learning_rate": 2.2257516340310237e-05, "loss": 0.1536, "step": 32601 }, { "epoch": 0.5814932401098705, "grad_norm": 0.2868455648422241, "learning_rate": 2.2255969231850784e-05, "loss": 0.1225, "step": 32602 }, { "epoch": 0.5815110762315842, "grad_norm": 0.24366708099842072, "learning_rate": 2.2254422134028146e-05, "loss": 0.1646, "step": 32603 }, { "epoch": 0.5815289123532978, "grad_norm": 0.26461949944496155, "learning_rate": 2.2252875046848318e-05, "loss": 0.1334, "step": 32604 }, { "epoch": 0.5815467484750116, "grad_norm": 0.2557846009731293, "learning_rate": 2.225132797031728e-05, "loss": 0.1281, "step": 32605 }, { "epoch": 0.5815645845967253, "grad_norm": 0.23969800770282745, "learning_rate": 2.224978090444106e-05, "loss": 0.1209, "step": 32606 }, { "epoch": 0.581582420718439, "grad_norm": 0.24777325987815857, "learning_rate": 2.2248233849225636e-05, "loss": 0.1574, "step": 32607 }, { "epoch": 0.5816002568401527, "grad_norm": 0.26014086604118347, "learning_rate": 2.2246686804677004e-05, "loss": 0.1713, "step": 32608 }, { "epoch": 0.5816180929618664, "grad_norm": 0.19621795415878296, "learning_rate": 2.2245139770801163e-05, "loss": 0.1108, "step": 32609 }, { "epoch": 0.5816359290835801, "grad_norm": 0.14991632103919983, "learning_rate": 2.224359274760411e-05, "loss": 0.0583, "step": 32610 }, { "epoch": 0.5816537652052938, "grad_norm": 0.24359093606472015, "learning_rate": 2.2242045735091856e-05, "loss": 0.07, "step": 32611 }, { "epoch": 0.5816716013270075, "grad_norm": 0.24627834558486938, "learning_rate": 2.2240498733270377e-05, "loss": 0.1371, "step": 32612 }, { "epoch": 0.5816894374487211, "grad_norm": 0.2858821749687195, "learning_rate": 2.223895174214567e-05, "loss": 0.1467, "step": 32613 }, { "epoch": 0.5817072735704348, "grad_norm": 0.29005980491638184, "learning_rate": 2.2237404761723755e-05, "loss": 0.1175, "step": 32614 }, { "epoch": 0.5817251096921485, "grad_norm": 0.22842881083488464, "learning_rate": 2.223585779201061e-05, "loss": 0.1225, "step": 32615 }, { "epoch": 0.5817429458138622, "grad_norm": 0.2409631758928299, "learning_rate": 2.223431083301223e-05, "loss": 0.1331, "step": 32616 }, { "epoch": 0.5817607819355759, "grad_norm": 0.2476070672273636, "learning_rate": 2.223276388473462e-05, "loss": 0.1332, "step": 32617 }, { "epoch": 0.5817786180572896, "grad_norm": 0.2819904685020447, "learning_rate": 2.2231216947183763e-05, "loss": 0.0912, "step": 32618 }, { "epoch": 0.5817964541790033, "grad_norm": 0.25920283794403076, "learning_rate": 2.2229670020365677e-05, "loss": 0.1286, "step": 32619 }, { "epoch": 0.581814290300717, "grad_norm": 0.3017318546772003, "learning_rate": 2.222812310428635e-05, "loss": 0.1153, "step": 32620 }, { "epoch": 0.5818321264224308, "grad_norm": 0.19782114028930664, "learning_rate": 2.222657619895177e-05, "loss": 0.0501, "step": 32621 }, { "epoch": 0.5818499625441445, "grad_norm": 0.24279803037643433, "learning_rate": 2.2225029304367928e-05, "loss": 0.1084, "step": 32622 }, { "epoch": 0.5818677986658581, "grad_norm": 0.36125853657722473, "learning_rate": 2.2223482420540842e-05, "loss": 0.1765, "step": 32623 }, { "epoch": 0.5818856347875718, "grad_norm": 0.2242000550031662, "learning_rate": 2.2221935547476493e-05, "loss": 0.1181, "step": 32624 }, { "epoch": 0.5819034709092855, "grad_norm": 0.2644370496273041, "learning_rate": 2.2220388685180888e-05, "loss": 0.1236, "step": 32625 }, { "epoch": 0.5819213070309992, "grad_norm": 0.2659998834133148, "learning_rate": 2.2218841833660005e-05, "loss": 0.1615, "step": 32626 }, { "epoch": 0.5819391431527129, "grad_norm": 0.24852383136749268, "learning_rate": 2.221729499291986e-05, "loss": 0.1483, "step": 32627 }, { "epoch": 0.5819569792744266, "grad_norm": 0.21230261027812958, "learning_rate": 2.2215748162966445e-05, "loss": 0.1627, "step": 32628 }, { "epoch": 0.5819748153961403, "grad_norm": 0.2367599904537201, "learning_rate": 2.2214201343805744e-05, "loss": 0.0937, "step": 32629 }, { "epoch": 0.581992651517854, "grad_norm": 0.32215067744255066, "learning_rate": 2.2212654535443762e-05, "loss": 0.1222, "step": 32630 }, { "epoch": 0.5820104876395676, "grad_norm": 0.21733912825584412, "learning_rate": 2.2211107737886488e-05, "loss": 0.1288, "step": 32631 }, { "epoch": 0.5820283237612813, "grad_norm": 0.32909077405929565, "learning_rate": 2.2209560951139936e-05, "loss": 0.1802, "step": 32632 }, { "epoch": 0.582046159882995, "grad_norm": 0.2339247465133667, "learning_rate": 2.2208014175210083e-05, "loss": 0.0921, "step": 32633 }, { "epoch": 0.5820639960047087, "grad_norm": 0.3000536561012268, "learning_rate": 2.220646741010294e-05, "loss": 0.1533, "step": 32634 }, { "epoch": 0.5820818321264224, "grad_norm": 0.18525777757167816, "learning_rate": 2.2204920655824478e-05, "loss": 0.1147, "step": 32635 }, { "epoch": 0.5820996682481361, "grad_norm": 0.2412268966436386, "learning_rate": 2.2203373912380717e-05, "loss": 0.0987, "step": 32636 }, { "epoch": 0.5821175043698498, "grad_norm": 0.2635987102985382, "learning_rate": 2.2201827179777643e-05, "loss": 0.184, "step": 32637 }, { "epoch": 0.5821353404915636, "grad_norm": 0.33686697483062744, "learning_rate": 2.220028045802126e-05, "loss": 0.1325, "step": 32638 }, { "epoch": 0.5821531766132773, "grad_norm": 0.21539783477783203, "learning_rate": 2.2198733747117546e-05, "loss": 0.1264, "step": 32639 }, { "epoch": 0.582171012734991, "grad_norm": 0.314988911151886, "learning_rate": 2.2197187047072514e-05, "loss": 0.18, "step": 32640 }, { "epoch": 0.5821888488567046, "grad_norm": 0.2584691643714905, "learning_rate": 2.2195640357892156e-05, "loss": 0.1548, "step": 32641 }, { "epoch": 0.5822066849784183, "grad_norm": 0.2685898244380951, "learning_rate": 2.2194093679582464e-05, "loss": 0.1427, "step": 32642 }, { "epoch": 0.582224521100132, "grad_norm": 0.40392282605171204, "learning_rate": 2.219254701214943e-05, "loss": 0.1509, "step": 32643 }, { "epoch": 0.5822423572218457, "grad_norm": 0.4018021523952484, "learning_rate": 2.2191000355599053e-05, "loss": 0.171, "step": 32644 }, { "epoch": 0.5822601933435594, "grad_norm": 0.2413187026977539, "learning_rate": 2.2189453709937327e-05, "loss": 0.106, "step": 32645 }, { "epoch": 0.5822780294652731, "grad_norm": 0.19793254137039185, "learning_rate": 2.2187907075170255e-05, "loss": 0.1146, "step": 32646 }, { "epoch": 0.5822958655869868, "grad_norm": 0.3355720639228821, "learning_rate": 2.218636045130383e-05, "loss": 0.1907, "step": 32647 }, { "epoch": 0.5823137017087004, "grad_norm": 0.2272433191537857, "learning_rate": 2.218481383834403e-05, "loss": 0.1571, "step": 32648 }, { "epoch": 0.5823315378304141, "grad_norm": 0.251446396112442, "learning_rate": 2.2183267236296874e-05, "loss": 0.0966, "step": 32649 }, { "epoch": 0.5823493739521278, "grad_norm": 0.21056394279003143, "learning_rate": 2.218172064516835e-05, "loss": 0.1622, "step": 32650 }, { "epoch": 0.5823672100738415, "grad_norm": 0.33742034435272217, "learning_rate": 2.218017406496444e-05, "loss": 0.2206, "step": 32651 }, { "epoch": 0.5823850461955552, "grad_norm": 0.2827896475791931, "learning_rate": 2.2178627495691147e-05, "loss": 0.1065, "step": 32652 }, { "epoch": 0.5824028823172689, "grad_norm": 0.2870829105377197, "learning_rate": 2.217708093735448e-05, "loss": 0.153, "step": 32653 }, { "epoch": 0.5824207184389826, "grad_norm": 0.27159231901168823, "learning_rate": 2.217553438996042e-05, "loss": 0.1211, "step": 32654 }, { "epoch": 0.5824385545606964, "grad_norm": 0.23261034488677979, "learning_rate": 2.2173987853514964e-05, "loss": 0.1223, "step": 32655 }, { "epoch": 0.5824563906824101, "grad_norm": 0.2524707019329071, "learning_rate": 2.217244132802411e-05, "loss": 0.1129, "step": 32656 }, { "epoch": 0.5824742268041238, "grad_norm": 0.29308706521987915, "learning_rate": 2.2170894813493836e-05, "loss": 0.1545, "step": 32657 }, { "epoch": 0.5824920629258374, "grad_norm": 0.26639047265052795, "learning_rate": 2.216934830993016e-05, "loss": 0.1401, "step": 32658 }, { "epoch": 0.5825098990475511, "grad_norm": 0.3612046539783478, "learning_rate": 2.216780181733907e-05, "loss": 0.1245, "step": 32659 }, { "epoch": 0.5825277351692648, "grad_norm": 0.30385059118270874, "learning_rate": 2.216625533572656e-05, "loss": 0.1333, "step": 32660 }, { "epoch": 0.5825455712909785, "grad_norm": 0.31221461296081543, "learning_rate": 2.216470886509861e-05, "loss": 0.1156, "step": 32661 }, { "epoch": 0.5825634074126922, "grad_norm": 0.27799734473228455, "learning_rate": 2.2163162405461242e-05, "loss": 0.1421, "step": 32662 }, { "epoch": 0.5825812435344059, "grad_norm": 0.3311552405357361, "learning_rate": 2.2161615956820434e-05, "loss": 0.162, "step": 32663 }, { "epoch": 0.5825990796561196, "grad_norm": 0.2670689821243286, "learning_rate": 2.2160069519182177e-05, "loss": 0.1387, "step": 32664 }, { "epoch": 0.5826169157778333, "grad_norm": 0.2806749939918518, "learning_rate": 2.2158523092552476e-05, "loss": 0.1508, "step": 32665 }, { "epoch": 0.5826347518995469, "grad_norm": 0.22325634956359863, "learning_rate": 2.2156976676937313e-05, "loss": 0.1797, "step": 32666 }, { "epoch": 0.5826525880212606, "grad_norm": 0.2963971495628357, "learning_rate": 2.21554302723427e-05, "loss": 0.1076, "step": 32667 }, { "epoch": 0.5826704241429743, "grad_norm": 0.5901766419410706, "learning_rate": 2.2153883878774624e-05, "loss": 0.154, "step": 32668 }, { "epoch": 0.582688260264688, "grad_norm": 0.28280287981033325, "learning_rate": 2.2152337496239073e-05, "loss": 0.1569, "step": 32669 }, { "epoch": 0.5827060963864017, "grad_norm": 0.275160014629364, "learning_rate": 2.2150791124742037e-05, "loss": 0.1821, "step": 32670 }, { "epoch": 0.5827239325081154, "grad_norm": 0.2983241081237793, "learning_rate": 2.214924476428953e-05, "loss": 0.1412, "step": 32671 }, { "epoch": 0.5827417686298292, "grad_norm": 0.26532986760139465, "learning_rate": 2.2147698414887528e-05, "loss": 0.1163, "step": 32672 }, { "epoch": 0.5827596047515429, "grad_norm": 0.19336272776126862, "learning_rate": 2.2146152076542038e-05, "loss": 0.1103, "step": 32673 }, { "epoch": 0.5827774408732566, "grad_norm": 0.23490720987319946, "learning_rate": 2.2144605749259038e-05, "loss": 0.0891, "step": 32674 }, { "epoch": 0.5827952769949702, "grad_norm": 0.25227001309394836, "learning_rate": 2.214305943304454e-05, "loss": 0.1409, "step": 32675 }, { "epoch": 0.5828131131166839, "grad_norm": 0.23049236834049225, "learning_rate": 2.2141513127904533e-05, "loss": 0.1073, "step": 32676 }, { "epoch": 0.5828309492383976, "grad_norm": 0.249522864818573, "learning_rate": 2.2139966833845012e-05, "loss": 0.1197, "step": 32677 }, { "epoch": 0.5828487853601113, "grad_norm": 0.2541482448577881, "learning_rate": 2.213842055087195e-05, "loss": 0.1539, "step": 32678 }, { "epoch": 0.582866621481825, "grad_norm": 0.2356705665588379, "learning_rate": 2.213687427899137e-05, "loss": 0.1259, "step": 32679 }, { "epoch": 0.5828844576035387, "grad_norm": 0.3538076877593994, "learning_rate": 2.2135328018209255e-05, "loss": 0.1501, "step": 32680 }, { "epoch": 0.5829022937252524, "grad_norm": 0.30948957800865173, "learning_rate": 2.2133781768531597e-05, "loss": 0.172, "step": 32681 }, { "epoch": 0.5829201298469661, "grad_norm": 0.22080104053020477, "learning_rate": 2.2132235529964392e-05, "loss": 0.13, "step": 32682 }, { "epoch": 0.5829379659686797, "grad_norm": 0.2092370241880417, "learning_rate": 2.2130689302513624e-05, "loss": 0.0844, "step": 32683 }, { "epoch": 0.5829558020903934, "grad_norm": 0.3171740174293518, "learning_rate": 2.2129143086185306e-05, "loss": 0.1485, "step": 32684 }, { "epoch": 0.5829736382121071, "grad_norm": 0.2533302903175354, "learning_rate": 2.2127596880985413e-05, "loss": 0.1536, "step": 32685 }, { "epoch": 0.5829914743338208, "grad_norm": 0.24403366446495056, "learning_rate": 2.2126050686919954e-05, "loss": 0.1583, "step": 32686 }, { "epoch": 0.5830093104555345, "grad_norm": 0.30744120478630066, "learning_rate": 2.2124504503994902e-05, "loss": 0.0661, "step": 32687 }, { "epoch": 0.5830271465772482, "grad_norm": 0.24740423262119293, "learning_rate": 2.212295833221628e-05, "loss": 0.1025, "step": 32688 }, { "epoch": 0.583044982698962, "grad_norm": 0.32939428091049194, "learning_rate": 2.2121412171590058e-05, "loss": 0.1427, "step": 32689 }, { "epoch": 0.5830628188206757, "grad_norm": 0.35520297288894653, "learning_rate": 2.2119866022122242e-05, "loss": 0.1687, "step": 32690 }, { "epoch": 0.5830806549423894, "grad_norm": 0.30466994643211365, "learning_rate": 2.211831988381881e-05, "loss": 0.1658, "step": 32691 }, { "epoch": 0.583098491064103, "grad_norm": 0.21075256168842316, "learning_rate": 2.211677375668577e-05, "loss": 0.1554, "step": 32692 }, { "epoch": 0.5831163271858167, "grad_norm": 0.2382170408964157, "learning_rate": 2.211522764072911e-05, "loss": 0.0959, "step": 32693 }, { "epoch": 0.5831341633075304, "grad_norm": 0.3092350661754608, "learning_rate": 2.2113681535954828e-05, "loss": 0.1642, "step": 32694 }, { "epoch": 0.5831519994292441, "grad_norm": 0.2672624886035919, "learning_rate": 2.2112135442368913e-05, "loss": 0.1401, "step": 32695 }, { "epoch": 0.5831698355509578, "grad_norm": 0.34344083070755005, "learning_rate": 2.211058935997735e-05, "loss": 0.1362, "step": 32696 }, { "epoch": 0.5831876716726715, "grad_norm": 0.2562764585018158, "learning_rate": 2.2109043288786148e-05, "loss": 0.1317, "step": 32697 }, { "epoch": 0.5832055077943852, "grad_norm": 0.32705238461494446, "learning_rate": 2.2107497228801295e-05, "loss": 0.161, "step": 32698 }, { "epoch": 0.5832233439160989, "grad_norm": 0.3412044644355774, "learning_rate": 2.2105951180028776e-05, "loss": 0.1686, "step": 32699 }, { "epoch": 0.5832411800378126, "grad_norm": 0.3075982630252838, "learning_rate": 2.210440514247459e-05, "loss": 0.1191, "step": 32700 }, { "epoch": 0.5832590161595262, "grad_norm": 0.2473858892917633, "learning_rate": 2.210285911614473e-05, "loss": 0.1503, "step": 32701 }, { "epoch": 0.5832768522812399, "grad_norm": 0.22259639203548431, "learning_rate": 2.2101313101045193e-05, "loss": 0.1348, "step": 32702 }, { "epoch": 0.5832946884029536, "grad_norm": 0.25709596276283264, "learning_rate": 2.2099767097181968e-05, "loss": 0.1407, "step": 32703 }, { "epoch": 0.5833125245246673, "grad_norm": 0.26394790410995483, "learning_rate": 2.2098221104561036e-05, "loss": 0.1161, "step": 32704 }, { "epoch": 0.583330360646381, "grad_norm": 0.318134605884552, "learning_rate": 2.2096675123188416e-05, "loss": 0.1573, "step": 32705 }, { "epoch": 0.5833481967680948, "grad_norm": 0.2347719967365265, "learning_rate": 2.2095129153070076e-05, "loss": 0.1192, "step": 32706 }, { "epoch": 0.5833660328898085, "grad_norm": 0.2109861522912979, "learning_rate": 2.2093583194212025e-05, "loss": 0.1126, "step": 32707 }, { "epoch": 0.5833838690115222, "grad_norm": 0.33003291487693787, "learning_rate": 2.2092037246620252e-05, "loss": 0.1619, "step": 32708 }, { "epoch": 0.5834017051332359, "grad_norm": 0.23176167905330658, "learning_rate": 2.2090491310300732e-05, "loss": 0.1069, "step": 32709 }, { "epoch": 0.5834195412549495, "grad_norm": 0.25414010882377625, "learning_rate": 2.2088945385259486e-05, "loss": 0.1715, "step": 32710 }, { "epoch": 0.5834373773766632, "grad_norm": 0.21339866518974304, "learning_rate": 2.208739947150249e-05, "loss": 0.1374, "step": 32711 }, { "epoch": 0.5834552134983769, "grad_norm": 0.30355340242385864, "learning_rate": 2.2085853569035737e-05, "loss": 0.1231, "step": 32712 }, { "epoch": 0.5834730496200906, "grad_norm": 0.2227211892604828, "learning_rate": 2.2084307677865225e-05, "loss": 0.117, "step": 32713 }, { "epoch": 0.5834908857418043, "grad_norm": 0.40996474027633667, "learning_rate": 2.208276179799694e-05, "loss": 0.2058, "step": 32714 }, { "epoch": 0.583508721863518, "grad_norm": 0.19010716676712036, "learning_rate": 2.2081215929436882e-05, "loss": 0.1022, "step": 32715 }, { "epoch": 0.5835265579852317, "grad_norm": 0.1919294148683548, "learning_rate": 2.2079670072191042e-05, "loss": 0.11, "step": 32716 }, { "epoch": 0.5835443941069454, "grad_norm": 0.2056245654821396, "learning_rate": 2.2078124226265397e-05, "loss": 0.1287, "step": 32717 }, { "epoch": 0.583562230228659, "grad_norm": 0.22734293341636658, "learning_rate": 2.2076578391665962e-05, "loss": 0.1322, "step": 32718 }, { "epoch": 0.5835800663503727, "grad_norm": 0.20983117818832397, "learning_rate": 2.207503256839872e-05, "loss": 0.1232, "step": 32719 }, { "epoch": 0.5835979024720864, "grad_norm": 0.2745174467563629, "learning_rate": 2.2073486756469658e-05, "loss": 0.1495, "step": 32720 }, { "epoch": 0.5836157385938001, "grad_norm": 0.2101171612739563, "learning_rate": 2.2071940955884776e-05, "loss": 0.0974, "step": 32721 }, { "epoch": 0.5836335747155138, "grad_norm": 0.2162742018699646, "learning_rate": 2.207039516665005e-05, "loss": 0.1293, "step": 32722 }, { "epoch": 0.5836514108372276, "grad_norm": 0.34841805696487427, "learning_rate": 2.20688493887715e-05, "loss": 0.1794, "step": 32723 }, { "epoch": 0.5836692469589413, "grad_norm": 0.23516437411308289, "learning_rate": 2.20673036222551e-05, "loss": 0.1815, "step": 32724 }, { "epoch": 0.583687083080655, "grad_norm": 0.18162007629871368, "learning_rate": 2.206575786710684e-05, "loss": 0.0877, "step": 32725 }, { "epoch": 0.5837049192023687, "grad_norm": 0.31141769886016846, "learning_rate": 2.2064212123332707e-05, "loss": 0.1652, "step": 32726 }, { "epoch": 0.5837227553240824, "grad_norm": 0.251302570104599, "learning_rate": 2.2062666390938714e-05, "loss": 0.1381, "step": 32727 }, { "epoch": 0.583740591445796, "grad_norm": 0.2424483299255371, "learning_rate": 2.2061120669930836e-05, "loss": 0.1003, "step": 32728 }, { "epoch": 0.5837584275675097, "grad_norm": 0.31126800179481506, "learning_rate": 2.2059574960315073e-05, "loss": 0.1488, "step": 32729 }, { "epoch": 0.5837762636892234, "grad_norm": 0.3306005299091339, "learning_rate": 2.2058029262097402e-05, "loss": 0.1885, "step": 32730 }, { "epoch": 0.5837940998109371, "grad_norm": 0.2621316909790039, "learning_rate": 2.2056483575283837e-05, "loss": 0.163, "step": 32731 }, { "epoch": 0.5838119359326508, "grad_norm": 0.30854377150535583, "learning_rate": 2.205493789988036e-05, "loss": 0.125, "step": 32732 }, { "epoch": 0.5838297720543645, "grad_norm": 0.23702551424503326, "learning_rate": 2.2053392235892956e-05, "loss": 0.141, "step": 32733 }, { "epoch": 0.5838476081760782, "grad_norm": 0.2055118978023529, "learning_rate": 2.205184658332762e-05, "loss": 0.1041, "step": 32734 }, { "epoch": 0.5838654442977919, "grad_norm": 0.2355274111032486, "learning_rate": 2.2050300942190337e-05, "loss": 0.1114, "step": 32735 }, { "epoch": 0.5838832804195055, "grad_norm": 0.25135165452957153, "learning_rate": 2.2048755312487122e-05, "loss": 0.1709, "step": 32736 }, { "epoch": 0.5839011165412192, "grad_norm": 0.2206704020500183, "learning_rate": 2.2047209694223947e-05, "loss": 0.1723, "step": 32737 }, { "epoch": 0.5839189526629329, "grad_norm": 0.26961734890937805, "learning_rate": 2.204566408740681e-05, "loss": 0.0855, "step": 32738 }, { "epoch": 0.5839367887846467, "grad_norm": 0.2480851411819458, "learning_rate": 2.2044118492041683e-05, "loss": 0.1264, "step": 32739 }, { "epoch": 0.5839546249063604, "grad_norm": 0.34333324432373047, "learning_rate": 2.204257290813459e-05, "loss": 0.1268, "step": 32740 }, { "epoch": 0.5839724610280741, "grad_norm": 0.2474355250597, "learning_rate": 2.20410273356915e-05, "loss": 0.1162, "step": 32741 }, { "epoch": 0.5839902971497878, "grad_norm": 0.2821494936943054, "learning_rate": 2.203948177471841e-05, "loss": 0.1325, "step": 32742 }, { "epoch": 0.5840081332715015, "grad_norm": 0.30933400988578796, "learning_rate": 2.2037936225221303e-05, "loss": 0.181, "step": 32743 }, { "epoch": 0.5840259693932152, "grad_norm": 0.185426265001297, "learning_rate": 2.203639068720619e-05, "loss": 0.1325, "step": 32744 }, { "epoch": 0.5840438055149288, "grad_norm": 0.2748141288757324, "learning_rate": 2.203484516067905e-05, "loss": 0.1602, "step": 32745 }, { "epoch": 0.5840616416366425, "grad_norm": 0.24127641320228577, "learning_rate": 2.203329964564588e-05, "loss": 0.1033, "step": 32746 }, { "epoch": 0.5840794777583562, "grad_norm": 0.23739008605480194, "learning_rate": 2.2031754142112652e-05, "loss": 0.1444, "step": 32747 }, { "epoch": 0.5840973138800699, "grad_norm": 0.22942371666431427, "learning_rate": 2.2030208650085373e-05, "loss": 0.1023, "step": 32748 }, { "epoch": 0.5841151500017836, "grad_norm": 0.1496797502040863, "learning_rate": 2.202866316957003e-05, "loss": 0.0817, "step": 32749 }, { "epoch": 0.5841329861234973, "grad_norm": 0.2373179942369461, "learning_rate": 2.202711770057262e-05, "loss": 0.1269, "step": 32750 }, { "epoch": 0.584150822245211, "grad_norm": 0.2223374992609024, "learning_rate": 2.2025572243099128e-05, "loss": 0.092, "step": 32751 }, { "epoch": 0.5841686583669247, "grad_norm": 0.24049773812294006, "learning_rate": 2.202402679715554e-05, "loss": 0.1296, "step": 32752 }, { "epoch": 0.5841864944886384, "grad_norm": 0.2524278163909912, "learning_rate": 2.2022481362747856e-05, "loss": 0.0977, "step": 32753 }, { "epoch": 0.584204330610352, "grad_norm": 0.280766099691391, "learning_rate": 2.2020935939882067e-05, "loss": 0.1218, "step": 32754 }, { "epoch": 0.5842221667320657, "grad_norm": 0.2215835452079773, "learning_rate": 2.2019390528564152e-05, "loss": 0.1263, "step": 32755 }, { "epoch": 0.5842400028537795, "grad_norm": 0.17980517446994781, "learning_rate": 2.2017845128800105e-05, "loss": 0.0973, "step": 32756 }, { "epoch": 0.5842578389754932, "grad_norm": 0.23257394134998322, "learning_rate": 2.2016299740595927e-05, "loss": 0.1005, "step": 32757 }, { "epoch": 0.5842756750972069, "grad_norm": 0.2657771706581116, "learning_rate": 2.2014754363957608e-05, "loss": 0.0448, "step": 32758 }, { "epoch": 0.5842935112189206, "grad_norm": 0.21145270764827728, "learning_rate": 2.2013208998891128e-05, "loss": 0.0756, "step": 32759 }, { "epoch": 0.5843113473406343, "grad_norm": 0.3095283806324005, "learning_rate": 2.201166364540248e-05, "loss": 0.1284, "step": 32760 }, { "epoch": 0.584329183462348, "grad_norm": 0.2519843578338623, "learning_rate": 2.201011830349765e-05, "loss": 0.1153, "step": 32761 }, { "epoch": 0.5843470195840617, "grad_norm": 0.2932208478450775, "learning_rate": 2.2008572973182633e-05, "loss": 0.1414, "step": 32762 }, { "epoch": 0.5843648557057753, "grad_norm": 0.30687496066093445, "learning_rate": 2.200702765446343e-05, "loss": 0.174, "step": 32763 }, { "epoch": 0.584382691827489, "grad_norm": 0.2801288068294525, "learning_rate": 2.2005482347346025e-05, "loss": 0.161, "step": 32764 }, { "epoch": 0.5844005279492027, "grad_norm": 0.30687326192855835, "learning_rate": 2.2003937051836386e-05, "loss": 0.0798, "step": 32765 }, { "epoch": 0.5844183640709164, "grad_norm": 0.24588850140571594, "learning_rate": 2.2002391767940538e-05, "loss": 0.1265, "step": 32766 }, { "epoch": 0.5844362001926301, "grad_norm": 0.31414589285850525, "learning_rate": 2.2000846495664453e-05, "loss": 0.1126, "step": 32767 }, { "epoch": 0.5844540363143438, "grad_norm": 0.28846508264541626, "learning_rate": 2.199930123501412e-05, "loss": 0.0993, "step": 32768 }, { "epoch": 0.5844718724360575, "grad_norm": 0.17866621911525726, "learning_rate": 2.1997755985995528e-05, "loss": 0.1037, "step": 32769 }, { "epoch": 0.5844897085577712, "grad_norm": 0.28960931301116943, "learning_rate": 2.1996210748614673e-05, "loss": 0.1602, "step": 32770 }, { "epoch": 0.5845075446794848, "grad_norm": 0.2598286271095276, "learning_rate": 2.1994665522877547e-05, "loss": 0.0612, "step": 32771 }, { "epoch": 0.5845253808011985, "grad_norm": 0.20510664582252502, "learning_rate": 2.1993120308790136e-05, "loss": 0.106, "step": 32772 }, { "epoch": 0.5845432169229123, "grad_norm": 0.24668174982070923, "learning_rate": 2.199157510635843e-05, "loss": 0.1482, "step": 32773 }, { "epoch": 0.584561053044626, "grad_norm": 0.2542809844017029, "learning_rate": 2.1990029915588406e-05, "loss": 0.1075, "step": 32774 }, { "epoch": 0.5845788891663397, "grad_norm": 0.3045494556427002, "learning_rate": 2.1988484736486075e-05, "loss": 0.0926, "step": 32775 }, { "epoch": 0.5845967252880534, "grad_norm": 0.23481105268001556, "learning_rate": 2.1986939569057416e-05, "loss": 0.0682, "step": 32776 }, { "epoch": 0.5846145614097671, "grad_norm": 0.346417635679245, "learning_rate": 2.198539441330842e-05, "loss": 0.1392, "step": 32777 }, { "epoch": 0.5846323975314808, "grad_norm": 0.24586959183216095, "learning_rate": 2.198384926924507e-05, "loss": 0.0864, "step": 32778 }, { "epoch": 0.5846502336531945, "grad_norm": 0.24564705789089203, "learning_rate": 2.198230413687337e-05, "loss": 0.1379, "step": 32779 }, { "epoch": 0.5846680697749082, "grad_norm": 0.2375066876411438, "learning_rate": 2.1980759016199304e-05, "loss": 0.0936, "step": 32780 }, { "epoch": 0.5846859058966218, "grad_norm": 0.2650754749774933, "learning_rate": 2.197921390722886e-05, "loss": 0.1956, "step": 32781 }, { "epoch": 0.5847037420183355, "grad_norm": 0.280928373336792, "learning_rate": 2.197766880996801e-05, "loss": 0.1248, "step": 32782 }, { "epoch": 0.5847215781400492, "grad_norm": 0.18832944333553314, "learning_rate": 2.197612372442277e-05, "loss": 0.1073, "step": 32783 }, { "epoch": 0.5847394142617629, "grad_norm": 0.303588330745697, "learning_rate": 2.1974578650599123e-05, "loss": 0.118, "step": 32784 }, { "epoch": 0.5847572503834766, "grad_norm": 0.251571387052536, "learning_rate": 2.1973033588503054e-05, "loss": 0.1572, "step": 32785 }, { "epoch": 0.5847750865051903, "grad_norm": 0.25179558992385864, "learning_rate": 2.197148853814055e-05, "loss": 0.1555, "step": 32786 }, { "epoch": 0.584792922626904, "grad_norm": 0.24583813548088074, "learning_rate": 2.1969943499517595e-05, "loss": 0.1483, "step": 32787 }, { "epoch": 0.5848107587486177, "grad_norm": 0.19594620168209076, "learning_rate": 2.1968398472640196e-05, "loss": 0.1307, "step": 32788 }, { "epoch": 0.5848285948703313, "grad_norm": 0.384545236825943, "learning_rate": 2.1966853457514322e-05, "loss": 0.1234, "step": 32789 }, { "epoch": 0.5848464309920451, "grad_norm": 0.23136396706104279, "learning_rate": 2.196530845414598e-05, "loss": 0.0821, "step": 32790 }, { "epoch": 0.5848642671137588, "grad_norm": 0.22859366238117218, "learning_rate": 2.196376346254114e-05, "loss": 0.1243, "step": 32791 }, { "epoch": 0.5848821032354725, "grad_norm": 0.3239055275917053, "learning_rate": 2.1962218482705812e-05, "loss": 0.1712, "step": 32792 }, { "epoch": 0.5848999393571862, "grad_norm": 0.2586090564727783, "learning_rate": 2.1960673514645974e-05, "loss": 0.1524, "step": 32793 }, { "epoch": 0.5849177754788999, "grad_norm": 0.3600848615169525, "learning_rate": 2.1959128558367617e-05, "loss": 0.1712, "step": 32794 }, { "epoch": 0.5849356116006136, "grad_norm": 0.2688920497894287, "learning_rate": 2.1957583613876715e-05, "loss": 0.2064, "step": 32795 }, { "epoch": 0.5849534477223273, "grad_norm": 0.2552483081817627, "learning_rate": 2.195603868117928e-05, "loss": 0.0927, "step": 32796 }, { "epoch": 0.584971283844041, "grad_norm": 0.2931370139122009, "learning_rate": 2.195449376028129e-05, "loss": 0.1073, "step": 32797 }, { "epoch": 0.5849891199657546, "grad_norm": 0.26654571294784546, "learning_rate": 2.1952948851188734e-05, "loss": 0.1286, "step": 32798 }, { "epoch": 0.5850069560874683, "grad_norm": 0.3052487373352051, "learning_rate": 2.1951403953907603e-05, "loss": 0.138, "step": 32799 }, { "epoch": 0.585024792209182, "grad_norm": 0.3467080295085907, "learning_rate": 2.1949859068443873e-05, "loss": 0.1843, "step": 32800 }, { "epoch": 0.5850426283308957, "grad_norm": 0.23049597442150116, "learning_rate": 2.194831419480355e-05, "loss": 0.1662, "step": 32801 }, { "epoch": 0.5850604644526094, "grad_norm": 0.42516759037971497, "learning_rate": 2.194676933299262e-05, "loss": 0.1521, "step": 32802 }, { "epoch": 0.5850783005743231, "grad_norm": 0.2271551638841629, "learning_rate": 2.194522448301706e-05, "loss": 0.1575, "step": 32803 }, { "epoch": 0.5850961366960368, "grad_norm": 0.2876285910606384, "learning_rate": 2.194367964488286e-05, "loss": 0.1419, "step": 32804 }, { "epoch": 0.5851139728177505, "grad_norm": 0.24481911957263947, "learning_rate": 2.194213481859602e-05, "loss": 0.1039, "step": 32805 }, { "epoch": 0.5851318089394641, "grad_norm": 0.32447394728660583, "learning_rate": 2.1940590004162524e-05, "loss": 0.1707, "step": 32806 }, { "epoch": 0.585149645061178, "grad_norm": 0.2697230577468872, "learning_rate": 2.1939045201588357e-05, "loss": 0.1636, "step": 32807 }, { "epoch": 0.5851674811828916, "grad_norm": 0.18803580105304718, "learning_rate": 2.19375004108795e-05, "loss": 0.1165, "step": 32808 }, { "epoch": 0.5851853173046053, "grad_norm": 0.28365233540534973, "learning_rate": 2.1935955632041957e-05, "loss": 0.1859, "step": 32809 }, { "epoch": 0.585203153426319, "grad_norm": 0.31464138627052307, "learning_rate": 2.193441086508171e-05, "loss": 0.1167, "step": 32810 }, { "epoch": 0.5852209895480327, "grad_norm": 0.2586153447628021, "learning_rate": 2.1932866110004736e-05, "loss": 0.1127, "step": 32811 }, { "epoch": 0.5852388256697464, "grad_norm": 0.2822292745113373, "learning_rate": 2.1931321366817042e-05, "loss": 0.0737, "step": 32812 }, { "epoch": 0.5852566617914601, "grad_norm": 0.29303503036499023, "learning_rate": 2.1929776635524594e-05, "loss": 0.0901, "step": 32813 }, { "epoch": 0.5852744979131738, "grad_norm": 0.1761719435453415, "learning_rate": 2.1928231916133407e-05, "loss": 0.114, "step": 32814 }, { "epoch": 0.5852923340348875, "grad_norm": 0.3096100091934204, "learning_rate": 2.192668720864945e-05, "loss": 0.1644, "step": 32815 }, { "epoch": 0.5853101701566011, "grad_norm": 0.25290408730506897, "learning_rate": 2.1925142513078708e-05, "loss": 0.1218, "step": 32816 }, { "epoch": 0.5853280062783148, "grad_norm": 0.34670233726501465, "learning_rate": 2.1923597829427177e-05, "loss": 0.1262, "step": 32817 }, { "epoch": 0.5853458424000285, "grad_norm": 0.21767553687095642, "learning_rate": 2.1922053157700848e-05, "loss": 0.1394, "step": 32818 }, { "epoch": 0.5853636785217422, "grad_norm": 0.36319202184677124, "learning_rate": 2.1920508497905702e-05, "loss": 0.157, "step": 32819 }, { "epoch": 0.5853815146434559, "grad_norm": 0.23719051480293274, "learning_rate": 2.1918963850047734e-05, "loss": 0.1393, "step": 32820 }, { "epoch": 0.5853993507651696, "grad_norm": 0.2927936315536499, "learning_rate": 2.1917419214132914e-05, "loss": 0.1234, "step": 32821 }, { "epoch": 0.5854171868868833, "grad_norm": 0.2914182245731354, "learning_rate": 2.1915874590167252e-05, "loss": 0.1339, "step": 32822 }, { "epoch": 0.585435023008597, "grad_norm": 0.22109076380729675, "learning_rate": 2.1914329978156724e-05, "loss": 0.1194, "step": 32823 }, { "epoch": 0.5854528591303108, "grad_norm": 0.2898158133029938, "learning_rate": 2.191278537810732e-05, "loss": 0.1333, "step": 32824 }, { "epoch": 0.5854706952520244, "grad_norm": 0.37327486276626587, "learning_rate": 2.191124079002502e-05, "loss": 0.1907, "step": 32825 }, { "epoch": 0.5854885313737381, "grad_norm": 0.23889465630054474, "learning_rate": 2.1909696213915816e-05, "loss": 0.1153, "step": 32826 }, { "epoch": 0.5855063674954518, "grad_norm": 0.25527429580688477, "learning_rate": 2.1908151649785704e-05, "loss": 0.1113, "step": 32827 }, { "epoch": 0.5855242036171655, "grad_norm": 0.2556540071964264, "learning_rate": 2.1906607097640666e-05, "loss": 0.1584, "step": 32828 }, { "epoch": 0.5855420397388792, "grad_norm": 0.20545242726802826, "learning_rate": 2.1905062557486685e-05, "loss": 0.1078, "step": 32829 }, { "epoch": 0.5855598758605929, "grad_norm": 0.2756780683994293, "learning_rate": 2.1903518029329743e-05, "loss": 0.1225, "step": 32830 }, { "epoch": 0.5855777119823066, "grad_norm": 0.34050872921943665, "learning_rate": 2.1901973513175844e-05, "loss": 0.1469, "step": 32831 }, { "epoch": 0.5855955481040203, "grad_norm": 0.24623584747314453, "learning_rate": 2.190042900903096e-05, "loss": 0.1128, "step": 32832 }, { "epoch": 0.585613384225734, "grad_norm": 0.29396793246269226, "learning_rate": 2.1898884516901088e-05, "loss": 0.1214, "step": 32833 }, { "epoch": 0.5856312203474476, "grad_norm": 0.2876686453819275, "learning_rate": 2.1897340036792198e-05, "loss": 0.2136, "step": 32834 }, { "epoch": 0.5856490564691613, "grad_norm": 0.3147815763950348, "learning_rate": 2.1895795568710305e-05, "loss": 0.1358, "step": 32835 }, { "epoch": 0.585666892590875, "grad_norm": 0.24319984018802643, "learning_rate": 2.189425111266138e-05, "loss": 0.1103, "step": 32836 }, { "epoch": 0.5856847287125887, "grad_norm": 0.3565587103366852, "learning_rate": 2.1892706668651408e-05, "loss": 0.1238, "step": 32837 }, { "epoch": 0.5857025648343024, "grad_norm": 0.20736780762672424, "learning_rate": 2.1891162236686373e-05, "loss": 0.1176, "step": 32838 }, { "epoch": 0.5857204009560161, "grad_norm": 0.26560136675834656, "learning_rate": 2.1889617816772263e-05, "loss": 0.1397, "step": 32839 }, { "epoch": 0.5857382370777299, "grad_norm": 0.23852647840976715, "learning_rate": 2.188807340891508e-05, "loss": 0.1234, "step": 32840 }, { "epoch": 0.5857560731994436, "grad_norm": 0.23946654796600342, "learning_rate": 2.1886529013120795e-05, "loss": 0.1077, "step": 32841 }, { "epoch": 0.5857739093211572, "grad_norm": 0.2291734665632248, "learning_rate": 2.1884984629395405e-05, "loss": 0.1144, "step": 32842 }, { "epoch": 0.5857917454428709, "grad_norm": 0.2994038164615631, "learning_rate": 2.1883440257744876e-05, "loss": 0.096, "step": 32843 }, { "epoch": 0.5858095815645846, "grad_norm": 0.33408018946647644, "learning_rate": 2.188189589817522e-05, "loss": 0.1977, "step": 32844 }, { "epoch": 0.5858274176862983, "grad_norm": 0.23636189103126526, "learning_rate": 2.1880351550692407e-05, "loss": 0.1266, "step": 32845 }, { "epoch": 0.585845253808012, "grad_norm": 0.2544075548648834, "learning_rate": 2.1878807215302437e-05, "loss": 0.1381, "step": 32846 }, { "epoch": 0.5858630899297257, "grad_norm": 0.26502206921577454, "learning_rate": 2.1877262892011275e-05, "loss": 0.1526, "step": 32847 }, { "epoch": 0.5858809260514394, "grad_norm": 0.2717975676059723, "learning_rate": 2.187571858082493e-05, "loss": 0.1015, "step": 32848 }, { "epoch": 0.5858987621731531, "grad_norm": 0.3843843340873718, "learning_rate": 2.1874174281749383e-05, "loss": 0.1008, "step": 32849 }, { "epoch": 0.5859165982948668, "grad_norm": 0.255064457654953, "learning_rate": 2.1872629994790612e-05, "loss": 0.1169, "step": 32850 }, { "epoch": 0.5859344344165804, "grad_norm": 0.3429819345474243, "learning_rate": 2.1871085719954604e-05, "loss": 0.1476, "step": 32851 }, { "epoch": 0.5859522705382941, "grad_norm": 0.2847726345062256, "learning_rate": 2.186954145724735e-05, "loss": 0.1478, "step": 32852 }, { "epoch": 0.5859701066600078, "grad_norm": 0.2902240753173828, "learning_rate": 2.186799720667483e-05, "loss": 0.2203, "step": 32853 }, { "epoch": 0.5859879427817215, "grad_norm": 0.34646478295326233, "learning_rate": 2.1866452968243044e-05, "loss": 0.0979, "step": 32854 }, { "epoch": 0.5860057789034352, "grad_norm": 0.26700881123542786, "learning_rate": 2.1864908741957965e-05, "loss": 0.1525, "step": 32855 }, { "epoch": 0.5860236150251489, "grad_norm": 0.30656698346138, "learning_rate": 2.1863364527825575e-05, "loss": 0.1815, "step": 32856 }, { "epoch": 0.5860414511468627, "grad_norm": 0.30681997537612915, "learning_rate": 2.1861820325851877e-05, "loss": 0.162, "step": 32857 }, { "epoch": 0.5860592872685764, "grad_norm": 0.30271705985069275, "learning_rate": 2.1860276136042845e-05, "loss": 0.1931, "step": 32858 }, { "epoch": 0.5860771233902901, "grad_norm": 0.2553333342075348, "learning_rate": 2.1858731958404467e-05, "loss": 0.1518, "step": 32859 }, { "epoch": 0.5860949595120037, "grad_norm": 0.2667693495750427, "learning_rate": 2.1857187792942717e-05, "loss": 0.1796, "step": 32860 }, { "epoch": 0.5861127956337174, "grad_norm": 0.22701171040534973, "learning_rate": 2.185564363966361e-05, "loss": 0.1057, "step": 32861 }, { "epoch": 0.5861306317554311, "grad_norm": 0.22908222675323486, "learning_rate": 2.1854099498573112e-05, "loss": 0.0846, "step": 32862 }, { "epoch": 0.5861484678771448, "grad_norm": 0.30423903465270996, "learning_rate": 2.1852555369677208e-05, "loss": 0.1547, "step": 32863 }, { "epoch": 0.5861663039988585, "grad_norm": 0.22806040942668915, "learning_rate": 2.1851011252981878e-05, "loss": 0.1516, "step": 32864 }, { "epoch": 0.5861841401205722, "grad_norm": 0.3106060028076172, "learning_rate": 2.1849467148493127e-05, "loss": 0.1529, "step": 32865 }, { "epoch": 0.5862019762422859, "grad_norm": 0.2221730500459671, "learning_rate": 2.1847923056216924e-05, "loss": 0.0887, "step": 32866 }, { "epoch": 0.5862198123639996, "grad_norm": 0.24242134392261505, "learning_rate": 2.1846378976159266e-05, "loss": 0.108, "step": 32867 }, { "epoch": 0.5862376484857132, "grad_norm": 0.2813638150691986, "learning_rate": 2.1844834908326133e-05, "loss": 0.1465, "step": 32868 }, { "epoch": 0.5862554846074269, "grad_norm": 0.2639690637588501, "learning_rate": 2.1843290852723495e-05, "loss": 0.1462, "step": 32869 }, { "epoch": 0.5862733207291406, "grad_norm": 0.2633322775363922, "learning_rate": 2.1841746809357363e-05, "loss": 0.1052, "step": 32870 }, { "epoch": 0.5862911568508543, "grad_norm": 0.23971325159072876, "learning_rate": 2.1840202778233716e-05, "loss": 0.1417, "step": 32871 }, { "epoch": 0.586308992972568, "grad_norm": 0.4095028042793274, "learning_rate": 2.1838658759358525e-05, "loss": 0.0737, "step": 32872 }, { "epoch": 0.5863268290942817, "grad_norm": 0.3204917311668396, "learning_rate": 2.1837114752737787e-05, "loss": 0.1108, "step": 32873 }, { "epoch": 0.5863446652159955, "grad_norm": 0.2623700201511383, "learning_rate": 2.1835570758377484e-05, "loss": 0.125, "step": 32874 }, { "epoch": 0.5863625013377092, "grad_norm": 0.37184256315231323, "learning_rate": 2.1834026776283605e-05, "loss": 0.1038, "step": 32875 }, { "epoch": 0.5863803374594229, "grad_norm": 0.35463324189186096, "learning_rate": 2.183248280646213e-05, "loss": 0.0976, "step": 32876 }, { "epoch": 0.5863981735811366, "grad_norm": 0.24924468994140625, "learning_rate": 2.183093884891904e-05, "loss": 0.0647, "step": 32877 }, { "epoch": 0.5864160097028502, "grad_norm": 0.28524017333984375, "learning_rate": 2.1829394903660334e-05, "loss": 0.1728, "step": 32878 }, { "epoch": 0.5864338458245639, "grad_norm": 0.3683130145072937, "learning_rate": 2.182785097069199e-05, "loss": 0.1655, "step": 32879 }, { "epoch": 0.5864516819462776, "grad_norm": 0.2757599949836731, "learning_rate": 2.1826307050019983e-05, "loss": 0.094, "step": 32880 }, { "epoch": 0.5864695180679913, "grad_norm": 0.2617654502391815, "learning_rate": 2.1824763141650316e-05, "loss": 0.1363, "step": 32881 }, { "epoch": 0.586487354189705, "grad_norm": 0.30277103185653687, "learning_rate": 2.182321924558895e-05, "loss": 0.1637, "step": 32882 }, { "epoch": 0.5865051903114187, "grad_norm": 0.25579097867012024, "learning_rate": 2.1821675361841894e-05, "loss": 0.1479, "step": 32883 }, { "epoch": 0.5865230264331324, "grad_norm": 0.24641653895378113, "learning_rate": 2.1820131490415122e-05, "loss": 0.0902, "step": 32884 }, { "epoch": 0.586540862554846, "grad_norm": 0.2992168068885803, "learning_rate": 2.1818587631314617e-05, "loss": 0.1242, "step": 32885 }, { "epoch": 0.5865586986765597, "grad_norm": 0.17372293770313263, "learning_rate": 2.1817043784546357e-05, "loss": 0.0995, "step": 32886 }, { "epoch": 0.5865765347982734, "grad_norm": 0.27276352047920227, "learning_rate": 2.1815499950116347e-05, "loss": 0.1047, "step": 32887 }, { "epoch": 0.5865943709199871, "grad_norm": 0.2772451639175415, "learning_rate": 2.181395612803055e-05, "loss": 0.1047, "step": 32888 }, { "epoch": 0.5866122070417008, "grad_norm": 0.32433974742889404, "learning_rate": 2.1812412318294965e-05, "loss": 0.2032, "step": 32889 }, { "epoch": 0.5866300431634145, "grad_norm": 0.25707507133483887, "learning_rate": 2.1810868520915563e-05, "loss": 0.1136, "step": 32890 }, { "epoch": 0.5866478792851283, "grad_norm": 0.3069787323474884, "learning_rate": 2.1809324735898346e-05, "loss": 0.1296, "step": 32891 }, { "epoch": 0.586665715406842, "grad_norm": 0.30653896927833557, "learning_rate": 2.1807780963249286e-05, "loss": 0.1496, "step": 32892 }, { "epoch": 0.5866835515285557, "grad_norm": 0.2532758414745331, "learning_rate": 2.1806237202974365e-05, "loss": 0.1688, "step": 32893 }, { "epoch": 0.5867013876502694, "grad_norm": 0.24667032063007355, "learning_rate": 2.1804693455079576e-05, "loss": 0.1263, "step": 32894 }, { "epoch": 0.586719223771983, "grad_norm": 0.2821042537689209, "learning_rate": 2.1803149719570893e-05, "loss": 0.1768, "step": 32895 }, { "epoch": 0.5867370598936967, "grad_norm": 0.22775308787822723, "learning_rate": 2.1801605996454315e-05, "loss": 0.1261, "step": 32896 }, { "epoch": 0.5867548960154104, "grad_norm": 0.22688975930213928, "learning_rate": 2.1800062285735815e-05, "loss": 0.1223, "step": 32897 }, { "epoch": 0.5867727321371241, "grad_norm": 0.20398736000061035, "learning_rate": 2.1798518587421378e-05, "loss": 0.0926, "step": 32898 }, { "epoch": 0.5867905682588378, "grad_norm": 0.29975900053977966, "learning_rate": 2.179697490151698e-05, "loss": 0.1343, "step": 32899 }, { "epoch": 0.5868084043805515, "grad_norm": 0.20599398016929626, "learning_rate": 2.1795431228028625e-05, "loss": 0.1155, "step": 32900 }, { "epoch": 0.5868262405022652, "grad_norm": 0.23313100636005402, "learning_rate": 2.1793887566962275e-05, "loss": 0.1188, "step": 32901 }, { "epoch": 0.5868440766239789, "grad_norm": 0.2681022882461548, "learning_rate": 2.1792343918323935e-05, "loss": 0.1137, "step": 32902 }, { "epoch": 0.5868619127456925, "grad_norm": 0.25823819637298584, "learning_rate": 2.1790800282119564e-05, "loss": 0.1451, "step": 32903 }, { "epoch": 0.5868797488674062, "grad_norm": 0.2917575538158417, "learning_rate": 2.1789256658355174e-05, "loss": 0.1439, "step": 32904 }, { "epoch": 0.5868975849891199, "grad_norm": 0.23296421766281128, "learning_rate": 2.178771304703673e-05, "loss": 0.1409, "step": 32905 }, { "epoch": 0.5869154211108336, "grad_norm": 0.2006133496761322, "learning_rate": 2.178616944817022e-05, "loss": 0.0925, "step": 32906 }, { "epoch": 0.5869332572325473, "grad_norm": 0.2315547913312912, "learning_rate": 2.1784625861761624e-05, "loss": 0.141, "step": 32907 }, { "epoch": 0.5869510933542611, "grad_norm": 0.2253972887992859, "learning_rate": 2.178308228781693e-05, "loss": 0.1746, "step": 32908 }, { "epoch": 0.5869689294759748, "grad_norm": 0.3042829930782318, "learning_rate": 2.1781538726342115e-05, "loss": 0.1722, "step": 32909 }, { "epoch": 0.5869867655976885, "grad_norm": 0.2084905058145523, "learning_rate": 2.1779995177343178e-05, "loss": 0.0904, "step": 32910 }, { "epoch": 0.5870046017194022, "grad_norm": 0.26686906814575195, "learning_rate": 2.1778451640826087e-05, "loss": 0.1174, "step": 32911 }, { "epoch": 0.5870224378411159, "grad_norm": 0.21772006154060364, "learning_rate": 2.1776908116796824e-05, "loss": 0.0924, "step": 32912 }, { "epoch": 0.5870402739628295, "grad_norm": 0.2813258767127991, "learning_rate": 2.1775364605261385e-05, "loss": 0.1617, "step": 32913 }, { "epoch": 0.5870581100845432, "grad_norm": 0.21322877705097198, "learning_rate": 2.177382110622575e-05, "loss": 0.1144, "step": 32914 }, { "epoch": 0.5870759462062569, "grad_norm": 0.1968001127243042, "learning_rate": 2.1772277619695893e-05, "loss": 0.1205, "step": 32915 }, { "epoch": 0.5870937823279706, "grad_norm": 0.277910441160202, "learning_rate": 2.1770734145677796e-05, "loss": 0.1793, "step": 32916 }, { "epoch": 0.5871116184496843, "grad_norm": 0.29438337683677673, "learning_rate": 2.176919068417746e-05, "loss": 0.1032, "step": 32917 }, { "epoch": 0.587129454571398, "grad_norm": 0.23804901540279388, "learning_rate": 2.1767647235200856e-05, "loss": 0.0904, "step": 32918 }, { "epoch": 0.5871472906931117, "grad_norm": 0.3984792232513428, "learning_rate": 2.1766103798753966e-05, "loss": 0.1457, "step": 32919 }, { "epoch": 0.5871651268148254, "grad_norm": 0.5022168159484863, "learning_rate": 2.1764560374842774e-05, "loss": 0.2379, "step": 32920 }, { "epoch": 0.587182962936539, "grad_norm": 0.2859339416027069, "learning_rate": 2.176301696347326e-05, "loss": 0.1428, "step": 32921 }, { "epoch": 0.5872007990582527, "grad_norm": 0.25966086983680725, "learning_rate": 2.1761473564651412e-05, "loss": 0.0891, "step": 32922 }, { "epoch": 0.5872186351799664, "grad_norm": 0.322607159614563, "learning_rate": 2.1759930178383218e-05, "loss": 0.1516, "step": 32923 }, { "epoch": 0.5872364713016801, "grad_norm": 0.2912963926792145, "learning_rate": 2.1758386804674652e-05, "loss": 0.1056, "step": 32924 }, { "epoch": 0.5872543074233939, "grad_norm": 0.28418809175491333, "learning_rate": 2.1756843443531685e-05, "loss": 0.1334, "step": 32925 }, { "epoch": 0.5872721435451076, "grad_norm": 0.2073073387145996, "learning_rate": 2.1755300094960327e-05, "loss": 0.088, "step": 32926 }, { "epoch": 0.5872899796668213, "grad_norm": 0.2851669490337372, "learning_rate": 2.1753756758966547e-05, "loss": 0.1231, "step": 32927 }, { "epoch": 0.587307815788535, "grad_norm": 0.22561010718345642, "learning_rate": 2.175221343555632e-05, "loss": 0.108, "step": 32928 }, { "epoch": 0.5873256519102487, "grad_norm": 0.36879727244377136, "learning_rate": 2.1750670124735633e-05, "loss": 0.157, "step": 32929 }, { "epoch": 0.5873434880319623, "grad_norm": 0.29005277156829834, "learning_rate": 2.1749126826510473e-05, "loss": 0.1254, "step": 32930 }, { "epoch": 0.587361324153676, "grad_norm": 0.2758479416370392, "learning_rate": 2.1747583540886826e-05, "loss": 0.117, "step": 32931 }, { "epoch": 0.5873791602753897, "grad_norm": 0.26588526368141174, "learning_rate": 2.174604026787067e-05, "loss": 0.1844, "step": 32932 }, { "epoch": 0.5873969963971034, "grad_norm": 0.30634355545043945, "learning_rate": 2.1744497007467985e-05, "loss": 0.1326, "step": 32933 }, { "epoch": 0.5874148325188171, "grad_norm": 0.33632099628448486, "learning_rate": 2.174295375968474e-05, "loss": 0.1459, "step": 32934 }, { "epoch": 0.5874326686405308, "grad_norm": 0.23401671648025513, "learning_rate": 2.1741410524526944e-05, "loss": 0.1204, "step": 32935 }, { "epoch": 0.5874505047622445, "grad_norm": 0.24698470532894135, "learning_rate": 2.1739867302000563e-05, "loss": 0.0907, "step": 32936 }, { "epoch": 0.5874683408839582, "grad_norm": 0.20571982860565186, "learning_rate": 2.1738324092111588e-05, "loss": 0.1365, "step": 32937 }, { "epoch": 0.5874861770056719, "grad_norm": 0.24413610994815826, "learning_rate": 2.1736780894865982e-05, "loss": 0.1106, "step": 32938 }, { "epoch": 0.5875040131273855, "grad_norm": 0.25464290380477905, "learning_rate": 2.173523771026975e-05, "loss": 0.1285, "step": 32939 }, { "epoch": 0.5875218492490992, "grad_norm": 0.30646461248397827, "learning_rate": 2.1733694538328868e-05, "loss": 0.1141, "step": 32940 }, { "epoch": 0.587539685370813, "grad_norm": 0.24935881793498993, "learning_rate": 2.1732151379049312e-05, "loss": 0.1756, "step": 32941 }, { "epoch": 0.5875575214925267, "grad_norm": 0.27392441034317017, "learning_rate": 2.1730608232437056e-05, "loss": 0.1338, "step": 32942 }, { "epoch": 0.5875753576142404, "grad_norm": 0.2358497679233551, "learning_rate": 2.1729065098498096e-05, "loss": 0.1216, "step": 32943 }, { "epoch": 0.5875931937359541, "grad_norm": 0.3070487082004547, "learning_rate": 2.1727521977238417e-05, "loss": 0.1446, "step": 32944 }, { "epoch": 0.5876110298576678, "grad_norm": 0.31453925371170044, "learning_rate": 2.172597886866399e-05, "loss": 0.1343, "step": 32945 }, { "epoch": 0.5876288659793815, "grad_norm": 0.28357356786727905, "learning_rate": 2.1724435772780803e-05, "loss": 0.1348, "step": 32946 }, { "epoch": 0.5876467021010952, "grad_norm": 0.303285151720047, "learning_rate": 2.172289268959482e-05, "loss": 0.1004, "step": 32947 }, { "epoch": 0.5876645382228088, "grad_norm": 0.26704922318458557, "learning_rate": 2.1721349619112047e-05, "loss": 0.115, "step": 32948 }, { "epoch": 0.5876823743445225, "grad_norm": 0.22708401083946228, "learning_rate": 2.1719806561338453e-05, "loss": 0.1355, "step": 32949 }, { "epoch": 0.5877002104662362, "grad_norm": 0.3322717547416687, "learning_rate": 2.1718263516280026e-05, "loss": 0.1504, "step": 32950 }, { "epoch": 0.5877180465879499, "grad_norm": 0.2664613425731659, "learning_rate": 2.1716720483942733e-05, "loss": 0.0897, "step": 32951 }, { "epoch": 0.5877358827096636, "grad_norm": 0.2527411878108978, "learning_rate": 2.1715177464332574e-05, "loss": 0.1288, "step": 32952 }, { "epoch": 0.5877537188313773, "grad_norm": 0.22989559173583984, "learning_rate": 2.1713634457455527e-05, "loss": 0.0958, "step": 32953 }, { "epoch": 0.587771554953091, "grad_norm": 0.298786461353302, "learning_rate": 2.1712091463317563e-05, "loss": 0.1087, "step": 32954 }, { "epoch": 0.5877893910748047, "grad_norm": 0.2342507690191269, "learning_rate": 2.171054848192466e-05, "loss": 0.1508, "step": 32955 }, { "epoch": 0.5878072271965183, "grad_norm": 0.2711906135082245, "learning_rate": 2.1709005513282815e-05, "loss": 0.1102, "step": 32956 }, { "epoch": 0.587825063318232, "grad_norm": 0.2384423017501831, "learning_rate": 2.1707462557397998e-05, "loss": 0.189, "step": 32957 }, { "epoch": 0.5878428994399458, "grad_norm": 0.33205151557922363, "learning_rate": 2.1705919614276198e-05, "loss": 0.1645, "step": 32958 }, { "epoch": 0.5878607355616595, "grad_norm": 0.24081368744373322, "learning_rate": 2.170437668392339e-05, "loss": 0.122, "step": 32959 }, { "epoch": 0.5878785716833732, "grad_norm": 0.3218442499637604, "learning_rate": 2.170283376634555e-05, "loss": 0.1172, "step": 32960 }, { "epoch": 0.5878964078050869, "grad_norm": 0.377908855676651, "learning_rate": 2.1701290861548674e-05, "loss": 0.0869, "step": 32961 }, { "epoch": 0.5879142439268006, "grad_norm": 0.22866292297840118, "learning_rate": 2.169974796953873e-05, "loss": 0.1229, "step": 32962 }, { "epoch": 0.5879320800485143, "grad_norm": 0.21684928238391876, "learning_rate": 2.1698205090321704e-05, "loss": 0.1193, "step": 32963 }, { "epoch": 0.587949916170228, "grad_norm": 0.27293986082077026, "learning_rate": 2.169666222390357e-05, "loss": 0.1601, "step": 32964 }, { "epoch": 0.5879677522919416, "grad_norm": 0.2788138687610626, "learning_rate": 2.1695119370290316e-05, "loss": 0.1112, "step": 32965 }, { "epoch": 0.5879855884136553, "grad_norm": 0.2058860957622528, "learning_rate": 2.1693576529487925e-05, "loss": 0.0955, "step": 32966 }, { "epoch": 0.588003424535369, "grad_norm": 0.2647230625152588, "learning_rate": 2.1692033701502372e-05, "loss": 0.0747, "step": 32967 }, { "epoch": 0.5880212606570827, "grad_norm": 0.23579511046409607, "learning_rate": 2.169049088633963e-05, "loss": 0.107, "step": 32968 }, { "epoch": 0.5880390967787964, "grad_norm": 0.24442631006240845, "learning_rate": 2.1688948084005702e-05, "loss": 0.0979, "step": 32969 }, { "epoch": 0.5880569329005101, "grad_norm": 0.2332504242658615, "learning_rate": 2.1687405294506548e-05, "loss": 0.1505, "step": 32970 }, { "epoch": 0.5880747690222238, "grad_norm": 0.2690383791923523, "learning_rate": 2.168586251784816e-05, "loss": 0.07, "step": 32971 }, { "epoch": 0.5880926051439375, "grad_norm": 0.39139053225517273, "learning_rate": 2.1684319754036513e-05, "loss": 0.1601, "step": 32972 }, { "epoch": 0.5881104412656512, "grad_norm": 0.23805727064609528, "learning_rate": 2.1682777003077578e-05, "loss": 0.0746, "step": 32973 }, { "epoch": 0.5881282773873648, "grad_norm": 0.4265058934688568, "learning_rate": 2.1681234264977356e-05, "loss": 0.1239, "step": 32974 }, { "epoch": 0.5881461135090786, "grad_norm": 0.20426680147647858, "learning_rate": 2.1679691539741815e-05, "loss": 0.0997, "step": 32975 }, { "epoch": 0.5881639496307923, "grad_norm": 0.21748527884483337, "learning_rate": 2.167814882737693e-05, "loss": 0.1442, "step": 32976 }, { "epoch": 0.588181785752506, "grad_norm": 0.19584710896015167, "learning_rate": 2.1676606127888692e-05, "loss": 0.1391, "step": 32977 }, { "epoch": 0.5881996218742197, "grad_norm": 0.209767684340477, "learning_rate": 2.167506344128307e-05, "loss": 0.1177, "step": 32978 }, { "epoch": 0.5882174579959334, "grad_norm": 0.231951043009758, "learning_rate": 2.167352076756606e-05, "loss": 0.1165, "step": 32979 }, { "epoch": 0.5882352941176471, "grad_norm": 0.658835768699646, "learning_rate": 2.1671978106743632e-05, "loss": 0.1854, "step": 32980 }, { "epoch": 0.5882531302393608, "grad_norm": 0.2097320854663849, "learning_rate": 2.1670435458821756e-05, "loss": 0.0957, "step": 32981 }, { "epoch": 0.5882709663610745, "grad_norm": 0.2828176021575928, "learning_rate": 2.1668892823806433e-05, "loss": 0.1088, "step": 32982 }, { "epoch": 0.5882888024827881, "grad_norm": 0.2525877058506012, "learning_rate": 2.166735020170363e-05, "loss": 0.1019, "step": 32983 }, { "epoch": 0.5883066386045018, "grad_norm": 0.26961150765419006, "learning_rate": 2.1665807592519324e-05, "loss": 0.1458, "step": 32984 }, { "epoch": 0.5883244747262155, "grad_norm": 0.2963247001171112, "learning_rate": 2.1664264996259505e-05, "loss": 0.115, "step": 32985 }, { "epoch": 0.5883423108479292, "grad_norm": 0.2993464469909668, "learning_rate": 2.1662722412930136e-05, "loss": 0.1835, "step": 32986 }, { "epoch": 0.5883601469696429, "grad_norm": 0.2097903937101364, "learning_rate": 2.1661179842537223e-05, "loss": 0.1332, "step": 32987 }, { "epoch": 0.5883779830913566, "grad_norm": 0.2858603894710541, "learning_rate": 2.1659637285086722e-05, "loss": 0.1081, "step": 32988 }, { "epoch": 0.5883958192130703, "grad_norm": 0.22751972079277039, "learning_rate": 2.1658094740584628e-05, "loss": 0.1378, "step": 32989 }, { "epoch": 0.588413655334784, "grad_norm": 0.3043662905693054, "learning_rate": 2.1656552209036897e-05, "loss": 0.1377, "step": 32990 }, { "epoch": 0.5884314914564976, "grad_norm": 0.3018110394477844, "learning_rate": 2.165500969044954e-05, "loss": 0.1095, "step": 32991 }, { "epoch": 0.5884493275782114, "grad_norm": 0.2185465544462204, "learning_rate": 2.165346718482851e-05, "loss": 0.1256, "step": 32992 }, { "epoch": 0.5884671636999251, "grad_norm": 0.2335309088230133, "learning_rate": 2.1651924692179804e-05, "loss": 0.1088, "step": 32993 }, { "epoch": 0.5884849998216388, "grad_norm": 0.412971168756485, "learning_rate": 2.1650382212509385e-05, "loss": 0.1455, "step": 32994 }, { "epoch": 0.5885028359433525, "grad_norm": 0.25159329175949097, "learning_rate": 2.1648839745823252e-05, "loss": 0.1444, "step": 32995 }, { "epoch": 0.5885206720650662, "grad_norm": 0.3027758300304413, "learning_rate": 2.1647297292127373e-05, "loss": 0.1288, "step": 32996 }, { "epoch": 0.5885385081867799, "grad_norm": 0.21371954679489136, "learning_rate": 2.1645754851427723e-05, "loss": 0.0805, "step": 32997 }, { "epoch": 0.5885563443084936, "grad_norm": 0.40860676765441895, "learning_rate": 2.164421242373029e-05, "loss": 0.1316, "step": 32998 }, { "epoch": 0.5885741804302073, "grad_norm": 0.31980836391448975, "learning_rate": 2.164267000904104e-05, "loss": 0.1778, "step": 32999 }, { "epoch": 0.588592016551921, "grad_norm": 0.2727595567703247, "learning_rate": 2.164112760736597e-05, "loss": 0.111, "step": 33000 }, { "epoch": 0.588592016551921, "eval_loss": 0.1261141002178192, "eval_runtime": 108.1661, "eval_samples_per_second": 9.467, "eval_steps_per_second": 1.581, "step": 33000 }, { "epoch": 0.5886098526736346, "grad_norm": 0.19662021100521088, "learning_rate": 2.1639585218711047e-05, "loss": 0.1202, "step": 33001 }, { "epoch": 0.5886276887953483, "grad_norm": 0.24519965052604675, "learning_rate": 2.1638042843082257e-05, "loss": 0.1111, "step": 33002 }, { "epoch": 0.588645524917062, "grad_norm": 0.2554810345172882, "learning_rate": 2.163650048048556e-05, "loss": 0.1099, "step": 33003 }, { "epoch": 0.5886633610387757, "grad_norm": 0.2318880259990692, "learning_rate": 2.163495813092696e-05, "loss": 0.1224, "step": 33004 }, { "epoch": 0.5886811971604894, "grad_norm": 0.21858911216259003, "learning_rate": 2.163341579441242e-05, "loss": 0.1571, "step": 33005 }, { "epoch": 0.5886990332822031, "grad_norm": 0.3522951602935791, "learning_rate": 2.1631873470947928e-05, "loss": 0.1202, "step": 33006 }, { "epoch": 0.5887168694039168, "grad_norm": 0.22781698405742645, "learning_rate": 2.1630331160539445e-05, "loss": 0.1457, "step": 33007 }, { "epoch": 0.5887347055256305, "grad_norm": 0.23906351625919342, "learning_rate": 2.1628788863192976e-05, "loss": 0.0934, "step": 33008 }, { "epoch": 0.5887525416473443, "grad_norm": 0.2531311810016632, "learning_rate": 2.162724657891449e-05, "loss": 0.1355, "step": 33009 }, { "epoch": 0.5887703777690579, "grad_norm": 0.2691013813018799, "learning_rate": 2.1625704307709952e-05, "loss": 0.1535, "step": 33010 }, { "epoch": 0.5887882138907716, "grad_norm": 0.17697560787200928, "learning_rate": 2.162416204958535e-05, "loss": 0.0761, "step": 33011 }, { "epoch": 0.5888060500124853, "grad_norm": 0.25484609603881836, "learning_rate": 2.1622619804546663e-05, "loss": 0.1137, "step": 33012 }, { "epoch": 0.588823886134199, "grad_norm": 0.3665103018283844, "learning_rate": 2.1621077572599862e-05, "loss": 0.1083, "step": 33013 }, { "epoch": 0.5888417222559127, "grad_norm": 0.37395647168159485, "learning_rate": 2.161953535375094e-05, "loss": 0.1766, "step": 33014 }, { "epoch": 0.5888595583776264, "grad_norm": 0.1853387951850891, "learning_rate": 2.1617993148005868e-05, "loss": 0.1093, "step": 33015 }, { "epoch": 0.5888773944993401, "grad_norm": 0.27346721291542053, "learning_rate": 2.161645095537061e-05, "loss": 0.1097, "step": 33016 }, { "epoch": 0.5888952306210538, "grad_norm": 0.27654165029525757, "learning_rate": 2.161490877585117e-05, "loss": 0.1263, "step": 33017 }, { "epoch": 0.5889130667427674, "grad_norm": 0.3392234742641449, "learning_rate": 2.161336660945351e-05, "loss": 0.1515, "step": 33018 }, { "epoch": 0.5889309028644811, "grad_norm": 0.22535693645477295, "learning_rate": 2.1611824456183608e-05, "loss": 0.1225, "step": 33019 }, { "epoch": 0.5889487389861948, "grad_norm": 0.2584347426891327, "learning_rate": 2.1610282316047437e-05, "loss": 0.1421, "step": 33020 }, { "epoch": 0.5889665751079085, "grad_norm": 0.26880693435668945, "learning_rate": 2.1608740189050993e-05, "loss": 0.1311, "step": 33021 }, { "epoch": 0.5889844112296222, "grad_norm": 0.18865562975406647, "learning_rate": 2.1607198075200246e-05, "loss": 0.0603, "step": 33022 }, { "epoch": 0.5890022473513359, "grad_norm": 0.2707449495792389, "learning_rate": 2.160565597450117e-05, "loss": 0.1246, "step": 33023 }, { "epoch": 0.5890200834730496, "grad_norm": 0.3323516547679901, "learning_rate": 2.1604113886959737e-05, "loss": 0.1399, "step": 33024 }, { "epoch": 0.5890379195947633, "grad_norm": 0.2800370156764984, "learning_rate": 2.160257181258193e-05, "loss": 0.1318, "step": 33025 }, { "epoch": 0.5890557557164771, "grad_norm": 0.30118528008461, "learning_rate": 2.1601029751373733e-05, "loss": 0.1684, "step": 33026 }, { "epoch": 0.5890735918381907, "grad_norm": 0.23637090623378754, "learning_rate": 2.159948770334112e-05, "loss": 0.1354, "step": 33027 }, { "epoch": 0.5890914279599044, "grad_norm": 0.27454230189323425, "learning_rate": 2.159794566849007e-05, "loss": 0.1039, "step": 33028 }, { "epoch": 0.5891092640816181, "grad_norm": 0.33771610260009766, "learning_rate": 2.1596403646826543e-05, "loss": 0.1776, "step": 33029 }, { "epoch": 0.5891271002033318, "grad_norm": 0.28984037041664124, "learning_rate": 2.1594861638356544e-05, "loss": 0.1603, "step": 33030 }, { "epoch": 0.5891449363250455, "grad_norm": 0.2714102864265442, "learning_rate": 2.1593319643086042e-05, "loss": 0.1764, "step": 33031 }, { "epoch": 0.5891627724467592, "grad_norm": 0.24412472546100616, "learning_rate": 2.1591777661020998e-05, "loss": 0.0756, "step": 33032 }, { "epoch": 0.5891806085684729, "grad_norm": 0.3281966745853424, "learning_rate": 2.15902356921674e-05, "loss": 0.1142, "step": 33033 }, { "epoch": 0.5891984446901866, "grad_norm": 0.32212576270103455, "learning_rate": 2.158869373653123e-05, "loss": 0.1457, "step": 33034 }, { "epoch": 0.5892162808119003, "grad_norm": 0.2897561490535736, "learning_rate": 2.1587151794118465e-05, "loss": 0.1751, "step": 33035 }, { "epoch": 0.5892341169336139, "grad_norm": 0.24207904934883118, "learning_rate": 2.158560986493508e-05, "loss": 0.1885, "step": 33036 }, { "epoch": 0.5892519530553276, "grad_norm": 0.2025081217288971, "learning_rate": 2.158406794898705e-05, "loss": 0.1137, "step": 33037 }, { "epoch": 0.5892697891770413, "grad_norm": 0.24720114469528198, "learning_rate": 2.1582526046280342e-05, "loss": 0.0953, "step": 33038 }, { "epoch": 0.589287625298755, "grad_norm": 0.255377858877182, "learning_rate": 2.1580984156820952e-05, "loss": 0.0721, "step": 33039 }, { "epoch": 0.5893054614204687, "grad_norm": 0.24377861618995667, "learning_rate": 2.1579442280614846e-05, "loss": 0.1438, "step": 33040 }, { "epoch": 0.5893232975421824, "grad_norm": 0.2838647663593292, "learning_rate": 2.1577900417668008e-05, "loss": 0.0886, "step": 33041 }, { "epoch": 0.5893411336638962, "grad_norm": 0.23067690432071686, "learning_rate": 2.1576358567986397e-05, "loss": 0.1315, "step": 33042 }, { "epoch": 0.5893589697856099, "grad_norm": 0.24995240569114685, "learning_rate": 2.1574816731576013e-05, "loss": 0.0782, "step": 33043 }, { "epoch": 0.5893768059073236, "grad_norm": 0.2691757380962372, "learning_rate": 2.1573274908442827e-05, "loss": 0.164, "step": 33044 }, { "epoch": 0.5893946420290372, "grad_norm": 0.29734519124031067, "learning_rate": 2.1571733098592805e-05, "loss": 0.1863, "step": 33045 }, { "epoch": 0.5894124781507509, "grad_norm": 0.24343740940093994, "learning_rate": 2.1570191302031923e-05, "loss": 0.1301, "step": 33046 }, { "epoch": 0.5894303142724646, "grad_norm": 0.2541879415512085, "learning_rate": 2.1568649518766165e-05, "loss": 0.1749, "step": 33047 }, { "epoch": 0.5894481503941783, "grad_norm": 0.27241525053977966, "learning_rate": 2.1567107748801518e-05, "loss": 0.1709, "step": 33048 }, { "epoch": 0.589465986515892, "grad_norm": 0.3554043471813202, "learning_rate": 2.1565565992143945e-05, "loss": 0.1084, "step": 33049 }, { "epoch": 0.5894838226376057, "grad_norm": 0.25861111283302307, "learning_rate": 2.1564024248799422e-05, "loss": 0.128, "step": 33050 }, { "epoch": 0.5895016587593194, "grad_norm": 0.30657315254211426, "learning_rate": 2.1562482518773917e-05, "loss": 0.2006, "step": 33051 }, { "epoch": 0.5895194948810331, "grad_norm": 0.31410300731658936, "learning_rate": 2.1560940802073433e-05, "loss": 0.1223, "step": 33052 }, { "epoch": 0.5895373310027467, "grad_norm": 0.21022747457027435, "learning_rate": 2.155939909870392e-05, "loss": 0.1048, "step": 33053 }, { "epoch": 0.5895551671244604, "grad_norm": 0.23118728399276733, "learning_rate": 2.155785740867137e-05, "loss": 0.1332, "step": 33054 }, { "epoch": 0.5895730032461741, "grad_norm": 0.20418299734592438, "learning_rate": 2.1556315731981743e-05, "loss": 0.1176, "step": 33055 }, { "epoch": 0.5895908393678878, "grad_norm": 0.24226684868335724, "learning_rate": 2.1554774068641037e-05, "loss": 0.1598, "step": 33056 }, { "epoch": 0.5896086754896015, "grad_norm": 0.21234562993049622, "learning_rate": 2.155323241865522e-05, "loss": 0.0776, "step": 33057 }, { "epoch": 0.5896265116113152, "grad_norm": 0.2150479108095169, "learning_rate": 2.1551690782030258e-05, "loss": 0.1232, "step": 33058 }, { "epoch": 0.589644347733029, "grad_norm": 0.2894500195980072, "learning_rate": 2.1550149158772128e-05, "loss": 0.1188, "step": 33059 }, { "epoch": 0.5896621838547427, "grad_norm": 0.24211075901985168, "learning_rate": 2.154860754888682e-05, "loss": 0.1247, "step": 33060 }, { "epoch": 0.5896800199764564, "grad_norm": 0.26943182945251465, "learning_rate": 2.1547065952380295e-05, "loss": 0.1398, "step": 33061 }, { "epoch": 0.58969785609817, "grad_norm": 0.2406005561351776, "learning_rate": 2.1545524369258543e-05, "loss": 0.1333, "step": 33062 }, { "epoch": 0.5897156922198837, "grad_norm": 0.30333927273750305, "learning_rate": 2.154398279952753e-05, "loss": 0.1911, "step": 33063 }, { "epoch": 0.5897335283415974, "grad_norm": 0.198168084025383, "learning_rate": 2.1542441243193223e-05, "loss": 0.0999, "step": 33064 }, { "epoch": 0.5897513644633111, "grad_norm": 0.42712706327438354, "learning_rate": 2.1540899700261622e-05, "loss": 0.129, "step": 33065 }, { "epoch": 0.5897692005850248, "grad_norm": 0.26657959818840027, "learning_rate": 2.1539358170738682e-05, "loss": 0.1049, "step": 33066 }, { "epoch": 0.5897870367067385, "grad_norm": 0.2324095219373703, "learning_rate": 2.1537816654630385e-05, "loss": 0.123, "step": 33067 }, { "epoch": 0.5898048728284522, "grad_norm": 0.22534257173538208, "learning_rate": 2.1536275151942702e-05, "loss": 0.1097, "step": 33068 }, { "epoch": 0.5898227089501659, "grad_norm": 0.28968656063079834, "learning_rate": 2.1534733662681616e-05, "loss": 0.1912, "step": 33069 }, { "epoch": 0.5898405450718796, "grad_norm": 0.38640910387039185, "learning_rate": 2.1533192186853104e-05, "loss": 0.0936, "step": 33070 }, { "epoch": 0.5898583811935932, "grad_norm": 0.2354934960603714, "learning_rate": 2.153165072446314e-05, "loss": 0.1364, "step": 33071 }, { "epoch": 0.5898762173153069, "grad_norm": 0.298694372177124, "learning_rate": 2.153010927551768e-05, "loss": 0.1579, "step": 33072 }, { "epoch": 0.5898940534370206, "grad_norm": 0.27349498867988586, "learning_rate": 2.1528567840022722e-05, "loss": 0.1645, "step": 33073 }, { "epoch": 0.5899118895587343, "grad_norm": 0.2971816658973694, "learning_rate": 2.1527026417984237e-05, "loss": 0.1145, "step": 33074 }, { "epoch": 0.589929725680448, "grad_norm": 0.22058752179145813, "learning_rate": 2.1525485009408196e-05, "loss": 0.1099, "step": 33075 }, { "epoch": 0.5899475618021618, "grad_norm": 0.26899510622024536, "learning_rate": 2.152394361430058e-05, "loss": 0.1407, "step": 33076 }, { "epoch": 0.5899653979238755, "grad_norm": 0.2096758633852005, "learning_rate": 2.1522402232667345e-05, "loss": 0.1305, "step": 33077 }, { "epoch": 0.5899832340455892, "grad_norm": 0.2950533926486969, "learning_rate": 2.1520860864514494e-05, "loss": 0.1063, "step": 33078 }, { "epoch": 0.5900010701673029, "grad_norm": 0.19849883019924164, "learning_rate": 2.1519319509847984e-05, "loss": 0.0943, "step": 33079 }, { "epoch": 0.5900189062890165, "grad_norm": 0.34837716817855835, "learning_rate": 2.1517778168673793e-05, "loss": 0.171, "step": 33080 }, { "epoch": 0.5900367424107302, "grad_norm": 0.2350277453660965, "learning_rate": 2.1516236840997893e-05, "loss": 0.1433, "step": 33081 }, { "epoch": 0.5900545785324439, "grad_norm": 0.25818994641304016, "learning_rate": 2.1514695526826265e-05, "loss": 0.128, "step": 33082 }, { "epoch": 0.5900724146541576, "grad_norm": 0.24907296895980835, "learning_rate": 2.151315422616488e-05, "loss": 0.1362, "step": 33083 }, { "epoch": 0.5900902507758713, "grad_norm": 0.24716128408908844, "learning_rate": 2.1511612939019724e-05, "loss": 0.1009, "step": 33084 }, { "epoch": 0.590108086897585, "grad_norm": 0.28970226645469666, "learning_rate": 2.151007166539674e-05, "loss": 0.1638, "step": 33085 }, { "epoch": 0.5901259230192987, "grad_norm": 0.21572163701057434, "learning_rate": 2.150853040530194e-05, "loss": 0.0895, "step": 33086 }, { "epoch": 0.5901437591410124, "grad_norm": 0.22393271327018738, "learning_rate": 2.1506989158741285e-05, "loss": 0.1068, "step": 33087 }, { "epoch": 0.590161595262726, "grad_norm": 0.23315875232219696, "learning_rate": 2.1505447925720734e-05, "loss": 0.1095, "step": 33088 }, { "epoch": 0.5901794313844397, "grad_norm": 0.28150245547294617, "learning_rate": 2.1503906706246285e-05, "loss": 0.1484, "step": 33089 }, { "epoch": 0.5901972675061534, "grad_norm": 0.1934724897146225, "learning_rate": 2.1502365500323887e-05, "loss": 0.1068, "step": 33090 }, { "epoch": 0.5902151036278671, "grad_norm": 0.22217927873134613, "learning_rate": 2.1500824307959538e-05, "loss": 0.1447, "step": 33091 }, { "epoch": 0.5902329397495808, "grad_norm": 0.23097416758537292, "learning_rate": 2.1499283129159205e-05, "loss": 0.123, "step": 33092 }, { "epoch": 0.5902507758712946, "grad_norm": 0.2799910604953766, "learning_rate": 2.1497741963928862e-05, "loss": 0.1272, "step": 33093 }, { "epoch": 0.5902686119930083, "grad_norm": 0.22545427083969116, "learning_rate": 2.1496200812274465e-05, "loss": 0.1405, "step": 33094 }, { "epoch": 0.590286448114722, "grad_norm": 0.2801673114299774, "learning_rate": 2.1494659674202018e-05, "loss": 0.1398, "step": 33095 }, { "epoch": 0.5903042842364357, "grad_norm": 0.18933075666427612, "learning_rate": 2.1493118549717473e-05, "loss": 0.1188, "step": 33096 }, { "epoch": 0.5903221203581493, "grad_norm": 0.23201805353164673, "learning_rate": 2.1491577438826818e-05, "loss": 0.1298, "step": 33097 }, { "epoch": 0.590339956479863, "grad_norm": 0.2574159502983093, "learning_rate": 2.149003634153601e-05, "loss": 0.1398, "step": 33098 }, { "epoch": 0.5903577926015767, "grad_norm": 0.28991690278053284, "learning_rate": 2.1488495257851045e-05, "loss": 0.1524, "step": 33099 }, { "epoch": 0.5903756287232904, "grad_norm": 0.22774146497249603, "learning_rate": 2.1486954187777887e-05, "loss": 0.1063, "step": 33100 }, { "epoch": 0.5903934648450041, "grad_norm": 0.30327367782592773, "learning_rate": 2.1485413131322496e-05, "loss": 0.1603, "step": 33101 }, { "epoch": 0.5904113009667178, "grad_norm": 0.311737984418869, "learning_rate": 2.148387208849087e-05, "loss": 0.1408, "step": 33102 }, { "epoch": 0.5904291370884315, "grad_norm": 0.29760342836380005, "learning_rate": 2.1482331059288953e-05, "loss": 0.1073, "step": 33103 }, { "epoch": 0.5904469732101452, "grad_norm": 0.41577932238578796, "learning_rate": 2.1480790043722752e-05, "loss": 0.135, "step": 33104 }, { "epoch": 0.5904648093318589, "grad_norm": 0.2204284369945526, "learning_rate": 2.1479249041798223e-05, "loss": 0.108, "step": 33105 }, { "epoch": 0.5904826454535725, "grad_norm": 0.44028276205062866, "learning_rate": 2.147770805352134e-05, "loss": 0.1592, "step": 33106 }, { "epoch": 0.5905004815752862, "grad_norm": 0.2741379737854004, "learning_rate": 2.1476167078898066e-05, "loss": 0.124, "step": 33107 }, { "epoch": 0.5905183176969999, "grad_norm": 0.3365446925163269, "learning_rate": 2.1474626117934397e-05, "loss": 0.0945, "step": 33108 }, { "epoch": 0.5905361538187136, "grad_norm": 0.24904195964336395, "learning_rate": 2.147308517063629e-05, "loss": 0.1379, "step": 33109 }, { "epoch": 0.5905539899404274, "grad_norm": 0.26429814100265503, "learning_rate": 2.147154423700973e-05, "loss": 0.1086, "step": 33110 }, { "epoch": 0.5905718260621411, "grad_norm": 0.26478275656700134, "learning_rate": 2.1470003317060666e-05, "loss": 0.1482, "step": 33111 }, { "epoch": 0.5905896621838548, "grad_norm": 0.18958696722984314, "learning_rate": 2.1468462410795105e-05, "loss": 0.0939, "step": 33112 }, { "epoch": 0.5906074983055685, "grad_norm": 0.2733018398284912, "learning_rate": 2.1466921518219e-05, "loss": 0.1197, "step": 33113 }, { "epoch": 0.5906253344272822, "grad_norm": 0.21467597782611847, "learning_rate": 2.146538063933833e-05, "loss": 0.0808, "step": 33114 }, { "epoch": 0.5906431705489958, "grad_norm": 0.19865699112415314, "learning_rate": 2.146383977415906e-05, "loss": 0.1393, "step": 33115 }, { "epoch": 0.5906610066707095, "grad_norm": 0.24648387730121613, "learning_rate": 2.1462298922687166e-05, "loss": 0.0997, "step": 33116 }, { "epoch": 0.5906788427924232, "grad_norm": 0.24080605804920197, "learning_rate": 2.1460758084928624e-05, "loss": 0.2018, "step": 33117 }, { "epoch": 0.5906966789141369, "grad_norm": 0.18410654366016388, "learning_rate": 2.1459217260889413e-05, "loss": 0.1034, "step": 33118 }, { "epoch": 0.5907145150358506, "grad_norm": 0.242695152759552, "learning_rate": 2.1457676450575497e-05, "loss": 0.1454, "step": 33119 }, { "epoch": 0.5907323511575643, "grad_norm": 0.24289849400520325, "learning_rate": 2.1456135653992843e-05, "loss": 0.1683, "step": 33120 }, { "epoch": 0.590750187279278, "grad_norm": 0.23219898343086243, "learning_rate": 2.1454594871147436e-05, "loss": 0.1256, "step": 33121 }, { "epoch": 0.5907680234009917, "grad_norm": 0.202924907207489, "learning_rate": 2.145305410204525e-05, "loss": 0.0917, "step": 33122 }, { "epoch": 0.5907858595227053, "grad_norm": 0.21039503812789917, "learning_rate": 2.1451513346692244e-05, "loss": 0.1268, "step": 33123 }, { "epoch": 0.590803695644419, "grad_norm": 0.2628650367259979, "learning_rate": 2.1449972605094394e-05, "loss": 0.0922, "step": 33124 }, { "epoch": 0.5908215317661327, "grad_norm": 0.3127400279045105, "learning_rate": 2.1448431877257686e-05, "loss": 0.1875, "step": 33125 }, { "epoch": 0.5908393678878464, "grad_norm": 0.25862622261047363, "learning_rate": 2.144689116318808e-05, "loss": 0.1941, "step": 33126 }, { "epoch": 0.5908572040095602, "grad_norm": 0.2257784754037857, "learning_rate": 2.1445350462891552e-05, "loss": 0.1398, "step": 33127 }, { "epoch": 0.5908750401312739, "grad_norm": 0.2474130243062973, "learning_rate": 2.144380977637407e-05, "loss": 0.1426, "step": 33128 }, { "epoch": 0.5908928762529876, "grad_norm": 0.27754154801368713, "learning_rate": 2.144226910364161e-05, "loss": 0.1452, "step": 33129 }, { "epoch": 0.5909107123747013, "grad_norm": 0.36767107248306274, "learning_rate": 2.1440728444700144e-05, "loss": 0.1816, "step": 33130 }, { "epoch": 0.590928548496415, "grad_norm": 0.2943879961967468, "learning_rate": 2.1439187799555643e-05, "loss": 0.1192, "step": 33131 }, { "epoch": 0.5909463846181287, "grad_norm": 0.19691762328147888, "learning_rate": 2.1437647168214087e-05, "loss": 0.1088, "step": 33132 }, { "epoch": 0.5909642207398423, "grad_norm": 0.20330274105072021, "learning_rate": 2.1436106550681424e-05, "loss": 0.1288, "step": 33133 }, { "epoch": 0.590982056861556, "grad_norm": 0.2210235446691513, "learning_rate": 2.1434565946963662e-05, "loss": 0.1704, "step": 33134 }, { "epoch": 0.5909998929832697, "grad_norm": 0.259302020072937, "learning_rate": 2.1433025357066747e-05, "loss": 0.1239, "step": 33135 }, { "epoch": 0.5910177291049834, "grad_norm": 0.2674104571342468, "learning_rate": 2.1431484780996655e-05, "loss": 0.1035, "step": 33136 }, { "epoch": 0.5910355652266971, "grad_norm": 0.2931918799877167, "learning_rate": 2.142994421875936e-05, "loss": 0.1052, "step": 33137 }, { "epoch": 0.5910534013484108, "grad_norm": 0.24653010070323944, "learning_rate": 2.1428403670360836e-05, "loss": 0.1111, "step": 33138 }, { "epoch": 0.5910712374701245, "grad_norm": 0.2379283457994461, "learning_rate": 2.1426863135807058e-05, "loss": 0.1412, "step": 33139 }, { "epoch": 0.5910890735918382, "grad_norm": 0.47157180309295654, "learning_rate": 2.1425322615103986e-05, "loss": 0.1638, "step": 33140 }, { "epoch": 0.5911069097135518, "grad_norm": 0.23331566154956818, "learning_rate": 2.1423782108257608e-05, "loss": 0.131, "step": 33141 }, { "epoch": 0.5911247458352655, "grad_norm": 0.3077402114868164, "learning_rate": 2.142224161527387e-05, "loss": 0.1736, "step": 33142 }, { "epoch": 0.5911425819569793, "grad_norm": 0.27864784002304077, "learning_rate": 2.142070113615877e-05, "loss": 0.1374, "step": 33143 }, { "epoch": 0.591160418078693, "grad_norm": 0.2880363464355469, "learning_rate": 2.1419160670918264e-05, "loss": 0.1374, "step": 33144 }, { "epoch": 0.5911782542004067, "grad_norm": 0.32880255579948425, "learning_rate": 2.141762021955833e-05, "loss": 0.1356, "step": 33145 }, { "epoch": 0.5911960903221204, "grad_norm": 0.19545045495033264, "learning_rate": 2.1416079782084933e-05, "loss": 0.111, "step": 33146 }, { "epoch": 0.5912139264438341, "grad_norm": 0.29984405636787415, "learning_rate": 2.1414539358504055e-05, "loss": 0.1429, "step": 33147 }, { "epoch": 0.5912317625655478, "grad_norm": 0.23452989757061005, "learning_rate": 2.1412998948821663e-05, "loss": 0.0702, "step": 33148 }, { "epoch": 0.5912495986872615, "grad_norm": 0.23591265082359314, "learning_rate": 2.1411458553043727e-05, "loss": 0.0892, "step": 33149 }, { "epoch": 0.5912674348089751, "grad_norm": 0.24769672751426697, "learning_rate": 2.1409918171176203e-05, "loss": 0.1454, "step": 33150 }, { "epoch": 0.5912852709306888, "grad_norm": 0.27837643027305603, "learning_rate": 2.140837780322508e-05, "loss": 0.1206, "step": 33151 }, { "epoch": 0.5913031070524025, "grad_norm": 0.2272465080022812, "learning_rate": 2.1406837449196335e-05, "loss": 0.1474, "step": 33152 }, { "epoch": 0.5913209431741162, "grad_norm": 0.1675087958574295, "learning_rate": 2.1405297109095928e-05, "loss": 0.0863, "step": 33153 }, { "epoch": 0.5913387792958299, "grad_norm": 0.27321112155914307, "learning_rate": 2.140375678292983e-05, "loss": 0.1465, "step": 33154 }, { "epoch": 0.5913566154175436, "grad_norm": 0.23717403411865234, "learning_rate": 2.1402216470704002e-05, "loss": 0.1119, "step": 33155 }, { "epoch": 0.5913744515392573, "grad_norm": 0.2761916518211365, "learning_rate": 2.140067617242444e-05, "loss": 0.1021, "step": 33156 }, { "epoch": 0.591392287660971, "grad_norm": 0.362943559885025, "learning_rate": 2.139913588809709e-05, "loss": 0.1478, "step": 33157 }, { "epoch": 0.5914101237826846, "grad_norm": 0.20055672526359558, "learning_rate": 2.1397595617727938e-05, "loss": 0.1414, "step": 33158 }, { "epoch": 0.5914279599043983, "grad_norm": 0.3138004541397095, "learning_rate": 2.139605536132294e-05, "loss": 0.1577, "step": 33159 }, { "epoch": 0.5914457960261121, "grad_norm": 0.1988336145877838, "learning_rate": 2.139451511888809e-05, "loss": 0.1207, "step": 33160 }, { "epoch": 0.5914636321478258, "grad_norm": 0.18993769586086273, "learning_rate": 2.1392974890429344e-05, "loss": 0.0875, "step": 33161 }, { "epoch": 0.5914814682695395, "grad_norm": 0.2395271509885788, "learning_rate": 2.139143467595267e-05, "loss": 0.1175, "step": 33162 }, { "epoch": 0.5914993043912532, "grad_norm": 0.32086557149887085, "learning_rate": 2.1389894475464036e-05, "loss": 0.1572, "step": 33163 }, { "epoch": 0.5915171405129669, "grad_norm": 0.22084881365299225, "learning_rate": 2.1388354288969424e-05, "loss": 0.0874, "step": 33164 }, { "epoch": 0.5915349766346806, "grad_norm": 0.2549118399620056, "learning_rate": 2.1386814116474794e-05, "loss": 0.1229, "step": 33165 }, { "epoch": 0.5915528127563943, "grad_norm": 0.33142685890197754, "learning_rate": 2.1385273957986125e-05, "loss": 0.1197, "step": 33166 }, { "epoch": 0.591570648878108, "grad_norm": 0.26726052165031433, "learning_rate": 2.1383733813509382e-05, "loss": 0.0824, "step": 33167 }, { "epoch": 0.5915884849998216, "grad_norm": 0.2754552364349365, "learning_rate": 2.1382193683050527e-05, "loss": 0.1529, "step": 33168 }, { "epoch": 0.5916063211215353, "grad_norm": 0.3096371293067932, "learning_rate": 2.138065356661555e-05, "loss": 0.1211, "step": 33169 }, { "epoch": 0.591624157243249, "grad_norm": 0.2355424165725708, "learning_rate": 2.137911346421041e-05, "loss": 0.1711, "step": 33170 }, { "epoch": 0.5916419933649627, "grad_norm": 0.22811809182167053, "learning_rate": 2.137757337584107e-05, "loss": 0.097, "step": 33171 }, { "epoch": 0.5916598294866764, "grad_norm": 0.23363593220710754, "learning_rate": 2.1376033301513504e-05, "loss": 0.0937, "step": 33172 }, { "epoch": 0.5916776656083901, "grad_norm": 0.21910591423511505, "learning_rate": 2.137449324123369e-05, "loss": 0.1081, "step": 33173 }, { "epoch": 0.5916955017301038, "grad_norm": 0.25870445370674133, "learning_rate": 2.1372953195007596e-05, "loss": 0.1207, "step": 33174 }, { "epoch": 0.5917133378518175, "grad_norm": 0.27380484342575073, "learning_rate": 2.1371413162841185e-05, "loss": 0.1311, "step": 33175 }, { "epoch": 0.5917311739735311, "grad_norm": 0.1943562626838684, "learning_rate": 2.1369873144740424e-05, "loss": 0.0986, "step": 33176 }, { "epoch": 0.5917490100952449, "grad_norm": 0.3256903290748596, "learning_rate": 2.1368333140711295e-05, "loss": 0.1545, "step": 33177 }, { "epoch": 0.5917668462169586, "grad_norm": 0.21299290657043457, "learning_rate": 2.136679315075976e-05, "loss": 0.1013, "step": 33178 }, { "epoch": 0.5917846823386723, "grad_norm": 0.32878273725509644, "learning_rate": 2.136525317489179e-05, "loss": 0.1196, "step": 33179 }, { "epoch": 0.591802518460386, "grad_norm": 0.3366239666938782, "learning_rate": 2.136371321311336e-05, "loss": 0.122, "step": 33180 }, { "epoch": 0.5918203545820997, "grad_norm": 0.3108285963535309, "learning_rate": 2.1362173265430418e-05, "loss": 0.106, "step": 33181 }, { "epoch": 0.5918381907038134, "grad_norm": 0.35554245114326477, "learning_rate": 2.1360633331848962e-05, "loss": 0.2326, "step": 33182 }, { "epoch": 0.5918560268255271, "grad_norm": 0.23072634637355804, "learning_rate": 2.135909341237495e-05, "loss": 0.1185, "step": 33183 }, { "epoch": 0.5918738629472408, "grad_norm": 0.3261723518371582, "learning_rate": 2.1357553507014337e-05, "loss": 0.158, "step": 33184 }, { "epoch": 0.5918916990689544, "grad_norm": 0.26431435346603394, "learning_rate": 2.135601361577311e-05, "loss": 0.1418, "step": 33185 }, { "epoch": 0.5919095351906681, "grad_norm": 0.2369711548089981, "learning_rate": 2.1354473738657233e-05, "loss": 0.1012, "step": 33186 }, { "epoch": 0.5919273713123818, "grad_norm": 0.3591991364955902, "learning_rate": 2.135293387567268e-05, "loss": 0.1384, "step": 33187 }, { "epoch": 0.5919452074340955, "grad_norm": 0.278198778629303, "learning_rate": 2.1351394026825415e-05, "loss": 0.1316, "step": 33188 }, { "epoch": 0.5919630435558092, "grad_norm": 0.2619699239730835, "learning_rate": 2.1349854192121392e-05, "loss": 0.1274, "step": 33189 }, { "epoch": 0.5919808796775229, "grad_norm": 0.35606810450553894, "learning_rate": 2.134831437156661e-05, "loss": 0.1375, "step": 33190 }, { "epoch": 0.5919987157992366, "grad_norm": 0.25631991028785706, "learning_rate": 2.1346774565167022e-05, "loss": 0.1843, "step": 33191 }, { "epoch": 0.5920165519209503, "grad_norm": 0.21016977727413177, "learning_rate": 2.134523477292859e-05, "loss": 0.1186, "step": 33192 }, { "epoch": 0.592034388042664, "grad_norm": 0.25714293122291565, "learning_rate": 2.13436949948573e-05, "loss": 0.1232, "step": 33193 }, { "epoch": 0.5920522241643777, "grad_norm": 0.24614916741847992, "learning_rate": 2.13421552309591e-05, "loss": 0.1201, "step": 33194 }, { "epoch": 0.5920700602860914, "grad_norm": 0.2710455358028412, "learning_rate": 2.1340615481239975e-05, "loss": 0.1175, "step": 33195 }, { "epoch": 0.5920878964078051, "grad_norm": 0.3321419954299927, "learning_rate": 2.1339075745705894e-05, "loss": 0.181, "step": 33196 }, { "epoch": 0.5921057325295188, "grad_norm": 0.2175750583410263, "learning_rate": 2.1337536024362818e-05, "loss": 0.1124, "step": 33197 }, { "epoch": 0.5921235686512325, "grad_norm": 0.19029946625232697, "learning_rate": 2.1335996317216705e-05, "loss": 0.1369, "step": 33198 }, { "epoch": 0.5921414047729462, "grad_norm": 0.3677079975605011, "learning_rate": 2.1334456624273548e-05, "loss": 0.1125, "step": 33199 }, { "epoch": 0.5921592408946599, "grad_norm": 0.2554343342781067, "learning_rate": 2.13329169455393e-05, "loss": 0.1047, "step": 33200 }, { "epoch": 0.5921770770163736, "grad_norm": 0.3062489330768585, "learning_rate": 2.1331377281019932e-05, "loss": 0.1156, "step": 33201 }, { "epoch": 0.5921949131380873, "grad_norm": 0.26012712717056274, "learning_rate": 2.1329837630721405e-05, "loss": 0.1473, "step": 33202 }, { "epoch": 0.5922127492598009, "grad_norm": 0.22849048674106598, "learning_rate": 2.1328297994649708e-05, "loss": 0.1321, "step": 33203 }, { "epoch": 0.5922305853815146, "grad_norm": 0.26811352372169495, "learning_rate": 2.1326758372810793e-05, "loss": 0.1249, "step": 33204 }, { "epoch": 0.5922484215032283, "grad_norm": 0.28943127393722534, "learning_rate": 2.1325218765210628e-05, "loss": 0.1687, "step": 33205 }, { "epoch": 0.592266257624942, "grad_norm": 0.18160155415534973, "learning_rate": 2.132367917185519e-05, "loss": 0.0659, "step": 33206 }, { "epoch": 0.5922840937466557, "grad_norm": 0.3300116956233978, "learning_rate": 2.1322139592750428e-05, "loss": 0.1284, "step": 33207 }, { "epoch": 0.5923019298683694, "grad_norm": 0.3537009060382843, "learning_rate": 2.1320600027902336e-05, "loss": 0.1192, "step": 33208 }, { "epoch": 0.5923197659900831, "grad_norm": 0.29702624678611755, "learning_rate": 2.1319060477316867e-05, "loss": 0.0956, "step": 33209 }, { "epoch": 0.5923376021117968, "grad_norm": 0.24897907674312592, "learning_rate": 2.1317520940999992e-05, "loss": 0.121, "step": 33210 }, { "epoch": 0.5923554382335106, "grad_norm": 0.3004474937915802, "learning_rate": 2.1315981418957666e-05, "loss": 0.1506, "step": 33211 }, { "epoch": 0.5923732743552242, "grad_norm": 0.21354342997074127, "learning_rate": 2.1314441911195882e-05, "loss": 0.121, "step": 33212 }, { "epoch": 0.5923911104769379, "grad_norm": 0.2853353023529053, "learning_rate": 2.1312902417720586e-05, "loss": 0.1517, "step": 33213 }, { "epoch": 0.5924089465986516, "grad_norm": 0.20831961929798126, "learning_rate": 2.1311362938537764e-05, "loss": 0.1212, "step": 33214 }, { "epoch": 0.5924267827203653, "grad_norm": 0.290397047996521, "learning_rate": 2.1309823473653357e-05, "loss": 0.11, "step": 33215 }, { "epoch": 0.592444618842079, "grad_norm": 0.2503226101398468, "learning_rate": 2.130828402307336e-05, "loss": 0.1584, "step": 33216 }, { "epoch": 0.5924624549637927, "grad_norm": 0.25400683283805847, "learning_rate": 2.1306744586803735e-05, "loss": 0.1318, "step": 33217 }, { "epoch": 0.5924802910855064, "grad_norm": 0.27401265501976013, "learning_rate": 2.1305205164850437e-05, "loss": 0.1127, "step": 33218 }, { "epoch": 0.5924981272072201, "grad_norm": 0.22648422420024872, "learning_rate": 2.1303665757219437e-05, "loss": 0.1564, "step": 33219 }, { "epoch": 0.5925159633289337, "grad_norm": 0.27828386425971985, "learning_rate": 2.1302126363916706e-05, "loss": 0.1429, "step": 33220 }, { "epoch": 0.5925337994506474, "grad_norm": 0.24349278211593628, "learning_rate": 2.1300586984948206e-05, "loss": 0.1573, "step": 33221 }, { "epoch": 0.5925516355723611, "grad_norm": 0.2145342230796814, "learning_rate": 2.1299047620319916e-05, "loss": 0.0828, "step": 33222 }, { "epoch": 0.5925694716940748, "grad_norm": 0.233333021402359, "learning_rate": 2.12975082700378e-05, "loss": 0.1058, "step": 33223 }, { "epoch": 0.5925873078157885, "grad_norm": 0.268494188785553, "learning_rate": 2.129596893410781e-05, "loss": 0.1383, "step": 33224 }, { "epoch": 0.5926051439375022, "grad_norm": 0.2279106229543686, "learning_rate": 2.1294429612535928e-05, "loss": 0.0904, "step": 33225 }, { "epoch": 0.5926229800592159, "grad_norm": 0.32699477672576904, "learning_rate": 2.129289030532812e-05, "loss": 0.1768, "step": 33226 }, { "epoch": 0.5926408161809296, "grad_norm": 0.25072968006134033, "learning_rate": 2.1291351012490345e-05, "loss": 0.1309, "step": 33227 }, { "epoch": 0.5926586523026434, "grad_norm": 0.3012182414531708, "learning_rate": 2.1289811734028568e-05, "loss": 0.1198, "step": 33228 }, { "epoch": 0.592676488424357, "grad_norm": 0.3182644248008728, "learning_rate": 2.128827246994877e-05, "loss": 0.1187, "step": 33229 }, { "epoch": 0.5926943245460707, "grad_norm": 0.21471263468265533, "learning_rate": 2.1286733220256916e-05, "loss": 0.1387, "step": 33230 }, { "epoch": 0.5927121606677844, "grad_norm": 0.26514896750450134, "learning_rate": 2.1285193984958966e-05, "loss": 0.1157, "step": 33231 }, { "epoch": 0.5927299967894981, "grad_norm": 0.23476961255073547, "learning_rate": 2.1283654764060885e-05, "loss": 0.135, "step": 33232 }, { "epoch": 0.5927478329112118, "grad_norm": 0.30298492312431335, "learning_rate": 2.128211555756863e-05, "loss": 0.1451, "step": 33233 }, { "epoch": 0.5927656690329255, "grad_norm": 0.2590707242488861, "learning_rate": 2.1280576365488186e-05, "loss": 0.1196, "step": 33234 }, { "epoch": 0.5927835051546392, "grad_norm": 0.2447502166032791, "learning_rate": 2.127903718782552e-05, "loss": 0.0814, "step": 33235 }, { "epoch": 0.5928013412763529, "grad_norm": 0.32594069838523865, "learning_rate": 2.1277498024586583e-05, "loss": 0.1291, "step": 33236 }, { "epoch": 0.5928191773980666, "grad_norm": 0.261898398399353, "learning_rate": 2.1275958875777345e-05, "loss": 0.1656, "step": 33237 }, { "epoch": 0.5928370135197802, "grad_norm": 0.22964385151863098, "learning_rate": 2.1274419741403788e-05, "loss": 0.081, "step": 33238 }, { "epoch": 0.5928548496414939, "grad_norm": 0.31449735164642334, "learning_rate": 2.1272880621471868e-05, "loss": 0.1983, "step": 33239 }, { "epoch": 0.5928726857632076, "grad_norm": 0.24741274118423462, "learning_rate": 2.1271341515987538e-05, "loss": 0.1264, "step": 33240 }, { "epoch": 0.5928905218849213, "grad_norm": 0.22285273671150208, "learning_rate": 2.126980242495678e-05, "loss": 0.1227, "step": 33241 }, { "epoch": 0.592908358006635, "grad_norm": 0.252570241689682, "learning_rate": 2.126826334838556e-05, "loss": 0.1071, "step": 33242 }, { "epoch": 0.5929261941283487, "grad_norm": 0.29839420318603516, "learning_rate": 2.126672428627984e-05, "loss": 0.1808, "step": 33243 }, { "epoch": 0.5929440302500625, "grad_norm": 0.3151942789554596, "learning_rate": 2.1265185238645587e-05, "loss": 0.1161, "step": 33244 }, { "epoch": 0.5929618663717762, "grad_norm": 0.2681471109390259, "learning_rate": 2.126364620548877e-05, "loss": 0.0971, "step": 33245 }, { "epoch": 0.5929797024934899, "grad_norm": 0.20755575597286224, "learning_rate": 2.1262107186815332e-05, "loss": 0.1093, "step": 33246 }, { "epoch": 0.5929975386152035, "grad_norm": 0.2835428714752197, "learning_rate": 2.1260568182631278e-05, "loss": 0.1113, "step": 33247 }, { "epoch": 0.5930153747369172, "grad_norm": 0.28805971145629883, "learning_rate": 2.1259029192942542e-05, "loss": 0.105, "step": 33248 }, { "epoch": 0.5930332108586309, "grad_norm": 0.23881368339061737, "learning_rate": 2.125749021775511e-05, "loss": 0.1335, "step": 33249 }, { "epoch": 0.5930510469803446, "grad_norm": 0.343791127204895, "learning_rate": 2.125595125707492e-05, "loss": 0.1481, "step": 33250 }, { "epoch": 0.5930688831020583, "grad_norm": 0.2349422574043274, "learning_rate": 2.1254412310907974e-05, "loss": 0.1234, "step": 33251 }, { "epoch": 0.593086719223772, "grad_norm": 0.3011320233345032, "learning_rate": 2.125287337926022e-05, "loss": 0.1532, "step": 33252 }, { "epoch": 0.5931045553454857, "grad_norm": 0.2773435115814209, "learning_rate": 2.125133446213762e-05, "loss": 0.1304, "step": 33253 }, { "epoch": 0.5931223914671994, "grad_norm": 0.22966253757476807, "learning_rate": 2.124979555954613e-05, "loss": 0.1214, "step": 33254 }, { "epoch": 0.593140227588913, "grad_norm": 0.2792550027370453, "learning_rate": 2.1248256671491736e-05, "loss": 0.1606, "step": 33255 }, { "epoch": 0.5931580637106267, "grad_norm": 0.19948892295360565, "learning_rate": 2.12467177979804e-05, "loss": 0.075, "step": 33256 }, { "epoch": 0.5931758998323404, "grad_norm": 0.2976965308189392, "learning_rate": 2.124517893901808e-05, "loss": 0.2024, "step": 33257 }, { "epoch": 0.5931937359540541, "grad_norm": 0.31936338543891907, "learning_rate": 2.1243640094610738e-05, "loss": 0.1153, "step": 33258 }, { "epoch": 0.5932115720757678, "grad_norm": 0.28131014108657837, "learning_rate": 2.124210126476435e-05, "loss": 0.0815, "step": 33259 }, { "epoch": 0.5932294081974815, "grad_norm": 0.2174483686685562, "learning_rate": 2.1240562449484878e-05, "loss": 0.104, "step": 33260 }, { "epoch": 0.5932472443191953, "grad_norm": 0.34584856033325195, "learning_rate": 2.1239023648778276e-05, "loss": 0.091, "step": 33261 }, { "epoch": 0.593265080440909, "grad_norm": 0.2531794011592865, "learning_rate": 2.1237484862650525e-05, "loss": 0.1126, "step": 33262 }, { "epoch": 0.5932829165626227, "grad_norm": 0.2942150831222534, "learning_rate": 2.1235946091107572e-05, "loss": 0.1479, "step": 33263 }, { "epoch": 0.5933007526843364, "grad_norm": 0.26067620515823364, "learning_rate": 2.1234407334155403e-05, "loss": 0.1134, "step": 33264 }, { "epoch": 0.59331858880605, "grad_norm": 0.23999664187431335, "learning_rate": 2.123286859179997e-05, "loss": 0.1282, "step": 33265 }, { "epoch": 0.5933364249277637, "grad_norm": 0.2643487751483917, "learning_rate": 2.123132986404724e-05, "loss": 0.1483, "step": 33266 }, { "epoch": 0.5933542610494774, "grad_norm": 0.246018186211586, "learning_rate": 2.1229791150903168e-05, "loss": 0.1292, "step": 33267 }, { "epoch": 0.5933720971711911, "grad_norm": 0.23561933636665344, "learning_rate": 2.1228252452373738e-05, "loss": 0.1335, "step": 33268 }, { "epoch": 0.5933899332929048, "grad_norm": 0.2946788966655731, "learning_rate": 2.1226713768464895e-05, "loss": 0.1326, "step": 33269 }, { "epoch": 0.5934077694146185, "grad_norm": 0.24714688956737518, "learning_rate": 2.1225175099182625e-05, "loss": 0.1222, "step": 33270 }, { "epoch": 0.5934256055363322, "grad_norm": 0.37364429235458374, "learning_rate": 2.122363644453286e-05, "loss": 0.1289, "step": 33271 }, { "epoch": 0.5934434416580459, "grad_norm": 0.27294230461120605, "learning_rate": 2.1222097804521603e-05, "loss": 0.1842, "step": 33272 }, { "epoch": 0.5934612777797595, "grad_norm": 0.2676510214805603, "learning_rate": 2.1220559179154796e-05, "loss": 0.149, "step": 33273 }, { "epoch": 0.5934791139014732, "grad_norm": 0.1891804039478302, "learning_rate": 2.121902056843841e-05, "loss": 0.085, "step": 33274 }, { "epoch": 0.5934969500231869, "grad_norm": 0.22598101198673248, "learning_rate": 2.12174819723784e-05, "loss": 0.1187, "step": 33275 }, { "epoch": 0.5935147861449006, "grad_norm": 0.24087947607040405, "learning_rate": 2.1215943390980734e-05, "loss": 0.1438, "step": 33276 }, { "epoch": 0.5935326222666143, "grad_norm": 0.24859794974327087, "learning_rate": 2.121440482425138e-05, "loss": 0.1221, "step": 33277 }, { "epoch": 0.5935504583883281, "grad_norm": 0.25371286273002625, "learning_rate": 2.1212866272196306e-05, "loss": 0.1408, "step": 33278 }, { "epoch": 0.5935682945100418, "grad_norm": 0.22118690609931946, "learning_rate": 2.1211327734821467e-05, "loss": 0.1543, "step": 33279 }, { "epoch": 0.5935861306317555, "grad_norm": 0.20435787737369537, "learning_rate": 2.1209789212132823e-05, "loss": 0.1168, "step": 33280 }, { "epoch": 0.5936039667534692, "grad_norm": 0.26853296160697937, "learning_rate": 2.1208250704136356e-05, "loss": 0.0991, "step": 33281 }, { "epoch": 0.5936218028751828, "grad_norm": 0.21009793877601624, "learning_rate": 2.1206712210838014e-05, "loss": 0.0905, "step": 33282 }, { "epoch": 0.5936396389968965, "grad_norm": 0.2654861509799957, "learning_rate": 2.120517373224377e-05, "loss": 0.1391, "step": 33283 }, { "epoch": 0.5936574751186102, "grad_norm": 0.2824491560459137, "learning_rate": 2.1203635268359574e-05, "loss": 0.138, "step": 33284 }, { "epoch": 0.5936753112403239, "grad_norm": 0.2922595143318176, "learning_rate": 2.1202096819191405e-05, "loss": 0.1387, "step": 33285 }, { "epoch": 0.5936931473620376, "grad_norm": 0.4046544134616852, "learning_rate": 2.1200558384745224e-05, "loss": 0.1128, "step": 33286 }, { "epoch": 0.5937109834837513, "grad_norm": 0.219674214720726, "learning_rate": 2.1199019965026988e-05, "loss": 0.1052, "step": 33287 }, { "epoch": 0.593728819605465, "grad_norm": 0.2652757465839386, "learning_rate": 2.1197481560042663e-05, "loss": 0.1132, "step": 33288 }, { "epoch": 0.5937466557271787, "grad_norm": 0.2525189518928528, "learning_rate": 2.119594316979821e-05, "loss": 0.1223, "step": 33289 }, { "epoch": 0.5937644918488924, "grad_norm": 0.2301081269979477, "learning_rate": 2.1194404794299596e-05, "loss": 0.1202, "step": 33290 }, { "epoch": 0.593782327970606, "grad_norm": 0.2703513503074646, "learning_rate": 2.119286643355279e-05, "loss": 0.1107, "step": 33291 }, { "epoch": 0.5938001640923197, "grad_norm": 0.3026869297027588, "learning_rate": 2.1191328087563743e-05, "loss": 0.1378, "step": 33292 }, { "epoch": 0.5938180002140334, "grad_norm": 0.32537534832954407, "learning_rate": 2.1189789756338417e-05, "loss": 0.1657, "step": 33293 }, { "epoch": 0.5938358363357471, "grad_norm": 0.34009039402008057, "learning_rate": 2.1188251439882794e-05, "loss": 0.1728, "step": 33294 }, { "epoch": 0.5938536724574609, "grad_norm": 0.22713157534599304, "learning_rate": 2.1186713138202825e-05, "loss": 0.1316, "step": 33295 }, { "epoch": 0.5938715085791746, "grad_norm": 0.4985451400279999, "learning_rate": 2.1185174851304467e-05, "loss": 0.2686, "step": 33296 }, { "epoch": 0.5938893447008883, "grad_norm": 0.2969025671482086, "learning_rate": 2.118363657919369e-05, "loss": 0.1032, "step": 33297 }, { "epoch": 0.593907180822602, "grad_norm": 0.2373484969139099, "learning_rate": 2.118209832187645e-05, "loss": 0.1491, "step": 33298 }, { "epoch": 0.5939250169443157, "grad_norm": 0.273009717464447, "learning_rate": 2.1180560079358726e-05, "loss": 0.1272, "step": 33299 }, { "epoch": 0.5939428530660293, "grad_norm": 0.2795901596546173, "learning_rate": 2.117902185164647e-05, "loss": 0.1289, "step": 33300 }, { "epoch": 0.593960689187743, "grad_norm": 0.24349498748779297, "learning_rate": 2.1177483638745643e-05, "loss": 0.1095, "step": 33301 }, { "epoch": 0.5939785253094567, "grad_norm": 0.3849669396877289, "learning_rate": 2.11759454406622e-05, "loss": 0.1098, "step": 33302 }, { "epoch": 0.5939963614311704, "grad_norm": 0.28109389543533325, "learning_rate": 2.1174407257402123e-05, "loss": 0.1234, "step": 33303 }, { "epoch": 0.5940141975528841, "grad_norm": 0.2173144370317459, "learning_rate": 2.1172869088971362e-05, "loss": 0.1219, "step": 33304 }, { "epoch": 0.5940320336745978, "grad_norm": 0.26749375462532043, "learning_rate": 2.1171330935375885e-05, "loss": 0.1006, "step": 33305 }, { "epoch": 0.5940498697963115, "grad_norm": 0.3091845214366913, "learning_rate": 2.1169792796621642e-05, "loss": 0.1239, "step": 33306 }, { "epoch": 0.5940677059180252, "grad_norm": 0.24436207115650177, "learning_rate": 2.1168254672714616e-05, "loss": 0.1681, "step": 33307 }, { "epoch": 0.5940855420397388, "grad_norm": 0.3007710576057434, "learning_rate": 2.116671656366076e-05, "loss": 0.1298, "step": 33308 }, { "epoch": 0.5941033781614525, "grad_norm": 0.33093568682670593, "learning_rate": 2.116517846946603e-05, "loss": 0.1179, "step": 33309 }, { "epoch": 0.5941212142831662, "grad_norm": 0.313926100730896, "learning_rate": 2.1163640390136387e-05, "loss": 0.1884, "step": 33310 }, { "epoch": 0.5941390504048799, "grad_norm": 0.21629378199577332, "learning_rate": 2.1162102325677803e-05, "loss": 0.1333, "step": 33311 }, { "epoch": 0.5941568865265937, "grad_norm": 0.22283364832401276, "learning_rate": 2.116056427609624e-05, "loss": 0.1107, "step": 33312 }, { "epoch": 0.5941747226483074, "grad_norm": 0.2408323436975479, "learning_rate": 2.1159026241397657e-05, "loss": 0.1306, "step": 33313 }, { "epoch": 0.5941925587700211, "grad_norm": 0.30166885256767273, "learning_rate": 2.1157488221588013e-05, "loss": 0.0941, "step": 33314 }, { "epoch": 0.5942103948917348, "grad_norm": 0.2446063756942749, "learning_rate": 2.1155950216673263e-05, "loss": 0.096, "step": 33315 }, { "epoch": 0.5942282310134485, "grad_norm": 0.2665475904941559, "learning_rate": 2.1154412226659388e-05, "loss": 0.1321, "step": 33316 }, { "epoch": 0.5942460671351621, "grad_norm": 0.20058229565620422, "learning_rate": 2.1152874251552333e-05, "loss": 0.1159, "step": 33317 }, { "epoch": 0.5942639032568758, "grad_norm": 0.33355823159217834, "learning_rate": 2.115133629135807e-05, "loss": 0.2198, "step": 33318 }, { "epoch": 0.5942817393785895, "grad_norm": 0.24582815170288086, "learning_rate": 2.114979834608255e-05, "loss": 0.1305, "step": 33319 }, { "epoch": 0.5942995755003032, "grad_norm": 0.2085101157426834, "learning_rate": 2.114826041573175e-05, "loss": 0.1076, "step": 33320 }, { "epoch": 0.5943174116220169, "grad_norm": 0.2698776125907898, "learning_rate": 2.1146722500311624e-05, "loss": 0.1046, "step": 33321 }, { "epoch": 0.5943352477437306, "grad_norm": 0.30865195393562317, "learning_rate": 2.114518459982813e-05, "loss": 0.1145, "step": 33322 }, { "epoch": 0.5943530838654443, "grad_norm": 0.35019543766975403, "learning_rate": 2.1143646714287226e-05, "loss": 0.1861, "step": 33323 }, { "epoch": 0.594370919987158, "grad_norm": 0.2837424576282501, "learning_rate": 2.1142108843694886e-05, "loss": 0.1243, "step": 33324 }, { "epoch": 0.5943887561088717, "grad_norm": 0.21246322989463806, "learning_rate": 2.114057098805706e-05, "loss": 0.1121, "step": 33325 }, { "epoch": 0.5944065922305853, "grad_norm": 0.44531095027923584, "learning_rate": 2.113903314737972e-05, "loss": 0.1534, "step": 33326 }, { "epoch": 0.594424428352299, "grad_norm": 0.25767385959625244, "learning_rate": 2.113749532166882e-05, "loss": 0.1031, "step": 33327 }, { "epoch": 0.5944422644740127, "grad_norm": 0.32219457626342773, "learning_rate": 2.1135957510930312e-05, "loss": 0.1226, "step": 33328 }, { "epoch": 0.5944601005957265, "grad_norm": 0.30908921360969543, "learning_rate": 2.113441971517018e-05, "loss": 0.1359, "step": 33329 }, { "epoch": 0.5944779367174402, "grad_norm": 0.21614624559879303, "learning_rate": 2.113288193439437e-05, "loss": 0.1206, "step": 33330 }, { "epoch": 0.5944957728391539, "grad_norm": 0.27616238594055176, "learning_rate": 2.113134416860884e-05, "loss": 0.1943, "step": 33331 }, { "epoch": 0.5945136089608676, "grad_norm": 0.27395099401474, "learning_rate": 2.112980641781955e-05, "loss": 0.1448, "step": 33332 }, { "epoch": 0.5945314450825813, "grad_norm": 0.3325318694114685, "learning_rate": 2.112826868203248e-05, "loss": 0.1472, "step": 33333 }, { "epoch": 0.594549281204295, "grad_norm": 0.2864380478858948, "learning_rate": 2.1126730961253576e-05, "loss": 0.1497, "step": 33334 }, { "epoch": 0.5945671173260086, "grad_norm": 0.3155297040939331, "learning_rate": 2.11251932554888e-05, "loss": 0.1583, "step": 33335 }, { "epoch": 0.5945849534477223, "grad_norm": 0.2774326205253601, "learning_rate": 2.1123655564744106e-05, "loss": 0.1158, "step": 33336 }, { "epoch": 0.594602789569436, "grad_norm": 0.27363941073417664, "learning_rate": 2.112211788902547e-05, "loss": 0.0904, "step": 33337 }, { "epoch": 0.5946206256911497, "grad_norm": 0.2833259105682373, "learning_rate": 2.112058022833884e-05, "loss": 0.1192, "step": 33338 }, { "epoch": 0.5946384618128634, "grad_norm": 0.21289248764514923, "learning_rate": 2.1119042582690184e-05, "loss": 0.1013, "step": 33339 }, { "epoch": 0.5946562979345771, "grad_norm": 0.25423264503479004, "learning_rate": 2.1117504952085463e-05, "loss": 0.1413, "step": 33340 }, { "epoch": 0.5946741340562908, "grad_norm": 0.2733125388622284, "learning_rate": 2.111596733653062e-05, "loss": 0.2077, "step": 33341 }, { "epoch": 0.5946919701780045, "grad_norm": 0.29845696687698364, "learning_rate": 2.1114429736031643e-05, "loss": 0.2034, "step": 33342 }, { "epoch": 0.5947098062997181, "grad_norm": 0.2779388427734375, "learning_rate": 2.1112892150594476e-05, "loss": 0.1165, "step": 33343 }, { "epoch": 0.5947276424214318, "grad_norm": 0.2692381739616394, "learning_rate": 2.1111354580225077e-05, "loss": 0.1429, "step": 33344 }, { "epoch": 0.5947454785431456, "grad_norm": 0.1827811747789383, "learning_rate": 2.110981702492941e-05, "loss": 0.0748, "step": 33345 }, { "epoch": 0.5947633146648593, "grad_norm": 0.17644137144088745, "learning_rate": 2.1108279484713437e-05, "loss": 0.0894, "step": 33346 }, { "epoch": 0.594781150786573, "grad_norm": 0.21733225882053375, "learning_rate": 2.1106741959583122e-05, "loss": 0.1039, "step": 33347 }, { "epoch": 0.5947989869082867, "grad_norm": 0.24388693273067474, "learning_rate": 2.110520444954442e-05, "loss": 0.1347, "step": 33348 }, { "epoch": 0.5948168230300004, "grad_norm": 0.275558739900589, "learning_rate": 2.1103666954603278e-05, "loss": 0.0931, "step": 33349 }, { "epoch": 0.5948346591517141, "grad_norm": 0.25211426615715027, "learning_rate": 2.1102129474765682e-05, "loss": 0.1247, "step": 33350 }, { "epoch": 0.5948524952734278, "grad_norm": 0.27250900864601135, "learning_rate": 2.1100592010037575e-05, "loss": 0.1499, "step": 33351 }, { "epoch": 0.5948703313951414, "grad_norm": 0.22814081609249115, "learning_rate": 2.109905456042492e-05, "loss": 0.0994, "step": 33352 }, { "epoch": 0.5948881675168551, "grad_norm": 0.44190946221351624, "learning_rate": 2.1097517125933676e-05, "loss": 0.1893, "step": 33353 }, { "epoch": 0.5949060036385688, "grad_norm": 0.1724148988723755, "learning_rate": 2.1095979706569797e-05, "loss": 0.1342, "step": 33354 }, { "epoch": 0.5949238397602825, "grad_norm": 0.4163680970668793, "learning_rate": 2.109444230233926e-05, "loss": 0.1964, "step": 33355 }, { "epoch": 0.5949416758819962, "grad_norm": 0.289826363325119, "learning_rate": 2.1092904913248013e-05, "loss": 0.1629, "step": 33356 }, { "epoch": 0.5949595120037099, "grad_norm": 0.24717707931995392, "learning_rate": 2.1091367539302014e-05, "loss": 0.1576, "step": 33357 }, { "epoch": 0.5949773481254236, "grad_norm": 0.3352966606616974, "learning_rate": 2.1089830180507215e-05, "loss": 0.1558, "step": 33358 }, { "epoch": 0.5949951842471373, "grad_norm": 0.3377259075641632, "learning_rate": 2.108829283686959e-05, "loss": 0.0998, "step": 33359 }, { "epoch": 0.595013020368851, "grad_norm": 0.3050462305545807, "learning_rate": 2.10867555083951e-05, "loss": 0.1626, "step": 33360 }, { "epoch": 0.5950308564905646, "grad_norm": 0.408711701631546, "learning_rate": 2.1085218195089694e-05, "loss": 0.1507, "step": 33361 }, { "epoch": 0.5950486926122784, "grad_norm": 0.2460470199584961, "learning_rate": 2.1083680896959327e-05, "loss": 0.1037, "step": 33362 }, { "epoch": 0.5950665287339921, "grad_norm": 0.3225964307785034, "learning_rate": 2.1082143614009976e-05, "loss": 0.1138, "step": 33363 }, { "epoch": 0.5950843648557058, "grad_norm": 0.18813158571720123, "learning_rate": 2.1080606346247586e-05, "loss": 0.1068, "step": 33364 }, { "epoch": 0.5951022009774195, "grad_norm": 0.37332475185394287, "learning_rate": 2.1079069093678115e-05, "loss": 0.2153, "step": 33365 }, { "epoch": 0.5951200370991332, "grad_norm": 0.28281331062316895, "learning_rate": 2.1077531856307535e-05, "loss": 0.1248, "step": 33366 }, { "epoch": 0.5951378732208469, "grad_norm": 0.25799229741096497, "learning_rate": 2.1075994634141787e-05, "loss": 0.1422, "step": 33367 }, { "epoch": 0.5951557093425606, "grad_norm": 0.19244201481342316, "learning_rate": 2.1074457427186846e-05, "loss": 0.1143, "step": 33368 }, { "epoch": 0.5951735454642743, "grad_norm": 0.3046590983867645, "learning_rate": 2.107292023544866e-05, "loss": 0.0733, "step": 33369 }, { "epoch": 0.5951913815859879, "grad_norm": 0.28025656938552856, "learning_rate": 2.1071383058933198e-05, "loss": 0.1024, "step": 33370 }, { "epoch": 0.5952092177077016, "grad_norm": 0.18772763013839722, "learning_rate": 2.10698458976464e-05, "loss": 0.1443, "step": 33371 }, { "epoch": 0.5952270538294153, "grad_norm": 0.26078474521636963, "learning_rate": 2.1068308751594247e-05, "loss": 0.107, "step": 33372 }, { "epoch": 0.595244889951129, "grad_norm": 0.29275834560394287, "learning_rate": 2.1066771620782682e-05, "loss": 0.1261, "step": 33373 }, { "epoch": 0.5952627260728427, "grad_norm": 0.25916218757629395, "learning_rate": 2.1065234505217673e-05, "loss": 0.0741, "step": 33374 }, { "epoch": 0.5952805621945564, "grad_norm": 0.2652100920677185, "learning_rate": 2.1063697404905162e-05, "loss": 0.1134, "step": 33375 }, { "epoch": 0.5952983983162701, "grad_norm": 0.2871285080909729, "learning_rate": 2.1062160319851134e-05, "loss": 0.0927, "step": 33376 }, { "epoch": 0.5953162344379838, "grad_norm": 0.2495378851890564, "learning_rate": 2.1060623250061533e-05, "loss": 0.1472, "step": 33377 }, { "epoch": 0.5953340705596974, "grad_norm": 0.2431400567293167, "learning_rate": 2.1059086195542314e-05, "loss": 0.0966, "step": 33378 }, { "epoch": 0.5953519066814112, "grad_norm": 0.23413266241550446, "learning_rate": 2.1057549156299432e-05, "loss": 0.1348, "step": 33379 }, { "epoch": 0.5953697428031249, "grad_norm": 0.20966389775276184, "learning_rate": 2.1056012132338855e-05, "loss": 0.11, "step": 33380 }, { "epoch": 0.5953875789248386, "grad_norm": 0.28601932525634766, "learning_rate": 2.1054475123666534e-05, "loss": 0.1482, "step": 33381 }, { "epoch": 0.5954054150465523, "grad_norm": 0.240091472864151, "learning_rate": 2.105293813028844e-05, "loss": 0.1023, "step": 33382 }, { "epoch": 0.595423251168266, "grad_norm": 0.24070826172828674, "learning_rate": 2.1051401152210515e-05, "loss": 0.1135, "step": 33383 }, { "epoch": 0.5954410872899797, "grad_norm": 0.3157764971256256, "learning_rate": 2.1049864189438713e-05, "loss": 0.1471, "step": 33384 }, { "epoch": 0.5954589234116934, "grad_norm": 0.37929174304008484, "learning_rate": 2.1048327241979014e-05, "loss": 0.1747, "step": 33385 }, { "epoch": 0.5954767595334071, "grad_norm": 0.2279956042766571, "learning_rate": 2.1046790309837367e-05, "loss": 0.0797, "step": 33386 }, { "epoch": 0.5954945956551208, "grad_norm": 0.30363374948501587, "learning_rate": 2.1045253393019718e-05, "loss": 0.1187, "step": 33387 }, { "epoch": 0.5955124317768344, "grad_norm": 0.25819629430770874, "learning_rate": 2.1043716491532027e-05, "loss": 0.1616, "step": 33388 }, { "epoch": 0.5955302678985481, "grad_norm": 0.23750442266464233, "learning_rate": 2.1042179605380267e-05, "loss": 0.1144, "step": 33389 }, { "epoch": 0.5955481040202618, "grad_norm": 0.4008646607398987, "learning_rate": 2.104064273457039e-05, "loss": 0.1571, "step": 33390 }, { "epoch": 0.5955659401419755, "grad_norm": 0.1990721970796585, "learning_rate": 2.1039105879108344e-05, "loss": 0.0821, "step": 33391 }, { "epoch": 0.5955837762636892, "grad_norm": 0.2515743672847748, "learning_rate": 2.103756903900009e-05, "loss": 0.1208, "step": 33392 }, { "epoch": 0.5956016123854029, "grad_norm": 0.3623788058757782, "learning_rate": 2.1036032214251584e-05, "loss": 0.1851, "step": 33393 }, { "epoch": 0.5956194485071166, "grad_norm": 0.2713909447193146, "learning_rate": 2.1034495404868785e-05, "loss": 0.1443, "step": 33394 }, { "epoch": 0.5956372846288303, "grad_norm": 0.3002442419528961, "learning_rate": 2.1032958610857663e-05, "loss": 0.1442, "step": 33395 }, { "epoch": 0.595655120750544, "grad_norm": 0.21712704002857208, "learning_rate": 2.1031421832224156e-05, "loss": 0.124, "step": 33396 }, { "epoch": 0.5956729568722577, "grad_norm": 0.35082346200942993, "learning_rate": 2.1029885068974224e-05, "loss": 0.0981, "step": 33397 }, { "epoch": 0.5956907929939714, "grad_norm": 0.24211251735687256, "learning_rate": 2.1028348321113837e-05, "loss": 0.0879, "step": 33398 }, { "epoch": 0.5957086291156851, "grad_norm": 0.31062939763069153, "learning_rate": 2.1026811588648945e-05, "loss": 0.1071, "step": 33399 }, { "epoch": 0.5957264652373988, "grad_norm": 0.38054537773132324, "learning_rate": 2.1025274871585497e-05, "loss": 0.176, "step": 33400 }, { "epoch": 0.5957443013591125, "grad_norm": 0.26598379015922546, "learning_rate": 2.1023738169929457e-05, "loss": 0.1102, "step": 33401 }, { "epoch": 0.5957621374808262, "grad_norm": 0.25477680563926697, "learning_rate": 2.1022201483686783e-05, "loss": 0.1151, "step": 33402 }, { "epoch": 0.5957799736025399, "grad_norm": 0.33847224712371826, "learning_rate": 2.102066481286343e-05, "loss": 0.1444, "step": 33403 }, { "epoch": 0.5957978097242536, "grad_norm": 0.3492162823677063, "learning_rate": 2.101912815746536e-05, "loss": 0.1283, "step": 33404 }, { "epoch": 0.5958156458459672, "grad_norm": 0.31831786036491394, "learning_rate": 2.101759151749852e-05, "loss": 0.1629, "step": 33405 }, { "epoch": 0.5958334819676809, "grad_norm": 0.3120494484901428, "learning_rate": 2.1016054892968863e-05, "loss": 0.1237, "step": 33406 }, { "epoch": 0.5958513180893946, "grad_norm": 0.2621499300003052, "learning_rate": 2.1014518283882363e-05, "loss": 0.1145, "step": 33407 }, { "epoch": 0.5958691542111083, "grad_norm": 0.28799012303352356, "learning_rate": 2.101298169024496e-05, "loss": 0.0874, "step": 33408 }, { "epoch": 0.595886990332822, "grad_norm": 0.25026583671569824, "learning_rate": 2.1011445112062625e-05, "loss": 0.1322, "step": 33409 }, { "epoch": 0.5959048264545357, "grad_norm": 0.2893786132335663, "learning_rate": 2.1009908549341296e-05, "loss": 0.1563, "step": 33410 }, { "epoch": 0.5959226625762494, "grad_norm": 0.23970358073711395, "learning_rate": 2.1008372002086947e-05, "loss": 0.1054, "step": 33411 }, { "epoch": 0.5959404986979631, "grad_norm": 0.2955992519855499, "learning_rate": 2.100683547030553e-05, "loss": 0.1415, "step": 33412 }, { "epoch": 0.5959583348196769, "grad_norm": 0.33711910247802734, "learning_rate": 2.1005298954002995e-05, "loss": 0.1696, "step": 33413 }, { "epoch": 0.5959761709413905, "grad_norm": 0.28058022260665894, "learning_rate": 2.1003762453185293e-05, "loss": 0.1622, "step": 33414 }, { "epoch": 0.5959940070631042, "grad_norm": 0.29343101382255554, "learning_rate": 2.100222596785839e-05, "loss": 0.1366, "step": 33415 }, { "epoch": 0.5960118431848179, "grad_norm": 0.2745125889778137, "learning_rate": 2.1000689498028248e-05, "loss": 0.1391, "step": 33416 }, { "epoch": 0.5960296793065316, "grad_norm": 0.27711477875709534, "learning_rate": 2.0999153043700814e-05, "loss": 0.1516, "step": 33417 }, { "epoch": 0.5960475154282453, "grad_norm": 0.24365398287773132, "learning_rate": 2.0997616604882044e-05, "loss": 0.1031, "step": 33418 }, { "epoch": 0.596065351549959, "grad_norm": 0.20161396265029907, "learning_rate": 2.0996080181577886e-05, "loss": 0.1145, "step": 33419 }, { "epoch": 0.5960831876716727, "grad_norm": 0.23411734402179718, "learning_rate": 2.0994543773794316e-05, "loss": 0.1331, "step": 33420 }, { "epoch": 0.5961010237933864, "grad_norm": 0.2648693025112152, "learning_rate": 2.0993007381537266e-05, "loss": 0.1795, "step": 33421 }, { "epoch": 0.5961188599151, "grad_norm": 0.209220290184021, "learning_rate": 2.0991471004812714e-05, "loss": 0.1217, "step": 33422 }, { "epoch": 0.5961366960368137, "grad_norm": 0.3890189230442047, "learning_rate": 2.098993464362659e-05, "loss": 0.1424, "step": 33423 }, { "epoch": 0.5961545321585274, "grad_norm": 0.26895254850387573, "learning_rate": 2.0988398297984878e-05, "loss": 0.1421, "step": 33424 }, { "epoch": 0.5961723682802411, "grad_norm": 0.23200629651546478, "learning_rate": 2.0986861967893524e-05, "loss": 0.1164, "step": 33425 }, { "epoch": 0.5961902044019548, "grad_norm": 0.2543470859527588, "learning_rate": 2.0985325653358473e-05, "loss": 0.0801, "step": 33426 }, { "epoch": 0.5962080405236685, "grad_norm": 0.30066829919815063, "learning_rate": 2.0983789354385677e-05, "loss": 0.1875, "step": 33427 }, { "epoch": 0.5962258766453822, "grad_norm": 0.28343334794044495, "learning_rate": 2.0982253070981112e-05, "loss": 0.1537, "step": 33428 }, { "epoch": 0.5962437127670959, "grad_norm": 0.356293261051178, "learning_rate": 2.0980716803150716e-05, "loss": 0.1492, "step": 33429 }, { "epoch": 0.5962615488888097, "grad_norm": 0.2436731904745102, "learning_rate": 2.0979180550900453e-05, "loss": 0.1077, "step": 33430 }, { "epoch": 0.5962793850105234, "grad_norm": 0.2647886276245117, "learning_rate": 2.0977644314236278e-05, "loss": 0.0954, "step": 33431 }, { "epoch": 0.596297221132237, "grad_norm": 0.20411249995231628, "learning_rate": 2.0976108093164133e-05, "loss": 0.0756, "step": 33432 }, { "epoch": 0.5963150572539507, "grad_norm": 0.3440779447555542, "learning_rate": 2.097457188768999e-05, "loss": 0.154, "step": 33433 }, { "epoch": 0.5963328933756644, "grad_norm": 0.251858651638031, "learning_rate": 2.09730356978198e-05, "loss": 0.1252, "step": 33434 }, { "epoch": 0.5963507294973781, "grad_norm": 0.3432500958442688, "learning_rate": 2.0971499523559506e-05, "loss": 0.1285, "step": 33435 }, { "epoch": 0.5963685656190918, "grad_norm": 0.22653472423553467, "learning_rate": 2.0969963364915068e-05, "loss": 0.0866, "step": 33436 }, { "epoch": 0.5963864017408055, "grad_norm": 0.3579009175300598, "learning_rate": 2.0968427221892455e-05, "loss": 0.1734, "step": 33437 }, { "epoch": 0.5964042378625192, "grad_norm": 0.23884211480617523, "learning_rate": 2.096689109449761e-05, "loss": 0.1586, "step": 33438 }, { "epoch": 0.5964220739842329, "grad_norm": 0.1790805160999298, "learning_rate": 2.096535498273649e-05, "loss": 0.0926, "step": 33439 }, { "epoch": 0.5964399101059465, "grad_norm": 0.20240965485572815, "learning_rate": 2.0963818886615034e-05, "loss": 0.0715, "step": 33440 }, { "epoch": 0.5964577462276602, "grad_norm": 0.4511924386024475, "learning_rate": 2.096228280613922e-05, "loss": 0.1523, "step": 33441 }, { "epoch": 0.5964755823493739, "grad_norm": 0.4020020067691803, "learning_rate": 2.0960746741314992e-05, "loss": 0.1524, "step": 33442 }, { "epoch": 0.5964934184710876, "grad_norm": 0.3994465470314026, "learning_rate": 2.0959210692148306e-05, "loss": 0.2365, "step": 33443 }, { "epoch": 0.5965112545928013, "grad_norm": 0.3291572332382202, "learning_rate": 2.0957674658645115e-05, "loss": 0.113, "step": 33444 }, { "epoch": 0.596529090714515, "grad_norm": 0.21668535470962524, "learning_rate": 2.0956138640811363e-05, "loss": 0.1603, "step": 33445 }, { "epoch": 0.5965469268362288, "grad_norm": 0.25522488355636597, "learning_rate": 2.0954602638653027e-05, "loss": 0.127, "step": 33446 }, { "epoch": 0.5965647629579425, "grad_norm": 0.2664676010608673, "learning_rate": 2.0953066652176045e-05, "loss": 0.1381, "step": 33447 }, { "epoch": 0.5965825990796562, "grad_norm": 0.2657136023044586, "learning_rate": 2.095153068138637e-05, "loss": 0.1276, "step": 33448 }, { "epoch": 0.5966004352013698, "grad_norm": 0.2838156521320343, "learning_rate": 2.094999472628996e-05, "loss": 0.0919, "step": 33449 }, { "epoch": 0.5966182713230835, "grad_norm": 0.2860822379589081, "learning_rate": 2.094845878689277e-05, "loss": 0.1354, "step": 33450 }, { "epoch": 0.5966361074447972, "grad_norm": 0.2965041995048523, "learning_rate": 2.0946922863200764e-05, "loss": 0.1647, "step": 33451 }, { "epoch": 0.5966539435665109, "grad_norm": 0.2065976858139038, "learning_rate": 2.0945386955219878e-05, "loss": 0.1204, "step": 33452 }, { "epoch": 0.5966717796882246, "grad_norm": 0.24486243724822998, "learning_rate": 2.0943851062956063e-05, "loss": 0.1554, "step": 33453 }, { "epoch": 0.5966896158099383, "grad_norm": 0.2656596004962921, "learning_rate": 2.0942315186415294e-05, "loss": 0.1264, "step": 33454 }, { "epoch": 0.596707451931652, "grad_norm": 0.2900265157222748, "learning_rate": 2.0940779325603514e-05, "loss": 0.175, "step": 33455 }, { "epoch": 0.5967252880533657, "grad_norm": 0.23209281265735626, "learning_rate": 2.0939243480526665e-05, "loss": 0.1579, "step": 33456 }, { "epoch": 0.5967431241750794, "grad_norm": 0.24760572612285614, "learning_rate": 2.093770765119072e-05, "loss": 0.1151, "step": 33457 }, { "epoch": 0.596760960296793, "grad_norm": 0.20232835412025452, "learning_rate": 2.0936171837601613e-05, "loss": 0.1399, "step": 33458 }, { "epoch": 0.5967787964185067, "grad_norm": 0.2508339583873749, "learning_rate": 2.0934636039765317e-05, "loss": 0.1242, "step": 33459 }, { "epoch": 0.5967966325402204, "grad_norm": 0.2698204219341278, "learning_rate": 2.0933100257687776e-05, "loss": 0.0805, "step": 33460 }, { "epoch": 0.5968144686619341, "grad_norm": 0.235343798995018, "learning_rate": 2.0931564491374946e-05, "loss": 0.1015, "step": 33461 }, { "epoch": 0.5968323047836478, "grad_norm": 0.3445269763469696, "learning_rate": 2.0930028740832764e-05, "loss": 0.1336, "step": 33462 }, { "epoch": 0.5968501409053616, "grad_norm": 0.21841862797737122, "learning_rate": 2.0928493006067204e-05, "loss": 0.098, "step": 33463 }, { "epoch": 0.5968679770270753, "grad_norm": 0.25828567147254944, "learning_rate": 2.092695728708421e-05, "loss": 0.1447, "step": 33464 }, { "epoch": 0.596885813148789, "grad_norm": 0.23380765318870544, "learning_rate": 2.092542158388974e-05, "loss": 0.1772, "step": 33465 }, { "epoch": 0.5969036492705027, "grad_norm": 0.29929211735725403, "learning_rate": 2.0923885896489734e-05, "loss": 0.1315, "step": 33466 }, { "epoch": 0.5969214853922163, "grad_norm": 0.2961951792240143, "learning_rate": 2.0922350224890163e-05, "loss": 0.1137, "step": 33467 }, { "epoch": 0.59693932151393, "grad_norm": 0.23490402102470398, "learning_rate": 2.092081456909697e-05, "loss": 0.1172, "step": 33468 }, { "epoch": 0.5969571576356437, "grad_norm": 0.26800209283828735, "learning_rate": 2.0919278929116106e-05, "loss": 0.1379, "step": 33469 }, { "epoch": 0.5969749937573574, "grad_norm": 0.21255186200141907, "learning_rate": 2.0917743304953534e-05, "loss": 0.1449, "step": 33470 }, { "epoch": 0.5969928298790711, "grad_norm": 0.3051043450832367, "learning_rate": 2.091620769661518e-05, "loss": 0.1378, "step": 33471 }, { "epoch": 0.5970106660007848, "grad_norm": 0.2557990550994873, "learning_rate": 2.0914672104107032e-05, "loss": 0.1095, "step": 33472 }, { "epoch": 0.5970285021224985, "grad_norm": 0.31415480375289917, "learning_rate": 2.0913136527435026e-05, "loss": 0.129, "step": 33473 }, { "epoch": 0.5970463382442122, "grad_norm": 0.2764354944229126, "learning_rate": 2.0911600966605114e-05, "loss": 0.0963, "step": 33474 }, { "epoch": 0.5970641743659258, "grad_norm": 0.2895379662513733, "learning_rate": 2.0910065421623236e-05, "loss": 0.1437, "step": 33475 }, { "epoch": 0.5970820104876395, "grad_norm": 0.257869154214859, "learning_rate": 2.090852989249537e-05, "loss": 0.1338, "step": 33476 }, { "epoch": 0.5970998466093532, "grad_norm": 0.20041821897029877, "learning_rate": 2.090699437922745e-05, "loss": 0.1147, "step": 33477 }, { "epoch": 0.5971176827310669, "grad_norm": 0.26313722133636475, "learning_rate": 2.090545888182544e-05, "loss": 0.0902, "step": 33478 }, { "epoch": 0.5971355188527806, "grad_norm": 0.4446222484111786, "learning_rate": 2.0903923400295273e-05, "loss": 0.1261, "step": 33479 }, { "epoch": 0.5971533549744944, "grad_norm": 0.19148489832878113, "learning_rate": 2.0902387934642923e-05, "loss": 0.1077, "step": 33480 }, { "epoch": 0.5971711910962081, "grad_norm": 0.3146808445453644, "learning_rate": 2.0900852484874335e-05, "loss": 0.0833, "step": 33481 }, { "epoch": 0.5971890272179218, "grad_norm": 0.21696031093597412, "learning_rate": 2.0899317050995454e-05, "loss": 0.0937, "step": 33482 }, { "epoch": 0.5972068633396355, "grad_norm": 0.1917061060667038, "learning_rate": 2.0897781633012238e-05, "loss": 0.0897, "step": 33483 }, { "epoch": 0.5972246994613492, "grad_norm": 0.31284597516059875, "learning_rate": 2.0896246230930632e-05, "loss": 0.1646, "step": 33484 }, { "epoch": 0.5972425355830628, "grad_norm": 0.3136140704154968, "learning_rate": 2.0894710844756593e-05, "loss": 0.19, "step": 33485 }, { "epoch": 0.5972603717047765, "grad_norm": 0.30459582805633545, "learning_rate": 2.089317547449608e-05, "loss": 0.1533, "step": 33486 }, { "epoch": 0.5972782078264902, "grad_norm": 0.35404688119888306, "learning_rate": 2.0891640120155037e-05, "loss": 0.1404, "step": 33487 }, { "epoch": 0.5972960439482039, "grad_norm": 0.2684485912322998, "learning_rate": 2.0890104781739405e-05, "loss": 0.1098, "step": 33488 }, { "epoch": 0.5973138800699176, "grad_norm": 0.39728739857673645, "learning_rate": 2.088856945925516e-05, "loss": 0.1215, "step": 33489 }, { "epoch": 0.5973317161916313, "grad_norm": 0.31502604484558105, "learning_rate": 2.0887034152708234e-05, "loss": 0.128, "step": 33490 }, { "epoch": 0.597349552313345, "grad_norm": 0.25949475169181824, "learning_rate": 2.0885498862104584e-05, "loss": 0.1328, "step": 33491 }, { "epoch": 0.5973673884350587, "grad_norm": 0.34783926606178284, "learning_rate": 2.0883963587450154e-05, "loss": 0.0974, "step": 33492 }, { "epoch": 0.5973852245567723, "grad_norm": 0.22386687994003296, "learning_rate": 2.0882428328750914e-05, "loss": 0.1278, "step": 33493 }, { "epoch": 0.597403060678486, "grad_norm": 0.23003214597702026, "learning_rate": 2.0880893086012804e-05, "loss": 0.1089, "step": 33494 }, { "epoch": 0.5974208968001997, "grad_norm": 0.21393248438835144, "learning_rate": 2.0879357859241773e-05, "loss": 0.1036, "step": 33495 }, { "epoch": 0.5974387329219134, "grad_norm": 0.24486044049263, "learning_rate": 2.087782264844377e-05, "loss": 0.167, "step": 33496 }, { "epoch": 0.5974565690436272, "grad_norm": 0.300487756729126, "learning_rate": 2.087628745362475e-05, "loss": 0.1555, "step": 33497 }, { "epoch": 0.5974744051653409, "grad_norm": 0.29788637161254883, "learning_rate": 2.0874752274790665e-05, "loss": 0.153, "step": 33498 }, { "epoch": 0.5974922412870546, "grad_norm": 0.19125089049339294, "learning_rate": 2.087321711194747e-05, "loss": 0.0831, "step": 33499 }, { "epoch": 0.5975100774087683, "grad_norm": 0.3453262150287628, "learning_rate": 2.0871681965101114e-05, "loss": 0.182, "step": 33500 }, { "epoch": 0.597527913530482, "grad_norm": 0.31526249647140503, "learning_rate": 2.087014683425753e-05, "loss": 0.1612, "step": 33501 }, { "epoch": 0.5975457496521956, "grad_norm": 0.21088182926177979, "learning_rate": 2.0868611719422696e-05, "loss": 0.0618, "step": 33502 }, { "epoch": 0.5975635857739093, "grad_norm": 0.36538219451904297, "learning_rate": 2.0867076620602548e-05, "loss": 0.1237, "step": 33503 }, { "epoch": 0.597581421895623, "grad_norm": 0.21200306713581085, "learning_rate": 2.0865541537803032e-05, "loss": 0.1167, "step": 33504 }, { "epoch": 0.5975992580173367, "grad_norm": 0.17556160688400269, "learning_rate": 2.0864006471030108e-05, "loss": 0.1343, "step": 33505 }, { "epoch": 0.5976170941390504, "grad_norm": 0.2599734365940094, "learning_rate": 2.086247142028972e-05, "loss": 0.111, "step": 33506 }, { "epoch": 0.5976349302607641, "grad_norm": 0.21359191834926605, "learning_rate": 2.0860936385587833e-05, "loss": 0.1201, "step": 33507 }, { "epoch": 0.5976527663824778, "grad_norm": 0.23952512443065643, "learning_rate": 2.085940136693038e-05, "loss": 0.0894, "step": 33508 }, { "epoch": 0.5976706025041915, "grad_norm": 0.20351237058639526, "learning_rate": 2.0857866364323324e-05, "loss": 0.1061, "step": 33509 }, { "epoch": 0.5976884386259051, "grad_norm": 0.2951543927192688, "learning_rate": 2.0856331377772592e-05, "loss": 0.1574, "step": 33510 }, { "epoch": 0.5977062747476188, "grad_norm": 0.3727482855319977, "learning_rate": 2.0854796407284163e-05, "loss": 0.1197, "step": 33511 }, { "epoch": 0.5977241108693325, "grad_norm": 0.3370814919471741, "learning_rate": 2.085326145286397e-05, "loss": 0.1528, "step": 33512 }, { "epoch": 0.5977419469910462, "grad_norm": 0.2609003484249115, "learning_rate": 2.0851726514517973e-05, "loss": 0.1433, "step": 33513 }, { "epoch": 0.59775978311276, "grad_norm": 0.27603965997695923, "learning_rate": 2.0850191592252106e-05, "loss": 0.1227, "step": 33514 }, { "epoch": 0.5977776192344737, "grad_norm": 0.2421277016401291, "learning_rate": 2.084865668607234e-05, "loss": 0.1268, "step": 33515 }, { "epoch": 0.5977954553561874, "grad_norm": 0.3370356857776642, "learning_rate": 2.0847121795984616e-05, "loss": 0.1436, "step": 33516 }, { "epoch": 0.5978132914779011, "grad_norm": 0.28814664483070374, "learning_rate": 2.084558692199488e-05, "loss": 0.1243, "step": 33517 }, { "epoch": 0.5978311275996148, "grad_norm": 0.2421230524778366, "learning_rate": 2.0844052064109076e-05, "loss": 0.051, "step": 33518 }, { "epoch": 0.5978489637213285, "grad_norm": 0.21642711758613586, "learning_rate": 2.0842517222333168e-05, "loss": 0.1429, "step": 33519 }, { "epoch": 0.5978667998430421, "grad_norm": 0.3548171818256378, "learning_rate": 2.0840982396673098e-05, "loss": 0.1707, "step": 33520 }, { "epoch": 0.5978846359647558, "grad_norm": 0.22144341468811035, "learning_rate": 2.0839447587134824e-05, "loss": 0.0972, "step": 33521 }, { "epoch": 0.5979024720864695, "grad_norm": 0.3229910135269165, "learning_rate": 2.0837912793724284e-05, "loss": 0.1417, "step": 33522 }, { "epoch": 0.5979203082081832, "grad_norm": 0.33225852251052856, "learning_rate": 2.083637801644742e-05, "loss": 0.1249, "step": 33523 }, { "epoch": 0.5979381443298969, "grad_norm": 0.28374913334846497, "learning_rate": 2.0834843255310208e-05, "loss": 0.1629, "step": 33524 }, { "epoch": 0.5979559804516106, "grad_norm": 0.1922423243522644, "learning_rate": 2.083330851031857e-05, "loss": 0.0805, "step": 33525 }, { "epoch": 0.5979738165733243, "grad_norm": 0.23374253511428833, "learning_rate": 2.083177378147848e-05, "loss": 0.1491, "step": 33526 }, { "epoch": 0.597991652695038, "grad_norm": 0.28973060846328735, "learning_rate": 2.083023906879586e-05, "loss": 0.1215, "step": 33527 }, { "epoch": 0.5980094888167516, "grad_norm": 0.23298515379428864, "learning_rate": 2.0828704372276686e-05, "loss": 0.1105, "step": 33528 }, { "epoch": 0.5980273249384653, "grad_norm": 0.16594116389751434, "learning_rate": 2.0827169691926894e-05, "loss": 0.0932, "step": 33529 }, { "epoch": 0.598045161060179, "grad_norm": 0.2171420007944107, "learning_rate": 2.0825635027752433e-05, "loss": 0.1023, "step": 33530 }, { "epoch": 0.5980629971818928, "grad_norm": 0.2784365117549896, "learning_rate": 2.082410037975924e-05, "loss": 0.1553, "step": 33531 }, { "epoch": 0.5980808333036065, "grad_norm": 0.26501089334487915, "learning_rate": 2.082256574795329e-05, "loss": 0.167, "step": 33532 }, { "epoch": 0.5980986694253202, "grad_norm": 0.28261762857437134, "learning_rate": 2.082103113234051e-05, "loss": 0.123, "step": 33533 }, { "epoch": 0.5981165055470339, "grad_norm": 0.28261658549308777, "learning_rate": 2.0819496532926864e-05, "loss": 0.1854, "step": 33534 }, { "epoch": 0.5981343416687476, "grad_norm": 0.344864159822464, "learning_rate": 2.0817961949718294e-05, "loss": 0.1237, "step": 33535 }, { "epoch": 0.5981521777904613, "grad_norm": 0.35241538286209106, "learning_rate": 2.0816427382720734e-05, "loss": 0.186, "step": 33536 }, { "epoch": 0.598170013912175, "grad_norm": 0.2554865777492523, "learning_rate": 2.081489283194016e-05, "loss": 0.1417, "step": 33537 }, { "epoch": 0.5981878500338886, "grad_norm": 0.25521984696388245, "learning_rate": 2.081335829738251e-05, "loss": 0.1297, "step": 33538 }, { "epoch": 0.5982056861556023, "grad_norm": 0.18651452660560608, "learning_rate": 2.0811823779053714e-05, "loss": 0.0904, "step": 33539 }, { "epoch": 0.598223522277316, "grad_norm": 0.2694258689880371, "learning_rate": 2.0810289276959743e-05, "loss": 0.1532, "step": 33540 }, { "epoch": 0.5982413583990297, "grad_norm": 0.24189093708992004, "learning_rate": 2.0808754791106537e-05, "loss": 0.1289, "step": 33541 }, { "epoch": 0.5982591945207434, "grad_norm": 0.27592676877975464, "learning_rate": 2.0807220321500047e-05, "loss": 0.1672, "step": 33542 }, { "epoch": 0.5982770306424571, "grad_norm": 0.251250684261322, "learning_rate": 2.0805685868146225e-05, "loss": 0.1144, "step": 33543 }, { "epoch": 0.5982948667641708, "grad_norm": 0.3173721730709076, "learning_rate": 2.0804151431051e-05, "loss": 0.1539, "step": 33544 }, { "epoch": 0.5983127028858845, "grad_norm": 0.2538539469242096, "learning_rate": 2.0802617010220343e-05, "loss": 0.1707, "step": 33545 }, { "epoch": 0.5983305390075981, "grad_norm": 0.4023657739162445, "learning_rate": 2.0801082605660186e-05, "loss": 0.132, "step": 33546 }, { "epoch": 0.5983483751293119, "grad_norm": 0.3657127618789673, "learning_rate": 2.0799548217376492e-05, "loss": 0.1358, "step": 33547 }, { "epoch": 0.5983662112510256, "grad_norm": 0.23266048729419708, "learning_rate": 2.0798013845375197e-05, "loss": 0.1228, "step": 33548 }, { "epoch": 0.5983840473727393, "grad_norm": 0.25415685772895813, "learning_rate": 2.079647948966224e-05, "loss": 0.1237, "step": 33549 }, { "epoch": 0.598401883494453, "grad_norm": 0.2588161826133728, "learning_rate": 2.0794945150243593e-05, "loss": 0.1152, "step": 33550 }, { "epoch": 0.5984197196161667, "grad_norm": 0.24725402891635895, "learning_rate": 2.079341082712519e-05, "loss": 0.1146, "step": 33551 }, { "epoch": 0.5984375557378804, "grad_norm": 0.37954816222190857, "learning_rate": 2.0791876520312974e-05, "loss": 0.1536, "step": 33552 }, { "epoch": 0.5984553918595941, "grad_norm": 0.41757869720458984, "learning_rate": 2.0790342229812898e-05, "loss": 0.1306, "step": 33553 }, { "epoch": 0.5984732279813078, "grad_norm": 0.2830967307090759, "learning_rate": 2.0788807955630912e-05, "loss": 0.1099, "step": 33554 }, { "epoch": 0.5984910641030214, "grad_norm": 0.25304535031318665, "learning_rate": 2.0787273697772965e-05, "loss": 0.1444, "step": 33555 }, { "epoch": 0.5985089002247351, "grad_norm": 0.298365980386734, "learning_rate": 2.0785739456245e-05, "loss": 0.1762, "step": 33556 }, { "epoch": 0.5985267363464488, "grad_norm": 0.2548024654388428, "learning_rate": 2.0784205231052953e-05, "loss": 0.0839, "step": 33557 }, { "epoch": 0.5985445724681625, "grad_norm": 0.2621406018733978, "learning_rate": 2.0782671022202794e-05, "loss": 0.1558, "step": 33558 }, { "epoch": 0.5985624085898762, "grad_norm": 0.26424023509025574, "learning_rate": 2.0781136829700456e-05, "loss": 0.0989, "step": 33559 }, { "epoch": 0.5985802447115899, "grad_norm": 0.27558988332748413, "learning_rate": 2.0779602653551888e-05, "loss": 0.1342, "step": 33560 }, { "epoch": 0.5985980808333036, "grad_norm": 0.22411102056503296, "learning_rate": 2.0778068493763043e-05, "loss": 0.1433, "step": 33561 }, { "epoch": 0.5986159169550173, "grad_norm": 0.2516861855983734, "learning_rate": 2.077653435033985e-05, "loss": 0.1465, "step": 33562 }, { "epoch": 0.598633753076731, "grad_norm": 0.28461748361587524, "learning_rate": 2.0775000223288276e-05, "loss": 0.16, "step": 33563 }, { "epoch": 0.5986515891984447, "grad_norm": 0.37802982330322266, "learning_rate": 2.0773466112614264e-05, "loss": 0.0974, "step": 33564 }, { "epoch": 0.5986694253201584, "grad_norm": 0.3002423346042633, "learning_rate": 2.077193201832376e-05, "loss": 0.1923, "step": 33565 }, { "epoch": 0.5986872614418721, "grad_norm": 0.2853030264377594, "learning_rate": 2.0770397940422692e-05, "loss": 0.125, "step": 33566 }, { "epoch": 0.5987050975635858, "grad_norm": 0.2667273283004761, "learning_rate": 2.0768863878917032e-05, "loss": 0.1164, "step": 33567 }, { "epoch": 0.5987229336852995, "grad_norm": 0.2134263515472412, "learning_rate": 2.0767329833812717e-05, "loss": 0.1015, "step": 33568 }, { "epoch": 0.5987407698070132, "grad_norm": 0.40813490748405457, "learning_rate": 2.0765795805115696e-05, "loss": 0.1395, "step": 33569 }, { "epoch": 0.5987586059287269, "grad_norm": 0.2575003206729889, "learning_rate": 2.0764261792831906e-05, "loss": 0.151, "step": 33570 }, { "epoch": 0.5987764420504406, "grad_norm": 0.32282647490501404, "learning_rate": 2.0762727796967304e-05, "loss": 0.1564, "step": 33571 }, { "epoch": 0.5987942781721542, "grad_norm": 0.29961487650871277, "learning_rate": 2.0761193817527836e-05, "loss": 0.1702, "step": 33572 }, { "epoch": 0.5988121142938679, "grad_norm": 0.2437695413827896, "learning_rate": 2.0759659854519443e-05, "loss": 0.0923, "step": 33573 }, { "epoch": 0.5988299504155816, "grad_norm": 0.26290687918663025, "learning_rate": 2.0758125907948073e-05, "loss": 0.1354, "step": 33574 }, { "epoch": 0.5988477865372953, "grad_norm": 0.3356347382068634, "learning_rate": 2.0756591977819665e-05, "loss": 0.1146, "step": 33575 }, { "epoch": 0.598865622659009, "grad_norm": 0.33639901876449585, "learning_rate": 2.0755058064140183e-05, "loss": 0.1561, "step": 33576 }, { "epoch": 0.5988834587807227, "grad_norm": 0.21176671981811523, "learning_rate": 2.075352416691556e-05, "loss": 0.1189, "step": 33577 }, { "epoch": 0.5989012949024364, "grad_norm": 0.28039446473121643, "learning_rate": 2.0751990286151744e-05, "loss": 0.1272, "step": 33578 }, { "epoch": 0.5989191310241501, "grad_norm": 0.25911805033683777, "learning_rate": 2.075045642185467e-05, "loss": 0.1402, "step": 33579 }, { "epoch": 0.5989369671458638, "grad_norm": 0.2090800702571869, "learning_rate": 2.0748922574030306e-05, "loss": 0.119, "step": 33580 }, { "epoch": 0.5989548032675776, "grad_norm": 0.2648777961730957, "learning_rate": 2.074738874268458e-05, "loss": 0.0934, "step": 33581 }, { "epoch": 0.5989726393892912, "grad_norm": 0.27204152941703796, "learning_rate": 2.074585492782345e-05, "loss": 0.1448, "step": 33582 }, { "epoch": 0.5989904755110049, "grad_norm": 0.24797779321670532, "learning_rate": 2.0744321129452846e-05, "loss": 0.1169, "step": 33583 }, { "epoch": 0.5990083116327186, "grad_norm": 0.2660035490989685, "learning_rate": 2.074278734757873e-05, "loss": 0.1262, "step": 33584 }, { "epoch": 0.5990261477544323, "grad_norm": 0.2804333567619324, "learning_rate": 2.0741253582207045e-05, "loss": 0.1417, "step": 33585 }, { "epoch": 0.599043983876146, "grad_norm": 0.3176865875720978, "learning_rate": 2.0739719833343732e-05, "loss": 0.1361, "step": 33586 }, { "epoch": 0.5990618199978597, "grad_norm": 0.2855227589607239, "learning_rate": 2.073818610099473e-05, "loss": 0.1329, "step": 33587 }, { "epoch": 0.5990796561195734, "grad_norm": 0.29159054160118103, "learning_rate": 2.0736652385165993e-05, "loss": 0.1169, "step": 33588 }, { "epoch": 0.599097492241287, "grad_norm": 0.25642481446266174, "learning_rate": 2.073511868586346e-05, "loss": 0.0432, "step": 33589 }, { "epoch": 0.5991153283630007, "grad_norm": 0.573711633682251, "learning_rate": 2.0733585003093086e-05, "loss": 0.2194, "step": 33590 }, { "epoch": 0.5991331644847144, "grad_norm": 0.28695085644721985, "learning_rate": 2.0732051336860812e-05, "loss": 0.1025, "step": 33591 }, { "epoch": 0.5991510006064281, "grad_norm": 0.21565885841846466, "learning_rate": 2.073051768717257e-05, "loss": 0.1193, "step": 33592 }, { "epoch": 0.5991688367281418, "grad_norm": 0.23005762696266174, "learning_rate": 2.0728984054034323e-05, "loss": 0.1775, "step": 33593 }, { "epoch": 0.5991866728498555, "grad_norm": 0.2618167996406555, "learning_rate": 2.0727450437452013e-05, "loss": 0.1688, "step": 33594 }, { "epoch": 0.5992045089715692, "grad_norm": 0.3125520944595337, "learning_rate": 2.0725916837431576e-05, "loss": 0.1419, "step": 33595 }, { "epoch": 0.5992223450932829, "grad_norm": 0.46696290373802185, "learning_rate": 2.072438325397895e-05, "loss": 0.1088, "step": 33596 }, { "epoch": 0.5992401812149966, "grad_norm": 0.19843453168869019, "learning_rate": 2.072284968710011e-05, "loss": 0.1163, "step": 33597 }, { "epoch": 0.5992580173367104, "grad_norm": 0.2652958035469055, "learning_rate": 2.072131613680098e-05, "loss": 0.1154, "step": 33598 }, { "epoch": 0.599275853458424, "grad_norm": 0.24856826663017273, "learning_rate": 2.0719782603087504e-05, "loss": 0.1073, "step": 33599 }, { "epoch": 0.5992936895801377, "grad_norm": 0.5437614321708679, "learning_rate": 2.0718249085965623e-05, "loss": 0.2109, "step": 33600 }, { "epoch": 0.5993115257018514, "grad_norm": 0.2498537302017212, "learning_rate": 2.071671558544129e-05, "loss": 0.0947, "step": 33601 }, { "epoch": 0.5993293618235651, "grad_norm": 0.3388812243938446, "learning_rate": 2.0715182101520448e-05, "loss": 0.1609, "step": 33602 }, { "epoch": 0.5993471979452788, "grad_norm": 0.4093707501888275, "learning_rate": 2.0713648634209044e-05, "loss": 0.1098, "step": 33603 }, { "epoch": 0.5993650340669925, "grad_norm": 0.33917126059532166, "learning_rate": 2.0712115183513014e-05, "loss": 0.1524, "step": 33604 }, { "epoch": 0.5993828701887062, "grad_norm": 0.2906486988067627, "learning_rate": 2.07105817494383e-05, "loss": 0.1419, "step": 33605 }, { "epoch": 0.5994007063104199, "grad_norm": 0.27075502276420593, "learning_rate": 2.0709048331990863e-05, "loss": 0.1334, "step": 33606 }, { "epoch": 0.5994185424321335, "grad_norm": 0.24697193503379822, "learning_rate": 2.0707514931176637e-05, "loss": 0.131, "step": 33607 }, { "epoch": 0.5994363785538472, "grad_norm": 0.22174084186553955, "learning_rate": 2.070598154700156e-05, "loss": 0.1139, "step": 33608 }, { "epoch": 0.5994542146755609, "grad_norm": 0.24717064201831818, "learning_rate": 2.070444817947158e-05, "loss": 0.1576, "step": 33609 }, { "epoch": 0.5994720507972746, "grad_norm": 0.255853533744812, "learning_rate": 2.0702914828592647e-05, "loss": 0.1453, "step": 33610 }, { "epoch": 0.5994898869189883, "grad_norm": 0.2678378224372864, "learning_rate": 2.07013814943707e-05, "loss": 0.1831, "step": 33611 }, { "epoch": 0.599507723040702, "grad_norm": 0.24620047211647034, "learning_rate": 2.0699848176811686e-05, "loss": 0.0922, "step": 33612 }, { "epoch": 0.5995255591624157, "grad_norm": 0.3019634187221527, "learning_rate": 2.0698314875921544e-05, "loss": 0.1352, "step": 33613 }, { "epoch": 0.5995433952841294, "grad_norm": 0.24899795651435852, "learning_rate": 2.069678159170621e-05, "loss": 0.1333, "step": 33614 }, { "epoch": 0.5995612314058432, "grad_norm": 0.27833905816078186, "learning_rate": 2.069524832417165e-05, "loss": 0.1996, "step": 33615 }, { "epoch": 0.5995790675275569, "grad_norm": 0.3196410834789276, "learning_rate": 2.0693715073323786e-05, "loss": 0.1179, "step": 33616 }, { "epoch": 0.5995969036492705, "grad_norm": 0.32081323862075806, "learning_rate": 2.0692181839168575e-05, "loss": 0.1426, "step": 33617 }, { "epoch": 0.5996147397709842, "grad_norm": 0.32498452067375183, "learning_rate": 2.0690648621711944e-05, "loss": 0.13, "step": 33618 }, { "epoch": 0.5996325758926979, "grad_norm": 0.25471729040145874, "learning_rate": 2.068911542095986e-05, "loss": 0.0906, "step": 33619 }, { "epoch": 0.5996504120144116, "grad_norm": 0.3257145583629608, "learning_rate": 2.0687582236918252e-05, "loss": 0.1501, "step": 33620 }, { "epoch": 0.5996682481361253, "grad_norm": 0.34932589530944824, "learning_rate": 2.0686049069593066e-05, "loss": 0.1342, "step": 33621 }, { "epoch": 0.599686084257839, "grad_norm": 0.23956450819969177, "learning_rate": 2.0684515918990232e-05, "loss": 0.1375, "step": 33622 }, { "epoch": 0.5997039203795527, "grad_norm": 0.30068206787109375, "learning_rate": 2.0682982785115712e-05, "loss": 0.1309, "step": 33623 }, { "epoch": 0.5997217565012664, "grad_norm": 0.21349970996379852, "learning_rate": 2.0681449667975443e-05, "loss": 0.1373, "step": 33624 }, { "epoch": 0.59973959262298, "grad_norm": 0.2551549971103668, "learning_rate": 2.067991656757537e-05, "loss": 0.1728, "step": 33625 }, { "epoch": 0.5997574287446937, "grad_norm": 0.274891197681427, "learning_rate": 2.067838348392143e-05, "loss": 0.1353, "step": 33626 }, { "epoch": 0.5997752648664074, "grad_norm": 0.2301880568265915, "learning_rate": 2.067685041701956e-05, "loss": 0.139, "step": 33627 }, { "epoch": 0.5997931009881211, "grad_norm": 0.2576291859149933, "learning_rate": 2.0675317366875724e-05, "loss": 0.1411, "step": 33628 }, { "epoch": 0.5998109371098348, "grad_norm": 0.2454328089952469, "learning_rate": 2.067378433349584e-05, "loss": 0.0862, "step": 33629 }, { "epoch": 0.5998287732315485, "grad_norm": 0.2625868618488312, "learning_rate": 2.0672251316885872e-05, "loss": 0.0978, "step": 33630 }, { "epoch": 0.5998466093532622, "grad_norm": 0.23759739100933075, "learning_rate": 2.067071831705174e-05, "loss": 0.1208, "step": 33631 }, { "epoch": 0.599864445474976, "grad_norm": 0.3765805959701538, "learning_rate": 2.066918533399941e-05, "loss": 0.1195, "step": 33632 }, { "epoch": 0.5998822815966897, "grad_norm": 0.22054898738861084, "learning_rate": 2.0667652367734815e-05, "loss": 0.0887, "step": 33633 }, { "epoch": 0.5999001177184033, "grad_norm": 0.2293272465467453, "learning_rate": 2.0666119418263897e-05, "loss": 0.1121, "step": 33634 }, { "epoch": 0.599917953840117, "grad_norm": 0.30799606442451477, "learning_rate": 2.066458648559258e-05, "loss": 0.1532, "step": 33635 }, { "epoch": 0.5999357899618307, "grad_norm": 0.3114411532878876, "learning_rate": 2.066305356972684e-05, "loss": 0.1422, "step": 33636 }, { "epoch": 0.5999536260835444, "grad_norm": 0.259997695684433, "learning_rate": 2.0661520670672597e-05, "loss": 0.1584, "step": 33637 }, { "epoch": 0.5999714622052581, "grad_norm": 0.3229637145996094, "learning_rate": 2.06599877884358e-05, "loss": 0.1241, "step": 33638 }, { "epoch": 0.5999892983269718, "grad_norm": 0.28554773330688477, "learning_rate": 2.065845492302239e-05, "loss": 0.15, "step": 33639 }, { "epoch": 0.6000071344486855, "grad_norm": 0.31168311834335327, "learning_rate": 2.0656922074438298e-05, "loss": 0.1242, "step": 33640 }, { "epoch": 0.6000249705703992, "grad_norm": 0.22958698868751526, "learning_rate": 2.0655389242689487e-05, "loss": 0.1216, "step": 33641 }, { "epoch": 0.6000428066921129, "grad_norm": 0.2597629725933075, "learning_rate": 2.0653856427781886e-05, "loss": 0.1105, "step": 33642 }, { "epoch": 0.6000606428138265, "grad_norm": 0.22892449796199799, "learning_rate": 2.065232362972144e-05, "loss": 0.1098, "step": 33643 }, { "epoch": 0.6000784789355402, "grad_norm": 0.30714118480682373, "learning_rate": 2.065079084851408e-05, "loss": 0.1643, "step": 33644 }, { "epoch": 0.6000963150572539, "grad_norm": 0.3327876329421997, "learning_rate": 2.064925808416576e-05, "loss": 0.1282, "step": 33645 }, { "epoch": 0.6001141511789676, "grad_norm": 0.2873730957508087, "learning_rate": 2.0647725336682427e-05, "loss": 0.0702, "step": 33646 }, { "epoch": 0.6001319873006813, "grad_norm": 0.3141648769378662, "learning_rate": 2.0646192606070014e-05, "loss": 0.1433, "step": 33647 }, { "epoch": 0.600149823422395, "grad_norm": 0.333107590675354, "learning_rate": 2.064465989233445e-05, "loss": 0.1521, "step": 33648 }, { "epoch": 0.6001676595441088, "grad_norm": 0.24174322187900543, "learning_rate": 2.06431271954817e-05, "loss": 0.1026, "step": 33649 }, { "epoch": 0.6001854956658225, "grad_norm": 0.26976731419563293, "learning_rate": 2.0641594515517685e-05, "loss": 0.1204, "step": 33650 }, { "epoch": 0.6002033317875362, "grad_norm": 0.2513549029827118, "learning_rate": 2.064006185244836e-05, "loss": 0.1413, "step": 33651 }, { "epoch": 0.6002211679092498, "grad_norm": 0.238325297832489, "learning_rate": 2.0638529206279667e-05, "loss": 0.1142, "step": 33652 }, { "epoch": 0.6002390040309635, "grad_norm": 0.22856846451759338, "learning_rate": 2.0636996577017524e-05, "loss": 0.12, "step": 33653 }, { "epoch": 0.6002568401526772, "grad_norm": 0.3029005527496338, "learning_rate": 2.0635463964667905e-05, "loss": 0.1454, "step": 33654 }, { "epoch": 0.6002746762743909, "grad_norm": 0.2216426283121109, "learning_rate": 2.0633931369236733e-05, "loss": 0.1172, "step": 33655 }, { "epoch": 0.6002925123961046, "grad_norm": 0.2906281650066376, "learning_rate": 2.0632398790729946e-05, "loss": 0.0831, "step": 33656 }, { "epoch": 0.6003103485178183, "grad_norm": 0.24564191699028015, "learning_rate": 2.0630866229153488e-05, "loss": 0.0933, "step": 33657 }, { "epoch": 0.600328184639532, "grad_norm": 0.27084264159202576, "learning_rate": 2.0629333684513304e-05, "loss": 0.1753, "step": 33658 }, { "epoch": 0.6003460207612457, "grad_norm": 0.35575029253959656, "learning_rate": 2.0627801156815338e-05, "loss": 0.1634, "step": 33659 }, { "epoch": 0.6003638568829593, "grad_norm": 0.2876966893672943, "learning_rate": 2.0626268646065522e-05, "loss": 0.1347, "step": 33660 }, { "epoch": 0.600381693004673, "grad_norm": 0.297993928194046, "learning_rate": 2.0624736152269793e-05, "loss": 0.1622, "step": 33661 }, { "epoch": 0.6003995291263867, "grad_norm": 0.34068602323532104, "learning_rate": 2.0623203675434105e-05, "loss": 0.1878, "step": 33662 }, { "epoch": 0.6004173652481004, "grad_norm": 0.30879369378089905, "learning_rate": 2.0621671215564393e-05, "loss": 0.1434, "step": 33663 }, { "epoch": 0.6004352013698141, "grad_norm": 0.23983246088027954, "learning_rate": 2.0620138772666585e-05, "loss": 0.1205, "step": 33664 }, { "epoch": 0.6004530374915279, "grad_norm": 0.33153408765792847, "learning_rate": 2.0618606346746638e-05, "loss": 0.105, "step": 33665 }, { "epoch": 0.6004708736132416, "grad_norm": 0.30795496702194214, "learning_rate": 2.0617073937810484e-05, "loss": 0.0838, "step": 33666 }, { "epoch": 0.6004887097349553, "grad_norm": 0.2563682198524475, "learning_rate": 2.061554154586407e-05, "loss": 0.1129, "step": 33667 }, { "epoch": 0.600506545856669, "grad_norm": 0.26605424284935, "learning_rate": 2.0614009170913333e-05, "loss": 0.1146, "step": 33668 }, { "epoch": 0.6005243819783826, "grad_norm": 0.28893157839775085, "learning_rate": 2.061247681296421e-05, "loss": 0.1232, "step": 33669 }, { "epoch": 0.6005422181000963, "grad_norm": 0.247714102268219, "learning_rate": 2.0610944472022632e-05, "loss": 0.141, "step": 33670 }, { "epoch": 0.60056005422181, "grad_norm": 0.2717185914516449, "learning_rate": 2.060941214809456e-05, "loss": 0.1269, "step": 33671 }, { "epoch": 0.6005778903435237, "grad_norm": 0.26414069533348083, "learning_rate": 2.060787984118592e-05, "loss": 0.1483, "step": 33672 }, { "epoch": 0.6005957264652374, "grad_norm": 0.253976970911026, "learning_rate": 2.0606347551302654e-05, "loss": 0.1227, "step": 33673 }, { "epoch": 0.6006135625869511, "grad_norm": 0.2299121469259262, "learning_rate": 2.0604815278450695e-05, "loss": 0.1344, "step": 33674 }, { "epoch": 0.6006313987086648, "grad_norm": 0.3089067041873932, "learning_rate": 2.0603283022636003e-05, "loss": 0.1037, "step": 33675 }, { "epoch": 0.6006492348303785, "grad_norm": 0.22295323014259338, "learning_rate": 2.0601750783864503e-05, "loss": 0.1066, "step": 33676 }, { "epoch": 0.6006670709520922, "grad_norm": 0.28311818838119507, "learning_rate": 2.060021856214213e-05, "loss": 0.0847, "step": 33677 }, { "epoch": 0.6006849070738058, "grad_norm": 0.2758885622024536, "learning_rate": 2.0598686357474827e-05, "loss": 0.1876, "step": 33678 }, { "epoch": 0.6007027431955195, "grad_norm": 0.28104543685913086, "learning_rate": 2.059715416986854e-05, "loss": 0.1038, "step": 33679 }, { "epoch": 0.6007205793172332, "grad_norm": 0.24740703403949738, "learning_rate": 2.059562199932921e-05, "loss": 0.1203, "step": 33680 }, { "epoch": 0.6007384154389469, "grad_norm": 0.31523147225379944, "learning_rate": 2.0594089845862768e-05, "loss": 0.1008, "step": 33681 }, { "epoch": 0.6007562515606607, "grad_norm": 0.29176992177963257, "learning_rate": 2.0592557709475155e-05, "loss": 0.1693, "step": 33682 }, { "epoch": 0.6007740876823744, "grad_norm": 0.23750586807727814, "learning_rate": 2.05910255901723e-05, "loss": 0.148, "step": 33683 }, { "epoch": 0.6007919238040881, "grad_norm": 0.23496584594249725, "learning_rate": 2.0589493487960164e-05, "loss": 0.1505, "step": 33684 }, { "epoch": 0.6008097599258018, "grad_norm": 0.22927476465702057, "learning_rate": 2.0587961402844672e-05, "loss": 0.0851, "step": 33685 }, { "epoch": 0.6008275960475155, "grad_norm": 0.23963990807533264, "learning_rate": 2.0586429334831768e-05, "loss": 0.1341, "step": 33686 }, { "epoch": 0.6008454321692291, "grad_norm": 0.29195401072502136, "learning_rate": 2.0584897283927377e-05, "loss": 0.1863, "step": 33687 }, { "epoch": 0.6008632682909428, "grad_norm": 0.2602557837963104, "learning_rate": 2.058336525013746e-05, "loss": 0.1306, "step": 33688 }, { "epoch": 0.6008811044126565, "grad_norm": 0.27246448397636414, "learning_rate": 2.0581833233467946e-05, "loss": 0.1509, "step": 33689 }, { "epoch": 0.6008989405343702, "grad_norm": 0.2719744145870209, "learning_rate": 2.058030123392477e-05, "loss": 0.1268, "step": 33690 }, { "epoch": 0.6009167766560839, "grad_norm": 0.3758862614631653, "learning_rate": 2.0578769251513866e-05, "loss": 0.1405, "step": 33691 }, { "epoch": 0.6009346127777976, "grad_norm": 0.27474191784858704, "learning_rate": 2.057723728624119e-05, "loss": 0.158, "step": 33692 }, { "epoch": 0.6009524488995113, "grad_norm": 0.325585275888443, "learning_rate": 2.057570533811266e-05, "loss": 0.1564, "step": 33693 }, { "epoch": 0.600970285021225, "grad_norm": 0.3095877766609192, "learning_rate": 2.0574173407134233e-05, "loss": 0.1687, "step": 33694 }, { "epoch": 0.6009881211429386, "grad_norm": 0.2678094506263733, "learning_rate": 2.0572641493311835e-05, "loss": 0.112, "step": 33695 }, { "epoch": 0.6010059572646523, "grad_norm": 0.3076682984828949, "learning_rate": 2.05711095966514e-05, "loss": 0.1155, "step": 33696 }, { "epoch": 0.601023793386366, "grad_norm": 0.2864644527435303, "learning_rate": 2.0569577717158887e-05, "loss": 0.1337, "step": 33697 }, { "epoch": 0.6010416295080797, "grad_norm": 0.34131529927253723, "learning_rate": 2.056804585484022e-05, "loss": 0.1445, "step": 33698 }, { "epoch": 0.6010594656297935, "grad_norm": 0.22036266326904297, "learning_rate": 2.0566514009701328e-05, "loss": 0.0817, "step": 33699 }, { "epoch": 0.6010773017515072, "grad_norm": 0.3300800919532776, "learning_rate": 2.0564982181748155e-05, "loss": 0.1148, "step": 33700 }, { "epoch": 0.6010951378732209, "grad_norm": 0.22799605131149292, "learning_rate": 2.0563450370986655e-05, "loss": 0.0775, "step": 33701 }, { "epoch": 0.6011129739949346, "grad_norm": 0.2662222385406494, "learning_rate": 2.0561918577422756e-05, "loss": 0.1146, "step": 33702 }, { "epoch": 0.6011308101166483, "grad_norm": 0.22732426226139069, "learning_rate": 2.056038680106239e-05, "loss": 0.1046, "step": 33703 }, { "epoch": 0.601148646238362, "grad_norm": 0.27283963561058044, "learning_rate": 2.055885504191149e-05, "loss": 0.1473, "step": 33704 }, { "epoch": 0.6011664823600756, "grad_norm": 0.21563753485679626, "learning_rate": 2.055732329997601e-05, "loss": 0.1294, "step": 33705 }, { "epoch": 0.6011843184817893, "grad_norm": 0.2540137469768524, "learning_rate": 2.0555791575261877e-05, "loss": 0.1579, "step": 33706 }, { "epoch": 0.601202154603503, "grad_norm": 0.2659685015678406, "learning_rate": 2.0554259867775034e-05, "loss": 0.1387, "step": 33707 }, { "epoch": 0.6012199907252167, "grad_norm": 0.27802035212516785, "learning_rate": 2.0552728177521413e-05, "loss": 0.1672, "step": 33708 }, { "epoch": 0.6012378268469304, "grad_norm": 0.29357317090034485, "learning_rate": 2.0551196504506948e-05, "loss": 0.1612, "step": 33709 }, { "epoch": 0.6012556629686441, "grad_norm": 0.3730638921260834, "learning_rate": 2.0549664848737586e-05, "loss": 0.0681, "step": 33710 }, { "epoch": 0.6012734990903578, "grad_norm": 0.21985360980033875, "learning_rate": 2.0548133210219267e-05, "loss": 0.1164, "step": 33711 }, { "epoch": 0.6012913352120715, "grad_norm": 0.22377026081085205, "learning_rate": 2.0546601588957916e-05, "loss": 0.1112, "step": 33712 }, { "epoch": 0.6013091713337851, "grad_norm": 0.2902832329273224, "learning_rate": 2.054506998495947e-05, "loss": 0.1001, "step": 33713 }, { "epoch": 0.6013270074554988, "grad_norm": 0.231264129281044, "learning_rate": 2.0543538398229875e-05, "loss": 0.1216, "step": 33714 }, { "epoch": 0.6013448435772125, "grad_norm": 0.27825841307640076, "learning_rate": 2.0542006828775068e-05, "loss": 0.1693, "step": 33715 }, { "epoch": 0.6013626796989263, "grad_norm": 0.3316711187362671, "learning_rate": 2.0540475276600983e-05, "loss": 0.1469, "step": 33716 }, { "epoch": 0.60138051582064, "grad_norm": 0.2353348284959793, "learning_rate": 2.0538943741713547e-05, "loss": 0.1411, "step": 33717 }, { "epoch": 0.6013983519423537, "grad_norm": 0.29443326592445374, "learning_rate": 2.053741222411871e-05, "loss": 0.1717, "step": 33718 }, { "epoch": 0.6014161880640674, "grad_norm": 0.22235290706157684, "learning_rate": 2.0535880723822416e-05, "loss": 0.1132, "step": 33719 }, { "epoch": 0.6014340241857811, "grad_norm": 0.23972205817699432, "learning_rate": 2.0534349240830574e-05, "loss": 0.1204, "step": 33720 }, { "epoch": 0.6014518603074948, "grad_norm": 0.2309301346540451, "learning_rate": 2.0532817775149148e-05, "loss": 0.1521, "step": 33721 }, { "epoch": 0.6014696964292084, "grad_norm": 0.2577821910381317, "learning_rate": 2.0531286326784054e-05, "loss": 0.1363, "step": 33722 }, { "epoch": 0.6014875325509221, "grad_norm": 0.3417529761791229, "learning_rate": 2.0529754895741244e-05, "loss": 0.2097, "step": 33723 }, { "epoch": 0.6015053686726358, "grad_norm": 0.22946985065937042, "learning_rate": 2.0528223482026647e-05, "loss": 0.1248, "step": 33724 }, { "epoch": 0.6015232047943495, "grad_norm": 0.29907146096229553, "learning_rate": 2.0526692085646204e-05, "loss": 0.1472, "step": 33725 }, { "epoch": 0.6015410409160632, "grad_norm": 0.2472325712442398, "learning_rate": 2.0525160706605834e-05, "loss": 0.1375, "step": 33726 }, { "epoch": 0.6015588770377769, "grad_norm": 0.21015435457229614, "learning_rate": 2.0523629344911495e-05, "loss": 0.1436, "step": 33727 }, { "epoch": 0.6015767131594906, "grad_norm": 0.21222342550754547, "learning_rate": 2.0522098000569117e-05, "loss": 0.1267, "step": 33728 }, { "epoch": 0.6015945492812043, "grad_norm": 0.3917984366416931, "learning_rate": 2.0520566673584637e-05, "loss": 0.1433, "step": 33729 }, { "epoch": 0.601612385402918, "grad_norm": 0.3214591443538666, "learning_rate": 2.0519035363963973e-05, "loss": 0.1412, "step": 33730 }, { "epoch": 0.6016302215246316, "grad_norm": 0.3727131187915802, "learning_rate": 2.0517504071713088e-05, "loss": 0.1439, "step": 33731 }, { "epoch": 0.6016480576463453, "grad_norm": 0.25211301445961, "learning_rate": 2.0515972796837905e-05, "loss": 0.1034, "step": 33732 }, { "epoch": 0.6016658937680591, "grad_norm": 0.28403568267822266, "learning_rate": 2.051444153934436e-05, "loss": 0.1043, "step": 33733 }, { "epoch": 0.6016837298897728, "grad_norm": 0.2361334264278412, "learning_rate": 2.051291029923839e-05, "loss": 0.1797, "step": 33734 }, { "epoch": 0.6017015660114865, "grad_norm": 0.2434931844472885, "learning_rate": 2.0511379076525917e-05, "loss": 0.1476, "step": 33735 }, { "epoch": 0.6017194021332002, "grad_norm": 0.2450096309185028, "learning_rate": 2.0509847871212902e-05, "loss": 0.1014, "step": 33736 }, { "epoch": 0.6017372382549139, "grad_norm": 0.26669010519981384, "learning_rate": 2.050831668330527e-05, "loss": 0.1177, "step": 33737 }, { "epoch": 0.6017550743766276, "grad_norm": 0.228338822722435, "learning_rate": 2.050678551280895e-05, "loss": 0.1443, "step": 33738 }, { "epoch": 0.6017729104983413, "grad_norm": 0.29480478167533875, "learning_rate": 2.050525435972987e-05, "loss": 0.2194, "step": 33739 }, { "epoch": 0.6017907466200549, "grad_norm": 0.30891695618629456, "learning_rate": 2.050372322407399e-05, "loss": 0.1428, "step": 33740 }, { "epoch": 0.6018085827417686, "grad_norm": 0.24988296627998352, "learning_rate": 2.0502192105847222e-05, "loss": 0.1006, "step": 33741 }, { "epoch": 0.6018264188634823, "grad_norm": 0.569564938545227, "learning_rate": 2.050066100505552e-05, "loss": 0.121, "step": 33742 }, { "epoch": 0.601844254985196, "grad_norm": 0.25561919808387756, "learning_rate": 2.04991299217048e-05, "loss": 0.1149, "step": 33743 }, { "epoch": 0.6018620911069097, "grad_norm": 0.2002534568309784, "learning_rate": 2.049759885580102e-05, "loss": 0.0952, "step": 33744 }, { "epoch": 0.6018799272286234, "grad_norm": 0.19361424446105957, "learning_rate": 2.0496067807350096e-05, "loss": 0.1052, "step": 33745 }, { "epoch": 0.6018977633503371, "grad_norm": 0.1840454638004303, "learning_rate": 2.049453677635797e-05, "loss": 0.0782, "step": 33746 }, { "epoch": 0.6019155994720508, "grad_norm": 0.3458082675933838, "learning_rate": 2.0493005762830573e-05, "loss": 0.1884, "step": 33747 }, { "epoch": 0.6019334355937644, "grad_norm": 0.18853051960468292, "learning_rate": 2.049147476677384e-05, "loss": 0.1029, "step": 33748 }, { "epoch": 0.6019512717154781, "grad_norm": 0.2220182716846466, "learning_rate": 2.0489943788193708e-05, "loss": 0.1139, "step": 33749 }, { "epoch": 0.6019691078371919, "grad_norm": 0.2086227536201477, "learning_rate": 2.0488412827096117e-05, "loss": 0.0996, "step": 33750 }, { "epoch": 0.6019869439589056, "grad_norm": 0.2594011127948761, "learning_rate": 2.0486881883486996e-05, "loss": 0.1199, "step": 33751 }, { "epoch": 0.6020047800806193, "grad_norm": 0.2105686515569687, "learning_rate": 2.0485350957372272e-05, "loss": 0.0998, "step": 33752 }, { "epoch": 0.602022616202333, "grad_norm": 0.24959707260131836, "learning_rate": 2.0483820048757893e-05, "loss": 0.1221, "step": 33753 }, { "epoch": 0.6020404523240467, "grad_norm": 0.28625673055648804, "learning_rate": 2.0482289157649788e-05, "loss": 0.0892, "step": 33754 }, { "epoch": 0.6020582884457604, "grad_norm": 0.2425151914358139, "learning_rate": 2.048075828405389e-05, "loss": 0.0821, "step": 33755 }, { "epoch": 0.6020761245674741, "grad_norm": 0.22902196645736694, "learning_rate": 2.0479227427976125e-05, "loss": 0.1162, "step": 33756 }, { "epoch": 0.6020939606891877, "grad_norm": 0.2373112291097641, "learning_rate": 2.0477696589422447e-05, "loss": 0.1133, "step": 33757 }, { "epoch": 0.6021117968109014, "grad_norm": 0.27441054582595825, "learning_rate": 2.047616576839878e-05, "loss": 0.0879, "step": 33758 }, { "epoch": 0.6021296329326151, "grad_norm": 0.2369239181280136, "learning_rate": 2.0474634964911055e-05, "loss": 0.1124, "step": 33759 }, { "epoch": 0.6021474690543288, "grad_norm": 0.2260851263999939, "learning_rate": 2.0473104178965204e-05, "loss": 0.1235, "step": 33760 }, { "epoch": 0.6021653051760425, "grad_norm": 0.22847603261470795, "learning_rate": 2.0471573410567164e-05, "loss": 0.1193, "step": 33761 }, { "epoch": 0.6021831412977562, "grad_norm": 0.2566198408603668, "learning_rate": 2.0470042659722867e-05, "loss": 0.1115, "step": 33762 }, { "epoch": 0.6022009774194699, "grad_norm": 0.2882314622402191, "learning_rate": 2.046851192643826e-05, "loss": 0.1417, "step": 33763 }, { "epoch": 0.6022188135411836, "grad_norm": 0.20216074585914612, "learning_rate": 2.0466981210719263e-05, "loss": 0.1085, "step": 33764 }, { "epoch": 0.6022366496628972, "grad_norm": 0.2686808705329895, "learning_rate": 2.0465450512571802e-05, "loss": 0.1518, "step": 33765 }, { "epoch": 0.602254485784611, "grad_norm": 0.24801988899707794, "learning_rate": 2.0463919832001834e-05, "loss": 0.1188, "step": 33766 }, { "epoch": 0.6022723219063247, "grad_norm": 0.2675819396972656, "learning_rate": 2.0462389169015273e-05, "loss": 0.1464, "step": 33767 }, { "epoch": 0.6022901580280384, "grad_norm": 0.382968932390213, "learning_rate": 2.046085852361806e-05, "loss": 0.095, "step": 33768 }, { "epoch": 0.6023079941497521, "grad_norm": 0.7312154769897461, "learning_rate": 2.0459327895816122e-05, "loss": 0.1518, "step": 33769 }, { "epoch": 0.6023258302714658, "grad_norm": 0.2627183496952057, "learning_rate": 2.04577972856154e-05, "loss": 0.1269, "step": 33770 }, { "epoch": 0.6023436663931795, "grad_norm": 0.28598958253860474, "learning_rate": 2.0456266693021832e-05, "loss": 0.1335, "step": 33771 }, { "epoch": 0.6023615025148932, "grad_norm": 0.2967975437641144, "learning_rate": 2.045473611804134e-05, "loss": 0.1099, "step": 33772 }, { "epoch": 0.6023793386366069, "grad_norm": 0.3294130861759186, "learning_rate": 2.0453205560679862e-05, "loss": 0.1175, "step": 33773 }, { "epoch": 0.6023971747583206, "grad_norm": 0.22721338272094727, "learning_rate": 2.045167502094332e-05, "loss": 0.0825, "step": 33774 }, { "epoch": 0.6024150108800342, "grad_norm": 0.32131215929985046, "learning_rate": 2.045014449883767e-05, "loss": 0.141, "step": 33775 }, { "epoch": 0.6024328470017479, "grad_norm": 0.5400431752204895, "learning_rate": 2.044861399436882e-05, "loss": 0.1895, "step": 33776 }, { "epoch": 0.6024506831234616, "grad_norm": 0.25084590911865234, "learning_rate": 2.044708350754272e-05, "loss": 0.0854, "step": 33777 }, { "epoch": 0.6024685192451753, "grad_norm": 0.1929490864276886, "learning_rate": 2.0445553038365288e-05, "loss": 0.1409, "step": 33778 }, { "epoch": 0.602486355366889, "grad_norm": 0.24766503274440765, "learning_rate": 2.0444022586842477e-05, "loss": 0.1122, "step": 33779 }, { "epoch": 0.6025041914886027, "grad_norm": 0.2801390588283539, "learning_rate": 2.044249215298021e-05, "loss": 0.1135, "step": 33780 }, { "epoch": 0.6025220276103164, "grad_norm": 0.29283440113067627, "learning_rate": 2.044096173678441e-05, "loss": 0.1722, "step": 33781 }, { "epoch": 0.6025398637320301, "grad_norm": 0.24493587017059326, "learning_rate": 2.0439431338261013e-05, "loss": 0.0924, "step": 33782 }, { "epoch": 0.6025576998537439, "grad_norm": 0.21509939432144165, "learning_rate": 2.0437900957415963e-05, "loss": 0.1262, "step": 33783 }, { "epoch": 0.6025755359754575, "grad_norm": 0.2098754346370697, "learning_rate": 2.0436370594255183e-05, "loss": 0.0651, "step": 33784 }, { "epoch": 0.6025933720971712, "grad_norm": 0.3117353022098541, "learning_rate": 2.043484024878461e-05, "loss": 0.1048, "step": 33785 }, { "epoch": 0.6026112082188849, "grad_norm": 0.2930700480937958, "learning_rate": 2.0433309921010173e-05, "loss": 0.1625, "step": 33786 }, { "epoch": 0.6026290443405986, "grad_norm": 0.2191721796989441, "learning_rate": 2.0431779610937793e-05, "loss": 0.1037, "step": 33787 }, { "epoch": 0.6026468804623123, "grad_norm": 0.3632800877094269, "learning_rate": 2.0430249318573425e-05, "loss": 0.1163, "step": 33788 }, { "epoch": 0.602664716584026, "grad_norm": 0.3090948462486267, "learning_rate": 2.042871904392298e-05, "loss": 0.1476, "step": 33789 }, { "epoch": 0.6026825527057397, "grad_norm": 0.29852980375289917, "learning_rate": 2.0427188786992406e-05, "loss": 0.1406, "step": 33790 }, { "epoch": 0.6027003888274534, "grad_norm": 0.1763669103384018, "learning_rate": 2.042565854778762e-05, "loss": 0.0965, "step": 33791 }, { "epoch": 0.602718224949167, "grad_norm": 0.30537042021751404, "learning_rate": 2.0424128326314567e-05, "loss": 0.1376, "step": 33792 }, { "epoch": 0.6027360610708807, "grad_norm": 0.2414328008890152, "learning_rate": 2.0422598122579177e-05, "loss": 0.135, "step": 33793 }, { "epoch": 0.6027538971925944, "grad_norm": 0.19762182235717773, "learning_rate": 2.0421067936587376e-05, "loss": 0.0854, "step": 33794 }, { "epoch": 0.6027717333143081, "grad_norm": 0.24279852211475372, "learning_rate": 2.041953776834509e-05, "loss": 0.1528, "step": 33795 }, { "epoch": 0.6027895694360218, "grad_norm": 0.20025098323822021, "learning_rate": 2.0418007617858262e-05, "loss": 0.0959, "step": 33796 }, { "epoch": 0.6028074055577355, "grad_norm": 0.22615863382816315, "learning_rate": 2.041647748513282e-05, "loss": 0.1196, "step": 33797 }, { "epoch": 0.6028252416794492, "grad_norm": 0.236972376704216, "learning_rate": 2.041494737017469e-05, "loss": 0.1132, "step": 33798 }, { "epoch": 0.6028430778011629, "grad_norm": 0.2631887197494507, "learning_rate": 2.0413417272989818e-05, "loss": 0.1182, "step": 33799 }, { "epoch": 0.6028609139228767, "grad_norm": 0.2501896619796753, "learning_rate": 2.041188719358411e-05, "loss": 0.1206, "step": 33800 }, { "epoch": 0.6028787500445904, "grad_norm": 0.23473411798477173, "learning_rate": 2.0410357131963524e-05, "loss": 0.1509, "step": 33801 }, { "epoch": 0.602896586166304, "grad_norm": 0.3168741464614868, "learning_rate": 2.0408827088133975e-05, "loss": 0.1184, "step": 33802 }, { "epoch": 0.6029144222880177, "grad_norm": 0.25983482599258423, "learning_rate": 2.0407297062101395e-05, "loss": 0.1145, "step": 33803 }, { "epoch": 0.6029322584097314, "grad_norm": 0.2601884603500366, "learning_rate": 2.0405767053871712e-05, "loss": 0.1017, "step": 33804 }, { "epoch": 0.6029500945314451, "grad_norm": 0.3014456629753113, "learning_rate": 2.0404237063450877e-05, "loss": 0.1378, "step": 33805 }, { "epoch": 0.6029679306531588, "grad_norm": 0.2662687301635742, "learning_rate": 2.04027070908448e-05, "loss": 0.1088, "step": 33806 }, { "epoch": 0.6029857667748725, "grad_norm": 0.33728307485580444, "learning_rate": 2.0401177136059425e-05, "loss": 0.1549, "step": 33807 }, { "epoch": 0.6030036028965862, "grad_norm": 0.2911766469478607, "learning_rate": 2.0399647199100663e-05, "loss": 0.1311, "step": 33808 }, { "epoch": 0.6030214390182999, "grad_norm": 0.2578064203262329, "learning_rate": 2.0398117279974466e-05, "loss": 0.1247, "step": 33809 }, { "epoch": 0.6030392751400135, "grad_norm": 0.2399481236934662, "learning_rate": 2.0396587378686752e-05, "loss": 0.0991, "step": 33810 }, { "epoch": 0.6030571112617272, "grad_norm": 0.22368918359279633, "learning_rate": 2.039505749524346e-05, "loss": 0.0966, "step": 33811 }, { "epoch": 0.6030749473834409, "grad_norm": 0.23402705788612366, "learning_rate": 2.0393527629650518e-05, "loss": 0.0917, "step": 33812 }, { "epoch": 0.6030927835051546, "grad_norm": 0.2707475423812866, "learning_rate": 2.039199778191384e-05, "loss": 0.0804, "step": 33813 }, { "epoch": 0.6031106196268683, "grad_norm": 0.24490302801132202, "learning_rate": 2.039046795203938e-05, "loss": 0.1271, "step": 33814 }, { "epoch": 0.603128455748582, "grad_norm": 0.34024158120155334, "learning_rate": 2.0388938140033064e-05, "loss": 0.0903, "step": 33815 }, { "epoch": 0.6031462918702957, "grad_norm": 0.3218767046928406, "learning_rate": 2.0387408345900804e-05, "loss": 0.1325, "step": 33816 }, { "epoch": 0.6031641279920095, "grad_norm": 0.2643524706363678, "learning_rate": 2.0385878569648546e-05, "loss": 0.1221, "step": 33817 }, { "epoch": 0.6031819641137232, "grad_norm": 0.2380962073802948, "learning_rate": 2.0384348811282216e-05, "loss": 0.156, "step": 33818 }, { "epoch": 0.6031998002354368, "grad_norm": 0.2571446895599365, "learning_rate": 2.0382819070807752e-05, "loss": 0.1111, "step": 33819 }, { "epoch": 0.6032176363571505, "grad_norm": 0.24764426052570343, "learning_rate": 2.038128934823107e-05, "loss": 0.1309, "step": 33820 }, { "epoch": 0.6032354724788642, "grad_norm": 0.2795545756816864, "learning_rate": 2.03797596435581e-05, "loss": 0.1593, "step": 33821 }, { "epoch": 0.6032533086005779, "grad_norm": 0.32767194509506226, "learning_rate": 2.0378229956794787e-05, "loss": 0.1665, "step": 33822 }, { "epoch": 0.6032711447222916, "grad_norm": 0.2888633608818054, "learning_rate": 2.0376700287947052e-05, "loss": 0.1505, "step": 33823 }, { "epoch": 0.6032889808440053, "grad_norm": 0.2893064618110657, "learning_rate": 2.0375170637020817e-05, "loss": 0.0845, "step": 33824 }, { "epoch": 0.603306816965719, "grad_norm": 0.2887312173843384, "learning_rate": 2.0373641004022024e-05, "loss": 0.1336, "step": 33825 }, { "epoch": 0.6033246530874327, "grad_norm": 0.2215019166469574, "learning_rate": 2.0372111388956582e-05, "loss": 0.1256, "step": 33826 }, { "epoch": 0.6033424892091463, "grad_norm": 0.20559757947921753, "learning_rate": 2.037058179183045e-05, "loss": 0.1463, "step": 33827 }, { "epoch": 0.60336032533086, "grad_norm": 0.3059665560722351, "learning_rate": 2.036905221264954e-05, "loss": 0.1102, "step": 33828 }, { "epoch": 0.6033781614525737, "grad_norm": 0.2694060504436493, "learning_rate": 2.0367522651419783e-05, "loss": 0.0967, "step": 33829 }, { "epoch": 0.6033959975742874, "grad_norm": 0.3812882602214813, "learning_rate": 2.0365993108147095e-05, "loss": 0.1279, "step": 33830 }, { "epoch": 0.6034138336960011, "grad_norm": 0.2730935513973236, "learning_rate": 2.0364463582837428e-05, "loss": 0.1261, "step": 33831 }, { "epoch": 0.6034316698177148, "grad_norm": 0.28607073426246643, "learning_rate": 2.0362934075496705e-05, "loss": 0.0997, "step": 33832 }, { "epoch": 0.6034495059394285, "grad_norm": 0.2214580774307251, "learning_rate": 2.0361404586130854e-05, "loss": 0.1234, "step": 33833 }, { "epoch": 0.6034673420611423, "grad_norm": 0.3885900378227234, "learning_rate": 2.0359875114745784e-05, "loss": 0.1922, "step": 33834 }, { "epoch": 0.603485178182856, "grad_norm": 0.2388555407524109, "learning_rate": 2.0358345661347456e-05, "loss": 0.0879, "step": 33835 }, { "epoch": 0.6035030143045697, "grad_norm": 0.2469319850206375, "learning_rate": 2.035681622594178e-05, "loss": 0.1084, "step": 33836 }, { "epoch": 0.6035208504262833, "grad_norm": 0.37272289395332336, "learning_rate": 2.0355286808534686e-05, "loss": 0.1444, "step": 33837 }, { "epoch": 0.603538686547997, "grad_norm": 0.2836235761642456, "learning_rate": 2.035375740913211e-05, "loss": 0.1177, "step": 33838 }, { "epoch": 0.6035565226697107, "grad_norm": 0.2795677185058594, "learning_rate": 2.0352228027739964e-05, "loss": 0.0925, "step": 33839 }, { "epoch": 0.6035743587914244, "grad_norm": 0.2673434913158417, "learning_rate": 2.0350698664364196e-05, "loss": 0.1391, "step": 33840 }, { "epoch": 0.6035921949131381, "grad_norm": 0.1964213103055954, "learning_rate": 2.0349169319010728e-05, "loss": 0.1113, "step": 33841 }, { "epoch": 0.6036100310348518, "grad_norm": 0.25359031558036804, "learning_rate": 2.0347639991685485e-05, "loss": 0.0855, "step": 33842 }, { "epoch": 0.6036278671565655, "grad_norm": 0.25008174777030945, "learning_rate": 2.0346110682394383e-05, "loss": 0.1187, "step": 33843 }, { "epoch": 0.6036457032782792, "grad_norm": 0.21026980876922607, "learning_rate": 2.034458139114338e-05, "loss": 0.114, "step": 33844 }, { "epoch": 0.6036635393999928, "grad_norm": 0.2965630292892456, "learning_rate": 2.0343052117938378e-05, "loss": 0.0858, "step": 33845 }, { "epoch": 0.6036813755217065, "grad_norm": 0.1811700463294983, "learning_rate": 2.034152286278532e-05, "loss": 0.1248, "step": 33846 }, { "epoch": 0.6036992116434202, "grad_norm": 0.29956483840942383, "learning_rate": 2.033999362569012e-05, "loss": 0.1357, "step": 33847 }, { "epoch": 0.6037170477651339, "grad_norm": 0.26156920194625854, "learning_rate": 2.0338464406658722e-05, "loss": 0.116, "step": 33848 }, { "epoch": 0.6037348838868476, "grad_norm": 0.27455633878707886, "learning_rate": 2.0336935205697046e-05, "loss": 0.1237, "step": 33849 }, { "epoch": 0.6037527200085613, "grad_norm": 0.23272190988063812, "learning_rate": 2.0335406022811022e-05, "loss": 0.1157, "step": 33850 }, { "epoch": 0.6037705561302751, "grad_norm": 0.3226031959056854, "learning_rate": 2.0333876858006567e-05, "loss": 0.2059, "step": 33851 }, { "epoch": 0.6037883922519888, "grad_norm": 0.2438773512840271, "learning_rate": 2.033234771128962e-05, "loss": 0.1119, "step": 33852 }, { "epoch": 0.6038062283737025, "grad_norm": 0.32348883152008057, "learning_rate": 2.0330818582666105e-05, "loss": 0.1398, "step": 33853 }, { "epoch": 0.6038240644954161, "grad_norm": 0.18496102094650269, "learning_rate": 2.0329289472141954e-05, "loss": 0.1195, "step": 33854 }, { "epoch": 0.6038419006171298, "grad_norm": 0.21836508810520172, "learning_rate": 2.032776037972309e-05, "loss": 0.0993, "step": 33855 }, { "epoch": 0.6038597367388435, "grad_norm": 0.2764545977115631, "learning_rate": 2.0326231305415432e-05, "loss": 0.1546, "step": 33856 }, { "epoch": 0.6038775728605572, "grad_norm": 0.2795056700706482, "learning_rate": 2.0324702249224924e-05, "loss": 0.158, "step": 33857 }, { "epoch": 0.6038954089822709, "grad_norm": 0.3521125614643097, "learning_rate": 2.0323173211157482e-05, "loss": 0.0996, "step": 33858 }, { "epoch": 0.6039132451039846, "grad_norm": 0.24373279511928558, "learning_rate": 2.032164419121904e-05, "loss": 0.0603, "step": 33859 }, { "epoch": 0.6039310812256983, "grad_norm": 0.25983476638793945, "learning_rate": 2.032011518941551e-05, "loss": 0.126, "step": 33860 }, { "epoch": 0.603948917347412, "grad_norm": 0.2526746988296509, "learning_rate": 2.031858620575284e-05, "loss": 0.1612, "step": 33861 }, { "epoch": 0.6039667534691257, "grad_norm": 0.2785739004611969, "learning_rate": 2.0317057240236946e-05, "loss": 0.1431, "step": 33862 }, { "epoch": 0.6039845895908393, "grad_norm": 0.24862068891525269, "learning_rate": 2.031552829287376e-05, "loss": 0.116, "step": 33863 }, { "epoch": 0.604002425712553, "grad_norm": 0.2915647327899933, "learning_rate": 2.0313999363669194e-05, "loss": 0.1121, "step": 33864 }, { "epoch": 0.6040202618342667, "grad_norm": 0.3019426465034485, "learning_rate": 2.0312470452629186e-05, "loss": 0.1056, "step": 33865 }, { "epoch": 0.6040380979559804, "grad_norm": 0.26330095529556274, "learning_rate": 2.0310941559759664e-05, "loss": 0.0832, "step": 33866 }, { "epoch": 0.6040559340776942, "grad_norm": 0.22627967596054077, "learning_rate": 2.0309412685066555e-05, "loss": 0.0694, "step": 33867 }, { "epoch": 0.6040737701994079, "grad_norm": 0.20052380859851837, "learning_rate": 2.0307883828555783e-05, "loss": 0.0475, "step": 33868 }, { "epoch": 0.6040916063211216, "grad_norm": 0.21634647250175476, "learning_rate": 2.0306354990233266e-05, "loss": 0.1278, "step": 33869 }, { "epoch": 0.6041094424428353, "grad_norm": 0.23765288293361664, "learning_rate": 2.0304826170104945e-05, "loss": 0.0769, "step": 33870 }, { "epoch": 0.604127278564549, "grad_norm": 0.31639793515205383, "learning_rate": 2.030329736817674e-05, "loss": 0.1165, "step": 33871 }, { "epoch": 0.6041451146862626, "grad_norm": 0.2135034203529358, "learning_rate": 2.0301768584454572e-05, "loss": 0.1266, "step": 33872 }, { "epoch": 0.6041629508079763, "grad_norm": 0.3173833191394806, "learning_rate": 2.0300239818944372e-05, "loss": 0.1449, "step": 33873 }, { "epoch": 0.60418078692969, "grad_norm": 0.25829756259918213, "learning_rate": 2.0298711071652065e-05, "loss": 0.1219, "step": 33874 }, { "epoch": 0.6041986230514037, "grad_norm": 0.2784562408924103, "learning_rate": 2.0297182342583584e-05, "loss": 0.0998, "step": 33875 }, { "epoch": 0.6042164591731174, "grad_norm": 0.2562209367752075, "learning_rate": 2.0295653631744847e-05, "loss": 0.1683, "step": 33876 }, { "epoch": 0.6042342952948311, "grad_norm": 0.265676349401474, "learning_rate": 2.0294124939141785e-05, "loss": 0.1391, "step": 33877 }, { "epoch": 0.6042521314165448, "grad_norm": 0.24899938702583313, "learning_rate": 2.0292596264780306e-05, "loss": 0.1114, "step": 33878 }, { "epoch": 0.6042699675382585, "grad_norm": 0.47234484553337097, "learning_rate": 2.0291067608666362e-05, "loss": 0.1856, "step": 33879 }, { "epoch": 0.6042878036599721, "grad_norm": 0.4021255075931549, "learning_rate": 2.028953897080586e-05, "loss": 0.0839, "step": 33880 }, { "epoch": 0.6043056397816858, "grad_norm": 0.21968500316143036, "learning_rate": 2.0288010351204736e-05, "loss": 0.1216, "step": 33881 }, { "epoch": 0.6043234759033995, "grad_norm": 0.3248167037963867, "learning_rate": 2.02864817498689e-05, "loss": 0.145, "step": 33882 }, { "epoch": 0.6043413120251132, "grad_norm": 0.24360419809818268, "learning_rate": 2.0284953166804306e-05, "loss": 0.1271, "step": 33883 }, { "epoch": 0.604359148146827, "grad_norm": 0.21658781170845032, "learning_rate": 2.0283424602016858e-05, "loss": 0.1612, "step": 33884 }, { "epoch": 0.6043769842685407, "grad_norm": 0.2616976797580719, "learning_rate": 2.0281896055512485e-05, "loss": 0.1589, "step": 33885 }, { "epoch": 0.6043948203902544, "grad_norm": 0.45536941289901733, "learning_rate": 2.02803675272971e-05, "loss": 0.1408, "step": 33886 }, { "epoch": 0.6044126565119681, "grad_norm": 0.34238550066947937, "learning_rate": 2.0278839017376647e-05, "loss": 0.1163, "step": 33887 }, { "epoch": 0.6044304926336818, "grad_norm": 0.32174745202064514, "learning_rate": 2.027731052575705e-05, "loss": 0.1145, "step": 33888 }, { "epoch": 0.6044483287553954, "grad_norm": 0.1972050964832306, "learning_rate": 2.0275782052444232e-05, "loss": 0.0798, "step": 33889 }, { "epoch": 0.6044661648771091, "grad_norm": 0.16877669095993042, "learning_rate": 2.027425359744411e-05, "loss": 0.1081, "step": 33890 }, { "epoch": 0.6044840009988228, "grad_norm": 0.29525187611579895, "learning_rate": 2.0272725160762603e-05, "loss": 0.1108, "step": 33891 }, { "epoch": 0.6045018371205365, "grad_norm": 0.21393455564975739, "learning_rate": 2.027119674240566e-05, "loss": 0.133, "step": 33892 }, { "epoch": 0.6045196732422502, "grad_norm": 0.3553031086921692, "learning_rate": 2.0269668342379183e-05, "loss": 0.1705, "step": 33893 }, { "epoch": 0.6045375093639639, "grad_norm": 0.29390406608581543, "learning_rate": 2.0268139960689112e-05, "loss": 0.1674, "step": 33894 }, { "epoch": 0.6045553454856776, "grad_norm": 0.3952547311782837, "learning_rate": 2.0266611597341353e-05, "loss": 0.1113, "step": 33895 }, { "epoch": 0.6045731816073913, "grad_norm": 0.21549092233181, "learning_rate": 2.0265083252341856e-05, "loss": 0.0924, "step": 33896 }, { "epoch": 0.604591017729105, "grad_norm": 0.26260876655578613, "learning_rate": 2.026355492569653e-05, "loss": 0.1063, "step": 33897 }, { "epoch": 0.6046088538508186, "grad_norm": 0.25324922800064087, "learning_rate": 2.0262026617411302e-05, "loss": 0.1513, "step": 33898 }, { "epoch": 0.6046266899725323, "grad_norm": 0.26757606863975525, "learning_rate": 2.026049832749208e-05, "loss": 0.1257, "step": 33899 }, { "epoch": 0.604644526094246, "grad_norm": 0.2973572611808777, "learning_rate": 2.025897005594482e-05, "loss": 0.1352, "step": 33900 }, { "epoch": 0.6046623622159598, "grad_norm": 0.31855741143226624, "learning_rate": 2.025744180277542e-05, "loss": 0.173, "step": 33901 }, { "epoch": 0.6046801983376735, "grad_norm": 0.2872883975505829, "learning_rate": 2.025591356798982e-05, "loss": 0.105, "step": 33902 }, { "epoch": 0.6046980344593872, "grad_norm": 0.27869299054145813, "learning_rate": 2.025438535159394e-05, "loss": 0.1346, "step": 33903 }, { "epoch": 0.6047158705811009, "grad_norm": 0.30435711145401, "learning_rate": 2.0252857153593687e-05, "loss": 0.1301, "step": 33904 }, { "epoch": 0.6047337067028146, "grad_norm": 0.25051939487457275, "learning_rate": 2.0251328973995014e-05, "loss": 0.1275, "step": 33905 }, { "epoch": 0.6047515428245283, "grad_norm": 0.2828250825405121, "learning_rate": 2.0249800812803828e-05, "loss": 0.1254, "step": 33906 }, { "epoch": 0.6047693789462419, "grad_norm": 0.2737036347389221, "learning_rate": 2.024827267002605e-05, "loss": 0.1273, "step": 33907 }, { "epoch": 0.6047872150679556, "grad_norm": 0.3362065851688385, "learning_rate": 2.02467445456676e-05, "loss": 0.0723, "step": 33908 }, { "epoch": 0.6048050511896693, "grad_norm": 0.2804470956325531, "learning_rate": 2.0245216439734425e-05, "loss": 0.1172, "step": 33909 }, { "epoch": 0.604822887311383, "grad_norm": 0.33394521474838257, "learning_rate": 2.0243688352232432e-05, "loss": 0.1608, "step": 33910 }, { "epoch": 0.6048407234330967, "grad_norm": 0.25664955377578735, "learning_rate": 2.0242160283167544e-05, "loss": 0.1339, "step": 33911 }, { "epoch": 0.6048585595548104, "grad_norm": 0.25859886407852173, "learning_rate": 2.0240632232545677e-05, "loss": 0.0995, "step": 33912 }, { "epoch": 0.6048763956765241, "grad_norm": 0.24843475222587585, "learning_rate": 2.023910420037277e-05, "loss": 0.1509, "step": 33913 }, { "epoch": 0.6048942317982378, "grad_norm": 0.2508089542388916, "learning_rate": 2.023757618665474e-05, "loss": 0.1652, "step": 33914 }, { "epoch": 0.6049120679199514, "grad_norm": 0.21523480117321014, "learning_rate": 2.0236048191397515e-05, "loss": 0.1239, "step": 33915 }, { "epoch": 0.6049299040416651, "grad_norm": 0.2109798938035965, "learning_rate": 2.0234520214607007e-05, "loss": 0.0884, "step": 33916 }, { "epoch": 0.6049477401633788, "grad_norm": 0.22996476292610168, "learning_rate": 2.0232992256289136e-05, "loss": 0.1049, "step": 33917 }, { "epoch": 0.6049655762850926, "grad_norm": 0.24631816148757935, "learning_rate": 2.0231464316449848e-05, "loss": 0.1028, "step": 33918 }, { "epoch": 0.6049834124068063, "grad_norm": 0.25303933024406433, "learning_rate": 2.0229936395095048e-05, "loss": 0.1351, "step": 33919 }, { "epoch": 0.60500124852852, "grad_norm": 0.3592248857021332, "learning_rate": 2.0228408492230656e-05, "loss": 0.1241, "step": 33920 }, { "epoch": 0.6050190846502337, "grad_norm": 0.21240203082561493, "learning_rate": 2.0226880607862603e-05, "loss": 0.0698, "step": 33921 }, { "epoch": 0.6050369207719474, "grad_norm": 0.28455227613449097, "learning_rate": 2.0225352741996806e-05, "loss": 0.1326, "step": 33922 }, { "epoch": 0.6050547568936611, "grad_norm": 0.1921006143093109, "learning_rate": 2.0223824894639197e-05, "loss": 0.1256, "step": 33923 }, { "epoch": 0.6050725930153747, "grad_norm": 0.23239262402057648, "learning_rate": 2.0222297065795696e-05, "loss": 0.1137, "step": 33924 }, { "epoch": 0.6050904291370884, "grad_norm": 0.19406414031982422, "learning_rate": 2.022076925547221e-05, "loss": 0.0683, "step": 33925 }, { "epoch": 0.6051082652588021, "grad_norm": 0.35909807682037354, "learning_rate": 2.0219241463674682e-05, "loss": 0.1217, "step": 33926 }, { "epoch": 0.6051261013805158, "grad_norm": 0.23722302913665771, "learning_rate": 2.0217713690409027e-05, "loss": 0.0949, "step": 33927 }, { "epoch": 0.6051439375022295, "grad_norm": 0.4289957880973816, "learning_rate": 2.021618593568116e-05, "loss": 0.1334, "step": 33928 }, { "epoch": 0.6051617736239432, "grad_norm": 0.2533237338066101, "learning_rate": 2.021465819949701e-05, "loss": 0.1007, "step": 33929 }, { "epoch": 0.6051796097456569, "grad_norm": 0.25178685784339905, "learning_rate": 2.0213130481862492e-05, "loss": 0.1371, "step": 33930 }, { "epoch": 0.6051974458673706, "grad_norm": 0.2986135184764862, "learning_rate": 2.021160278278354e-05, "loss": 0.1255, "step": 33931 }, { "epoch": 0.6052152819890843, "grad_norm": 0.2928605079650879, "learning_rate": 2.0210075102266075e-05, "loss": 0.1172, "step": 33932 }, { "epoch": 0.6052331181107979, "grad_norm": 0.44925788044929504, "learning_rate": 2.020854744031601e-05, "loss": 0.1825, "step": 33933 }, { "epoch": 0.6052509542325116, "grad_norm": 0.267938494682312, "learning_rate": 2.020701979693926e-05, "loss": 0.1755, "step": 33934 }, { "epoch": 0.6052687903542254, "grad_norm": 0.2114374041557312, "learning_rate": 2.020549217214176e-05, "loss": 0.1189, "step": 33935 }, { "epoch": 0.6052866264759391, "grad_norm": 0.2029339224100113, "learning_rate": 2.0203964565929434e-05, "loss": 0.1328, "step": 33936 }, { "epoch": 0.6053044625976528, "grad_norm": 0.24222591519355774, "learning_rate": 2.0202436978308197e-05, "loss": 0.0997, "step": 33937 }, { "epoch": 0.6053222987193665, "grad_norm": 0.33572790026664734, "learning_rate": 2.0200909409283965e-05, "loss": 0.1658, "step": 33938 }, { "epoch": 0.6053401348410802, "grad_norm": 0.28817689418792725, "learning_rate": 2.0199381858862674e-05, "loss": 0.1063, "step": 33939 }, { "epoch": 0.6053579709627939, "grad_norm": 0.1996411681175232, "learning_rate": 2.0197854327050235e-05, "loss": 0.1061, "step": 33940 }, { "epoch": 0.6053758070845076, "grad_norm": 0.3642538785934448, "learning_rate": 2.019632681385257e-05, "loss": 0.1754, "step": 33941 }, { "epoch": 0.6053936432062212, "grad_norm": 0.24938619136810303, "learning_rate": 2.0194799319275604e-05, "loss": 0.1148, "step": 33942 }, { "epoch": 0.6054114793279349, "grad_norm": 0.21470627188682556, "learning_rate": 2.0193271843325243e-05, "loss": 0.0988, "step": 33943 }, { "epoch": 0.6054293154496486, "grad_norm": 0.25092217326164246, "learning_rate": 2.0191744386007434e-05, "loss": 0.133, "step": 33944 }, { "epoch": 0.6054471515713623, "grad_norm": 0.3918848931789398, "learning_rate": 2.0190216947328085e-05, "loss": 0.104, "step": 33945 }, { "epoch": 0.605464987693076, "grad_norm": 0.2911170423030853, "learning_rate": 2.0188689527293116e-05, "loss": 0.1594, "step": 33946 }, { "epoch": 0.6054828238147897, "grad_norm": 0.22725218534469604, "learning_rate": 2.0187162125908434e-05, "loss": 0.1087, "step": 33947 }, { "epoch": 0.6055006599365034, "grad_norm": 0.30462467670440674, "learning_rate": 2.0185634743179988e-05, "loss": 0.1372, "step": 33948 }, { "epoch": 0.6055184960582171, "grad_norm": 0.33564862608909607, "learning_rate": 2.018410737911368e-05, "loss": 0.1278, "step": 33949 }, { "epoch": 0.6055363321799307, "grad_norm": 0.23036031424999237, "learning_rate": 2.0182580033715436e-05, "loss": 0.1408, "step": 33950 }, { "epoch": 0.6055541683016444, "grad_norm": 0.21163055300712585, "learning_rate": 2.018105270699117e-05, "loss": 0.1238, "step": 33951 }, { "epoch": 0.6055720044233582, "grad_norm": 0.30165475606918335, "learning_rate": 2.017952539894682e-05, "loss": 0.1263, "step": 33952 }, { "epoch": 0.6055898405450719, "grad_norm": 0.3575689494609833, "learning_rate": 2.017799810958829e-05, "loss": 0.1797, "step": 33953 }, { "epoch": 0.6056076766667856, "grad_norm": 0.26383379101753235, "learning_rate": 2.0176470838921506e-05, "loss": 0.1212, "step": 33954 }, { "epoch": 0.6056255127884993, "grad_norm": 0.24447055160999298, "learning_rate": 2.017494358695238e-05, "loss": 0.1275, "step": 33955 }, { "epoch": 0.605643348910213, "grad_norm": 0.2355479598045349, "learning_rate": 2.0173416353686843e-05, "loss": 0.1487, "step": 33956 }, { "epoch": 0.6056611850319267, "grad_norm": 0.22544480860233307, "learning_rate": 2.017188913913081e-05, "loss": 0.1255, "step": 33957 }, { "epoch": 0.6056790211536404, "grad_norm": 0.23199716210365295, "learning_rate": 2.0170361943290204e-05, "loss": 0.1048, "step": 33958 }, { "epoch": 0.605696857275354, "grad_norm": 0.22903956472873688, "learning_rate": 2.0168834766170946e-05, "loss": 0.0951, "step": 33959 }, { "epoch": 0.6057146933970677, "grad_norm": 0.2908589243888855, "learning_rate": 2.0167307607778942e-05, "loss": 0.0983, "step": 33960 }, { "epoch": 0.6057325295187814, "grad_norm": 0.26266583800315857, "learning_rate": 2.0165780468120136e-05, "loss": 0.0999, "step": 33961 }, { "epoch": 0.6057503656404951, "grad_norm": 0.2423059046268463, "learning_rate": 2.0164253347200432e-05, "loss": 0.1278, "step": 33962 }, { "epoch": 0.6057682017622088, "grad_norm": 0.43719765543937683, "learning_rate": 2.0162726245025748e-05, "loss": 0.155, "step": 33963 }, { "epoch": 0.6057860378839225, "grad_norm": 0.2557755410671234, "learning_rate": 2.0161199161602e-05, "loss": 0.159, "step": 33964 }, { "epoch": 0.6058038740056362, "grad_norm": 0.2540648281574249, "learning_rate": 2.015967209693513e-05, "loss": 0.1166, "step": 33965 }, { "epoch": 0.6058217101273499, "grad_norm": 0.3005029559135437, "learning_rate": 2.015814505103104e-05, "loss": 0.1268, "step": 33966 }, { "epoch": 0.6058395462490636, "grad_norm": 0.207055002450943, "learning_rate": 2.0156618023895655e-05, "loss": 0.0992, "step": 33967 }, { "epoch": 0.6058573823707774, "grad_norm": 0.26175299286842346, "learning_rate": 2.0155091015534884e-05, "loss": 0.122, "step": 33968 }, { "epoch": 0.605875218492491, "grad_norm": 0.2624555230140686, "learning_rate": 2.0153564025954653e-05, "loss": 0.1581, "step": 33969 }, { "epoch": 0.6058930546142047, "grad_norm": 0.17795804142951965, "learning_rate": 2.0152037055160884e-05, "loss": 0.108, "step": 33970 }, { "epoch": 0.6059108907359184, "grad_norm": 0.3625028431415558, "learning_rate": 2.0150510103159496e-05, "loss": 0.1378, "step": 33971 }, { "epoch": 0.6059287268576321, "grad_norm": 0.49435219168663025, "learning_rate": 2.0148983169956405e-05, "loss": 0.1431, "step": 33972 }, { "epoch": 0.6059465629793458, "grad_norm": 0.3415945768356323, "learning_rate": 2.0147456255557524e-05, "loss": 0.0993, "step": 33973 }, { "epoch": 0.6059643991010595, "grad_norm": 0.31059375405311584, "learning_rate": 2.0145929359968788e-05, "loss": 0.1254, "step": 33974 }, { "epoch": 0.6059822352227732, "grad_norm": 0.20938971638679504, "learning_rate": 2.0144402483196106e-05, "loss": 0.1153, "step": 33975 }, { "epoch": 0.6060000713444869, "grad_norm": 0.21535004675388336, "learning_rate": 2.014287562524539e-05, "loss": 0.1185, "step": 33976 }, { "epoch": 0.6060179074662005, "grad_norm": 0.2150115668773651, "learning_rate": 2.014134878612257e-05, "loss": 0.1271, "step": 33977 }, { "epoch": 0.6060357435879142, "grad_norm": 0.5401460528373718, "learning_rate": 2.0139821965833554e-05, "loss": 0.192, "step": 33978 }, { "epoch": 0.6060535797096279, "grad_norm": 0.35790112614631653, "learning_rate": 2.0138295164384277e-05, "loss": 0.1438, "step": 33979 }, { "epoch": 0.6060714158313416, "grad_norm": 0.2671012580394745, "learning_rate": 2.0136768381780645e-05, "loss": 0.1448, "step": 33980 }, { "epoch": 0.6060892519530553, "grad_norm": 0.37059590220451355, "learning_rate": 2.013524161802858e-05, "loss": 0.1706, "step": 33981 }, { "epoch": 0.606107088074769, "grad_norm": 0.26011282205581665, "learning_rate": 2.0133714873133985e-05, "loss": 0.1657, "step": 33982 }, { "epoch": 0.6061249241964827, "grad_norm": 0.23580026626586914, "learning_rate": 2.0132188147102807e-05, "loss": 0.122, "step": 33983 }, { "epoch": 0.6061427603181964, "grad_norm": 0.22677592933177948, "learning_rate": 2.013066143994094e-05, "loss": 0.1333, "step": 33984 }, { "epoch": 0.6061605964399102, "grad_norm": 0.2703092694282532, "learning_rate": 2.012913475165432e-05, "loss": 0.1546, "step": 33985 }, { "epoch": 0.6061784325616238, "grad_norm": 0.2766292095184326, "learning_rate": 2.0127608082248843e-05, "loss": 0.1175, "step": 33986 }, { "epoch": 0.6061962686833375, "grad_norm": 0.25880423188209534, "learning_rate": 2.012608143173045e-05, "loss": 0.1167, "step": 33987 }, { "epoch": 0.6062141048050512, "grad_norm": 0.3839293122291565, "learning_rate": 2.012455480010505e-05, "loss": 0.17, "step": 33988 }, { "epoch": 0.6062319409267649, "grad_norm": 0.32370656728744507, "learning_rate": 2.012302818737856e-05, "loss": 0.1364, "step": 33989 }, { "epoch": 0.6062497770484786, "grad_norm": 0.2363174557685852, "learning_rate": 2.0121501593556884e-05, "loss": 0.1344, "step": 33990 }, { "epoch": 0.6062676131701923, "grad_norm": 0.28111395239830017, "learning_rate": 2.011997501864596e-05, "loss": 0.167, "step": 33991 }, { "epoch": 0.606285449291906, "grad_norm": 0.30178555846214294, "learning_rate": 2.0118448462651705e-05, "loss": 0.113, "step": 33992 }, { "epoch": 0.6063032854136197, "grad_norm": 0.3490726053714752, "learning_rate": 2.0116921925580025e-05, "loss": 0.1593, "step": 33993 }, { "epoch": 0.6063211215353334, "grad_norm": 0.307981014251709, "learning_rate": 2.0115395407436848e-05, "loss": 0.1455, "step": 33994 }, { "epoch": 0.606338957657047, "grad_norm": 0.26569628715515137, "learning_rate": 2.0113868908228072e-05, "loss": 0.1808, "step": 33995 }, { "epoch": 0.6063567937787607, "grad_norm": 0.26798880100250244, "learning_rate": 2.0112342427959638e-05, "loss": 0.1418, "step": 33996 }, { "epoch": 0.6063746299004744, "grad_norm": 0.2148667722940445, "learning_rate": 2.0110815966637447e-05, "loss": 0.1472, "step": 33997 }, { "epoch": 0.6063924660221881, "grad_norm": 0.25280389189720154, "learning_rate": 2.010928952426743e-05, "loss": 0.1038, "step": 33998 }, { "epoch": 0.6064103021439018, "grad_norm": 0.2429462969303131, "learning_rate": 2.0107763100855484e-05, "loss": 0.1198, "step": 33999 }, { "epoch": 0.6064281382656155, "grad_norm": 0.30507683753967285, "learning_rate": 2.0106236696407547e-05, "loss": 0.1417, "step": 34000 }, { "epoch": 0.6064281382656155, "eval_loss": 0.1242346316576004, "eval_runtime": 107.0263, "eval_samples_per_second": 9.568, "eval_steps_per_second": 1.598, "step": 34000 }, { "epoch": 0.6064459743873292, "grad_norm": 0.2689165472984314, "learning_rate": 2.0104710310929527e-05, "loss": 0.1506, "step": 34001 }, { "epoch": 0.606463810509043, "grad_norm": 0.31818389892578125, "learning_rate": 2.0103183944427344e-05, "loss": 0.1521, "step": 34002 }, { "epoch": 0.6064816466307567, "grad_norm": 0.29521316289901733, "learning_rate": 2.0101657596906896e-05, "loss": 0.0735, "step": 34003 }, { "epoch": 0.6064994827524703, "grad_norm": 0.3413086235523224, "learning_rate": 2.010013126837413e-05, "loss": 0.1599, "step": 34004 }, { "epoch": 0.606517318874184, "grad_norm": 0.21740014851093292, "learning_rate": 2.0098604958834942e-05, "loss": 0.1141, "step": 34005 }, { "epoch": 0.6065351549958977, "grad_norm": 0.25479939579963684, "learning_rate": 2.0097078668295257e-05, "loss": 0.1308, "step": 34006 }, { "epoch": 0.6065529911176114, "grad_norm": 0.25989586114883423, "learning_rate": 2.009555239676099e-05, "loss": 0.1305, "step": 34007 }, { "epoch": 0.6065708272393251, "grad_norm": 0.2964211106300354, "learning_rate": 2.0094026144238044e-05, "loss": 0.1695, "step": 34008 }, { "epoch": 0.6065886633610388, "grad_norm": 0.23744186758995056, "learning_rate": 2.009249991073236e-05, "loss": 0.1314, "step": 34009 }, { "epoch": 0.6066064994827525, "grad_norm": 0.25520065426826477, "learning_rate": 2.0090973696249838e-05, "loss": 0.1051, "step": 34010 }, { "epoch": 0.6066243356044662, "grad_norm": 0.36073005199432373, "learning_rate": 2.0089447500796395e-05, "loss": 0.1386, "step": 34011 }, { "epoch": 0.6066421717261798, "grad_norm": 0.28548458218574524, "learning_rate": 2.0087921324377944e-05, "loss": 0.0898, "step": 34012 }, { "epoch": 0.6066600078478935, "grad_norm": 0.24678723514080048, "learning_rate": 2.0086395167000414e-05, "loss": 0.1279, "step": 34013 }, { "epoch": 0.6066778439696072, "grad_norm": 0.3923526704311371, "learning_rate": 2.0084869028669717e-05, "loss": 0.1274, "step": 34014 }, { "epoch": 0.6066956800913209, "grad_norm": 0.21978864073753357, "learning_rate": 2.008334290939176e-05, "loss": 0.0898, "step": 34015 }, { "epoch": 0.6067135162130346, "grad_norm": 0.2535965144634247, "learning_rate": 2.008181680917246e-05, "loss": 0.1188, "step": 34016 }, { "epoch": 0.6067313523347483, "grad_norm": 0.22535917162895203, "learning_rate": 2.0080290728017745e-05, "loss": 0.1228, "step": 34017 }, { "epoch": 0.606749188456462, "grad_norm": 0.34669050574302673, "learning_rate": 2.0078764665933515e-05, "loss": 0.1017, "step": 34018 }, { "epoch": 0.6067670245781758, "grad_norm": 0.29563677310943604, "learning_rate": 2.00772386229257e-05, "loss": 0.1188, "step": 34019 }, { "epoch": 0.6067848606998895, "grad_norm": 0.24344807863235474, "learning_rate": 2.007571259900021e-05, "loss": 0.1641, "step": 34020 }, { "epoch": 0.6068026968216031, "grad_norm": 0.24273742735385895, "learning_rate": 2.0074186594162947e-05, "loss": 0.1025, "step": 34021 }, { "epoch": 0.6068205329433168, "grad_norm": 0.3006818890571594, "learning_rate": 2.0072660608419845e-05, "loss": 0.1333, "step": 34022 }, { "epoch": 0.6068383690650305, "grad_norm": 0.25352567434310913, "learning_rate": 2.0071134641776818e-05, "loss": 0.1666, "step": 34023 }, { "epoch": 0.6068562051867442, "grad_norm": 0.34692496061325073, "learning_rate": 2.0069608694239768e-05, "loss": 0.0932, "step": 34024 }, { "epoch": 0.6068740413084579, "grad_norm": 0.26554763317108154, "learning_rate": 2.0068082765814616e-05, "loss": 0.1389, "step": 34025 }, { "epoch": 0.6068918774301716, "grad_norm": 0.1897977888584137, "learning_rate": 2.006655685650728e-05, "loss": 0.1423, "step": 34026 }, { "epoch": 0.6069097135518853, "grad_norm": 0.29043421149253845, "learning_rate": 2.0065030966323678e-05, "loss": 0.1189, "step": 34027 }, { "epoch": 0.606927549673599, "grad_norm": 0.3475072383880615, "learning_rate": 2.006350509526972e-05, "loss": 0.1736, "step": 34028 }, { "epoch": 0.6069453857953127, "grad_norm": 0.28601470589637756, "learning_rate": 2.0061979243351313e-05, "loss": 0.1087, "step": 34029 }, { "epoch": 0.6069632219170263, "grad_norm": 0.27227428555488586, "learning_rate": 2.006045341057439e-05, "loss": 0.1418, "step": 34030 }, { "epoch": 0.60698105803874, "grad_norm": 0.23274704813957214, "learning_rate": 2.0058927596944853e-05, "loss": 0.0936, "step": 34031 }, { "epoch": 0.6069988941604537, "grad_norm": 0.2962673604488373, "learning_rate": 2.0057401802468618e-05, "loss": 0.1494, "step": 34032 }, { "epoch": 0.6070167302821674, "grad_norm": 0.22344911098480225, "learning_rate": 2.0055876027151604e-05, "loss": 0.1089, "step": 34033 }, { "epoch": 0.6070345664038811, "grad_norm": 0.33765116333961487, "learning_rate": 2.005435027099971e-05, "loss": 0.1437, "step": 34034 }, { "epoch": 0.6070524025255948, "grad_norm": 0.22132675349712372, "learning_rate": 2.005282453401888e-05, "loss": 0.1363, "step": 34035 }, { "epoch": 0.6070702386473086, "grad_norm": 0.29878923296928406, "learning_rate": 2.0051298816215002e-05, "loss": 0.1148, "step": 34036 }, { "epoch": 0.6070880747690223, "grad_norm": 0.3090602457523346, "learning_rate": 2.0049773117594003e-05, "loss": 0.1295, "step": 34037 }, { "epoch": 0.607105910890736, "grad_norm": 0.275724858045578, "learning_rate": 2.0048247438161783e-05, "loss": 0.1388, "step": 34038 }, { "epoch": 0.6071237470124496, "grad_norm": 0.19276633858680725, "learning_rate": 2.004672177792427e-05, "loss": 0.109, "step": 34039 }, { "epoch": 0.6071415831341633, "grad_norm": 0.3063611090183258, "learning_rate": 2.004519613688738e-05, "loss": 0.1526, "step": 34040 }, { "epoch": 0.607159419255877, "grad_norm": 0.2948013246059418, "learning_rate": 2.0043670515057022e-05, "loss": 0.1926, "step": 34041 }, { "epoch": 0.6071772553775907, "grad_norm": 0.20293796062469482, "learning_rate": 2.00421449124391e-05, "loss": 0.1019, "step": 34042 }, { "epoch": 0.6071950914993044, "grad_norm": 0.26538240909576416, "learning_rate": 2.004061932903954e-05, "loss": 0.1449, "step": 34043 }, { "epoch": 0.6072129276210181, "grad_norm": 0.27516084909439087, "learning_rate": 2.003909376486426e-05, "loss": 0.1479, "step": 34044 }, { "epoch": 0.6072307637427318, "grad_norm": 0.23916086554527283, "learning_rate": 2.0037568219919157e-05, "loss": 0.0884, "step": 34045 }, { "epoch": 0.6072485998644455, "grad_norm": 0.24960020184516907, "learning_rate": 2.003604269421016e-05, "loss": 0.1873, "step": 34046 }, { "epoch": 0.6072664359861591, "grad_norm": 0.17460691928863525, "learning_rate": 2.0034517187743165e-05, "loss": 0.0854, "step": 34047 }, { "epoch": 0.6072842721078728, "grad_norm": 0.23467344045639038, "learning_rate": 2.0032991700524106e-05, "loss": 0.1356, "step": 34048 }, { "epoch": 0.6073021082295865, "grad_norm": 0.2633321285247803, "learning_rate": 2.003146623255889e-05, "loss": 0.067, "step": 34049 }, { "epoch": 0.6073199443513002, "grad_norm": 0.27493250370025635, "learning_rate": 2.0029940783853423e-05, "loss": 0.1435, "step": 34050 }, { "epoch": 0.6073377804730139, "grad_norm": 0.3249135911464691, "learning_rate": 2.0028415354413615e-05, "loss": 0.1388, "step": 34051 }, { "epoch": 0.6073556165947276, "grad_norm": 0.3141963481903076, "learning_rate": 2.0026889944245397e-05, "loss": 0.1225, "step": 34052 }, { "epoch": 0.6073734527164414, "grad_norm": 0.21605466306209564, "learning_rate": 2.0025364553354666e-05, "loss": 0.0989, "step": 34053 }, { "epoch": 0.6073912888381551, "grad_norm": 0.279005229473114, "learning_rate": 2.002383918174734e-05, "loss": 0.1279, "step": 34054 }, { "epoch": 0.6074091249598688, "grad_norm": 0.28412842750549316, "learning_rate": 2.0022313829429328e-05, "loss": 0.1607, "step": 34055 }, { "epoch": 0.6074269610815825, "grad_norm": 0.2515621781349182, "learning_rate": 2.002078849640655e-05, "loss": 0.0837, "step": 34056 }, { "epoch": 0.6074447972032961, "grad_norm": 0.2768721878528595, "learning_rate": 2.001926318268492e-05, "loss": 0.1623, "step": 34057 }, { "epoch": 0.6074626333250098, "grad_norm": 0.27684301137924194, "learning_rate": 2.001773788827035e-05, "loss": 0.096, "step": 34058 }, { "epoch": 0.6074804694467235, "grad_norm": 0.34042295813560486, "learning_rate": 2.001621261316874e-05, "loss": 0.1516, "step": 34059 }, { "epoch": 0.6074983055684372, "grad_norm": 0.24979668855667114, "learning_rate": 2.0014687357386007e-05, "loss": 0.1554, "step": 34060 }, { "epoch": 0.6075161416901509, "grad_norm": 0.2412799447774887, "learning_rate": 2.0013162120928074e-05, "loss": 0.152, "step": 34061 }, { "epoch": 0.6075339778118646, "grad_norm": 0.23882125318050385, "learning_rate": 2.001163690380085e-05, "loss": 0.1459, "step": 34062 }, { "epoch": 0.6075518139335783, "grad_norm": 0.2327205240726471, "learning_rate": 2.0010111706010246e-05, "loss": 0.1369, "step": 34063 }, { "epoch": 0.607569650055292, "grad_norm": 0.25749075412750244, "learning_rate": 2.000858652756216e-05, "loss": 0.093, "step": 34064 }, { "epoch": 0.6075874861770056, "grad_norm": 0.24764353036880493, "learning_rate": 2.0007061368462527e-05, "loss": 0.146, "step": 34065 }, { "epoch": 0.6076053222987193, "grad_norm": 0.2832137942314148, "learning_rate": 2.0005536228717248e-05, "loss": 0.1352, "step": 34066 }, { "epoch": 0.607623158420433, "grad_norm": 0.2774147689342499, "learning_rate": 2.000401110833223e-05, "loss": 0.1892, "step": 34067 }, { "epoch": 0.6076409945421467, "grad_norm": 0.31670087575912476, "learning_rate": 2.0002486007313386e-05, "loss": 0.1522, "step": 34068 }, { "epoch": 0.6076588306638605, "grad_norm": 0.20794054865837097, "learning_rate": 2.0000960925666645e-05, "loss": 0.128, "step": 34069 }, { "epoch": 0.6076766667855742, "grad_norm": 0.25698500871658325, "learning_rate": 1.9999435863397904e-05, "loss": 0.1666, "step": 34070 }, { "epoch": 0.6076945029072879, "grad_norm": 0.2686476409435272, "learning_rate": 1.999791082051308e-05, "loss": 0.1747, "step": 34071 }, { "epoch": 0.6077123390290016, "grad_norm": 0.2534348666667938, "learning_rate": 1.9996385797018067e-05, "loss": 0.1083, "step": 34072 }, { "epoch": 0.6077301751507153, "grad_norm": 0.4016781151294708, "learning_rate": 1.9994860792918802e-05, "loss": 0.1703, "step": 34073 }, { "epoch": 0.607748011272429, "grad_norm": 0.2587624192237854, "learning_rate": 1.9993335808221182e-05, "loss": 0.1367, "step": 34074 }, { "epoch": 0.6077658473941426, "grad_norm": 0.3383089601993561, "learning_rate": 1.9991810842931124e-05, "loss": 0.1752, "step": 34075 }, { "epoch": 0.6077836835158563, "grad_norm": 0.26160141825675964, "learning_rate": 1.999028589705454e-05, "loss": 0.0497, "step": 34076 }, { "epoch": 0.60780151963757, "grad_norm": 0.2654273808002472, "learning_rate": 1.9988760970597322e-05, "loss": 0.1649, "step": 34077 }, { "epoch": 0.6078193557592837, "grad_norm": 0.29998132586479187, "learning_rate": 1.998723606356541e-05, "loss": 0.115, "step": 34078 }, { "epoch": 0.6078371918809974, "grad_norm": 0.2516030967235565, "learning_rate": 1.998571117596471e-05, "loss": 0.1638, "step": 34079 }, { "epoch": 0.6078550280027111, "grad_norm": 0.24922418594360352, "learning_rate": 1.9984186307801113e-05, "loss": 0.1112, "step": 34080 }, { "epoch": 0.6078728641244248, "grad_norm": 0.3222910463809967, "learning_rate": 1.9982661459080542e-05, "loss": 0.1405, "step": 34081 }, { "epoch": 0.6078907002461384, "grad_norm": 0.286417156457901, "learning_rate": 1.998113662980891e-05, "loss": 0.1348, "step": 34082 }, { "epoch": 0.6079085363678521, "grad_norm": 0.3120495676994324, "learning_rate": 1.997961181999213e-05, "loss": 0.1489, "step": 34083 }, { "epoch": 0.6079263724895658, "grad_norm": 0.19435276091098785, "learning_rate": 1.997808702963611e-05, "loss": 0.1184, "step": 34084 }, { "epoch": 0.6079442086112795, "grad_norm": 0.21123048663139343, "learning_rate": 1.9976562258746746e-05, "loss": 0.1583, "step": 34085 }, { "epoch": 0.6079620447329933, "grad_norm": 0.36681869626045227, "learning_rate": 1.9975037507329976e-05, "loss": 0.127, "step": 34086 }, { "epoch": 0.607979880854707, "grad_norm": 0.2873871326446533, "learning_rate": 1.9973512775391694e-05, "loss": 0.1262, "step": 34087 }, { "epoch": 0.6079977169764207, "grad_norm": 0.24000637233257294, "learning_rate": 1.997198806293781e-05, "loss": 0.1069, "step": 34088 }, { "epoch": 0.6080155530981344, "grad_norm": 0.2373301237821579, "learning_rate": 1.997046336997424e-05, "loss": 0.0747, "step": 34089 }, { "epoch": 0.6080333892198481, "grad_norm": 0.24301819503307343, "learning_rate": 1.9968938696506876e-05, "loss": 0.1761, "step": 34090 }, { "epoch": 0.6080512253415618, "grad_norm": 0.7439576387405396, "learning_rate": 1.996741404254166e-05, "loss": 0.1731, "step": 34091 }, { "epoch": 0.6080690614632754, "grad_norm": 0.2787877917289734, "learning_rate": 1.9965889408084483e-05, "loss": 0.1359, "step": 34092 }, { "epoch": 0.6080868975849891, "grad_norm": 0.2211727648973465, "learning_rate": 1.996436479314126e-05, "loss": 0.0982, "step": 34093 }, { "epoch": 0.6081047337067028, "grad_norm": 0.23949767649173737, "learning_rate": 1.9962840197717887e-05, "loss": 0.1078, "step": 34094 }, { "epoch": 0.6081225698284165, "grad_norm": 0.2994820773601532, "learning_rate": 1.9961315621820286e-05, "loss": 0.1266, "step": 34095 }, { "epoch": 0.6081404059501302, "grad_norm": 0.22102081775665283, "learning_rate": 1.9959791065454376e-05, "loss": 0.1099, "step": 34096 }, { "epoch": 0.6081582420718439, "grad_norm": 0.25049301981925964, "learning_rate": 1.9958266528626058e-05, "loss": 0.1639, "step": 34097 }, { "epoch": 0.6081760781935576, "grad_norm": 0.3949906826019287, "learning_rate": 1.9956742011341225e-05, "loss": 0.1561, "step": 34098 }, { "epoch": 0.6081939143152713, "grad_norm": 0.24044504761695862, "learning_rate": 1.9955217513605815e-05, "loss": 0.1068, "step": 34099 }, { "epoch": 0.6082117504369849, "grad_norm": 0.270826131105423, "learning_rate": 1.9953693035425726e-05, "loss": 0.1076, "step": 34100 }, { "epoch": 0.6082295865586986, "grad_norm": 0.25290408730506897, "learning_rate": 1.9952168576806856e-05, "loss": 0.0949, "step": 34101 }, { "epoch": 0.6082474226804123, "grad_norm": 0.22967013716697693, "learning_rate": 1.9950644137755132e-05, "loss": 0.1298, "step": 34102 }, { "epoch": 0.6082652588021261, "grad_norm": 0.2955591380596161, "learning_rate": 1.9949119718276442e-05, "loss": 0.2006, "step": 34103 }, { "epoch": 0.6082830949238398, "grad_norm": 0.27504876255989075, "learning_rate": 1.9947595318376722e-05, "loss": 0.137, "step": 34104 }, { "epoch": 0.6083009310455535, "grad_norm": 0.30607736110687256, "learning_rate": 1.9946070938061867e-05, "loss": 0.135, "step": 34105 }, { "epoch": 0.6083187671672672, "grad_norm": 0.22454895079135895, "learning_rate": 1.9944546577337787e-05, "loss": 0.1128, "step": 34106 }, { "epoch": 0.6083366032889809, "grad_norm": 0.2026168256998062, "learning_rate": 1.994302223621038e-05, "loss": 0.1249, "step": 34107 }, { "epoch": 0.6083544394106946, "grad_norm": 0.18978428840637207, "learning_rate": 1.9941497914685574e-05, "loss": 0.0853, "step": 34108 }, { "epoch": 0.6083722755324082, "grad_norm": 0.2684705853462219, "learning_rate": 1.9939973612769267e-05, "loss": 0.1369, "step": 34109 }, { "epoch": 0.6083901116541219, "grad_norm": 0.330782413482666, "learning_rate": 1.9938449330467373e-05, "loss": 0.1358, "step": 34110 }, { "epoch": 0.6084079477758356, "grad_norm": 0.27003681659698486, "learning_rate": 1.9936925067785787e-05, "loss": 0.1153, "step": 34111 }, { "epoch": 0.6084257838975493, "grad_norm": 0.22729693353176117, "learning_rate": 1.9935400824730437e-05, "loss": 0.0943, "step": 34112 }, { "epoch": 0.608443620019263, "grad_norm": 0.2702178955078125, "learning_rate": 1.993387660130723e-05, "loss": 0.0668, "step": 34113 }, { "epoch": 0.6084614561409767, "grad_norm": 0.24327760934829712, "learning_rate": 1.9932352397522057e-05, "loss": 0.1077, "step": 34114 }, { "epoch": 0.6084792922626904, "grad_norm": 0.2638218402862549, "learning_rate": 1.993082821338084e-05, "loss": 0.0992, "step": 34115 }, { "epoch": 0.6084971283844041, "grad_norm": 0.4522806704044342, "learning_rate": 1.9929304048889475e-05, "loss": 0.1623, "step": 34116 }, { "epoch": 0.6085149645061178, "grad_norm": 0.354140043258667, "learning_rate": 1.992777990405389e-05, "loss": 0.2001, "step": 34117 }, { "epoch": 0.6085328006278314, "grad_norm": 0.25301435589790344, "learning_rate": 1.9926255778879978e-05, "loss": 0.067, "step": 34118 }, { "epoch": 0.6085506367495451, "grad_norm": 0.27323105931282043, "learning_rate": 1.9924731673373657e-05, "loss": 0.1098, "step": 34119 }, { "epoch": 0.6085684728712589, "grad_norm": 0.2349899411201477, "learning_rate": 1.9923207587540814e-05, "loss": 0.103, "step": 34120 }, { "epoch": 0.6085863089929726, "grad_norm": 0.3228157162666321, "learning_rate": 1.9921683521387386e-05, "loss": 0.1176, "step": 34121 }, { "epoch": 0.6086041451146863, "grad_norm": 0.3161437511444092, "learning_rate": 1.9920159474919255e-05, "loss": 0.111, "step": 34122 }, { "epoch": 0.6086219812364, "grad_norm": 0.31503742933273315, "learning_rate": 1.9918635448142352e-05, "loss": 0.1351, "step": 34123 }, { "epoch": 0.6086398173581137, "grad_norm": 0.21428050100803375, "learning_rate": 1.991711144106256e-05, "loss": 0.1409, "step": 34124 }, { "epoch": 0.6086576534798274, "grad_norm": 0.24637004733085632, "learning_rate": 1.991558745368581e-05, "loss": 0.1115, "step": 34125 }, { "epoch": 0.608675489601541, "grad_norm": 0.2616561949253082, "learning_rate": 1.9914063486018e-05, "loss": 0.1417, "step": 34126 }, { "epoch": 0.6086933257232547, "grad_norm": 0.23726029694080353, "learning_rate": 1.9912539538065038e-05, "loss": 0.0882, "step": 34127 }, { "epoch": 0.6087111618449684, "grad_norm": 0.22764617204666138, "learning_rate": 1.9911015609832823e-05, "loss": 0.152, "step": 34128 }, { "epoch": 0.6087289979666821, "grad_norm": 0.18785883486270905, "learning_rate": 1.9909491701327266e-05, "loss": 0.1016, "step": 34129 }, { "epoch": 0.6087468340883958, "grad_norm": 0.21322157979011536, "learning_rate": 1.9907967812554284e-05, "loss": 0.1329, "step": 34130 }, { "epoch": 0.6087646702101095, "grad_norm": 0.26051679253578186, "learning_rate": 1.990644394351978e-05, "loss": 0.1578, "step": 34131 }, { "epoch": 0.6087825063318232, "grad_norm": 0.2870309352874756, "learning_rate": 1.9904920094229655e-05, "loss": 0.1322, "step": 34132 }, { "epoch": 0.6088003424535369, "grad_norm": 0.250116765499115, "learning_rate": 1.9903396264689813e-05, "loss": 0.1447, "step": 34133 }, { "epoch": 0.6088181785752506, "grad_norm": 0.30739596486091614, "learning_rate": 1.9901872454906176e-05, "loss": 0.2093, "step": 34134 }, { "epoch": 0.6088360146969642, "grad_norm": 0.2513252794742584, "learning_rate": 1.9900348664884642e-05, "loss": 0.1389, "step": 34135 }, { "epoch": 0.6088538508186779, "grad_norm": 0.26735758781433105, "learning_rate": 1.9898824894631116e-05, "loss": 0.1346, "step": 34136 }, { "epoch": 0.6088716869403917, "grad_norm": 0.3010508418083191, "learning_rate": 1.9897301144151502e-05, "loss": 0.1581, "step": 34137 }, { "epoch": 0.6088895230621054, "grad_norm": 0.27822795510292053, "learning_rate": 1.9895777413451717e-05, "loss": 0.0965, "step": 34138 }, { "epoch": 0.6089073591838191, "grad_norm": 0.34074705839157104, "learning_rate": 1.989425370253766e-05, "loss": 0.1304, "step": 34139 }, { "epoch": 0.6089251953055328, "grad_norm": 0.2117132693529129, "learning_rate": 1.9892730011415245e-05, "loss": 0.1317, "step": 34140 }, { "epoch": 0.6089430314272465, "grad_norm": 0.30902761220932007, "learning_rate": 1.989120634009037e-05, "loss": 0.1181, "step": 34141 }, { "epoch": 0.6089608675489602, "grad_norm": 0.2999575138092041, "learning_rate": 1.988968268856893e-05, "loss": 0.1224, "step": 34142 }, { "epoch": 0.6089787036706739, "grad_norm": 0.35089242458343506, "learning_rate": 1.9888159056856858e-05, "loss": 0.1854, "step": 34143 }, { "epoch": 0.6089965397923875, "grad_norm": 0.2679131031036377, "learning_rate": 1.9886635444960044e-05, "loss": 0.1278, "step": 34144 }, { "epoch": 0.6090143759141012, "grad_norm": 0.42240259051322937, "learning_rate": 1.9885111852884404e-05, "loss": 0.1643, "step": 34145 }, { "epoch": 0.6090322120358149, "grad_norm": 0.2190408706665039, "learning_rate": 1.988358828063582e-05, "loss": 0.1285, "step": 34146 }, { "epoch": 0.6090500481575286, "grad_norm": 0.24016138911247253, "learning_rate": 1.9882064728220227e-05, "loss": 0.1616, "step": 34147 }, { "epoch": 0.6090678842792423, "grad_norm": 0.23907403647899628, "learning_rate": 1.9880541195643522e-05, "loss": 0.1572, "step": 34148 }, { "epoch": 0.609085720400956, "grad_norm": 0.254570871591568, "learning_rate": 1.9879017682911603e-05, "loss": 0.0794, "step": 34149 }, { "epoch": 0.6091035565226697, "grad_norm": 0.30045750737190247, "learning_rate": 1.9877494190030376e-05, "loss": 0.1975, "step": 34150 }, { "epoch": 0.6091213926443834, "grad_norm": 0.30831149220466614, "learning_rate": 1.9875970717005755e-05, "loss": 0.1631, "step": 34151 }, { "epoch": 0.609139228766097, "grad_norm": 0.3792189359664917, "learning_rate": 1.9874447263843644e-05, "loss": 0.2206, "step": 34152 }, { "epoch": 0.6091570648878107, "grad_norm": 0.2211562693119049, "learning_rate": 1.9872923830549944e-05, "loss": 0.0924, "step": 34153 }, { "epoch": 0.6091749010095245, "grad_norm": 0.2657011151313782, "learning_rate": 1.9871400417130564e-05, "loss": 0.1238, "step": 34154 }, { "epoch": 0.6091927371312382, "grad_norm": 0.311235636472702, "learning_rate": 1.98698770235914e-05, "loss": 0.1411, "step": 34155 }, { "epoch": 0.6092105732529519, "grad_norm": 0.2832774817943573, "learning_rate": 1.986835364993837e-05, "loss": 0.1304, "step": 34156 }, { "epoch": 0.6092284093746656, "grad_norm": 0.25852981209754944, "learning_rate": 1.9866830296177372e-05, "loss": 0.119, "step": 34157 }, { "epoch": 0.6092462454963793, "grad_norm": 0.350294828414917, "learning_rate": 1.986530696231432e-05, "loss": 0.1365, "step": 34158 }, { "epoch": 0.609264081618093, "grad_norm": 0.2197064906358719, "learning_rate": 1.9863783648355096e-05, "loss": 0.0999, "step": 34159 }, { "epoch": 0.6092819177398067, "grad_norm": 0.183275043964386, "learning_rate": 1.9862260354305627e-05, "loss": 0.1393, "step": 34160 }, { "epoch": 0.6092997538615204, "grad_norm": 0.2630709409713745, "learning_rate": 1.986073708017182e-05, "loss": 0.0853, "step": 34161 }, { "epoch": 0.609317589983234, "grad_norm": 0.34906816482543945, "learning_rate": 1.985921382595957e-05, "loss": 0.1438, "step": 34162 }, { "epoch": 0.6093354261049477, "grad_norm": 0.2369636744260788, "learning_rate": 1.9857690591674768e-05, "loss": 0.1259, "step": 34163 }, { "epoch": 0.6093532622266614, "grad_norm": 0.350037157535553, "learning_rate": 1.9856167377323347e-05, "loss": 0.195, "step": 34164 }, { "epoch": 0.6093710983483751, "grad_norm": 0.18668416142463684, "learning_rate": 1.9854644182911193e-05, "loss": 0.1089, "step": 34165 }, { "epoch": 0.6093889344700888, "grad_norm": 0.21468549966812134, "learning_rate": 1.985312100844422e-05, "loss": 0.1294, "step": 34166 }, { "epoch": 0.6094067705918025, "grad_norm": 0.35791632533073425, "learning_rate": 1.9851597853928327e-05, "loss": 0.136, "step": 34167 }, { "epoch": 0.6094246067135162, "grad_norm": 0.239421084523201, "learning_rate": 1.985007471936941e-05, "loss": 0.1353, "step": 34168 }, { "epoch": 0.6094424428352299, "grad_norm": 0.2497035413980484, "learning_rate": 1.9848551604773387e-05, "loss": 0.0773, "step": 34169 }, { "epoch": 0.6094602789569437, "grad_norm": 0.22092542052268982, "learning_rate": 1.9847028510146167e-05, "loss": 0.1334, "step": 34170 }, { "epoch": 0.6094781150786573, "grad_norm": 0.22181285917758942, "learning_rate": 1.984550543549363e-05, "loss": 0.1151, "step": 34171 }, { "epoch": 0.609495951200371, "grad_norm": 0.3666284382343292, "learning_rate": 1.9843982380821692e-05, "loss": 0.1349, "step": 34172 }, { "epoch": 0.6095137873220847, "grad_norm": 0.26535218954086304, "learning_rate": 1.9842459346136273e-05, "loss": 0.1896, "step": 34173 }, { "epoch": 0.6095316234437984, "grad_norm": 0.3155516982078552, "learning_rate": 1.9840936331443257e-05, "loss": 0.1502, "step": 34174 }, { "epoch": 0.6095494595655121, "grad_norm": 0.24373160302639008, "learning_rate": 1.9839413336748557e-05, "loss": 0.1228, "step": 34175 }, { "epoch": 0.6095672956872258, "grad_norm": 0.2694872319698334, "learning_rate": 1.983789036205806e-05, "loss": 0.1657, "step": 34176 }, { "epoch": 0.6095851318089395, "grad_norm": 0.2525143325328827, "learning_rate": 1.98363674073777e-05, "loss": 0.1219, "step": 34177 }, { "epoch": 0.6096029679306532, "grad_norm": 0.3405756950378418, "learning_rate": 1.9834844472713352e-05, "loss": 0.186, "step": 34178 }, { "epoch": 0.6096208040523668, "grad_norm": 0.16753199696540833, "learning_rate": 1.983332155807094e-05, "loss": 0.0856, "step": 34179 }, { "epoch": 0.6096386401740805, "grad_norm": 0.21299999952316284, "learning_rate": 1.9831798663456354e-05, "loss": 0.1491, "step": 34180 }, { "epoch": 0.6096564762957942, "grad_norm": 0.193110853433609, "learning_rate": 1.9830275788875493e-05, "loss": 0.0893, "step": 34181 }, { "epoch": 0.6096743124175079, "grad_norm": 0.23878173530101776, "learning_rate": 1.982875293433428e-05, "loss": 0.1487, "step": 34182 }, { "epoch": 0.6096921485392216, "grad_norm": 0.26100605726242065, "learning_rate": 1.9827230099838606e-05, "loss": 0.1289, "step": 34183 }, { "epoch": 0.6097099846609353, "grad_norm": 0.27328264713287354, "learning_rate": 1.9825707285394367e-05, "loss": 0.1483, "step": 34184 }, { "epoch": 0.609727820782649, "grad_norm": 0.30536216497421265, "learning_rate": 1.982418449100748e-05, "loss": 0.1395, "step": 34185 }, { "epoch": 0.6097456569043627, "grad_norm": 0.27769824862480164, "learning_rate": 1.982266171668384e-05, "loss": 0.1278, "step": 34186 }, { "epoch": 0.6097634930260765, "grad_norm": 0.1696663647890091, "learning_rate": 1.982113896242935e-05, "loss": 0.0707, "step": 34187 }, { "epoch": 0.6097813291477902, "grad_norm": 0.2461685836315155, "learning_rate": 1.981961622824992e-05, "loss": 0.0882, "step": 34188 }, { "epoch": 0.6097991652695038, "grad_norm": 0.3408156931400299, "learning_rate": 1.9818093514151435e-05, "loss": 0.0945, "step": 34189 }, { "epoch": 0.6098170013912175, "grad_norm": 0.3597780466079712, "learning_rate": 1.9816570820139818e-05, "loss": 0.1091, "step": 34190 }, { "epoch": 0.6098348375129312, "grad_norm": 0.1795509308576584, "learning_rate": 1.9815048146220967e-05, "loss": 0.1037, "step": 34191 }, { "epoch": 0.6098526736346449, "grad_norm": 0.22486886382102966, "learning_rate": 1.981352549240077e-05, "loss": 0.0914, "step": 34192 }, { "epoch": 0.6098705097563586, "grad_norm": 0.30734267830848694, "learning_rate": 1.9812002858685147e-05, "loss": 0.1615, "step": 34193 }, { "epoch": 0.6098883458780723, "grad_norm": 0.2527114450931549, "learning_rate": 1.9810480245079982e-05, "loss": 0.1404, "step": 34194 }, { "epoch": 0.609906181999786, "grad_norm": 0.23037639260292053, "learning_rate": 1.98089576515912e-05, "loss": 0.0911, "step": 34195 }, { "epoch": 0.6099240181214997, "grad_norm": 0.24946050345897675, "learning_rate": 1.980743507822469e-05, "loss": 0.1245, "step": 34196 }, { "epoch": 0.6099418542432133, "grad_norm": 0.2214963585138321, "learning_rate": 1.9805912524986354e-05, "loss": 0.129, "step": 34197 }, { "epoch": 0.609959690364927, "grad_norm": 0.22918926179409027, "learning_rate": 1.9804389991882086e-05, "loss": 0.1153, "step": 34198 }, { "epoch": 0.6099775264866407, "grad_norm": 0.35305845737457275, "learning_rate": 1.98028674789178e-05, "loss": 0.115, "step": 34199 }, { "epoch": 0.6099953626083544, "grad_norm": 0.2801823318004608, "learning_rate": 1.9801344986099403e-05, "loss": 0.1569, "step": 34200 }, { "epoch": 0.6100131987300681, "grad_norm": 0.22940704226493835, "learning_rate": 1.9799822513432785e-05, "loss": 0.1385, "step": 34201 }, { "epoch": 0.6100310348517818, "grad_norm": 0.34577682614326477, "learning_rate": 1.9798300060923847e-05, "loss": 0.1282, "step": 34202 }, { "epoch": 0.6100488709734955, "grad_norm": 0.2817285358905792, "learning_rate": 1.97967776285785e-05, "loss": 0.1624, "step": 34203 }, { "epoch": 0.6100667070952093, "grad_norm": 0.3031929135322571, "learning_rate": 1.9795255216402637e-05, "loss": 0.1474, "step": 34204 }, { "epoch": 0.610084543216923, "grad_norm": 0.19765150547027588, "learning_rate": 1.9793732824402166e-05, "loss": 0.1099, "step": 34205 }, { "epoch": 0.6101023793386366, "grad_norm": 0.31663721799850464, "learning_rate": 1.9792210452582983e-05, "loss": 0.1244, "step": 34206 }, { "epoch": 0.6101202154603503, "grad_norm": 0.2439422607421875, "learning_rate": 1.9790688100950983e-05, "loss": 0.1514, "step": 34207 }, { "epoch": 0.610138051582064, "grad_norm": 0.2734171450138092, "learning_rate": 1.9789165769512083e-05, "loss": 0.1354, "step": 34208 }, { "epoch": 0.6101558877037777, "grad_norm": 0.23721948266029358, "learning_rate": 1.9787643458272178e-05, "loss": 0.1209, "step": 34209 }, { "epoch": 0.6101737238254914, "grad_norm": 0.2097400724887848, "learning_rate": 1.978612116723717e-05, "loss": 0.1291, "step": 34210 }, { "epoch": 0.6101915599472051, "grad_norm": 0.23914022743701935, "learning_rate": 1.9784598896412943e-05, "loss": 0.1679, "step": 34211 }, { "epoch": 0.6102093960689188, "grad_norm": 0.27593016624450684, "learning_rate": 1.9783076645805422e-05, "loss": 0.1552, "step": 34212 }, { "epoch": 0.6102272321906325, "grad_norm": 0.27197569608688354, "learning_rate": 1.9781554415420493e-05, "loss": 0.127, "step": 34213 }, { "epoch": 0.6102450683123462, "grad_norm": 0.22706715762615204, "learning_rate": 1.978003220526407e-05, "loss": 0.1118, "step": 34214 }, { "epoch": 0.6102629044340598, "grad_norm": 0.3037537634372711, "learning_rate": 1.9778510015342034e-05, "loss": 0.1413, "step": 34215 }, { "epoch": 0.6102807405557735, "grad_norm": 0.2389712780714035, "learning_rate": 1.9776987845660305e-05, "loss": 0.1133, "step": 34216 }, { "epoch": 0.6102985766774872, "grad_norm": 0.23319010436534882, "learning_rate": 1.9775465696224777e-05, "loss": 0.1128, "step": 34217 }, { "epoch": 0.6103164127992009, "grad_norm": 0.2975747585296631, "learning_rate": 1.9773943567041346e-05, "loss": 0.0963, "step": 34218 }, { "epoch": 0.6103342489209146, "grad_norm": 0.25807785987854004, "learning_rate": 1.977242145811591e-05, "loss": 0.1057, "step": 34219 }, { "epoch": 0.6103520850426283, "grad_norm": 0.24133466184139252, "learning_rate": 1.9770899369454377e-05, "loss": 0.1077, "step": 34220 }, { "epoch": 0.6103699211643421, "grad_norm": 0.19542334973812103, "learning_rate": 1.9769377301062643e-05, "loss": 0.0914, "step": 34221 }, { "epoch": 0.6103877572860558, "grad_norm": 0.3509823977947235, "learning_rate": 1.976785525294661e-05, "loss": 0.1649, "step": 34222 }, { "epoch": 0.6104055934077695, "grad_norm": 0.27924787998199463, "learning_rate": 1.9766333225112183e-05, "loss": 0.0883, "step": 34223 }, { "epoch": 0.6104234295294831, "grad_norm": 0.31095796823501587, "learning_rate": 1.976481121756524e-05, "loss": 0.1443, "step": 34224 }, { "epoch": 0.6104412656511968, "grad_norm": 0.29152435064315796, "learning_rate": 1.9763289230311712e-05, "loss": 0.0874, "step": 34225 }, { "epoch": 0.6104591017729105, "grad_norm": 0.2683859169483185, "learning_rate": 1.976176726335748e-05, "loss": 0.127, "step": 34226 }, { "epoch": 0.6104769378946242, "grad_norm": 0.26801446080207825, "learning_rate": 1.9760245316708445e-05, "loss": 0.1143, "step": 34227 }, { "epoch": 0.6104947740163379, "grad_norm": 0.27824270725250244, "learning_rate": 1.9758723390370503e-05, "loss": 0.1502, "step": 34228 }, { "epoch": 0.6105126101380516, "grad_norm": 0.3395063281059265, "learning_rate": 1.9757201484349567e-05, "loss": 0.1607, "step": 34229 }, { "epoch": 0.6105304462597653, "grad_norm": 0.23764997720718384, "learning_rate": 1.9755679598651534e-05, "loss": 0.1029, "step": 34230 }, { "epoch": 0.610548282381479, "grad_norm": 0.4214462637901306, "learning_rate": 1.9754157733282298e-05, "loss": 0.1329, "step": 34231 }, { "epoch": 0.6105661185031926, "grad_norm": 0.3583911955356598, "learning_rate": 1.975263588824775e-05, "loss": 0.1016, "step": 34232 }, { "epoch": 0.6105839546249063, "grad_norm": 0.25084757804870605, "learning_rate": 1.97511140635538e-05, "loss": 0.1512, "step": 34233 }, { "epoch": 0.61060179074662, "grad_norm": 0.28068971633911133, "learning_rate": 1.974959225920634e-05, "loss": 0.1105, "step": 34234 }, { "epoch": 0.6106196268683337, "grad_norm": 0.2599231004714966, "learning_rate": 1.9748070475211283e-05, "loss": 0.1225, "step": 34235 }, { "epoch": 0.6106374629900474, "grad_norm": 0.2763570249080658, "learning_rate": 1.974654871157452e-05, "loss": 0.1211, "step": 34236 }, { "epoch": 0.6106552991117611, "grad_norm": 0.19660866260528564, "learning_rate": 1.9745026968301935e-05, "loss": 0.1044, "step": 34237 }, { "epoch": 0.6106731352334749, "grad_norm": 0.25641003251075745, "learning_rate": 1.9743505245399453e-05, "loss": 0.1464, "step": 34238 }, { "epoch": 0.6106909713551886, "grad_norm": 0.26230141520500183, "learning_rate": 1.9741983542872962e-05, "loss": 0.1119, "step": 34239 }, { "epoch": 0.6107088074769023, "grad_norm": 0.24544131755828857, "learning_rate": 1.974046186072835e-05, "loss": 0.1611, "step": 34240 }, { "epoch": 0.610726643598616, "grad_norm": 0.21997612714767456, "learning_rate": 1.9738940198971527e-05, "loss": 0.1438, "step": 34241 }, { "epoch": 0.6107444797203296, "grad_norm": 0.3547186851501465, "learning_rate": 1.9737418557608387e-05, "loss": 0.1408, "step": 34242 }, { "epoch": 0.6107623158420433, "grad_norm": 0.2614040970802307, "learning_rate": 1.9735896936644836e-05, "loss": 0.1023, "step": 34243 }, { "epoch": 0.610780151963757, "grad_norm": 0.31797677278518677, "learning_rate": 1.9734375336086766e-05, "loss": 0.1389, "step": 34244 }, { "epoch": 0.6107979880854707, "grad_norm": 0.29432475566864014, "learning_rate": 1.973285375594008e-05, "loss": 0.1878, "step": 34245 }, { "epoch": 0.6108158242071844, "grad_norm": 0.22685424983501434, "learning_rate": 1.9731332196210655e-05, "loss": 0.1235, "step": 34246 }, { "epoch": 0.6108336603288981, "grad_norm": 0.21128126978874207, "learning_rate": 1.9729810656904418e-05, "loss": 0.1546, "step": 34247 }, { "epoch": 0.6108514964506118, "grad_norm": 0.3172430694103241, "learning_rate": 1.9728289138027253e-05, "loss": 0.1062, "step": 34248 }, { "epoch": 0.6108693325723255, "grad_norm": 0.33118540048599243, "learning_rate": 1.9726767639585063e-05, "loss": 0.1491, "step": 34249 }, { "epoch": 0.6108871686940391, "grad_norm": 0.21668967604637146, "learning_rate": 1.9725246161583737e-05, "loss": 0.1533, "step": 34250 }, { "epoch": 0.6109050048157528, "grad_norm": 0.2716294229030609, "learning_rate": 1.9723724704029182e-05, "loss": 0.147, "step": 34251 }, { "epoch": 0.6109228409374665, "grad_norm": 0.31951966881752014, "learning_rate": 1.9722203266927296e-05, "loss": 0.1432, "step": 34252 }, { "epoch": 0.6109406770591802, "grad_norm": 0.25519925355911255, "learning_rate": 1.972068185028397e-05, "loss": 0.1288, "step": 34253 }, { "epoch": 0.6109585131808939, "grad_norm": 0.24568498134613037, "learning_rate": 1.97191604541051e-05, "loss": 0.0943, "step": 34254 }, { "epoch": 0.6109763493026077, "grad_norm": 0.24958018958568573, "learning_rate": 1.9717639078396595e-05, "loss": 0.1045, "step": 34255 }, { "epoch": 0.6109941854243214, "grad_norm": 0.34653839468955994, "learning_rate": 1.9716117723164346e-05, "loss": 0.1455, "step": 34256 }, { "epoch": 0.6110120215460351, "grad_norm": 0.2604621350765228, "learning_rate": 1.9714596388414248e-05, "loss": 0.1176, "step": 34257 }, { "epoch": 0.6110298576677488, "grad_norm": 0.22539140284061432, "learning_rate": 1.9713075074152203e-05, "loss": 0.1233, "step": 34258 }, { "epoch": 0.6110476937894624, "grad_norm": 0.29068025946617126, "learning_rate": 1.9711553780384093e-05, "loss": 0.1277, "step": 34259 }, { "epoch": 0.6110655299111761, "grad_norm": 0.2469399869441986, "learning_rate": 1.9710032507115837e-05, "loss": 0.123, "step": 34260 }, { "epoch": 0.6110833660328898, "grad_norm": 0.3095710277557373, "learning_rate": 1.9708511254353318e-05, "loss": 0.1083, "step": 34261 }, { "epoch": 0.6111012021546035, "grad_norm": 0.23690742254257202, "learning_rate": 1.9706990022102443e-05, "loss": 0.1661, "step": 34262 }, { "epoch": 0.6111190382763172, "grad_norm": 0.32723286747932434, "learning_rate": 1.970546881036909e-05, "loss": 0.123, "step": 34263 }, { "epoch": 0.6111368743980309, "grad_norm": 0.28426942229270935, "learning_rate": 1.970394761915918e-05, "loss": 0.1627, "step": 34264 }, { "epoch": 0.6111547105197446, "grad_norm": 0.23602648079395294, "learning_rate": 1.97024264484786e-05, "loss": 0.1219, "step": 34265 }, { "epoch": 0.6111725466414583, "grad_norm": 0.26101139187812805, "learning_rate": 1.9700905298333244e-05, "loss": 0.1462, "step": 34266 }, { "epoch": 0.611190382763172, "grad_norm": 0.23656578361988068, "learning_rate": 1.9699384168729e-05, "loss": 0.0578, "step": 34267 }, { "epoch": 0.6112082188848856, "grad_norm": 0.2915533483028412, "learning_rate": 1.9697863059671783e-05, "loss": 0.1806, "step": 34268 }, { "epoch": 0.6112260550065993, "grad_norm": 0.2514471411705017, "learning_rate": 1.9696341971167474e-05, "loss": 0.1252, "step": 34269 }, { "epoch": 0.611243891128313, "grad_norm": 0.228690505027771, "learning_rate": 1.9694820903221977e-05, "loss": 0.1373, "step": 34270 }, { "epoch": 0.6112617272500268, "grad_norm": 0.32270339131355286, "learning_rate": 1.9693299855841193e-05, "loss": 0.1129, "step": 34271 }, { "epoch": 0.6112795633717405, "grad_norm": 0.31586652994155884, "learning_rate": 1.9691778829031e-05, "loss": 0.1563, "step": 34272 }, { "epoch": 0.6112973994934542, "grad_norm": 0.2652791440486908, "learning_rate": 1.9690257822797315e-05, "loss": 0.1156, "step": 34273 }, { "epoch": 0.6113152356151679, "grad_norm": 0.2195061594247818, "learning_rate": 1.9688736837146025e-05, "loss": 0.0932, "step": 34274 }, { "epoch": 0.6113330717368816, "grad_norm": 0.35406753420829773, "learning_rate": 1.9687215872083016e-05, "loss": 0.1623, "step": 34275 }, { "epoch": 0.6113509078585952, "grad_norm": 0.2465466558933258, "learning_rate": 1.9685694927614194e-05, "loss": 0.1597, "step": 34276 }, { "epoch": 0.6113687439803089, "grad_norm": 0.23225544393062592, "learning_rate": 1.968417400374546e-05, "loss": 0.1112, "step": 34277 }, { "epoch": 0.6113865801020226, "grad_norm": 0.25865229964256287, "learning_rate": 1.9682653100482707e-05, "loss": 0.1227, "step": 34278 }, { "epoch": 0.6114044162237363, "grad_norm": 0.24608168005943298, "learning_rate": 1.9681132217831827e-05, "loss": 0.1574, "step": 34279 }, { "epoch": 0.61142225234545, "grad_norm": 0.22632557153701782, "learning_rate": 1.96796113557987e-05, "loss": 0.1078, "step": 34280 }, { "epoch": 0.6114400884671637, "grad_norm": 0.23244720697402954, "learning_rate": 1.9678090514389255e-05, "loss": 0.0939, "step": 34281 }, { "epoch": 0.6114579245888774, "grad_norm": 0.26269736886024475, "learning_rate": 1.967656969360936e-05, "loss": 0.1544, "step": 34282 }, { "epoch": 0.6114757607105911, "grad_norm": 0.19003522396087646, "learning_rate": 1.9675048893464927e-05, "loss": 0.0948, "step": 34283 }, { "epoch": 0.6114935968323048, "grad_norm": 0.30170726776123047, "learning_rate": 1.967352811396184e-05, "loss": 0.1636, "step": 34284 }, { "epoch": 0.6115114329540184, "grad_norm": 0.3415099084377289, "learning_rate": 1.967200735510599e-05, "loss": 0.1257, "step": 34285 }, { "epoch": 0.6115292690757321, "grad_norm": 0.34446874260902405, "learning_rate": 1.967048661690329e-05, "loss": 0.188, "step": 34286 }, { "epoch": 0.6115471051974458, "grad_norm": 0.21127624809741974, "learning_rate": 1.9668965899359622e-05, "loss": 0.1457, "step": 34287 }, { "epoch": 0.6115649413191596, "grad_norm": 0.2781338095664978, "learning_rate": 1.966744520248088e-05, "loss": 0.1149, "step": 34288 }, { "epoch": 0.6115827774408733, "grad_norm": 0.2464647889137268, "learning_rate": 1.9665924526272964e-05, "loss": 0.0987, "step": 34289 }, { "epoch": 0.611600613562587, "grad_norm": 0.364133358001709, "learning_rate": 1.9664403870741765e-05, "loss": 0.1304, "step": 34290 }, { "epoch": 0.6116184496843007, "grad_norm": 0.21656657755374908, "learning_rate": 1.9662883235893185e-05, "loss": 0.1182, "step": 34291 }, { "epoch": 0.6116362858060144, "grad_norm": 0.23903073370456696, "learning_rate": 1.966136262173311e-05, "loss": 0.1361, "step": 34292 }, { "epoch": 0.6116541219277281, "grad_norm": 0.213217630982399, "learning_rate": 1.9659842028267433e-05, "loss": 0.112, "step": 34293 }, { "epoch": 0.6116719580494417, "grad_norm": 0.28249478340148926, "learning_rate": 1.965832145550206e-05, "loss": 0.1917, "step": 34294 }, { "epoch": 0.6116897941711554, "grad_norm": 0.26392725110054016, "learning_rate": 1.965680090344288e-05, "loss": 0.1329, "step": 34295 }, { "epoch": 0.6117076302928691, "grad_norm": 0.23368078470230103, "learning_rate": 1.9655280372095773e-05, "loss": 0.0913, "step": 34296 }, { "epoch": 0.6117254664145828, "grad_norm": 0.3019675612449646, "learning_rate": 1.965375986146666e-05, "loss": 0.103, "step": 34297 }, { "epoch": 0.6117433025362965, "grad_norm": 0.2722953259944916, "learning_rate": 1.9652239371561405e-05, "loss": 0.1641, "step": 34298 }, { "epoch": 0.6117611386580102, "grad_norm": 0.22746914625167847, "learning_rate": 1.9650718902385924e-05, "loss": 0.0903, "step": 34299 }, { "epoch": 0.6117789747797239, "grad_norm": 0.27003300189971924, "learning_rate": 1.964919845394611e-05, "loss": 0.1058, "step": 34300 }, { "epoch": 0.6117968109014376, "grad_norm": 0.2875027656555176, "learning_rate": 1.964767802624785e-05, "loss": 0.1324, "step": 34301 }, { "epoch": 0.6118146470231512, "grad_norm": 0.20354218780994415, "learning_rate": 1.9646157619297027e-05, "loss": 0.1125, "step": 34302 }, { "epoch": 0.6118324831448649, "grad_norm": 0.35294947028160095, "learning_rate": 1.964463723309955e-05, "loss": 0.1317, "step": 34303 }, { "epoch": 0.6118503192665786, "grad_norm": 0.26902082562446594, "learning_rate": 1.964311686766132e-05, "loss": 0.1115, "step": 34304 }, { "epoch": 0.6118681553882924, "grad_norm": 0.1982295662164688, "learning_rate": 1.9641596522988212e-05, "loss": 0.1012, "step": 34305 }, { "epoch": 0.6118859915100061, "grad_norm": 0.23082873225212097, "learning_rate": 1.964007619908612e-05, "loss": 0.0953, "step": 34306 }, { "epoch": 0.6119038276317198, "grad_norm": 0.26585277915000916, "learning_rate": 1.9638555895960954e-05, "loss": 0.1138, "step": 34307 }, { "epoch": 0.6119216637534335, "grad_norm": 0.34653255343437195, "learning_rate": 1.9637035613618596e-05, "loss": 0.1574, "step": 34308 }, { "epoch": 0.6119394998751472, "grad_norm": 0.37732672691345215, "learning_rate": 1.9635515352064934e-05, "loss": 0.1242, "step": 34309 }, { "epoch": 0.6119573359968609, "grad_norm": 0.2179662585258484, "learning_rate": 1.9633995111305874e-05, "loss": 0.0796, "step": 34310 }, { "epoch": 0.6119751721185746, "grad_norm": 0.3336751163005829, "learning_rate": 1.9632474891347293e-05, "loss": 0.1132, "step": 34311 }, { "epoch": 0.6119930082402882, "grad_norm": 0.2968040108680725, "learning_rate": 1.9630954692195104e-05, "loss": 0.0928, "step": 34312 }, { "epoch": 0.6120108443620019, "grad_norm": 0.269959956407547, "learning_rate": 1.9629434513855185e-05, "loss": 0.103, "step": 34313 }, { "epoch": 0.6120286804837156, "grad_norm": 0.26234179735183716, "learning_rate": 1.962791435633344e-05, "loss": 0.1198, "step": 34314 }, { "epoch": 0.6120465166054293, "grad_norm": 0.22992199659347534, "learning_rate": 1.9626394219635734e-05, "loss": 0.1624, "step": 34315 }, { "epoch": 0.612064352727143, "grad_norm": 0.2733350992202759, "learning_rate": 1.9624874103768e-05, "loss": 0.0902, "step": 34316 }, { "epoch": 0.6120821888488567, "grad_norm": 0.18465390801429749, "learning_rate": 1.96233540087361e-05, "loss": 0.1095, "step": 34317 }, { "epoch": 0.6121000249705704, "grad_norm": 0.270947128534317, "learning_rate": 1.962183393454594e-05, "loss": 0.1048, "step": 34318 }, { "epoch": 0.612117861092284, "grad_norm": 0.20678196847438812, "learning_rate": 1.9620313881203406e-05, "loss": 0.1061, "step": 34319 }, { "epoch": 0.6121356972139977, "grad_norm": 0.34201112389564514, "learning_rate": 1.96187938487144e-05, "loss": 0.1642, "step": 34320 }, { "epoch": 0.6121535333357114, "grad_norm": 0.27690356969833374, "learning_rate": 1.961727383708481e-05, "loss": 0.1429, "step": 34321 }, { "epoch": 0.6121713694574252, "grad_norm": 0.26979929208755493, "learning_rate": 1.961575384632052e-05, "loss": 0.1395, "step": 34322 }, { "epoch": 0.6121892055791389, "grad_norm": 0.29312068223953247, "learning_rate": 1.9614233876427423e-05, "loss": 0.1107, "step": 34323 }, { "epoch": 0.6122070417008526, "grad_norm": 0.28420430421829224, "learning_rate": 1.961271392741142e-05, "loss": 0.1312, "step": 34324 }, { "epoch": 0.6122248778225663, "grad_norm": 0.3592130243778229, "learning_rate": 1.96111939992784e-05, "loss": 0.1331, "step": 34325 }, { "epoch": 0.61224271394428, "grad_norm": 0.24376356601715088, "learning_rate": 1.960967409203425e-05, "loss": 0.0853, "step": 34326 }, { "epoch": 0.6122605500659937, "grad_norm": 0.2419992834329605, "learning_rate": 1.9608154205684876e-05, "loss": 0.0771, "step": 34327 }, { "epoch": 0.6122783861877074, "grad_norm": 0.27290013432502747, "learning_rate": 1.960663434023614e-05, "loss": 0.1363, "step": 34328 }, { "epoch": 0.612296222309421, "grad_norm": 0.22209122776985168, "learning_rate": 1.9605114495693965e-05, "loss": 0.1201, "step": 34329 }, { "epoch": 0.6123140584311347, "grad_norm": 0.2678241729736328, "learning_rate": 1.9603594672064225e-05, "loss": 0.1493, "step": 34330 }, { "epoch": 0.6123318945528484, "grad_norm": 0.21880501508712769, "learning_rate": 1.960207486935282e-05, "loss": 0.1167, "step": 34331 }, { "epoch": 0.6123497306745621, "grad_norm": 0.25183209776878357, "learning_rate": 1.960055508756563e-05, "loss": 0.1271, "step": 34332 }, { "epoch": 0.6123675667962758, "grad_norm": 0.30424416065216064, "learning_rate": 1.959903532670856e-05, "loss": 0.131, "step": 34333 }, { "epoch": 0.6123854029179895, "grad_norm": 0.32900524139404297, "learning_rate": 1.9597515586787496e-05, "loss": 0.1343, "step": 34334 }, { "epoch": 0.6124032390397032, "grad_norm": 0.25981438159942627, "learning_rate": 1.9595995867808324e-05, "loss": 0.1148, "step": 34335 }, { "epoch": 0.6124210751614169, "grad_norm": 0.26695725321769714, "learning_rate": 1.959447616977694e-05, "loss": 0.1253, "step": 34336 }, { "epoch": 0.6124389112831305, "grad_norm": 0.277535080909729, "learning_rate": 1.959295649269923e-05, "loss": 0.1613, "step": 34337 }, { "epoch": 0.6124567474048442, "grad_norm": 0.336956262588501, "learning_rate": 1.9591436836581088e-05, "loss": 0.1542, "step": 34338 }, { "epoch": 0.612474583526558, "grad_norm": 0.20411263406276703, "learning_rate": 1.958991720142841e-05, "loss": 0.1426, "step": 34339 }, { "epoch": 0.6124924196482717, "grad_norm": 0.2600489854812622, "learning_rate": 1.958839758724708e-05, "loss": 0.1099, "step": 34340 }, { "epoch": 0.6125102557699854, "grad_norm": 0.33901867270469666, "learning_rate": 1.9586877994042984e-05, "loss": 0.1769, "step": 34341 }, { "epoch": 0.6125280918916991, "grad_norm": 0.2957024872303009, "learning_rate": 1.9585358421822024e-05, "loss": 0.1391, "step": 34342 }, { "epoch": 0.6125459280134128, "grad_norm": 0.27155688405036926, "learning_rate": 1.9583838870590087e-05, "loss": 0.0925, "step": 34343 }, { "epoch": 0.6125637641351265, "grad_norm": 0.4036776125431061, "learning_rate": 1.958231934035306e-05, "loss": 0.2128, "step": 34344 }, { "epoch": 0.6125816002568402, "grad_norm": 0.2016754448413849, "learning_rate": 1.9580799831116827e-05, "loss": 0.0806, "step": 34345 }, { "epoch": 0.6125994363785539, "grad_norm": 0.29734769463539124, "learning_rate": 1.957928034288729e-05, "loss": 0.1161, "step": 34346 }, { "epoch": 0.6126172725002675, "grad_norm": 0.3466891348361969, "learning_rate": 1.957776087567034e-05, "loss": 0.1477, "step": 34347 }, { "epoch": 0.6126351086219812, "grad_norm": 0.2298237383365631, "learning_rate": 1.957624142947186e-05, "loss": 0.1321, "step": 34348 }, { "epoch": 0.6126529447436949, "grad_norm": 0.28484708070755005, "learning_rate": 1.9574722004297745e-05, "loss": 0.1147, "step": 34349 }, { "epoch": 0.6126707808654086, "grad_norm": 0.38168346881866455, "learning_rate": 1.9573202600153868e-05, "loss": 0.1607, "step": 34350 }, { "epoch": 0.6126886169871223, "grad_norm": 0.38014307618141174, "learning_rate": 1.9571683217046143e-05, "loss": 0.1344, "step": 34351 }, { "epoch": 0.612706453108836, "grad_norm": 0.1755140721797943, "learning_rate": 1.957016385498044e-05, "loss": 0.1133, "step": 34352 }, { "epoch": 0.6127242892305497, "grad_norm": 0.2478196620941162, "learning_rate": 1.9568644513962667e-05, "loss": 0.1372, "step": 34353 }, { "epoch": 0.6127421253522634, "grad_norm": 0.2102089375257492, "learning_rate": 1.9567125193998693e-05, "loss": 0.1131, "step": 34354 }, { "epoch": 0.612759961473977, "grad_norm": 0.29458630084991455, "learning_rate": 1.956560589509443e-05, "loss": 0.1728, "step": 34355 }, { "epoch": 0.6127777975956908, "grad_norm": 0.2509872317314148, "learning_rate": 1.956408661725575e-05, "loss": 0.1361, "step": 34356 }, { "epoch": 0.6127956337174045, "grad_norm": 0.1840127557516098, "learning_rate": 1.9562567360488546e-05, "loss": 0.0811, "step": 34357 }, { "epoch": 0.6128134698391182, "grad_norm": 0.2356569468975067, "learning_rate": 1.956104812479871e-05, "loss": 0.1137, "step": 34358 }, { "epoch": 0.6128313059608319, "grad_norm": 0.3009377717971802, "learning_rate": 1.9559528910192126e-05, "loss": 0.1735, "step": 34359 }, { "epoch": 0.6128491420825456, "grad_norm": 0.24567048251628876, "learning_rate": 1.9558009716674698e-05, "loss": 0.1497, "step": 34360 }, { "epoch": 0.6128669782042593, "grad_norm": 0.29931673407554626, "learning_rate": 1.9556490544252297e-05, "loss": 0.1251, "step": 34361 }, { "epoch": 0.612884814325973, "grad_norm": 0.23354960978031158, "learning_rate": 1.9554971392930828e-05, "loss": 0.1071, "step": 34362 }, { "epoch": 0.6129026504476867, "grad_norm": 0.25356778502464294, "learning_rate": 1.9553452262716153e-05, "loss": 0.1515, "step": 34363 }, { "epoch": 0.6129204865694003, "grad_norm": 0.32689082622528076, "learning_rate": 1.955193315361419e-05, "loss": 0.1667, "step": 34364 }, { "epoch": 0.612938322691114, "grad_norm": 0.2577555179595947, "learning_rate": 1.9550414065630813e-05, "loss": 0.1415, "step": 34365 }, { "epoch": 0.6129561588128277, "grad_norm": 0.2391408085823059, "learning_rate": 1.9548894998771916e-05, "loss": 0.1365, "step": 34366 }, { "epoch": 0.6129739949345414, "grad_norm": 0.3446679413318634, "learning_rate": 1.9547375953043373e-05, "loss": 0.1255, "step": 34367 }, { "epoch": 0.6129918310562551, "grad_norm": 0.28184783458709717, "learning_rate": 1.9545856928451098e-05, "loss": 0.1754, "step": 34368 }, { "epoch": 0.6130096671779688, "grad_norm": 0.2451029121875763, "learning_rate": 1.9544337925000964e-05, "loss": 0.144, "step": 34369 }, { "epoch": 0.6130275032996825, "grad_norm": 0.24050399661064148, "learning_rate": 1.954281894269886e-05, "loss": 0.1273, "step": 34370 }, { "epoch": 0.6130453394213962, "grad_norm": 0.32015708088874817, "learning_rate": 1.9541299981550666e-05, "loss": 0.1199, "step": 34371 }, { "epoch": 0.61306317554311, "grad_norm": 0.23027445375919342, "learning_rate": 1.9539781041562284e-05, "loss": 0.1209, "step": 34372 }, { "epoch": 0.6130810116648236, "grad_norm": 0.21623660624027252, "learning_rate": 1.9538262122739596e-05, "loss": 0.1234, "step": 34373 }, { "epoch": 0.6130988477865373, "grad_norm": 0.3044774532318115, "learning_rate": 1.9536743225088496e-05, "loss": 0.1246, "step": 34374 }, { "epoch": 0.613116683908251, "grad_norm": 0.267084002494812, "learning_rate": 1.9535224348614862e-05, "loss": 0.1039, "step": 34375 }, { "epoch": 0.6131345200299647, "grad_norm": 0.32404825091362, "learning_rate": 1.953370549332458e-05, "loss": 0.1614, "step": 34376 }, { "epoch": 0.6131523561516784, "grad_norm": 0.3082652986049652, "learning_rate": 1.953218665922355e-05, "loss": 0.1478, "step": 34377 }, { "epoch": 0.6131701922733921, "grad_norm": 0.22887858748435974, "learning_rate": 1.9530667846317657e-05, "loss": 0.0915, "step": 34378 }, { "epoch": 0.6131880283951058, "grad_norm": 0.34719282388687134, "learning_rate": 1.9529149054612778e-05, "loss": 0.1594, "step": 34379 }, { "epoch": 0.6132058645168195, "grad_norm": 0.2712959945201874, "learning_rate": 1.95276302841148e-05, "loss": 0.1132, "step": 34380 }, { "epoch": 0.6132237006385332, "grad_norm": 0.1935320645570755, "learning_rate": 1.952611153482963e-05, "loss": 0.1423, "step": 34381 }, { "epoch": 0.6132415367602468, "grad_norm": 0.6655855774879456, "learning_rate": 1.952459280676314e-05, "loss": 0.1128, "step": 34382 }, { "epoch": 0.6132593728819605, "grad_norm": 0.2346189171075821, "learning_rate": 1.9523074099921223e-05, "loss": 0.1043, "step": 34383 }, { "epoch": 0.6132772090036742, "grad_norm": 0.28837835788726807, "learning_rate": 1.9521555414309747e-05, "loss": 0.1335, "step": 34384 }, { "epoch": 0.6132950451253879, "grad_norm": 0.25568902492523193, "learning_rate": 1.9520036749934628e-05, "loss": 0.149, "step": 34385 }, { "epoch": 0.6133128812471016, "grad_norm": 0.2111838310956955, "learning_rate": 1.9518518106801733e-05, "loss": 0.0832, "step": 34386 }, { "epoch": 0.6133307173688153, "grad_norm": 0.23745548725128174, "learning_rate": 1.951699948491696e-05, "loss": 0.1274, "step": 34387 }, { "epoch": 0.613348553490529, "grad_norm": 0.24505575001239777, "learning_rate": 1.951548088428619e-05, "loss": 0.1125, "step": 34388 }, { "epoch": 0.6133663896122428, "grad_norm": 0.28381475806236267, "learning_rate": 1.9513962304915302e-05, "loss": 0.1155, "step": 34389 }, { "epoch": 0.6133842257339565, "grad_norm": 0.2749321162700653, "learning_rate": 1.9512443746810204e-05, "loss": 0.1905, "step": 34390 }, { "epoch": 0.6134020618556701, "grad_norm": 0.27395883202552795, "learning_rate": 1.9510925209976762e-05, "loss": 0.1436, "step": 34391 }, { "epoch": 0.6134198979773838, "grad_norm": 0.35737577080726624, "learning_rate": 1.950940669442087e-05, "loss": 0.1411, "step": 34392 }, { "epoch": 0.6134377340990975, "grad_norm": 0.30439358949661255, "learning_rate": 1.9507888200148414e-05, "loss": 0.1414, "step": 34393 }, { "epoch": 0.6134555702208112, "grad_norm": 0.31353920698165894, "learning_rate": 1.9506369727165278e-05, "loss": 0.1468, "step": 34394 }, { "epoch": 0.6134734063425249, "grad_norm": 0.24509234726428986, "learning_rate": 1.9504851275477357e-05, "loss": 0.0892, "step": 34395 }, { "epoch": 0.6134912424642386, "grad_norm": 0.2553241550922394, "learning_rate": 1.950333284509053e-05, "loss": 0.1261, "step": 34396 }, { "epoch": 0.6135090785859523, "grad_norm": 0.32922300696372986, "learning_rate": 1.9501814436010672e-05, "loss": 0.1199, "step": 34397 }, { "epoch": 0.613526914707666, "grad_norm": 0.31108734011650085, "learning_rate": 1.950029604824369e-05, "loss": 0.1462, "step": 34398 }, { "epoch": 0.6135447508293796, "grad_norm": 0.1704423576593399, "learning_rate": 1.949877768179546e-05, "loss": 0.1186, "step": 34399 }, { "epoch": 0.6135625869510933, "grad_norm": 0.3232390582561493, "learning_rate": 1.9497259336671868e-05, "loss": 0.1092, "step": 34400 }, { "epoch": 0.613580423072807, "grad_norm": 0.26013556122779846, "learning_rate": 1.94957410128788e-05, "loss": 0.0965, "step": 34401 }, { "epoch": 0.6135982591945207, "grad_norm": 0.24986091256141663, "learning_rate": 1.949422271042213e-05, "loss": 0.1225, "step": 34402 }, { "epoch": 0.6136160953162344, "grad_norm": 0.24714888632297516, "learning_rate": 1.9492704429307768e-05, "loss": 0.0926, "step": 34403 }, { "epoch": 0.6136339314379481, "grad_norm": 0.28164413571357727, "learning_rate": 1.9491186169541585e-05, "loss": 0.1164, "step": 34404 }, { "epoch": 0.6136517675596618, "grad_norm": 0.2647169530391693, "learning_rate": 1.9489667931129465e-05, "loss": 0.1958, "step": 34405 }, { "epoch": 0.6136696036813756, "grad_norm": 0.24187271296977997, "learning_rate": 1.9488149714077287e-05, "loss": 0.128, "step": 34406 }, { "epoch": 0.6136874398030893, "grad_norm": 0.29026854038238525, "learning_rate": 1.9486631518390946e-05, "loss": 0.1544, "step": 34407 }, { "epoch": 0.613705275924803, "grad_norm": 0.25430163741111755, "learning_rate": 1.9485113344076333e-05, "loss": 0.1133, "step": 34408 }, { "epoch": 0.6137231120465166, "grad_norm": 0.2896972894668579, "learning_rate": 1.9483595191139324e-05, "loss": 0.1259, "step": 34409 }, { "epoch": 0.6137409481682303, "grad_norm": 0.3382795453071594, "learning_rate": 1.9482077059585798e-05, "loss": 0.1295, "step": 34410 }, { "epoch": 0.613758784289944, "grad_norm": 0.37541109323501587, "learning_rate": 1.9480558949421656e-05, "loss": 0.2029, "step": 34411 }, { "epoch": 0.6137766204116577, "grad_norm": 0.20420439541339874, "learning_rate": 1.9479040860652777e-05, "loss": 0.1261, "step": 34412 }, { "epoch": 0.6137944565333714, "grad_norm": 0.2869111895561218, "learning_rate": 1.9477522793285032e-05, "loss": 0.1405, "step": 34413 }, { "epoch": 0.6138122926550851, "grad_norm": 0.26251155138015747, "learning_rate": 1.9476004747324324e-05, "loss": 0.1087, "step": 34414 }, { "epoch": 0.6138301287767988, "grad_norm": 0.2096259891986847, "learning_rate": 1.947448672277652e-05, "loss": 0.1356, "step": 34415 }, { "epoch": 0.6138479648985125, "grad_norm": 0.2769961655139923, "learning_rate": 1.947296871964752e-05, "loss": 0.1445, "step": 34416 }, { "epoch": 0.6138658010202261, "grad_norm": 0.27338385581970215, "learning_rate": 1.947145073794321e-05, "loss": 0.1554, "step": 34417 }, { "epoch": 0.6138836371419398, "grad_norm": 0.27455809712409973, "learning_rate": 1.946993277766946e-05, "loss": 0.1414, "step": 34418 }, { "epoch": 0.6139014732636535, "grad_norm": 0.24713504314422607, "learning_rate": 1.946841483883215e-05, "loss": 0.1728, "step": 34419 }, { "epoch": 0.6139193093853672, "grad_norm": 0.2966313660144806, "learning_rate": 1.946689692143719e-05, "loss": 0.13, "step": 34420 }, { "epoch": 0.6139371455070809, "grad_norm": 0.35055744647979736, "learning_rate": 1.9465379025490438e-05, "loss": 0.1001, "step": 34421 }, { "epoch": 0.6139549816287946, "grad_norm": 0.3221801221370697, "learning_rate": 1.9463861150997797e-05, "loss": 0.1242, "step": 34422 }, { "epoch": 0.6139728177505084, "grad_norm": 0.22965948283672333, "learning_rate": 1.946234329796513e-05, "loss": 0.1102, "step": 34423 }, { "epoch": 0.6139906538722221, "grad_norm": 0.31719475984573364, "learning_rate": 1.9460825466398343e-05, "loss": 0.1534, "step": 34424 }, { "epoch": 0.6140084899939358, "grad_norm": 0.2211807817220688, "learning_rate": 1.945930765630331e-05, "loss": 0.1578, "step": 34425 }, { "epoch": 0.6140263261156494, "grad_norm": 0.23441603779792786, "learning_rate": 1.9457789867685916e-05, "loss": 0.1062, "step": 34426 }, { "epoch": 0.6140441622373631, "grad_norm": 0.23012542724609375, "learning_rate": 1.9456272100552035e-05, "loss": 0.1185, "step": 34427 }, { "epoch": 0.6140619983590768, "grad_norm": 0.2291250377893448, "learning_rate": 1.945475435490756e-05, "loss": 0.0959, "step": 34428 }, { "epoch": 0.6140798344807905, "grad_norm": 0.28275078535079956, "learning_rate": 1.9453236630758375e-05, "loss": 0.1624, "step": 34429 }, { "epoch": 0.6140976706025042, "grad_norm": 0.2316838800907135, "learning_rate": 1.9451718928110363e-05, "loss": 0.1045, "step": 34430 }, { "epoch": 0.6141155067242179, "grad_norm": 0.37640243768692017, "learning_rate": 1.9450201246969405e-05, "loss": 0.1487, "step": 34431 }, { "epoch": 0.6141333428459316, "grad_norm": 0.27081558108329773, "learning_rate": 1.944868358734137e-05, "loss": 0.1399, "step": 34432 }, { "epoch": 0.6141511789676453, "grad_norm": 0.28149375319480896, "learning_rate": 1.944716594923217e-05, "loss": 0.1747, "step": 34433 }, { "epoch": 0.614169015089359, "grad_norm": 0.3042205274105072, "learning_rate": 1.9445648332647667e-05, "loss": 0.129, "step": 34434 }, { "epoch": 0.6141868512110726, "grad_norm": 0.2766510844230652, "learning_rate": 1.944413073759375e-05, "loss": 0.132, "step": 34435 }, { "epoch": 0.6142046873327863, "grad_norm": 0.33994901180267334, "learning_rate": 1.9442613164076294e-05, "loss": 0.1366, "step": 34436 }, { "epoch": 0.6142225234545, "grad_norm": 0.21081985533237457, "learning_rate": 1.9441095612101202e-05, "loss": 0.1333, "step": 34437 }, { "epoch": 0.6142403595762137, "grad_norm": 0.3470522463321686, "learning_rate": 1.943957808167434e-05, "loss": 0.1094, "step": 34438 }, { "epoch": 0.6142581956979274, "grad_norm": 0.20546455681324005, "learning_rate": 1.94380605728016e-05, "loss": 0.1395, "step": 34439 }, { "epoch": 0.6142760318196412, "grad_norm": 0.30680495500564575, "learning_rate": 1.9436543085488847e-05, "loss": 0.1306, "step": 34440 }, { "epoch": 0.6142938679413549, "grad_norm": 0.24016529321670532, "learning_rate": 1.9435025619741974e-05, "loss": 0.1367, "step": 34441 }, { "epoch": 0.6143117040630686, "grad_norm": 0.3430418074131012, "learning_rate": 1.9433508175566865e-05, "loss": 0.1101, "step": 34442 }, { "epoch": 0.6143295401847823, "grad_norm": 0.2906799018383026, "learning_rate": 1.9431990752969402e-05, "loss": 0.1512, "step": 34443 }, { "epoch": 0.6143473763064959, "grad_norm": 0.26818475127220154, "learning_rate": 1.9430473351955474e-05, "loss": 0.1171, "step": 34444 }, { "epoch": 0.6143652124282096, "grad_norm": 0.3054639399051666, "learning_rate": 1.9428955972530938e-05, "loss": 0.1464, "step": 34445 }, { "epoch": 0.6143830485499233, "grad_norm": 0.23612850904464722, "learning_rate": 1.9427438614701707e-05, "loss": 0.147, "step": 34446 }, { "epoch": 0.614400884671637, "grad_norm": 0.310243159532547, "learning_rate": 1.9425921278473646e-05, "loss": 0.0852, "step": 34447 }, { "epoch": 0.6144187207933507, "grad_norm": 0.23815155029296875, "learning_rate": 1.942440396385264e-05, "loss": 0.1498, "step": 34448 }, { "epoch": 0.6144365569150644, "grad_norm": 0.37250688672065735, "learning_rate": 1.9422886670844563e-05, "loss": 0.1528, "step": 34449 }, { "epoch": 0.6144543930367781, "grad_norm": 0.30396950244903564, "learning_rate": 1.942136939945531e-05, "loss": 0.0858, "step": 34450 }, { "epoch": 0.6144722291584918, "grad_norm": 0.24318405985832214, "learning_rate": 1.9419852149690755e-05, "loss": 0.1134, "step": 34451 }, { "epoch": 0.6144900652802054, "grad_norm": 0.29396671056747437, "learning_rate": 1.9418334921556783e-05, "loss": 0.127, "step": 34452 }, { "epoch": 0.6145079014019191, "grad_norm": 0.27991798520088196, "learning_rate": 1.941681771505927e-05, "loss": 0.1275, "step": 34453 }, { "epoch": 0.6145257375236328, "grad_norm": 0.2710343301296234, "learning_rate": 1.9415300530204095e-05, "loss": 0.12, "step": 34454 }, { "epoch": 0.6145435736453465, "grad_norm": 0.2413530945777893, "learning_rate": 1.941378336699715e-05, "loss": 0.0984, "step": 34455 }, { "epoch": 0.6145614097670602, "grad_norm": 0.2243719846010208, "learning_rate": 1.9412266225444305e-05, "loss": 0.1147, "step": 34456 }, { "epoch": 0.614579245888774, "grad_norm": 0.2554042339324951, "learning_rate": 1.9410749105551453e-05, "loss": 0.117, "step": 34457 }, { "epoch": 0.6145970820104877, "grad_norm": 0.33550775051116943, "learning_rate": 1.9409232007324458e-05, "loss": 0.0943, "step": 34458 }, { "epoch": 0.6146149181322014, "grad_norm": 0.20211301743984222, "learning_rate": 1.940771493076922e-05, "loss": 0.1141, "step": 34459 }, { "epoch": 0.6146327542539151, "grad_norm": 0.2155083864927292, "learning_rate": 1.9406197875891613e-05, "loss": 0.1186, "step": 34460 }, { "epoch": 0.6146505903756287, "grad_norm": 0.3749031126499176, "learning_rate": 1.9404680842697507e-05, "loss": 0.1727, "step": 34461 }, { "epoch": 0.6146684264973424, "grad_norm": 0.3554801642894745, "learning_rate": 1.940316383119279e-05, "loss": 0.1177, "step": 34462 }, { "epoch": 0.6146862626190561, "grad_norm": 0.27102598547935486, "learning_rate": 1.9401646841383346e-05, "loss": 0.1127, "step": 34463 }, { "epoch": 0.6147040987407698, "grad_norm": 0.30681324005126953, "learning_rate": 1.9400129873275055e-05, "loss": 0.1572, "step": 34464 }, { "epoch": 0.6147219348624835, "grad_norm": 0.293938010931015, "learning_rate": 1.9398612926873792e-05, "loss": 0.1629, "step": 34465 }, { "epoch": 0.6147397709841972, "grad_norm": 0.6156755089759827, "learning_rate": 1.9397096002185447e-05, "loss": 0.1008, "step": 34466 }, { "epoch": 0.6147576071059109, "grad_norm": 0.24818457663059235, "learning_rate": 1.939557909921588e-05, "loss": 0.148, "step": 34467 }, { "epoch": 0.6147754432276246, "grad_norm": 0.29153475165367126, "learning_rate": 1.9394062217970994e-05, "loss": 0.1014, "step": 34468 }, { "epoch": 0.6147932793493383, "grad_norm": 0.3758023679256439, "learning_rate": 1.9392545358456653e-05, "loss": 0.138, "step": 34469 }, { "epoch": 0.6148111154710519, "grad_norm": 0.2715372145175934, "learning_rate": 1.939102852067875e-05, "loss": 0.1495, "step": 34470 }, { "epoch": 0.6148289515927656, "grad_norm": 0.24557875096797943, "learning_rate": 1.9389511704643143e-05, "loss": 0.1348, "step": 34471 }, { "epoch": 0.6148467877144793, "grad_norm": 0.18844127655029297, "learning_rate": 1.9387994910355743e-05, "loss": 0.1308, "step": 34472 }, { "epoch": 0.614864623836193, "grad_norm": 0.2698347568511963, "learning_rate": 1.938647813782241e-05, "loss": 0.1283, "step": 34473 }, { "epoch": 0.6148824599579068, "grad_norm": 0.2662133276462555, "learning_rate": 1.9384961387049023e-05, "loss": 0.1165, "step": 34474 }, { "epoch": 0.6149002960796205, "grad_norm": 0.23293600976467133, "learning_rate": 1.938344465804146e-05, "loss": 0.0728, "step": 34475 }, { "epoch": 0.6149181322013342, "grad_norm": 0.26369717717170715, "learning_rate": 1.9381927950805615e-05, "loss": 0.1187, "step": 34476 }, { "epoch": 0.6149359683230479, "grad_norm": 0.2170003056526184, "learning_rate": 1.938041126534735e-05, "loss": 0.0846, "step": 34477 }, { "epoch": 0.6149538044447616, "grad_norm": 0.2619723975658417, "learning_rate": 1.937889460167256e-05, "loss": 0.1615, "step": 34478 }, { "epoch": 0.6149716405664752, "grad_norm": 0.3334028422832489, "learning_rate": 1.9377377959787106e-05, "loss": 0.1099, "step": 34479 }, { "epoch": 0.6149894766881889, "grad_norm": 0.22388894855976105, "learning_rate": 1.9375861339696884e-05, "loss": 0.0987, "step": 34480 }, { "epoch": 0.6150073128099026, "grad_norm": 0.26746198534965515, "learning_rate": 1.9374344741407768e-05, "loss": 0.1226, "step": 34481 }, { "epoch": 0.6150251489316163, "grad_norm": 0.3266472816467285, "learning_rate": 1.937282816492563e-05, "loss": 0.1769, "step": 34482 }, { "epoch": 0.61504298505333, "grad_norm": 0.23216617107391357, "learning_rate": 1.9371311610256358e-05, "loss": 0.1134, "step": 34483 }, { "epoch": 0.6150608211750437, "grad_norm": 0.2086256444454193, "learning_rate": 1.9369795077405813e-05, "loss": 0.0978, "step": 34484 }, { "epoch": 0.6150786572967574, "grad_norm": 0.29555922746658325, "learning_rate": 1.9368278566379902e-05, "loss": 0.0864, "step": 34485 }, { "epoch": 0.6150964934184711, "grad_norm": 0.23405064642429352, "learning_rate": 1.9366762077184487e-05, "loss": 0.1443, "step": 34486 }, { "epoch": 0.6151143295401847, "grad_norm": 0.23380424082279205, "learning_rate": 1.9365245609825448e-05, "loss": 0.0673, "step": 34487 }, { "epoch": 0.6151321656618984, "grad_norm": 0.19976286590099335, "learning_rate": 1.936372916430865e-05, "loss": 0.0942, "step": 34488 }, { "epoch": 0.6151500017836121, "grad_norm": 0.2814916670322418, "learning_rate": 1.9362212740639997e-05, "loss": 0.119, "step": 34489 }, { "epoch": 0.6151678379053259, "grad_norm": 0.2697890102863312, "learning_rate": 1.936069633882535e-05, "loss": 0.156, "step": 34490 }, { "epoch": 0.6151856740270396, "grad_norm": 0.32851386070251465, "learning_rate": 1.9359179958870595e-05, "loss": 0.2139, "step": 34491 }, { "epoch": 0.6152035101487533, "grad_norm": 0.2962421476840973, "learning_rate": 1.93576636007816e-05, "loss": 0.1382, "step": 34492 }, { "epoch": 0.615221346270467, "grad_norm": 0.3056343197822571, "learning_rate": 1.9356147264564257e-05, "loss": 0.1818, "step": 34493 }, { "epoch": 0.6152391823921807, "grad_norm": 0.22571781277656555, "learning_rate": 1.9354630950224438e-05, "loss": 0.1378, "step": 34494 }, { "epoch": 0.6152570185138944, "grad_norm": 0.28213798999786377, "learning_rate": 1.9353114657768017e-05, "loss": 0.1575, "step": 34495 }, { "epoch": 0.615274854635608, "grad_norm": 0.3141067624092102, "learning_rate": 1.9351598387200872e-05, "loss": 0.1092, "step": 34496 }, { "epoch": 0.6152926907573217, "grad_norm": 0.2912456691265106, "learning_rate": 1.935008213852888e-05, "loss": 0.1753, "step": 34497 }, { "epoch": 0.6153105268790354, "grad_norm": 0.28660398721694946, "learning_rate": 1.9348565911757924e-05, "loss": 0.158, "step": 34498 }, { "epoch": 0.6153283630007491, "grad_norm": 0.23358003795146942, "learning_rate": 1.9347049706893884e-05, "loss": 0.1094, "step": 34499 }, { "epoch": 0.6153461991224628, "grad_norm": 0.23171283304691315, "learning_rate": 1.9345533523942628e-05, "loss": 0.1096, "step": 34500 }, { "epoch": 0.6153640352441765, "grad_norm": 0.24595412611961365, "learning_rate": 1.9344017362910026e-05, "loss": 0.129, "step": 34501 }, { "epoch": 0.6153818713658902, "grad_norm": 0.22029787302017212, "learning_rate": 1.934250122380198e-05, "loss": 0.1019, "step": 34502 }, { "epoch": 0.6153997074876039, "grad_norm": 0.26852864027023315, "learning_rate": 1.9340985106624354e-05, "loss": 0.128, "step": 34503 }, { "epoch": 0.6154175436093176, "grad_norm": 0.2965337634086609, "learning_rate": 1.9339469011383017e-05, "loss": 0.1563, "step": 34504 }, { "epoch": 0.6154353797310312, "grad_norm": 0.35104653239250183, "learning_rate": 1.933795293808385e-05, "loss": 0.1768, "step": 34505 }, { "epoch": 0.6154532158527449, "grad_norm": 0.2872408628463745, "learning_rate": 1.933643688673274e-05, "loss": 0.1758, "step": 34506 }, { "epoch": 0.6154710519744587, "grad_norm": 0.37812554836273193, "learning_rate": 1.933492085733556e-05, "loss": 0.1516, "step": 34507 }, { "epoch": 0.6154888880961724, "grad_norm": 0.18025898933410645, "learning_rate": 1.933340484989818e-05, "loss": 0.122, "step": 34508 }, { "epoch": 0.6155067242178861, "grad_norm": 0.32844042778015137, "learning_rate": 1.9331888864426482e-05, "loss": 0.1333, "step": 34509 }, { "epoch": 0.6155245603395998, "grad_norm": 0.24264642596244812, "learning_rate": 1.933037290092633e-05, "loss": 0.1221, "step": 34510 }, { "epoch": 0.6155423964613135, "grad_norm": 0.240075945854187, "learning_rate": 1.932885695940361e-05, "loss": 0.1256, "step": 34511 }, { "epoch": 0.6155602325830272, "grad_norm": 0.23587296903133392, "learning_rate": 1.9327341039864216e-05, "loss": 0.1066, "step": 34512 }, { "epoch": 0.6155780687047409, "grad_norm": 0.2907547950744629, "learning_rate": 1.9325825142314002e-05, "loss": 0.1326, "step": 34513 }, { "epoch": 0.6155959048264545, "grad_norm": 0.3378159999847412, "learning_rate": 1.932430926675884e-05, "loss": 0.109, "step": 34514 }, { "epoch": 0.6156137409481682, "grad_norm": 0.21095429360866547, "learning_rate": 1.932279341320462e-05, "loss": 0.0832, "step": 34515 }, { "epoch": 0.6156315770698819, "grad_norm": 0.31158697605133057, "learning_rate": 1.932127758165722e-05, "loss": 0.1304, "step": 34516 }, { "epoch": 0.6156494131915956, "grad_norm": 0.20838488638401031, "learning_rate": 1.9319761772122503e-05, "loss": 0.1048, "step": 34517 }, { "epoch": 0.6156672493133093, "grad_norm": 0.21778038144111633, "learning_rate": 1.9318245984606352e-05, "loss": 0.1258, "step": 34518 }, { "epoch": 0.615685085435023, "grad_norm": 0.28368258476257324, "learning_rate": 1.931673021911464e-05, "loss": 0.1583, "step": 34519 }, { "epoch": 0.6157029215567367, "grad_norm": 0.2694331705570221, "learning_rate": 1.931521447565325e-05, "loss": 0.117, "step": 34520 }, { "epoch": 0.6157207576784504, "grad_norm": 0.24522793292999268, "learning_rate": 1.9313698754228056e-05, "loss": 0.0907, "step": 34521 }, { "epoch": 0.615738593800164, "grad_norm": 0.27080926299095154, "learning_rate": 1.9312183054844924e-05, "loss": 0.1173, "step": 34522 }, { "epoch": 0.6157564299218777, "grad_norm": 0.242206871509552, "learning_rate": 1.9310667377509728e-05, "loss": 0.1594, "step": 34523 }, { "epoch": 0.6157742660435915, "grad_norm": 0.25656843185424805, "learning_rate": 1.930915172222836e-05, "loss": 0.1226, "step": 34524 }, { "epoch": 0.6157921021653052, "grad_norm": 0.3067161440849304, "learning_rate": 1.9307636089006683e-05, "loss": 0.1251, "step": 34525 }, { "epoch": 0.6158099382870189, "grad_norm": 0.2224901020526886, "learning_rate": 1.9306120477850574e-05, "loss": 0.1497, "step": 34526 }, { "epoch": 0.6158277744087326, "grad_norm": 0.23707859218120575, "learning_rate": 1.93046048887659e-05, "loss": 0.125, "step": 34527 }, { "epoch": 0.6158456105304463, "grad_norm": 0.3091038167476654, "learning_rate": 1.9303089321758555e-05, "loss": 0.1575, "step": 34528 }, { "epoch": 0.61586344665216, "grad_norm": 0.35471346974372864, "learning_rate": 1.9301573776834405e-05, "loss": 0.1107, "step": 34529 }, { "epoch": 0.6158812827738737, "grad_norm": 0.24750669300556183, "learning_rate": 1.9300058253999323e-05, "loss": 0.1243, "step": 34530 }, { "epoch": 0.6158991188955873, "grad_norm": 0.3448469936847687, "learning_rate": 1.9298542753259173e-05, "loss": 0.09, "step": 34531 }, { "epoch": 0.615916955017301, "grad_norm": 0.2808707654476166, "learning_rate": 1.9297027274619854e-05, "loss": 0.1859, "step": 34532 }, { "epoch": 0.6159347911390147, "grad_norm": 0.24901944398880005, "learning_rate": 1.9295511818087218e-05, "loss": 0.1059, "step": 34533 }, { "epoch": 0.6159526272607284, "grad_norm": 0.2689917981624603, "learning_rate": 1.9293996383667152e-05, "loss": 0.1603, "step": 34534 }, { "epoch": 0.6159704633824421, "grad_norm": 0.27367252111434937, "learning_rate": 1.9292480971365533e-05, "loss": 0.1224, "step": 34535 }, { "epoch": 0.6159882995041558, "grad_norm": 0.3604848086833954, "learning_rate": 1.9290965581188214e-05, "loss": 0.1777, "step": 34536 }, { "epoch": 0.6160061356258695, "grad_norm": 0.20487985014915466, "learning_rate": 1.9289450213141095e-05, "loss": 0.1232, "step": 34537 }, { "epoch": 0.6160239717475832, "grad_norm": 0.30140063166618347, "learning_rate": 1.9287934867230033e-05, "loss": 0.1012, "step": 34538 }, { "epoch": 0.6160418078692969, "grad_norm": 0.2638993263244629, "learning_rate": 1.9286419543460917e-05, "loss": 0.1395, "step": 34539 }, { "epoch": 0.6160596439910105, "grad_norm": 0.280154824256897, "learning_rate": 1.92849042418396e-05, "loss": 0.1135, "step": 34540 }, { "epoch": 0.6160774801127243, "grad_norm": 0.362231582403183, "learning_rate": 1.928338896237198e-05, "loss": 0.1523, "step": 34541 }, { "epoch": 0.616095316234438, "grad_norm": 0.35468077659606934, "learning_rate": 1.9281873705063915e-05, "loss": 0.1052, "step": 34542 }, { "epoch": 0.6161131523561517, "grad_norm": 0.25615227222442627, "learning_rate": 1.9280358469921286e-05, "loss": 0.1474, "step": 34543 }, { "epoch": 0.6161309884778654, "grad_norm": 0.2822414040565491, "learning_rate": 1.9278843256949952e-05, "loss": 0.1223, "step": 34544 }, { "epoch": 0.6161488245995791, "grad_norm": 0.2659647762775421, "learning_rate": 1.927732806615581e-05, "loss": 0.0817, "step": 34545 }, { "epoch": 0.6161666607212928, "grad_norm": 0.20857706665992737, "learning_rate": 1.9275812897544712e-05, "loss": 0.1019, "step": 34546 }, { "epoch": 0.6161844968430065, "grad_norm": 0.6639837622642517, "learning_rate": 1.9274297751122547e-05, "loss": 0.1182, "step": 34547 }, { "epoch": 0.6162023329647202, "grad_norm": 0.28585055470466614, "learning_rate": 1.927278262689518e-05, "loss": 0.1752, "step": 34548 }, { "epoch": 0.6162201690864338, "grad_norm": 0.25726133584976196, "learning_rate": 1.9271267524868478e-05, "loss": 0.1788, "step": 34549 }, { "epoch": 0.6162380052081475, "grad_norm": 0.2813021242618561, "learning_rate": 1.926975244504833e-05, "loss": 0.1245, "step": 34550 }, { "epoch": 0.6162558413298612, "grad_norm": 0.31017810106277466, "learning_rate": 1.9268237387440603e-05, "loss": 0.181, "step": 34551 }, { "epoch": 0.6162736774515749, "grad_norm": 0.22194547951221466, "learning_rate": 1.926672235205116e-05, "loss": 0.1123, "step": 34552 }, { "epoch": 0.6162915135732886, "grad_norm": 0.2474585324525833, "learning_rate": 1.9265207338885884e-05, "loss": 0.1269, "step": 34553 }, { "epoch": 0.6163093496950023, "grad_norm": 0.3163282871246338, "learning_rate": 1.926369234795064e-05, "loss": 0.1281, "step": 34554 }, { "epoch": 0.616327185816716, "grad_norm": 0.24996168911457062, "learning_rate": 1.9262177379251318e-05, "loss": 0.1267, "step": 34555 }, { "epoch": 0.6163450219384297, "grad_norm": 0.26003187894821167, "learning_rate": 1.926066243279377e-05, "loss": 0.1216, "step": 34556 }, { "epoch": 0.6163628580601433, "grad_norm": 0.26683512330055237, "learning_rate": 1.9259147508583876e-05, "loss": 0.1633, "step": 34557 }, { "epoch": 0.6163806941818571, "grad_norm": 0.18827903270721436, "learning_rate": 1.925763260662751e-05, "loss": 0.0968, "step": 34558 }, { "epoch": 0.6163985303035708, "grad_norm": 0.28749945759773254, "learning_rate": 1.925611772693055e-05, "loss": 0.1482, "step": 34559 }, { "epoch": 0.6164163664252845, "grad_norm": 0.24320589005947113, "learning_rate": 1.9254602869498852e-05, "loss": 0.1399, "step": 34560 }, { "epoch": 0.6164342025469982, "grad_norm": 0.21242789924144745, "learning_rate": 1.9253088034338307e-05, "loss": 0.1048, "step": 34561 }, { "epoch": 0.6164520386687119, "grad_norm": 0.3348024785518646, "learning_rate": 1.9251573221454765e-05, "loss": 0.1667, "step": 34562 }, { "epoch": 0.6164698747904256, "grad_norm": 0.24434778094291687, "learning_rate": 1.925005843085412e-05, "loss": 0.1507, "step": 34563 }, { "epoch": 0.6164877109121393, "grad_norm": 0.22308313846588135, "learning_rate": 1.9248543662542237e-05, "loss": 0.1261, "step": 34564 }, { "epoch": 0.616505547033853, "grad_norm": 0.29116660356521606, "learning_rate": 1.9247028916524984e-05, "loss": 0.1287, "step": 34565 }, { "epoch": 0.6165233831555667, "grad_norm": 0.2406667321920395, "learning_rate": 1.924551419280822e-05, "loss": 0.1029, "step": 34566 }, { "epoch": 0.6165412192772803, "grad_norm": 0.2553635537624359, "learning_rate": 1.9243999491397844e-05, "loss": 0.1153, "step": 34567 }, { "epoch": 0.616559055398994, "grad_norm": 0.2551077902317047, "learning_rate": 1.9242484812299715e-05, "loss": 0.1841, "step": 34568 }, { "epoch": 0.6165768915207077, "grad_norm": 0.2785532474517822, "learning_rate": 1.92409701555197e-05, "loss": 0.1535, "step": 34569 }, { "epoch": 0.6165947276424214, "grad_norm": 0.3574562966823578, "learning_rate": 1.923945552106367e-05, "loss": 0.1461, "step": 34570 }, { "epoch": 0.6166125637641351, "grad_norm": 0.21311809122562408, "learning_rate": 1.9237940908937504e-05, "loss": 0.1181, "step": 34571 }, { "epoch": 0.6166303998858488, "grad_norm": 0.2024383842945099, "learning_rate": 1.923642631914707e-05, "loss": 0.1096, "step": 34572 }, { "epoch": 0.6166482360075625, "grad_norm": 0.27098020911216736, "learning_rate": 1.9234911751698238e-05, "loss": 0.1451, "step": 34573 }, { "epoch": 0.6166660721292762, "grad_norm": 0.417580783367157, "learning_rate": 1.9233397206596882e-05, "loss": 0.1281, "step": 34574 }, { "epoch": 0.61668390825099, "grad_norm": 0.3413103520870209, "learning_rate": 1.9231882683848857e-05, "loss": 0.1513, "step": 34575 }, { "epoch": 0.6167017443727036, "grad_norm": 0.2721801996231079, "learning_rate": 1.923036818346006e-05, "loss": 0.1043, "step": 34576 }, { "epoch": 0.6167195804944173, "grad_norm": 0.36068153381347656, "learning_rate": 1.9228853705436346e-05, "loss": 0.2121, "step": 34577 }, { "epoch": 0.616737416616131, "grad_norm": 0.2523716688156128, "learning_rate": 1.9227339249783594e-05, "loss": 0.1594, "step": 34578 }, { "epoch": 0.6167552527378447, "grad_norm": 0.282390832901001, "learning_rate": 1.9225824816507652e-05, "loss": 0.1155, "step": 34579 }, { "epoch": 0.6167730888595584, "grad_norm": 0.22251537442207336, "learning_rate": 1.9224310405614424e-05, "loss": 0.1649, "step": 34580 }, { "epoch": 0.6167909249812721, "grad_norm": 0.24038361012935638, "learning_rate": 1.9222796017109755e-05, "loss": 0.1264, "step": 34581 }, { "epoch": 0.6168087611029858, "grad_norm": 0.3214409351348877, "learning_rate": 1.9221281650999528e-05, "loss": 0.1094, "step": 34582 }, { "epoch": 0.6168265972246995, "grad_norm": 0.2674230635166168, "learning_rate": 1.9219767307289603e-05, "loss": 0.1233, "step": 34583 }, { "epoch": 0.6168444333464131, "grad_norm": 0.273513525724411, "learning_rate": 1.9218252985985864e-05, "loss": 0.1583, "step": 34584 }, { "epoch": 0.6168622694681268, "grad_norm": 0.2540189325809479, "learning_rate": 1.9216738687094173e-05, "loss": 0.1109, "step": 34585 }, { "epoch": 0.6168801055898405, "grad_norm": 0.2843306362628937, "learning_rate": 1.92152244106204e-05, "loss": 0.1056, "step": 34586 }, { "epoch": 0.6168979417115542, "grad_norm": 0.3077537715435028, "learning_rate": 1.921371015657041e-05, "loss": 0.2278, "step": 34587 }, { "epoch": 0.6169157778332679, "grad_norm": 0.3433613181114197, "learning_rate": 1.9212195924950073e-05, "loss": 0.1423, "step": 34588 }, { "epoch": 0.6169336139549816, "grad_norm": 0.21594177186489105, "learning_rate": 1.9210681715765277e-05, "loss": 0.0986, "step": 34589 }, { "epoch": 0.6169514500766953, "grad_norm": 0.33674225211143494, "learning_rate": 1.9209167529021873e-05, "loss": 0.1237, "step": 34590 }, { "epoch": 0.6169692861984091, "grad_norm": 0.22517625987529755, "learning_rate": 1.920765336472574e-05, "loss": 0.1437, "step": 34591 }, { "epoch": 0.6169871223201228, "grad_norm": 0.21776363253593445, "learning_rate": 1.9206139222882733e-05, "loss": 0.1623, "step": 34592 }, { "epoch": 0.6170049584418364, "grad_norm": 0.31965121626853943, "learning_rate": 1.920462510349874e-05, "loss": 0.1432, "step": 34593 }, { "epoch": 0.6170227945635501, "grad_norm": 0.2996142506599426, "learning_rate": 1.9203111006579616e-05, "loss": 0.1275, "step": 34594 }, { "epoch": 0.6170406306852638, "grad_norm": 0.22301287949085236, "learning_rate": 1.9201596932131242e-05, "loss": 0.1386, "step": 34595 }, { "epoch": 0.6170584668069775, "grad_norm": 0.28665006160736084, "learning_rate": 1.920008288015947e-05, "loss": 0.1695, "step": 34596 }, { "epoch": 0.6170763029286912, "grad_norm": 0.3356695771217346, "learning_rate": 1.919856885067019e-05, "loss": 0.1569, "step": 34597 }, { "epoch": 0.6170941390504049, "grad_norm": 0.29975688457489014, "learning_rate": 1.919705484366926e-05, "loss": 0.1258, "step": 34598 }, { "epoch": 0.6171119751721186, "grad_norm": 0.23956458270549774, "learning_rate": 1.9195540859162553e-05, "loss": 0.1217, "step": 34599 }, { "epoch": 0.6171298112938323, "grad_norm": 0.19150055944919586, "learning_rate": 1.919402689715593e-05, "loss": 0.1083, "step": 34600 }, { "epoch": 0.617147647415546, "grad_norm": 0.22420692443847656, "learning_rate": 1.9192512957655263e-05, "loss": 0.1238, "step": 34601 }, { "epoch": 0.6171654835372596, "grad_norm": 0.27167558670043945, "learning_rate": 1.9190999040666418e-05, "loss": 0.2033, "step": 34602 }, { "epoch": 0.6171833196589733, "grad_norm": 0.24470174312591553, "learning_rate": 1.9189485146195273e-05, "loss": 0.1348, "step": 34603 }, { "epoch": 0.617201155780687, "grad_norm": 0.27418550848960876, "learning_rate": 1.9187971274247697e-05, "loss": 0.1132, "step": 34604 }, { "epoch": 0.6172189919024007, "grad_norm": 0.48278388381004333, "learning_rate": 1.9186457424829536e-05, "loss": 0.1301, "step": 34605 }, { "epoch": 0.6172368280241144, "grad_norm": 0.22970767319202423, "learning_rate": 1.918494359794668e-05, "loss": 0.1658, "step": 34606 }, { "epoch": 0.6172546641458281, "grad_norm": 0.2338697463274002, "learning_rate": 1.9183429793605e-05, "loss": 0.1401, "step": 34607 }, { "epoch": 0.6172725002675419, "grad_norm": 0.32704606652259827, "learning_rate": 1.9181916011810344e-05, "loss": 0.2, "step": 34608 }, { "epoch": 0.6172903363892556, "grad_norm": 0.27111607789993286, "learning_rate": 1.9180402252568595e-05, "loss": 0.1646, "step": 34609 }, { "epoch": 0.6173081725109693, "grad_norm": 0.24766652286052704, "learning_rate": 1.9178888515885614e-05, "loss": 0.1522, "step": 34610 }, { "epoch": 0.6173260086326829, "grad_norm": 0.279319167137146, "learning_rate": 1.9177374801767275e-05, "loss": 0.1451, "step": 34611 }, { "epoch": 0.6173438447543966, "grad_norm": 0.19560639560222626, "learning_rate": 1.9175861110219444e-05, "loss": 0.1347, "step": 34612 }, { "epoch": 0.6173616808761103, "grad_norm": 0.20941701531410217, "learning_rate": 1.9174347441247993e-05, "loss": 0.1462, "step": 34613 }, { "epoch": 0.617379516997824, "grad_norm": 0.24162720143795013, "learning_rate": 1.9172833794858764e-05, "loss": 0.0993, "step": 34614 }, { "epoch": 0.6173973531195377, "grad_norm": 0.2666206359863281, "learning_rate": 1.9171320171057655e-05, "loss": 0.1176, "step": 34615 }, { "epoch": 0.6174151892412514, "grad_norm": 0.2780097424983978, "learning_rate": 1.9169806569850523e-05, "loss": 0.1736, "step": 34616 }, { "epoch": 0.6174330253629651, "grad_norm": 0.258361279964447, "learning_rate": 1.916829299124324e-05, "loss": 0.0983, "step": 34617 }, { "epoch": 0.6174508614846788, "grad_norm": 0.32893630862236023, "learning_rate": 1.9166779435241653e-05, "loss": 0.1402, "step": 34618 }, { "epoch": 0.6174686976063924, "grad_norm": 0.46025940775871277, "learning_rate": 1.9165265901851654e-05, "loss": 0.1776, "step": 34619 }, { "epoch": 0.6174865337281061, "grad_norm": 0.35117071866989136, "learning_rate": 1.9163752391079103e-05, "loss": 0.1739, "step": 34620 }, { "epoch": 0.6175043698498198, "grad_norm": 0.24502874910831451, "learning_rate": 1.9162238902929852e-05, "loss": 0.1037, "step": 34621 }, { "epoch": 0.6175222059715335, "grad_norm": 0.22721615433692932, "learning_rate": 1.9160725437409785e-05, "loss": 0.0963, "step": 34622 }, { "epoch": 0.6175400420932472, "grad_norm": 0.24761302769184113, "learning_rate": 1.9159211994524763e-05, "loss": 0.1273, "step": 34623 }, { "epoch": 0.6175578782149609, "grad_norm": 0.3963606059551239, "learning_rate": 1.9157698574280656e-05, "loss": 0.1661, "step": 34624 }, { "epoch": 0.6175757143366747, "grad_norm": 0.2536150813102722, "learning_rate": 1.9156185176683324e-05, "loss": 0.0821, "step": 34625 }, { "epoch": 0.6175935504583884, "grad_norm": 0.2644543945789337, "learning_rate": 1.9154671801738642e-05, "loss": 0.1194, "step": 34626 }, { "epoch": 0.6176113865801021, "grad_norm": 0.2237444370985031, "learning_rate": 1.9153158449452456e-05, "loss": 0.147, "step": 34627 }, { "epoch": 0.6176292227018157, "grad_norm": 0.31942877173423767, "learning_rate": 1.9151645119830664e-05, "loss": 0.1234, "step": 34628 }, { "epoch": 0.6176470588235294, "grad_norm": 0.26018068194389343, "learning_rate": 1.9150131812879105e-05, "loss": 0.15, "step": 34629 }, { "epoch": 0.6176648949452431, "grad_norm": 0.24610719084739685, "learning_rate": 1.914861852860366e-05, "loss": 0.1406, "step": 34630 }, { "epoch": 0.6176827310669568, "grad_norm": 0.24228277802467346, "learning_rate": 1.9147105267010184e-05, "loss": 0.1111, "step": 34631 }, { "epoch": 0.6177005671886705, "grad_norm": 0.24873076379299164, "learning_rate": 1.914559202810456e-05, "loss": 0.0777, "step": 34632 }, { "epoch": 0.6177184033103842, "grad_norm": 0.3159685432910919, "learning_rate": 1.9144078811892642e-05, "loss": 0.1694, "step": 34633 }, { "epoch": 0.6177362394320979, "grad_norm": 0.26278772950172424, "learning_rate": 1.9142565618380296e-05, "loss": 0.1365, "step": 34634 }, { "epoch": 0.6177540755538116, "grad_norm": 0.2747803330421448, "learning_rate": 1.914105244757338e-05, "loss": 0.103, "step": 34635 }, { "epoch": 0.6177719116755253, "grad_norm": 0.24300013482570648, "learning_rate": 1.9139539299477777e-05, "loss": 0.1233, "step": 34636 }, { "epoch": 0.6177897477972389, "grad_norm": 0.2568221092224121, "learning_rate": 1.9138026174099338e-05, "loss": 0.1406, "step": 34637 }, { "epoch": 0.6178075839189526, "grad_norm": 0.3869577646255493, "learning_rate": 1.9136513071443944e-05, "loss": 0.1404, "step": 34638 }, { "epoch": 0.6178254200406663, "grad_norm": 0.2926936447620392, "learning_rate": 1.9134999991517445e-05, "loss": 0.1289, "step": 34639 }, { "epoch": 0.61784325616238, "grad_norm": 0.2502843737602234, "learning_rate": 1.9133486934325704e-05, "loss": 0.1135, "step": 34640 }, { "epoch": 0.6178610922840937, "grad_norm": 0.2720404863357544, "learning_rate": 1.9131973899874605e-05, "loss": 0.1298, "step": 34641 }, { "epoch": 0.6178789284058075, "grad_norm": 0.3430882394313812, "learning_rate": 1.913046088817e-05, "loss": 0.1905, "step": 34642 }, { "epoch": 0.6178967645275212, "grad_norm": 0.20249223709106445, "learning_rate": 1.9128947899217754e-05, "loss": 0.0725, "step": 34643 }, { "epoch": 0.6179146006492349, "grad_norm": 0.23356664180755615, "learning_rate": 1.9127434933023724e-05, "loss": 0.1566, "step": 34644 }, { "epoch": 0.6179324367709486, "grad_norm": 0.2784213125705719, "learning_rate": 1.9125921989593797e-05, "loss": 0.1495, "step": 34645 }, { "epoch": 0.6179502728926622, "grad_norm": 0.2882097661495209, "learning_rate": 1.9124409068933826e-05, "loss": 0.1132, "step": 34646 }, { "epoch": 0.6179681090143759, "grad_norm": 0.2580234706401825, "learning_rate": 1.9122896171049673e-05, "loss": 0.1635, "step": 34647 }, { "epoch": 0.6179859451360896, "grad_norm": 0.3438928723335266, "learning_rate": 1.9121383295947196e-05, "loss": 0.2015, "step": 34648 }, { "epoch": 0.6180037812578033, "grad_norm": 0.3683916926383972, "learning_rate": 1.9119870443632277e-05, "loss": 0.164, "step": 34649 }, { "epoch": 0.618021617379517, "grad_norm": 0.4905417561531067, "learning_rate": 1.9118357614110764e-05, "loss": 0.15, "step": 34650 }, { "epoch": 0.6180394535012307, "grad_norm": 0.22359319031238556, "learning_rate": 1.9116844807388536e-05, "loss": 0.117, "step": 34651 }, { "epoch": 0.6180572896229444, "grad_norm": 0.29820236563682556, "learning_rate": 1.911533202347145e-05, "loss": 0.1605, "step": 34652 }, { "epoch": 0.6180751257446581, "grad_norm": 0.35331961512565613, "learning_rate": 1.9113819262365357e-05, "loss": 0.101, "step": 34653 }, { "epoch": 0.6180929618663717, "grad_norm": 0.29426342248916626, "learning_rate": 1.9112306524076147e-05, "loss": 0.122, "step": 34654 }, { "epoch": 0.6181107979880854, "grad_norm": 0.3921533524990082, "learning_rate": 1.9110793808609667e-05, "loss": 0.1474, "step": 34655 }, { "epoch": 0.6181286341097991, "grad_norm": 0.30939364433288574, "learning_rate": 1.9109281115971782e-05, "loss": 0.1759, "step": 34656 }, { "epoch": 0.6181464702315128, "grad_norm": 0.34440159797668457, "learning_rate": 1.9107768446168357e-05, "loss": 0.1547, "step": 34657 }, { "epoch": 0.6181643063532265, "grad_norm": 0.2829227149486542, "learning_rate": 1.9106255799205256e-05, "loss": 0.1298, "step": 34658 }, { "epoch": 0.6181821424749403, "grad_norm": 0.29732856154441833, "learning_rate": 1.910474317508835e-05, "loss": 0.205, "step": 34659 }, { "epoch": 0.618199978596654, "grad_norm": 0.2636476755142212, "learning_rate": 1.9103230573823492e-05, "loss": 0.122, "step": 34660 }, { "epoch": 0.6182178147183677, "grad_norm": 0.2381862848997116, "learning_rate": 1.9101717995416542e-05, "loss": 0.1476, "step": 34661 }, { "epoch": 0.6182356508400814, "grad_norm": 0.23479658365249634, "learning_rate": 1.9100205439873382e-05, "loss": 0.0886, "step": 34662 }, { "epoch": 0.618253486961795, "grad_norm": 0.22317281365394592, "learning_rate": 1.9098692907199858e-05, "loss": 0.1242, "step": 34663 }, { "epoch": 0.6182713230835087, "grad_norm": 0.3403395712375641, "learning_rate": 1.9097180397401837e-05, "loss": 0.1386, "step": 34664 }, { "epoch": 0.6182891592052224, "grad_norm": 0.5015697479248047, "learning_rate": 1.909566791048519e-05, "loss": 0.1562, "step": 34665 }, { "epoch": 0.6183069953269361, "grad_norm": 0.3411105275154114, "learning_rate": 1.9094155446455757e-05, "loss": 0.1174, "step": 34666 }, { "epoch": 0.6183248314486498, "grad_norm": 0.27326858043670654, "learning_rate": 1.9092643005319433e-05, "loss": 0.1115, "step": 34667 }, { "epoch": 0.6183426675703635, "grad_norm": 0.3388707637786865, "learning_rate": 1.9091130587082066e-05, "loss": 0.1288, "step": 34668 }, { "epoch": 0.6183605036920772, "grad_norm": 0.27115598320961, "learning_rate": 1.9089618191749513e-05, "loss": 0.1243, "step": 34669 }, { "epoch": 0.6183783398137909, "grad_norm": 0.22266532480716705, "learning_rate": 1.9088105819327633e-05, "loss": 0.1227, "step": 34670 }, { "epoch": 0.6183961759355046, "grad_norm": 0.24149054288864136, "learning_rate": 1.9086593469822307e-05, "loss": 0.1492, "step": 34671 }, { "epoch": 0.6184140120572182, "grad_norm": 0.20407485961914062, "learning_rate": 1.9085081143239386e-05, "loss": 0.1081, "step": 34672 }, { "epoch": 0.6184318481789319, "grad_norm": 0.1755708009004593, "learning_rate": 1.9083568839584733e-05, "loss": 0.0737, "step": 34673 }, { "epoch": 0.6184496843006456, "grad_norm": 0.19151778519153595, "learning_rate": 1.90820565588642e-05, "loss": 0.1032, "step": 34674 }, { "epoch": 0.6184675204223593, "grad_norm": 0.25832992792129517, "learning_rate": 1.9080544301083677e-05, "loss": 0.1379, "step": 34675 }, { "epoch": 0.6184853565440731, "grad_norm": 0.3052057921886444, "learning_rate": 1.9079032066249004e-05, "loss": 0.1275, "step": 34676 }, { "epoch": 0.6185031926657868, "grad_norm": 0.2708165943622589, "learning_rate": 1.9077519854366044e-05, "loss": 0.1224, "step": 34677 }, { "epoch": 0.6185210287875005, "grad_norm": 0.26803216338157654, "learning_rate": 1.9076007665440665e-05, "loss": 0.1075, "step": 34678 }, { "epoch": 0.6185388649092142, "grad_norm": 0.2422078251838684, "learning_rate": 1.907449549947872e-05, "loss": 0.107, "step": 34679 }, { "epoch": 0.6185567010309279, "grad_norm": 0.2929508686065674, "learning_rate": 1.9072983356486086e-05, "loss": 0.0941, "step": 34680 }, { "epoch": 0.6185745371526415, "grad_norm": 0.2869585156440735, "learning_rate": 1.9071471236468613e-05, "loss": 0.116, "step": 34681 }, { "epoch": 0.6185923732743552, "grad_norm": 0.3429265022277832, "learning_rate": 1.9069959139432167e-05, "loss": 0.1027, "step": 34682 }, { "epoch": 0.6186102093960689, "grad_norm": 0.2660747766494751, "learning_rate": 1.9068447065382596e-05, "loss": 0.1154, "step": 34683 }, { "epoch": 0.6186280455177826, "grad_norm": 0.23053212463855743, "learning_rate": 1.9066935014325785e-05, "loss": 0.1236, "step": 34684 }, { "epoch": 0.6186458816394963, "grad_norm": 0.3426898419857025, "learning_rate": 1.906542298626758e-05, "loss": 0.1502, "step": 34685 }, { "epoch": 0.61866371776121, "grad_norm": 0.19473044574260712, "learning_rate": 1.9063910981213845e-05, "loss": 0.0907, "step": 34686 }, { "epoch": 0.6186815538829237, "grad_norm": 0.534480631351471, "learning_rate": 1.9062398999170432e-05, "loss": 0.174, "step": 34687 }, { "epoch": 0.6186993900046374, "grad_norm": 0.2357155680656433, "learning_rate": 1.9060887040143226e-05, "loss": 0.1068, "step": 34688 }, { "epoch": 0.618717226126351, "grad_norm": 0.238226518034935, "learning_rate": 1.905937510413807e-05, "loss": 0.1192, "step": 34689 }, { "epoch": 0.6187350622480647, "grad_norm": 0.4326547086238861, "learning_rate": 1.905786319116083e-05, "loss": 0.1522, "step": 34690 }, { "epoch": 0.6187528983697784, "grad_norm": 0.5117208361625671, "learning_rate": 1.9056351301217356e-05, "loss": 0.1849, "step": 34691 }, { "epoch": 0.6187707344914922, "grad_norm": 0.39278343319892883, "learning_rate": 1.9054839434313514e-05, "loss": 0.1761, "step": 34692 }, { "epoch": 0.6187885706132059, "grad_norm": 0.20553728938102722, "learning_rate": 1.9053327590455178e-05, "loss": 0.1125, "step": 34693 }, { "epoch": 0.6188064067349196, "grad_norm": 0.2404250055551529, "learning_rate": 1.9051815769648197e-05, "loss": 0.1286, "step": 34694 }, { "epoch": 0.6188242428566333, "grad_norm": 0.24899372458457947, "learning_rate": 1.9050303971898432e-05, "loss": 0.1014, "step": 34695 }, { "epoch": 0.618842078978347, "grad_norm": 0.2878899872303009, "learning_rate": 1.9048792197211738e-05, "loss": 0.1042, "step": 34696 }, { "epoch": 0.6188599151000607, "grad_norm": 0.21250182390213013, "learning_rate": 1.9047280445593986e-05, "loss": 0.1317, "step": 34697 }, { "epoch": 0.6188777512217744, "grad_norm": 0.2949186861515045, "learning_rate": 1.904576871705103e-05, "loss": 0.126, "step": 34698 }, { "epoch": 0.618895587343488, "grad_norm": 0.28523415327072144, "learning_rate": 1.9044257011588734e-05, "loss": 0.0839, "step": 34699 }, { "epoch": 0.6189134234652017, "grad_norm": 0.24234086275100708, "learning_rate": 1.9042745329212944e-05, "loss": 0.1051, "step": 34700 }, { "epoch": 0.6189312595869154, "grad_norm": 0.29429903626441956, "learning_rate": 1.9041233669929542e-05, "loss": 0.1268, "step": 34701 }, { "epoch": 0.6189490957086291, "grad_norm": 0.24774394929409027, "learning_rate": 1.9039722033744377e-05, "loss": 0.1529, "step": 34702 }, { "epoch": 0.6189669318303428, "grad_norm": 0.303415447473526, "learning_rate": 1.903821042066331e-05, "loss": 0.0951, "step": 34703 }, { "epoch": 0.6189847679520565, "grad_norm": 0.36782971024513245, "learning_rate": 1.903669883069219e-05, "loss": 0.1009, "step": 34704 }, { "epoch": 0.6190026040737702, "grad_norm": 0.539315402507782, "learning_rate": 1.9035187263836883e-05, "loss": 0.2632, "step": 34705 }, { "epoch": 0.6190204401954839, "grad_norm": 0.28242090344429016, "learning_rate": 1.9033675720103253e-05, "loss": 0.149, "step": 34706 }, { "epoch": 0.6190382763171975, "grad_norm": 0.2896815836429596, "learning_rate": 1.903216419949716e-05, "loss": 0.1433, "step": 34707 }, { "epoch": 0.6190561124389112, "grad_norm": 0.22880099713802338, "learning_rate": 1.9030652702024463e-05, "loss": 0.0924, "step": 34708 }, { "epoch": 0.619073948560625, "grad_norm": 0.23233024775981903, "learning_rate": 1.9029141227691004e-05, "loss": 0.108, "step": 34709 }, { "epoch": 0.6190917846823387, "grad_norm": 0.25911736488342285, "learning_rate": 1.902762977650267e-05, "loss": 0.1474, "step": 34710 }, { "epoch": 0.6191096208040524, "grad_norm": 0.33848893642425537, "learning_rate": 1.9026118348465306e-05, "loss": 0.1734, "step": 34711 }, { "epoch": 0.6191274569257661, "grad_norm": 0.23676836490631104, "learning_rate": 1.902460694358476e-05, "loss": 0.0838, "step": 34712 }, { "epoch": 0.6191452930474798, "grad_norm": 0.34058821201324463, "learning_rate": 1.9023095561866906e-05, "loss": 0.1424, "step": 34713 }, { "epoch": 0.6191631291691935, "grad_norm": 0.32348740100860596, "learning_rate": 1.9021584203317598e-05, "loss": 0.1966, "step": 34714 }, { "epoch": 0.6191809652909072, "grad_norm": 0.27467042207717896, "learning_rate": 1.90200728679427e-05, "loss": 0.1908, "step": 34715 }, { "epoch": 0.6191988014126208, "grad_norm": 0.2522459030151367, "learning_rate": 1.901856155574806e-05, "loss": 0.1498, "step": 34716 }, { "epoch": 0.6192166375343345, "grad_norm": 0.23489893972873688, "learning_rate": 1.9017050266739543e-05, "loss": 0.1081, "step": 34717 }, { "epoch": 0.6192344736560482, "grad_norm": 0.2017030119895935, "learning_rate": 1.9015539000923e-05, "loss": 0.1348, "step": 34718 }, { "epoch": 0.6192523097777619, "grad_norm": 0.2718386650085449, "learning_rate": 1.90140277583043e-05, "loss": 0.1295, "step": 34719 }, { "epoch": 0.6192701458994756, "grad_norm": 0.39019495248794556, "learning_rate": 1.9012516538889295e-05, "loss": 0.2074, "step": 34720 }, { "epoch": 0.6192879820211893, "grad_norm": 0.21726562082767487, "learning_rate": 1.9011005342683847e-05, "loss": 0.1016, "step": 34721 }, { "epoch": 0.619305818142903, "grad_norm": 0.25415462255477905, "learning_rate": 1.9009494169693796e-05, "loss": 0.1318, "step": 34722 }, { "epoch": 0.6193236542646167, "grad_norm": 0.2440441995859146, "learning_rate": 1.9007983019925032e-05, "loss": 0.0837, "step": 34723 }, { "epoch": 0.6193414903863304, "grad_norm": 0.30083510279655457, "learning_rate": 1.9006471893383392e-05, "loss": 0.1415, "step": 34724 }, { "epoch": 0.619359326508044, "grad_norm": 0.266088604927063, "learning_rate": 1.9004960790074734e-05, "loss": 0.1192, "step": 34725 }, { "epoch": 0.6193771626297578, "grad_norm": 0.25652962923049927, "learning_rate": 1.9003449710004916e-05, "loss": 0.1444, "step": 34726 }, { "epoch": 0.6193949987514715, "grad_norm": 0.26292935013771057, "learning_rate": 1.90019386531798e-05, "loss": 0.1206, "step": 34727 }, { "epoch": 0.6194128348731852, "grad_norm": 0.31460875272750854, "learning_rate": 1.9000427619605247e-05, "loss": 0.1319, "step": 34728 }, { "epoch": 0.6194306709948989, "grad_norm": 0.26876816153526306, "learning_rate": 1.899891660928711e-05, "loss": 0.149, "step": 34729 }, { "epoch": 0.6194485071166126, "grad_norm": 0.3208983540534973, "learning_rate": 1.899740562223124e-05, "loss": 0.1117, "step": 34730 }, { "epoch": 0.6194663432383263, "grad_norm": 0.2541704475879669, "learning_rate": 1.899589465844349e-05, "loss": 0.1593, "step": 34731 }, { "epoch": 0.61948417936004, "grad_norm": 0.3421974778175354, "learning_rate": 1.8994383717929736e-05, "loss": 0.1915, "step": 34732 }, { "epoch": 0.6195020154817537, "grad_norm": 0.3019033670425415, "learning_rate": 1.8992872800695823e-05, "loss": 0.15, "step": 34733 }, { "epoch": 0.6195198516034673, "grad_norm": 0.26344621181488037, "learning_rate": 1.8991361906747614e-05, "loss": 0.1006, "step": 34734 }, { "epoch": 0.619537687725181, "grad_norm": 0.35831519961357117, "learning_rate": 1.898985103609095e-05, "loss": 0.1487, "step": 34735 }, { "epoch": 0.6195555238468947, "grad_norm": 0.28683534264564514, "learning_rate": 1.8988340188731712e-05, "loss": 0.1165, "step": 34736 }, { "epoch": 0.6195733599686084, "grad_norm": 0.26843005418777466, "learning_rate": 1.898682936467574e-05, "loss": 0.1318, "step": 34737 }, { "epoch": 0.6195911960903221, "grad_norm": 0.31570354104042053, "learning_rate": 1.8985318563928897e-05, "loss": 0.1489, "step": 34738 }, { "epoch": 0.6196090322120358, "grad_norm": 0.3438796103000641, "learning_rate": 1.8983807786497026e-05, "loss": 0.1762, "step": 34739 }, { "epoch": 0.6196268683337495, "grad_norm": 0.32528194785118103, "learning_rate": 1.8982297032386005e-05, "loss": 0.1045, "step": 34740 }, { "epoch": 0.6196447044554632, "grad_norm": 0.3089931905269623, "learning_rate": 1.898078630160167e-05, "loss": 0.1323, "step": 34741 }, { "epoch": 0.6196625405771768, "grad_norm": 0.4437181055545807, "learning_rate": 1.8979275594149897e-05, "loss": 0.1738, "step": 34742 }, { "epoch": 0.6196803766988906, "grad_norm": 0.2528478801250458, "learning_rate": 1.8977764910036526e-05, "loss": 0.1181, "step": 34743 }, { "epoch": 0.6196982128206043, "grad_norm": 0.23057478666305542, "learning_rate": 1.897625424926741e-05, "loss": 0.1161, "step": 34744 }, { "epoch": 0.619716048942318, "grad_norm": 0.19571517407894135, "learning_rate": 1.897474361184843e-05, "loss": 0.1311, "step": 34745 }, { "epoch": 0.6197338850640317, "grad_norm": 0.33104103803634644, "learning_rate": 1.8973232997785416e-05, "loss": 0.1542, "step": 34746 }, { "epoch": 0.6197517211857454, "grad_norm": 0.20647558569908142, "learning_rate": 1.897172240708423e-05, "loss": 0.1247, "step": 34747 }, { "epoch": 0.6197695573074591, "grad_norm": 0.23894667625427246, "learning_rate": 1.8970211839750724e-05, "loss": 0.1833, "step": 34748 }, { "epoch": 0.6197873934291728, "grad_norm": 0.26335445046424866, "learning_rate": 1.896870129579077e-05, "loss": 0.1437, "step": 34749 }, { "epoch": 0.6198052295508865, "grad_norm": 0.2467309683561325, "learning_rate": 1.8967190775210214e-05, "loss": 0.0854, "step": 34750 }, { "epoch": 0.6198230656726001, "grad_norm": 0.2242477834224701, "learning_rate": 1.896568027801491e-05, "loss": 0.1147, "step": 34751 }, { "epoch": 0.6198409017943138, "grad_norm": 0.22373074293136597, "learning_rate": 1.89641698042107e-05, "loss": 0.1186, "step": 34752 }, { "epoch": 0.6198587379160275, "grad_norm": 0.2690976560115814, "learning_rate": 1.896265935380347e-05, "loss": 0.1225, "step": 34753 }, { "epoch": 0.6198765740377412, "grad_norm": 0.35138142108917236, "learning_rate": 1.8961148926799048e-05, "loss": 0.1265, "step": 34754 }, { "epoch": 0.6198944101594549, "grad_norm": 0.2890932261943817, "learning_rate": 1.8959638523203305e-05, "loss": 0.138, "step": 34755 }, { "epoch": 0.6199122462811686, "grad_norm": 0.22307175397872925, "learning_rate": 1.8958128143022086e-05, "loss": 0.1057, "step": 34756 }, { "epoch": 0.6199300824028823, "grad_norm": 0.35873791575431824, "learning_rate": 1.895661778626124e-05, "loss": 0.1605, "step": 34757 }, { "epoch": 0.619947918524596, "grad_norm": 0.2750474512577057, "learning_rate": 1.8955107452926643e-05, "loss": 0.1413, "step": 34758 }, { "epoch": 0.6199657546463097, "grad_norm": 0.22222664952278137, "learning_rate": 1.895359714302414e-05, "loss": 0.0822, "step": 34759 }, { "epoch": 0.6199835907680235, "grad_norm": 0.35030174255371094, "learning_rate": 1.8952086856559574e-05, "loss": 0.119, "step": 34760 }, { "epoch": 0.6200014268897371, "grad_norm": 0.24885420501232147, "learning_rate": 1.8950576593538804e-05, "loss": 0.0902, "step": 34761 }, { "epoch": 0.6200192630114508, "grad_norm": 0.2903689444065094, "learning_rate": 1.8949066353967694e-05, "loss": 0.1727, "step": 34762 }, { "epoch": 0.6200370991331645, "grad_norm": 0.17736518383026123, "learning_rate": 1.89475561378521e-05, "loss": 0.0853, "step": 34763 }, { "epoch": 0.6200549352548782, "grad_norm": 0.22412770986557007, "learning_rate": 1.8946045945197867e-05, "loss": 0.1222, "step": 34764 }, { "epoch": 0.6200727713765919, "grad_norm": 0.2723255157470703, "learning_rate": 1.8944535776010834e-05, "loss": 0.1546, "step": 34765 }, { "epoch": 0.6200906074983056, "grad_norm": 0.2728655934333801, "learning_rate": 1.894302563029689e-05, "loss": 0.1712, "step": 34766 }, { "epoch": 0.6201084436200193, "grad_norm": 0.267630398273468, "learning_rate": 1.894151550806187e-05, "loss": 0.1003, "step": 34767 }, { "epoch": 0.620126279741733, "grad_norm": 0.30812835693359375, "learning_rate": 1.8940005409311618e-05, "loss": 0.1416, "step": 34768 }, { "epoch": 0.6201441158634466, "grad_norm": 0.24239419400691986, "learning_rate": 1.8938495334052008e-05, "loss": 0.1337, "step": 34769 }, { "epoch": 0.6201619519851603, "grad_norm": 0.21828460693359375, "learning_rate": 1.8936985282288872e-05, "loss": 0.0862, "step": 34770 }, { "epoch": 0.620179788106874, "grad_norm": 0.2725733816623688, "learning_rate": 1.8935475254028085e-05, "loss": 0.1338, "step": 34771 }, { "epoch": 0.6201976242285877, "grad_norm": 0.25624027848243713, "learning_rate": 1.8933965249275494e-05, "loss": 0.181, "step": 34772 }, { "epoch": 0.6202154603503014, "grad_norm": 0.19563564658164978, "learning_rate": 1.893245526803694e-05, "loss": 0.1149, "step": 34773 }, { "epoch": 0.6202332964720151, "grad_norm": 0.22358255088329315, "learning_rate": 1.893094531031828e-05, "loss": 0.117, "step": 34774 }, { "epoch": 0.6202511325937288, "grad_norm": 0.212210550904274, "learning_rate": 1.892943537612538e-05, "loss": 0.149, "step": 34775 }, { "epoch": 0.6202689687154425, "grad_norm": 0.2765887975692749, "learning_rate": 1.892792546546409e-05, "loss": 0.1296, "step": 34776 }, { "epoch": 0.6202868048371563, "grad_norm": 0.3130955100059509, "learning_rate": 1.8926415578340256e-05, "loss": 0.1666, "step": 34777 }, { "epoch": 0.62030464095887, "grad_norm": 0.3359980881214142, "learning_rate": 1.8924905714759723e-05, "loss": 0.1215, "step": 34778 }, { "epoch": 0.6203224770805836, "grad_norm": 0.2572863698005676, "learning_rate": 1.8923395874728365e-05, "loss": 0.0559, "step": 34779 }, { "epoch": 0.6203403132022973, "grad_norm": 0.32762399315834045, "learning_rate": 1.892188605825202e-05, "loss": 0.1947, "step": 34780 }, { "epoch": 0.620358149324011, "grad_norm": 0.23259948194026947, "learning_rate": 1.8920376265336544e-05, "loss": 0.103, "step": 34781 }, { "epoch": 0.6203759854457247, "grad_norm": 0.20898884534835815, "learning_rate": 1.8918866495987792e-05, "loss": 0.0944, "step": 34782 }, { "epoch": 0.6203938215674384, "grad_norm": 0.18224264681339264, "learning_rate": 1.8917356750211602e-05, "loss": 0.0847, "step": 34783 }, { "epoch": 0.6204116576891521, "grad_norm": 0.3516651391983032, "learning_rate": 1.891584702801385e-05, "loss": 0.143, "step": 34784 }, { "epoch": 0.6204294938108658, "grad_norm": 0.5660027861595154, "learning_rate": 1.8914337329400374e-05, "loss": 0.169, "step": 34785 }, { "epoch": 0.6204473299325794, "grad_norm": 0.23294174671173096, "learning_rate": 1.891282765437703e-05, "loss": 0.1123, "step": 34786 }, { "epoch": 0.6204651660542931, "grad_norm": 0.22643277049064636, "learning_rate": 1.891131800294966e-05, "loss": 0.1207, "step": 34787 }, { "epoch": 0.6204830021760068, "grad_norm": 0.2500450015068054, "learning_rate": 1.890980837512413e-05, "loss": 0.1193, "step": 34788 }, { "epoch": 0.6205008382977205, "grad_norm": 0.24870911240577698, "learning_rate": 1.890829877090629e-05, "loss": 0.1241, "step": 34789 }, { "epoch": 0.6205186744194342, "grad_norm": 0.27819690108299255, "learning_rate": 1.8906789190301984e-05, "loss": 0.1088, "step": 34790 }, { "epoch": 0.6205365105411479, "grad_norm": 0.29976966977119446, "learning_rate": 1.890527963331706e-05, "loss": 0.1404, "step": 34791 }, { "epoch": 0.6205543466628616, "grad_norm": 0.26283878087997437, "learning_rate": 1.890377009995739e-05, "loss": 0.1238, "step": 34792 }, { "epoch": 0.6205721827845754, "grad_norm": 0.23070664703845978, "learning_rate": 1.890226059022881e-05, "loss": 0.1428, "step": 34793 }, { "epoch": 0.6205900189062891, "grad_norm": 0.31828173995018005, "learning_rate": 1.8900751104137178e-05, "loss": 0.0978, "step": 34794 }, { "epoch": 0.6206078550280028, "grad_norm": 0.269256055355072, "learning_rate": 1.889924164168833e-05, "loss": 0.101, "step": 34795 }, { "epoch": 0.6206256911497164, "grad_norm": 0.2324729859828949, "learning_rate": 1.8897732202888133e-05, "loss": 0.1637, "step": 34796 }, { "epoch": 0.6206435272714301, "grad_norm": 0.28690508008003235, "learning_rate": 1.889622278774243e-05, "loss": 0.1419, "step": 34797 }, { "epoch": 0.6206613633931438, "grad_norm": 0.31229984760284424, "learning_rate": 1.8894713396257086e-05, "loss": 0.1487, "step": 34798 }, { "epoch": 0.6206791995148575, "grad_norm": 0.309701532125473, "learning_rate": 1.889320402843794e-05, "loss": 0.1304, "step": 34799 }, { "epoch": 0.6206970356365712, "grad_norm": 0.28423207998275757, "learning_rate": 1.889169468429083e-05, "loss": 0.1582, "step": 34800 }, { "epoch": 0.6207148717582849, "grad_norm": 0.2632710337638855, "learning_rate": 1.8890185363821637e-05, "loss": 0.1378, "step": 34801 }, { "epoch": 0.6207327078799986, "grad_norm": 0.2610422372817993, "learning_rate": 1.888867606703619e-05, "loss": 0.149, "step": 34802 }, { "epoch": 0.6207505440017123, "grad_norm": 0.28421416878700256, "learning_rate": 1.8887166793940344e-05, "loss": 0.1641, "step": 34803 }, { "epoch": 0.620768380123426, "grad_norm": 0.22791031002998352, "learning_rate": 1.8885657544539945e-05, "loss": 0.1239, "step": 34804 }, { "epoch": 0.6207862162451396, "grad_norm": 0.273660272359848, "learning_rate": 1.888414831884086e-05, "loss": 0.1385, "step": 34805 }, { "epoch": 0.6208040523668533, "grad_norm": 0.19438989460468292, "learning_rate": 1.8882639116848928e-05, "loss": 0.1135, "step": 34806 }, { "epoch": 0.620821888488567, "grad_norm": 0.19421444833278656, "learning_rate": 1.888112993857e-05, "loss": 0.1068, "step": 34807 }, { "epoch": 0.6208397246102807, "grad_norm": 0.21850250661373138, "learning_rate": 1.8879620784009918e-05, "loss": 0.0892, "step": 34808 }, { "epoch": 0.6208575607319944, "grad_norm": 0.27259522676467896, "learning_rate": 1.8878111653174544e-05, "loss": 0.1675, "step": 34809 }, { "epoch": 0.6208753968537082, "grad_norm": 0.23691260814666748, "learning_rate": 1.887660254606972e-05, "loss": 0.1078, "step": 34810 }, { "epoch": 0.6208932329754219, "grad_norm": 0.2843174636363983, "learning_rate": 1.8875093462701308e-05, "loss": 0.1262, "step": 34811 }, { "epoch": 0.6209110690971356, "grad_norm": 0.3329859972000122, "learning_rate": 1.8873584403075144e-05, "loss": 0.1885, "step": 34812 }, { "epoch": 0.6209289052188492, "grad_norm": 0.25744545459747314, "learning_rate": 1.8872075367197077e-05, "loss": 0.1447, "step": 34813 }, { "epoch": 0.6209467413405629, "grad_norm": 0.24773626029491425, "learning_rate": 1.887056635507297e-05, "loss": 0.1418, "step": 34814 }, { "epoch": 0.6209645774622766, "grad_norm": 0.25728708505630493, "learning_rate": 1.886905736670867e-05, "loss": 0.1077, "step": 34815 }, { "epoch": 0.6209824135839903, "grad_norm": 0.33350130915641785, "learning_rate": 1.8867548402110013e-05, "loss": 0.1396, "step": 34816 }, { "epoch": 0.621000249705704, "grad_norm": 0.26106029748916626, "learning_rate": 1.8866039461282856e-05, "loss": 0.155, "step": 34817 }, { "epoch": 0.6210180858274177, "grad_norm": 0.4445750415325165, "learning_rate": 1.886453054423305e-05, "loss": 0.1108, "step": 34818 }, { "epoch": 0.6210359219491314, "grad_norm": 0.25004708766937256, "learning_rate": 1.8863021650966446e-05, "loss": 0.1409, "step": 34819 }, { "epoch": 0.6210537580708451, "grad_norm": 0.1734635978937149, "learning_rate": 1.886151278148889e-05, "loss": 0.0696, "step": 34820 }, { "epoch": 0.6210715941925588, "grad_norm": 0.28979402780532837, "learning_rate": 1.8860003935806232e-05, "loss": 0.1816, "step": 34821 }, { "epoch": 0.6210894303142724, "grad_norm": 0.29880282282829285, "learning_rate": 1.8858495113924304e-05, "loss": 0.0975, "step": 34822 }, { "epoch": 0.6211072664359861, "grad_norm": 0.29395395517349243, "learning_rate": 1.8856986315848985e-05, "loss": 0.1123, "step": 34823 }, { "epoch": 0.6211251025576998, "grad_norm": 0.3052173852920532, "learning_rate": 1.8855477541586103e-05, "loss": 0.1338, "step": 34824 }, { "epoch": 0.6211429386794135, "grad_norm": 0.23753082752227783, "learning_rate": 1.8853968791141517e-05, "loss": 0.1175, "step": 34825 }, { "epoch": 0.6211607748011272, "grad_norm": 0.24740256369113922, "learning_rate": 1.885246006452106e-05, "loss": 0.1373, "step": 34826 }, { "epoch": 0.621178610922841, "grad_norm": 0.2468625158071518, "learning_rate": 1.8850951361730603e-05, "loss": 0.1125, "step": 34827 }, { "epoch": 0.6211964470445547, "grad_norm": 0.2553881108760834, "learning_rate": 1.8849442682775984e-05, "loss": 0.0977, "step": 34828 }, { "epoch": 0.6212142831662684, "grad_norm": 0.1776779592037201, "learning_rate": 1.884793402766304e-05, "loss": 0.0435, "step": 34829 }, { "epoch": 0.621232119287982, "grad_norm": 0.2639588713645935, "learning_rate": 1.884642539639763e-05, "loss": 0.1508, "step": 34830 }, { "epoch": 0.6212499554096957, "grad_norm": 0.340195894241333, "learning_rate": 1.8844916788985603e-05, "loss": 0.1287, "step": 34831 }, { "epoch": 0.6212677915314094, "grad_norm": 0.2531953752040863, "learning_rate": 1.8843408205432807e-05, "loss": 0.1626, "step": 34832 }, { "epoch": 0.6212856276531231, "grad_norm": 0.25456300377845764, "learning_rate": 1.884189964574509e-05, "loss": 0.0749, "step": 34833 }, { "epoch": 0.6213034637748368, "grad_norm": 0.26447778940200806, "learning_rate": 1.8840391109928294e-05, "loss": 0.0955, "step": 34834 }, { "epoch": 0.6213212998965505, "grad_norm": 0.2910635471343994, "learning_rate": 1.883888259798826e-05, "loss": 0.1499, "step": 34835 }, { "epoch": 0.6213391360182642, "grad_norm": 0.24692995846271515, "learning_rate": 1.8837374109930856e-05, "loss": 0.1165, "step": 34836 }, { "epoch": 0.6213569721399779, "grad_norm": 0.28305307030677795, "learning_rate": 1.8835865645761915e-05, "loss": 0.1413, "step": 34837 }, { "epoch": 0.6213748082616916, "grad_norm": 0.3302740752696991, "learning_rate": 1.883435720548729e-05, "loss": 0.1073, "step": 34838 }, { "epoch": 0.6213926443834052, "grad_norm": 0.23480768501758575, "learning_rate": 1.8832848789112816e-05, "loss": 0.091, "step": 34839 }, { "epoch": 0.6214104805051189, "grad_norm": 0.2914566397666931, "learning_rate": 1.883134039664436e-05, "loss": 0.1474, "step": 34840 }, { "epoch": 0.6214283166268326, "grad_norm": 0.3203480839729309, "learning_rate": 1.8829832028087756e-05, "loss": 0.1548, "step": 34841 }, { "epoch": 0.6214461527485463, "grad_norm": 0.2751144766807556, "learning_rate": 1.8828323683448862e-05, "loss": 0.1295, "step": 34842 }, { "epoch": 0.62146398887026, "grad_norm": 0.25555625557899475, "learning_rate": 1.8826815362733503e-05, "loss": 0.1739, "step": 34843 }, { "epoch": 0.6214818249919738, "grad_norm": 0.250133216381073, "learning_rate": 1.882530706594755e-05, "loss": 0.0985, "step": 34844 }, { "epoch": 0.6214996611136875, "grad_norm": 0.30852439999580383, "learning_rate": 1.8823798793096832e-05, "loss": 0.2224, "step": 34845 }, { "epoch": 0.6215174972354012, "grad_norm": 0.23542943596839905, "learning_rate": 1.882229054418721e-05, "loss": 0.0653, "step": 34846 }, { "epoch": 0.6215353333571149, "grad_norm": 0.30950629711151123, "learning_rate": 1.8820782319224524e-05, "loss": 0.1665, "step": 34847 }, { "epoch": 0.6215531694788285, "grad_norm": 0.3568834066390991, "learning_rate": 1.8819274118214612e-05, "loss": 0.0878, "step": 34848 }, { "epoch": 0.6215710056005422, "grad_norm": 0.4003061056137085, "learning_rate": 1.8817765941163335e-05, "loss": 0.1161, "step": 34849 }, { "epoch": 0.6215888417222559, "grad_norm": 0.2607308030128479, "learning_rate": 1.8816257788076534e-05, "loss": 0.1639, "step": 34850 }, { "epoch": 0.6216066778439696, "grad_norm": 0.2799322307109833, "learning_rate": 1.8814749658960047e-05, "loss": 0.1577, "step": 34851 }, { "epoch": 0.6216245139656833, "grad_norm": 0.22770659625530243, "learning_rate": 1.8813241553819723e-05, "loss": 0.1039, "step": 34852 }, { "epoch": 0.621642350087397, "grad_norm": 0.26042523980140686, "learning_rate": 1.8811733472661422e-05, "loss": 0.1352, "step": 34853 }, { "epoch": 0.6216601862091107, "grad_norm": 0.223334401845932, "learning_rate": 1.881022541549098e-05, "loss": 0.1454, "step": 34854 }, { "epoch": 0.6216780223308244, "grad_norm": 0.30797743797302246, "learning_rate": 1.880871738231424e-05, "loss": 0.1655, "step": 34855 }, { "epoch": 0.621695858452538, "grad_norm": 0.16385234892368317, "learning_rate": 1.880720937313704e-05, "loss": 0.0549, "step": 34856 }, { "epoch": 0.6217136945742517, "grad_norm": 0.3494318425655365, "learning_rate": 1.880570138796525e-05, "loss": 0.166, "step": 34857 }, { "epoch": 0.6217315306959654, "grad_norm": 0.18118543922901154, "learning_rate": 1.880419342680469e-05, "loss": 0.1032, "step": 34858 }, { "epoch": 0.6217493668176791, "grad_norm": 0.3267197608947754, "learning_rate": 1.8802685489661227e-05, "loss": 0.1634, "step": 34859 }, { "epoch": 0.6217672029393928, "grad_norm": 0.31090041995048523, "learning_rate": 1.880117757654069e-05, "loss": 0.1309, "step": 34860 }, { "epoch": 0.6217850390611066, "grad_norm": 0.3254074454307556, "learning_rate": 1.8799669687448922e-05, "loss": 0.1998, "step": 34861 }, { "epoch": 0.6218028751828203, "grad_norm": 0.25550737977027893, "learning_rate": 1.8798161822391785e-05, "loss": 0.1348, "step": 34862 }, { "epoch": 0.621820711304534, "grad_norm": 0.25607091188430786, "learning_rate": 1.8796653981375114e-05, "loss": 0.115, "step": 34863 }, { "epoch": 0.6218385474262477, "grad_norm": 0.19000785052776337, "learning_rate": 1.8795146164404753e-05, "loss": 0.0887, "step": 34864 }, { "epoch": 0.6218563835479614, "grad_norm": 0.23918886482715607, "learning_rate": 1.8793638371486546e-05, "loss": 0.1412, "step": 34865 }, { "epoch": 0.621874219669675, "grad_norm": 0.30615827441215515, "learning_rate": 1.879213060262634e-05, "loss": 0.118, "step": 34866 }, { "epoch": 0.6218920557913887, "grad_norm": 0.22328411042690277, "learning_rate": 1.8790622857829988e-05, "loss": 0.0934, "step": 34867 }, { "epoch": 0.6219098919131024, "grad_norm": 0.24728624522686005, "learning_rate": 1.878911513710332e-05, "loss": 0.1477, "step": 34868 }, { "epoch": 0.6219277280348161, "grad_norm": 0.27808111906051636, "learning_rate": 1.8787607440452185e-05, "loss": 0.128, "step": 34869 }, { "epoch": 0.6219455641565298, "grad_norm": 0.2123926728963852, "learning_rate": 1.8786099767882435e-05, "loss": 0.122, "step": 34870 }, { "epoch": 0.6219634002782435, "grad_norm": 0.3079756796360016, "learning_rate": 1.8784592119399907e-05, "loss": 0.1453, "step": 34871 }, { "epoch": 0.6219812363999572, "grad_norm": 0.2823425233364105, "learning_rate": 1.8783084495010445e-05, "loss": 0.1041, "step": 34872 }, { "epoch": 0.6219990725216709, "grad_norm": 0.2872985303401947, "learning_rate": 1.8781576894719893e-05, "loss": 0.1634, "step": 34873 }, { "epoch": 0.6220169086433845, "grad_norm": 0.27197402715682983, "learning_rate": 1.8780069318534097e-05, "loss": 0.1363, "step": 34874 }, { "epoch": 0.6220347447650982, "grad_norm": 0.2895960509777069, "learning_rate": 1.8778561766458908e-05, "loss": 0.0938, "step": 34875 }, { "epoch": 0.6220525808868119, "grad_norm": 0.2363155335187912, "learning_rate": 1.877705423850016e-05, "loss": 0.1107, "step": 34876 }, { "epoch": 0.6220704170085256, "grad_norm": 0.31056854128837585, "learning_rate": 1.87755467346637e-05, "loss": 0.0787, "step": 34877 }, { "epoch": 0.6220882531302394, "grad_norm": 0.22825270891189575, "learning_rate": 1.877403925495536e-05, "loss": 0.0931, "step": 34878 }, { "epoch": 0.6221060892519531, "grad_norm": 0.29031768441200256, "learning_rate": 1.8772531799380996e-05, "loss": 0.0996, "step": 34879 }, { "epoch": 0.6221239253736668, "grad_norm": 0.20994466543197632, "learning_rate": 1.8771024367946456e-05, "loss": 0.1589, "step": 34880 }, { "epoch": 0.6221417614953805, "grad_norm": 0.2341243028640747, "learning_rate": 1.8769516960657583e-05, "loss": 0.1213, "step": 34881 }, { "epoch": 0.6221595976170942, "grad_norm": 0.2786446213722229, "learning_rate": 1.8768009577520198e-05, "loss": 0.156, "step": 34882 }, { "epoch": 0.6221774337388079, "grad_norm": 0.2707771062850952, "learning_rate": 1.876650221854017e-05, "loss": 0.1383, "step": 34883 }, { "epoch": 0.6221952698605215, "grad_norm": 0.2270188182592392, "learning_rate": 1.8764994883723336e-05, "loss": 0.1229, "step": 34884 }, { "epoch": 0.6222131059822352, "grad_norm": 0.35651758313179016, "learning_rate": 1.876348757307553e-05, "loss": 0.1828, "step": 34885 }, { "epoch": 0.6222309421039489, "grad_norm": 0.3626936078071594, "learning_rate": 1.87619802866026e-05, "loss": 0.1239, "step": 34886 }, { "epoch": 0.6222487782256626, "grad_norm": 0.20958806574344635, "learning_rate": 1.8760473024310388e-05, "loss": 0.1031, "step": 34887 }, { "epoch": 0.6222666143473763, "grad_norm": 0.23583637177944183, "learning_rate": 1.8758965786204742e-05, "loss": 0.1431, "step": 34888 }, { "epoch": 0.62228445046909, "grad_norm": 0.2802148163318634, "learning_rate": 1.8757458572291502e-05, "loss": 0.1099, "step": 34889 }, { "epoch": 0.6223022865908037, "grad_norm": 0.2654156982898712, "learning_rate": 1.875595138257651e-05, "loss": 0.1322, "step": 34890 }, { "epoch": 0.6223201227125174, "grad_norm": 0.24719032645225525, "learning_rate": 1.875444421706559e-05, "loss": 0.1392, "step": 34891 }, { "epoch": 0.622337958834231, "grad_norm": 0.39449170231819153, "learning_rate": 1.8752937075764616e-05, "loss": 0.1808, "step": 34892 }, { "epoch": 0.6223557949559447, "grad_norm": 0.19806192815303802, "learning_rate": 1.8751429958679412e-05, "loss": 0.1355, "step": 34893 }, { "epoch": 0.6223736310776585, "grad_norm": 0.3297383189201355, "learning_rate": 1.8749922865815827e-05, "loss": 0.0947, "step": 34894 }, { "epoch": 0.6223914671993722, "grad_norm": 0.29932481050491333, "learning_rate": 1.874841579717969e-05, "loss": 0.1673, "step": 34895 }, { "epoch": 0.6224093033210859, "grad_norm": 0.2314309924840927, "learning_rate": 1.8746908752776866e-05, "loss": 0.1324, "step": 34896 }, { "epoch": 0.6224271394427996, "grad_norm": 0.3002726435661316, "learning_rate": 1.8745401732613177e-05, "loss": 0.1588, "step": 34897 }, { "epoch": 0.6224449755645133, "grad_norm": 0.2145930677652359, "learning_rate": 1.8743894736694477e-05, "loss": 0.1071, "step": 34898 }, { "epoch": 0.622462811686227, "grad_norm": 0.259424090385437, "learning_rate": 1.8742387765026588e-05, "loss": 0.1705, "step": 34899 }, { "epoch": 0.6224806478079407, "grad_norm": 0.2604461908340454, "learning_rate": 1.8740880817615375e-05, "loss": 0.1346, "step": 34900 }, { "epoch": 0.6224984839296543, "grad_norm": 0.34902670979499817, "learning_rate": 1.8739373894466665e-05, "loss": 0.1103, "step": 34901 }, { "epoch": 0.622516320051368, "grad_norm": 0.3610472083091736, "learning_rate": 1.8737866995586313e-05, "loss": 0.1597, "step": 34902 }, { "epoch": 0.6225341561730817, "grad_norm": 0.2789466083049774, "learning_rate": 1.8736360120980147e-05, "loss": 0.1109, "step": 34903 }, { "epoch": 0.6225519922947954, "grad_norm": 0.26244744658470154, "learning_rate": 1.8734853270654004e-05, "loss": 0.1084, "step": 34904 }, { "epoch": 0.6225698284165091, "grad_norm": 0.22947083413600922, "learning_rate": 1.8733346444613745e-05, "loss": 0.1513, "step": 34905 }, { "epoch": 0.6225876645382228, "grad_norm": 0.24667462706565857, "learning_rate": 1.8731839642865192e-05, "loss": 0.12, "step": 34906 }, { "epoch": 0.6226055006599365, "grad_norm": 0.26830005645751953, "learning_rate": 1.8730332865414203e-05, "loss": 0.1923, "step": 34907 }, { "epoch": 0.6226233367816502, "grad_norm": 0.3659855127334595, "learning_rate": 1.8728826112266594e-05, "loss": 0.113, "step": 34908 }, { "epoch": 0.6226411729033638, "grad_norm": 0.2905159294605255, "learning_rate": 1.8727319383428232e-05, "loss": 0.1378, "step": 34909 }, { "epoch": 0.6226590090250775, "grad_norm": 0.2375769168138504, "learning_rate": 1.8725812678904946e-05, "loss": 0.1017, "step": 34910 }, { "epoch": 0.6226768451467913, "grad_norm": 0.23519118130207062, "learning_rate": 1.8724305998702582e-05, "loss": 0.1031, "step": 34911 }, { "epoch": 0.622694681268505, "grad_norm": 0.2780896723270416, "learning_rate": 1.8722799342826963e-05, "loss": 0.1167, "step": 34912 }, { "epoch": 0.6227125173902187, "grad_norm": 0.2808263599872589, "learning_rate": 1.872129271128395e-05, "loss": 0.1177, "step": 34913 }, { "epoch": 0.6227303535119324, "grad_norm": 0.23439735174179077, "learning_rate": 1.871978610407937e-05, "loss": 0.095, "step": 34914 }, { "epoch": 0.6227481896336461, "grad_norm": 0.3182724118232727, "learning_rate": 1.8718279521219077e-05, "loss": 0.1321, "step": 34915 }, { "epoch": 0.6227660257553598, "grad_norm": 0.272592693567276, "learning_rate": 1.87167729627089e-05, "loss": 0.1439, "step": 34916 }, { "epoch": 0.6227838618770735, "grad_norm": 0.29960349202156067, "learning_rate": 1.8715266428554667e-05, "loss": 0.0881, "step": 34917 }, { "epoch": 0.6228016979987872, "grad_norm": 0.22677046060562134, "learning_rate": 1.8713759918762247e-05, "loss": 0.1142, "step": 34918 }, { "epoch": 0.6228195341205008, "grad_norm": 0.39995241165161133, "learning_rate": 1.8712253433337463e-05, "loss": 0.1468, "step": 34919 }, { "epoch": 0.6228373702422145, "grad_norm": 0.3321211636066437, "learning_rate": 1.871074697228615e-05, "loss": 0.1585, "step": 34920 }, { "epoch": 0.6228552063639282, "grad_norm": 0.3118199110031128, "learning_rate": 1.870924053561416e-05, "loss": 0.1265, "step": 34921 }, { "epoch": 0.6228730424856419, "grad_norm": 0.22003091871738434, "learning_rate": 1.8707734123327323e-05, "loss": 0.1207, "step": 34922 }, { "epoch": 0.6228908786073556, "grad_norm": 0.28756463527679443, "learning_rate": 1.8706227735431485e-05, "loss": 0.1358, "step": 34923 }, { "epoch": 0.6229087147290693, "grad_norm": 0.27470022439956665, "learning_rate": 1.8704721371932484e-05, "loss": 0.1706, "step": 34924 }, { "epoch": 0.622926550850783, "grad_norm": 0.24368305504322052, "learning_rate": 1.8703215032836145e-05, "loss": 0.1467, "step": 34925 }, { "epoch": 0.6229443869724967, "grad_norm": 0.40861910581588745, "learning_rate": 1.8701708718148332e-05, "loss": 0.2057, "step": 34926 }, { "epoch": 0.6229622230942103, "grad_norm": 0.2805299460887909, "learning_rate": 1.8700202427874868e-05, "loss": 0.1248, "step": 34927 }, { "epoch": 0.6229800592159241, "grad_norm": 0.2974446415901184, "learning_rate": 1.8698696162021594e-05, "loss": 0.1369, "step": 34928 }, { "epoch": 0.6229978953376378, "grad_norm": 0.22959685325622559, "learning_rate": 1.8697189920594355e-05, "loss": 0.1481, "step": 34929 }, { "epoch": 0.6230157314593515, "grad_norm": 0.21923013031482697, "learning_rate": 1.8695683703598975e-05, "loss": 0.09, "step": 34930 }, { "epoch": 0.6230335675810652, "grad_norm": 0.30453383922576904, "learning_rate": 1.869417751104131e-05, "loss": 0.1414, "step": 34931 }, { "epoch": 0.6230514037027789, "grad_norm": 0.20010130107402802, "learning_rate": 1.8692671342927193e-05, "loss": 0.1095, "step": 34932 }, { "epoch": 0.6230692398244926, "grad_norm": 0.35191333293914795, "learning_rate": 1.869116519926245e-05, "loss": 0.1498, "step": 34933 }, { "epoch": 0.6230870759462063, "grad_norm": 0.23263229429721832, "learning_rate": 1.8689659080052934e-05, "loss": 0.122, "step": 34934 }, { "epoch": 0.62310491206792, "grad_norm": 0.2575501501560211, "learning_rate": 1.868815298530448e-05, "loss": 0.1763, "step": 34935 }, { "epoch": 0.6231227481896336, "grad_norm": 0.31331855058670044, "learning_rate": 1.8686646915022927e-05, "loss": 0.1784, "step": 34936 }, { "epoch": 0.6231405843113473, "grad_norm": 0.19131147861480713, "learning_rate": 1.8685140869214115e-05, "loss": 0.0875, "step": 34937 }, { "epoch": 0.623158420433061, "grad_norm": 0.2755252420902252, "learning_rate": 1.8683634847883865e-05, "loss": 0.1507, "step": 34938 }, { "epoch": 0.6231762565547747, "grad_norm": 0.22289063036441803, "learning_rate": 1.868212885103804e-05, "loss": 0.1119, "step": 34939 }, { "epoch": 0.6231940926764884, "grad_norm": 0.26629912853240967, "learning_rate": 1.8680622878682464e-05, "loss": 0.1095, "step": 34940 }, { "epoch": 0.6232119287982021, "grad_norm": 0.3054090142250061, "learning_rate": 1.8679116930822974e-05, "loss": 0.1474, "step": 34941 }, { "epoch": 0.6232297649199158, "grad_norm": 0.2053171694278717, "learning_rate": 1.8677611007465412e-05, "loss": 0.1046, "step": 34942 }, { "epoch": 0.6232476010416295, "grad_norm": 0.235662043094635, "learning_rate": 1.8676105108615603e-05, "loss": 0.1348, "step": 34943 }, { "epoch": 0.6232654371633432, "grad_norm": 0.3221290409564972, "learning_rate": 1.867459923427941e-05, "loss": 0.1563, "step": 34944 }, { "epoch": 0.623283273285057, "grad_norm": 0.28004884719848633, "learning_rate": 1.8673093384462647e-05, "loss": 0.1237, "step": 34945 }, { "epoch": 0.6233011094067706, "grad_norm": 0.3150613307952881, "learning_rate": 1.8671587559171165e-05, "loss": 0.1434, "step": 34946 }, { "epoch": 0.6233189455284843, "grad_norm": 0.22993752360343933, "learning_rate": 1.8670081758410784e-05, "loss": 0.111, "step": 34947 }, { "epoch": 0.623336781650198, "grad_norm": 0.2804214358329773, "learning_rate": 1.8668575982187365e-05, "loss": 0.1196, "step": 34948 }, { "epoch": 0.6233546177719117, "grad_norm": 0.21633709967136383, "learning_rate": 1.8667070230506722e-05, "loss": 0.1325, "step": 34949 }, { "epoch": 0.6233724538936254, "grad_norm": 0.28193071484565735, "learning_rate": 1.866556450337471e-05, "loss": 0.1273, "step": 34950 }, { "epoch": 0.6233902900153391, "grad_norm": 0.19965648651123047, "learning_rate": 1.8664058800797147e-05, "loss": 0.1113, "step": 34951 }, { "epoch": 0.6234081261370528, "grad_norm": 0.25982069969177246, "learning_rate": 1.866255312277989e-05, "loss": 0.1717, "step": 34952 }, { "epoch": 0.6234259622587665, "grad_norm": 0.307998389005661, "learning_rate": 1.8661047469328767e-05, "loss": 0.1556, "step": 34953 }, { "epoch": 0.6234437983804801, "grad_norm": 0.2638751268386841, "learning_rate": 1.8659541840449616e-05, "loss": 0.0961, "step": 34954 }, { "epoch": 0.6234616345021938, "grad_norm": 0.2946605682373047, "learning_rate": 1.8658036236148264e-05, "loss": 0.1414, "step": 34955 }, { "epoch": 0.6234794706239075, "grad_norm": 0.27083146572113037, "learning_rate": 1.8656530656430546e-05, "loss": 0.1483, "step": 34956 }, { "epoch": 0.6234973067456212, "grad_norm": 0.26402199268341064, "learning_rate": 1.865502510130232e-05, "loss": 0.1093, "step": 34957 }, { "epoch": 0.6235151428673349, "grad_norm": 0.29530850052833557, "learning_rate": 1.8653519570769406e-05, "loss": 0.1031, "step": 34958 }, { "epoch": 0.6235329789890486, "grad_norm": 0.21409687399864197, "learning_rate": 1.8652014064837643e-05, "loss": 0.0773, "step": 34959 }, { "epoch": 0.6235508151107623, "grad_norm": 0.29825323820114136, "learning_rate": 1.8650508583512855e-05, "loss": 0.1405, "step": 34960 }, { "epoch": 0.623568651232476, "grad_norm": 0.2612580955028534, "learning_rate": 1.86490031268009e-05, "loss": 0.1301, "step": 34961 }, { "epoch": 0.6235864873541898, "grad_norm": 0.280009925365448, "learning_rate": 1.8647497694707593e-05, "loss": 0.1037, "step": 34962 }, { "epoch": 0.6236043234759034, "grad_norm": 0.2956874966621399, "learning_rate": 1.8645992287238788e-05, "loss": 0.1327, "step": 34963 }, { "epoch": 0.6236221595976171, "grad_norm": 0.29175421595573425, "learning_rate": 1.8644486904400306e-05, "loss": 0.1531, "step": 34964 }, { "epoch": 0.6236399957193308, "grad_norm": 0.24473018944263458, "learning_rate": 1.864298154619799e-05, "loss": 0.1146, "step": 34965 }, { "epoch": 0.6236578318410445, "grad_norm": 0.22427351772785187, "learning_rate": 1.8641476212637676e-05, "loss": 0.1708, "step": 34966 }, { "epoch": 0.6236756679627582, "grad_norm": 0.26586952805519104, "learning_rate": 1.8639970903725197e-05, "loss": 0.0959, "step": 34967 }, { "epoch": 0.6236935040844719, "grad_norm": 0.24371206760406494, "learning_rate": 1.8638465619466384e-05, "loss": 0.0958, "step": 34968 }, { "epoch": 0.6237113402061856, "grad_norm": 0.2460588812828064, "learning_rate": 1.8636960359867072e-05, "loss": 0.1325, "step": 34969 }, { "epoch": 0.6237291763278993, "grad_norm": 0.22553405165672302, "learning_rate": 1.8635455124933102e-05, "loss": 0.0797, "step": 34970 }, { "epoch": 0.623747012449613, "grad_norm": 0.36743104457855225, "learning_rate": 1.8633949914670312e-05, "loss": 0.2188, "step": 34971 }, { "epoch": 0.6237648485713266, "grad_norm": 0.23060420155525208, "learning_rate": 1.863244472908453e-05, "loss": 0.102, "step": 34972 }, { "epoch": 0.6237826846930403, "grad_norm": 0.249001607298851, "learning_rate": 1.8630939568181578e-05, "loss": 0.1214, "step": 34973 }, { "epoch": 0.623800520814754, "grad_norm": 0.23359011113643646, "learning_rate": 1.862943443196732e-05, "loss": 0.1445, "step": 34974 }, { "epoch": 0.6238183569364677, "grad_norm": 0.24766728281974792, "learning_rate": 1.8627929320447568e-05, "loss": 0.1357, "step": 34975 }, { "epoch": 0.6238361930581814, "grad_norm": 0.20163387060165405, "learning_rate": 1.862642423362816e-05, "loss": 0.1431, "step": 34976 }, { "epoch": 0.6238540291798951, "grad_norm": 0.250692218542099, "learning_rate": 1.8624919171514936e-05, "loss": 0.091, "step": 34977 }, { "epoch": 0.6238718653016088, "grad_norm": 0.2954672574996948, "learning_rate": 1.8623414134113725e-05, "loss": 0.0856, "step": 34978 }, { "epoch": 0.6238897014233226, "grad_norm": 0.24775518476963043, "learning_rate": 1.8621909121430365e-05, "loss": 0.1393, "step": 34979 }, { "epoch": 0.6239075375450363, "grad_norm": 0.25005194544792175, "learning_rate": 1.862040413347069e-05, "loss": 0.0765, "step": 34980 }, { "epoch": 0.6239253736667499, "grad_norm": 0.29128676652908325, "learning_rate": 1.861889917024054e-05, "loss": 0.1363, "step": 34981 }, { "epoch": 0.6239432097884636, "grad_norm": 0.2110409438610077, "learning_rate": 1.8617394231745723e-05, "loss": 0.0768, "step": 34982 }, { "epoch": 0.6239610459101773, "grad_norm": 0.21204902231693268, "learning_rate": 1.86158893179921e-05, "loss": 0.0861, "step": 34983 }, { "epoch": 0.623978882031891, "grad_norm": 0.30694273114204407, "learning_rate": 1.8614384428985493e-05, "loss": 0.1528, "step": 34984 }, { "epoch": 0.6239967181536047, "grad_norm": 0.28929030895233154, "learning_rate": 1.8612879564731742e-05, "loss": 0.1294, "step": 34985 }, { "epoch": 0.6240145542753184, "grad_norm": 0.39789798855781555, "learning_rate": 1.8611374725236662e-05, "loss": 0.1603, "step": 34986 }, { "epoch": 0.6240323903970321, "grad_norm": 0.35863196849823, "learning_rate": 1.8609869910506118e-05, "loss": 0.1511, "step": 34987 }, { "epoch": 0.6240502265187458, "grad_norm": 0.2548728883266449, "learning_rate": 1.8608365120545918e-05, "loss": 0.1334, "step": 34988 }, { "epoch": 0.6240680626404594, "grad_norm": 0.20638984441757202, "learning_rate": 1.86068603553619e-05, "loss": 0.1348, "step": 34989 }, { "epoch": 0.6240858987621731, "grad_norm": 0.31494128704071045, "learning_rate": 1.86053556149599e-05, "loss": 0.11, "step": 34990 }, { "epoch": 0.6241037348838868, "grad_norm": 0.25856077671051025, "learning_rate": 1.860385089934575e-05, "loss": 0.1582, "step": 34991 }, { "epoch": 0.6241215710056005, "grad_norm": 0.27843621373176575, "learning_rate": 1.8602346208525284e-05, "loss": 0.1154, "step": 34992 }, { "epoch": 0.6241394071273142, "grad_norm": 0.34963467717170715, "learning_rate": 1.8600841542504337e-05, "loss": 0.1559, "step": 34993 }, { "epoch": 0.6241572432490279, "grad_norm": 0.2968690097332001, "learning_rate": 1.859933690128874e-05, "loss": 0.1018, "step": 34994 }, { "epoch": 0.6241750793707417, "grad_norm": 0.28566789627075195, "learning_rate": 1.8597832284884313e-05, "loss": 0.0909, "step": 34995 }, { "epoch": 0.6241929154924554, "grad_norm": 0.3561452031135559, "learning_rate": 1.8596327693296912e-05, "loss": 0.1413, "step": 34996 }, { "epoch": 0.6242107516141691, "grad_norm": 0.25409865379333496, "learning_rate": 1.8594823126532346e-05, "loss": 0.106, "step": 34997 }, { "epoch": 0.6242285877358827, "grad_norm": 0.25925424695014954, "learning_rate": 1.8593318584596468e-05, "loss": 0.1082, "step": 34998 }, { "epoch": 0.6242464238575964, "grad_norm": 0.2867615520954132, "learning_rate": 1.8591814067495084e-05, "loss": 0.137, "step": 34999 }, { "epoch": 0.6242642599793101, "grad_norm": 0.19471020996570587, "learning_rate": 1.8590309575234056e-05, "loss": 0.1151, "step": 35000 }, { "epoch": 0.6242642599793101, "eval_loss": 0.12341565638780594, "eval_runtime": 106.8107, "eval_samples_per_second": 9.587, "eval_steps_per_second": 1.601, "step": 35000 }, { "epoch": 0.6242820961010238, "grad_norm": 0.26165613532066345, "learning_rate": 1.85888051078192e-05, "loss": 0.1512, "step": 35001 }, { "epoch": 0.6242999322227375, "grad_norm": 0.3111760914325714, "learning_rate": 1.858730066525635e-05, "loss": 0.1654, "step": 35002 }, { "epoch": 0.6243177683444512, "grad_norm": 0.28350284695625305, "learning_rate": 1.858579624755133e-05, "loss": 0.1646, "step": 35003 }, { "epoch": 0.6243356044661649, "grad_norm": 0.292869508266449, "learning_rate": 1.858429185470999e-05, "loss": 0.1053, "step": 35004 }, { "epoch": 0.6243534405878786, "grad_norm": 0.2753280997276306, "learning_rate": 1.8582787486738144e-05, "loss": 0.1348, "step": 35005 }, { "epoch": 0.6243712767095922, "grad_norm": 0.2785457968711853, "learning_rate": 1.8581283143641634e-05, "loss": 0.1168, "step": 35006 }, { "epoch": 0.6243891128313059, "grad_norm": 0.23466309905052185, "learning_rate": 1.8579778825426286e-05, "loss": 0.1285, "step": 35007 }, { "epoch": 0.6244069489530196, "grad_norm": 0.26247933506965637, "learning_rate": 1.857827453209793e-05, "loss": 0.1088, "step": 35008 }, { "epoch": 0.6244247850747333, "grad_norm": 0.24538198113441467, "learning_rate": 1.8576770263662403e-05, "loss": 0.0781, "step": 35009 }, { "epoch": 0.624442621196447, "grad_norm": 0.2285107523202896, "learning_rate": 1.857526602012553e-05, "loss": 0.0909, "step": 35010 }, { "epoch": 0.6244604573181607, "grad_norm": 0.3254603445529938, "learning_rate": 1.8573761801493147e-05, "loss": 0.132, "step": 35011 }, { "epoch": 0.6244782934398745, "grad_norm": 0.26341378688812256, "learning_rate": 1.8572257607771077e-05, "loss": 0.0584, "step": 35012 }, { "epoch": 0.6244961295615882, "grad_norm": 0.35393351316452026, "learning_rate": 1.8570753438965162e-05, "loss": 0.2023, "step": 35013 }, { "epoch": 0.6245139656833019, "grad_norm": 0.22882148623466492, "learning_rate": 1.8569249295081233e-05, "loss": 0.0997, "step": 35014 }, { "epoch": 0.6245318018050156, "grad_norm": 0.2795863747596741, "learning_rate": 1.856774517612511e-05, "loss": 0.1317, "step": 35015 }, { "epoch": 0.6245496379267292, "grad_norm": 0.2590351998806, "learning_rate": 1.856624108210262e-05, "loss": 0.0999, "step": 35016 }, { "epoch": 0.6245674740484429, "grad_norm": 0.2701863944530487, "learning_rate": 1.856473701301961e-05, "loss": 0.1984, "step": 35017 }, { "epoch": 0.6245853101701566, "grad_norm": 0.279201477766037, "learning_rate": 1.8563232968881902e-05, "loss": 0.1296, "step": 35018 }, { "epoch": 0.6246031462918703, "grad_norm": 0.3249446153640747, "learning_rate": 1.8561728949695328e-05, "loss": 0.1588, "step": 35019 }, { "epoch": 0.624620982413584, "grad_norm": 0.24219025671482086, "learning_rate": 1.856022495546572e-05, "loss": 0.0987, "step": 35020 }, { "epoch": 0.6246388185352977, "grad_norm": 0.2391340583562851, "learning_rate": 1.855872098619889e-05, "loss": 0.1008, "step": 35021 }, { "epoch": 0.6246566546570114, "grad_norm": 0.352358877658844, "learning_rate": 1.8557217041900693e-05, "loss": 0.161, "step": 35022 }, { "epoch": 0.6246744907787251, "grad_norm": 0.27890506386756897, "learning_rate": 1.8555713122576945e-05, "loss": 0.1332, "step": 35023 }, { "epoch": 0.6246923269004387, "grad_norm": 0.26933708786964417, "learning_rate": 1.855420922823348e-05, "loss": 0.0986, "step": 35024 }, { "epoch": 0.6247101630221524, "grad_norm": 0.2250606268644333, "learning_rate": 1.855270535887612e-05, "loss": 0.1238, "step": 35025 }, { "epoch": 0.6247279991438661, "grad_norm": 0.27833935618400574, "learning_rate": 1.8551201514510708e-05, "loss": 0.1077, "step": 35026 }, { "epoch": 0.6247458352655798, "grad_norm": 0.23978637158870697, "learning_rate": 1.8549697695143065e-05, "loss": 0.1192, "step": 35027 }, { "epoch": 0.6247636713872935, "grad_norm": 0.283624529838562, "learning_rate": 1.8548193900779025e-05, "loss": 0.1359, "step": 35028 }, { "epoch": 0.6247815075090073, "grad_norm": 0.3338007628917694, "learning_rate": 1.85466901314244e-05, "loss": 0.1951, "step": 35029 }, { "epoch": 0.624799343630721, "grad_norm": 0.22621211409568787, "learning_rate": 1.854518638708505e-05, "loss": 0.1115, "step": 35030 }, { "epoch": 0.6248171797524347, "grad_norm": 0.31063011288642883, "learning_rate": 1.8543682667766783e-05, "loss": 0.1844, "step": 35031 }, { "epoch": 0.6248350158741484, "grad_norm": 0.34097880125045776, "learning_rate": 1.8542178973475423e-05, "loss": 0.2192, "step": 35032 }, { "epoch": 0.624852851995862, "grad_norm": 0.3390691876411438, "learning_rate": 1.8540675304216818e-05, "loss": 0.1911, "step": 35033 }, { "epoch": 0.6248706881175757, "grad_norm": 0.25483274459838867, "learning_rate": 1.8539171659996774e-05, "loss": 0.1235, "step": 35034 }, { "epoch": 0.6248885242392894, "grad_norm": 0.2510347366333008, "learning_rate": 1.8537668040821143e-05, "loss": 0.166, "step": 35035 }, { "epoch": 0.6249063603610031, "grad_norm": 0.21529719233512878, "learning_rate": 1.8536164446695742e-05, "loss": 0.0877, "step": 35036 }, { "epoch": 0.6249241964827168, "grad_norm": 0.3016282320022583, "learning_rate": 1.8534660877626396e-05, "loss": 0.1286, "step": 35037 }, { "epoch": 0.6249420326044305, "grad_norm": 0.3185059130191803, "learning_rate": 1.853315733361894e-05, "loss": 0.211, "step": 35038 }, { "epoch": 0.6249598687261442, "grad_norm": 0.25188443064689636, "learning_rate": 1.8531653814679195e-05, "loss": 0.138, "step": 35039 }, { "epoch": 0.6249777048478579, "grad_norm": 0.35887062549591064, "learning_rate": 1.8530150320813e-05, "loss": 0.0978, "step": 35040 }, { "epoch": 0.6249955409695716, "grad_norm": 0.24346831440925598, "learning_rate": 1.852864685202618e-05, "loss": 0.1391, "step": 35041 }, { "epoch": 0.6250133770912852, "grad_norm": 0.22355423867702484, "learning_rate": 1.852714340832455e-05, "loss": 0.1261, "step": 35042 }, { "epoch": 0.6250312132129989, "grad_norm": 0.21421067416667938, "learning_rate": 1.8525639989713954e-05, "loss": 0.1313, "step": 35043 }, { "epoch": 0.6250490493347126, "grad_norm": 0.3358588218688965, "learning_rate": 1.8524136596200216e-05, "loss": 0.1281, "step": 35044 }, { "epoch": 0.6250668854564263, "grad_norm": 0.17502638697624207, "learning_rate": 1.8522633227789153e-05, "loss": 0.1153, "step": 35045 }, { "epoch": 0.6250847215781401, "grad_norm": 0.36354413628578186, "learning_rate": 1.852112988448661e-05, "loss": 0.1676, "step": 35046 }, { "epoch": 0.6251025576998538, "grad_norm": 0.2767677903175354, "learning_rate": 1.8519626566298394e-05, "loss": 0.1234, "step": 35047 }, { "epoch": 0.6251203938215675, "grad_norm": 0.3142389953136444, "learning_rate": 1.8518123273230353e-05, "loss": 0.0912, "step": 35048 }, { "epoch": 0.6251382299432812, "grad_norm": 0.27851995825767517, "learning_rate": 1.8516620005288304e-05, "loss": 0.1101, "step": 35049 }, { "epoch": 0.6251560660649949, "grad_norm": 0.2035122662782669, "learning_rate": 1.851511676247808e-05, "loss": 0.0998, "step": 35050 }, { "epoch": 0.6251739021867085, "grad_norm": 0.19163979589939117, "learning_rate": 1.8513613544805487e-05, "loss": 0.0824, "step": 35051 }, { "epoch": 0.6251917383084222, "grad_norm": 0.236705482006073, "learning_rate": 1.851211035227638e-05, "loss": 0.1225, "step": 35052 }, { "epoch": 0.6252095744301359, "grad_norm": 0.2939296066761017, "learning_rate": 1.8510607184896573e-05, "loss": 0.1062, "step": 35053 }, { "epoch": 0.6252274105518496, "grad_norm": 0.2122703641653061, "learning_rate": 1.8509104042671893e-05, "loss": 0.1207, "step": 35054 }, { "epoch": 0.6252452466735633, "grad_norm": 0.2184041440486908, "learning_rate": 1.850760092560816e-05, "loss": 0.1363, "step": 35055 }, { "epoch": 0.625263082795277, "grad_norm": 0.209747314453125, "learning_rate": 1.850609783371122e-05, "loss": 0.1433, "step": 35056 }, { "epoch": 0.6252809189169907, "grad_norm": 0.38966289162635803, "learning_rate": 1.850459476698689e-05, "loss": 0.1179, "step": 35057 }, { "epoch": 0.6252987550387044, "grad_norm": 0.3571484088897705, "learning_rate": 1.850309172544099e-05, "loss": 0.1584, "step": 35058 }, { "epoch": 0.625316591160418, "grad_norm": 0.28210461139678955, "learning_rate": 1.8501588709079344e-05, "loss": 0.1162, "step": 35059 }, { "epoch": 0.6253344272821317, "grad_norm": 0.27694180607795715, "learning_rate": 1.850008571790778e-05, "loss": 0.0815, "step": 35060 }, { "epoch": 0.6253522634038454, "grad_norm": 0.24310360848903656, "learning_rate": 1.849858275193214e-05, "loss": 0.1257, "step": 35061 }, { "epoch": 0.6253700995255591, "grad_norm": 0.3024975061416626, "learning_rate": 1.849707981115824e-05, "loss": 0.1472, "step": 35062 }, { "epoch": 0.6253879356472729, "grad_norm": 0.2642667591571808, "learning_rate": 1.8495576895591903e-05, "loss": 0.1404, "step": 35063 }, { "epoch": 0.6254057717689866, "grad_norm": 0.3894912600517273, "learning_rate": 1.8494074005238948e-05, "loss": 0.0994, "step": 35064 }, { "epoch": 0.6254236078907003, "grad_norm": 0.2190268486738205, "learning_rate": 1.849257114010522e-05, "loss": 0.1276, "step": 35065 }, { "epoch": 0.625441444012414, "grad_norm": 0.5063741207122803, "learning_rate": 1.8491068300196526e-05, "loss": 0.1299, "step": 35066 }, { "epoch": 0.6254592801341277, "grad_norm": 0.28464534878730774, "learning_rate": 1.8489565485518707e-05, "loss": 0.1105, "step": 35067 }, { "epoch": 0.6254771162558413, "grad_norm": 0.37525391578674316, "learning_rate": 1.8488062696077567e-05, "loss": 0.0932, "step": 35068 }, { "epoch": 0.625494952377555, "grad_norm": 0.23884116113185883, "learning_rate": 1.848655993187896e-05, "loss": 0.1127, "step": 35069 }, { "epoch": 0.6255127884992687, "grad_norm": 0.27507731318473816, "learning_rate": 1.8485057192928694e-05, "loss": 0.1407, "step": 35070 }, { "epoch": 0.6255306246209824, "grad_norm": 0.2676922082901001, "learning_rate": 1.8483554479232594e-05, "loss": 0.1569, "step": 35071 }, { "epoch": 0.6255484607426961, "grad_norm": 0.24304607510566711, "learning_rate": 1.8482051790796488e-05, "loss": 0.1246, "step": 35072 }, { "epoch": 0.6255662968644098, "grad_norm": 0.21580667793750763, "learning_rate": 1.84805491276262e-05, "loss": 0.1153, "step": 35073 }, { "epoch": 0.6255841329861235, "grad_norm": 0.257205605506897, "learning_rate": 1.847904648972755e-05, "loss": 0.1021, "step": 35074 }, { "epoch": 0.6256019691078372, "grad_norm": 0.2752120792865753, "learning_rate": 1.847754387710638e-05, "loss": 0.1256, "step": 35075 }, { "epoch": 0.6256198052295509, "grad_norm": 0.2943366467952728, "learning_rate": 1.8476041289768497e-05, "loss": 0.1537, "step": 35076 }, { "epoch": 0.6256376413512645, "grad_norm": 0.2767156660556793, "learning_rate": 1.847453872771972e-05, "loss": 0.1244, "step": 35077 }, { "epoch": 0.6256554774729782, "grad_norm": 0.29404208064079285, "learning_rate": 1.84730361909659e-05, "loss": 0.1562, "step": 35078 }, { "epoch": 0.6256733135946919, "grad_norm": 0.21938000619411469, "learning_rate": 1.8471533679512844e-05, "loss": 0.1348, "step": 35079 }, { "epoch": 0.6256911497164057, "grad_norm": 0.25450754165649414, "learning_rate": 1.8470031193366372e-05, "loss": 0.0866, "step": 35080 }, { "epoch": 0.6257089858381194, "grad_norm": 0.2514766454696655, "learning_rate": 1.846852873253232e-05, "loss": 0.1102, "step": 35081 }, { "epoch": 0.6257268219598331, "grad_norm": 0.24066147208213806, "learning_rate": 1.84670262970165e-05, "loss": 0.1372, "step": 35082 }, { "epoch": 0.6257446580815468, "grad_norm": 0.26582470536231995, "learning_rate": 1.8465523886824747e-05, "loss": 0.1094, "step": 35083 }, { "epoch": 0.6257624942032605, "grad_norm": 0.2871421277523041, "learning_rate": 1.8464021501962887e-05, "loss": 0.1321, "step": 35084 }, { "epoch": 0.6257803303249742, "grad_norm": 0.28860166668891907, "learning_rate": 1.846251914243673e-05, "loss": 0.1721, "step": 35085 }, { "epoch": 0.6257981664466878, "grad_norm": 0.23838277161121368, "learning_rate": 1.84610168082521e-05, "loss": 0.1825, "step": 35086 }, { "epoch": 0.6258160025684015, "grad_norm": 0.32682380080223083, "learning_rate": 1.845951449941483e-05, "loss": 0.1362, "step": 35087 }, { "epoch": 0.6258338386901152, "grad_norm": 0.2525024712085724, "learning_rate": 1.845801221593075e-05, "loss": 0.1102, "step": 35088 }, { "epoch": 0.6258516748118289, "grad_norm": 0.4192144274711609, "learning_rate": 1.8456509957805673e-05, "loss": 0.1279, "step": 35089 }, { "epoch": 0.6258695109335426, "grad_norm": 0.23294328153133392, "learning_rate": 1.8455007725045415e-05, "loss": 0.1077, "step": 35090 }, { "epoch": 0.6258873470552563, "grad_norm": 0.3153073787689209, "learning_rate": 1.8453505517655813e-05, "loss": 0.178, "step": 35091 }, { "epoch": 0.62590518317697, "grad_norm": 0.2681513726711273, "learning_rate": 1.8452003335642688e-05, "loss": 0.1217, "step": 35092 }, { "epoch": 0.6259230192986837, "grad_norm": 0.3369585871696472, "learning_rate": 1.8450501179011853e-05, "loss": 0.1649, "step": 35093 }, { "epoch": 0.6259408554203973, "grad_norm": 0.2894989550113678, "learning_rate": 1.8448999047769138e-05, "loss": 0.1607, "step": 35094 }, { "epoch": 0.625958691542111, "grad_norm": 0.28532910346984863, "learning_rate": 1.8447496941920368e-05, "loss": 0.1092, "step": 35095 }, { "epoch": 0.6259765276638248, "grad_norm": 0.32344838976860046, "learning_rate": 1.8445994861471362e-05, "loss": 0.1416, "step": 35096 }, { "epoch": 0.6259943637855385, "grad_norm": 0.30115872621536255, "learning_rate": 1.844449280642795e-05, "loss": 0.1702, "step": 35097 }, { "epoch": 0.6260121999072522, "grad_norm": 0.21956495940685272, "learning_rate": 1.8442990776795944e-05, "loss": 0.1341, "step": 35098 }, { "epoch": 0.6260300360289659, "grad_norm": 0.32527607679367065, "learning_rate": 1.844148877258116e-05, "loss": 0.147, "step": 35099 }, { "epoch": 0.6260478721506796, "grad_norm": 0.2595917880535126, "learning_rate": 1.8439986793789443e-05, "loss": 0.179, "step": 35100 }, { "epoch": 0.6260657082723933, "grad_norm": 0.34333574771881104, "learning_rate": 1.84384848404266e-05, "loss": 0.1131, "step": 35101 }, { "epoch": 0.626083544394107, "grad_norm": 0.24040868878364563, "learning_rate": 1.8436982912498457e-05, "loss": 0.113, "step": 35102 }, { "epoch": 0.6261013805158206, "grad_norm": 0.23016753792762756, "learning_rate": 1.8435481010010826e-05, "loss": 0.1142, "step": 35103 }, { "epoch": 0.6261192166375343, "grad_norm": 0.2544845938682556, "learning_rate": 1.843397913296955e-05, "loss": 0.1258, "step": 35104 }, { "epoch": 0.626137052759248, "grad_norm": 0.19867968559265137, "learning_rate": 1.8432477281380436e-05, "loss": 0.0618, "step": 35105 }, { "epoch": 0.6261548888809617, "grad_norm": 0.2854829728603363, "learning_rate": 1.843097545524931e-05, "loss": 0.0984, "step": 35106 }, { "epoch": 0.6261727250026754, "grad_norm": 0.23106160759925842, "learning_rate": 1.842947365458198e-05, "loss": 0.0763, "step": 35107 }, { "epoch": 0.6261905611243891, "grad_norm": 0.2902849614620209, "learning_rate": 1.842797187938429e-05, "loss": 0.1153, "step": 35108 }, { "epoch": 0.6262083972461028, "grad_norm": 0.3303748071193695, "learning_rate": 1.842647012966205e-05, "loss": 0.1457, "step": 35109 }, { "epoch": 0.6262262333678165, "grad_norm": 0.29859596490859985, "learning_rate": 1.8424968405421085e-05, "loss": 0.1482, "step": 35110 }, { "epoch": 0.6262440694895302, "grad_norm": 0.27811217308044434, "learning_rate": 1.8423466706667215e-05, "loss": 0.1441, "step": 35111 }, { "epoch": 0.6262619056112438, "grad_norm": 0.22279682755470276, "learning_rate": 1.8421965033406243e-05, "loss": 0.1125, "step": 35112 }, { "epoch": 0.6262797417329576, "grad_norm": 0.24229790270328522, "learning_rate": 1.842046338564402e-05, "loss": 0.1204, "step": 35113 }, { "epoch": 0.6262975778546713, "grad_norm": 0.2506665289402008, "learning_rate": 1.841896176338635e-05, "loss": 0.085, "step": 35114 }, { "epoch": 0.626315413976385, "grad_norm": 0.2284838855266571, "learning_rate": 1.841746016663906e-05, "loss": 0.0778, "step": 35115 }, { "epoch": 0.6263332500980987, "grad_norm": 0.26494085788726807, "learning_rate": 1.8415958595407963e-05, "loss": 0.1126, "step": 35116 }, { "epoch": 0.6263510862198124, "grad_norm": 0.25604185461997986, "learning_rate": 1.841445704969889e-05, "loss": 0.1066, "step": 35117 }, { "epoch": 0.6263689223415261, "grad_norm": 0.21456775069236755, "learning_rate": 1.8412955529517655e-05, "loss": 0.0748, "step": 35118 }, { "epoch": 0.6263867584632398, "grad_norm": 0.2448813021183014, "learning_rate": 1.8411454034870082e-05, "loss": 0.1535, "step": 35119 }, { "epoch": 0.6264045945849535, "grad_norm": 0.4279812276363373, "learning_rate": 1.840995256576198e-05, "loss": 0.15, "step": 35120 }, { "epoch": 0.6264224307066671, "grad_norm": 0.311682790517807, "learning_rate": 1.8408451122199184e-05, "loss": 0.1327, "step": 35121 }, { "epoch": 0.6264402668283808, "grad_norm": 0.25895968079566956, "learning_rate": 1.8406949704187504e-05, "loss": 0.108, "step": 35122 }, { "epoch": 0.6264581029500945, "grad_norm": 0.23475706577301025, "learning_rate": 1.840544831173277e-05, "loss": 0.1099, "step": 35123 }, { "epoch": 0.6264759390718082, "grad_norm": 0.33879831433296204, "learning_rate": 1.8403946944840798e-05, "loss": 0.0652, "step": 35124 }, { "epoch": 0.6264937751935219, "grad_norm": 0.2367793768644333, "learning_rate": 1.8402445603517394e-05, "loss": 0.1329, "step": 35125 }, { "epoch": 0.6265116113152356, "grad_norm": 0.22501425445079803, "learning_rate": 1.8400944287768397e-05, "loss": 0.0997, "step": 35126 }, { "epoch": 0.6265294474369493, "grad_norm": 0.2000752091407776, "learning_rate": 1.8399442997599627e-05, "loss": 0.0914, "step": 35127 }, { "epoch": 0.626547283558663, "grad_norm": 0.22648796439170837, "learning_rate": 1.8397941733016882e-05, "loss": 0.0873, "step": 35128 }, { "epoch": 0.6265651196803766, "grad_norm": 0.2413891702890396, "learning_rate": 1.8396440494025998e-05, "loss": 0.1018, "step": 35129 }, { "epoch": 0.6265829558020904, "grad_norm": 0.2727953791618347, "learning_rate": 1.8394939280632792e-05, "loss": 0.0895, "step": 35130 }, { "epoch": 0.6266007919238041, "grad_norm": 0.2796498239040375, "learning_rate": 1.8393438092843088e-05, "loss": 0.1154, "step": 35131 }, { "epoch": 0.6266186280455178, "grad_norm": 0.32536715269088745, "learning_rate": 1.83919369306627e-05, "loss": 0.1517, "step": 35132 }, { "epoch": 0.6266364641672315, "grad_norm": 0.26453447341918945, "learning_rate": 1.8390435794097435e-05, "loss": 0.1437, "step": 35133 }, { "epoch": 0.6266543002889452, "grad_norm": 0.3098036050796509, "learning_rate": 1.8388934683153135e-05, "loss": 0.1176, "step": 35134 }, { "epoch": 0.6266721364106589, "grad_norm": 0.29212474822998047, "learning_rate": 1.8387433597835607e-05, "loss": 0.119, "step": 35135 }, { "epoch": 0.6266899725323726, "grad_norm": 0.30823978781700134, "learning_rate": 1.8385932538150667e-05, "loss": 0.0657, "step": 35136 }, { "epoch": 0.6267078086540863, "grad_norm": 0.2584109902381897, "learning_rate": 1.838443150410414e-05, "loss": 0.1288, "step": 35137 }, { "epoch": 0.6267256447758, "grad_norm": 0.24601887166500092, "learning_rate": 1.8382930495701833e-05, "loss": 0.1153, "step": 35138 }, { "epoch": 0.6267434808975136, "grad_norm": 0.3031660318374634, "learning_rate": 1.8381429512949583e-05, "loss": 0.1733, "step": 35139 }, { "epoch": 0.6267613170192273, "grad_norm": 0.3300594687461853, "learning_rate": 1.8379928555853198e-05, "loss": 0.0857, "step": 35140 }, { "epoch": 0.626779153140941, "grad_norm": 0.2779475450515747, "learning_rate": 1.8378427624418496e-05, "loss": 0.1255, "step": 35141 }, { "epoch": 0.6267969892626547, "grad_norm": 0.22482886910438538, "learning_rate": 1.8376926718651282e-05, "loss": 0.1368, "step": 35142 }, { "epoch": 0.6268148253843684, "grad_norm": 0.25578710436820984, "learning_rate": 1.83754258385574e-05, "loss": 0.1461, "step": 35143 }, { "epoch": 0.6268326615060821, "grad_norm": 0.2601737678050995, "learning_rate": 1.8373924984142654e-05, "loss": 0.082, "step": 35144 }, { "epoch": 0.6268504976277958, "grad_norm": 0.2680853605270386, "learning_rate": 1.8372424155412866e-05, "loss": 0.1383, "step": 35145 }, { "epoch": 0.6268683337495095, "grad_norm": 0.38748306035995483, "learning_rate": 1.837092335237384e-05, "loss": 0.1416, "step": 35146 }, { "epoch": 0.6268861698712233, "grad_norm": 0.2574920356273651, "learning_rate": 1.8369422575031414e-05, "loss": 0.1677, "step": 35147 }, { "epoch": 0.6269040059929369, "grad_norm": 0.235775426030159, "learning_rate": 1.83679218233914e-05, "loss": 0.1069, "step": 35148 }, { "epoch": 0.6269218421146506, "grad_norm": 0.2731079161167145, "learning_rate": 1.8366421097459602e-05, "loss": 0.1503, "step": 35149 }, { "epoch": 0.6269396782363643, "grad_norm": 0.2916219234466553, "learning_rate": 1.8364920397241856e-05, "loss": 0.157, "step": 35150 }, { "epoch": 0.626957514358078, "grad_norm": 0.3127744197845459, "learning_rate": 1.8363419722743957e-05, "loss": 0.2193, "step": 35151 }, { "epoch": 0.6269753504797917, "grad_norm": 0.24434545636177063, "learning_rate": 1.8361919073971746e-05, "loss": 0.0999, "step": 35152 }, { "epoch": 0.6269931866015054, "grad_norm": 0.22819340229034424, "learning_rate": 1.8360418450931034e-05, "loss": 0.1478, "step": 35153 }, { "epoch": 0.6270110227232191, "grad_norm": 0.297372967004776, "learning_rate": 1.835891785362763e-05, "loss": 0.0978, "step": 35154 }, { "epoch": 0.6270288588449328, "grad_norm": 0.2813517451286316, "learning_rate": 1.835741728206734e-05, "loss": 0.0918, "step": 35155 }, { "epoch": 0.6270466949666464, "grad_norm": 0.2257905900478363, "learning_rate": 1.8355916736256012e-05, "loss": 0.0963, "step": 35156 }, { "epoch": 0.6270645310883601, "grad_norm": 1.021179437637329, "learning_rate": 1.8354416216199436e-05, "loss": 0.1598, "step": 35157 }, { "epoch": 0.6270823672100738, "grad_norm": 0.20912602543830872, "learning_rate": 1.8352915721903443e-05, "loss": 0.1447, "step": 35158 }, { "epoch": 0.6271002033317875, "grad_norm": 0.2968735992908478, "learning_rate": 1.835141525337384e-05, "loss": 0.1269, "step": 35159 }, { "epoch": 0.6271180394535012, "grad_norm": 0.29712948203086853, "learning_rate": 1.834991481061645e-05, "loss": 0.108, "step": 35160 }, { "epoch": 0.6271358755752149, "grad_norm": 0.2570733428001404, "learning_rate": 1.8348414393637092e-05, "loss": 0.1159, "step": 35161 }, { "epoch": 0.6271537116969286, "grad_norm": 0.15819190442562103, "learning_rate": 1.8346914002441573e-05, "loss": 0.0953, "step": 35162 }, { "epoch": 0.6271715478186423, "grad_norm": 0.24046814441680908, "learning_rate": 1.8345413637035713e-05, "loss": 0.153, "step": 35163 }, { "epoch": 0.6271893839403561, "grad_norm": 0.24306350946426392, "learning_rate": 1.834391329742532e-05, "loss": 0.1311, "step": 35164 }, { "epoch": 0.6272072200620697, "grad_norm": 0.2626388370990753, "learning_rate": 1.834241298361623e-05, "loss": 0.1239, "step": 35165 }, { "epoch": 0.6272250561837834, "grad_norm": 0.24841630458831787, "learning_rate": 1.8340912695614246e-05, "loss": 0.1426, "step": 35166 }, { "epoch": 0.6272428923054971, "grad_norm": 0.27767080068588257, "learning_rate": 1.8339412433425186e-05, "loss": 0.1054, "step": 35167 }, { "epoch": 0.6272607284272108, "grad_norm": 0.37322691082954407, "learning_rate": 1.833791219705485e-05, "loss": 0.1271, "step": 35168 }, { "epoch": 0.6272785645489245, "grad_norm": 0.27964141964912415, "learning_rate": 1.8336411986509078e-05, "loss": 0.1114, "step": 35169 }, { "epoch": 0.6272964006706382, "grad_norm": 0.2561666667461395, "learning_rate": 1.8334911801793673e-05, "loss": 0.1249, "step": 35170 }, { "epoch": 0.6273142367923519, "grad_norm": 0.2643705904483795, "learning_rate": 1.8333411642914456e-05, "loss": 0.082, "step": 35171 }, { "epoch": 0.6273320729140656, "grad_norm": 0.29310816526412964, "learning_rate": 1.8331911509877225e-05, "loss": 0.1416, "step": 35172 }, { "epoch": 0.6273499090357793, "grad_norm": 0.2245894819498062, "learning_rate": 1.8330411402687818e-05, "loss": 0.1276, "step": 35173 }, { "epoch": 0.6273677451574929, "grad_norm": 0.32910066843032837, "learning_rate": 1.8328911321352042e-05, "loss": 0.1636, "step": 35174 }, { "epoch": 0.6273855812792066, "grad_norm": 0.2751857042312622, "learning_rate": 1.8327411265875714e-05, "loss": 0.1428, "step": 35175 }, { "epoch": 0.6274034174009203, "grad_norm": 0.2538220286369324, "learning_rate": 1.832591123626464e-05, "loss": 0.1035, "step": 35176 }, { "epoch": 0.627421253522634, "grad_norm": 0.25720953941345215, "learning_rate": 1.832441123252463e-05, "loss": 0.1662, "step": 35177 }, { "epoch": 0.6274390896443477, "grad_norm": 0.2071014940738678, "learning_rate": 1.8322911254661513e-05, "loss": 0.1321, "step": 35178 }, { "epoch": 0.6274569257660614, "grad_norm": 0.33361488580703735, "learning_rate": 1.8321411302681102e-05, "loss": 0.1273, "step": 35179 }, { "epoch": 0.6274747618877751, "grad_norm": 0.3598553240299225, "learning_rate": 1.831991137658921e-05, "loss": 0.142, "step": 35180 }, { "epoch": 0.6274925980094889, "grad_norm": 0.22990982234477997, "learning_rate": 1.8318411476391635e-05, "loss": 0.1074, "step": 35181 }, { "epoch": 0.6275104341312026, "grad_norm": 0.257588267326355, "learning_rate": 1.8316911602094218e-05, "loss": 0.1136, "step": 35182 }, { "epoch": 0.6275282702529162, "grad_norm": 0.28625985980033875, "learning_rate": 1.831541175370276e-05, "loss": 0.1325, "step": 35183 }, { "epoch": 0.6275461063746299, "grad_norm": 0.3204520642757416, "learning_rate": 1.8313911931223066e-05, "loss": 0.1374, "step": 35184 }, { "epoch": 0.6275639424963436, "grad_norm": 0.29593488574028015, "learning_rate": 1.831241213466096e-05, "loss": 0.1299, "step": 35185 }, { "epoch": 0.6275817786180573, "grad_norm": 0.28135180473327637, "learning_rate": 1.8310912364022256e-05, "loss": 0.1263, "step": 35186 }, { "epoch": 0.627599614739771, "grad_norm": 0.22319857776165009, "learning_rate": 1.8309412619312772e-05, "loss": 0.0832, "step": 35187 }, { "epoch": 0.6276174508614847, "grad_norm": 0.2607048749923706, "learning_rate": 1.8307912900538315e-05, "loss": 0.1273, "step": 35188 }, { "epoch": 0.6276352869831984, "grad_norm": 0.1879798322916031, "learning_rate": 1.8306413207704697e-05, "loss": 0.1391, "step": 35189 }, { "epoch": 0.6276531231049121, "grad_norm": 0.2310120314359665, "learning_rate": 1.8304913540817727e-05, "loss": 0.1136, "step": 35190 }, { "epoch": 0.6276709592266257, "grad_norm": 0.274537056684494, "learning_rate": 1.8303413899883223e-05, "loss": 0.1247, "step": 35191 }, { "epoch": 0.6276887953483394, "grad_norm": 0.2561551630496979, "learning_rate": 1.830191428490701e-05, "loss": 0.1562, "step": 35192 }, { "epoch": 0.6277066314700531, "grad_norm": 0.23715558648109436, "learning_rate": 1.830041469589489e-05, "loss": 0.1284, "step": 35193 }, { "epoch": 0.6277244675917668, "grad_norm": 0.32786503434181213, "learning_rate": 1.8298915132852662e-05, "loss": 0.215, "step": 35194 }, { "epoch": 0.6277423037134805, "grad_norm": 0.28258782625198364, "learning_rate": 1.8297415595786173e-05, "loss": 0.1339, "step": 35195 }, { "epoch": 0.6277601398351942, "grad_norm": 0.1871049851179123, "learning_rate": 1.829591608470121e-05, "loss": 0.0548, "step": 35196 }, { "epoch": 0.627777975956908, "grad_norm": 0.19045434892177582, "learning_rate": 1.8294416599603584e-05, "loss": 0.0911, "step": 35197 }, { "epoch": 0.6277958120786217, "grad_norm": 0.40389135479927063, "learning_rate": 1.829291714049912e-05, "loss": 0.1439, "step": 35198 }, { "epoch": 0.6278136482003354, "grad_norm": 0.37807926535606384, "learning_rate": 1.8291417707393622e-05, "loss": 0.1596, "step": 35199 }, { "epoch": 0.627831484322049, "grad_norm": 0.4714040756225586, "learning_rate": 1.8289918300292914e-05, "loss": 0.1336, "step": 35200 }, { "epoch": 0.6278493204437627, "grad_norm": 0.32570865750312805, "learning_rate": 1.82884189192028e-05, "loss": 0.1734, "step": 35201 }, { "epoch": 0.6278671565654764, "grad_norm": 0.28308457136154175, "learning_rate": 1.828691956412909e-05, "loss": 0.0957, "step": 35202 }, { "epoch": 0.6278849926871901, "grad_norm": 0.27648767828941345, "learning_rate": 1.828542023507759e-05, "loss": 0.2208, "step": 35203 }, { "epoch": 0.6279028288089038, "grad_norm": 0.2610180974006653, "learning_rate": 1.8283920932054134e-05, "loss": 0.1137, "step": 35204 }, { "epoch": 0.6279206649306175, "grad_norm": 0.20775818824768066, "learning_rate": 1.828242165506451e-05, "loss": 0.1585, "step": 35205 }, { "epoch": 0.6279385010523312, "grad_norm": 0.20920971035957336, "learning_rate": 1.8280922404114547e-05, "loss": 0.1657, "step": 35206 }, { "epoch": 0.6279563371740449, "grad_norm": 0.23494596779346466, "learning_rate": 1.8279423179210036e-05, "loss": 0.1188, "step": 35207 }, { "epoch": 0.6279741732957586, "grad_norm": 0.3598296642303467, "learning_rate": 1.8277923980356817e-05, "loss": 0.1417, "step": 35208 }, { "epoch": 0.6279920094174722, "grad_norm": 0.25582340359687805, "learning_rate": 1.8276424807560686e-05, "loss": 0.1313, "step": 35209 }, { "epoch": 0.6280098455391859, "grad_norm": 0.27216625213623047, "learning_rate": 1.8274925660827453e-05, "loss": 0.1568, "step": 35210 }, { "epoch": 0.6280276816608996, "grad_norm": 0.2343744933605194, "learning_rate": 1.827342654016292e-05, "loss": 0.1007, "step": 35211 }, { "epoch": 0.6280455177826133, "grad_norm": 0.2554820775985718, "learning_rate": 1.827192744557292e-05, "loss": 0.1278, "step": 35212 }, { "epoch": 0.628063353904327, "grad_norm": 0.24556128680706024, "learning_rate": 1.827042837706325e-05, "loss": 0.1454, "step": 35213 }, { "epoch": 0.6280811900260408, "grad_norm": 0.2335948795080185, "learning_rate": 1.8268929334639722e-05, "loss": 0.152, "step": 35214 }, { "epoch": 0.6280990261477545, "grad_norm": 0.20663274824619293, "learning_rate": 1.8267430318308157e-05, "loss": 0.115, "step": 35215 }, { "epoch": 0.6281168622694682, "grad_norm": 0.2269904911518097, "learning_rate": 1.826593132807434e-05, "loss": 0.1364, "step": 35216 }, { "epoch": 0.6281346983911819, "grad_norm": 0.4112590551376343, "learning_rate": 1.8264432363944116e-05, "loss": 0.1458, "step": 35217 }, { "epoch": 0.6281525345128955, "grad_norm": 0.2837974429130554, "learning_rate": 1.8262933425923275e-05, "loss": 0.1379, "step": 35218 }, { "epoch": 0.6281703706346092, "grad_norm": 0.2636870741844177, "learning_rate": 1.8261434514017628e-05, "loss": 0.0979, "step": 35219 }, { "epoch": 0.6281882067563229, "grad_norm": 0.2792653441429138, "learning_rate": 1.8259935628232984e-05, "loss": 0.1953, "step": 35220 }, { "epoch": 0.6282060428780366, "grad_norm": 0.21954795718193054, "learning_rate": 1.825843676857516e-05, "loss": 0.117, "step": 35221 }, { "epoch": 0.6282238789997503, "grad_norm": 0.21932348608970642, "learning_rate": 1.8256937935049973e-05, "loss": 0.0848, "step": 35222 }, { "epoch": 0.628241715121464, "grad_norm": 0.2366136759519577, "learning_rate": 1.8255439127663218e-05, "loss": 0.0794, "step": 35223 }, { "epoch": 0.6282595512431777, "grad_norm": 0.23892684280872345, "learning_rate": 1.82539403464207e-05, "loss": 0.1127, "step": 35224 }, { "epoch": 0.6282773873648914, "grad_norm": 0.3476484417915344, "learning_rate": 1.825244159132825e-05, "loss": 0.1588, "step": 35225 }, { "epoch": 0.628295223486605, "grad_norm": 0.34581294655799866, "learning_rate": 1.8250942862391667e-05, "loss": 0.1292, "step": 35226 }, { "epoch": 0.6283130596083187, "grad_norm": 0.2861466407775879, "learning_rate": 1.824944415961676e-05, "loss": 0.1482, "step": 35227 }, { "epoch": 0.6283308957300324, "grad_norm": 0.21293097734451294, "learning_rate": 1.8247945483009342e-05, "loss": 0.1391, "step": 35228 }, { "epoch": 0.6283487318517461, "grad_norm": 0.19983923435211182, "learning_rate": 1.8246446832575207e-05, "loss": 0.1055, "step": 35229 }, { "epoch": 0.6283665679734598, "grad_norm": 0.32493898272514343, "learning_rate": 1.824494820832019e-05, "loss": 0.1763, "step": 35230 }, { "epoch": 0.6283844040951736, "grad_norm": 0.30365660786628723, "learning_rate": 1.8243449610250084e-05, "loss": 0.1586, "step": 35231 }, { "epoch": 0.6284022402168873, "grad_norm": 0.22662179172039032, "learning_rate": 1.8241951038370696e-05, "loss": 0.1024, "step": 35232 }, { "epoch": 0.628420076338601, "grad_norm": 0.2568208873271942, "learning_rate": 1.824045249268784e-05, "loss": 0.1387, "step": 35233 }, { "epoch": 0.6284379124603147, "grad_norm": 0.2992926239967346, "learning_rate": 1.8238953973207325e-05, "loss": 0.1134, "step": 35234 }, { "epoch": 0.6284557485820284, "grad_norm": 0.23189367353916168, "learning_rate": 1.8237455479934963e-05, "loss": 0.131, "step": 35235 }, { "epoch": 0.628473584703742, "grad_norm": 0.16864114999771118, "learning_rate": 1.8235957012876563e-05, "loss": 0.0858, "step": 35236 }, { "epoch": 0.6284914208254557, "grad_norm": 0.28745537996292114, "learning_rate": 1.8234458572037915e-05, "loss": 0.136, "step": 35237 }, { "epoch": 0.6285092569471694, "grad_norm": 0.3122803866863251, "learning_rate": 1.8232960157424855e-05, "loss": 0.1512, "step": 35238 }, { "epoch": 0.6285270930688831, "grad_norm": 0.28759995102882385, "learning_rate": 1.8231461769043178e-05, "loss": 0.1615, "step": 35239 }, { "epoch": 0.6285449291905968, "grad_norm": 0.2791527211666107, "learning_rate": 1.8229963406898692e-05, "loss": 0.1623, "step": 35240 }, { "epoch": 0.6285627653123105, "grad_norm": 0.288387268781662, "learning_rate": 1.8228465070997208e-05, "loss": 0.1648, "step": 35241 }, { "epoch": 0.6285806014340242, "grad_norm": 0.3625470995903015, "learning_rate": 1.8226966761344523e-05, "loss": 0.1474, "step": 35242 }, { "epoch": 0.6285984375557379, "grad_norm": 0.2694432735443115, "learning_rate": 1.822546847794646e-05, "loss": 0.1595, "step": 35243 }, { "epoch": 0.6286162736774515, "grad_norm": 0.24849697947502136, "learning_rate": 1.822397022080883e-05, "loss": 0.1179, "step": 35244 }, { "epoch": 0.6286341097991652, "grad_norm": 0.3011578619480133, "learning_rate": 1.8222471989937422e-05, "loss": 0.0834, "step": 35245 }, { "epoch": 0.6286519459208789, "grad_norm": 0.24738922715187073, "learning_rate": 1.8220973785338046e-05, "loss": 0.1341, "step": 35246 }, { "epoch": 0.6286697820425926, "grad_norm": 0.28066784143447876, "learning_rate": 1.8219475607016525e-05, "loss": 0.1012, "step": 35247 }, { "epoch": 0.6286876181643064, "grad_norm": 0.2238895446062088, "learning_rate": 1.8217977454978663e-05, "loss": 0.0762, "step": 35248 }, { "epoch": 0.6287054542860201, "grad_norm": 0.2647109925746918, "learning_rate": 1.821647932923026e-05, "loss": 0.1374, "step": 35249 }, { "epoch": 0.6287232904077338, "grad_norm": 0.2317148596048355, "learning_rate": 1.8214981229777117e-05, "loss": 0.0958, "step": 35250 }, { "epoch": 0.6287411265294475, "grad_norm": 0.3785940706729889, "learning_rate": 1.8213483156625063e-05, "loss": 0.1834, "step": 35251 }, { "epoch": 0.6287589626511612, "grad_norm": 0.2935950458049774, "learning_rate": 1.8211985109779888e-05, "loss": 0.1028, "step": 35252 }, { "epoch": 0.6287767987728748, "grad_norm": 0.22883553802967072, "learning_rate": 1.8210487089247402e-05, "loss": 0.111, "step": 35253 }, { "epoch": 0.6287946348945885, "grad_norm": 0.2786867022514343, "learning_rate": 1.820898909503342e-05, "loss": 0.1429, "step": 35254 }, { "epoch": 0.6288124710163022, "grad_norm": 0.28457269072532654, "learning_rate": 1.8207491127143728e-05, "loss": 0.1401, "step": 35255 }, { "epoch": 0.6288303071380159, "grad_norm": 0.28889283537864685, "learning_rate": 1.8205993185584155e-05, "loss": 0.133, "step": 35256 }, { "epoch": 0.6288481432597296, "grad_norm": 0.2618843615055084, "learning_rate": 1.82044952703605e-05, "loss": 0.1173, "step": 35257 }, { "epoch": 0.6288659793814433, "grad_norm": 0.27908554673194885, "learning_rate": 1.820299738147857e-05, "loss": 0.1123, "step": 35258 }, { "epoch": 0.628883815503157, "grad_norm": 0.26213741302490234, "learning_rate": 1.820149951894416e-05, "loss": 0.1414, "step": 35259 }, { "epoch": 0.6289016516248707, "grad_norm": 0.44762203097343445, "learning_rate": 1.8200001682763096e-05, "loss": 0.1221, "step": 35260 }, { "epoch": 0.6289194877465843, "grad_norm": 0.21335327625274658, "learning_rate": 1.8198503872941168e-05, "loss": 0.0894, "step": 35261 }, { "epoch": 0.628937323868298, "grad_norm": 0.2803504765033722, "learning_rate": 1.819700608948419e-05, "loss": 0.165, "step": 35262 }, { "epoch": 0.6289551599900117, "grad_norm": 0.4563461244106293, "learning_rate": 1.8195508332397962e-05, "loss": 0.1187, "step": 35263 }, { "epoch": 0.6289729961117254, "grad_norm": 0.3154296875, "learning_rate": 1.8194010601688302e-05, "loss": 0.1545, "step": 35264 }, { "epoch": 0.6289908322334392, "grad_norm": 0.2549230754375458, "learning_rate": 1.8192512897361008e-05, "loss": 0.1886, "step": 35265 }, { "epoch": 0.6290086683551529, "grad_norm": 0.4323887228965759, "learning_rate": 1.8191015219421883e-05, "loss": 0.1681, "step": 35266 }, { "epoch": 0.6290265044768666, "grad_norm": 0.23212343454360962, "learning_rate": 1.8189517567876728e-05, "loss": 0.1064, "step": 35267 }, { "epoch": 0.6290443405985803, "grad_norm": 0.23868195712566376, "learning_rate": 1.8188019942731354e-05, "loss": 0.1134, "step": 35268 }, { "epoch": 0.629062176720294, "grad_norm": 0.2580852806568146, "learning_rate": 1.818652234399158e-05, "loss": 0.1795, "step": 35269 }, { "epoch": 0.6290800128420077, "grad_norm": 0.24365024268627167, "learning_rate": 1.8185024771663195e-05, "loss": 0.1416, "step": 35270 }, { "epoch": 0.6290978489637213, "grad_norm": 0.27210095524787903, "learning_rate": 1.8183527225752007e-05, "loss": 0.0799, "step": 35271 }, { "epoch": 0.629115685085435, "grad_norm": 0.2471434324979782, "learning_rate": 1.8182029706263816e-05, "loss": 0.0827, "step": 35272 }, { "epoch": 0.6291335212071487, "grad_norm": 0.24963009357452393, "learning_rate": 1.8180532213204438e-05, "loss": 0.1304, "step": 35273 }, { "epoch": 0.6291513573288624, "grad_norm": 0.4840315580368042, "learning_rate": 1.8179034746579667e-05, "loss": 0.1214, "step": 35274 }, { "epoch": 0.6291691934505761, "grad_norm": 0.3027991056442261, "learning_rate": 1.8177537306395322e-05, "loss": 0.0933, "step": 35275 }, { "epoch": 0.6291870295722898, "grad_norm": 0.2912178635597229, "learning_rate": 1.8176039892657188e-05, "loss": 0.1435, "step": 35276 }, { "epoch": 0.6292048656940035, "grad_norm": 0.2589821517467499, "learning_rate": 1.817454250537109e-05, "loss": 0.1004, "step": 35277 }, { "epoch": 0.6292227018157172, "grad_norm": 0.18125738203525543, "learning_rate": 1.817304514454282e-05, "loss": 0.1352, "step": 35278 }, { "epoch": 0.6292405379374308, "grad_norm": 0.2323274314403534, "learning_rate": 1.8171547810178187e-05, "loss": 0.1082, "step": 35279 }, { "epoch": 0.6292583740591445, "grad_norm": 0.44053277373313904, "learning_rate": 1.8170050502282983e-05, "loss": 0.1753, "step": 35280 }, { "epoch": 0.6292762101808582, "grad_norm": 0.24591851234436035, "learning_rate": 1.8168553220863034e-05, "loss": 0.1682, "step": 35281 }, { "epoch": 0.629294046302572, "grad_norm": 0.32596495747566223, "learning_rate": 1.8167055965924123e-05, "loss": 0.1467, "step": 35282 }, { "epoch": 0.6293118824242857, "grad_norm": 0.2672705352306366, "learning_rate": 1.816555873747207e-05, "loss": 0.1472, "step": 35283 }, { "epoch": 0.6293297185459994, "grad_norm": 0.19331452250480652, "learning_rate": 1.816406153551268e-05, "loss": 0.0885, "step": 35284 }, { "epoch": 0.6293475546677131, "grad_norm": 0.2907578647136688, "learning_rate": 1.8162564360051726e-05, "loss": 0.1332, "step": 35285 }, { "epoch": 0.6293653907894268, "grad_norm": 0.42572343349456787, "learning_rate": 1.8161067211095052e-05, "loss": 0.114, "step": 35286 }, { "epoch": 0.6293832269111405, "grad_norm": 0.26793837547302246, "learning_rate": 1.8159570088648438e-05, "loss": 0.1455, "step": 35287 }, { "epoch": 0.6294010630328541, "grad_norm": 0.2772933542728424, "learning_rate": 1.8158072992717693e-05, "loss": 0.1524, "step": 35288 }, { "epoch": 0.6294188991545678, "grad_norm": 0.27731627225875854, "learning_rate": 1.8156575923308616e-05, "loss": 0.1234, "step": 35289 }, { "epoch": 0.6294367352762815, "grad_norm": 0.27789533138275146, "learning_rate": 1.8155078880427016e-05, "loss": 0.1593, "step": 35290 }, { "epoch": 0.6294545713979952, "grad_norm": 0.28437814116477966, "learning_rate": 1.81535818640787e-05, "loss": 0.1187, "step": 35291 }, { "epoch": 0.6294724075197089, "grad_norm": 0.3501490652561188, "learning_rate": 1.815208487426947e-05, "loss": 0.1174, "step": 35292 }, { "epoch": 0.6294902436414226, "grad_norm": 0.26043999195098877, "learning_rate": 1.8150587911005107e-05, "loss": 0.1128, "step": 35293 }, { "epoch": 0.6295080797631363, "grad_norm": 0.2151462882757187, "learning_rate": 1.8149090974291443e-05, "loss": 0.1097, "step": 35294 }, { "epoch": 0.62952591588485, "grad_norm": 0.2857922911643982, "learning_rate": 1.814759406413427e-05, "loss": 0.1687, "step": 35295 }, { "epoch": 0.6295437520065637, "grad_norm": 0.2592170834541321, "learning_rate": 1.8146097180539385e-05, "loss": 0.1326, "step": 35296 }, { "epoch": 0.6295615881282773, "grad_norm": 0.24273072183132172, "learning_rate": 1.8144600323512595e-05, "loss": 0.1376, "step": 35297 }, { "epoch": 0.629579424249991, "grad_norm": 0.2877618670463562, "learning_rate": 1.8143103493059692e-05, "loss": 0.1389, "step": 35298 }, { "epoch": 0.6295972603717048, "grad_norm": 0.3057037591934204, "learning_rate": 1.8141606689186503e-05, "loss": 0.2019, "step": 35299 }, { "epoch": 0.6296150964934185, "grad_norm": 0.2851075828075409, "learning_rate": 1.8140109911898816e-05, "loss": 0.1354, "step": 35300 }, { "epoch": 0.6296329326151322, "grad_norm": 0.29026395082473755, "learning_rate": 1.8138613161202423e-05, "loss": 0.1338, "step": 35301 }, { "epoch": 0.6296507687368459, "grad_norm": 0.3381284177303314, "learning_rate": 1.8137116437103136e-05, "loss": 0.1433, "step": 35302 }, { "epoch": 0.6296686048585596, "grad_norm": 0.27081307768821716, "learning_rate": 1.813561973960676e-05, "loss": 0.1556, "step": 35303 }, { "epoch": 0.6296864409802733, "grad_norm": 0.2977769672870636, "learning_rate": 1.813412306871909e-05, "loss": 0.1449, "step": 35304 }, { "epoch": 0.629704277101987, "grad_norm": 0.33787962794303894, "learning_rate": 1.8132626424445937e-05, "loss": 0.1488, "step": 35305 }, { "epoch": 0.6297221132237006, "grad_norm": 0.24437111616134644, "learning_rate": 1.8131129806793084e-05, "loss": 0.0835, "step": 35306 }, { "epoch": 0.6297399493454143, "grad_norm": 0.34457412362098694, "learning_rate": 1.8129633215766353e-05, "loss": 0.1573, "step": 35307 }, { "epoch": 0.629757785467128, "grad_norm": 0.34267064929008484, "learning_rate": 1.8128136651371537e-05, "loss": 0.1436, "step": 35308 }, { "epoch": 0.6297756215888417, "grad_norm": 0.30829447507858276, "learning_rate": 1.8126640113614436e-05, "loss": 0.1282, "step": 35309 }, { "epoch": 0.6297934577105554, "grad_norm": 0.24545936286449432, "learning_rate": 1.8125143602500852e-05, "loss": 0.12, "step": 35310 }, { "epoch": 0.6298112938322691, "grad_norm": 0.22269515693187714, "learning_rate": 1.8123647118036578e-05, "loss": 0.1013, "step": 35311 }, { "epoch": 0.6298291299539828, "grad_norm": 0.2587223947048187, "learning_rate": 1.8122150660227434e-05, "loss": 0.1099, "step": 35312 }, { "epoch": 0.6298469660756965, "grad_norm": 0.22916986048221588, "learning_rate": 1.8120654229079205e-05, "loss": 0.1339, "step": 35313 }, { "epoch": 0.6298648021974101, "grad_norm": 0.2595296800136566, "learning_rate": 1.8119157824597697e-05, "loss": 0.1546, "step": 35314 }, { "epoch": 0.6298826383191239, "grad_norm": 0.25523489713668823, "learning_rate": 1.81176614467887e-05, "loss": 0.1295, "step": 35315 }, { "epoch": 0.6299004744408376, "grad_norm": 0.2520177960395813, "learning_rate": 1.8116165095658038e-05, "loss": 0.1121, "step": 35316 }, { "epoch": 0.6299183105625513, "grad_norm": 0.27788999676704407, "learning_rate": 1.811466877121149e-05, "loss": 0.1529, "step": 35317 }, { "epoch": 0.629936146684265, "grad_norm": 0.2944776713848114, "learning_rate": 1.811317247345487e-05, "loss": 0.1352, "step": 35318 }, { "epoch": 0.6299539828059787, "grad_norm": 0.2595115005970001, "learning_rate": 1.811167620239396e-05, "loss": 0.1131, "step": 35319 }, { "epoch": 0.6299718189276924, "grad_norm": 0.32978975772857666, "learning_rate": 1.8110179958034586e-05, "loss": 0.1247, "step": 35320 }, { "epoch": 0.6299896550494061, "grad_norm": 0.21254689991474152, "learning_rate": 1.8108683740382536e-05, "loss": 0.1234, "step": 35321 }, { "epoch": 0.6300074911711198, "grad_norm": 0.27821600437164307, "learning_rate": 1.81071875494436e-05, "loss": 0.0912, "step": 35322 }, { "epoch": 0.6300253272928334, "grad_norm": 0.32335394620895386, "learning_rate": 1.810569138522359e-05, "loss": 0.1322, "step": 35323 }, { "epoch": 0.6300431634145471, "grad_norm": 0.26081162691116333, "learning_rate": 1.810419524772829e-05, "loss": 0.1196, "step": 35324 }, { "epoch": 0.6300609995362608, "grad_norm": 0.255611389875412, "learning_rate": 1.810269913696352e-05, "loss": 0.0957, "step": 35325 }, { "epoch": 0.6300788356579745, "grad_norm": 0.32435518503189087, "learning_rate": 1.8101203052935074e-05, "loss": 0.1254, "step": 35326 }, { "epoch": 0.6300966717796882, "grad_norm": 0.3529960513114929, "learning_rate": 1.8099706995648746e-05, "loss": 0.1348, "step": 35327 }, { "epoch": 0.6301145079014019, "grad_norm": 0.3728181719779968, "learning_rate": 1.809821096511033e-05, "loss": 0.1127, "step": 35328 }, { "epoch": 0.6301323440231156, "grad_norm": 0.2500733733177185, "learning_rate": 1.8096714961325638e-05, "loss": 0.1084, "step": 35329 }, { "epoch": 0.6301501801448293, "grad_norm": 0.27013060450553894, "learning_rate": 1.809521898430046e-05, "loss": 0.1493, "step": 35330 }, { "epoch": 0.630168016266543, "grad_norm": 0.44017860293388367, "learning_rate": 1.8093723034040603e-05, "loss": 0.1217, "step": 35331 }, { "epoch": 0.6301858523882568, "grad_norm": 0.2977251708507538, "learning_rate": 1.809222711055185e-05, "loss": 0.1278, "step": 35332 }, { "epoch": 0.6302036885099704, "grad_norm": 0.3162805140018463, "learning_rate": 1.809073121384002e-05, "loss": 0.1953, "step": 35333 }, { "epoch": 0.6302215246316841, "grad_norm": 0.24452361464500427, "learning_rate": 1.8089235343910903e-05, "loss": 0.1103, "step": 35334 }, { "epoch": 0.6302393607533978, "grad_norm": 0.24200385808944702, "learning_rate": 1.80877395007703e-05, "loss": 0.137, "step": 35335 }, { "epoch": 0.6302571968751115, "grad_norm": 0.24446938931941986, "learning_rate": 1.8086243684423998e-05, "loss": 0.1028, "step": 35336 }, { "epoch": 0.6302750329968252, "grad_norm": 0.27976661920547485, "learning_rate": 1.80847478948778e-05, "loss": 0.1646, "step": 35337 }, { "epoch": 0.6302928691185389, "grad_norm": 0.4105447828769684, "learning_rate": 1.808325213213751e-05, "loss": 0.1822, "step": 35338 }, { "epoch": 0.6303107052402526, "grad_norm": 0.29472458362579346, "learning_rate": 1.808175639620893e-05, "loss": 0.1054, "step": 35339 }, { "epoch": 0.6303285413619663, "grad_norm": 0.25440189242362976, "learning_rate": 1.8080260687097854e-05, "loss": 0.1508, "step": 35340 }, { "epoch": 0.6303463774836799, "grad_norm": 0.22787262499332428, "learning_rate": 1.8078765004810068e-05, "loss": 0.1392, "step": 35341 }, { "epoch": 0.6303642136053936, "grad_norm": 0.29044678807258606, "learning_rate": 1.8077269349351383e-05, "loss": 0.1189, "step": 35342 }, { "epoch": 0.6303820497271073, "grad_norm": 0.25068604946136475, "learning_rate": 1.80757737207276e-05, "loss": 0.1745, "step": 35343 }, { "epoch": 0.630399885848821, "grad_norm": 0.3342738747596741, "learning_rate": 1.8074278118944497e-05, "loss": 0.1493, "step": 35344 }, { "epoch": 0.6304177219705347, "grad_norm": 0.26094210147857666, "learning_rate": 1.8072782544007884e-05, "loss": 0.1191, "step": 35345 }, { "epoch": 0.6304355580922484, "grad_norm": 0.2362978756427765, "learning_rate": 1.807128699592357e-05, "loss": 0.1196, "step": 35346 }, { "epoch": 0.6304533942139621, "grad_norm": 0.2806144058704376, "learning_rate": 1.8069791474697338e-05, "loss": 0.1461, "step": 35347 }, { "epoch": 0.6304712303356758, "grad_norm": 0.3263757526874542, "learning_rate": 1.806829598033499e-05, "loss": 0.1657, "step": 35348 }, { "epoch": 0.6304890664573896, "grad_norm": 0.36512333154678345, "learning_rate": 1.806680051284232e-05, "loss": 0.1278, "step": 35349 }, { "epoch": 0.6305069025791032, "grad_norm": 0.22032690048217773, "learning_rate": 1.8065305072225115e-05, "loss": 0.1253, "step": 35350 }, { "epoch": 0.6305247387008169, "grad_norm": 0.42624059319496155, "learning_rate": 1.8063809658489183e-05, "loss": 0.1633, "step": 35351 }, { "epoch": 0.6305425748225306, "grad_norm": 0.24347251653671265, "learning_rate": 1.8062314271640335e-05, "loss": 0.121, "step": 35352 }, { "epoch": 0.6305604109442443, "grad_norm": 0.2626495957374573, "learning_rate": 1.8060818911684347e-05, "loss": 0.2003, "step": 35353 }, { "epoch": 0.630578247065958, "grad_norm": 0.18093395233154297, "learning_rate": 1.8059323578627012e-05, "loss": 0.1031, "step": 35354 }, { "epoch": 0.6305960831876717, "grad_norm": 0.23422425985336304, "learning_rate": 1.8057828272474146e-05, "loss": 0.1394, "step": 35355 }, { "epoch": 0.6306139193093854, "grad_norm": 0.2314637303352356, "learning_rate": 1.8056332993231538e-05, "loss": 0.1144, "step": 35356 }, { "epoch": 0.6306317554310991, "grad_norm": 0.2514317035675049, "learning_rate": 1.8054837740904974e-05, "loss": 0.1, "step": 35357 }, { "epoch": 0.6306495915528127, "grad_norm": 0.2635010778903961, "learning_rate": 1.805334251550026e-05, "loss": 0.1233, "step": 35358 }, { "epoch": 0.6306674276745264, "grad_norm": 0.31342944502830505, "learning_rate": 1.8051847317023186e-05, "loss": 0.1216, "step": 35359 }, { "epoch": 0.6306852637962401, "grad_norm": 0.29472488164901733, "learning_rate": 1.805035214547956e-05, "loss": 0.1309, "step": 35360 }, { "epoch": 0.6307030999179538, "grad_norm": 0.28208810091018677, "learning_rate": 1.804885700087517e-05, "loss": 0.2039, "step": 35361 }, { "epoch": 0.6307209360396675, "grad_norm": 0.22973541915416718, "learning_rate": 1.804736188321581e-05, "loss": 0.1171, "step": 35362 }, { "epoch": 0.6307387721613812, "grad_norm": 0.2388831228017807, "learning_rate": 1.8045866792507264e-05, "loss": 0.0958, "step": 35363 }, { "epoch": 0.6307566082830949, "grad_norm": 0.36623063683509827, "learning_rate": 1.804437172875535e-05, "loss": 0.179, "step": 35364 }, { "epoch": 0.6307744444048086, "grad_norm": 0.3038334846496582, "learning_rate": 1.8042876691965854e-05, "loss": 0.1082, "step": 35365 }, { "epoch": 0.6307922805265224, "grad_norm": 0.4096284508705139, "learning_rate": 1.804138168214457e-05, "loss": 0.1713, "step": 35366 }, { "epoch": 0.630810116648236, "grad_norm": 0.23774613440036774, "learning_rate": 1.8039886699297287e-05, "loss": 0.1382, "step": 35367 }, { "epoch": 0.6308279527699497, "grad_norm": 0.22512365877628326, "learning_rate": 1.803839174342982e-05, "loss": 0.1226, "step": 35368 }, { "epoch": 0.6308457888916634, "grad_norm": 0.29915398359298706, "learning_rate": 1.803689681454795e-05, "loss": 0.153, "step": 35369 }, { "epoch": 0.6308636250133771, "grad_norm": 0.26841655373573303, "learning_rate": 1.8035401912657468e-05, "loss": 0.0868, "step": 35370 }, { "epoch": 0.6308814611350908, "grad_norm": 0.24068579077720642, "learning_rate": 1.8033907037764167e-05, "loss": 0.1445, "step": 35371 }, { "epoch": 0.6308992972568045, "grad_norm": 0.32395458221435547, "learning_rate": 1.803241218987386e-05, "loss": 0.1177, "step": 35372 }, { "epoch": 0.6309171333785182, "grad_norm": 0.23811013996601105, "learning_rate": 1.803091736899232e-05, "loss": 0.1279, "step": 35373 }, { "epoch": 0.6309349695002319, "grad_norm": 0.22637274861335754, "learning_rate": 1.802942257512536e-05, "loss": 0.1313, "step": 35374 }, { "epoch": 0.6309528056219456, "grad_norm": 0.3097386956214905, "learning_rate": 1.8027927808278765e-05, "loss": 0.1434, "step": 35375 }, { "epoch": 0.6309706417436592, "grad_norm": 0.26544082164764404, "learning_rate": 1.8026433068458322e-05, "loss": 0.1145, "step": 35376 }, { "epoch": 0.6309884778653729, "grad_norm": 0.2120247781276703, "learning_rate": 1.8024938355669837e-05, "loss": 0.0861, "step": 35377 }, { "epoch": 0.6310063139870866, "grad_norm": 0.33098259568214417, "learning_rate": 1.80234436699191e-05, "loss": 0.2105, "step": 35378 }, { "epoch": 0.6310241501088003, "grad_norm": 0.2544548511505127, "learning_rate": 1.802194901121191e-05, "loss": 0.1287, "step": 35379 }, { "epoch": 0.631041986230514, "grad_norm": 0.3424955904483795, "learning_rate": 1.8020454379554043e-05, "loss": 0.1076, "step": 35380 }, { "epoch": 0.6310598223522277, "grad_norm": 0.34930822253227234, "learning_rate": 1.8018959774951317e-05, "loss": 0.1959, "step": 35381 }, { "epoch": 0.6310776584739414, "grad_norm": 0.2676719129085541, "learning_rate": 1.8017465197409515e-05, "loss": 0.1169, "step": 35382 }, { "epoch": 0.6310954945956552, "grad_norm": 0.2419026792049408, "learning_rate": 1.8015970646934427e-05, "loss": 0.1098, "step": 35383 }, { "epoch": 0.6311133307173689, "grad_norm": 0.2810690701007843, "learning_rate": 1.8014476123531843e-05, "loss": 0.1476, "step": 35384 }, { "epoch": 0.6311311668390825, "grad_norm": 0.27666938304901123, "learning_rate": 1.801298162720757e-05, "loss": 0.1369, "step": 35385 }, { "epoch": 0.6311490029607962, "grad_norm": 0.29098209738731384, "learning_rate": 1.8011487157967385e-05, "loss": 0.1528, "step": 35386 }, { "epoch": 0.6311668390825099, "grad_norm": 0.3187029957771301, "learning_rate": 1.8009992715817098e-05, "loss": 0.1557, "step": 35387 }, { "epoch": 0.6311846752042236, "grad_norm": 0.26841095089912415, "learning_rate": 1.8008498300762494e-05, "loss": 0.1369, "step": 35388 }, { "epoch": 0.6312025113259373, "grad_norm": 0.19001835584640503, "learning_rate": 1.8007003912809356e-05, "loss": 0.1049, "step": 35389 }, { "epoch": 0.631220347447651, "grad_norm": 0.24521635472774506, "learning_rate": 1.8005509551963494e-05, "loss": 0.1163, "step": 35390 }, { "epoch": 0.6312381835693647, "grad_norm": 0.27687886357307434, "learning_rate": 1.8004015218230694e-05, "loss": 0.1474, "step": 35391 }, { "epoch": 0.6312560196910784, "grad_norm": 0.23672008514404297, "learning_rate": 1.8002520911616743e-05, "loss": 0.1028, "step": 35392 }, { "epoch": 0.631273855812792, "grad_norm": 0.24028043448925018, "learning_rate": 1.8001026632127435e-05, "loss": 0.1088, "step": 35393 }, { "epoch": 0.6312916919345057, "grad_norm": 0.27435794472694397, "learning_rate": 1.7999532379768567e-05, "loss": 0.0855, "step": 35394 }, { "epoch": 0.6313095280562194, "grad_norm": 0.26046356558799744, "learning_rate": 1.7998038154545935e-05, "loss": 0.0864, "step": 35395 }, { "epoch": 0.6313273641779331, "grad_norm": 0.29235291481018066, "learning_rate": 1.7996543956465325e-05, "loss": 0.1659, "step": 35396 }, { "epoch": 0.6313452002996468, "grad_norm": 0.274905264377594, "learning_rate": 1.7995049785532518e-05, "loss": 0.1183, "step": 35397 }, { "epoch": 0.6313630364213605, "grad_norm": 0.31036612391471863, "learning_rate": 1.799355564175333e-05, "loss": 0.1428, "step": 35398 }, { "epoch": 0.6313808725430742, "grad_norm": 0.27010050415992737, "learning_rate": 1.7992061525133543e-05, "loss": 0.1515, "step": 35399 }, { "epoch": 0.631398708664788, "grad_norm": 0.3791591227054596, "learning_rate": 1.7990567435678935e-05, "loss": 0.1558, "step": 35400 }, { "epoch": 0.6314165447865017, "grad_norm": 0.25948670506477356, "learning_rate": 1.7989073373395315e-05, "loss": 0.0875, "step": 35401 }, { "epoch": 0.6314343809082154, "grad_norm": 0.21399211883544922, "learning_rate": 1.798757933828846e-05, "loss": 0.0835, "step": 35402 }, { "epoch": 0.631452217029929, "grad_norm": 0.22009773552417755, "learning_rate": 1.7986085330364177e-05, "loss": 0.1328, "step": 35403 }, { "epoch": 0.6314700531516427, "grad_norm": 0.3885743319988251, "learning_rate": 1.798459134962825e-05, "loss": 0.1383, "step": 35404 }, { "epoch": 0.6314878892733564, "grad_norm": 0.20693336427211761, "learning_rate": 1.798309739608647e-05, "loss": 0.1289, "step": 35405 }, { "epoch": 0.6315057253950701, "grad_norm": 0.36580348014831543, "learning_rate": 1.798160346974462e-05, "loss": 0.1375, "step": 35406 }, { "epoch": 0.6315235615167838, "grad_norm": 0.2585957944393158, "learning_rate": 1.7980109570608504e-05, "loss": 0.1375, "step": 35407 }, { "epoch": 0.6315413976384975, "grad_norm": 0.2605012357234955, "learning_rate": 1.797861569868391e-05, "loss": 0.0938, "step": 35408 }, { "epoch": 0.6315592337602112, "grad_norm": 0.21506501734256744, "learning_rate": 1.797712185397663e-05, "loss": 0.1124, "step": 35409 }, { "epoch": 0.6315770698819249, "grad_norm": 0.3258536756038666, "learning_rate": 1.7975628036492444e-05, "loss": 0.1323, "step": 35410 }, { "epoch": 0.6315949060036385, "grad_norm": 0.22610147297382355, "learning_rate": 1.797413424623715e-05, "loss": 0.1152, "step": 35411 }, { "epoch": 0.6316127421253522, "grad_norm": 0.22787535190582275, "learning_rate": 1.7972640483216546e-05, "loss": 0.1298, "step": 35412 }, { "epoch": 0.6316305782470659, "grad_norm": 0.32898852229118347, "learning_rate": 1.797114674743641e-05, "loss": 0.1244, "step": 35413 }, { "epoch": 0.6316484143687796, "grad_norm": 0.2604621350765228, "learning_rate": 1.796965303890254e-05, "loss": 0.172, "step": 35414 }, { "epoch": 0.6316662504904933, "grad_norm": 0.31942859292030334, "learning_rate": 1.7968159357620712e-05, "loss": 0.1063, "step": 35415 }, { "epoch": 0.6316840866122071, "grad_norm": 0.34835657477378845, "learning_rate": 1.7966665703596736e-05, "loss": 0.1041, "step": 35416 }, { "epoch": 0.6317019227339208, "grad_norm": 0.26756277680397034, "learning_rate": 1.7965172076836394e-05, "loss": 0.1313, "step": 35417 }, { "epoch": 0.6317197588556345, "grad_norm": 0.3831746578216553, "learning_rate": 1.7963678477345477e-05, "loss": 0.168, "step": 35418 }, { "epoch": 0.6317375949773482, "grad_norm": 0.28253981471061707, "learning_rate": 1.796218490512976e-05, "loss": 0.1293, "step": 35419 }, { "epoch": 0.6317554310990618, "grad_norm": 0.28203126788139343, "learning_rate": 1.796069136019506e-05, "loss": 0.1108, "step": 35420 }, { "epoch": 0.6317732672207755, "grad_norm": 0.29136377573013306, "learning_rate": 1.795919784254714e-05, "loss": 0.139, "step": 35421 }, { "epoch": 0.6317911033424892, "grad_norm": 0.2343776375055313, "learning_rate": 1.795770435219181e-05, "loss": 0.0724, "step": 35422 }, { "epoch": 0.6318089394642029, "grad_norm": 0.2761262059211731, "learning_rate": 1.795621088913484e-05, "loss": 0.1395, "step": 35423 }, { "epoch": 0.6318267755859166, "grad_norm": 0.263091117143631, "learning_rate": 1.7954717453382035e-05, "loss": 0.0654, "step": 35424 }, { "epoch": 0.6318446117076303, "grad_norm": 0.22587265074253082, "learning_rate": 1.7953224044939186e-05, "loss": 0.1215, "step": 35425 }, { "epoch": 0.631862447829344, "grad_norm": 0.2848919928073883, "learning_rate": 1.795173066381207e-05, "loss": 0.1314, "step": 35426 }, { "epoch": 0.6318802839510577, "grad_norm": 0.28789907693862915, "learning_rate": 1.7950237310006474e-05, "loss": 0.1281, "step": 35427 }, { "epoch": 0.6318981200727714, "grad_norm": 0.2507967948913574, "learning_rate": 1.7948743983528187e-05, "loss": 0.093, "step": 35428 }, { "epoch": 0.631915956194485, "grad_norm": 0.24011845886707306, "learning_rate": 1.794725068438302e-05, "loss": 0.1097, "step": 35429 }, { "epoch": 0.6319337923161987, "grad_norm": 0.21063701808452606, "learning_rate": 1.794575741257674e-05, "loss": 0.0642, "step": 35430 }, { "epoch": 0.6319516284379124, "grad_norm": 0.28884100914001465, "learning_rate": 1.7944264168115144e-05, "loss": 0.1426, "step": 35431 }, { "epoch": 0.6319694645596261, "grad_norm": 0.2138759195804596, "learning_rate": 1.7942770951004007e-05, "loss": 0.1489, "step": 35432 }, { "epoch": 0.6319873006813399, "grad_norm": 0.2516428232192993, "learning_rate": 1.7941277761249138e-05, "loss": 0.1459, "step": 35433 }, { "epoch": 0.6320051368030536, "grad_norm": 0.30709967017173767, "learning_rate": 1.7939784598856305e-05, "loss": 0.139, "step": 35434 }, { "epoch": 0.6320229729247673, "grad_norm": 0.2141241729259491, "learning_rate": 1.7938291463831314e-05, "loss": 0.0964, "step": 35435 }, { "epoch": 0.632040809046481, "grad_norm": 0.3225373327732086, "learning_rate": 1.7936798356179928e-05, "loss": 0.142, "step": 35436 }, { "epoch": 0.6320586451681947, "grad_norm": 0.5683669447898865, "learning_rate": 1.793530527590797e-05, "loss": 0.1654, "step": 35437 }, { "epoch": 0.6320764812899083, "grad_norm": 0.20786359906196594, "learning_rate": 1.7933812223021207e-05, "loss": 0.0944, "step": 35438 }, { "epoch": 0.632094317411622, "grad_norm": 0.23722374439239502, "learning_rate": 1.7932319197525423e-05, "loss": 0.117, "step": 35439 }, { "epoch": 0.6321121535333357, "grad_norm": 0.22797037661075592, "learning_rate": 1.793082619942641e-05, "loss": 0.1134, "step": 35440 }, { "epoch": 0.6321299896550494, "grad_norm": 0.27003049850463867, "learning_rate": 1.7929333228729956e-05, "loss": 0.0831, "step": 35441 }, { "epoch": 0.6321478257767631, "grad_norm": 0.264720618724823, "learning_rate": 1.792784028544185e-05, "loss": 0.138, "step": 35442 }, { "epoch": 0.6321656618984768, "grad_norm": 0.2569016218185425, "learning_rate": 1.792634736956788e-05, "loss": 0.1218, "step": 35443 }, { "epoch": 0.6321834980201905, "grad_norm": 0.229520782828331, "learning_rate": 1.7924854481113832e-05, "loss": 0.1467, "step": 35444 }, { "epoch": 0.6322013341419042, "grad_norm": 0.3214118778705597, "learning_rate": 1.792336162008548e-05, "loss": 0.2048, "step": 35445 }, { "epoch": 0.6322191702636178, "grad_norm": 0.2533576488494873, "learning_rate": 1.7921868786488632e-05, "loss": 0.1219, "step": 35446 }, { "epoch": 0.6322370063853315, "grad_norm": 0.2557198405265808, "learning_rate": 1.792037598032907e-05, "loss": 0.1063, "step": 35447 }, { "epoch": 0.6322548425070452, "grad_norm": 0.27354636788368225, "learning_rate": 1.791888320161257e-05, "loss": 0.1562, "step": 35448 }, { "epoch": 0.6322726786287589, "grad_norm": 0.22823558747768402, "learning_rate": 1.7917390450344916e-05, "loss": 0.1284, "step": 35449 }, { "epoch": 0.6322905147504727, "grad_norm": 0.23739616572856903, "learning_rate": 1.7915897726531913e-05, "loss": 0.1166, "step": 35450 }, { "epoch": 0.6323083508721864, "grad_norm": 0.21627265214920044, "learning_rate": 1.7914405030179336e-05, "loss": 0.0861, "step": 35451 }, { "epoch": 0.6323261869939001, "grad_norm": 0.23258353769779205, "learning_rate": 1.7912912361292978e-05, "loss": 0.125, "step": 35452 }, { "epoch": 0.6323440231156138, "grad_norm": 0.23154449462890625, "learning_rate": 1.7911419719878616e-05, "loss": 0.1145, "step": 35453 }, { "epoch": 0.6323618592373275, "grad_norm": 0.21744675934314728, "learning_rate": 1.7909927105942033e-05, "loss": 0.1045, "step": 35454 }, { "epoch": 0.6323796953590411, "grad_norm": 0.38329291343688965, "learning_rate": 1.790843451948902e-05, "loss": 0.1396, "step": 35455 }, { "epoch": 0.6323975314807548, "grad_norm": 0.22451618313789368, "learning_rate": 1.7906941960525376e-05, "loss": 0.1263, "step": 35456 }, { "epoch": 0.6324153676024685, "grad_norm": 0.27160733938217163, "learning_rate": 1.790544942905687e-05, "loss": 0.127, "step": 35457 }, { "epoch": 0.6324332037241822, "grad_norm": 0.39817431569099426, "learning_rate": 1.7903956925089283e-05, "loss": 0.1451, "step": 35458 }, { "epoch": 0.6324510398458959, "grad_norm": 0.34811851382255554, "learning_rate": 1.790246444862842e-05, "loss": 0.2113, "step": 35459 }, { "epoch": 0.6324688759676096, "grad_norm": 0.2817688584327698, "learning_rate": 1.7900971999680056e-05, "loss": 0.1934, "step": 35460 }, { "epoch": 0.6324867120893233, "grad_norm": 0.2951194643974304, "learning_rate": 1.7899479578249972e-05, "loss": 0.1118, "step": 35461 }, { "epoch": 0.632504548211037, "grad_norm": 0.22974111139774323, "learning_rate": 1.789798718434396e-05, "loss": 0.1024, "step": 35462 }, { "epoch": 0.6325223843327507, "grad_norm": 0.2822018265724182, "learning_rate": 1.78964948179678e-05, "loss": 0.1206, "step": 35463 }, { "epoch": 0.6325402204544643, "grad_norm": 0.2894124686717987, "learning_rate": 1.7895002479127283e-05, "loss": 0.1408, "step": 35464 }, { "epoch": 0.632558056576178, "grad_norm": 0.3011755347251892, "learning_rate": 1.7893510167828192e-05, "loss": 0.0877, "step": 35465 }, { "epoch": 0.6325758926978917, "grad_norm": 0.2765551209449768, "learning_rate": 1.789201788407631e-05, "loss": 0.1284, "step": 35466 }, { "epoch": 0.6325937288196055, "grad_norm": 0.16731123626232147, "learning_rate": 1.7890525627877415e-05, "loss": 0.0566, "step": 35467 }, { "epoch": 0.6326115649413192, "grad_norm": 0.23484602570533752, "learning_rate": 1.7889033399237305e-05, "loss": 0.0872, "step": 35468 }, { "epoch": 0.6326294010630329, "grad_norm": 0.26866719126701355, "learning_rate": 1.788754119816175e-05, "loss": 0.0844, "step": 35469 }, { "epoch": 0.6326472371847466, "grad_norm": 0.2254677414894104, "learning_rate": 1.7886049024656555e-05, "loss": 0.1125, "step": 35470 }, { "epoch": 0.6326650733064603, "grad_norm": 0.23129501938819885, "learning_rate": 1.788455687872747e-05, "loss": 0.0864, "step": 35471 }, { "epoch": 0.632682909428174, "grad_norm": 0.25478705763816833, "learning_rate": 1.7883064760380318e-05, "loss": 0.1352, "step": 35472 }, { "epoch": 0.6327007455498876, "grad_norm": 0.2677682340145111, "learning_rate": 1.7881572669620865e-05, "loss": 0.1136, "step": 35473 }, { "epoch": 0.6327185816716013, "grad_norm": 0.25433531403541565, "learning_rate": 1.7880080606454893e-05, "loss": 0.1532, "step": 35474 }, { "epoch": 0.632736417793315, "grad_norm": 0.2669883072376251, "learning_rate": 1.7878588570888178e-05, "loss": 0.1163, "step": 35475 }, { "epoch": 0.6327542539150287, "grad_norm": 0.2778422236442566, "learning_rate": 1.787709656292652e-05, "loss": 0.1339, "step": 35476 }, { "epoch": 0.6327720900367424, "grad_norm": 0.1996283084154129, "learning_rate": 1.7875604582575695e-05, "loss": 0.0714, "step": 35477 }, { "epoch": 0.6327899261584561, "grad_norm": 0.27443283796310425, "learning_rate": 1.7874112629841494e-05, "loss": 0.0842, "step": 35478 }, { "epoch": 0.6328077622801698, "grad_norm": 0.34911268949508667, "learning_rate": 1.7872620704729688e-05, "loss": 0.0948, "step": 35479 }, { "epoch": 0.6328255984018835, "grad_norm": 0.21580761671066284, "learning_rate": 1.787112880724606e-05, "loss": 0.1, "step": 35480 }, { "epoch": 0.6328434345235971, "grad_norm": 0.23352940380573273, "learning_rate": 1.786963693739641e-05, "loss": 0.1175, "step": 35481 }, { "epoch": 0.6328612706453108, "grad_norm": 0.23411938548088074, "learning_rate": 1.78681450951865e-05, "loss": 0.1555, "step": 35482 }, { "epoch": 0.6328791067670245, "grad_norm": 0.2596302330493927, "learning_rate": 1.786665328062213e-05, "loss": 0.1349, "step": 35483 }, { "epoch": 0.6328969428887383, "grad_norm": 0.3417161703109741, "learning_rate": 1.7865161493709067e-05, "loss": 0.1198, "step": 35484 }, { "epoch": 0.632914779010452, "grad_norm": 0.31121987104415894, "learning_rate": 1.7863669734453115e-05, "loss": 0.1268, "step": 35485 }, { "epoch": 0.6329326151321657, "grad_norm": 0.22938700020313263, "learning_rate": 1.786217800286004e-05, "loss": 0.1227, "step": 35486 }, { "epoch": 0.6329504512538794, "grad_norm": 0.25341150164604187, "learning_rate": 1.786068629893563e-05, "loss": 0.1138, "step": 35487 }, { "epoch": 0.6329682873755931, "grad_norm": 0.3317761719226837, "learning_rate": 1.7859194622685653e-05, "loss": 0.0888, "step": 35488 }, { "epoch": 0.6329861234973068, "grad_norm": 0.2983003854751587, "learning_rate": 1.7857702974115915e-05, "loss": 0.1612, "step": 35489 }, { "epoch": 0.6330039596190205, "grad_norm": 0.23485371470451355, "learning_rate": 1.7856211353232184e-05, "loss": 0.0575, "step": 35490 }, { "epoch": 0.6330217957407341, "grad_norm": 0.25275713205337524, "learning_rate": 1.7854719760040247e-05, "loss": 0.1278, "step": 35491 }, { "epoch": 0.6330396318624478, "grad_norm": 0.25497502088546753, "learning_rate": 1.7853228194545887e-05, "loss": 0.1361, "step": 35492 }, { "epoch": 0.6330574679841615, "grad_norm": 0.32100817561149597, "learning_rate": 1.7851736656754872e-05, "loss": 0.1927, "step": 35493 }, { "epoch": 0.6330753041058752, "grad_norm": 0.3167615830898285, "learning_rate": 1.7850245146673005e-05, "loss": 0.1235, "step": 35494 }, { "epoch": 0.6330931402275889, "grad_norm": 0.27259528636932373, "learning_rate": 1.7848753664306056e-05, "loss": 0.106, "step": 35495 }, { "epoch": 0.6331109763493026, "grad_norm": 0.24194732308387756, "learning_rate": 1.7847262209659805e-05, "loss": 0.1338, "step": 35496 }, { "epoch": 0.6331288124710163, "grad_norm": 0.20803293585777283, "learning_rate": 1.7845770782740035e-05, "loss": 0.0623, "step": 35497 }, { "epoch": 0.63314664859273, "grad_norm": 0.2798668146133423, "learning_rate": 1.7844279383552528e-05, "loss": 0.1364, "step": 35498 }, { "epoch": 0.6331644847144436, "grad_norm": 0.26660528779029846, "learning_rate": 1.7842788012103073e-05, "loss": 0.1407, "step": 35499 }, { "epoch": 0.6331823208361573, "grad_norm": 0.28037959337234497, "learning_rate": 1.784129666839744e-05, "loss": 0.1617, "step": 35500 }, { "epoch": 0.6332001569578711, "grad_norm": 0.26895105838775635, "learning_rate": 1.7839805352441406e-05, "loss": 0.1269, "step": 35501 }, { "epoch": 0.6332179930795848, "grad_norm": 0.39157748222351074, "learning_rate": 1.7838314064240768e-05, "loss": 0.1529, "step": 35502 }, { "epoch": 0.6332358292012985, "grad_norm": 0.2692275047302246, "learning_rate": 1.7836822803801302e-05, "loss": 0.1558, "step": 35503 }, { "epoch": 0.6332536653230122, "grad_norm": 0.3016781508922577, "learning_rate": 1.7835331571128773e-05, "loss": 0.1249, "step": 35504 }, { "epoch": 0.6332715014447259, "grad_norm": 0.22750602662563324, "learning_rate": 1.7833840366228984e-05, "loss": 0.1004, "step": 35505 }, { "epoch": 0.6332893375664396, "grad_norm": 0.24716240167617798, "learning_rate": 1.7832349189107694e-05, "loss": 0.1071, "step": 35506 }, { "epoch": 0.6333071736881533, "grad_norm": 0.29719221591949463, "learning_rate": 1.7830858039770705e-05, "loss": 0.0892, "step": 35507 }, { "epoch": 0.633325009809867, "grad_norm": 0.2533837854862213, "learning_rate": 1.7829366918223782e-05, "loss": 0.1297, "step": 35508 }, { "epoch": 0.6333428459315806, "grad_norm": 0.22649559378623962, "learning_rate": 1.7827875824472707e-05, "loss": 0.0712, "step": 35509 }, { "epoch": 0.6333606820532943, "grad_norm": 0.3142421245574951, "learning_rate": 1.782638475852326e-05, "loss": 0.1693, "step": 35510 }, { "epoch": 0.633378518175008, "grad_norm": 0.3048451840877533, "learning_rate": 1.782489372038123e-05, "loss": 0.1504, "step": 35511 }, { "epoch": 0.6333963542967217, "grad_norm": 0.22719921171665192, "learning_rate": 1.782340271005239e-05, "loss": 0.1455, "step": 35512 }, { "epoch": 0.6334141904184354, "grad_norm": 0.26450788974761963, "learning_rate": 1.7821911727542524e-05, "loss": 0.1376, "step": 35513 }, { "epoch": 0.6334320265401491, "grad_norm": 0.22958171367645264, "learning_rate": 1.7820420772857392e-05, "loss": 0.1471, "step": 35514 }, { "epoch": 0.6334498626618628, "grad_norm": 0.24663661420345306, "learning_rate": 1.7818929846002802e-05, "loss": 0.0869, "step": 35515 }, { "epoch": 0.6334676987835764, "grad_norm": 0.2804535925388336, "learning_rate": 1.7817438946984523e-05, "loss": 0.1257, "step": 35516 }, { "epoch": 0.6334855349052902, "grad_norm": 0.23646791279315948, "learning_rate": 1.781594807580832e-05, "loss": 0.1403, "step": 35517 }, { "epoch": 0.6335033710270039, "grad_norm": 0.24591633677482605, "learning_rate": 1.7814457232479993e-05, "loss": 0.1691, "step": 35518 }, { "epoch": 0.6335212071487176, "grad_norm": 0.35216137766838074, "learning_rate": 1.7812966417005298e-05, "loss": 0.1352, "step": 35519 }, { "epoch": 0.6335390432704313, "grad_norm": 0.21545404195785522, "learning_rate": 1.781147562939004e-05, "loss": 0.1526, "step": 35520 }, { "epoch": 0.633556879392145, "grad_norm": 0.27540323138237, "learning_rate": 1.7809984869639986e-05, "loss": 0.0944, "step": 35521 }, { "epoch": 0.6335747155138587, "grad_norm": 0.285632461309433, "learning_rate": 1.780849413776091e-05, "loss": 0.1222, "step": 35522 }, { "epoch": 0.6335925516355724, "grad_norm": 0.3289864659309387, "learning_rate": 1.780700343375859e-05, "loss": 0.0917, "step": 35523 }, { "epoch": 0.6336103877572861, "grad_norm": 0.27732864022254944, "learning_rate": 1.7805512757638815e-05, "loss": 0.1528, "step": 35524 }, { "epoch": 0.6336282238789998, "grad_norm": 0.3110494017601013, "learning_rate": 1.7804022109407354e-05, "loss": 0.1446, "step": 35525 }, { "epoch": 0.6336460600007134, "grad_norm": 0.24167461693286896, "learning_rate": 1.7802531489069996e-05, "loss": 0.1208, "step": 35526 }, { "epoch": 0.6336638961224271, "grad_norm": 0.28206831216812134, "learning_rate": 1.7801040896632497e-05, "loss": 0.152, "step": 35527 }, { "epoch": 0.6336817322441408, "grad_norm": 0.3009721040725708, "learning_rate": 1.779955033210066e-05, "loss": 0.1823, "step": 35528 }, { "epoch": 0.6336995683658545, "grad_norm": 0.20980344712734222, "learning_rate": 1.7798059795480258e-05, "loss": 0.1058, "step": 35529 }, { "epoch": 0.6337174044875682, "grad_norm": 0.235727921128273, "learning_rate": 1.779656928677706e-05, "loss": 0.1177, "step": 35530 }, { "epoch": 0.6337352406092819, "grad_norm": 0.32229310274124146, "learning_rate": 1.7795078805996845e-05, "loss": 0.1283, "step": 35531 }, { "epoch": 0.6337530767309956, "grad_norm": 0.2604036033153534, "learning_rate": 1.7793588353145385e-05, "loss": 0.1021, "step": 35532 }, { "epoch": 0.6337709128527093, "grad_norm": 0.256980836391449, "learning_rate": 1.779209792822848e-05, "loss": 0.116, "step": 35533 }, { "epoch": 0.633788748974423, "grad_norm": 0.2389703392982483, "learning_rate": 1.7790607531251886e-05, "loss": 0.121, "step": 35534 }, { "epoch": 0.6338065850961367, "grad_norm": 0.24550354480743408, "learning_rate": 1.778911716222139e-05, "loss": 0.0616, "step": 35535 }, { "epoch": 0.6338244212178504, "grad_norm": 0.28213346004486084, "learning_rate": 1.7787626821142762e-05, "loss": 0.1214, "step": 35536 }, { "epoch": 0.6338422573395641, "grad_norm": 0.21905206143856049, "learning_rate": 1.778613650802179e-05, "loss": 0.1474, "step": 35537 }, { "epoch": 0.6338600934612778, "grad_norm": 0.3287425637245178, "learning_rate": 1.7784646222864236e-05, "loss": 0.1766, "step": 35538 }, { "epoch": 0.6338779295829915, "grad_norm": 0.18994520604610443, "learning_rate": 1.7783155965675893e-05, "loss": 0.1187, "step": 35539 }, { "epoch": 0.6338957657047052, "grad_norm": 0.29004308581352234, "learning_rate": 1.778166573646252e-05, "loss": 0.1474, "step": 35540 }, { "epoch": 0.6339136018264189, "grad_norm": 0.1801350712776184, "learning_rate": 1.7780175535229916e-05, "loss": 0.0587, "step": 35541 }, { "epoch": 0.6339314379481326, "grad_norm": 0.3327726721763611, "learning_rate": 1.7778685361983842e-05, "loss": 0.2133, "step": 35542 }, { "epoch": 0.6339492740698462, "grad_norm": 0.314936101436615, "learning_rate": 1.777719521673008e-05, "loss": 0.1558, "step": 35543 }, { "epoch": 0.6339671101915599, "grad_norm": 0.2670227885246277, "learning_rate": 1.7775705099474398e-05, "loss": 0.1464, "step": 35544 }, { "epoch": 0.6339849463132736, "grad_norm": 0.3188895285129547, "learning_rate": 1.7774215010222577e-05, "loss": 0.103, "step": 35545 }, { "epoch": 0.6340027824349873, "grad_norm": 0.27550625801086426, "learning_rate": 1.7772724948980395e-05, "loss": 0.1175, "step": 35546 }, { "epoch": 0.634020618556701, "grad_norm": 0.20753541588783264, "learning_rate": 1.777123491575363e-05, "loss": 0.0777, "step": 35547 }, { "epoch": 0.6340384546784147, "grad_norm": 0.2836662828922272, "learning_rate": 1.7769744910548062e-05, "loss": 0.0908, "step": 35548 }, { "epoch": 0.6340562908001284, "grad_norm": 0.23503363132476807, "learning_rate": 1.7768254933369445e-05, "loss": 0.1358, "step": 35549 }, { "epoch": 0.6340741269218421, "grad_norm": 0.2181215137243271, "learning_rate": 1.776676498422358e-05, "loss": 0.1199, "step": 35550 }, { "epoch": 0.6340919630435559, "grad_norm": 0.3643927574157715, "learning_rate": 1.7765275063116232e-05, "loss": 0.0994, "step": 35551 }, { "epoch": 0.6341097991652695, "grad_norm": 0.27812886238098145, "learning_rate": 1.7763785170053174e-05, "loss": 0.1238, "step": 35552 }, { "epoch": 0.6341276352869832, "grad_norm": 0.25353866815567017, "learning_rate": 1.7762295305040184e-05, "loss": 0.1569, "step": 35553 }, { "epoch": 0.6341454714086969, "grad_norm": 0.28846225142478943, "learning_rate": 1.7760805468083032e-05, "loss": 0.1461, "step": 35554 }, { "epoch": 0.6341633075304106, "grad_norm": 0.33720266819000244, "learning_rate": 1.775931565918751e-05, "loss": 0.1077, "step": 35555 }, { "epoch": 0.6341811436521243, "grad_norm": 0.32191047072410583, "learning_rate": 1.7757825878359376e-05, "loss": 0.1594, "step": 35556 }, { "epoch": 0.634198979773838, "grad_norm": 0.18688996136188507, "learning_rate": 1.775633612560441e-05, "loss": 0.0838, "step": 35557 }, { "epoch": 0.6342168158955517, "grad_norm": 0.34050777554512024, "learning_rate": 1.7754846400928382e-05, "loss": 0.1339, "step": 35558 }, { "epoch": 0.6342346520172654, "grad_norm": 0.2503282427787781, "learning_rate": 1.7753356704337076e-05, "loss": 0.1137, "step": 35559 }, { "epoch": 0.634252488138979, "grad_norm": 0.3249073624610901, "learning_rate": 1.7751867035836265e-05, "loss": 0.1313, "step": 35560 }, { "epoch": 0.6342703242606927, "grad_norm": 0.3234670162200928, "learning_rate": 1.7750377395431723e-05, "loss": 0.1454, "step": 35561 }, { "epoch": 0.6342881603824064, "grad_norm": 0.33902645111083984, "learning_rate": 1.774888778312921e-05, "loss": 0.1158, "step": 35562 }, { "epoch": 0.6343059965041201, "grad_norm": 0.28533488512039185, "learning_rate": 1.7747398198934524e-05, "loss": 0.1087, "step": 35563 }, { "epoch": 0.6343238326258338, "grad_norm": 0.24593883752822876, "learning_rate": 1.7745908642853427e-05, "loss": 0.1102, "step": 35564 }, { "epoch": 0.6343416687475475, "grad_norm": 0.23370836675167084, "learning_rate": 1.7744419114891686e-05, "loss": 0.1593, "step": 35565 }, { "epoch": 0.6343595048692612, "grad_norm": 0.22786179184913635, "learning_rate": 1.7742929615055084e-05, "loss": 0.1068, "step": 35566 }, { "epoch": 0.6343773409909749, "grad_norm": 0.28017571568489075, "learning_rate": 1.7741440143349395e-05, "loss": 0.0901, "step": 35567 }, { "epoch": 0.6343951771126887, "grad_norm": 0.2812330722808838, "learning_rate": 1.7739950699780396e-05, "loss": 0.1301, "step": 35568 }, { "epoch": 0.6344130132344024, "grad_norm": 0.3041155934333801, "learning_rate": 1.7738461284353852e-05, "loss": 0.1343, "step": 35569 }, { "epoch": 0.634430849356116, "grad_norm": 0.26265212893486023, "learning_rate": 1.7736971897075542e-05, "loss": 0.1526, "step": 35570 }, { "epoch": 0.6344486854778297, "grad_norm": 0.2535557448863983, "learning_rate": 1.7735482537951232e-05, "loss": 0.1418, "step": 35571 }, { "epoch": 0.6344665215995434, "grad_norm": 0.2951648533344269, "learning_rate": 1.7733993206986705e-05, "loss": 0.1338, "step": 35572 }, { "epoch": 0.6344843577212571, "grad_norm": 0.2221120446920395, "learning_rate": 1.7732503904187723e-05, "loss": 0.1289, "step": 35573 }, { "epoch": 0.6345021938429708, "grad_norm": 0.2539917826652527, "learning_rate": 1.773101462956008e-05, "loss": 0.1191, "step": 35574 }, { "epoch": 0.6345200299646845, "grad_norm": 0.27111244201660156, "learning_rate": 1.7729525383109518e-05, "loss": 0.0865, "step": 35575 }, { "epoch": 0.6345378660863982, "grad_norm": 0.27790161967277527, "learning_rate": 1.772803616484184e-05, "loss": 0.1288, "step": 35576 }, { "epoch": 0.6345557022081119, "grad_norm": 0.2927458882331848, "learning_rate": 1.7726546974762805e-05, "loss": 0.1265, "step": 35577 }, { "epoch": 0.6345735383298255, "grad_norm": 0.26739680767059326, "learning_rate": 1.7725057812878183e-05, "loss": 0.145, "step": 35578 }, { "epoch": 0.6345913744515392, "grad_norm": 0.27402496337890625, "learning_rate": 1.772356867919374e-05, "loss": 0.0985, "step": 35579 }, { "epoch": 0.6346092105732529, "grad_norm": 0.24193179607391357, "learning_rate": 1.7722079573715273e-05, "loss": 0.1293, "step": 35580 }, { "epoch": 0.6346270466949666, "grad_norm": 0.2774505615234375, "learning_rate": 1.772059049644853e-05, "loss": 0.1419, "step": 35581 }, { "epoch": 0.6346448828166803, "grad_norm": 0.22570741176605225, "learning_rate": 1.77191014473993e-05, "loss": 0.1248, "step": 35582 }, { "epoch": 0.634662718938394, "grad_norm": 0.28375673294067383, "learning_rate": 1.7717612426573342e-05, "loss": 0.1305, "step": 35583 }, { "epoch": 0.6346805550601077, "grad_norm": 0.19663527607917786, "learning_rate": 1.771612343397643e-05, "loss": 0.0762, "step": 35584 }, { "epoch": 0.6346983911818215, "grad_norm": 0.3376742899417877, "learning_rate": 1.771463446961435e-05, "loss": 0.1256, "step": 35585 }, { "epoch": 0.6347162273035352, "grad_norm": 0.29334262013435364, "learning_rate": 1.7713145533492854e-05, "loss": 0.1324, "step": 35586 }, { "epoch": 0.6347340634252489, "grad_norm": 0.2523557245731354, "learning_rate": 1.771165662561773e-05, "loss": 0.1556, "step": 35587 }, { "epoch": 0.6347518995469625, "grad_norm": 0.25364235043525696, "learning_rate": 1.7710167745994725e-05, "loss": 0.1171, "step": 35588 }, { "epoch": 0.6347697356686762, "grad_norm": 0.21740026772022247, "learning_rate": 1.7708678894629645e-05, "loss": 0.0898, "step": 35589 }, { "epoch": 0.6347875717903899, "grad_norm": 0.296003133058548, "learning_rate": 1.7707190071528244e-05, "loss": 0.1287, "step": 35590 }, { "epoch": 0.6348054079121036, "grad_norm": 0.384896457195282, "learning_rate": 1.770570127669629e-05, "loss": 0.1239, "step": 35591 }, { "epoch": 0.6348232440338173, "grad_norm": 0.39708447456359863, "learning_rate": 1.770421251013955e-05, "loss": 0.1216, "step": 35592 }, { "epoch": 0.634841080155531, "grad_norm": 0.27358177304267883, "learning_rate": 1.770272377186381e-05, "loss": 0.1401, "step": 35593 }, { "epoch": 0.6348589162772447, "grad_norm": 0.23284989595413208, "learning_rate": 1.7701235061874833e-05, "loss": 0.1073, "step": 35594 }, { "epoch": 0.6348767523989584, "grad_norm": 0.20216771960258484, "learning_rate": 1.769974638017839e-05, "loss": 0.0887, "step": 35595 }, { "epoch": 0.634894588520672, "grad_norm": 0.4100785255432129, "learning_rate": 1.769825772678025e-05, "loss": 0.129, "step": 35596 }, { "epoch": 0.6349124246423857, "grad_norm": 0.3061642050743103, "learning_rate": 1.7696769101686178e-05, "loss": 0.1144, "step": 35597 }, { "epoch": 0.6349302607640994, "grad_norm": 0.2426021248102188, "learning_rate": 1.7695280504901962e-05, "loss": 0.1205, "step": 35598 }, { "epoch": 0.6349480968858131, "grad_norm": 0.25247666239738464, "learning_rate": 1.7693791936433364e-05, "loss": 0.0971, "step": 35599 }, { "epoch": 0.6349659330075268, "grad_norm": 0.248277485370636, "learning_rate": 1.769230339628614e-05, "loss": 0.0957, "step": 35600 }, { "epoch": 0.6349837691292405, "grad_norm": 0.29131919145584106, "learning_rate": 1.7690814884466076e-05, "loss": 0.1475, "step": 35601 }, { "epoch": 0.6350016052509543, "grad_norm": 0.23761622607707977, "learning_rate": 1.7689326400978937e-05, "loss": 0.1116, "step": 35602 }, { "epoch": 0.635019441372668, "grad_norm": 0.2253965586423874, "learning_rate": 1.76878379458305e-05, "loss": 0.109, "step": 35603 }, { "epoch": 0.6350372774943817, "grad_norm": 0.20029105246067047, "learning_rate": 1.768634951902653e-05, "loss": 0.1178, "step": 35604 }, { "epoch": 0.6350551136160953, "grad_norm": 0.2966753840446472, "learning_rate": 1.7684861120572783e-05, "loss": 0.1764, "step": 35605 }, { "epoch": 0.635072949737809, "grad_norm": 0.3561903238296509, "learning_rate": 1.768337275047505e-05, "loss": 0.1306, "step": 35606 }, { "epoch": 0.6350907858595227, "grad_norm": 0.3040234446525574, "learning_rate": 1.768188440873909e-05, "loss": 0.1564, "step": 35607 }, { "epoch": 0.6351086219812364, "grad_norm": 0.33845001459121704, "learning_rate": 1.7680396095370672e-05, "loss": 0.1244, "step": 35608 }, { "epoch": 0.6351264581029501, "grad_norm": 0.30711135268211365, "learning_rate": 1.7678907810375574e-05, "loss": 0.1715, "step": 35609 }, { "epoch": 0.6351442942246638, "grad_norm": 0.21534603834152222, "learning_rate": 1.7677419553759546e-05, "loss": 0.0841, "step": 35610 }, { "epoch": 0.6351621303463775, "grad_norm": 0.25893518328666687, "learning_rate": 1.7675931325528375e-05, "loss": 0.0913, "step": 35611 }, { "epoch": 0.6351799664680912, "grad_norm": 0.29681482911109924, "learning_rate": 1.767444312568783e-05, "loss": 0.0675, "step": 35612 }, { "epoch": 0.6351978025898048, "grad_norm": 0.21716804802417755, "learning_rate": 1.767295495424366e-05, "loss": 0.1549, "step": 35613 }, { "epoch": 0.6352156387115185, "grad_norm": 0.28385019302368164, "learning_rate": 1.7671466811201653e-05, "loss": 0.1418, "step": 35614 }, { "epoch": 0.6352334748332322, "grad_norm": 0.4200933277606964, "learning_rate": 1.7669978696567575e-05, "loss": 0.1026, "step": 35615 }, { "epoch": 0.6352513109549459, "grad_norm": 0.1612042933702469, "learning_rate": 1.766849061034719e-05, "loss": 0.1134, "step": 35616 }, { "epoch": 0.6352691470766596, "grad_norm": 0.21531610190868378, "learning_rate": 1.766700255254627e-05, "loss": 0.13, "step": 35617 }, { "epoch": 0.6352869831983734, "grad_norm": 0.2569911479949951, "learning_rate": 1.7665514523170575e-05, "loss": 0.117, "step": 35618 }, { "epoch": 0.6353048193200871, "grad_norm": 0.3161217272281647, "learning_rate": 1.7664026522225885e-05, "loss": 0.1056, "step": 35619 }, { "epoch": 0.6353226554418008, "grad_norm": 0.2552807331085205, "learning_rate": 1.7662538549717962e-05, "loss": 0.1043, "step": 35620 }, { "epoch": 0.6353404915635145, "grad_norm": 0.2670985758304596, "learning_rate": 1.7661050605652568e-05, "loss": 0.1343, "step": 35621 }, { "epoch": 0.6353583276852282, "grad_norm": 0.2933639585971832, "learning_rate": 1.765956269003548e-05, "loss": 0.157, "step": 35622 }, { "epoch": 0.6353761638069418, "grad_norm": 0.3414352536201477, "learning_rate": 1.7658074802872456e-05, "loss": 0.166, "step": 35623 }, { "epoch": 0.6353939999286555, "grad_norm": 0.22835437953472137, "learning_rate": 1.7656586944169278e-05, "loss": 0.0802, "step": 35624 }, { "epoch": 0.6354118360503692, "grad_norm": 0.29871606826782227, "learning_rate": 1.765509911393171e-05, "loss": 0.145, "step": 35625 }, { "epoch": 0.6354296721720829, "grad_norm": 0.3048114478588104, "learning_rate": 1.765361131216551e-05, "loss": 0.1526, "step": 35626 }, { "epoch": 0.6354475082937966, "grad_norm": 0.24475961923599243, "learning_rate": 1.765212353887644e-05, "loss": 0.1202, "step": 35627 }, { "epoch": 0.6354653444155103, "grad_norm": 0.2493455857038498, "learning_rate": 1.7650635794070287e-05, "loss": 0.1356, "step": 35628 }, { "epoch": 0.635483180537224, "grad_norm": 0.23936395347118378, "learning_rate": 1.76491480777528e-05, "loss": 0.1072, "step": 35629 }, { "epoch": 0.6355010166589377, "grad_norm": 0.2471265345811844, "learning_rate": 1.7647660389929765e-05, "loss": 0.1469, "step": 35630 }, { "epoch": 0.6355188527806513, "grad_norm": 0.25238221883773804, "learning_rate": 1.7646172730606928e-05, "loss": 0.1307, "step": 35631 }, { "epoch": 0.635536688902365, "grad_norm": 0.2571694552898407, "learning_rate": 1.7644685099790073e-05, "loss": 0.1512, "step": 35632 }, { "epoch": 0.6355545250240787, "grad_norm": 0.24108360707759857, "learning_rate": 1.764319749748496e-05, "loss": 0.1126, "step": 35633 }, { "epoch": 0.6355723611457924, "grad_norm": 0.2176097333431244, "learning_rate": 1.7641709923697353e-05, "loss": 0.106, "step": 35634 }, { "epoch": 0.6355901972675062, "grad_norm": 0.32649004459381104, "learning_rate": 1.7640222378433014e-05, "loss": 0.1245, "step": 35635 }, { "epoch": 0.6356080333892199, "grad_norm": 0.28774306178092957, "learning_rate": 1.763873486169771e-05, "loss": 0.1153, "step": 35636 }, { "epoch": 0.6356258695109336, "grad_norm": 0.3249153196811676, "learning_rate": 1.7637247373497226e-05, "loss": 0.142, "step": 35637 }, { "epoch": 0.6356437056326473, "grad_norm": 0.2671641409397125, "learning_rate": 1.7635759913837314e-05, "loss": 0.15, "step": 35638 }, { "epoch": 0.635661541754361, "grad_norm": 0.3016379475593567, "learning_rate": 1.7634272482723736e-05, "loss": 0.1488, "step": 35639 }, { "epoch": 0.6356793778760746, "grad_norm": 0.4725029468536377, "learning_rate": 1.763278508016225e-05, "loss": 0.1868, "step": 35640 }, { "epoch": 0.6356972139977883, "grad_norm": 0.3273630738258362, "learning_rate": 1.763129770615865e-05, "loss": 0.1387, "step": 35641 }, { "epoch": 0.635715050119502, "grad_norm": 0.2814585566520691, "learning_rate": 1.762981036071868e-05, "loss": 0.1179, "step": 35642 }, { "epoch": 0.6357328862412157, "grad_norm": 0.3014568090438843, "learning_rate": 1.7628323043848112e-05, "loss": 0.1189, "step": 35643 }, { "epoch": 0.6357507223629294, "grad_norm": 0.26144954562187195, "learning_rate": 1.7626835755552702e-05, "loss": 0.1371, "step": 35644 }, { "epoch": 0.6357685584846431, "grad_norm": 0.2884332835674286, "learning_rate": 1.7625348495838227e-05, "loss": 0.2149, "step": 35645 }, { "epoch": 0.6357863946063568, "grad_norm": 0.2273387610912323, "learning_rate": 1.7623861264710457e-05, "loss": 0.1397, "step": 35646 }, { "epoch": 0.6358042307280705, "grad_norm": 0.27078571915626526, "learning_rate": 1.7622374062175144e-05, "loss": 0.1181, "step": 35647 }, { "epoch": 0.6358220668497842, "grad_norm": 0.2247803956270218, "learning_rate": 1.7620886888238052e-05, "loss": 0.1117, "step": 35648 }, { "epoch": 0.6358399029714978, "grad_norm": 0.21677479147911072, "learning_rate": 1.7619399742904954e-05, "loss": 0.1627, "step": 35649 }, { "epoch": 0.6358577390932115, "grad_norm": 0.26142430305480957, "learning_rate": 1.761791262618161e-05, "loss": 0.0893, "step": 35650 }, { "epoch": 0.6358755752149252, "grad_norm": 0.3166951835155487, "learning_rate": 1.761642553807379e-05, "loss": 0.1391, "step": 35651 }, { "epoch": 0.635893411336639, "grad_norm": 0.2555030286312103, "learning_rate": 1.7614938478587256e-05, "loss": 0.1149, "step": 35652 }, { "epoch": 0.6359112474583527, "grad_norm": 0.26169532537460327, "learning_rate": 1.7613451447727765e-05, "loss": 0.1681, "step": 35653 }, { "epoch": 0.6359290835800664, "grad_norm": 0.28741729259490967, "learning_rate": 1.7611964445501093e-05, "loss": 0.1178, "step": 35654 }, { "epoch": 0.6359469197017801, "grad_norm": 0.33196428418159485, "learning_rate": 1.7610477471913e-05, "loss": 0.0873, "step": 35655 }, { "epoch": 0.6359647558234938, "grad_norm": 0.2961922883987427, "learning_rate": 1.7608990526969242e-05, "loss": 0.1112, "step": 35656 }, { "epoch": 0.6359825919452075, "grad_norm": 0.27429425716400146, "learning_rate": 1.760750361067559e-05, "loss": 0.1246, "step": 35657 }, { "epoch": 0.6360004280669211, "grad_norm": 0.23447948694229126, "learning_rate": 1.7606016723037808e-05, "loss": 0.1238, "step": 35658 }, { "epoch": 0.6360182641886348, "grad_norm": 0.2777652442455292, "learning_rate": 1.7604529864061664e-05, "loss": 0.1365, "step": 35659 }, { "epoch": 0.6360361003103485, "grad_norm": 0.23477955162525177, "learning_rate": 1.7603043033752916e-05, "loss": 0.1012, "step": 35660 }, { "epoch": 0.6360539364320622, "grad_norm": 0.2730066776275635, "learning_rate": 1.7601556232117328e-05, "loss": 0.1826, "step": 35661 }, { "epoch": 0.6360717725537759, "grad_norm": 0.34212398529052734, "learning_rate": 1.7600069459160656e-05, "loss": 0.128, "step": 35662 }, { "epoch": 0.6360896086754896, "grad_norm": 0.22933344542980194, "learning_rate": 1.7598582714888674e-05, "loss": 0.1017, "step": 35663 }, { "epoch": 0.6361074447972033, "grad_norm": 0.35666170716285706, "learning_rate": 1.7597095999307144e-05, "loss": 0.156, "step": 35664 }, { "epoch": 0.636125280918917, "grad_norm": 0.26355376839637756, "learning_rate": 1.759560931242183e-05, "loss": 0.094, "step": 35665 }, { "epoch": 0.6361431170406306, "grad_norm": 0.24461154639720917, "learning_rate": 1.759412265423848e-05, "loss": 0.1304, "step": 35666 }, { "epoch": 0.6361609531623443, "grad_norm": 0.31336119771003723, "learning_rate": 1.759263602476288e-05, "loss": 0.171, "step": 35667 }, { "epoch": 0.636178789284058, "grad_norm": 0.24168451130390167, "learning_rate": 1.759114942400078e-05, "loss": 0.1055, "step": 35668 }, { "epoch": 0.6361966254057718, "grad_norm": 0.3456285893917084, "learning_rate": 1.758966285195794e-05, "loss": 0.1505, "step": 35669 }, { "epoch": 0.6362144615274855, "grad_norm": 0.3267318606376648, "learning_rate": 1.7588176308640124e-05, "loss": 0.1658, "step": 35670 }, { "epoch": 0.6362322976491992, "grad_norm": 0.2556320130825043, "learning_rate": 1.7586689794053095e-05, "loss": 0.1198, "step": 35671 }, { "epoch": 0.6362501337709129, "grad_norm": 0.29885348677635193, "learning_rate": 1.758520330820263e-05, "loss": 0.1149, "step": 35672 }, { "epoch": 0.6362679698926266, "grad_norm": 0.37620025873184204, "learning_rate": 1.758371685109447e-05, "loss": 0.1615, "step": 35673 }, { "epoch": 0.6362858060143403, "grad_norm": 0.2157006710767746, "learning_rate": 1.758223042273439e-05, "loss": 0.1186, "step": 35674 }, { "epoch": 0.636303642136054, "grad_norm": 0.25466403365135193, "learning_rate": 1.7580744023128133e-05, "loss": 0.0967, "step": 35675 }, { "epoch": 0.6363214782577676, "grad_norm": 0.2770139276981354, "learning_rate": 1.7579257652281487e-05, "loss": 0.0927, "step": 35676 }, { "epoch": 0.6363393143794813, "grad_norm": 0.20351476967334747, "learning_rate": 1.7577771310200193e-05, "loss": 0.0912, "step": 35677 }, { "epoch": 0.636357150501195, "grad_norm": 0.27072563767433167, "learning_rate": 1.757628499689003e-05, "loss": 0.1248, "step": 35678 }, { "epoch": 0.6363749866229087, "grad_norm": 0.3443397283554077, "learning_rate": 1.757479871235674e-05, "loss": 0.1182, "step": 35679 }, { "epoch": 0.6363928227446224, "grad_norm": 0.30466386675834656, "learning_rate": 1.7573312456606103e-05, "loss": 0.1494, "step": 35680 }, { "epoch": 0.6364106588663361, "grad_norm": 0.3863126337528229, "learning_rate": 1.7571826229643874e-05, "loss": 0.1812, "step": 35681 }, { "epoch": 0.6364284949880498, "grad_norm": 0.3277699649333954, "learning_rate": 1.7570340031475808e-05, "loss": 0.1362, "step": 35682 }, { "epoch": 0.6364463311097635, "grad_norm": 0.25987011194229126, "learning_rate": 1.7568853862107666e-05, "loss": 0.1372, "step": 35683 }, { "epoch": 0.6364641672314771, "grad_norm": 0.3657575249671936, "learning_rate": 1.756736772154522e-05, "loss": 0.1324, "step": 35684 }, { "epoch": 0.6364820033531908, "grad_norm": 0.3056054413318634, "learning_rate": 1.756588160979422e-05, "loss": 0.1334, "step": 35685 }, { "epoch": 0.6364998394749046, "grad_norm": 0.2705551087856293, "learning_rate": 1.7564395526860433e-05, "loss": 0.1188, "step": 35686 }, { "epoch": 0.6365176755966183, "grad_norm": 0.28838902711868286, "learning_rate": 1.756290947274961e-05, "loss": 0.1533, "step": 35687 }, { "epoch": 0.636535511718332, "grad_norm": 0.3430512845516205, "learning_rate": 1.7561423447467527e-05, "loss": 0.1664, "step": 35688 }, { "epoch": 0.6365533478400457, "grad_norm": 0.3388262391090393, "learning_rate": 1.7559937451019937e-05, "loss": 0.1484, "step": 35689 }, { "epoch": 0.6365711839617594, "grad_norm": 0.28996366262435913, "learning_rate": 1.755845148341259e-05, "loss": 0.1809, "step": 35690 }, { "epoch": 0.6365890200834731, "grad_norm": 0.23881135880947113, "learning_rate": 1.7556965544651264e-05, "loss": 0.1142, "step": 35691 }, { "epoch": 0.6366068562051868, "grad_norm": 0.4434231221675873, "learning_rate": 1.75554796347417e-05, "loss": 0.1873, "step": 35692 }, { "epoch": 0.6366246923269004, "grad_norm": 0.267465740442276, "learning_rate": 1.755399375368968e-05, "loss": 0.1638, "step": 35693 }, { "epoch": 0.6366425284486141, "grad_norm": 0.2562426030635834, "learning_rate": 1.7552507901500948e-05, "loss": 0.1707, "step": 35694 }, { "epoch": 0.6366603645703278, "grad_norm": 0.3207739293575287, "learning_rate": 1.7551022078181272e-05, "loss": 0.1342, "step": 35695 }, { "epoch": 0.6366782006920415, "grad_norm": 0.25512486696243286, "learning_rate": 1.7549536283736397e-05, "loss": 0.0666, "step": 35696 }, { "epoch": 0.6366960368137552, "grad_norm": 0.3103644847869873, "learning_rate": 1.75480505181721e-05, "loss": 0.0924, "step": 35697 }, { "epoch": 0.6367138729354689, "grad_norm": 0.2736837565898895, "learning_rate": 1.7546564781494126e-05, "loss": 0.1115, "step": 35698 }, { "epoch": 0.6367317090571826, "grad_norm": 0.2774411737918854, "learning_rate": 1.754507907370825e-05, "loss": 0.1225, "step": 35699 }, { "epoch": 0.6367495451788963, "grad_norm": 0.33015555143356323, "learning_rate": 1.754359339482021e-05, "loss": 0.1194, "step": 35700 }, { "epoch": 0.63676738130061, "grad_norm": 0.2377718836069107, "learning_rate": 1.7542107744835788e-05, "loss": 0.1177, "step": 35701 }, { "epoch": 0.6367852174223236, "grad_norm": 0.2173222303390503, "learning_rate": 1.7540622123760736e-05, "loss": 0.1022, "step": 35702 }, { "epoch": 0.6368030535440374, "grad_norm": 0.27858468890190125, "learning_rate": 1.7539136531600803e-05, "loss": 0.1105, "step": 35703 }, { "epoch": 0.6368208896657511, "grad_norm": 0.3620661497116089, "learning_rate": 1.7537650968361753e-05, "loss": 0.1499, "step": 35704 }, { "epoch": 0.6368387257874648, "grad_norm": 0.2760492265224457, "learning_rate": 1.7536165434049346e-05, "loss": 0.1991, "step": 35705 }, { "epoch": 0.6368565619091785, "grad_norm": 0.2980818450450897, "learning_rate": 1.7534679928669335e-05, "loss": 0.1687, "step": 35706 }, { "epoch": 0.6368743980308922, "grad_norm": 0.3211263120174408, "learning_rate": 1.7533194452227493e-05, "loss": 0.147, "step": 35707 }, { "epoch": 0.6368922341526059, "grad_norm": 0.2986134886741638, "learning_rate": 1.7531709004729562e-05, "loss": 0.1362, "step": 35708 }, { "epoch": 0.6369100702743196, "grad_norm": 0.30289921164512634, "learning_rate": 1.75302235861813e-05, "loss": 0.1772, "step": 35709 }, { "epoch": 0.6369279063960332, "grad_norm": 0.2835098206996918, "learning_rate": 1.7528738196588484e-05, "loss": 0.1301, "step": 35710 }, { "epoch": 0.6369457425177469, "grad_norm": 0.2816462814807892, "learning_rate": 1.7527252835956856e-05, "loss": 0.1081, "step": 35711 }, { "epoch": 0.6369635786394606, "grad_norm": 0.2283925712108612, "learning_rate": 1.7525767504292172e-05, "loss": 0.1442, "step": 35712 }, { "epoch": 0.6369814147611743, "grad_norm": 0.22793836891651154, "learning_rate": 1.752428220160019e-05, "loss": 0.147, "step": 35713 }, { "epoch": 0.636999250882888, "grad_norm": 0.4044966399669647, "learning_rate": 1.7522796927886684e-05, "loss": 0.1251, "step": 35714 }, { "epoch": 0.6370170870046017, "grad_norm": 0.24216333031654358, "learning_rate": 1.75213116831574e-05, "loss": 0.1262, "step": 35715 }, { "epoch": 0.6370349231263154, "grad_norm": 0.24650423228740692, "learning_rate": 1.7519826467418092e-05, "loss": 0.1202, "step": 35716 }, { "epoch": 0.6370527592480291, "grad_norm": 0.40519607067108154, "learning_rate": 1.751834128067452e-05, "loss": 0.073, "step": 35717 }, { "epoch": 0.6370705953697428, "grad_norm": 0.43996450304985046, "learning_rate": 1.7516856122932434e-05, "loss": 0.1438, "step": 35718 }, { "epoch": 0.6370884314914566, "grad_norm": 0.2021629363298416, "learning_rate": 1.7515370994197603e-05, "loss": 0.1516, "step": 35719 }, { "epoch": 0.6371062676131702, "grad_norm": 0.20187199115753174, "learning_rate": 1.7513885894475785e-05, "loss": 0.0818, "step": 35720 }, { "epoch": 0.6371241037348839, "grad_norm": 0.3389519453048706, "learning_rate": 1.751240082377273e-05, "loss": 0.1703, "step": 35721 }, { "epoch": 0.6371419398565976, "grad_norm": 0.2622396945953369, "learning_rate": 1.751091578209418e-05, "loss": 0.1317, "step": 35722 }, { "epoch": 0.6371597759783113, "grad_norm": 0.30014288425445557, "learning_rate": 1.7509430769445923e-05, "loss": 0.1546, "step": 35723 }, { "epoch": 0.637177612100025, "grad_norm": 0.22925706207752228, "learning_rate": 1.7507945785833697e-05, "loss": 0.1139, "step": 35724 }, { "epoch": 0.6371954482217387, "grad_norm": 0.2455938458442688, "learning_rate": 1.750646083126326e-05, "loss": 0.1087, "step": 35725 }, { "epoch": 0.6372132843434524, "grad_norm": 0.25852280855178833, "learning_rate": 1.7504975905740365e-05, "loss": 0.1463, "step": 35726 }, { "epoch": 0.6372311204651661, "grad_norm": 0.24533499777317047, "learning_rate": 1.7503491009270774e-05, "loss": 0.1449, "step": 35727 }, { "epoch": 0.6372489565868797, "grad_norm": 0.2822505831718445, "learning_rate": 1.7502006141860244e-05, "loss": 0.1117, "step": 35728 }, { "epoch": 0.6372667927085934, "grad_norm": 0.25432395935058594, "learning_rate": 1.7500521303514528e-05, "loss": 0.1003, "step": 35729 }, { "epoch": 0.6372846288303071, "grad_norm": 0.23213037848472595, "learning_rate": 1.7499036494239386e-05, "loss": 0.1525, "step": 35730 }, { "epoch": 0.6373024649520208, "grad_norm": 0.2061501294374466, "learning_rate": 1.7497551714040554e-05, "loss": 0.1212, "step": 35731 }, { "epoch": 0.6373203010737345, "grad_norm": 0.1983170211315155, "learning_rate": 1.7496066962923816e-05, "loss": 0.0937, "step": 35732 }, { "epoch": 0.6373381371954482, "grad_norm": 0.26010000705718994, "learning_rate": 1.7494582240894907e-05, "loss": 0.1626, "step": 35733 }, { "epoch": 0.6373559733171619, "grad_norm": 0.24518905580043793, "learning_rate": 1.74930975479596e-05, "loss": 0.1243, "step": 35734 }, { "epoch": 0.6373738094388756, "grad_norm": 0.24404463171958923, "learning_rate": 1.7491612884123622e-05, "loss": 0.1527, "step": 35735 }, { "epoch": 0.6373916455605894, "grad_norm": 0.22220630943775177, "learning_rate": 1.7490128249392758e-05, "loss": 0.1117, "step": 35736 }, { "epoch": 0.637409481682303, "grad_norm": 0.2968965470790863, "learning_rate": 1.748864364377275e-05, "loss": 0.1062, "step": 35737 }, { "epoch": 0.6374273178040167, "grad_norm": 0.2716768980026245, "learning_rate": 1.7487159067269356e-05, "loss": 0.1884, "step": 35738 }, { "epoch": 0.6374451539257304, "grad_norm": 0.29261428117752075, "learning_rate": 1.7485674519888318e-05, "loss": 0.1199, "step": 35739 }, { "epoch": 0.6374629900474441, "grad_norm": 0.30178436636924744, "learning_rate": 1.7484190001635408e-05, "loss": 0.1596, "step": 35740 }, { "epoch": 0.6374808261691578, "grad_norm": 0.22892168164253235, "learning_rate": 1.748270551251637e-05, "loss": 0.1141, "step": 35741 }, { "epoch": 0.6374986622908715, "grad_norm": 0.23763489723205566, "learning_rate": 1.7481221052536973e-05, "loss": 0.1752, "step": 35742 }, { "epoch": 0.6375164984125852, "grad_norm": 0.2826126515865326, "learning_rate": 1.7479736621702955e-05, "loss": 0.1254, "step": 35743 }, { "epoch": 0.6375343345342989, "grad_norm": 0.2782338559627533, "learning_rate": 1.7478252220020064e-05, "loss": 0.1395, "step": 35744 }, { "epoch": 0.6375521706560126, "grad_norm": 0.2369375079870224, "learning_rate": 1.7476767847494077e-05, "loss": 0.1246, "step": 35745 }, { "epoch": 0.6375700067777262, "grad_norm": 0.20960021018981934, "learning_rate": 1.747528350413073e-05, "loss": 0.0993, "step": 35746 }, { "epoch": 0.6375878428994399, "grad_norm": 0.2015584111213684, "learning_rate": 1.747379918993579e-05, "loss": 0.0951, "step": 35747 }, { "epoch": 0.6376056790211536, "grad_norm": 0.20795419812202454, "learning_rate": 1.7472314904914995e-05, "loss": 0.1034, "step": 35748 }, { "epoch": 0.6376235151428673, "grad_norm": 0.2713645100593567, "learning_rate": 1.7470830649074115e-05, "loss": 0.107, "step": 35749 }, { "epoch": 0.637641351264581, "grad_norm": 0.2632594704627991, "learning_rate": 1.7469346422418893e-05, "loss": 0.1032, "step": 35750 }, { "epoch": 0.6376591873862947, "grad_norm": 0.2530999481678009, "learning_rate": 1.746786222495509e-05, "loss": 0.1229, "step": 35751 }, { "epoch": 0.6376770235080084, "grad_norm": 0.20867447555065155, "learning_rate": 1.746637805668844e-05, "loss": 0.1303, "step": 35752 }, { "epoch": 0.6376948596297222, "grad_norm": 0.23367738723754883, "learning_rate": 1.7464893917624724e-05, "loss": 0.1385, "step": 35753 }, { "epoch": 0.6377126957514359, "grad_norm": 0.2347860336303711, "learning_rate": 1.7463409807769678e-05, "loss": 0.1495, "step": 35754 }, { "epoch": 0.6377305318731495, "grad_norm": 0.2776419222354889, "learning_rate": 1.7461925727129065e-05, "loss": 0.1457, "step": 35755 }, { "epoch": 0.6377483679948632, "grad_norm": 0.23866885900497437, "learning_rate": 1.7460441675708626e-05, "loss": 0.113, "step": 35756 }, { "epoch": 0.6377662041165769, "grad_norm": 0.28532931208610535, "learning_rate": 1.745895765351411e-05, "loss": 0.1442, "step": 35757 }, { "epoch": 0.6377840402382906, "grad_norm": 0.2766592800617218, "learning_rate": 1.7457473660551295e-05, "loss": 0.1352, "step": 35758 }, { "epoch": 0.6378018763600043, "grad_norm": 0.299600750207901, "learning_rate": 1.7455989696825908e-05, "loss": 0.1271, "step": 35759 }, { "epoch": 0.637819712481718, "grad_norm": 0.27411141991615295, "learning_rate": 1.745450576234371e-05, "loss": 0.1173, "step": 35760 }, { "epoch": 0.6378375486034317, "grad_norm": 0.2585819959640503, "learning_rate": 1.7453021857110455e-05, "loss": 0.082, "step": 35761 }, { "epoch": 0.6378553847251454, "grad_norm": 0.219051331281662, "learning_rate": 1.7451537981131895e-05, "loss": 0.1238, "step": 35762 }, { "epoch": 0.637873220846859, "grad_norm": 0.30432379245758057, "learning_rate": 1.745005413441378e-05, "loss": 0.1202, "step": 35763 }, { "epoch": 0.6378910569685727, "grad_norm": 0.22092677652835846, "learning_rate": 1.744857031696187e-05, "loss": 0.144, "step": 35764 }, { "epoch": 0.6379088930902864, "grad_norm": 0.3004605770111084, "learning_rate": 1.7447086528781892e-05, "loss": 0.1, "step": 35765 }, { "epoch": 0.6379267292120001, "grad_norm": 0.26434415578842163, "learning_rate": 1.7445602769879633e-05, "loss": 0.1327, "step": 35766 }, { "epoch": 0.6379445653337138, "grad_norm": 0.21752548217773438, "learning_rate": 1.7444119040260814e-05, "loss": 0.0713, "step": 35767 }, { "epoch": 0.6379624014554275, "grad_norm": 0.24605584144592285, "learning_rate": 1.7442635339931212e-05, "loss": 0.1117, "step": 35768 }, { "epoch": 0.6379802375771412, "grad_norm": 0.2782864570617676, "learning_rate": 1.744115166889656e-05, "loss": 0.1311, "step": 35769 }, { "epoch": 0.637998073698855, "grad_norm": 0.4238375425338745, "learning_rate": 1.743966802716261e-05, "loss": 0.1304, "step": 35770 }, { "epoch": 0.6380159098205687, "grad_norm": 0.26671743392944336, "learning_rate": 1.7438184414735122e-05, "loss": 0.1276, "step": 35771 }, { "epoch": 0.6380337459422823, "grad_norm": 0.21730798482894897, "learning_rate": 1.7436700831619846e-05, "loss": 0.1356, "step": 35772 }, { "epoch": 0.638051582063996, "grad_norm": 0.23240308463573456, "learning_rate": 1.7435217277822523e-05, "loss": 0.1407, "step": 35773 }, { "epoch": 0.6380694181857097, "grad_norm": 0.2365575134754181, "learning_rate": 1.7433733753348917e-05, "loss": 0.161, "step": 35774 }, { "epoch": 0.6380872543074234, "grad_norm": 0.25101301074028015, "learning_rate": 1.7432250258204766e-05, "loss": 0.1315, "step": 35775 }, { "epoch": 0.6381050904291371, "grad_norm": 0.29103437066078186, "learning_rate": 1.7430766792395835e-05, "loss": 0.1221, "step": 35776 }, { "epoch": 0.6381229265508508, "grad_norm": 0.2293984591960907, "learning_rate": 1.7429283355927865e-05, "loss": 0.1146, "step": 35777 }, { "epoch": 0.6381407626725645, "grad_norm": 0.2701701819896698, "learning_rate": 1.7427799948806595e-05, "loss": 0.0909, "step": 35778 }, { "epoch": 0.6381585987942782, "grad_norm": 0.3107643723487854, "learning_rate": 1.7426316571037802e-05, "loss": 0.1791, "step": 35779 }, { "epoch": 0.6381764349159919, "grad_norm": 0.39670756459236145, "learning_rate": 1.7424833222627218e-05, "loss": 0.1587, "step": 35780 }, { "epoch": 0.6381942710377055, "grad_norm": 0.21293073892593384, "learning_rate": 1.7423349903580596e-05, "loss": 0.1267, "step": 35781 }, { "epoch": 0.6382121071594192, "grad_norm": 0.2524206042289734, "learning_rate": 1.742186661390369e-05, "loss": 0.1107, "step": 35782 }, { "epoch": 0.6382299432811329, "grad_norm": 0.21061506867408752, "learning_rate": 1.7420383353602236e-05, "loss": 0.1124, "step": 35783 }, { "epoch": 0.6382477794028466, "grad_norm": 0.2210262268781662, "learning_rate": 1.7418900122682003e-05, "loss": 0.1003, "step": 35784 }, { "epoch": 0.6382656155245603, "grad_norm": 0.2678980827331543, "learning_rate": 1.7417416921148734e-05, "loss": 0.1138, "step": 35785 }, { "epoch": 0.638283451646274, "grad_norm": 0.2675687074661255, "learning_rate": 1.7415933749008173e-05, "loss": 0.1272, "step": 35786 }, { "epoch": 0.6383012877679878, "grad_norm": 0.28626006841659546, "learning_rate": 1.7414450606266065e-05, "loss": 0.1133, "step": 35787 }, { "epoch": 0.6383191238897015, "grad_norm": 0.3768504858016968, "learning_rate": 1.741296749292817e-05, "loss": 0.2613, "step": 35788 }, { "epoch": 0.6383369600114152, "grad_norm": 0.2312556803226471, "learning_rate": 1.741148440900024e-05, "loss": 0.1033, "step": 35789 }, { "epoch": 0.6383547961331288, "grad_norm": 0.27568963170051575, "learning_rate": 1.7410001354488015e-05, "loss": 0.1631, "step": 35790 }, { "epoch": 0.6383726322548425, "grad_norm": 0.34590527415275574, "learning_rate": 1.7408518329397236e-05, "loss": 0.1898, "step": 35791 }, { "epoch": 0.6383904683765562, "grad_norm": 0.3124268651008606, "learning_rate": 1.740703533373367e-05, "loss": 0.1312, "step": 35792 }, { "epoch": 0.6384083044982699, "grad_norm": 0.2380906641483307, "learning_rate": 1.7405552367503063e-05, "loss": 0.1432, "step": 35793 }, { "epoch": 0.6384261406199836, "grad_norm": 0.3375542461872101, "learning_rate": 1.7404069430711155e-05, "loss": 0.1361, "step": 35794 }, { "epoch": 0.6384439767416973, "grad_norm": 0.26807522773742676, "learning_rate": 1.7402586523363694e-05, "loss": 0.1456, "step": 35795 }, { "epoch": 0.638461812863411, "grad_norm": 0.2570466697216034, "learning_rate": 1.7401103645466428e-05, "loss": 0.1404, "step": 35796 }, { "epoch": 0.6384796489851247, "grad_norm": 0.2428160011768341, "learning_rate": 1.7399620797025115e-05, "loss": 0.1375, "step": 35797 }, { "epoch": 0.6384974851068383, "grad_norm": 0.36094430088996887, "learning_rate": 1.7398137978045498e-05, "loss": 0.1131, "step": 35798 }, { "epoch": 0.638515321228552, "grad_norm": 0.18215768039226532, "learning_rate": 1.7396655188533324e-05, "loss": 0.1321, "step": 35799 }, { "epoch": 0.6385331573502657, "grad_norm": 0.32127895951271057, "learning_rate": 1.7395172428494333e-05, "loss": 0.1182, "step": 35800 }, { "epoch": 0.6385509934719794, "grad_norm": 0.2995901107788086, "learning_rate": 1.739368969793429e-05, "loss": 0.087, "step": 35801 }, { "epoch": 0.6385688295936931, "grad_norm": 0.16200973093509674, "learning_rate": 1.7392206996858922e-05, "loss": 0.106, "step": 35802 }, { "epoch": 0.6385866657154068, "grad_norm": 0.21286509931087494, "learning_rate": 1.7390724325274e-05, "loss": 0.0891, "step": 35803 }, { "epoch": 0.6386045018371206, "grad_norm": 0.18890058994293213, "learning_rate": 1.7389241683185244e-05, "loss": 0.0962, "step": 35804 }, { "epoch": 0.6386223379588343, "grad_norm": 0.2941281199455261, "learning_rate": 1.738775907059843e-05, "loss": 0.136, "step": 35805 }, { "epoch": 0.638640174080548, "grad_norm": 0.23328498005867004, "learning_rate": 1.7386276487519287e-05, "loss": 0.1051, "step": 35806 }, { "epoch": 0.6386580102022617, "grad_norm": 0.2769111096858978, "learning_rate": 1.7384793933953565e-05, "loss": 0.1724, "step": 35807 }, { "epoch": 0.6386758463239753, "grad_norm": 0.22209565341472626, "learning_rate": 1.738331140990701e-05, "loss": 0.1558, "step": 35808 }, { "epoch": 0.638693682445689, "grad_norm": 0.2373506873846054, "learning_rate": 1.738182891538537e-05, "loss": 0.1084, "step": 35809 }, { "epoch": 0.6387115185674027, "grad_norm": 0.2676689624786377, "learning_rate": 1.738034645039439e-05, "loss": 0.1241, "step": 35810 }, { "epoch": 0.6387293546891164, "grad_norm": 0.30411019921302795, "learning_rate": 1.7378864014939827e-05, "loss": 0.1231, "step": 35811 }, { "epoch": 0.6387471908108301, "grad_norm": 0.293819785118103, "learning_rate": 1.737738160902742e-05, "loss": 0.1235, "step": 35812 }, { "epoch": 0.6387650269325438, "grad_norm": 0.2943223714828491, "learning_rate": 1.73758992326629e-05, "loss": 0.1163, "step": 35813 }, { "epoch": 0.6387828630542575, "grad_norm": 0.22722260653972626, "learning_rate": 1.7374416885852045e-05, "loss": 0.081, "step": 35814 }, { "epoch": 0.6388006991759712, "grad_norm": 0.30290743708610535, "learning_rate": 1.737293456860058e-05, "loss": 0.1423, "step": 35815 }, { "epoch": 0.6388185352976848, "grad_norm": 0.28768107295036316, "learning_rate": 1.737145228091425e-05, "loss": 0.1017, "step": 35816 }, { "epoch": 0.6388363714193985, "grad_norm": 0.28279608488082886, "learning_rate": 1.73699700227988e-05, "loss": 0.058, "step": 35817 }, { "epoch": 0.6388542075411122, "grad_norm": 0.2586454451084137, "learning_rate": 1.7368487794259992e-05, "loss": 0.1027, "step": 35818 }, { "epoch": 0.6388720436628259, "grad_norm": 0.3797576129436493, "learning_rate": 1.736700559530356e-05, "loss": 0.1213, "step": 35819 }, { "epoch": 0.6388898797845397, "grad_norm": 0.27608340978622437, "learning_rate": 1.7365523425935255e-05, "loss": 0.1443, "step": 35820 }, { "epoch": 0.6389077159062534, "grad_norm": 0.33411461114883423, "learning_rate": 1.7364041286160815e-05, "loss": 0.073, "step": 35821 }, { "epoch": 0.6389255520279671, "grad_norm": 0.22558556497097015, "learning_rate": 1.7362559175985975e-05, "loss": 0.1257, "step": 35822 }, { "epoch": 0.6389433881496808, "grad_norm": 0.2996184527873993, "learning_rate": 1.7361077095416502e-05, "loss": 0.1621, "step": 35823 }, { "epoch": 0.6389612242713945, "grad_norm": 0.2431306093931198, "learning_rate": 1.735959504445814e-05, "loss": 0.1192, "step": 35824 }, { "epoch": 0.6389790603931081, "grad_norm": 0.34502482414245605, "learning_rate": 1.7358113023116625e-05, "loss": 0.134, "step": 35825 }, { "epoch": 0.6389968965148218, "grad_norm": 0.2907029390335083, "learning_rate": 1.7356631031397693e-05, "loss": 0.1002, "step": 35826 }, { "epoch": 0.6390147326365355, "grad_norm": 0.19775918126106262, "learning_rate": 1.7355149069307102e-05, "loss": 0.1065, "step": 35827 }, { "epoch": 0.6390325687582492, "grad_norm": 0.3696146011352539, "learning_rate": 1.7353667136850603e-05, "loss": 0.1859, "step": 35828 }, { "epoch": 0.6390504048799629, "grad_norm": 0.308233380317688, "learning_rate": 1.7352185234033925e-05, "loss": 0.1451, "step": 35829 }, { "epoch": 0.6390682410016766, "grad_norm": 0.2556229829788208, "learning_rate": 1.7350703360862813e-05, "loss": 0.093, "step": 35830 }, { "epoch": 0.6390860771233903, "grad_norm": 0.27065831422805786, "learning_rate": 1.7349221517343022e-05, "loss": 0.1386, "step": 35831 }, { "epoch": 0.639103913245104, "grad_norm": 0.1999167948961258, "learning_rate": 1.7347739703480294e-05, "loss": 0.0911, "step": 35832 }, { "epoch": 0.6391217493668176, "grad_norm": 0.2251693159341812, "learning_rate": 1.734625791928037e-05, "loss": 0.1396, "step": 35833 }, { "epoch": 0.6391395854885313, "grad_norm": 0.279585063457489, "learning_rate": 1.7344776164748994e-05, "loss": 0.1306, "step": 35834 }, { "epoch": 0.639157421610245, "grad_norm": 0.22249330580234528, "learning_rate": 1.7343294439891898e-05, "loss": 0.1124, "step": 35835 }, { "epoch": 0.6391752577319587, "grad_norm": 0.2314291000366211, "learning_rate": 1.7341812744714854e-05, "loss": 0.0846, "step": 35836 }, { "epoch": 0.6391930938536725, "grad_norm": 0.29955893754959106, "learning_rate": 1.7340331079223576e-05, "loss": 0.159, "step": 35837 }, { "epoch": 0.6392109299753862, "grad_norm": 0.24937936663627625, "learning_rate": 1.7338849443423826e-05, "loss": 0.1335, "step": 35838 }, { "epoch": 0.6392287660970999, "grad_norm": 0.2849934697151184, "learning_rate": 1.7337367837321336e-05, "loss": 0.1838, "step": 35839 }, { "epoch": 0.6392466022188136, "grad_norm": 0.25680679082870483, "learning_rate": 1.733588626092186e-05, "loss": 0.1062, "step": 35840 }, { "epoch": 0.6392644383405273, "grad_norm": 0.26158425211906433, "learning_rate": 1.733440471423114e-05, "loss": 0.098, "step": 35841 }, { "epoch": 0.639282274462241, "grad_norm": 0.21850423514842987, "learning_rate": 1.7332923197254913e-05, "loss": 0.0955, "step": 35842 }, { "epoch": 0.6393001105839546, "grad_norm": 0.2790837585926056, "learning_rate": 1.7331441709998914e-05, "loss": 0.1351, "step": 35843 }, { "epoch": 0.6393179467056683, "grad_norm": 0.24826689064502716, "learning_rate": 1.7329960252468903e-05, "loss": 0.1728, "step": 35844 }, { "epoch": 0.639335782827382, "grad_norm": 0.223775714635849, "learning_rate": 1.732847882467062e-05, "loss": 0.1092, "step": 35845 }, { "epoch": 0.6393536189490957, "grad_norm": 0.21161296963691711, "learning_rate": 1.7326997426609798e-05, "loss": 0.0874, "step": 35846 }, { "epoch": 0.6393714550708094, "grad_norm": 0.30553925037384033, "learning_rate": 1.7325516058292187e-05, "loss": 0.1292, "step": 35847 }, { "epoch": 0.6393892911925231, "grad_norm": 0.28877514600753784, "learning_rate": 1.7324034719723518e-05, "loss": 0.1197, "step": 35848 }, { "epoch": 0.6394071273142368, "grad_norm": 0.2557810842990875, "learning_rate": 1.7322553410909548e-05, "loss": 0.1078, "step": 35849 }, { "epoch": 0.6394249634359505, "grad_norm": 0.2571440637111664, "learning_rate": 1.732107213185601e-05, "loss": 0.1314, "step": 35850 }, { "epoch": 0.6394427995576641, "grad_norm": 0.2803826928138733, "learning_rate": 1.7319590882568655e-05, "loss": 0.1511, "step": 35851 }, { "epoch": 0.6394606356793778, "grad_norm": 0.35693272948265076, "learning_rate": 1.7318109663053207e-05, "loss": 0.0879, "step": 35852 }, { "epoch": 0.6394784718010915, "grad_norm": 0.2565140426158905, "learning_rate": 1.7316628473315427e-05, "loss": 0.1003, "step": 35853 }, { "epoch": 0.6394963079228053, "grad_norm": 0.3079506754875183, "learning_rate": 1.7315147313361053e-05, "loss": 0.1525, "step": 35854 }, { "epoch": 0.639514144044519, "grad_norm": 0.21569791436195374, "learning_rate": 1.7313666183195822e-05, "loss": 0.1102, "step": 35855 }, { "epoch": 0.6395319801662327, "grad_norm": 0.2801554203033447, "learning_rate": 1.731218508282546e-05, "loss": 0.1321, "step": 35856 }, { "epoch": 0.6395498162879464, "grad_norm": 0.20545944571495056, "learning_rate": 1.7310704012255738e-05, "loss": 0.1243, "step": 35857 }, { "epoch": 0.6395676524096601, "grad_norm": 0.23091839253902435, "learning_rate": 1.7309222971492378e-05, "loss": 0.138, "step": 35858 }, { "epoch": 0.6395854885313738, "grad_norm": 0.22200728952884674, "learning_rate": 1.7307741960541128e-05, "loss": 0.125, "step": 35859 }, { "epoch": 0.6396033246530874, "grad_norm": 0.40865370631217957, "learning_rate": 1.7306260979407732e-05, "loss": 0.1897, "step": 35860 }, { "epoch": 0.6396211607748011, "grad_norm": 0.22435827553272247, "learning_rate": 1.7304780028097912e-05, "loss": 0.1281, "step": 35861 }, { "epoch": 0.6396389968965148, "grad_norm": 0.2696791887283325, "learning_rate": 1.7303299106617434e-05, "loss": 0.1322, "step": 35862 }, { "epoch": 0.6396568330182285, "grad_norm": 0.2418757528066635, "learning_rate": 1.7301818214972026e-05, "loss": 0.0813, "step": 35863 }, { "epoch": 0.6396746691399422, "grad_norm": 0.21032550930976868, "learning_rate": 1.730033735316743e-05, "loss": 0.1523, "step": 35864 }, { "epoch": 0.6396925052616559, "grad_norm": 0.24804969131946564, "learning_rate": 1.729885652120938e-05, "loss": 0.18, "step": 35865 }, { "epoch": 0.6397103413833696, "grad_norm": 0.2403768002986908, "learning_rate": 1.729737571910362e-05, "loss": 0.1308, "step": 35866 }, { "epoch": 0.6397281775050833, "grad_norm": 0.2027972787618637, "learning_rate": 1.72958949468559e-05, "loss": 0.1368, "step": 35867 }, { "epoch": 0.639746013626797, "grad_norm": 0.28435927629470825, "learning_rate": 1.7294414204471954e-05, "loss": 0.1185, "step": 35868 }, { "epoch": 0.6397638497485106, "grad_norm": 0.29404228925704956, "learning_rate": 1.7292933491957507e-05, "loss": 0.1448, "step": 35869 }, { "epoch": 0.6397816858702243, "grad_norm": 0.28542855381965637, "learning_rate": 1.7291452809318325e-05, "loss": 0.1225, "step": 35870 }, { "epoch": 0.6397995219919381, "grad_norm": 0.22775453329086304, "learning_rate": 1.7289972156560127e-05, "loss": 0.1263, "step": 35871 }, { "epoch": 0.6398173581136518, "grad_norm": 0.2884061336517334, "learning_rate": 1.7288491533688668e-05, "loss": 0.1744, "step": 35872 }, { "epoch": 0.6398351942353655, "grad_norm": 0.8027663230895996, "learning_rate": 1.7287010940709675e-05, "loss": 0.1256, "step": 35873 }, { "epoch": 0.6398530303570792, "grad_norm": 0.4754054844379425, "learning_rate": 1.7285530377628885e-05, "loss": 0.1185, "step": 35874 }, { "epoch": 0.6398708664787929, "grad_norm": 0.18994231522083282, "learning_rate": 1.7284049844452048e-05, "loss": 0.1278, "step": 35875 }, { "epoch": 0.6398887026005066, "grad_norm": 0.3504869043827057, "learning_rate": 1.7282569341184903e-05, "loss": 0.1462, "step": 35876 }, { "epoch": 0.6399065387222203, "grad_norm": 0.30787503719329834, "learning_rate": 1.728108886783318e-05, "loss": 0.1897, "step": 35877 }, { "epoch": 0.6399243748439339, "grad_norm": 0.24686484038829803, "learning_rate": 1.727960842440262e-05, "loss": 0.1098, "step": 35878 }, { "epoch": 0.6399422109656476, "grad_norm": 0.2583046853542328, "learning_rate": 1.727812801089897e-05, "loss": 0.145, "step": 35879 }, { "epoch": 0.6399600470873613, "grad_norm": 0.2936982810497284, "learning_rate": 1.7276647627327964e-05, "loss": 0.1537, "step": 35880 }, { "epoch": 0.639977883209075, "grad_norm": 0.23829172551631927, "learning_rate": 1.727516727369534e-05, "loss": 0.0931, "step": 35881 }, { "epoch": 0.6399957193307887, "grad_norm": 0.30978360772132874, "learning_rate": 1.7273686950006823e-05, "loss": 0.12, "step": 35882 }, { "epoch": 0.6400135554525024, "grad_norm": 0.2778049111366272, "learning_rate": 1.7272206656268175e-05, "loss": 0.1404, "step": 35883 }, { "epoch": 0.6400313915742161, "grad_norm": 0.2594253420829773, "learning_rate": 1.7270726392485125e-05, "loss": 0.1094, "step": 35884 }, { "epoch": 0.6400492276959298, "grad_norm": 0.3210033178329468, "learning_rate": 1.7269246158663406e-05, "loss": 0.1031, "step": 35885 }, { "epoch": 0.6400670638176434, "grad_norm": 0.31004631519317627, "learning_rate": 1.726776595480876e-05, "loss": 0.1422, "step": 35886 }, { "epoch": 0.6400848999393571, "grad_norm": 0.2223798930644989, "learning_rate": 1.7266285780926915e-05, "loss": 0.1158, "step": 35887 }, { "epoch": 0.6401027360610709, "grad_norm": 0.34921783208847046, "learning_rate": 1.7264805637023628e-05, "loss": 0.1322, "step": 35888 }, { "epoch": 0.6401205721827846, "grad_norm": 0.25858503580093384, "learning_rate": 1.7263325523104627e-05, "loss": 0.1418, "step": 35889 }, { "epoch": 0.6401384083044983, "grad_norm": 0.2730313241481781, "learning_rate": 1.7261845439175644e-05, "loss": 0.1002, "step": 35890 }, { "epoch": 0.640156244426212, "grad_norm": 0.44230887293815613, "learning_rate": 1.7260365385242415e-05, "loss": 0.1291, "step": 35891 }, { "epoch": 0.6401740805479257, "grad_norm": 0.2491108626127243, "learning_rate": 1.7258885361310697e-05, "loss": 0.1867, "step": 35892 }, { "epoch": 0.6401919166696394, "grad_norm": 0.25240418314933777, "learning_rate": 1.7257405367386198e-05, "loss": 0.1376, "step": 35893 }, { "epoch": 0.6402097527913531, "grad_norm": 0.2325664460659027, "learning_rate": 1.725592540347468e-05, "loss": 0.1374, "step": 35894 }, { "epoch": 0.6402275889130667, "grad_norm": 0.3108990490436554, "learning_rate": 1.725444546958186e-05, "loss": 0.0994, "step": 35895 }, { "epoch": 0.6402454250347804, "grad_norm": 0.23212184011936188, "learning_rate": 1.7252965565713495e-05, "loss": 0.1406, "step": 35896 }, { "epoch": 0.6402632611564941, "grad_norm": 0.26845085620880127, "learning_rate": 1.725148569187531e-05, "loss": 0.1388, "step": 35897 }, { "epoch": 0.6402810972782078, "grad_norm": 0.1796015053987503, "learning_rate": 1.7250005848073042e-05, "loss": 0.0844, "step": 35898 }, { "epoch": 0.6402989333999215, "grad_norm": 0.3172871470451355, "learning_rate": 1.7248526034312428e-05, "loss": 0.133, "step": 35899 }, { "epoch": 0.6403167695216352, "grad_norm": 0.2341238111257553, "learning_rate": 1.7247046250599195e-05, "loss": 0.1373, "step": 35900 }, { "epoch": 0.6403346056433489, "grad_norm": 0.225291907787323, "learning_rate": 1.72455664969391e-05, "loss": 0.1368, "step": 35901 }, { "epoch": 0.6403524417650626, "grad_norm": 0.3168637752532959, "learning_rate": 1.7244086773337864e-05, "loss": 0.1828, "step": 35902 }, { "epoch": 0.6403702778867763, "grad_norm": 0.31857556104660034, "learning_rate": 1.7242607079801233e-05, "loss": 0.1322, "step": 35903 }, { "epoch": 0.6403881140084899, "grad_norm": 0.30921950936317444, "learning_rate": 1.724112741633492e-05, "loss": 0.1479, "step": 35904 }, { "epoch": 0.6404059501302037, "grad_norm": 0.30880025029182434, "learning_rate": 1.723964778294469e-05, "loss": 0.1164, "step": 35905 }, { "epoch": 0.6404237862519174, "grad_norm": 0.2310551404953003, "learning_rate": 1.723816817963626e-05, "loss": 0.1296, "step": 35906 }, { "epoch": 0.6404416223736311, "grad_norm": 0.3688663840293884, "learning_rate": 1.7236688606415374e-05, "loss": 0.1532, "step": 35907 }, { "epoch": 0.6404594584953448, "grad_norm": 0.3194214701652527, "learning_rate": 1.723520906328776e-05, "loss": 0.1362, "step": 35908 }, { "epoch": 0.6404772946170585, "grad_norm": 0.28247639536857605, "learning_rate": 1.7233729550259162e-05, "loss": 0.1258, "step": 35909 }, { "epoch": 0.6404951307387722, "grad_norm": 0.32303890585899353, "learning_rate": 1.7232250067335312e-05, "loss": 0.1689, "step": 35910 }, { "epoch": 0.6405129668604859, "grad_norm": 0.24534259736537933, "learning_rate": 1.7230770614521945e-05, "loss": 0.099, "step": 35911 }, { "epoch": 0.6405308029821996, "grad_norm": 0.21690906584262848, "learning_rate": 1.7229291191824787e-05, "loss": 0.0687, "step": 35912 }, { "epoch": 0.6405486391039132, "grad_norm": 0.25254303216934204, "learning_rate": 1.7227811799249584e-05, "loss": 0.1292, "step": 35913 }, { "epoch": 0.6405664752256269, "grad_norm": 0.3543197512626648, "learning_rate": 1.7226332436802062e-05, "loss": 0.1527, "step": 35914 }, { "epoch": 0.6405843113473406, "grad_norm": 0.2187603861093521, "learning_rate": 1.722485310448797e-05, "loss": 0.1019, "step": 35915 }, { "epoch": 0.6406021474690543, "grad_norm": 0.20415639877319336, "learning_rate": 1.722337380231303e-05, "loss": 0.1366, "step": 35916 }, { "epoch": 0.640619983590768, "grad_norm": 0.24557319283485413, "learning_rate": 1.7221894530282973e-05, "loss": 0.1378, "step": 35917 }, { "epoch": 0.6406378197124817, "grad_norm": 0.30781519412994385, "learning_rate": 1.7220415288403544e-05, "loss": 0.1122, "step": 35918 }, { "epoch": 0.6406556558341954, "grad_norm": 0.19187171757221222, "learning_rate": 1.7218936076680474e-05, "loss": 0.0965, "step": 35919 }, { "epoch": 0.6406734919559091, "grad_norm": 0.297652006149292, "learning_rate": 1.7217456895119494e-05, "loss": 0.152, "step": 35920 }, { "epoch": 0.6406913280776229, "grad_norm": 0.26912230253219604, "learning_rate": 1.7215977743726332e-05, "loss": 0.1321, "step": 35921 }, { "epoch": 0.6407091641993365, "grad_norm": 0.23096053302288055, "learning_rate": 1.7214498622506736e-05, "loss": 0.1214, "step": 35922 }, { "epoch": 0.6407270003210502, "grad_norm": 0.27023589611053467, "learning_rate": 1.7213019531466433e-05, "loss": 0.1138, "step": 35923 }, { "epoch": 0.6407448364427639, "grad_norm": 0.2834970951080322, "learning_rate": 1.721154047061116e-05, "loss": 0.1096, "step": 35924 }, { "epoch": 0.6407626725644776, "grad_norm": 0.20260052382946014, "learning_rate": 1.721006143994664e-05, "loss": 0.1463, "step": 35925 }, { "epoch": 0.6407805086861913, "grad_norm": 0.3352355360984802, "learning_rate": 1.7208582439478603e-05, "loss": 0.0754, "step": 35926 }, { "epoch": 0.640798344807905, "grad_norm": 0.32622987031936646, "learning_rate": 1.7207103469212805e-05, "loss": 0.0635, "step": 35927 }, { "epoch": 0.6408161809296187, "grad_norm": 0.30420923233032227, "learning_rate": 1.7205624529154963e-05, "loss": 0.0951, "step": 35928 }, { "epoch": 0.6408340170513324, "grad_norm": 0.2683747112751007, "learning_rate": 1.7204145619310813e-05, "loss": 0.1618, "step": 35929 }, { "epoch": 0.640851853173046, "grad_norm": 0.2540469467639923, "learning_rate": 1.720266673968608e-05, "loss": 0.1564, "step": 35930 }, { "epoch": 0.6408696892947597, "grad_norm": 0.2046431452035904, "learning_rate": 1.720118789028651e-05, "loss": 0.1448, "step": 35931 }, { "epoch": 0.6408875254164734, "grad_norm": 0.32680729031562805, "learning_rate": 1.719970907111783e-05, "loss": 0.1689, "step": 35932 }, { "epoch": 0.6409053615381871, "grad_norm": 0.21240676939487457, "learning_rate": 1.719823028218577e-05, "loss": 0.1214, "step": 35933 }, { "epoch": 0.6409231976599008, "grad_norm": 0.2598065435886383, "learning_rate": 1.7196751523496062e-05, "loss": 0.1097, "step": 35934 }, { "epoch": 0.6409410337816145, "grad_norm": 0.23117214441299438, "learning_rate": 1.719527279505444e-05, "loss": 0.1105, "step": 35935 }, { "epoch": 0.6409588699033282, "grad_norm": 0.24915702641010284, "learning_rate": 1.7193794096866645e-05, "loss": 0.1353, "step": 35936 }, { "epoch": 0.6409767060250419, "grad_norm": 0.19787028431892395, "learning_rate": 1.7192315428938395e-05, "loss": 0.1213, "step": 35937 }, { "epoch": 0.6409945421467557, "grad_norm": 0.26631495356559753, "learning_rate": 1.719083679127543e-05, "loss": 0.1095, "step": 35938 }, { "epoch": 0.6410123782684694, "grad_norm": 0.2686077952384949, "learning_rate": 1.718935818388347e-05, "loss": 0.1318, "step": 35939 }, { "epoch": 0.641030214390183, "grad_norm": 0.2649911642074585, "learning_rate": 1.718787960676826e-05, "loss": 0.1292, "step": 35940 }, { "epoch": 0.6410480505118967, "grad_norm": 0.2614991366863251, "learning_rate": 1.7186401059935524e-05, "loss": 0.2089, "step": 35941 }, { "epoch": 0.6410658866336104, "grad_norm": 0.2648104429244995, "learning_rate": 1.7184922543391005e-05, "loss": 0.1365, "step": 35942 }, { "epoch": 0.6410837227553241, "grad_norm": 0.3426940441131592, "learning_rate": 1.718344405714041e-05, "loss": 0.1253, "step": 35943 }, { "epoch": 0.6411015588770378, "grad_norm": 0.30241209268569946, "learning_rate": 1.71819656011895e-05, "loss": 0.1787, "step": 35944 }, { "epoch": 0.6411193949987515, "grad_norm": 0.32440218329429626, "learning_rate": 1.718048717554399e-05, "loss": 0.1227, "step": 35945 }, { "epoch": 0.6411372311204652, "grad_norm": 0.2700096070766449, "learning_rate": 1.7179008780209615e-05, "loss": 0.1234, "step": 35946 }, { "epoch": 0.6411550672421789, "grad_norm": 0.2275232970714569, "learning_rate": 1.717753041519209e-05, "loss": 0.1144, "step": 35947 }, { "epoch": 0.6411729033638925, "grad_norm": 0.19442588090896606, "learning_rate": 1.7176052080497165e-05, "loss": 0.1141, "step": 35948 }, { "epoch": 0.6411907394856062, "grad_norm": 0.19584928452968597, "learning_rate": 1.717457377613057e-05, "loss": 0.108, "step": 35949 }, { "epoch": 0.6412085756073199, "grad_norm": 0.32251083850860596, "learning_rate": 1.717309550209803e-05, "loss": 0.0717, "step": 35950 }, { "epoch": 0.6412264117290336, "grad_norm": 0.25962111353874207, "learning_rate": 1.7171617258405273e-05, "loss": 0.1259, "step": 35951 }, { "epoch": 0.6412442478507473, "grad_norm": 0.31683894991874695, "learning_rate": 1.717013904505802e-05, "loss": 0.1283, "step": 35952 }, { "epoch": 0.641262083972461, "grad_norm": 0.34520360827445984, "learning_rate": 1.7168660862062027e-05, "loss": 0.1306, "step": 35953 }, { "epoch": 0.6412799200941747, "grad_norm": 0.20821036398410797, "learning_rate": 1.7167182709423004e-05, "loss": 0.1357, "step": 35954 }, { "epoch": 0.6412977562158885, "grad_norm": 0.30845940113067627, "learning_rate": 1.716570458714669e-05, "loss": 0.0505, "step": 35955 }, { "epoch": 0.6413155923376022, "grad_norm": 0.323530375957489, "learning_rate": 1.71642264952388e-05, "loss": 0.1336, "step": 35956 }, { "epoch": 0.6413334284593158, "grad_norm": 0.23813045024871826, "learning_rate": 1.7162748433705083e-05, "loss": 0.0763, "step": 35957 }, { "epoch": 0.6413512645810295, "grad_norm": 0.2854834198951721, "learning_rate": 1.7161270402551262e-05, "loss": 0.1141, "step": 35958 }, { "epoch": 0.6413691007027432, "grad_norm": 0.2865433394908905, "learning_rate": 1.7159792401783064e-05, "loss": 0.1144, "step": 35959 }, { "epoch": 0.6413869368244569, "grad_norm": 0.2748376727104187, "learning_rate": 1.715831443140621e-05, "loss": 0.082, "step": 35960 }, { "epoch": 0.6414047729461706, "grad_norm": 0.2565107047557831, "learning_rate": 1.715683649142645e-05, "loss": 0.1583, "step": 35961 }, { "epoch": 0.6414226090678843, "grad_norm": 0.21768087148666382, "learning_rate": 1.715535858184949e-05, "loss": 0.1128, "step": 35962 }, { "epoch": 0.641440445189598, "grad_norm": 0.25623881816864014, "learning_rate": 1.7153880702681074e-05, "loss": 0.1445, "step": 35963 }, { "epoch": 0.6414582813113117, "grad_norm": 0.22707068920135498, "learning_rate": 1.715240285392693e-05, "loss": 0.0627, "step": 35964 }, { "epoch": 0.6414761174330254, "grad_norm": 0.26484373211860657, "learning_rate": 1.7150925035592776e-05, "loss": 0.0878, "step": 35965 }, { "epoch": 0.641493953554739, "grad_norm": 0.2964410185813904, "learning_rate": 1.714944724768435e-05, "loss": 0.1623, "step": 35966 }, { "epoch": 0.6415117896764527, "grad_norm": 0.2748417258262634, "learning_rate": 1.7147969490207383e-05, "loss": 0.1453, "step": 35967 }, { "epoch": 0.6415296257981664, "grad_norm": 0.21738409996032715, "learning_rate": 1.714649176316759e-05, "loss": 0.0973, "step": 35968 }, { "epoch": 0.6415474619198801, "grad_norm": 0.3531220257282257, "learning_rate": 1.714501406657071e-05, "loss": 0.0611, "step": 35969 }, { "epoch": 0.6415652980415938, "grad_norm": 0.277566134929657, "learning_rate": 1.7143536400422467e-05, "loss": 0.1347, "step": 35970 }, { "epoch": 0.6415831341633075, "grad_norm": 0.20635554194450378, "learning_rate": 1.7142058764728597e-05, "loss": 0.0615, "step": 35971 }, { "epoch": 0.6416009702850213, "grad_norm": 0.31696733832359314, "learning_rate": 1.7140581159494825e-05, "loss": 0.1406, "step": 35972 }, { "epoch": 0.641618806406735, "grad_norm": 0.35350048542022705, "learning_rate": 1.7139103584726855e-05, "loss": 0.1526, "step": 35973 }, { "epoch": 0.6416366425284487, "grad_norm": 0.24697697162628174, "learning_rate": 1.7137626040430452e-05, "loss": 0.1129, "step": 35974 }, { "epoch": 0.6416544786501623, "grad_norm": 0.21649472415447235, "learning_rate": 1.713614852661132e-05, "loss": 0.1166, "step": 35975 }, { "epoch": 0.641672314771876, "grad_norm": 0.23622985184192657, "learning_rate": 1.7134671043275196e-05, "loss": 0.0932, "step": 35976 }, { "epoch": 0.6416901508935897, "grad_norm": 0.3605780303478241, "learning_rate": 1.7133193590427804e-05, "loss": 0.1286, "step": 35977 }, { "epoch": 0.6417079870153034, "grad_norm": 0.3458549380302429, "learning_rate": 1.713171616807486e-05, "loss": 0.1667, "step": 35978 }, { "epoch": 0.6417258231370171, "grad_norm": 0.24873648583889008, "learning_rate": 1.7130238776222112e-05, "loss": 0.14, "step": 35979 }, { "epoch": 0.6417436592587308, "grad_norm": 0.26050683856010437, "learning_rate": 1.712876141487528e-05, "loss": 0.1237, "step": 35980 }, { "epoch": 0.6417614953804445, "grad_norm": 0.22043856978416443, "learning_rate": 1.712728408404008e-05, "loss": 0.1453, "step": 35981 }, { "epoch": 0.6417793315021582, "grad_norm": 0.24657335877418518, "learning_rate": 1.7125806783722243e-05, "loss": 0.1363, "step": 35982 }, { "epoch": 0.6417971676238718, "grad_norm": 0.447663813829422, "learning_rate": 1.7124329513927504e-05, "loss": 0.1672, "step": 35983 }, { "epoch": 0.6418150037455855, "grad_norm": 0.24941036105155945, "learning_rate": 1.7122852274661584e-05, "loss": 0.1213, "step": 35984 }, { "epoch": 0.6418328398672992, "grad_norm": 0.2186688333749771, "learning_rate": 1.7121375065930212e-05, "loss": 0.0804, "step": 35985 }, { "epoch": 0.6418506759890129, "grad_norm": 0.2392829954624176, "learning_rate": 1.71198978877391e-05, "loss": 0.1258, "step": 35986 }, { "epoch": 0.6418685121107266, "grad_norm": 0.26403605937957764, "learning_rate": 1.7118420740093998e-05, "loss": 0.1426, "step": 35987 }, { "epoch": 0.6418863482324403, "grad_norm": 0.3009972870349884, "learning_rate": 1.711694362300062e-05, "loss": 0.1327, "step": 35988 }, { "epoch": 0.6419041843541541, "grad_norm": 0.23631148040294647, "learning_rate": 1.7115466536464684e-05, "loss": 0.1211, "step": 35989 }, { "epoch": 0.6419220204758678, "grad_norm": 0.3221214711666107, "learning_rate": 1.7113989480491927e-05, "loss": 0.1422, "step": 35990 }, { "epoch": 0.6419398565975815, "grad_norm": 0.26724332571029663, "learning_rate": 1.711251245508806e-05, "loss": 0.0988, "step": 35991 }, { "epoch": 0.6419576927192951, "grad_norm": 0.29655665159225464, "learning_rate": 1.711103546025883e-05, "loss": 0.1107, "step": 35992 }, { "epoch": 0.6419755288410088, "grad_norm": 0.2704985439777374, "learning_rate": 1.7109558496009952e-05, "loss": 0.14, "step": 35993 }, { "epoch": 0.6419933649627225, "grad_norm": 0.32120829820632935, "learning_rate": 1.7108081562347153e-05, "loss": 0.1737, "step": 35994 }, { "epoch": 0.6420112010844362, "grad_norm": 0.3925207555294037, "learning_rate": 1.7106604659276142e-05, "loss": 0.1918, "step": 35995 }, { "epoch": 0.6420290372061499, "grad_norm": 0.31661248207092285, "learning_rate": 1.7105127786802668e-05, "loss": 0.1601, "step": 35996 }, { "epoch": 0.6420468733278636, "grad_norm": 0.391795814037323, "learning_rate": 1.7103650944932443e-05, "loss": 0.0995, "step": 35997 }, { "epoch": 0.6420647094495773, "grad_norm": 0.28095105290412903, "learning_rate": 1.7102174133671197e-05, "loss": 0.1039, "step": 35998 }, { "epoch": 0.642082545571291, "grad_norm": 0.26800575852394104, "learning_rate": 1.7100697353024642e-05, "loss": 0.1336, "step": 35999 }, { "epoch": 0.6421003816930047, "grad_norm": 0.2607799172401428, "learning_rate": 1.7099220602998522e-05, "loss": 0.132, "step": 36000 }, { "epoch": 0.6421003816930047, "eval_loss": 0.12125831842422485, "eval_runtime": 106.6956, "eval_samples_per_second": 9.597, "eval_steps_per_second": 1.603, "step": 36000 }, { "epoch": 0.6421182178147183, "grad_norm": 0.2816165089607239, "learning_rate": 1.7097743883598555e-05, "loss": 0.1578, "step": 36001 }, { "epoch": 0.642136053936432, "grad_norm": 0.22245261073112488, "learning_rate": 1.7096267194830457e-05, "loss": 0.118, "step": 36002 }, { "epoch": 0.6421538900581457, "grad_norm": 0.307090163230896, "learning_rate": 1.7094790536699957e-05, "loss": 0.1103, "step": 36003 }, { "epoch": 0.6421717261798594, "grad_norm": 0.2222188264131546, "learning_rate": 1.7093313909212772e-05, "loss": 0.0873, "step": 36004 }, { "epoch": 0.6421895623015731, "grad_norm": 0.27537801861763, "learning_rate": 1.7091837312374644e-05, "loss": 0.1772, "step": 36005 }, { "epoch": 0.6422073984232869, "grad_norm": 0.32166436314582825, "learning_rate": 1.7090360746191285e-05, "loss": 0.0986, "step": 36006 }, { "epoch": 0.6422252345450006, "grad_norm": 0.20470120012760162, "learning_rate": 1.708888421066842e-05, "loss": 0.0992, "step": 36007 }, { "epoch": 0.6422430706667143, "grad_norm": 0.20956771075725555, "learning_rate": 1.708740770581176e-05, "loss": 0.096, "step": 36008 }, { "epoch": 0.642260906788428, "grad_norm": 0.2966596782207489, "learning_rate": 1.7085931231627055e-05, "loss": 0.1703, "step": 36009 }, { "epoch": 0.6422787429101416, "grad_norm": 0.3364904224872589, "learning_rate": 1.7084454788120006e-05, "loss": 0.1068, "step": 36010 }, { "epoch": 0.6422965790318553, "grad_norm": 0.23444807529449463, "learning_rate": 1.708297837529635e-05, "loss": 0.0957, "step": 36011 }, { "epoch": 0.642314415153569, "grad_norm": 0.3281082808971405, "learning_rate": 1.7081501993161792e-05, "loss": 0.1165, "step": 36012 }, { "epoch": 0.6423322512752827, "grad_norm": 0.20914587378501892, "learning_rate": 1.7080025641722082e-05, "loss": 0.1079, "step": 36013 }, { "epoch": 0.6423500873969964, "grad_norm": 0.27111852169036865, "learning_rate": 1.7078549320982922e-05, "loss": 0.1245, "step": 36014 }, { "epoch": 0.6423679235187101, "grad_norm": 0.35877928137779236, "learning_rate": 1.7077073030950048e-05, "loss": 0.0839, "step": 36015 }, { "epoch": 0.6423857596404238, "grad_norm": 0.22931590676307678, "learning_rate": 1.7075596771629165e-05, "loss": 0.1019, "step": 36016 }, { "epoch": 0.6424035957621375, "grad_norm": 0.24991843104362488, "learning_rate": 1.7074120543026007e-05, "loss": 0.113, "step": 36017 }, { "epoch": 0.6424214318838511, "grad_norm": 0.22310291230678558, "learning_rate": 1.7072644345146295e-05, "loss": 0.1298, "step": 36018 }, { "epoch": 0.6424392680055648, "grad_norm": 0.286344975233078, "learning_rate": 1.7071168177995757e-05, "loss": 0.1552, "step": 36019 }, { "epoch": 0.6424571041272785, "grad_norm": 0.29034942388534546, "learning_rate": 1.7069692041580114e-05, "loss": 0.1067, "step": 36020 }, { "epoch": 0.6424749402489922, "grad_norm": 0.23364169895648956, "learning_rate": 1.706821593590507e-05, "loss": 0.1106, "step": 36021 }, { "epoch": 0.642492776370706, "grad_norm": 0.27880609035491943, "learning_rate": 1.7066739860976367e-05, "loss": 0.1342, "step": 36022 }, { "epoch": 0.6425106124924197, "grad_norm": 0.2684519290924072, "learning_rate": 1.7065263816799725e-05, "loss": 0.0925, "step": 36023 }, { "epoch": 0.6425284486141334, "grad_norm": 0.2521001398563385, "learning_rate": 1.7063787803380856e-05, "loss": 0.172, "step": 36024 }, { "epoch": 0.6425462847358471, "grad_norm": 0.27383750677108765, "learning_rate": 1.706231182072548e-05, "loss": 0.1846, "step": 36025 }, { "epoch": 0.6425641208575608, "grad_norm": 0.3476960062980652, "learning_rate": 1.7060835868839333e-05, "loss": 0.2084, "step": 36026 }, { "epoch": 0.6425819569792744, "grad_norm": 0.2195775806903839, "learning_rate": 1.7059359947728132e-05, "loss": 0.128, "step": 36027 }, { "epoch": 0.6425997931009881, "grad_norm": 0.2555517256259918, "learning_rate": 1.7057884057397593e-05, "loss": 0.1401, "step": 36028 }, { "epoch": 0.6426176292227018, "grad_norm": 0.26960569620132446, "learning_rate": 1.705640819785344e-05, "loss": 0.1411, "step": 36029 }, { "epoch": 0.6426354653444155, "grad_norm": 0.2766706347465515, "learning_rate": 1.705493236910138e-05, "loss": 0.1105, "step": 36030 }, { "epoch": 0.6426533014661292, "grad_norm": 0.33902889490127563, "learning_rate": 1.7053456571147152e-05, "loss": 0.0995, "step": 36031 }, { "epoch": 0.6426711375878429, "grad_norm": 0.3518093228340149, "learning_rate": 1.7051980803996474e-05, "loss": 0.1596, "step": 36032 }, { "epoch": 0.6426889737095566, "grad_norm": 0.3192064166069031, "learning_rate": 1.705050506765507e-05, "loss": 0.1208, "step": 36033 }, { "epoch": 0.6427068098312703, "grad_norm": 0.23662027716636658, "learning_rate": 1.7049029362128637e-05, "loss": 0.1695, "step": 36034 }, { "epoch": 0.642724645952984, "grad_norm": 0.20568744838237762, "learning_rate": 1.7047553687422925e-05, "loss": 0.0996, "step": 36035 }, { "epoch": 0.6427424820746976, "grad_norm": 0.2770882248878479, "learning_rate": 1.7046078043543642e-05, "loss": 0.1899, "step": 36036 }, { "epoch": 0.6427603181964113, "grad_norm": 0.2567894458770752, "learning_rate": 1.7044602430496504e-05, "loss": 0.1019, "step": 36037 }, { "epoch": 0.642778154318125, "grad_norm": 0.2708216905593872, "learning_rate": 1.7043126848287234e-05, "loss": 0.1059, "step": 36038 }, { "epoch": 0.6427959904398388, "grad_norm": 0.3747189939022064, "learning_rate": 1.704165129692155e-05, "loss": 0.126, "step": 36039 }, { "epoch": 0.6428138265615525, "grad_norm": 0.4661749303340912, "learning_rate": 1.704017577640518e-05, "loss": 0.1293, "step": 36040 }, { "epoch": 0.6428316626832662, "grad_norm": 0.2464616745710373, "learning_rate": 1.703870028674384e-05, "loss": 0.1106, "step": 36041 }, { "epoch": 0.6428494988049799, "grad_norm": 0.29713109135627747, "learning_rate": 1.7037224827943246e-05, "loss": 0.1056, "step": 36042 }, { "epoch": 0.6428673349266936, "grad_norm": 0.3102748990058899, "learning_rate": 1.703574940000911e-05, "loss": 0.1749, "step": 36043 }, { "epoch": 0.6428851710484073, "grad_norm": 0.22702239453792572, "learning_rate": 1.703427400294717e-05, "loss": 0.0937, "step": 36044 }, { "epoch": 0.6429030071701209, "grad_norm": 0.2195417582988739, "learning_rate": 1.703279863676313e-05, "loss": 0.1045, "step": 36045 }, { "epoch": 0.6429208432918346, "grad_norm": 0.28234267234802246, "learning_rate": 1.7031323301462716e-05, "loss": 0.1081, "step": 36046 }, { "epoch": 0.6429386794135483, "grad_norm": 0.39787518978118896, "learning_rate": 1.702984799705164e-05, "loss": 0.1442, "step": 36047 }, { "epoch": 0.642956515535262, "grad_norm": 0.2355199158191681, "learning_rate": 1.7028372723535633e-05, "loss": 0.1391, "step": 36048 }, { "epoch": 0.6429743516569757, "grad_norm": 0.23079955577850342, "learning_rate": 1.7026897480920408e-05, "loss": 0.1013, "step": 36049 }, { "epoch": 0.6429921877786894, "grad_norm": 0.24834102392196655, "learning_rate": 1.7025422269211684e-05, "loss": 0.1214, "step": 36050 }, { "epoch": 0.6430100239004031, "grad_norm": 0.27061596512794495, "learning_rate": 1.7023947088415163e-05, "loss": 0.1409, "step": 36051 }, { "epoch": 0.6430278600221168, "grad_norm": 0.22637474536895752, "learning_rate": 1.7022471938536587e-05, "loss": 0.1328, "step": 36052 }, { "epoch": 0.6430456961438304, "grad_norm": 0.5987288951873779, "learning_rate": 1.7020996819581664e-05, "loss": 0.1467, "step": 36053 }, { "epoch": 0.6430635322655441, "grad_norm": 0.2617497742176056, "learning_rate": 1.701952173155612e-05, "loss": 0.1279, "step": 36054 }, { "epoch": 0.6430813683872578, "grad_norm": 0.23993206024169922, "learning_rate": 1.7018046674465666e-05, "loss": 0.1253, "step": 36055 }, { "epoch": 0.6430992045089716, "grad_norm": 0.21473948657512665, "learning_rate": 1.7016571648316005e-05, "loss": 0.0846, "step": 36056 }, { "epoch": 0.6431170406306853, "grad_norm": 0.19074192643165588, "learning_rate": 1.7015096653112884e-05, "loss": 0.0971, "step": 36057 }, { "epoch": 0.643134876752399, "grad_norm": 0.27036252617836, "learning_rate": 1.7013621688861996e-05, "loss": 0.1113, "step": 36058 }, { "epoch": 0.6431527128741127, "grad_norm": 0.30164045095443726, "learning_rate": 1.701214675556908e-05, "loss": 0.0903, "step": 36059 }, { "epoch": 0.6431705489958264, "grad_norm": 0.2766153812408447, "learning_rate": 1.701067185323983e-05, "loss": 0.1285, "step": 36060 }, { "epoch": 0.6431883851175401, "grad_norm": 0.17690503597259521, "learning_rate": 1.700919698187998e-05, "loss": 0.0917, "step": 36061 }, { "epoch": 0.6432062212392538, "grad_norm": 0.23135077953338623, "learning_rate": 1.700772214149525e-05, "loss": 0.1242, "step": 36062 }, { "epoch": 0.6432240573609674, "grad_norm": 0.26133644580841064, "learning_rate": 1.7006247332091348e-05, "loss": 0.1209, "step": 36063 }, { "epoch": 0.6432418934826811, "grad_norm": 0.2696077823638916, "learning_rate": 1.7004772553673983e-05, "loss": 0.0748, "step": 36064 }, { "epoch": 0.6432597296043948, "grad_norm": 0.25742682814598083, "learning_rate": 1.7003297806248886e-05, "loss": 0.1012, "step": 36065 }, { "epoch": 0.6432775657261085, "grad_norm": 0.2589276134967804, "learning_rate": 1.7001823089821766e-05, "loss": 0.1216, "step": 36066 }, { "epoch": 0.6432954018478222, "grad_norm": 0.25723347067832947, "learning_rate": 1.700034840439835e-05, "loss": 0.0764, "step": 36067 }, { "epoch": 0.6433132379695359, "grad_norm": 0.22905592620372772, "learning_rate": 1.6998873749984344e-05, "loss": 0.1285, "step": 36068 }, { "epoch": 0.6433310740912496, "grad_norm": 0.2404824048280716, "learning_rate": 1.6997399126585457e-05, "loss": 0.1047, "step": 36069 }, { "epoch": 0.6433489102129633, "grad_norm": 0.24083207547664642, "learning_rate": 1.6995924534207424e-05, "loss": 0.0988, "step": 36070 }, { "epoch": 0.6433667463346769, "grad_norm": 0.2020757645368576, "learning_rate": 1.6994449972855953e-05, "loss": 0.0889, "step": 36071 }, { "epoch": 0.6433845824563906, "grad_norm": 0.3013845980167389, "learning_rate": 1.6992975442536757e-05, "loss": 0.133, "step": 36072 }, { "epoch": 0.6434024185781044, "grad_norm": 0.2327493280172348, "learning_rate": 1.699150094325555e-05, "loss": 0.1048, "step": 36073 }, { "epoch": 0.6434202546998181, "grad_norm": 0.24093088507652283, "learning_rate": 1.699002647501805e-05, "loss": 0.1029, "step": 36074 }, { "epoch": 0.6434380908215318, "grad_norm": 0.28378912806510925, "learning_rate": 1.6988552037829983e-05, "loss": 0.1156, "step": 36075 }, { "epoch": 0.6434559269432455, "grad_norm": 0.2900311052799225, "learning_rate": 1.6987077631697056e-05, "loss": 0.1336, "step": 36076 }, { "epoch": 0.6434737630649592, "grad_norm": 0.2954043447971344, "learning_rate": 1.698560325662497e-05, "loss": 0.1185, "step": 36077 }, { "epoch": 0.6434915991866729, "grad_norm": 0.24001437425613403, "learning_rate": 1.6984128912619463e-05, "loss": 0.1452, "step": 36078 }, { "epoch": 0.6435094353083866, "grad_norm": 0.30434873700141907, "learning_rate": 1.6982654599686242e-05, "loss": 0.1331, "step": 36079 }, { "epoch": 0.6435272714301002, "grad_norm": 0.3666851222515106, "learning_rate": 1.698118031783102e-05, "loss": 0.1589, "step": 36080 }, { "epoch": 0.6435451075518139, "grad_norm": 0.20924252271652222, "learning_rate": 1.6979706067059513e-05, "loss": 0.0816, "step": 36081 }, { "epoch": 0.6435629436735276, "grad_norm": 0.2992520034313202, "learning_rate": 1.697823184737743e-05, "loss": 0.1173, "step": 36082 }, { "epoch": 0.6435807797952413, "grad_norm": 0.24502576887607574, "learning_rate": 1.6976757658790495e-05, "loss": 0.1061, "step": 36083 }, { "epoch": 0.643598615916955, "grad_norm": 0.38678261637687683, "learning_rate": 1.6975283501304422e-05, "loss": 0.1387, "step": 36084 }, { "epoch": 0.6436164520386687, "grad_norm": 0.24350497126579285, "learning_rate": 1.6973809374924915e-05, "loss": 0.1791, "step": 36085 }, { "epoch": 0.6436342881603824, "grad_norm": 0.22026783227920532, "learning_rate": 1.6972335279657698e-05, "loss": 0.1187, "step": 36086 }, { "epoch": 0.6436521242820961, "grad_norm": 0.3392118811607361, "learning_rate": 1.6970861215508482e-05, "loss": 0.1551, "step": 36087 }, { "epoch": 0.6436699604038097, "grad_norm": 0.2958141267299652, "learning_rate": 1.696938718248298e-05, "loss": 0.1385, "step": 36088 }, { "epoch": 0.6436877965255234, "grad_norm": 0.24164767563343048, "learning_rate": 1.6967913180586916e-05, "loss": 0.1198, "step": 36089 }, { "epoch": 0.6437056326472372, "grad_norm": 0.22696180641651154, "learning_rate": 1.696643920982598e-05, "loss": 0.0825, "step": 36090 }, { "epoch": 0.6437234687689509, "grad_norm": 0.2741759419441223, "learning_rate": 1.696496527020591e-05, "loss": 0.1192, "step": 36091 }, { "epoch": 0.6437413048906646, "grad_norm": 0.19998280704021454, "learning_rate": 1.696349136173241e-05, "loss": 0.1368, "step": 36092 }, { "epoch": 0.6437591410123783, "grad_norm": 0.24313251674175262, "learning_rate": 1.6962017484411188e-05, "loss": 0.1281, "step": 36093 }, { "epoch": 0.643776977134092, "grad_norm": 0.2841765284538269, "learning_rate": 1.6960543638247964e-05, "loss": 0.1327, "step": 36094 }, { "epoch": 0.6437948132558057, "grad_norm": 0.2545332908630371, "learning_rate": 1.6959069823248445e-05, "loss": 0.1306, "step": 36095 }, { "epoch": 0.6438126493775194, "grad_norm": 0.30480217933654785, "learning_rate": 1.695759603941836e-05, "loss": 0.1272, "step": 36096 }, { "epoch": 0.643830485499233, "grad_norm": 0.23049749433994293, "learning_rate": 1.695612228676341e-05, "loss": 0.0851, "step": 36097 }, { "epoch": 0.6438483216209467, "grad_norm": 0.3085883557796478, "learning_rate": 1.6954648565289304e-05, "loss": 0.1292, "step": 36098 }, { "epoch": 0.6438661577426604, "grad_norm": 0.23839989304542542, "learning_rate": 1.6953174875001753e-05, "loss": 0.1176, "step": 36099 }, { "epoch": 0.6438839938643741, "grad_norm": 0.2832323908805847, "learning_rate": 1.695170121590648e-05, "loss": 0.1078, "step": 36100 }, { "epoch": 0.6439018299860878, "grad_norm": 0.2851586639881134, "learning_rate": 1.6950227588009194e-05, "loss": 0.0741, "step": 36101 }, { "epoch": 0.6439196661078015, "grad_norm": 0.2892163395881653, "learning_rate": 1.694875399131561e-05, "loss": 0.1528, "step": 36102 }, { "epoch": 0.6439375022295152, "grad_norm": 0.2886578440666199, "learning_rate": 1.6947280425831423e-05, "loss": 0.1211, "step": 36103 }, { "epoch": 0.6439553383512289, "grad_norm": 0.2814652919769287, "learning_rate": 1.694580689156237e-05, "loss": 0.0901, "step": 36104 }, { "epoch": 0.6439731744729426, "grad_norm": 0.24924111366271973, "learning_rate": 1.6944333388514148e-05, "loss": 0.1286, "step": 36105 }, { "epoch": 0.6439910105946562, "grad_norm": 0.3414084017276764, "learning_rate": 1.6942859916692477e-05, "loss": 0.0766, "step": 36106 }, { "epoch": 0.64400884671637, "grad_norm": 0.23955604434013367, "learning_rate": 1.6941386476103056e-05, "loss": 0.1415, "step": 36107 }, { "epoch": 0.6440266828380837, "grad_norm": 0.23224258422851562, "learning_rate": 1.6939913066751606e-05, "loss": 0.1281, "step": 36108 }, { "epoch": 0.6440445189597974, "grad_norm": 0.2662828266620636, "learning_rate": 1.693843968864384e-05, "loss": 0.104, "step": 36109 }, { "epoch": 0.6440623550815111, "grad_norm": 0.374972939491272, "learning_rate": 1.693696634178547e-05, "loss": 0.1304, "step": 36110 }, { "epoch": 0.6440801912032248, "grad_norm": 0.42245882749557495, "learning_rate": 1.6935493026182197e-05, "loss": 0.1789, "step": 36111 }, { "epoch": 0.6440980273249385, "grad_norm": 0.408315509557724, "learning_rate": 1.6934019741839735e-05, "loss": 0.1477, "step": 36112 }, { "epoch": 0.6441158634466522, "grad_norm": 0.29706352949142456, "learning_rate": 1.6932546488763808e-05, "loss": 0.1218, "step": 36113 }, { "epoch": 0.6441336995683659, "grad_norm": 0.3683321475982666, "learning_rate": 1.6931073266960107e-05, "loss": 0.1427, "step": 36114 }, { "epoch": 0.6441515356900795, "grad_norm": 0.23992182314395905, "learning_rate": 1.6929600076434364e-05, "loss": 0.107, "step": 36115 }, { "epoch": 0.6441693718117932, "grad_norm": 0.2670586407184601, "learning_rate": 1.6928126917192262e-05, "loss": 0.1607, "step": 36116 }, { "epoch": 0.6441872079335069, "grad_norm": 0.3094174265861511, "learning_rate": 1.6926653789239544e-05, "loss": 0.1833, "step": 36117 }, { "epoch": 0.6442050440552206, "grad_norm": 0.27157357335090637, "learning_rate": 1.69251806925819e-05, "loss": 0.1313, "step": 36118 }, { "epoch": 0.6442228801769343, "grad_norm": 0.2662277817726135, "learning_rate": 1.692370762722505e-05, "loss": 0.1656, "step": 36119 }, { "epoch": 0.644240716298648, "grad_norm": 0.24142740666866302, "learning_rate": 1.692223459317468e-05, "loss": 0.0742, "step": 36120 }, { "epoch": 0.6442585524203617, "grad_norm": 0.23111611604690552, "learning_rate": 1.6920761590436536e-05, "loss": 0.0935, "step": 36121 }, { "epoch": 0.6442763885420754, "grad_norm": 0.2675308287143707, "learning_rate": 1.6919288619016306e-05, "loss": 0.1086, "step": 36122 }, { "epoch": 0.644294224663789, "grad_norm": 0.27754026651382446, "learning_rate": 1.6917815678919706e-05, "loss": 0.1297, "step": 36123 }, { "epoch": 0.6443120607855028, "grad_norm": 0.25029289722442627, "learning_rate": 1.6916342770152443e-05, "loss": 0.1109, "step": 36124 }, { "epoch": 0.6443298969072165, "grad_norm": 0.23299990594387054, "learning_rate": 1.691486989272022e-05, "loss": 0.1135, "step": 36125 }, { "epoch": 0.6443477330289302, "grad_norm": 0.22696441411972046, "learning_rate": 1.6913397046628765e-05, "loss": 0.143, "step": 36126 }, { "epoch": 0.6443655691506439, "grad_norm": 0.23908481001853943, "learning_rate": 1.6911924231883776e-05, "loss": 0.0963, "step": 36127 }, { "epoch": 0.6443834052723576, "grad_norm": 0.43284371495246887, "learning_rate": 1.691045144849095e-05, "loss": 0.1693, "step": 36128 }, { "epoch": 0.6444012413940713, "grad_norm": 0.2912270426750183, "learning_rate": 1.6908978696456015e-05, "loss": 0.1288, "step": 36129 }, { "epoch": 0.644419077515785, "grad_norm": 0.42308008670806885, "learning_rate": 1.690750597578467e-05, "loss": 0.143, "step": 36130 }, { "epoch": 0.6444369136374987, "grad_norm": 0.24147115647792816, "learning_rate": 1.6906033286482637e-05, "loss": 0.0954, "step": 36131 }, { "epoch": 0.6444547497592124, "grad_norm": 0.27462226152420044, "learning_rate": 1.690456062855561e-05, "loss": 0.1192, "step": 36132 }, { "epoch": 0.644472585880926, "grad_norm": 0.2730329632759094, "learning_rate": 1.6903088002009292e-05, "loss": 0.1269, "step": 36133 }, { "epoch": 0.6444904220026397, "grad_norm": 0.29419299960136414, "learning_rate": 1.6901615406849415e-05, "loss": 0.1409, "step": 36134 }, { "epoch": 0.6445082581243534, "grad_norm": 0.3213551640510559, "learning_rate": 1.6900142843081667e-05, "loss": 0.1047, "step": 36135 }, { "epoch": 0.6445260942460671, "grad_norm": 0.24554812908172607, "learning_rate": 1.6898670310711766e-05, "loss": 0.135, "step": 36136 }, { "epoch": 0.6445439303677808, "grad_norm": 0.32526692748069763, "learning_rate": 1.689719780974542e-05, "loss": 0.1031, "step": 36137 }, { "epoch": 0.6445617664894945, "grad_norm": 0.23407816886901855, "learning_rate": 1.6895725340188316e-05, "loss": 0.1296, "step": 36138 }, { "epoch": 0.6445796026112082, "grad_norm": 0.29700562357902527, "learning_rate": 1.68942529020462e-05, "loss": 0.1096, "step": 36139 }, { "epoch": 0.644597438732922, "grad_norm": 0.3054434657096863, "learning_rate": 1.6892780495324756e-05, "loss": 0.0952, "step": 36140 }, { "epoch": 0.6446152748546357, "grad_norm": 0.3107934594154358, "learning_rate": 1.6891308120029685e-05, "loss": 0.1653, "step": 36141 }, { "epoch": 0.6446331109763493, "grad_norm": 0.21975281834602356, "learning_rate": 1.6889835776166713e-05, "loss": 0.1026, "step": 36142 }, { "epoch": 0.644650947098063, "grad_norm": 0.22903771698474884, "learning_rate": 1.6888363463741534e-05, "loss": 0.0924, "step": 36143 }, { "epoch": 0.6446687832197767, "grad_norm": 0.33508217334747314, "learning_rate": 1.6886891182759865e-05, "loss": 0.2171, "step": 36144 }, { "epoch": 0.6446866193414904, "grad_norm": 0.3408792316913605, "learning_rate": 1.6885418933227408e-05, "loss": 0.1719, "step": 36145 }, { "epoch": 0.6447044554632041, "grad_norm": 0.26822131872177124, "learning_rate": 1.688394671514986e-05, "loss": 0.0906, "step": 36146 }, { "epoch": 0.6447222915849178, "grad_norm": 0.3770483732223511, "learning_rate": 1.688247452853295e-05, "loss": 0.1431, "step": 36147 }, { "epoch": 0.6447401277066315, "grad_norm": 0.3141520023345947, "learning_rate": 1.6881002373382367e-05, "loss": 0.0999, "step": 36148 }, { "epoch": 0.6447579638283452, "grad_norm": 0.201151043176651, "learning_rate": 1.6879530249703824e-05, "loss": 0.1254, "step": 36149 }, { "epoch": 0.6447757999500588, "grad_norm": 0.24054493010044098, "learning_rate": 1.6878058157503027e-05, "loss": 0.1212, "step": 36150 }, { "epoch": 0.6447936360717725, "grad_norm": 0.33309999108314514, "learning_rate": 1.6876586096785673e-05, "loss": 0.1762, "step": 36151 }, { "epoch": 0.6448114721934862, "grad_norm": 0.33809059858322144, "learning_rate": 1.6875114067557486e-05, "loss": 0.1398, "step": 36152 }, { "epoch": 0.6448293083151999, "grad_norm": 0.3542320728302002, "learning_rate": 1.6873642069824167e-05, "loss": 0.0625, "step": 36153 }, { "epoch": 0.6448471444369136, "grad_norm": 0.25446248054504395, "learning_rate": 1.6872170103591417e-05, "loss": 0.1516, "step": 36154 }, { "epoch": 0.6448649805586273, "grad_norm": 0.2221420258283615, "learning_rate": 1.6870698168864928e-05, "loss": 0.1365, "step": 36155 }, { "epoch": 0.644882816680341, "grad_norm": 0.261918842792511, "learning_rate": 1.6869226265650436e-05, "loss": 0.1174, "step": 36156 }, { "epoch": 0.6449006528020548, "grad_norm": 0.288920134305954, "learning_rate": 1.6867754393953623e-05, "loss": 0.1442, "step": 36157 }, { "epoch": 0.6449184889237685, "grad_norm": 0.3040105998516083, "learning_rate": 1.686628255378021e-05, "loss": 0.1179, "step": 36158 }, { "epoch": 0.6449363250454822, "grad_norm": 0.2427533119916916, "learning_rate": 1.6864810745135885e-05, "loss": 0.1212, "step": 36159 }, { "epoch": 0.6449541611671958, "grad_norm": 0.257458359003067, "learning_rate": 1.6863338968026375e-05, "loss": 0.1302, "step": 36160 }, { "epoch": 0.6449719972889095, "grad_norm": 0.2693089544773102, "learning_rate": 1.686186722245737e-05, "loss": 0.0975, "step": 36161 }, { "epoch": 0.6449898334106232, "grad_norm": 0.3116805851459503, "learning_rate": 1.6860395508434574e-05, "loss": 0.1545, "step": 36162 }, { "epoch": 0.6450076695323369, "grad_norm": 0.2290050983428955, "learning_rate": 1.6858923825963702e-05, "loss": 0.117, "step": 36163 }, { "epoch": 0.6450255056540506, "grad_norm": 0.2532957196235657, "learning_rate": 1.6857452175050446e-05, "loss": 0.1196, "step": 36164 }, { "epoch": 0.6450433417757643, "grad_norm": 0.2511919140815735, "learning_rate": 1.6855980555700523e-05, "loss": 0.1317, "step": 36165 }, { "epoch": 0.645061177897478, "grad_norm": 0.25995415449142456, "learning_rate": 1.6854508967919634e-05, "loss": 0.1174, "step": 36166 }, { "epoch": 0.6450790140191917, "grad_norm": 0.2380102425813675, "learning_rate": 1.6853037411713484e-05, "loss": 0.1216, "step": 36167 }, { "epoch": 0.6450968501409053, "grad_norm": 0.2656167149543762, "learning_rate": 1.685156588708776e-05, "loss": 0.0825, "step": 36168 }, { "epoch": 0.645114686262619, "grad_norm": 0.29976800084114075, "learning_rate": 1.6850094394048194e-05, "loss": 0.1186, "step": 36169 }, { "epoch": 0.6451325223843327, "grad_norm": 0.27171820402145386, "learning_rate": 1.6848622932600473e-05, "loss": 0.1398, "step": 36170 }, { "epoch": 0.6451503585060464, "grad_norm": 0.20812223851680756, "learning_rate": 1.6847151502750307e-05, "loss": 0.1009, "step": 36171 }, { "epoch": 0.6451681946277601, "grad_norm": 0.24184690415859222, "learning_rate": 1.684568010450339e-05, "loss": 0.1297, "step": 36172 }, { "epoch": 0.6451860307494738, "grad_norm": 0.31422850489616394, "learning_rate": 1.6844208737865443e-05, "loss": 0.0909, "step": 36173 }, { "epoch": 0.6452038668711876, "grad_norm": 0.2704690992832184, "learning_rate": 1.684273740284216e-05, "loss": 0.0752, "step": 36174 }, { "epoch": 0.6452217029929013, "grad_norm": 0.24138988554477692, "learning_rate": 1.6841266099439243e-05, "loss": 0.1509, "step": 36175 }, { "epoch": 0.645239539114615, "grad_norm": 0.2506619393825531, "learning_rate": 1.6839794827662393e-05, "loss": 0.1149, "step": 36176 }, { "epoch": 0.6452573752363286, "grad_norm": 0.23446524143218994, "learning_rate": 1.6838323587517316e-05, "loss": 0.1364, "step": 36177 }, { "epoch": 0.6452752113580423, "grad_norm": 0.2749983072280884, "learning_rate": 1.6836852379009717e-05, "loss": 0.1307, "step": 36178 }, { "epoch": 0.645293047479756, "grad_norm": 0.28372645378112793, "learning_rate": 1.68353812021453e-05, "loss": 0.198, "step": 36179 }, { "epoch": 0.6453108836014697, "grad_norm": 0.28295403718948364, "learning_rate": 1.6833910056929768e-05, "loss": 0.1216, "step": 36180 }, { "epoch": 0.6453287197231834, "grad_norm": 0.22426429390907288, "learning_rate": 1.6832438943368805e-05, "loss": 0.0969, "step": 36181 }, { "epoch": 0.6453465558448971, "grad_norm": 0.19216197729110718, "learning_rate": 1.6830967861468145e-05, "loss": 0.1148, "step": 36182 }, { "epoch": 0.6453643919666108, "grad_norm": 0.2421443909406662, "learning_rate": 1.6829496811233474e-05, "loss": 0.1171, "step": 36183 }, { "epoch": 0.6453822280883245, "grad_norm": 0.25270938873291016, "learning_rate": 1.6828025792670492e-05, "loss": 0.1617, "step": 36184 }, { "epoch": 0.6454000642100381, "grad_norm": 0.3278418779373169, "learning_rate": 1.68265548057849e-05, "loss": 0.1203, "step": 36185 }, { "epoch": 0.6454179003317518, "grad_norm": 0.22865568101406097, "learning_rate": 1.682508385058241e-05, "loss": 0.1389, "step": 36186 }, { "epoch": 0.6454357364534655, "grad_norm": 0.2596084177494049, "learning_rate": 1.6823612927068723e-05, "loss": 0.1136, "step": 36187 }, { "epoch": 0.6454535725751792, "grad_norm": 0.34987136721611023, "learning_rate": 1.6822142035249538e-05, "loss": 0.1406, "step": 36188 }, { "epoch": 0.6454714086968929, "grad_norm": 0.34618711471557617, "learning_rate": 1.6820671175130544e-05, "loss": 0.1388, "step": 36189 }, { "epoch": 0.6454892448186066, "grad_norm": 0.30179232358932495, "learning_rate": 1.6819200346717456e-05, "loss": 0.0788, "step": 36190 }, { "epoch": 0.6455070809403204, "grad_norm": 0.2815258502960205, "learning_rate": 1.6817729550015975e-05, "loss": 0.0901, "step": 36191 }, { "epoch": 0.6455249170620341, "grad_norm": 0.2742551863193512, "learning_rate": 1.68162587850318e-05, "loss": 0.1336, "step": 36192 }, { "epoch": 0.6455427531837478, "grad_norm": 0.20963624119758606, "learning_rate": 1.681478805177064e-05, "loss": 0.1182, "step": 36193 }, { "epoch": 0.6455605893054615, "grad_norm": 0.2866366505622864, "learning_rate": 1.6813317350238176e-05, "loss": 0.1015, "step": 36194 }, { "epoch": 0.6455784254271751, "grad_norm": 0.277170866727829, "learning_rate": 1.681184668044013e-05, "loss": 0.119, "step": 36195 }, { "epoch": 0.6455962615488888, "grad_norm": 0.20249556005001068, "learning_rate": 1.6810376042382193e-05, "loss": 0.1248, "step": 36196 }, { "epoch": 0.6456140976706025, "grad_norm": 0.2913576662540436, "learning_rate": 1.6808905436070066e-05, "loss": 0.1683, "step": 36197 }, { "epoch": 0.6456319337923162, "grad_norm": 0.2869216203689575, "learning_rate": 1.6807434861509446e-05, "loss": 0.1525, "step": 36198 }, { "epoch": 0.6456497699140299, "grad_norm": 0.19986768066883087, "learning_rate": 1.6805964318706043e-05, "loss": 0.1054, "step": 36199 }, { "epoch": 0.6456676060357436, "grad_norm": 0.20991632342338562, "learning_rate": 1.6804493807665555e-05, "loss": 0.136, "step": 36200 }, { "epoch": 0.6456854421574573, "grad_norm": 0.29451504349708557, "learning_rate": 1.680302332839368e-05, "loss": 0.1445, "step": 36201 }, { "epoch": 0.645703278279171, "grad_norm": 0.3638874590396881, "learning_rate": 1.6801552880896115e-05, "loss": 0.1193, "step": 36202 }, { "epoch": 0.6457211144008846, "grad_norm": 0.2540421187877655, "learning_rate": 1.6800082465178558e-05, "loss": 0.1338, "step": 36203 }, { "epoch": 0.6457389505225983, "grad_norm": 0.45227232575416565, "learning_rate": 1.6798612081246717e-05, "loss": 0.1278, "step": 36204 }, { "epoch": 0.645756786644312, "grad_norm": 0.24821460247039795, "learning_rate": 1.6797141729106287e-05, "loss": 0.0638, "step": 36205 }, { "epoch": 0.6457746227660257, "grad_norm": 0.22063791751861572, "learning_rate": 1.6795671408762976e-05, "loss": 0.1703, "step": 36206 }, { "epoch": 0.6457924588877394, "grad_norm": 0.34213772416114807, "learning_rate": 1.6794201120222465e-05, "loss": 0.1661, "step": 36207 }, { "epoch": 0.6458102950094532, "grad_norm": 0.19845445454120636, "learning_rate": 1.679273086349047e-05, "loss": 0.1269, "step": 36208 }, { "epoch": 0.6458281311311669, "grad_norm": 0.2927187979221344, "learning_rate": 1.679126063857269e-05, "loss": 0.1044, "step": 36209 }, { "epoch": 0.6458459672528806, "grad_norm": 0.22445560991764069, "learning_rate": 1.678979044547482e-05, "loss": 0.1552, "step": 36210 }, { "epoch": 0.6458638033745943, "grad_norm": 0.24601800739765167, "learning_rate": 1.6788320284202544e-05, "loss": 0.127, "step": 36211 }, { "epoch": 0.645881639496308, "grad_norm": 0.36156129837036133, "learning_rate": 1.678685015476158e-05, "loss": 0.1691, "step": 36212 }, { "epoch": 0.6458994756180216, "grad_norm": 0.20540089905261993, "learning_rate": 1.678538005715763e-05, "loss": 0.1188, "step": 36213 }, { "epoch": 0.6459173117397353, "grad_norm": 0.243947371840477, "learning_rate": 1.678390999139638e-05, "loss": 0.1008, "step": 36214 }, { "epoch": 0.645935147861449, "grad_norm": 0.21612975001335144, "learning_rate": 1.6782439957483537e-05, "loss": 0.1266, "step": 36215 }, { "epoch": 0.6459529839831627, "grad_norm": 0.3539649546146393, "learning_rate": 1.678096995542478e-05, "loss": 0.1315, "step": 36216 }, { "epoch": 0.6459708201048764, "grad_norm": 0.25346601009368896, "learning_rate": 1.6779499985225835e-05, "loss": 0.0822, "step": 36217 }, { "epoch": 0.6459886562265901, "grad_norm": 0.3246941566467285, "learning_rate": 1.677803004689238e-05, "loss": 0.1154, "step": 36218 }, { "epoch": 0.6460064923483038, "grad_norm": 0.2321978211402893, "learning_rate": 1.677656014043013e-05, "loss": 0.1207, "step": 36219 }, { "epoch": 0.6460243284700175, "grad_norm": 0.23822705447673798, "learning_rate": 1.677509026584476e-05, "loss": 0.0964, "step": 36220 }, { "epoch": 0.6460421645917311, "grad_norm": 0.2451213002204895, "learning_rate": 1.6773620423141993e-05, "loss": 0.1193, "step": 36221 }, { "epoch": 0.6460600007134448, "grad_norm": 0.304071843624115, "learning_rate": 1.6772150612327512e-05, "loss": 0.1353, "step": 36222 }, { "epoch": 0.6460778368351585, "grad_norm": 0.26478129625320435, "learning_rate": 1.6770680833407017e-05, "loss": 0.1468, "step": 36223 }, { "epoch": 0.6460956729568722, "grad_norm": 0.2717961370944977, "learning_rate": 1.6769211086386195e-05, "loss": 0.1653, "step": 36224 }, { "epoch": 0.646113509078586, "grad_norm": 0.2218887060880661, "learning_rate": 1.6767741371270767e-05, "loss": 0.1295, "step": 36225 }, { "epoch": 0.6461313452002997, "grad_norm": 0.20231322944164276, "learning_rate": 1.6766271688066408e-05, "loss": 0.1412, "step": 36226 }, { "epoch": 0.6461491813220134, "grad_norm": 0.2450684905052185, "learning_rate": 1.6764802036778833e-05, "loss": 0.0997, "step": 36227 }, { "epoch": 0.6461670174437271, "grad_norm": 0.24115802347660065, "learning_rate": 1.6763332417413727e-05, "loss": 0.1251, "step": 36228 }, { "epoch": 0.6461848535654408, "grad_norm": 0.38725319504737854, "learning_rate": 1.676186282997678e-05, "loss": 0.1815, "step": 36229 }, { "epoch": 0.6462026896871544, "grad_norm": 0.4499475359916687, "learning_rate": 1.6760393274473705e-05, "loss": 0.1293, "step": 36230 }, { "epoch": 0.6462205258088681, "grad_norm": 0.26100361347198486, "learning_rate": 1.6758923750910198e-05, "loss": 0.0841, "step": 36231 }, { "epoch": 0.6462383619305818, "grad_norm": 0.32052701711654663, "learning_rate": 1.6757454259291938e-05, "loss": 0.1381, "step": 36232 }, { "epoch": 0.6462561980522955, "grad_norm": 0.3102074861526489, "learning_rate": 1.6755984799624635e-05, "loss": 0.1462, "step": 36233 }, { "epoch": 0.6462740341740092, "grad_norm": 0.2903602719306946, "learning_rate": 1.675451537191398e-05, "loss": 0.1492, "step": 36234 }, { "epoch": 0.6462918702957229, "grad_norm": 0.2678000032901764, "learning_rate": 1.6753045976165678e-05, "loss": 0.1484, "step": 36235 }, { "epoch": 0.6463097064174366, "grad_norm": 0.2865830659866333, "learning_rate": 1.6751576612385422e-05, "loss": 0.1307, "step": 36236 }, { "epoch": 0.6463275425391503, "grad_norm": 0.3205627202987671, "learning_rate": 1.6750107280578884e-05, "loss": 0.1273, "step": 36237 }, { "epoch": 0.646345378660864, "grad_norm": 0.351855993270874, "learning_rate": 1.67486379807518e-05, "loss": 0.1811, "step": 36238 }, { "epoch": 0.6463632147825776, "grad_norm": 0.33485251665115356, "learning_rate": 1.6747168712909837e-05, "loss": 0.1789, "step": 36239 }, { "epoch": 0.6463810509042913, "grad_norm": 0.3263581395149231, "learning_rate": 1.6745699477058702e-05, "loss": 0.1758, "step": 36240 }, { "epoch": 0.6463988870260051, "grad_norm": 0.3114212453365326, "learning_rate": 1.6744230273204087e-05, "loss": 0.1226, "step": 36241 }, { "epoch": 0.6464167231477188, "grad_norm": 0.5191286206245422, "learning_rate": 1.6742761101351678e-05, "loss": 0.1303, "step": 36242 }, { "epoch": 0.6464345592694325, "grad_norm": 0.306167870759964, "learning_rate": 1.6741291961507187e-05, "loss": 0.1729, "step": 36243 }, { "epoch": 0.6464523953911462, "grad_norm": 0.25029274821281433, "learning_rate": 1.67398228536763e-05, "loss": 0.1257, "step": 36244 }, { "epoch": 0.6464702315128599, "grad_norm": 0.3108735680580139, "learning_rate": 1.673835377786471e-05, "loss": 0.1558, "step": 36245 }, { "epoch": 0.6464880676345736, "grad_norm": 0.3183059096336365, "learning_rate": 1.6736884734078114e-05, "loss": 0.114, "step": 36246 }, { "epoch": 0.6465059037562872, "grad_norm": 0.24241836369037628, "learning_rate": 1.6735415722322206e-05, "loss": 0.1219, "step": 36247 }, { "epoch": 0.6465237398780009, "grad_norm": 0.2833211123943329, "learning_rate": 1.6733946742602684e-05, "loss": 0.1154, "step": 36248 }, { "epoch": 0.6465415759997146, "grad_norm": 0.23954704403877258, "learning_rate": 1.6732477794925245e-05, "loss": 0.1473, "step": 36249 }, { "epoch": 0.6465594121214283, "grad_norm": 0.44087162613868713, "learning_rate": 1.6731008879295566e-05, "loss": 0.1261, "step": 36250 }, { "epoch": 0.646577248243142, "grad_norm": 0.25401169061660767, "learning_rate": 1.672953999571936e-05, "loss": 0.1271, "step": 36251 }, { "epoch": 0.6465950843648557, "grad_norm": 0.27473825216293335, "learning_rate": 1.6728071144202315e-05, "loss": 0.1104, "step": 36252 }, { "epoch": 0.6466129204865694, "grad_norm": 0.46097078919410706, "learning_rate": 1.6726602324750117e-05, "loss": 0.1473, "step": 36253 }, { "epoch": 0.6466307566082831, "grad_norm": 0.28831246495246887, "learning_rate": 1.6725133537368473e-05, "loss": 0.1507, "step": 36254 }, { "epoch": 0.6466485927299968, "grad_norm": 0.3747731149196625, "learning_rate": 1.672366478206306e-05, "loss": 0.1165, "step": 36255 }, { "epoch": 0.6466664288517104, "grad_norm": 0.2572822868824005, "learning_rate": 1.6722196058839587e-05, "loss": 0.0803, "step": 36256 }, { "epoch": 0.6466842649734241, "grad_norm": 0.2616421580314636, "learning_rate": 1.6720727367703743e-05, "loss": 0.1261, "step": 36257 }, { "epoch": 0.6467021010951379, "grad_norm": 0.23427090048789978, "learning_rate": 1.671925870866122e-05, "loss": 0.1029, "step": 36258 }, { "epoch": 0.6467199372168516, "grad_norm": 0.20507794618606567, "learning_rate": 1.67177900817177e-05, "loss": 0.1415, "step": 36259 }, { "epoch": 0.6467377733385653, "grad_norm": 0.2586061954498291, "learning_rate": 1.6716321486878894e-05, "loss": 0.1593, "step": 36260 }, { "epoch": 0.646755609460279, "grad_norm": 0.24695369601249695, "learning_rate": 1.6714852924150486e-05, "loss": 0.1734, "step": 36261 }, { "epoch": 0.6467734455819927, "grad_norm": 0.42236751317977905, "learning_rate": 1.6713384393538173e-05, "loss": 0.1379, "step": 36262 }, { "epoch": 0.6467912817037064, "grad_norm": 0.2234976887702942, "learning_rate": 1.6711915895047633e-05, "loss": 0.1261, "step": 36263 }, { "epoch": 0.64680911782542, "grad_norm": 0.28489330410957336, "learning_rate": 1.671044742868458e-05, "loss": 0.1322, "step": 36264 }, { "epoch": 0.6468269539471337, "grad_norm": 0.2886865437030792, "learning_rate": 1.67089789944547e-05, "loss": 0.1295, "step": 36265 }, { "epoch": 0.6468447900688474, "grad_norm": 0.25815892219543457, "learning_rate": 1.6707510592363672e-05, "loss": 0.1273, "step": 36266 }, { "epoch": 0.6468626261905611, "grad_norm": 0.2884007394313812, "learning_rate": 1.6706042222417202e-05, "loss": 0.1237, "step": 36267 }, { "epoch": 0.6468804623122748, "grad_norm": 0.46964511275291443, "learning_rate": 1.6704573884620968e-05, "loss": 0.1564, "step": 36268 }, { "epoch": 0.6468982984339885, "grad_norm": 0.2360130399465561, "learning_rate": 1.670310557898068e-05, "loss": 0.1149, "step": 36269 }, { "epoch": 0.6469161345557022, "grad_norm": 0.2545284628868103, "learning_rate": 1.670163730550202e-05, "loss": 0.1091, "step": 36270 }, { "epoch": 0.6469339706774159, "grad_norm": 0.2422153204679489, "learning_rate": 1.670016906419068e-05, "loss": 0.11, "step": 36271 }, { "epoch": 0.6469518067991296, "grad_norm": 0.3159201145172119, "learning_rate": 1.6698700855052343e-05, "loss": 0.1337, "step": 36272 }, { "epoch": 0.6469696429208432, "grad_norm": 0.24787463247776031, "learning_rate": 1.6697232678092718e-05, "loss": 0.0966, "step": 36273 }, { "epoch": 0.6469874790425569, "grad_norm": 0.2403479665517807, "learning_rate": 1.6695764533317482e-05, "loss": 0.0861, "step": 36274 }, { "epoch": 0.6470053151642707, "grad_norm": 0.26724961400032043, "learning_rate": 1.6694296420732337e-05, "loss": 0.1447, "step": 36275 }, { "epoch": 0.6470231512859844, "grad_norm": 0.24436023831367493, "learning_rate": 1.6692828340342955e-05, "loss": 0.1088, "step": 36276 }, { "epoch": 0.6470409874076981, "grad_norm": 0.35987207293510437, "learning_rate": 1.669136029215505e-05, "loss": 0.0616, "step": 36277 }, { "epoch": 0.6470588235294118, "grad_norm": 0.18692877888679504, "learning_rate": 1.6689892276174302e-05, "loss": 0.0947, "step": 36278 }, { "epoch": 0.6470766596511255, "grad_norm": 0.3369114100933075, "learning_rate": 1.6688424292406405e-05, "loss": 0.1396, "step": 36279 }, { "epoch": 0.6470944957728392, "grad_norm": 0.287370890378952, "learning_rate": 1.6686956340857036e-05, "loss": 0.121, "step": 36280 }, { "epoch": 0.6471123318945529, "grad_norm": 0.2611466944217682, "learning_rate": 1.66854884215319e-05, "loss": 0.0755, "step": 36281 }, { "epoch": 0.6471301680162665, "grad_norm": 0.3269018232822418, "learning_rate": 1.668402053443668e-05, "loss": 0.1188, "step": 36282 }, { "epoch": 0.6471480041379802, "grad_norm": 0.2913071811199188, "learning_rate": 1.6682552679577075e-05, "loss": 0.1184, "step": 36283 }, { "epoch": 0.6471658402596939, "grad_norm": 0.31349894404411316, "learning_rate": 1.668108485695877e-05, "loss": 0.0667, "step": 36284 }, { "epoch": 0.6471836763814076, "grad_norm": 0.31691157817840576, "learning_rate": 1.667961706658744e-05, "loss": 0.1177, "step": 36285 }, { "epoch": 0.6472015125031213, "grad_norm": 0.21011942625045776, "learning_rate": 1.66781493084688e-05, "loss": 0.0976, "step": 36286 }, { "epoch": 0.647219348624835, "grad_norm": 0.24647122621536255, "learning_rate": 1.6676681582608527e-05, "loss": 0.0921, "step": 36287 }, { "epoch": 0.6472371847465487, "grad_norm": 0.3006668984889984, "learning_rate": 1.6675213889012308e-05, "loss": 0.1108, "step": 36288 }, { "epoch": 0.6472550208682624, "grad_norm": 0.35260167717933655, "learning_rate": 1.6673746227685826e-05, "loss": 0.0982, "step": 36289 }, { "epoch": 0.647272856989976, "grad_norm": 0.24643471837043762, "learning_rate": 1.6672278598634795e-05, "loss": 0.1282, "step": 36290 }, { "epoch": 0.6472906931116897, "grad_norm": 0.24415390193462372, "learning_rate": 1.6670811001864882e-05, "loss": 0.1131, "step": 36291 }, { "epoch": 0.6473085292334035, "grad_norm": 0.2958952784538269, "learning_rate": 1.6669343437381787e-05, "loss": 0.143, "step": 36292 }, { "epoch": 0.6473263653551172, "grad_norm": 0.38828209042549133, "learning_rate": 1.6667875905191187e-05, "loss": 0.1421, "step": 36293 }, { "epoch": 0.6473442014768309, "grad_norm": 0.31823015213012695, "learning_rate": 1.666640840529878e-05, "loss": 0.1072, "step": 36294 }, { "epoch": 0.6473620375985446, "grad_norm": 0.32994458079338074, "learning_rate": 1.666494093771025e-05, "loss": 0.1531, "step": 36295 }, { "epoch": 0.6473798737202583, "grad_norm": 0.39449965953826904, "learning_rate": 1.6663473502431294e-05, "loss": 0.2115, "step": 36296 }, { "epoch": 0.647397709841972, "grad_norm": 0.2324059009552002, "learning_rate": 1.6662006099467594e-05, "loss": 0.1574, "step": 36297 }, { "epoch": 0.6474155459636857, "grad_norm": 0.3697079122066498, "learning_rate": 1.666053872882483e-05, "loss": 0.1455, "step": 36298 }, { "epoch": 0.6474333820853994, "grad_norm": 0.2839532792568207, "learning_rate": 1.6659071390508703e-05, "loss": 0.1038, "step": 36299 }, { "epoch": 0.647451218207113, "grad_norm": 0.20604731142520905, "learning_rate": 1.66576040845249e-05, "loss": 0.0891, "step": 36300 }, { "epoch": 0.6474690543288267, "grad_norm": 0.20650988817214966, "learning_rate": 1.66561368108791e-05, "loss": 0.1208, "step": 36301 }, { "epoch": 0.6474868904505404, "grad_norm": 0.19601276516914368, "learning_rate": 1.6654669569577e-05, "loss": 0.1408, "step": 36302 }, { "epoch": 0.6475047265722541, "grad_norm": 0.1895613968372345, "learning_rate": 1.6653202360624274e-05, "loss": 0.0602, "step": 36303 }, { "epoch": 0.6475225626939678, "grad_norm": 0.30013152956962585, "learning_rate": 1.665173518402663e-05, "loss": 0.1001, "step": 36304 }, { "epoch": 0.6475403988156815, "grad_norm": 0.24785785377025604, "learning_rate": 1.665026803978974e-05, "loss": 0.1042, "step": 36305 }, { "epoch": 0.6475582349373952, "grad_norm": 0.28571587800979614, "learning_rate": 1.6648800927919294e-05, "loss": 0.0973, "step": 36306 }, { "epoch": 0.6475760710591089, "grad_norm": 0.21622537076473236, "learning_rate": 1.6647333848420973e-05, "loss": 0.1153, "step": 36307 }, { "epoch": 0.6475939071808225, "grad_norm": 0.38675451278686523, "learning_rate": 1.664586680130048e-05, "loss": 0.1655, "step": 36308 }, { "epoch": 0.6476117433025363, "grad_norm": 0.31630417704582214, "learning_rate": 1.664439978656349e-05, "loss": 0.1549, "step": 36309 }, { "epoch": 0.64762957942425, "grad_norm": 0.37522152066230774, "learning_rate": 1.6642932804215693e-05, "loss": 0.1301, "step": 36310 }, { "epoch": 0.6476474155459637, "grad_norm": 0.22811684012413025, "learning_rate": 1.6641465854262767e-05, "loss": 0.1337, "step": 36311 }, { "epoch": 0.6476652516676774, "grad_norm": 0.2267579585313797, "learning_rate": 1.6639998936710417e-05, "loss": 0.1339, "step": 36312 }, { "epoch": 0.6476830877893911, "grad_norm": 0.2597489058971405, "learning_rate": 1.663853205156432e-05, "loss": 0.1177, "step": 36313 }, { "epoch": 0.6477009239111048, "grad_norm": 0.27039024233818054, "learning_rate": 1.663706519883016e-05, "loss": 0.107, "step": 36314 }, { "epoch": 0.6477187600328185, "grad_norm": 0.23155201971530914, "learning_rate": 1.663559837851361e-05, "loss": 0.1174, "step": 36315 }, { "epoch": 0.6477365961545322, "grad_norm": 0.26490023732185364, "learning_rate": 1.663413159062038e-05, "loss": 0.1378, "step": 36316 }, { "epoch": 0.6477544322762459, "grad_norm": 0.27218809723854065, "learning_rate": 1.663266483515615e-05, "loss": 0.1204, "step": 36317 }, { "epoch": 0.6477722683979595, "grad_norm": 0.2294764369726181, "learning_rate": 1.6631198112126595e-05, "loss": 0.0989, "step": 36318 }, { "epoch": 0.6477901045196732, "grad_norm": 0.21647778153419495, "learning_rate": 1.6629731421537407e-05, "loss": 0.0577, "step": 36319 }, { "epoch": 0.6478079406413869, "grad_norm": 0.3042411506175995, "learning_rate": 1.6628264763394267e-05, "loss": 0.1286, "step": 36320 }, { "epoch": 0.6478257767631006, "grad_norm": 0.20827218890190125, "learning_rate": 1.6626798137702874e-05, "loss": 0.0902, "step": 36321 }, { "epoch": 0.6478436128848143, "grad_norm": 0.310955673456192, "learning_rate": 1.6625331544468895e-05, "loss": 0.1566, "step": 36322 }, { "epoch": 0.647861449006528, "grad_norm": 0.29786819219589233, "learning_rate": 1.6623864983698027e-05, "loss": 0.1314, "step": 36323 }, { "epoch": 0.6478792851282417, "grad_norm": 0.32501375675201416, "learning_rate": 1.6622398455395948e-05, "loss": 0.1523, "step": 36324 }, { "epoch": 0.6478971212499554, "grad_norm": 0.3099982738494873, "learning_rate": 1.662093195956835e-05, "loss": 0.1397, "step": 36325 }, { "epoch": 0.6479149573716692, "grad_norm": 0.3020944893360138, "learning_rate": 1.6619465496220916e-05, "loss": 0.1175, "step": 36326 }, { "epoch": 0.6479327934933828, "grad_norm": 0.1899459809064865, "learning_rate": 1.661799906535933e-05, "loss": 0.0725, "step": 36327 }, { "epoch": 0.6479506296150965, "grad_norm": 0.2732452154159546, "learning_rate": 1.661653266698926e-05, "loss": 0.131, "step": 36328 }, { "epoch": 0.6479684657368102, "grad_norm": 0.30049529671669006, "learning_rate": 1.6615066301116418e-05, "loss": 0.1241, "step": 36329 }, { "epoch": 0.6479863018585239, "grad_norm": 0.23166239261627197, "learning_rate": 1.6613599967746472e-05, "loss": 0.131, "step": 36330 }, { "epoch": 0.6480041379802376, "grad_norm": 0.28806835412979126, "learning_rate": 1.661213366688511e-05, "loss": 0.1829, "step": 36331 }, { "epoch": 0.6480219741019513, "grad_norm": 0.31063300371170044, "learning_rate": 1.6610667398538017e-05, "loss": 0.1555, "step": 36332 }, { "epoch": 0.648039810223665, "grad_norm": 0.24637821316719055, "learning_rate": 1.6609201162710865e-05, "loss": 0.0964, "step": 36333 }, { "epoch": 0.6480576463453787, "grad_norm": 0.2434396594762802, "learning_rate": 1.6607734959409357e-05, "loss": 0.1249, "step": 36334 }, { "epoch": 0.6480754824670923, "grad_norm": 0.2530849874019623, "learning_rate": 1.6606268788639166e-05, "loss": 0.1377, "step": 36335 }, { "epoch": 0.648093318588806, "grad_norm": 0.27908849716186523, "learning_rate": 1.6604802650405974e-05, "loss": 0.0817, "step": 36336 }, { "epoch": 0.6481111547105197, "grad_norm": 0.23288792371749878, "learning_rate": 1.6603336544715463e-05, "loss": 0.1121, "step": 36337 }, { "epoch": 0.6481289908322334, "grad_norm": 0.3089233636856079, "learning_rate": 1.660187047157332e-05, "loss": 0.1723, "step": 36338 }, { "epoch": 0.6481468269539471, "grad_norm": 0.2522965967655182, "learning_rate": 1.6600404430985236e-05, "loss": 0.1109, "step": 36339 }, { "epoch": 0.6481646630756608, "grad_norm": 0.279598593711853, "learning_rate": 1.6598938422956885e-05, "loss": 0.1199, "step": 36340 }, { "epoch": 0.6481824991973745, "grad_norm": 0.252302348613739, "learning_rate": 1.659747244749394e-05, "loss": 0.1184, "step": 36341 }, { "epoch": 0.6482003353190883, "grad_norm": 0.24944572150707245, "learning_rate": 1.6596006504602102e-05, "loss": 0.0956, "step": 36342 }, { "epoch": 0.648218171440802, "grad_norm": 0.28131645917892456, "learning_rate": 1.6594540594287038e-05, "loss": 0.1104, "step": 36343 }, { "epoch": 0.6482360075625156, "grad_norm": 0.1979651302099228, "learning_rate": 1.6593074716554448e-05, "loss": 0.1144, "step": 36344 }, { "epoch": 0.6482538436842293, "grad_norm": 0.35066723823547363, "learning_rate": 1.6591608871410004e-05, "loss": 0.1359, "step": 36345 }, { "epoch": 0.648271679805943, "grad_norm": 0.2735930383205414, "learning_rate": 1.6590143058859374e-05, "loss": 0.1025, "step": 36346 }, { "epoch": 0.6482895159276567, "grad_norm": 0.25720998644828796, "learning_rate": 1.6588677278908264e-05, "loss": 0.1246, "step": 36347 }, { "epoch": 0.6483073520493704, "grad_norm": 0.24710716307163239, "learning_rate": 1.6587211531562353e-05, "loss": 0.1518, "step": 36348 }, { "epoch": 0.6483251881710841, "grad_norm": 0.2664247751235962, "learning_rate": 1.6585745816827304e-05, "loss": 0.1272, "step": 36349 }, { "epoch": 0.6483430242927978, "grad_norm": 0.20622743666172028, "learning_rate": 1.658428013470881e-05, "loss": 0.0709, "step": 36350 }, { "epoch": 0.6483608604145115, "grad_norm": 0.24626660346984863, "learning_rate": 1.6582814485212563e-05, "loss": 0.1329, "step": 36351 }, { "epoch": 0.6483786965362252, "grad_norm": 0.2569271922111511, "learning_rate": 1.6581348868344226e-05, "loss": 0.1372, "step": 36352 }, { "epoch": 0.6483965326579388, "grad_norm": 0.37782010436058044, "learning_rate": 1.6579883284109498e-05, "loss": 0.1116, "step": 36353 }, { "epoch": 0.6484143687796525, "grad_norm": 0.2427438497543335, "learning_rate": 1.6578417732514035e-05, "loss": 0.1578, "step": 36354 }, { "epoch": 0.6484322049013662, "grad_norm": 0.3771073520183563, "learning_rate": 1.6576952213563545e-05, "loss": 0.1861, "step": 36355 }, { "epoch": 0.6484500410230799, "grad_norm": 0.22933746874332428, "learning_rate": 1.65754867272637e-05, "loss": 0.1156, "step": 36356 }, { "epoch": 0.6484678771447936, "grad_norm": 0.22353418171405792, "learning_rate": 1.6574021273620172e-05, "loss": 0.1076, "step": 36357 }, { "epoch": 0.6484857132665073, "grad_norm": 0.23841258883476257, "learning_rate": 1.657255585263865e-05, "loss": 0.1272, "step": 36358 }, { "epoch": 0.6485035493882211, "grad_norm": 0.23630429804325104, "learning_rate": 1.6571090464324804e-05, "loss": 0.0882, "step": 36359 }, { "epoch": 0.6485213855099348, "grad_norm": 0.4377550482749939, "learning_rate": 1.6569625108684332e-05, "loss": 0.1623, "step": 36360 }, { "epoch": 0.6485392216316485, "grad_norm": 0.25166547298431396, "learning_rate": 1.656815978572291e-05, "loss": 0.1292, "step": 36361 }, { "epoch": 0.6485570577533621, "grad_norm": 0.2722752094268799, "learning_rate": 1.6566694495446205e-05, "loss": 0.1115, "step": 36362 }, { "epoch": 0.6485748938750758, "grad_norm": 0.24089182913303375, "learning_rate": 1.6565229237859897e-05, "loss": 0.1222, "step": 36363 }, { "epoch": 0.6485927299967895, "grad_norm": 0.24389807879924774, "learning_rate": 1.6563764012969688e-05, "loss": 0.1484, "step": 36364 }, { "epoch": 0.6486105661185032, "grad_norm": 0.3104889690876007, "learning_rate": 1.6562298820781235e-05, "loss": 0.1021, "step": 36365 }, { "epoch": 0.6486284022402169, "grad_norm": 0.3075515925884247, "learning_rate": 1.656083366130023e-05, "loss": 0.118, "step": 36366 }, { "epoch": 0.6486462383619306, "grad_norm": 0.3432249128818512, "learning_rate": 1.6559368534532344e-05, "loss": 0.2021, "step": 36367 }, { "epoch": 0.6486640744836443, "grad_norm": 0.29838305711746216, "learning_rate": 1.6557903440483266e-05, "loss": 0.1279, "step": 36368 }, { "epoch": 0.648681910605358, "grad_norm": 0.23626558482646942, "learning_rate": 1.6556438379158673e-05, "loss": 0.1308, "step": 36369 }, { "epoch": 0.6486997467270716, "grad_norm": 0.20485560595989227, "learning_rate": 1.6554973350564232e-05, "loss": 0.0889, "step": 36370 }, { "epoch": 0.6487175828487853, "grad_norm": 0.2429179847240448, "learning_rate": 1.6553508354705636e-05, "loss": 0.1302, "step": 36371 }, { "epoch": 0.648735418970499, "grad_norm": 0.320940226316452, "learning_rate": 1.6552043391588554e-05, "loss": 0.1045, "step": 36372 }, { "epoch": 0.6487532550922127, "grad_norm": 0.2593878209590912, "learning_rate": 1.6550578461218676e-05, "loss": 0.1133, "step": 36373 }, { "epoch": 0.6487710912139264, "grad_norm": 0.20208634436130524, "learning_rate": 1.6549113563601675e-05, "loss": 0.0769, "step": 36374 }, { "epoch": 0.6487889273356401, "grad_norm": 0.25661903619766235, "learning_rate": 1.6547648698743233e-05, "loss": 0.1344, "step": 36375 }, { "epoch": 0.6488067634573539, "grad_norm": 0.2480912059545517, "learning_rate": 1.6546183866649005e-05, "loss": 0.1334, "step": 36376 }, { "epoch": 0.6488245995790676, "grad_norm": 0.22495205700397491, "learning_rate": 1.6544719067324706e-05, "loss": 0.106, "step": 36377 }, { "epoch": 0.6488424357007813, "grad_norm": 0.29496297240257263, "learning_rate": 1.654325430077599e-05, "loss": 0.1541, "step": 36378 }, { "epoch": 0.648860271822495, "grad_norm": 0.3045872747898102, "learning_rate": 1.6541789567008542e-05, "loss": 0.174, "step": 36379 }, { "epoch": 0.6488781079442086, "grad_norm": 0.20767450332641602, "learning_rate": 1.6540324866028034e-05, "loss": 0.0715, "step": 36380 }, { "epoch": 0.6488959440659223, "grad_norm": 0.308723509311676, "learning_rate": 1.6538860197840156e-05, "loss": 0.1058, "step": 36381 }, { "epoch": 0.648913780187636, "grad_norm": 0.2800751030445099, "learning_rate": 1.6537395562450577e-05, "loss": 0.1242, "step": 36382 }, { "epoch": 0.6489316163093497, "grad_norm": 0.2784283757209778, "learning_rate": 1.6535930959864977e-05, "loss": 0.2043, "step": 36383 }, { "epoch": 0.6489494524310634, "grad_norm": 0.23597945272922516, "learning_rate": 1.653446639008903e-05, "loss": 0.1005, "step": 36384 }, { "epoch": 0.6489672885527771, "grad_norm": 0.2835776209831238, "learning_rate": 1.653300185312841e-05, "loss": 0.1554, "step": 36385 }, { "epoch": 0.6489851246744908, "grad_norm": 0.2600294053554535, "learning_rate": 1.6531537348988803e-05, "loss": 0.1418, "step": 36386 }, { "epoch": 0.6490029607962045, "grad_norm": 0.28589382767677307, "learning_rate": 1.6530072877675885e-05, "loss": 0.1042, "step": 36387 }, { "epoch": 0.6490207969179181, "grad_norm": 0.2674868702888489, "learning_rate": 1.652860843919533e-05, "loss": 0.1154, "step": 36388 }, { "epoch": 0.6490386330396318, "grad_norm": 0.3101354241371155, "learning_rate": 1.6527144033552805e-05, "loss": 0.1401, "step": 36389 }, { "epoch": 0.6490564691613455, "grad_norm": 0.2235003560781479, "learning_rate": 1.6525679660754006e-05, "loss": 0.0861, "step": 36390 }, { "epoch": 0.6490743052830592, "grad_norm": 0.27728989720344543, "learning_rate": 1.6524215320804602e-05, "loss": 0.1419, "step": 36391 }, { "epoch": 0.6490921414047729, "grad_norm": 0.20885300636291504, "learning_rate": 1.652275101371026e-05, "loss": 0.1097, "step": 36392 }, { "epoch": 0.6491099775264867, "grad_norm": 0.16699521243572235, "learning_rate": 1.652128673947666e-05, "loss": 0.0667, "step": 36393 }, { "epoch": 0.6491278136482004, "grad_norm": 0.25783777236938477, "learning_rate": 1.651982249810949e-05, "loss": 0.1369, "step": 36394 }, { "epoch": 0.6491456497699141, "grad_norm": 0.31035658717155457, "learning_rate": 1.6518358289614415e-05, "loss": 0.1015, "step": 36395 }, { "epoch": 0.6491634858916278, "grad_norm": 0.21649883687496185, "learning_rate": 1.651689411399711e-05, "loss": 0.1099, "step": 36396 }, { "epoch": 0.6491813220133414, "grad_norm": 0.4408283233642578, "learning_rate": 1.651542997126326e-05, "loss": 0.1819, "step": 36397 }, { "epoch": 0.6491991581350551, "grad_norm": 0.22239850461483002, "learning_rate": 1.651396586141852e-05, "loss": 0.0948, "step": 36398 }, { "epoch": 0.6492169942567688, "grad_norm": 0.2303265631198883, "learning_rate": 1.6512501784468588e-05, "loss": 0.1221, "step": 36399 }, { "epoch": 0.6492348303784825, "grad_norm": 0.3153400123119354, "learning_rate": 1.651103774041913e-05, "loss": 0.1081, "step": 36400 }, { "epoch": 0.6492526665001962, "grad_norm": 0.25307878851890564, "learning_rate": 1.6509573729275823e-05, "loss": 0.1101, "step": 36401 }, { "epoch": 0.6492705026219099, "grad_norm": 0.23737239837646484, "learning_rate": 1.6508109751044334e-05, "loss": 0.1311, "step": 36402 }, { "epoch": 0.6492883387436236, "grad_norm": 0.3012915551662445, "learning_rate": 1.6506645805730352e-05, "loss": 0.1411, "step": 36403 }, { "epoch": 0.6493061748653373, "grad_norm": 0.21931597590446472, "learning_rate": 1.6505181893339548e-05, "loss": 0.1417, "step": 36404 }, { "epoch": 0.649324010987051, "grad_norm": 0.27210038900375366, "learning_rate": 1.6503718013877583e-05, "loss": 0.0862, "step": 36405 }, { "epoch": 0.6493418471087646, "grad_norm": 0.23444069921970367, "learning_rate": 1.6502254167350146e-05, "loss": 0.1135, "step": 36406 }, { "epoch": 0.6493596832304783, "grad_norm": 0.23188433051109314, "learning_rate": 1.6500790353762903e-05, "loss": 0.1388, "step": 36407 }, { "epoch": 0.649377519352192, "grad_norm": 0.23815785348415375, "learning_rate": 1.6499326573121538e-05, "loss": 0.1237, "step": 36408 }, { "epoch": 0.6493953554739057, "grad_norm": 0.24979661405086517, "learning_rate": 1.649786282543172e-05, "loss": 0.1382, "step": 36409 }, { "epoch": 0.6494131915956195, "grad_norm": 0.21794262528419495, "learning_rate": 1.6496399110699123e-05, "loss": 0.1123, "step": 36410 }, { "epoch": 0.6494310277173332, "grad_norm": 0.31610429286956787, "learning_rate": 1.649493542892941e-05, "loss": 0.0961, "step": 36411 }, { "epoch": 0.6494488638390469, "grad_norm": 0.2783012092113495, "learning_rate": 1.6493471780128272e-05, "loss": 0.0985, "step": 36412 }, { "epoch": 0.6494666999607606, "grad_norm": 0.35128551721572876, "learning_rate": 1.6492008164301375e-05, "loss": 0.1644, "step": 36413 }, { "epoch": 0.6494845360824743, "grad_norm": 0.2892989218235016, "learning_rate": 1.6490544581454394e-05, "loss": 0.122, "step": 36414 }, { "epoch": 0.6495023722041879, "grad_norm": 0.32591888308525085, "learning_rate": 1.6489081031592996e-05, "loss": 0.1734, "step": 36415 }, { "epoch": 0.6495202083259016, "grad_norm": 0.2848375737667084, "learning_rate": 1.6487617514722865e-05, "loss": 0.1781, "step": 36416 }, { "epoch": 0.6495380444476153, "grad_norm": 0.3675004541873932, "learning_rate": 1.648615403084967e-05, "loss": 0.0993, "step": 36417 }, { "epoch": 0.649555880569329, "grad_norm": 0.2759046256542206, "learning_rate": 1.6484690579979083e-05, "loss": 0.0879, "step": 36418 }, { "epoch": 0.6495737166910427, "grad_norm": 0.23948530852794647, "learning_rate": 1.6483227162116765e-05, "loss": 0.1219, "step": 36419 }, { "epoch": 0.6495915528127564, "grad_norm": 0.240462526679039, "learning_rate": 1.6481763777268404e-05, "loss": 0.1548, "step": 36420 }, { "epoch": 0.6496093889344701, "grad_norm": 0.255749374628067, "learning_rate": 1.6480300425439678e-05, "loss": 0.1049, "step": 36421 }, { "epoch": 0.6496272250561838, "grad_norm": 0.30505040287971497, "learning_rate": 1.6478837106636247e-05, "loss": 0.1195, "step": 36422 }, { "epoch": 0.6496450611778974, "grad_norm": 0.2784702777862549, "learning_rate": 1.647737382086379e-05, "loss": 0.0709, "step": 36423 }, { "epoch": 0.6496628972996111, "grad_norm": 0.23502427339553833, "learning_rate": 1.6475910568127962e-05, "loss": 0.1001, "step": 36424 }, { "epoch": 0.6496807334213248, "grad_norm": 0.341061532497406, "learning_rate": 1.6474447348434458e-05, "loss": 0.1271, "step": 36425 }, { "epoch": 0.6496985695430385, "grad_norm": 0.29591915011405945, "learning_rate": 1.647298416178894e-05, "loss": 0.1814, "step": 36426 }, { "epoch": 0.6497164056647523, "grad_norm": 0.26693111658096313, "learning_rate": 1.6471521008197084e-05, "loss": 0.11, "step": 36427 }, { "epoch": 0.649734241786466, "grad_norm": 0.24729366600513458, "learning_rate": 1.6470057887664544e-05, "loss": 0.1333, "step": 36428 }, { "epoch": 0.6497520779081797, "grad_norm": 0.4628967046737671, "learning_rate": 1.6468594800197016e-05, "loss": 0.1053, "step": 36429 }, { "epoch": 0.6497699140298934, "grad_norm": 0.22024314105510712, "learning_rate": 1.646713174580017e-05, "loss": 0.1091, "step": 36430 }, { "epoch": 0.6497877501516071, "grad_norm": 0.223907470703125, "learning_rate": 1.646566872447966e-05, "loss": 0.0935, "step": 36431 }, { "epoch": 0.6498055862733207, "grad_norm": 0.2727627456188202, "learning_rate": 1.6464205736241157e-05, "loss": 0.128, "step": 36432 }, { "epoch": 0.6498234223950344, "grad_norm": 0.3021184504032135, "learning_rate": 1.6462742781090357e-05, "loss": 0.1222, "step": 36433 }, { "epoch": 0.6498412585167481, "grad_norm": 0.29952171444892883, "learning_rate": 1.64612798590329e-05, "loss": 0.1432, "step": 36434 }, { "epoch": 0.6498590946384618, "grad_norm": 0.24076001346111298, "learning_rate": 1.6459816970074483e-05, "loss": 0.0952, "step": 36435 }, { "epoch": 0.6498769307601755, "grad_norm": 0.2131306529045105, "learning_rate": 1.6458354114220766e-05, "loss": 0.1173, "step": 36436 }, { "epoch": 0.6498947668818892, "grad_norm": 0.2728300094604492, "learning_rate": 1.6456891291477407e-05, "loss": 0.1367, "step": 36437 }, { "epoch": 0.6499126030036029, "grad_norm": 0.1958208680152893, "learning_rate": 1.6455428501850096e-05, "loss": 0.1282, "step": 36438 }, { "epoch": 0.6499304391253166, "grad_norm": 0.343657523393631, "learning_rate": 1.6453965745344497e-05, "loss": 0.1647, "step": 36439 }, { "epoch": 0.6499482752470302, "grad_norm": 0.2814064919948578, "learning_rate": 1.6452503021966272e-05, "loss": 0.1054, "step": 36440 }, { "epoch": 0.6499661113687439, "grad_norm": 0.2679210305213928, "learning_rate": 1.6451040331721096e-05, "loss": 0.101, "step": 36441 }, { "epoch": 0.6499839474904576, "grad_norm": 0.23407524824142456, "learning_rate": 1.6449577674614644e-05, "loss": 0.0737, "step": 36442 }, { "epoch": 0.6500017836121714, "grad_norm": 0.22013986110687256, "learning_rate": 1.6448115050652584e-05, "loss": 0.1094, "step": 36443 }, { "epoch": 0.6500196197338851, "grad_norm": 0.2233494222164154, "learning_rate": 1.644665245984059e-05, "loss": 0.12, "step": 36444 }, { "epoch": 0.6500374558555988, "grad_norm": 0.29774734377861023, "learning_rate": 1.644518990218431e-05, "loss": 0.135, "step": 36445 }, { "epoch": 0.6500552919773125, "grad_norm": 0.2491357922554016, "learning_rate": 1.6443727377689437e-05, "loss": 0.1274, "step": 36446 }, { "epoch": 0.6500731280990262, "grad_norm": 0.26864978671073914, "learning_rate": 1.644226488636163e-05, "loss": 0.1243, "step": 36447 }, { "epoch": 0.6500909642207399, "grad_norm": 0.3040514588356018, "learning_rate": 1.6440802428206565e-05, "loss": 0.1074, "step": 36448 }, { "epoch": 0.6501088003424536, "grad_norm": 0.301169753074646, "learning_rate": 1.643934000322991e-05, "loss": 0.0944, "step": 36449 }, { "epoch": 0.6501266364641672, "grad_norm": 0.37722280621528625, "learning_rate": 1.6437877611437314e-05, "loss": 0.2293, "step": 36450 }, { "epoch": 0.6501444725858809, "grad_norm": 0.26749464869499207, "learning_rate": 1.643641525283447e-05, "loss": 0.1476, "step": 36451 }, { "epoch": 0.6501623087075946, "grad_norm": 0.25366055965423584, "learning_rate": 1.6434952927427043e-05, "loss": 0.1162, "step": 36452 }, { "epoch": 0.6501801448293083, "grad_norm": 0.2756160497665405, "learning_rate": 1.6433490635220687e-05, "loss": 0.0932, "step": 36453 }, { "epoch": 0.650197980951022, "grad_norm": 0.23393933475017548, "learning_rate": 1.6432028376221086e-05, "loss": 0.1422, "step": 36454 }, { "epoch": 0.6502158170727357, "grad_norm": 0.3221278488636017, "learning_rate": 1.64305661504339e-05, "loss": 0.1636, "step": 36455 }, { "epoch": 0.6502336531944494, "grad_norm": 0.3012202978134155, "learning_rate": 1.6429103957864806e-05, "loss": 0.1538, "step": 36456 }, { "epoch": 0.6502514893161631, "grad_norm": 0.2315862476825714, "learning_rate": 1.642764179851946e-05, "loss": 0.1317, "step": 36457 }, { "epoch": 0.6502693254378767, "grad_norm": 0.409334272146225, "learning_rate": 1.642617967240353e-05, "loss": 0.1573, "step": 36458 }, { "epoch": 0.6502871615595904, "grad_norm": 0.31774845719337463, "learning_rate": 1.6424717579522696e-05, "loss": 0.1466, "step": 36459 }, { "epoch": 0.6503049976813042, "grad_norm": 0.2690187990665436, "learning_rate": 1.6423255519882622e-05, "loss": 0.1109, "step": 36460 }, { "epoch": 0.6503228338030179, "grad_norm": 0.30237069725990295, "learning_rate": 1.6421793493488964e-05, "loss": 0.1759, "step": 36461 }, { "epoch": 0.6503406699247316, "grad_norm": 0.2925209403038025, "learning_rate": 1.64203315003474e-05, "loss": 0.1144, "step": 36462 }, { "epoch": 0.6503585060464453, "grad_norm": 0.2588273882865906, "learning_rate": 1.641886954046359e-05, "loss": 0.1746, "step": 36463 }, { "epoch": 0.650376342168159, "grad_norm": 0.30670827627182007, "learning_rate": 1.6417407613843214e-05, "loss": 0.1273, "step": 36464 }, { "epoch": 0.6503941782898727, "grad_norm": 0.29957830905914307, "learning_rate": 1.641594572049193e-05, "loss": 0.1078, "step": 36465 }, { "epoch": 0.6504120144115864, "grad_norm": 0.22714495658874512, "learning_rate": 1.6414483860415404e-05, "loss": 0.1129, "step": 36466 }, { "epoch": 0.6504298505333, "grad_norm": 0.26168763637542725, "learning_rate": 1.6413022033619294e-05, "loss": 0.1372, "step": 36467 }, { "epoch": 0.6504476866550137, "grad_norm": 0.2847360670566559, "learning_rate": 1.6411560240109285e-05, "loss": 0.0949, "step": 36468 }, { "epoch": 0.6504655227767274, "grad_norm": 0.2242434173822403, "learning_rate": 1.6410098479891035e-05, "loss": 0.1013, "step": 36469 }, { "epoch": 0.6504833588984411, "grad_norm": 0.21255548298358917, "learning_rate": 1.6408636752970212e-05, "loss": 0.1244, "step": 36470 }, { "epoch": 0.6505011950201548, "grad_norm": 0.27606844902038574, "learning_rate": 1.640717505935247e-05, "loss": 0.1584, "step": 36471 }, { "epoch": 0.6505190311418685, "grad_norm": 0.21023155748844147, "learning_rate": 1.6405713399043493e-05, "loss": 0.1066, "step": 36472 }, { "epoch": 0.6505368672635822, "grad_norm": 0.3528434932231903, "learning_rate": 1.6404251772048946e-05, "loss": 0.1644, "step": 36473 }, { "epoch": 0.6505547033852959, "grad_norm": 0.27668148279190063, "learning_rate": 1.6402790178374484e-05, "loss": 0.1425, "step": 36474 }, { "epoch": 0.6505725395070096, "grad_norm": 0.26946985721588135, "learning_rate": 1.6401328618025767e-05, "loss": 0.0557, "step": 36475 }, { "epoch": 0.6505903756287232, "grad_norm": 0.2576809227466583, "learning_rate": 1.639986709100847e-05, "loss": 0.0878, "step": 36476 }, { "epoch": 0.650608211750437, "grad_norm": 0.285159170627594, "learning_rate": 1.639840559732827e-05, "loss": 0.0972, "step": 36477 }, { "epoch": 0.6506260478721507, "grad_norm": 0.25818583369255066, "learning_rate": 1.6396944136990817e-05, "loss": 0.1072, "step": 36478 }, { "epoch": 0.6506438839938644, "grad_norm": 0.24948260188102722, "learning_rate": 1.6395482710001787e-05, "loss": 0.1252, "step": 36479 }, { "epoch": 0.6506617201155781, "grad_norm": 0.2777738869190216, "learning_rate": 1.639402131636682e-05, "loss": 0.115, "step": 36480 }, { "epoch": 0.6506795562372918, "grad_norm": 0.288896769285202, "learning_rate": 1.6392559956091615e-05, "loss": 0.1548, "step": 36481 }, { "epoch": 0.6506973923590055, "grad_norm": 0.23145286738872528, "learning_rate": 1.639109862918181e-05, "loss": 0.1228, "step": 36482 }, { "epoch": 0.6507152284807192, "grad_norm": 0.2289765328168869, "learning_rate": 1.6389637335643088e-05, "loss": 0.143, "step": 36483 }, { "epoch": 0.6507330646024329, "grad_norm": 0.31107097864151, "learning_rate": 1.6388176075481092e-05, "loss": 0.1567, "step": 36484 }, { "epoch": 0.6507509007241465, "grad_norm": 0.29385697841644287, "learning_rate": 1.6386714848701513e-05, "loss": 0.1277, "step": 36485 }, { "epoch": 0.6507687368458602, "grad_norm": 0.32878878712654114, "learning_rate": 1.6385253655310003e-05, "loss": 0.1413, "step": 36486 }, { "epoch": 0.6507865729675739, "grad_norm": 0.30792805552482605, "learning_rate": 1.638379249531222e-05, "loss": 0.1025, "step": 36487 }, { "epoch": 0.6508044090892876, "grad_norm": 0.3447185754776001, "learning_rate": 1.6382331368713834e-05, "loss": 0.103, "step": 36488 }, { "epoch": 0.6508222452110013, "grad_norm": 0.26704999804496765, "learning_rate": 1.638087027552051e-05, "loss": 0.1207, "step": 36489 }, { "epoch": 0.650840081332715, "grad_norm": 0.23830831050872803, "learning_rate": 1.6379409215737905e-05, "loss": 0.0776, "step": 36490 }, { "epoch": 0.6508579174544287, "grad_norm": 0.22711147367954254, "learning_rate": 1.6377948189371694e-05, "loss": 0.1273, "step": 36491 }, { "epoch": 0.6508757535761424, "grad_norm": 0.2503523528575897, "learning_rate": 1.6376487196427533e-05, "loss": 0.0811, "step": 36492 }, { "epoch": 0.650893589697856, "grad_norm": 0.2805105745792389, "learning_rate": 1.637502623691108e-05, "loss": 0.0857, "step": 36493 }, { "epoch": 0.6509114258195698, "grad_norm": 0.296419620513916, "learning_rate": 1.637356531082801e-05, "loss": 0.1356, "step": 36494 }, { "epoch": 0.6509292619412835, "grad_norm": 0.376533180475235, "learning_rate": 1.637210441818398e-05, "loss": 0.1345, "step": 36495 }, { "epoch": 0.6509470980629972, "grad_norm": 0.3167206048965454, "learning_rate": 1.6370643558984648e-05, "loss": 0.1859, "step": 36496 }, { "epoch": 0.6509649341847109, "grad_norm": 0.24843305349349976, "learning_rate": 1.6369182733235677e-05, "loss": 0.126, "step": 36497 }, { "epoch": 0.6509827703064246, "grad_norm": 0.24652425944805145, "learning_rate": 1.6367721940942748e-05, "loss": 0.1316, "step": 36498 }, { "epoch": 0.6510006064281383, "grad_norm": 0.3290373980998993, "learning_rate": 1.6366261182111507e-05, "loss": 0.1064, "step": 36499 }, { "epoch": 0.651018442549852, "grad_norm": 0.3015643060207367, "learning_rate": 1.6364800456747622e-05, "loss": 0.1616, "step": 36500 }, { "epoch": 0.6510362786715657, "grad_norm": 0.32982712984085083, "learning_rate": 1.6363339764856745e-05, "loss": 0.201, "step": 36501 }, { "epoch": 0.6510541147932793, "grad_norm": 0.26367032527923584, "learning_rate": 1.636187910644455e-05, "loss": 0.1098, "step": 36502 }, { "epoch": 0.651071950914993, "grad_norm": 0.1986067146062851, "learning_rate": 1.636041848151669e-05, "loss": 0.0999, "step": 36503 }, { "epoch": 0.6510897870367067, "grad_norm": 0.28099775314331055, "learning_rate": 1.6358957890078846e-05, "loss": 0.0872, "step": 36504 }, { "epoch": 0.6511076231584204, "grad_norm": 0.2609463036060333, "learning_rate": 1.635749733213666e-05, "loss": 0.1042, "step": 36505 }, { "epoch": 0.6511254592801341, "grad_norm": 0.3888946771621704, "learning_rate": 1.6356036807695788e-05, "loss": 0.1265, "step": 36506 }, { "epoch": 0.6511432954018478, "grad_norm": 0.4863142967224121, "learning_rate": 1.6354576316761915e-05, "loss": 0.1389, "step": 36507 }, { "epoch": 0.6511611315235615, "grad_norm": 0.23898905515670776, "learning_rate": 1.635311585934069e-05, "loss": 0.1282, "step": 36508 }, { "epoch": 0.6511789676452752, "grad_norm": 0.2688154876232147, "learning_rate": 1.635165543543777e-05, "loss": 0.127, "step": 36509 }, { "epoch": 0.6511968037669889, "grad_norm": 0.3582136332988739, "learning_rate": 1.6350195045058817e-05, "loss": 0.1177, "step": 36510 }, { "epoch": 0.6512146398887027, "grad_norm": 0.30040034651756287, "learning_rate": 1.63487346882095e-05, "loss": 0.1222, "step": 36511 }, { "epoch": 0.6512324760104163, "grad_norm": 0.27914533019065857, "learning_rate": 1.6347274364895476e-05, "loss": 0.1124, "step": 36512 }, { "epoch": 0.65125031213213, "grad_norm": 0.3377338647842407, "learning_rate": 1.6345814075122407e-05, "loss": 0.1273, "step": 36513 }, { "epoch": 0.6512681482538437, "grad_norm": 0.27192583680152893, "learning_rate": 1.634435381889594e-05, "loss": 0.1565, "step": 36514 }, { "epoch": 0.6512859843755574, "grad_norm": 0.30426520109176636, "learning_rate": 1.634289359622176e-05, "loss": 0.1439, "step": 36515 }, { "epoch": 0.6513038204972711, "grad_norm": 0.24232463538646698, "learning_rate": 1.6341433407105518e-05, "loss": 0.1534, "step": 36516 }, { "epoch": 0.6513216566189848, "grad_norm": 0.26685845851898193, "learning_rate": 1.633997325155286e-05, "loss": 0.1123, "step": 36517 }, { "epoch": 0.6513394927406985, "grad_norm": 0.21063801646232605, "learning_rate": 1.6338513129569463e-05, "loss": 0.1038, "step": 36518 }, { "epoch": 0.6513573288624122, "grad_norm": 0.3503827154636383, "learning_rate": 1.633705304116097e-05, "loss": 0.1289, "step": 36519 }, { "epoch": 0.6513751649841258, "grad_norm": 0.2744157016277313, "learning_rate": 1.6335592986333063e-05, "loss": 0.148, "step": 36520 }, { "epoch": 0.6513930011058395, "grad_norm": 0.2273973673582077, "learning_rate": 1.633413296509139e-05, "loss": 0.1122, "step": 36521 }, { "epoch": 0.6514108372275532, "grad_norm": 0.32366955280303955, "learning_rate": 1.6332672977441608e-05, "loss": 0.1019, "step": 36522 }, { "epoch": 0.6514286733492669, "grad_norm": 0.4621117413043976, "learning_rate": 1.6331213023389374e-05, "loss": 0.1288, "step": 36523 }, { "epoch": 0.6514465094709806, "grad_norm": 0.2193380892276764, "learning_rate": 1.6329753102940353e-05, "loss": 0.1161, "step": 36524 }, { "epoch": 0.6514643455926943, "grad_norm": 0.3388438820838928, "learning_rate": 1.6328293216100214e-05, "loss": 0.1447, "step": 36525 }, { "epoch": 0.651482181714408, "grad_norm": 0.24322980642318726, "learning_rate": 1.63268333628746e-05, "loss": 0.1169, "step": 36526 }, { "epoch": 0.6515000178361217, "grad_norm": 0.3384764790534973, "learning_rate": 1.632537354326917e-05, "loss": 0.1764, "step": 36527 }, { "epoch": 0.6515178539578355, "grad_norm": 0.3127395212650299, "learning_rate": 1.6323913757289593e-05, "loss": 0.118, "step": 36528 }, { "epoch": 0.6515356900795491, "grad_norm": 0.22918832302093506, "learning_rate": 1.632245400494153e-05, "loss": 0.0829, "step": 36529 }, { "epoch": 0.6515535262012628, "grad_norm": 0.19899854063987732, "learning_rate": 1.632099428623062e-05, "loss": 0.0791, "step": 36530 }, { "epoch": 0.6515713623229765, "grad_norm": 0.2879485785961151, "learning_rate": 1.6319534601162544e-05, "loss": 0.1024, "step": 36531 }, { "epoch": 0.6515891984446902, "grad_norm": 0.22283729910850525, "learning_rate": 1.6318074949742934e-05, "loss": 0.0893, "step": 36532 }, { "epoch": 0.6516070345664039, "grad_norm": 0.24246029555797577, "learning_rate": 1.631661533197748e-05, "loss": 0.1398, "step": 36533 }, { "epoch": 0.6516248706881176, "grad_norm": 0.35749685764312744, "learning_rate": 1.6315155747871823e-05, "loss": 0.1366, "step": 36534 }, { "epoch": 0.6516427068098313, "grad_norm": 0.3563341796398163, "learning_rate": 1.631369619743162e-05, "loss": 0.1227, "step": 36535 }, { "epoch": 0.651660542931545, "grad_norm": 0.208069309592247, "learning_rate": 1.6312236680662525e-05, "loss": 0.1073, "step": 36536 }, { "epoch": 0.6516783790532586, "grad_norm": 0.25359785556793213, "learning_rate": 1.6310777197570205e-05, "loss": 0.1136, "step": 36537 }, { "epoch": 0.6516962151749723, "grad_norm": 0.273954838514328, "learning_rate": 1.6309317748160312e-05, "loss": 0.1347, "step": 36538 }, { "epoch": 0.651714051296686, "grad_norm": 0.3235228955745697, "learning_rate": 1.6307858332438513e-05, "loss": 0.1697, "step": 36539 }, { "epoch": 0.6517318874183997, "grad_norm": 0.22341467440128326, "learning_rate": 1.6306398950410443e-05, "loss": 0.0636, "step": 36540 }, { "epoch": 0.6517497235401134, "grad_norm": 0.24738852679729462, "learning_rate": 1.6304939602081783e-05, "loss": 0.1257, "step": 36541 }, { "epoch": 0.6517675596618271, "grad_norm": 0.6136704683303833, "learning_rate": 1.6303480287458184e-05, "loss": 0.1652, "step": 36542 }, { "epoch": 0.6517853957835408, "grad_norm": 0.2681564390659332, "learning_rate": 1.6302021006545296e-05, "loss": 0.1095, "step": 36543 }, { "epoch": 0.6518032319052546, "grad_norm": 0.30172199010849, "learning_rate": 1.6300561759348775e-05, "loss": 0.1739, "step": 36544 }, { "epoch": 0.6518210680269683, "grad_norm": 0.22530655562877655, "learning_rate": 1.629910254587428e-05, "loss": 0.1269, "step": 36545 }, { "epoch": 0.651838904148682, "grad_norm": 0.18717870116233826, "learning_rate": 1.629764336612747e-05, "loss": 0.0847, "step": 36546 }, { "epoch": 0.6518567402703956, "grad_norm": 0.3527531623840332, "learning_rate": 1.6296184220114005e-05, "loss": 0.1493, "step": 36547 }, { "epoch": 0.6518745763921093, "grad_norm": 0.26330482959747314, "learning_rate": 1.6294725107839536e-05, "loss": 0.119, "step": 36548 }, { "epoch": 0.651892412513823, "grad_norm": 0.26929330825805664, "learning_rate": 1.6293266029309707e-05, "loss": 0.0991, "step": 36549 }, { "epoch": 0.6519102486355367, "grad_norm": 0.28058624267578125, "learning_rate": 1.62918069845302e-05, "loss": 0.1234, "step": 36550 }, { "epoch": 0.6519280847572504, "grad_norm": 0.2653823792934418, "learning_rate": 1.6290347973506656e-05, "loss": 0.0905, "step": 36551 }, { "epoch": 0.6519459208789641, "grad_norm": 0.24755524098873138, "learning_rate": 1.6288888996244726e-05, "loss": 0.1466, "step": 36552 }, { "epoch": 0.6519637570006778, "grad_norm": 0.23279769718647003, "learning_rate": 1.6287430052750062e-05, "loss": 0.1251, "step": 36553 }, { "epoch": 0.6519815931223915, "grad_norm": 0.2427375167608261, "learning_rate": 1.6285971143028344e-05, "loss": 0.066, "step": 36554 }, { "epoch": 0.6519994292441051, "grad_norm": 0.27041172981262207, "learning_rate": 1.6284512267085212e-05, "loss": 0.1446, "step": 36555 }, { "epoch": 0.6520172653658188, "grad_norm": 0.22511109709739685, "learning_rate": 1.6283053424926314e-05, "loss": 0.1106, "step": 36556 }, { "epoch": 0.6520351014875325, "grad_norm": 0.27532774209976196, "learning_rate": 1.6281594616557312e-05, "loss": 0.1333, "step": 36557 }, { "epoch": 0.6520529376092462, "grad_norm": 0.2043699026107788, "learning_rate": 1.628013584198386e-05, "loss": 0.0577, "step": 36558 }, { "epoch": 0.6520707737309599, "grad_norm": 0.3530641794204712, "learning_rate": 1.6278677101211615e-05, "loss": 0.1114, "step": 36559 }, { "epoch": 0.6520886098526736, "grad_norm": 0.22718781232833862, "learning_rate": 1.627721839424623e-05, "loss": 0.099, "step": 36560 }, { "epoch": 0.6521064459743874, "grad_norm": 0.2519572079181671, "learning_rate": 1.627575972109336e-05, "loss": 0.1555, "step": 36561 }, { "epoch": 0.6521242820961011, "grad_norm": 0.2823973298072815, "learning_rate": 1.6274301081758653e-05, "loss": 0.1236, "step": 36562 }, { "epoch": 0.6521421182178148, "grad_norm": 0.25831878185272217, "learning_rate": 1.6272842476247774e-05, "loss": 0.1324, "step": 36563 }, { "epoch": 0.6521599543395284, "grad_norm": 0.2902027666568756, "learning_rate": 1.6271383904566374e-05, "loss": 0.0829, "step": 36564 }, { "epoch": 0.6521777904612421, "grad_norm": 0.2948017120361328, "learning_rate": 1.6269925366720102e-05, "loss": 0.1481, "step": 36565 }, { "epoch": 0.6521956265829558, "grad_norm": 0.2625763416290283, "learning_rate": 1.626846686271461e-05, "loss": 0.0787, "step": 36566 }, { "epoch": 0.6522134627046695, "grad_norm": 0.3372184932231903, "learning_rate": 1.6267008392555556e-05, "loss": 0.1731, "step": 36567 }, { "epoch": 0.6522312988263832, "grad_norm": 0.3237401247024536, "learning_rate": 1.6265549956248606e-05, "loss": 0.1827, "step": 36568 }, { "epoch": 0.6522491349480969, "grad_norm": 0.2846832871437073, "learning_rate": 1.626409155379939e-05, "loss": 0.1373, "step": 36569 }, { "epoch": 0.6522669710698106, "grad_norm": 0.30843910574913025, "learning_rate": 1.6262633185213582e-05, "loss": 0.12, "step": 36570 }, { "epoch": 0.6522848071915243, "grad_norm": 0.24406075477600098, "learning_rate": 1.6261174850496812e-05, "loss": 0.0752, "step": 36571 }, { "epoch": 0.652302643313238, "grad_norm": 0.2792684733867645, "learning_rate": 1.6259716549654757e-05, "loss": 0.1378, "step": 36572 }, { "epoch": 0.6523204794349516, "grad_norm": 0.240322083234787, "learning_rate": 1.6258258282693053e-05, "loss": 0.1248, "step": 36573 }, { "epoch": 0.6523383155566653, "grad_norm": 0.29805871844291687, "learning_rate": 1.6256800049617367e-05, "loss": 0.151, "step": 36574 }, { "epoch": 0.652356151678379, "grad_norm": 0.19931620359420776, "learning_rate": 1.625534185043333e-05, "loss": 0.1365, "step": 36575 }, { "epoch": 0.6523739878000927, "grad_norm": 0.24127689003944397, "learning_rate": 1.6253883685146622e-05, "loss": 0.1463, "step": 36576 }, { "epoch": 0.6523918239218064, "grad_norm": 0.29494351148605347, "learning_rate": 1.625242555376288e-05, "loss": 0.1675, "step": 36577 }, { "epoch": 0.6524096600435202, "grad_norm": 0.296726256608963, "learning_rate": 1.625096745628776e-05, "loss": 0.0924, "step": 36578 }, { "epoch": 0.6524274961652339, "grad_norm": 0.2584410011768341, "learning_rate": 1.6249509392726896e-05, "loss": 0.1329, "step": 36579 }, { "epoch": 0.6524453322869476, "grad_norm": 0.30448561906814575, "learning_rate": 1.6248051363085966e-05, "loss": 0.1142, "step": 36580 }, { "epoch": 0.6524631684086613, "grad_norm": 0.29676708579063416, "learning_rate": 1.6246593367370616e-05, "loss": 0.081, "step": 36581 }, { "epoch": 0.6524810045303749, "grad_norm": 0.28955918550491333, "learning_rate": 1.624513540558649e-05, "loss": 0.1582, "step": 36582 }, { "epoch": 0.6524988406520886, "grad_norm": 0.3830130398273468, "learning_rate": 1.624367747773925e-05, "loss": 0.1056, "step": 36583 }, { "epoch": 0.6525166767738023, "grad_norm": 0.2280164361000061, "learning_rate": 1.624221958383453e-05, "loss": 0.0884, "step": 36584 }, { "epoch": 0.652534512895516, "grad_norm": 0.16443973779678345, "learning_rate": 1.6240761723877994e-05, "loss": 0.0975, "step": 36585 }, { "epoch": 0.6525523490172297, "grad_norm": 0.2433801144361496, "learning_rate": 1.623930389787529e-05, "loss": 0.1158, "step": 36586 }, { "epoch": 0.6525701851389434, "grad_norm": 0.33221641182899475, "learning_rate": 1.623784610583208e-05, "loss": 0.1469, "step": 36587 }, { "epoch": 0.6525880212606571, "grad_norm": 0.6027882695198059, "learning_rate": 1.6236388347753984e-05, "loss": 0.1325, "step": 36588 }, { "epoch": 0.6526058573823708, "grad_norm": 0.23628756403923035, "learning_rate": 1.623493062364669e-05, "loss": 0.165, "step": 36589 }, { "epoch": 0.6526236935040844, "grad_norm": 0.29483330249786377, "learning_rate": 1.623347293351583e-05, "loss": 0.0888, "step": 36590 }, { "epoch": 0.6526415296257981, "grad_norm": 0.2532746493816376, "learning_rate": 1.6232015277367062e-05, "loss": 0.1564, "step": 36591 }, { "epoch": 0.6526593657475118, "grad_norm": 0.3249821066856384, "learning_rate": 1.6230557655206015e-05, "loss": 0.1461, "step": 36592 }, { "epoch": 0.6526772018692255, "grad_norm": 0.2694990336894989, "learning_rate": 1.6229100067038372e-05, "loss": 0.1127, "step": 36593 }, { "epoch": 0.6526950379909392, "grad_norm": 0.32716861367225647, "learning_rate": 1.6227642512869757e-05, "loss": 0.151, "step": 36594 }, { "epoch": 0.652712874112653, "grad_norm": 0.29200080037117004, "learning_rate": 1.6226184992705833e-05, "loss": 0.1053, "step": 36595 }, { "epoch": 0.6527307102343667, "grad_norm": 0.32692429423332214, "learning_rate": 1.622472750655225e-05, "loss": 0.1206, "step": 36596 }, { "epoch": 0.6527485463560804, "grad_norm": 0.258450984954834, "learning_rate": 1.6223270054414642e-05, "loss": 0.1286, "step": 36597 }, { "epoch": 0.6527663824777941, "grad_norm": 0.3511247932910919, "learning_rate": 1.622181263629868e-05, "loss": 0.1628, "step": 36598 }, { "epoch": 0.6527842185995077, "grad_norm": 0.25084805488586426, "learning_rate": 1.6220355252210008e-05, "loss": 0.0885, "step": 36599 }, { "epoch": 0.6528020547212214, "grad_norm": 0.2578677535057068, "learning_rate": 1.6218897902154263e-05, "loss": 0.1078, "step": 36600 }, { "epoch": 0.6528198908429351, "grad_norm": 0.39849749207496643, "learning_rate": 1.6217440586137097e-05, "loss": 0.2284, "step": 36601 }, { "epoch": 0.6528377269646488, "grad_norm": 0.1891050636768341, "learning_rate": 1.6215983304164177e-05, "loss": 0.0667, "step": 36602 }, { "epoch": 0.6528555630863625, "grad_norm": 0.260824054479599, "learning_rate": 1.6214526056241138e-05, "loss": 0.1826, "step": 36603 }, { "epoch": 0.6528733992080762, "grad_norm": 0.3125922977924347, "learning_rate": 1.621306884237363e-05, "loss": 0.1529, "step": 36604 }, { "epoch": 0.6528912353297899, "grad_norm": 0.28257906436920166, "learning_rate": 1.6211611662567294e-05, "loss": 0.1198, "step": 36605 }, { "epoch": 0.6529090714515036, "grad_norm": 0.3025089204311371, "learning_rate": 1.62101545168278e-05, "loss": 0.089, "step": 36606 }, { "epoch": 0.6529269075732173, "grad_norm": 0.25927066802978516, "learning_rate": 1.620869740516077e-05, "loss": 0.1249, "step": 36607 }, { "epoch": 0.6529447436949309, "grad_norm": 0.2381625920534134, "learning_rate": 1.6207240327571875e-05, "loss": 0.1176, "step": 36608 }, { "epoch": 0.6529625798166446, "grad_norm": 0.29237988591194153, "learning_rate": 1.620578328406675e-05, "loss": 0.1321, "step": 36609 }, { "epoch": 0.6529804159383583, "grad_norm": 0.3511929512023926, "learning_rate": 1.6204326274651037e-05, "loss": 0.1419, "step": 36610 }, { "epoch": 0.652998252060072, "grad_norm": 0.23130159080028534, "learning_rate": 1.6202869299330407e-05, "loss": 0.101, "step": 36611 }, { "epoch": 0.6530160881817858, "grad_norm": 0.3628959655761719, "learning_rate": 1.6201412358110493e-05, "loss": 0.1716, "step": 36612 }, { "epoch": 0.6530339243034995, "grad_norm": 0.286790132522583, "learning_rate": 1.619995545099694e-05, "loss": 0.1554, "step": 36613 }, { "epoch": 0.6530517604252132, "grad_norm": 0.33376434445381165, "learning_rate": 1.6198498577995397e-05, "loss": 0.1423, "step": 36614 }, { "epoch": 0.6530695965469269, "grad_norm": 0.20308566093444824, "learning_rate": 1.6197041739111514e-05, "loss": 0.1381, "step": 36615 }, { "epoch": 0.6530874326686406, "grad_norm": 0.21876616775989532, "learning_rate": 1.619558493435094e-05, "loss": 0.1319, "step": 36616 }, { "epoch": 0.6531052687903542, "grad_norm": 0.2928486764431, "learning_rate": 1.619412816371932e-05, "loss": 0.132, "step": 36617 }, { "epoch": 0.6531231049120679, "grad_norm": 0.3087984621524811, "learning_rate": 1.6192671427222296e-05, "loss": 0.1788, "step": 36618 }, { "epoch": 0.6531409410337816, "grad_norm": 0.23868723213672638, "learning_rate": 1.619121472486552e-05, "loss": 0.1063, "step": 36619 }, { "epoch": 0.6531587771554953, "grad_norm": 0.25535544753074646, "learning_rate": 1.6189758056654646e-05, "loss": 0.1242, "step": 36620 }, { "epoch": 0.653176613277209, "grad_norm": 0.26487067341804504, "learning_rate": 1.618830142259531e-05, "loss": 0.1234, "step": 36621 }, { "epoch": 0.6531944493989227, "grad_norm": 0.20190942287445068, "learning_rate": 1.618684482269316e-05, "loss": 0.1064, "step": 36622 }, { "epoch": 0.6532122855206364, "grad_norm": 0.29771608114242554, "learning_rate": 1.6185388256953834e-05, "loss": 0.1384, "step": 36623 }, { "epoch": 0.6532301216423501, "grad_norm": 0.2840154767036438, "learning_rate": 1.6183931725383e-05, "loss": 0.1603, "step": 36624 }, { "epoch": 0.6532479577640637, "grad_norm": 0.31259262561798096, "learning_rate": 1.618247522798629e-05, "loss": 0.1127, "step": 36625 }, { "epoch": 0.6532657938857774, "grad_norm": 0.2954271733760834, "learning_rate": 1.618101876476935e-05, "loss": 0.0857, "step": 36626 }, { "epoch": 0.6532836300074911, "grad_norm": 0.2797059118747711, "learning_rate": 1.6179562335737822e-05, "loss": 0.1001, "step": 36627 }, { "epoch": 0.6533014661292048, "grad_norm": 0.20581994950771332, "learning_rate": 1.6178105940897365e-05, "loss": 0.109, "step": 36628 }, { "epoch": 0.6533193022509186, "grad_norm": 0.24776767194271088, "learning_rate": 1.6176649580253613e-05, "loss": 0.1214, "step": 36629 }, { "epoch": 0.6533371383726323, "grad_norm": 0.29020121693611145, "learning_rate": 1.6175193253812215e-05, "loss": 0.098, "step": 36630 }, { "epoch": 0.653354974494346, "grad_norm": 0.21446137130260468, "learning_rate": 1.6173736961578804e-05, "loss": 0.1118, "step": 36631 }, { "epoch": 0.6533728106160597, "grad_norm": 0.3136409819126129, "learning_rate": 1.6172280703559057e-05, "loss": 0.1483, "step": 36632 }, { "epoch": 0.6533906467377734, "grad_norm": 0.3030506670475006, "learning_rate": 1.617082447975859e-05, "loss": 0.14, "step": 36633 }, { "epoch": 0.653408482859487, "grad_norm": 0.2149314284324646, "learning_rate": 1.6169368290183056e-05, "loss": 0.1223, "step": 36634 }, { "epoch": 0.6534263189812007, "grad_norm": 0.29998061060905457, "learning_rate": 1.6167912134838102e-05, "loss": 0.1749, "step": 36635 }, { "epoch": 0.6534441551029144, "grad_norm": 0.24123790860176086, "learning_rate": 1.6166456013729365e-05, "loss": 0.1209, "step": 36636 }, { "epoch": 0.6534619912246281, "grad_norm": 0.3291873335838318, "learning_rate": 1.6164999926862505e-05, "loss": 0.1397, "step": 36637 }, { "epoch": 0.6534798273463418, "grad_norm": 0.2772262990474701, "learning_rate": 1.6163543874243155e-05, "loss": 0.1123, "step": 36638 }, { "epoch": 0.6534976634680555, "grad_norm": 0.2379869520664215, "learning_rate": 1.616208785587696e-05, "loss": 0.0894, "step": 36639 }, { "epoch": 0.6535154995897692, "grad_norm": 0.21421286463737488, "learning_rate": 1.616063187176956e-05, "loss": 0.1177, "step": 36640 }, { "epoch": 0.6535333357114829, "grad_norm": 0.324499249458313, "learning_rate": 1.6159175921926612e-05, "loss": 0.1484, "step": 36641 }, { "epoch": 0.6535511718331966, "grad_norm": 0.2708475589752197, "learning_rate": 1.6157720006353744e-05, "loss": 0.1897, "step": 36642 }, { "epoch": 0.6535690079549102, "grad_norm": 0.19356797635555267, "learning_rate": 1.6156264125056613e-05, "loss": 0.1316, "step": 36643 }, { "epoch": 0.6535868440766239, "grad_norm": 0.21283204853534698, "learning_rate": 1.615480827804085e-05, "loss": 0.1091, "step": 36644 }, { "epoch": 0.6536046801983377, "grad_norm": 0.24207979440689087, "learning_rate": 1.615335246531211e-05, "loss": 0.0879, "step": 36645 }, { "epoch": 0.6536225163200514, "grad_norm": 0.249407559633255, "learning_rate": 1.6151896686876035e-05, "loss": 0.1135, "step": 36646 }, { "epoch": 0.6536403524417651, "grad_norm": 0.20008577406406403, "learning_rate": 1.6150440942738265e-05, "loss": 0.0819, "step": 36647 }, { "epoch": 0.6536581885634788, "grad_norm": 0.2643984258174896, "learning_rate": 1.6148985232904434e-05, "loss": 0.1165, "step": 36648 }, { "epoch": 0.6536760246851925, "grad_norm": 0.24120087921619415, "learning_rate": 1.61475295573802e-05, "loss": 0.1659, "step": 36649 }, { "epoch": 0.6536938608069062, "grad_norm": 0.25766047835350037, "learning_rate": 1.614607391617119e-05, "loss": 0.2027, "step": 36650 }, { "epoch": 0.6537116969286199, "grad_norm": 0.2485821694135666, "learning_rate": 1.6144618309283066e-05, "loss": 0.155, "step": 36651 }, { "epoch": 0.6537295330503335, "grad_norm": 0.26759710907936096, "learning_rate": 1.6143162736721455e-05, "loss": 0.1197, "step": 36652 }, { "epoch": 0.6537473691720472, "grad_norm": 0.256425678730011, "learning_rate": 1.6141707198492e-05, "loss": 0.0932, "step": 36653 }, { "epoch": 0.6537652052937609, "grad_norm": 0.26338186860084534, "learning_rate": 1.6140251694600357e-05, "loss": 0.1373, "step": 36654 }, { "epoch": 0.6537830414154746, "grad_norm": 0.3361250162124634, "learning_rate": 1.6138796225052156e-05, "loss": 0.1741, "step": 36655 }, { "epoch": 0.6538008775371883, "grad_norm": 0.24860045313835144, "learning_rate": 1.6137340789853038e-05, "loss": 0.1433, "step": 36656 }, { "epoch": 0.653818713658902, "grad_norm": 0.2154330611228943, "learning_rate": 1.6135885389008644e-05, "loss": 0.0919, "step": 36657 }, { "epoch": 0.6538365497806157, "grad_norm": 0.23145145177841187, "learning_rate": 1.6134430022524627e-05, "loss": 0.1149, "step": 36658 }, { "epoch": 0.6538543859023294, "grad_norm": 0.2496713399887085, "learning_rate": 1.6132974690406623e-05, "loss": 0.1318, "step": 36659 }, { "epoch": 0.653872222024043, "grad_norm": 0.26362666487693787, "learning_rate": 1.613151939266027e-05, "loss": 0.1569, "step": 36660 }, { "epoch": 0.6538900581457567, "grad_norm": 0.3588272035121918, "learning_rate": 1.6130064129291205e-05, "loss": 0.1902, "step": 36661 }, { "epoch": 0.6539078942674705, "grad_norm": 0.21589115262031555, "learning_rate": 1.612860890030508e-05, "loss": 0.1523, "step": 36662 }, { "epoch": 0.6539257303891842, "grad_norm": 0.3719049394130707, "learning_rate": 1.6127153705707525e-05, "loss": 0.1664, "step": 36663 }, { "epoch": 0.6539435665108979, "grad_norm": 0.36984556913375854, "learning_rate": 1.61256985455042e-05, "loss": 0.1154, "step": 36664 }, { "epoch": 0.6539614026326116, "grad_norm": 0.19224058091640472, "learning_rate": 1.6124243419700724e-05, "loss": 0.1032, "step": 36665 }, { "epoch": 0.6539792387543253, "grad_norm": 0.30286917090415955, "learning_rate": 1.612278832830274e-05, "loss": 0.1323, "step": 36666 }, { "epoch": 0.653997074876039, "grad_norm": 0.2640041410923004, "learning_rate": 1.6121333271315906e-05, "loss": 0.1574, "step": 36667 }, { "epoch": 0.6540149109977527, "grad_norm": 0.235600546002388, "learning_rate": 1.611987824874585e-05, "loss": 0.1564, "step": 36668 }, { "epoch": 0.6540327471194664, "grad_norm": 0.2564827501773834, "learning_rate": 1.6118423260598205e-05, "loss": 0.1209, "step": 36669 }, { "epoch": 0.65405058324118, "grad_norm": 0.24721381068229675, "learning_rate": 1.611696830687862e-05, "loss": 0.1377, "step": 36670 }, { "epoch": 0.6540684193628937, "grad_norm": 0.23483727872371674, "learning_rate": 1.6115513387592733e-05, "loss": 0.1321, "step": 36671 }, { "epoch": 0.6540862554846074, "grad_norm": 0.29178568720817566, "learning_rate": 1.6114058502746195e-05, "loss": 0.1682, "step": 36672 }, { "epoch": 0.6541040916063211, "grad_norm": 0.2708684206008911, "learning_rate": 1.6112603652344634e-05, "loss": 0.1284, "step": 36673 }, { "epoch": 0.6541219277280348, "grad_norm": 0.2366989552974701, "learning_rate": 1.6111148836393685e-05, "loss": 0.1639, "step": 36674 }, { "epoch": 0.6541397638497485, "grad_norm": 0.3067340552806854, "learning_rate": 1.610969405489899e-05, "loss": 0.1139, "step": 36675 }, { "epoch": 0.6541575999714622, "grad_norm": 0.24117280542850494, "learning_rate": 1.6108239307866196e-05, "loss": 0.1808, "step": 36676 }, { "epoch": 0.6541754360931759, "grad_norm": 0.2600924074649811, "learning_rate": 1.6106784595300938e-05, "loss": 0.1434, "step": 36677 }, { "epoch": 0.6541932722148895, "grad_norm": 0.3785773813724518, "learning_rate": 1.6105329917208857e-05, "loss": 0.1626, "step": 36678 }, { "epoch": 0.6542111083366033, "grad_norm": 0.23973430693149567, "learning_rate": 1.6103875273595574e-05, "loss": 0.1316, "step": 36679 }, { "epoch": 0.654228944458317, "grad_norm": 0.2361200600862503, "learning_rate": 1.610242066446676e-05, "loss": 0.1131, "step": 36680 }, { "epoch": 0.6542467805800307, "grad_norm": 0.415348619222641, "learning_rate": 1.6100966089828035e-05, "loss": 0.1164, "step": 36681 }, { "epoch": 0.6542646167017444, "grad_norm": 0.21911504864692688, "learning_rate": 1.6099511549685044e-05, "loss": 0.1025, "step": 36682 }, { "epoch": 0.6542824528234581, "grad_norm": 0.2965392470359802, "learning_rate": 1.6098057044043403e-05, "loss": 0.1013, "step": 36683 }, { "epoch": 0.6543002889451718, "grad_norm": 0.2654988467693329, "learning_rate": 1.6096602572908772e-05, "loss": 0.1355, "step": 36684 }, { "epoch": 0.6543181250668855, "grad_norm": 0.24729987978935242, "learning_rate": 1.609514813628679e-05, "loss": 0.1608, "step": 36685 }, { "epoch": 0.6543359611885992, "grad_norm": 0.2575724422931671, "learning_rate": 1.6093693734183096e-05, "loss": 0.0807, "step": 36686 }, { "epoch": 0.6543537973103128, "grad_norm": 0.30059322714805603, "learning_rate": 1.609223936660332e-05, "loss": 0.0856, "step": 36687 }, { "epoch": 0.6543716334320265, "grad_norm": 0.3593789041042328, "learning_rate": 1.6090785033553087e-05, "loss": 0.0964, "step": 36688 }, { "epoch": 0.6543894695537402, "grad_norm": 0.23953427374362946, "learning_rate": 1.6089330735038055e-05, "loss": 0.0602, "step": 36689 }, { "epoch": 0.6544073056754539, "grad_norm": 0.23126541078090668, "learning_rate": 1.6087876471063855e-05, "loss": 0.1121, "step": 36690 }, { "epoch": 0.6544251417971676, "grad_norm": 0.28415441513061523, "learning_rate": 1.6086422241636127e-05, "loss": 0.1136, "step": 36691 }, { "epoch": 0.6544429779188813, "grad_norm": 0.3295811116695404, "learning_rate": 1.6084968046760496e-05, "loss": 0.1321, "step": 36692 }, { "epoch": 0.654460814040595, "grad_norm": 0.2556428015232086, "learning_rate": 1.608351388644262e-05, "loss": 0.0838, "step": 36693 }, { "epoch": 0.6544786501623087, "grad_norm": 0.202883780002594, "learning_rate": 1.608205976068812e-05, "loss": 0.1283, "step": 36694 }, { "epoch": 0.6544964862840223, "grad_norm": 0.21548840403556824, "learning_rate": 1.6080605669502638e-05, "loss": 0.0958, "step": 36695 }, { "epoch": 0.6545143224057361, "grad_norm": 0.3393465578556061, "learning_rate": 1.60791516128918e-05, "loss": 0.1186, "step": 36696 }, { "epoch": 0.6545321585274498, "grad_norm": 0.26381170749664307, "learning_rate": 1.6077697590861258e-05, "loss": 0.1762, "step": 36697 }, { "epoch": 0.6545499946491635, "grad_norm": 0.3036540448665619, "learning_rate": 1.607624360341664e-05, "loss": 0.1498, "step": 36698 }, { "epoch": 0.6545678307708772, "grad_norm": 0.3125988245010376, "learning_rate": 1.607478965056359e-05, "loss": 0.2193, "step": 36699 }, { "epoch": 0.6545856668925909, "grad_norm": 0.21715353429317474, "learning_rate": 1.607333573230774e-05, "loss": 0.1198, "step": 36700 }, { "epoch": 0.6546035030143046, "grad_norm": 0.2881722152233124, "learning_rate": 1.6071881848654706e-05, "loss": 0.1284, "step": 36701 }, { "epoch": 0.6546213391360183, "grad_norm": 0.28733929991722107, "learning_rate": 1.6070427999610156e-05, "loss": 0.1008, "step": 36702 }, { "epoch": 0.654639175257732, "grad_norm": 0.2897832691669464, "learning_rate": 1.6068974185179712e-05, "loss": 0.1241, "step": 36703 }, { "epoch": 0.6546570113794457, "grad_norm": 0.23576177656650543, "learning_rate": 1.6067520405369002e-05, "loss": 0.1125, "step": 36704 }, { "epoch": 0.6546748475011593, "grad_norm": 0.26221007108688354, "learning_rate": 1.606606666018367e-05, "loss": 0.0948, "step": 36705 }, { "epoch": 0.654692683622873, "grad_norm": 0.2798518240451813, "learning_rate": 1.6064612949629347e-05, "loss": 0.0842, "step": 36706 }, { "epoch": 0.6547105197445867, "grad_norm": 0.30837762355804443, "learning_rate": 1.6063159273711677e-05, "loss": 0.144, "step": 36707 }, { "epoch": 0.6547283558663004, "grad_norm": 0.32586193084716797, "learning_rate": 1.6061705632436292e-05, "loss": 0.158, "step": 36708 }, { "epoch": 0.6547461919880141, "grad_norm": 0.2606954574584961, "learning_rate": 1.6060252025808807e-05, "loss": 0.1452, "step": 36709 }, { "epoch": 0.6547640281097278, "grad_norm": 0.22420062124729156, "learning_rate": 1.6058798453834884e-05, "loss": 0.1069, "step": 36710 }, { "epoch": 0.6547818642314415, "grad_norm": 0.28502655029296875, "learning_rate": 1.6057344916520145e-05, "loss": 0.1007, "step": 36711 }, { "epoch": 0.6547997003531552, "grad_norm": 0.2172628939151764, "learning_rate": 1.6055891413870227e-05, "loss": 0.1203, "step": 36712 }, { "epoch": 0.654817536474869, "grad_norm": 0.22606569528579712, "learning_rate": 1.6054437945890767e-05, "loss": 0.1122, "step": 36713 }, { "epoch": 0.6548353725965826, "grad_norm": 0.21293863654136658, "learning_rate": 1.605298451258738e-05, "loss": 0.1122, "step": 36714 }, { "epoch": 0.6548532087182963, "grad_norm": 0.2248375415802002, "learning_rate": 1.605153111396573e-05, "loss": 0.1657, "step": 36715 }, { "epoch": 0.65487104484001, "grad_norm": 0.4894512891769409, "learning_rate": 1.6050077750031433e-05, "loss": 0.1289, "step": 36716 }, { "epoch": 0.6548888809617237, "grad_norm": 0.27615973353385925, "learning_rate": 1.604862442079012e-05, "loss": 0.2331, "step": 36717 }, { "epoch": 0.6549067170834374, "grad_norm": 0.23804430663585663, "learning_rate": 1.6047171126247434e-05, "loss": 0.1251, "step": 36718 }, { "epoch": 0.6549245532051511, "grad_norm": 0.24958065152168274, "learning_rate": 1.6045717866409003e-05, "loss": 0.1315, "step": 36719 }, { "epoch": 0.6549423893268648, "grad_norm": 0.231605663895607, "learning_rate": 1.6044264641280465e-05, "loss": 0.1054, "step": 36720 }, { "epoch": 0.6549602254485785, "grad_norm": 0.23613178730010986, "learning_rate": 1.6042811450867457e-05, "loss": 0.0794, "step": 36721 }, { "epoch": 0.6549780615702921, "grad_norm": 0.2802891731262207, "learning_rate": 1.604135829517559e-05, "loss": 0.1407, "step": 36722 }, { "epoch": 0.6549958976920058, "grad_norm": 0.2869025766849518, "learning_rate": 1.6039905174210522e-05, "loss": 0.1379, "step": 36723 }, { "epoch": 0.6550137338137195, "grad_norm": 0.3316204249858856, "learning_rate": 1.6038452087977878e-05, "loss": 0.1316, "step": 36724 }, { "epoch": 0.6550315699354332, "grad_norm": 0.3360423743724823, "learning_rate": 1.6036999036483284e-05, "loss": 0.1494, "step": 36725 }, { "epoch": 0.6550494060571469, "grad_norm": 0.25246569514274597, "learning_rate": 1.6035546019732383e-05, "loss": 0.1211, "step": 36726 }, { "epoch": 0.6550672421788606, "grad_norm": 0.15617601573467255, "learning_rate": 1.603409303773079e-05, "loss": 0.0671, "step": 36727 }, { "epoch": 0.6550850783005743, "grad_norm": 0.22333931922912598, "learning_rate": 1.6032640090484165e-05, "loss": 0.1157, "step": 36728 }, { "epoch": 0.655102914422288, "grad_norm": 0.2514164447784424, "learning_rate": 1.6031187177998117e-05, "loss": 0.1067, "step": 36729 }, { "epoch": 0.6551207505440018, "grad_norm": 0.19420118629932404, "learning_rate": 1.602973430027829e-05, "loss": 0.1108, "step": 36730 }, { "epoch": 0.6551385866657154, "grad_norm": 0.3844279944896698, "learning_rate": 1.6028281457330295e-05, "loss": 0.1365, "step": 36731 }, { "epoch": 0.6551564227874291, "grad_norm": 0.20976576209068298, "learning_rate": 1.6026828649159797e-05, "loss": 0.0951, "step": 36732 }, { "epoch": 0.6551742589091428, "grad_norm": 0.25581127405166626, "learning_rate": 1.60253758757724e-05, "loss": 0.1242, "step": 36733 }, { "epoch": 0.6551920950308565, "grad_norm": 0.23793557286262512, "learning_rate": 1.6023923137173754e-05, "loss": 0.1358, "step": 36734 }, { "epoch": 0.6552099311525702, "grad_norm": 0.3044520318508148, "learning_rate": 1.602247043336947e-05, "loss": 0.1253, "step": 36735 }, { "epoch": 0.6552277672742839, "grad_norm": 0.2654673159122467, "learning_rate": 1.6021017764365203e-05, "loss": 0.1191, "step": 36736 }, { "epoch": 0.6552456033959976, "grad_norm": 0.2546164393424988, "learning_rate": 1.601956513016657e-05, "loss": 0.2122, "step": 36737 }, { "epoch": 0.6552634395177113, "grad_norm": 0.26594099402427673, "learning_rate": 1.6018112530779205e-05, "loss": 0.0932, "step": 36738 }, { "epoch": 0.655281275639425, "grad_norm": 0.18992877006530762, "learning_rate": 1.6016659966208738e-05, "loss": 0.103, "step": 36739 }, { "epoch": 0.6552991117611386, "grad_norm": 0.301959753036499, "learning_rate": 1.6015207436460792e-05, "loss": 0.1153, "step": 36740 }, { "epoch": 0.6553169478828523, "grad_norm": 0.25648316740989685, "learning_rate": 1.6013754941541014e-05, "loss": 0.0859, "step": 36741 }, { "epoch": 0.655334784004566, "grad_norm": 0.21612359583377838, "learning_rate": 1.6012302481455026e-05, "loss": 0.096, "step": 36742 }, { "epoch": 0.6553526201262797, "grad_norm": 0.2982625663280487, "learning_rate": 1.6010850056208455e-05, "loss": 0.1573, "step": 36743 }, { "epoch": 0.6553704562479934, "grad_norm": 0.23175761103630066, "learning_rate": 1.600939766580693e-05, "loss": 0.1157, "step": 36744 }, { "epoch": 0.6553882923697071, "grad_norm": 0.28712818026542664, "learning_rate": 1.600794531025609e-05, "loss": 0.1464, "step": 36745 }, { "epoch": 0.6554061284914209, "grad_norm": 0.19944629073143005, "learning_rate": 1.6006492989561556e-05, "loss": 0.0913, "step": 36746 }, { "epoch": 0.6554239646131346, "grad_norm": 0.19325481355190277, "learning_rate": 1.600504070372897e-05, "loss": 0.0788, "step": 36747 }, { "epoch": 0.6554418007348483, "grad_norm": 0.24068132042884827, "learning_rate": 1.6003588452763936e-05, "loss": 0.1054, "step": 36748 }, { "epoch": 0.655459636856562, "grad_norm": 0.2822006642818451, "learning_rate": 1.6002136236672117e-05, "loss": 0.0884, "step": 36749 }, { "epoch": 0.6554774729782756, "grad_norm": 0.41239863634109497, "learning_rate": 1.600068405545912e-05, "loss": 0.1465, "step": 36750 }, { "epoch": 0.6554953090999893, "grad_norm": 0.27067211270332336, "learning_rate": 1.5999231909130585e-05, "loss": 0.1513, "step": 36751 }, { "epoch": 0.655513145221703, "grad_norm": 0.23247037827968597, "learning_rate": 1.5997779797692127e-05, "loss": 0.1117, "step": 36752 }, { "epoch": 0.6555309813434167, "grad_norm": 0.276947021484375, "learning_rate": 1.5996327721149386e-05, "loss": 0.0959, "step": 36753 }, { "epoch": 0.6555488174651304, "grad_norm": 0.3171471059322357, "learning_rate": 1.5994875679507987e-05, "loss": 0.1664, "step": 36754 }, { "epoch": 0.6555666535868441, "grad_norm": 0.2898976504802704, "learning_rate": 1.5993423672773565e-05, "loss": 0.0861, "step": 36755 }, { "epoch": 0.6555844897085578, "grad_norm": 0.26713642477989197, "learning_rate": 1.5991971700951747e-05, "loss": 0.0912, "step": 36756 }, { "epoch": 0.6556023258302714, "grad_norm": 0.23357582092285156, "learning_rate": 1.5990519764048144e-05, "loss": 0.0912, "step": 36757 }, { "epoch": 0.6556201619519851, "grad_norm": 0.2820734977722168, "learning_rate": 1.598906786206841e-05, "loss": 0.1412, "step": 36758 }, { "epoch": 0.6556379980736988, "grad_norm": 0.222149059176445, "learning_rate": 1.5987615995018162e-05, "loss": 0.0783, "step": 36759 }, { "epoch": 0.6556558341954125, "grad_norm": 0.3330352306365967, "learning_rate": 1.598616416290302e-05, "loss": 0.2134, "step": 36760 }, { "epoch": 0.6556736703171262, "grad_norm": 0.21069347858428955, "learning_rate": 1.5984712365728616e-05, "loss": 0.1493, "step": 36761 }, { "epoch": 0.6556915064388399, "grad_norm": 0.25062334537506104, "learning_rate": 1.5983260603500587e-05, "loss": 0.1475, "step": 36762 }, { "epoch": 0.6557093425605537, "grad_norm": 0.29943299293518066, "learning_rate": 1.5981808876224557e-05, "loss": 0.0934, "step": 36763 }, { "epoch": 0.6557271786822674, "grad_norm": 0.26778480410575867, "learning_rate": 1.5980357183906153e-05, "loss": 0.1454, "step": 36764 }, { "epoch": 0.6557450148039811, "grad_norm": 0.3308759927749634, "learning_rate": 1.5978905526550992e-05, "loss": 0.1256, "step": 36765 }, { "epoch": 0.6557628509256948, "grad_norm": 0.2862666845321655, "learning_rate": 1.597745390416471e-05, "loss": 0.1056, "step": 36766 }, { "epoch": 0.6557806870474084, "grad_norm": 0.23114730417728424, "learning_rate": 1.597600231675293e-05, "loss": 0.0913, "step": 36767 }, { "epoch": 0.6557985231691221, "grad_norm": 0.2226518839597702, "learning_rate": 1.597455076432129e-05, "loss": 0.1028, "step": 36768 }, { "epoch": 0.6558163592908358, "grad_norm": 0.29544445872306824, "learning_rate": 1.5973099246875408e-05, "loss": 0.1827, "step": 36769 }, { "epoch": 0.6558341954125495, "grad_norm": 0.329287052154541, "learning_rate": 1.59716477644209e-05, "loss": 0.1857, "step": 36770 }, { "epoch": 0.6558520315342632, "grad_norm": 0.28796297311782837, "learning_rate": 1.5970196316963416e-05, "loss": 0.1466, "step": 36771 }, { "epoch": 0.6558698676559769, "grad_norm": 0.2649242579936981, "learning_rate": 1.5968744904508567e-05, "loss": 0.1768, "step": 36772 }, { "epoch": 0.6558877037776906, "grad_norm": 0.2383357435464859, "learning_rate": 1.596729352706198e-05, "loss": 0.1196, "step": 36773 }, { "epoch": 0.6559055398994043, "grad_norm": 0.24642813205718994, "learning_rate": 1.5965842184629283e-05, "loss": 0.1664, "step": 36774 }, { "epoch": 0.6559233760211179, "grad_norm": 0.24704863131046295, "learning_rate": 1.5964390877216094e-05, "loss": 0.1128, "step": 36775 }, { "epoch": 0.6559412121428316, "grad_norm": 0.2743159532546997, "learning_rate": 1.5962939604828063e-05, "loss": 0.1505, "step": 36776 }, { "epoch": 0.6559590482645453, "grad_norm": 0.46685105562210083, "learning_rate": 1.5961488367470794e-05, "loss": 0.1633, "step": 36777 }, { "epoch": 0.655976884386259, "grad_norm": 0.20926645398139954, "learning_rate": 1.5960037165149914e-05, "loss": 0.1405, "step": 36778 }, { "epoch": 0.6559947205079727, "grad_norm": 0.20892958343029022, "learning_rate": 1.5958585997871052e-05, "loss": 0.1442, "step": 36779 }, { "epoch": 0.6560125566296865, "grad_norm": 0.21241679787635803, "learning_rate": 1.5957134865639835e-05, "loss": 0.1255, "step": 36780 }, { "epoch": 0.6560303927514002, "grad_norm": 0.2082797735929489, "learning_rate": 1.5955683768461884e-05, "loss": 0.0813, "step": 36781 }, { "epoch": 0.6560482288731139, "grad_norm": 0.1898459792137146, "learning_rate": 1.5954232706342833e-05, "loss": 0.0725, "step": 36782 }, { "epoch": 0.6560660649948276, "grad_norm": 0.2718604803085327, "learning_rate": 1.595278167928829e-05, "loss": 0.1515, "step": 36783 }, { "epoch": 0.6560839011165412, "grad_norm": 0.23570284247398376, "learning_rate": 1.5951330687303902e-05, "loss": 0.1541, "step": 36784 }, { "epoch": 0.6561017372382549, "grad_norm": 0.3093301057815552, "learning_rate": 1.5949879730395278e-05, "loss": 0.1286, "step": 36785 }, { "epoch": 0.6561195733599686, "grad_norm": 0.3159850239753723, "learning_rate": 1.5948428808568044e-05, "loss": 0.1027, "step": 36786 }, { "epoch": 0.6561374094816823, "grad_norm": 0.253013014793396, "learning_rate": 1.5946977921827822e-05, "loss": 0.1438, "step": 36787 }, { "epoch": 0.656155245603396, "grad_norm": 0.2947070598602295, "learning_rate": 1.594552707018024e-05, "loss": 0.1374, "step": 36788 }, { "epoch": 0.6561730817251097, "grad_norm": 0.31292644143104553, "learning_rate": 1.594407625363093e-05, "loss": 0.1542, "step": 36789 }, { "epoch": 0.6561909178468234, "grad_norm": 0.24879132211208344, "learning_rate": 1.594262547218551e-05, "loss": 0.1792, "step": 36790 }, { "epoch": 0.6562087539685371, "grad_norm": 0.21500030159950256, "learning_rate": 1.59411747258496e-05, "loss": 0.1426, "step": 36791 }, { "epoch": 0.6562265900902507, "grad_norm": 0.2210945338010788, "learning_rate": 1.5939724014628816e-05, "loss": 0.1066, "step": 36792 }, { "epoch": 0.6562444262119644, "grad_norm": 0.26017841696739197, "learning_rate": 1.5938273338528802e-05, "loss": 0.1624, "step": 36793 }, { "epoch": 0.6562622623336781, "grad_norm": 0.21701890230178833, "learning_rate": 1.5936822697555166e-05, "loss": 0.1479, "step": 36794 }, { "epoch": 0.6562800984553918, "grad_norm": 0.2686643898487091, "learning_rate": 1.5935372091713536e-05, "loss": 0.1422, "step": 36795 }, { "epoch": 0.6562979345771055, "grad_norm": 0.2434459924697876, "learning_rate": 1.5933921521009526e-05, "loss": 0.0992, "step": 36796 }, { "epoch": 0.6563157706988193, "grad_norm": 0.26744312047958374, "learning_rate": 1.593247098544878e-05, "loss": 0.126, "step": 36797 }, { "epoch": 0.656333606820533, "grad_norm": 0.28083112835884094, "learning_rate": 1.593102048503691e-05, "loss": 0.1702, "step": 36798 }, { "epoch": 0.6563514429422467, "grad_norm": 0.26350125670433044, "learning_rate": 1.5929570019779533e-05, "loss": 0.1566, "step": 36799 }, { "epoch": 0.6563692790639604, "grad_norm": 0.3550322949886322, "learning_rate": 1.5928119589682267e-05, "loss": 0.0997, "step": 36800 }, { "epoch": 0.656387115185674, "grad_norm": 0.24596041440963745, "learning_rate": 1.592666919475075e-05, "loss": 0.1622, "step": 36801 }, { "epoch": 0.6564049513073877, "grad_norm": 0.28364241123199463, "learning_rate": 1.5925218834990596e-05, "loss": 0.1241, "step": 36802 }, { "epoch": 0.6564227874291014, "grad_norm": 0.20477141439914703, "learning_rate": 1.592376851040743e-05, "loss": 0.1216, "step": 36803 }, { "epoch": 0.6564406235508151, "grad_norm": 0.2321634739637375, "learning_rate": 1.5922318221006875e-05, "loss": 0.0839, "step": 36804 }, { "epoch": 0.6564584596725288, "grad_norm": 0.27941617369651794, "learning_rate": 1.5920867966794538e-05, "loss": 0.0998, "step": 36805 }, { "epoch": 0.6564762957942425, "grad_norm": 0.19286619126796722, "learning_rate": 1.5919417747776063e-05, "loss": 0.0718, "step": 36806 }, { "epoch": 0.6564941319159562, "grad_norm": 0.21201549470424652, "learning_rate": 1.5917967563957063e-05, "loss": 0.114, "step": 36807 }, { "epoch": 0.6565119680376699, "grad_norm": 0.22896206378936768, "learning_rate": 1.591651741534315e-05, "loss": 0.0786, "step": 36808 }, { "epoch": 0.6565298041593836, "grad_norm": 0.2870287001132965, "learning_rate": 1.5915067301939953e-05, "loss": 0.1288, "step": 36809 }, { "epoch": 0.6565476402810972, "grad_norm": 0.23888632655143738, "learning_rate": 1.5913617223753093e-05, "loss": 0.1108, "step": 36810 }, { "epoch": 0.6565654764028109, "grad_norm": 0.28307342529296875, "learning_rate": 1.5912167180788194e-05, "loss": 0.1156, "step": 36811 }, { "epoch": 0.6565833125245246, "grad_norm": 0.4067381024360657, "learning_rate": 1.591071717305088e-05, "loss": 0.1558, "step": 36812 }, { "epoch": 0.6566011486462383, "grad_norm": 0.2559942305088043, "learning_rate": 1.5909267200546753e-05, "loss": 0.1419, "step": 36813 }, { "epoch": 0.6566189847679521, "grad_norm": 0.25679758191108704, "learning_rate": 1.5907817263281456e-05, "loss": 0.1075, "step": 36814 }, { "epoch": 0.6566368208896658, "grad_norm": 0.25280389189720154, "learning_rate": 1.5906367361260593e-05, "loss": 0.1116, "step": 36815 }, { "epoch": 0.6566546570113795, "grad_norm": 0.2718549072742462, "learning_rate": 1.5904917494489795e-05, "loss": 0.1559, "step": 36816 }, { "epoch": 0.6566724931330932, "grad_norm": 0.2740013599395752, "learning_rate": 1.590346766297468e-05, "loss": 0.1422, "step": 36817 }, { "epoch": 0.6566903292548069, "grad_norm": 0.2877633571624756, "learning_rate": 1.5902017866720857e-05, "loss": 0.139, "step": 36818 }, { "epoch": 0.6567081653765205, "grad_norm": 0.26468172669410706, "learning_rate": 1.5900568105733965e-05, "loss": 0.1057, "step": 36819 }, { "epoch": 0.6567260014982342, "grad_norm": 0.3145536780357361, "learning_rate": 1.589911838001961e-05, "loss": 0.1154, "step": 36820 }, { "epoch": 0.6567438376199479, "grad_norm": 0.24582944810390472, "learning_rate": 1.5897668689583418e-05, "loss": 0.1042, "step": 36821 }, { "epoch": 0.6567616737416616, "grad_norm": 0.30437421798706055, "learning_rate": 1.5896219034431e-05, "loss": 0.1595, "step": 36822 }, { "epoch": 0.6567795098633753, "grad_norm": 0.22705450654029846, "learning_rate": 1.589476941456798e-05, "loss": 0.1052, "step": 36823 }, { "epoch": 0.656797345985089, "grad_norm": 0.23533622920513153, "learning_rate": 1.5893319829999987e-05, "loss": 0.0945, "step": 36824 }, { "epoch": 0.6568151821068027, "grad_norm": 0.24495822191238403, "learning_rate": 1.5891870280732632e-05, "loss": 0.1221, "step": 36825 }, { "epoch": 0.6568330182285164, "grad_norm": 0.2742573022842407, "learning_rate": 1.5890420766771525e-05, "loss": 0.1, "step": 36826 }, { "epoch": 0.65685085435023, "grad_norm": 0.4061833620071411, "learning_rate": 1.58889712881223e-05, "loss": 0.1925, "step": 36827 }, { "epoch": 0.6568686904719437, "grad_norm": 0.2576970160007477, "learning_rate": 1.588752184479057e-05, "loss": 0.1603, "step": 36828 }, { "epoch": 0.6568865265936574, "grad_norm": 0.25764453411102295, "learning_rate": 1.5886072436781945e-05, "loss": 0.1339, "step": 36829 }, { "epoch": 0.6569043627153711, "grad_norm": 0.2629469037055969, "learning_rate": 1.588462306410206e-05, "loss": 0.1046, "step": 36830 }, { "epoch": 0.6569221988370849, "grad_norm": 0.2936239540576935, "learning_rate": 1.588317372675652e-05, "loss": 0.1129, "step": 36831 }, { "epoch": 0.6569400349587986, "grad_norm": 0.2374291718006134, "learning_rate": 1.5881724424750946e-05, "loss": 0.1002, "step": 36832 }, { "epoch": 0.6569578710805123, "grad_norm": 0.30064189434051514, "learning_rate": 1.5880275158090964e-05, "loss": 0.11, "step": 36833 }, { "epoch": 0.656975707202226, "grad_norm": 0.2121768444776535, "learning_rate": 1.5878825926782183e-05, "loss": 0.1132, "step": 36834 }, { "epoch": 0.6569935433239397, "grad_norm": 0.1883653700351715, "learning_rate": 1.587737673083021e-05, "loss": 0.1404, "step": 36835 }, { "epoch": 0.6570113794456534, "grad_norm": 0.41294899582862854, "learning_rate": 1.5875927570240695e-05, "loss": 0.1462, "step": 36836 }, { "epoch": 0.657029215567367, "grad_norm": 0.26511743664741516, "learning_rate": 1.5874478445019227e-05, "loss": 0.1305, "step": 36837 }, { "epoch": 0.6570470516890807, "grad_norm": 0.4264880418777466, "learning_rate": 1.5873029355171433e-05, "loss": 0.1112, "step": 36838 }, { "epoch": 0.6570648878107944, "grad_norm": 0.3528462052345276, "learning_rate": 1.587158030070292e-05, "loss": 0.1159, "step": 36839 }, { "epoch": 0.6570827239325081, "grad_norm": 0.2888167202472687, "learning_rate": 1.587013128161933e-05, "loss": 0.121, "step": 36840 }, { "epoch": 0.6571005600542218, "grad_norm": 0.21214784681797028, "learning_rate": 1.5868682297926262e-05, "loss": 0.087, "step": 36841 }, { "epoch": 0.6571183961759355, "grad_norm": 0.17510448396205902, "learning_rate": 1.586723334962933e-05, "loss": 0.1075, "step": 36842 }, { "epoch": 0.6571362322976492, "grad_norm": 0.23990722000598907, "learning_rate": 1.586578443673416e-05, "loss": 0.1168, "step": 36843 }, { "epoch": 0.6571540684193629, "grad_norm": 0.2533864974975586, "learning_rate": 1.5864335559246357e-05, "loss": 0.1539, "step": 36844 }, { "epoch": 0.6571719045410765, "grad_norm": 0.4161410927772522, "learning_rate": 1.5862886717171553e-05, "loss": 0.1141, "step": 36845 }, { "epoch": 0.6571897406627902, "grad_norm": 0.3502938449382782, "learning_rate": 1.5861437910515354e-05, "loss": 0.1548, "step": 36846 }, { "epoch": 0.657207576784504, "grad_norm": 0.3132579028606415, "learning_rate": 1.585998913928338e-05, "loss": 0.053, "step": 36847 }, { "epoch": 0.6572254129062177, "grad_norm": 0.2882586419582367, "learning_rate": 1.5858540403481238e-05, "loss": 0.1141, "step": 36848 }, { "epoch": 0.6572432490279314, "grad_norm": 0.24397030472755432, "learning_rate": 1.5857091703114555e-05, "loss": 0.0764, "step": 36849 }, { "epoch": 0.6572610851496451, "grad_norm": 0.37102025747299194, "learning_rate": 1.585564303818894e-05, "loss": 0.1525, "step": 36850 }, { "epoch": 0.6572789212713588, "grad_norm": 0.2623554468154907, "learning_rate": 1.5854194408710014e-05, "loss": 0.1365, "step": 36851 }, { "epoch": 0.6572967573930725, "grad_norm": 0.26536741852760315, "learning_rate": 1.5852745814683385e-05, "loss": 0.1322, "step": 36852 }, { "epoch": 0.6573145935147862, "grad_norm": 0.2044999748468399, "learning_rate": 1.585129725611468e-05, "loss": 0.0934, "step": 36853 }, { "epoch": 0.6573324296364998, "grad_norm": 0.2445453554391861, "learning_rate": 1.5849848733009502e-05, "loss": 0.1103, "step": 36854 }, { "epoch": 0.6573502657582135, "grad_norm": 0.2444705069065094, "learning_rate": 1.5848400245373473e-05, "loss": 0.1231, "step": 36855 }, { "epoch": 0.6573681018799272, "grad_norm": 0.21722236275672913, "learning_rate": 1.5846951793212205e-05, "loss": 0.1007, "step": 36856 }, { "epoch": 0.6573859380016409, "grad_norm": 0.27680715918540955, "learning_rate": 1.584550337653131e-05, "loss": 0.1166, "step": 36857 }, { "epoch": 0.6574037741233546, "grad_norm": 0.24962249398231506, "learning_rate": 1.5844054995336406e-05, "loss": 0.1058, "step": 36858 }, { "epoch": 0.6574216102450683, "grad_norm": 0.2826508581638336, "learning_rate": 1.5842606649633114e-05, "loss": 0.1358, "step": 36859 }, { "epoch": 0.657439446366782, "grad_norm": 0.40490302443504333, "learning_rate": 1.5841158339427036e-05, "loss": 0.2105, "step": 36860 }, { "epoch": 0.6574572824884957, "grad_norm": 0.27626582980155945, "learning_rate": 1.5839710064723784e-05, "loss": 0.0891, "step": 36861 }, { "epoch": 0.6574751186102094, "grad_norm": 0.27266401052474976, "learning_rate": 1.5838261825528994e-05, "loss": 0.0898, "step": 36862 }, { "epoch": 0.657492954731923, "grad_norm": 0.23167169094085693, "learning_rate": 1.5836813621848262e-05, "loss": 0.1127, "step": 36863 }, { "epoch": 0.6575107908536368, "grad_norm": 0.2848247289657593, "learning_rate": 1.58353654536872e-05, "loss": 0.1361, "step": 36864 }, { "epoch": 0.6575286269753505, "grad_norm": 0.2954593896865845, "learning_rate": 1.5833917321051424e-05, "loss": 0.1532, "step": 36865 }, { "epoch": 0.6575464630970642, "grad_norm": 0.2845885753631592, "learning_rate": 1.5832469223946556e-05, "loss": 0.1205, "step": 36866 }, { "epoch": 0.6575642992187779, "grad_norm": 0.24876411259174347, "learning_rate": 1.5831021162378207e-05, "loss": 0.0983, "step": 36867 }, { "epoch": 0.6575821353404916, "grad_norm": 0.23809607326984406, "learning_rate": 1.5829573136351987e-05, "loss": 0.1451, "step": 36868 }, { "epoch": 0.6575999714622053, "grad_norm": 0.2451048493385315, "learning_rate": 1.58281251458735e-05, "loss": 0.1205, "step": 36869 }, { "epoch": 0.657617807583919, "grad_norm": 0.2045835256576538, "learning_rate": 1.5826677190948375e-05, "loss": 0.1304, "step": 36870 }, { "epoch": 0.6576356437056327, "grad_norm": 0.3112563192844391, "learning_rate": 1.582522927158221e-05, "loss": 0.1472, "step": 36871 }, { "epoch": 0.6576534798273463, "grad_norm": 0.21952079236507416, "learning_rate": 1.5823781387780635e-05, "loss": 0.0855, "step": 36872 }, { "epoch": 0.65767131594906, "grad_norm": 0.27830639481544495, "learning_rate": 1.582233353954925e-05, "loss": 0.1736, "step": 36873 }, { "epoch": 0.6576891520707737, "grad_norm": 0.2714455723762512, "learning_rate": 1.5820885726893665e-05, "loss": 0.1422, "step": 36874 }, { "epoch": 0.6577069881924874, "grad_norm": 0.3178907036781311, "learning_rate": 1.5819437949819502e-05, "loss": 0.1352, "step": 36875 }, { "epoch": 0.6577248243142011, "grad_norm": 0.23411467671394348, "learning_rate": 1.581799020833237e-05, "loss": 0.135, "step": 36876 }, { "epoch": 0.6577426604359148, "grad_norm": 0.2939034700393677, "learning_rate": 1.5816542502437876e-05, "loss": 0.1608, "step": 36877 }, { "epoch": 0.6577604965576285, "grad_norm": 0.29110389947891235, "learning_rate": 1.581509483214163e-05, "loss": 0.135, "step": 36878 }, { "epoch": 0.6577783326793422, "grad_norm": 0.3218103051185608, "learning_rate": 1.5813647197449254e-05, "loss": 0.1176, "step": 36879 }, { "epoch": 0.6577961688010558, "grad_norm": 0.2967422306537628, "learning_rate": 1.5812199598366356e-05, "loss": 0.1625, "step": 36880 }, { "epoch": 0.6578140049227696, "grad_norm": 0.19686435163021088, "learning_rate": 1.581075203489855e-05, "loss": 0.1245, "step": 36881 }, { "epoch": 0.6578318410444833, "grad_norm": 0.2903866171836853, "learning_rate": 1.580930450705144e-05, "loss": 0.0989, "step": 36882 }, { "epoch": 0.657849677166197, "grad_norm": 0.24043291807174683, "learning_rate": 1.580785701483063e-05, "loss": 0.1727, "step": 36883 }, { "epoch": 0.6578675132879107, "grad_norm": 0.3193179666996002, "learning_rate": 1.5806409558241753e-05, "loss": 0.1781, "step": 36884 }, { "epoch": 0.6578853494096244, "grad_norm": 0.2343636006116867, "learning_rate": 1.5804962137290398e-05, "loss": 0.1001, "step": 36885 }, { "epoch": 0.6579031855313381, "grad_norm": 0.2991853952407837, "learning_rate": 1.5803514751982197e-05, "loss": 0.1548, "step": 36886 }, { "epoch": 0.6579210216530518, "grad_norm": 0.29604876041412354, "learning_rate": 1.5802067402322736e-05, "loss": 0.1555, "step": 36887 }, { "epoch": 0.6579388577747655, "grad_norm": 0.38010314106941223, "learning_rate": 1.5800620088317646e-05, "loss": 0.141, "step": 36888 }, { "epoch": 0.6579566938964792, "grad_norm": 0.2148333191871643, "learning_rate": 1.5799172809972535e-05, "loss": 0.1223, "step": 36889 }, { "epoch": 0.6579745300181928, "grad_norm": 0.1873437762260437, "learning_rate": 1.5797725567293006e-05, "loss": 0.1185, "step": 36890 }, { "epoch": 0.6579923661399065, "grad_norm": 0.21285198628902435, "learning_rate": 1.579627836028466e-05, "loss": 0.1194, "step": 36891 }, { "epoch": 0.6580102022616202, "grad_norm": 0.26031050086021423, "learning_rate": 1.5794831188953123e-05, "loss": 0.1397, "step": 36892 }, { "epoch": 0.6580280383833339, "grad_norm": 0.23620277643203735, "learning_rate": 1.5793384053304007e-05, "loss": 0.124, "step": 36893 }, { "epoch": 0.6580458745050476, "grad_norm": 0.2794645428657532, "learning_rate": 1.5791936953342913e-05, "loss": 0.1188, "step": 36894 }, { "epoch": 0.6580637106267613, "grad_norm": 0.20696409046649933, "learning_rate": 1.5790489889075442e-05, "loss": 0.0847, "step": 36895 }, { "epoch": 0.658081546748475, "grad_norm": 0.256647527217865, "learning_rate": 1.578904286050722e-05, "loss": 0.1642, "step": 36896 }, { "epoch": 0.6580993828701887, "grad_norm": 0.2705405652523041, "learning_rate": 1.5787595867643855e-05, "loss": 0.1344, "step": 36897 }, { "epoch": 0.6581172189919025, "grad_norm": 0.2152704894542694, "learning_rate": 1.578614891049094e-05, "loss": 0.1309, "step": 36898 }, { "epoch": 0.6581350551136161, "grad_norm": 0.31815510988235474, "learning_rate": 1.5784701989054102e-05, "loss": 0.1236, "step": 36899 }, { "epoch": 0.6581528912353298, "grad_norm": 0.21713735163211823, "learning_rate": 1.5783255103338933e-05, "loss": 0.1254, "step": 36900 }, { "epoch": 0.6581707273570435, "grad_norm": 0.2968268394470215, "learning_rate": 1.578180825335106e-05, "loss": 0.103, "step": 36901 }, { "epoch": 0.6581885634787572, "grad_norm": 0.27430063486099243, "learning_rate": 1.5780361439096078e-05, "loss": 0.1088, "step": 36902 }, { "epoch": 0.6582063996004709, "grad_norm": 0.2858230769634247, "learning_rate": 1.5778914660579604e-05, "loss": 0.1356, "step": 36903 }, { "epoch": 0.6582242357221846, "grad_norm": 0.2608380615711212, "learning_rate": 1.5777467917807228e-05, "loss": 0.1196, "step": 36904 }, { "epoch": 0.6582420718438983, "grad_norm": 0.21127164363861084, "learning_rate": 1.5776021210784586e-05, "loss": 0.1055, "step": 36905 }, { "epoch": 0.658259907965612, "grad_norm": 0.2470557540655136, "learning_rate": 1.577457453951726e-05, "loss": 0.1363, "step": 36906 }, { "epoch": 0.6582777440873256, "grad_norm": 0.3971419334411621, "learning_rate": 1.5773127904010882e-05, "loss": 0.1071, "step": 36907 }, { "epoch": 0.6582955802090393, "grad_norm": 0.28614407777786255, "learning_rate": 1.577168130427103e-05, "loss": 0.1279, "step": 36908 }, { "epoch": 0.658313416330753, "grad_norm": 0.242633655667305, "learning_rate": 1.5770234740303347e-05, "loss": 0.1204, "step": 36909 }, { "epoch": 0.6583312524524667, "grad_norm": 0.25513705611228943, "learning_rate": 1.5768788212113416e-05, "loss": 0.0872, "step": 36910 }, { "epoch": 0.6583490885741804, "grad_norm": 0.3504336178302765, "learning_rate": 1.5767341719706852e-05, "loss": 0.1624, "step": 36911 }, { "epoch": 0.6583669246958941, "grad_norm": 0.31027668714523315, "learning_rate": 1.5765895263089254e-05, "loss": 0.1621, "step": 36912 }, { "epoch": 0.6583847608176078, "grad_norm": 0.2630026042461395, "learning_rate": 1.5764448842266238e-05, "loss": 0.1365, "step": 36913 }, { "epoch": 0.6584025969393215, "grad_norm": 0.18853366374969482, "learning_rate": 1.5763002457243405e-05, "loss": 0.085, "step": 36914 }, { "epoch": 0.6584204330610353, "grad_norm": 0.3125610947608948, "learning_rate": 1.5761556108026372e-05, "loss": 0.0983, "step": 36915 }, { "epoch": 0.658438269182749, "grad_norm": 0.28227341175079346, "learning_rate": 1.5760109794620737e-05, "loss": 0.0968, "step": 36916 }, { "epoch": 0.6584561053044626, "grad_norm": 0.2525133788585663, "learning_rate": 1.57586635170321e-05, "loss": 0.1481, "step": 36917 }, { "epoch": 0.6584739414261763, "grad_norm": 0.2040320485830307, "learning_rate": 1.5757217275266082e-05, "loss": 0.0962, "step": 36918 }, { "epoch": 0.65849177754789, "grad_norm": 0.36210864782333374, "learning_rate": 1.5755771069328278e-05, "loss": 0.1517, "step": 36919 }, { "epoch": 0.6585096136696037, "grad_norm": 0.3569958209991455, "learning_rate": 1.5754324899224304e-05, "loss": 0.1592, "step": 36920 }, { "epoch": 0.6585274497913174, "grad_norm": 0.2147177755832672, "learning_rate": 1.575287876495975e-05, "loss": 0.0895, "step": 36921 }, { "epoch": 0.6585452859130311, "grad_norm": 0.2651868462562561, "learning_rate": 1.5751432666540245e-05, "loss": 0.1138, "step": 36922 }, { "epoch": 0.6585631220347448, "grad_norm": 0.30353134870529175, "learning_rate": 1.574998660397138e-05, "loss": 0.1335, "step": 36923 }, { "epoch": 0.6585809581564585, "grad_norm": 0.20026427507400513, "learning_rate": 1.5748540577258757e-05, "loss": 0.1472, "step": 36924 }, { "epoch": 0.6585987942781721, "grad_norm": 0.21521052718162537, "learning_rate": 1.5747094586407984e-05, "loss": 0.1536, "step": 36925 }, { "epoch": 0.6586166303998858, "grad_norm": 0.3330404460430145, "learning_rate": 1.5745648631424666e-05, "loss": 0.1049, "step": 36926 }, { "epoch": 0.6586344665215995, "grad_norm": 0.24024979770183563, "learning_rate": 1.5744202712314417e-05, "loss": 0.1637, "step": 36927 }, { "epoch": 0.6586523026433132, "grad_norm": 0.24170449376106262, "learning_rate": 1.5742756829082832e-05, "loss": 0.1349, "step": 36928 }, { "epoch": 0.6586701387650269, "grad_norm": 0.2711646556854248, "learning_rate": 1.5741310981735525e-05, "loss": 0.111, "step": 36929 }, { "epoch": 0.6586879748867406, "grad_norm": 0.19986176490783691, "learning_rate": 1.5739865170278077e-05, "loss": 0.1212, "step": 36930 }, { "epoch": 0.6587058110084543, "grad_norm": 0.28152593970298767, "learning_rate": 1.5738419394716127e-05, "loss": 0.1741, "step": 36931 }, { "epoch": 0.6587236471301681, "grad_norm": 0.26788750290870667, "learning_rate": 1.5736973655055262e-05, "loss": 0.0571, "step": 36932 }, { "epoch": 0.6587414832518818, "grad_norm": 0.23063886165618896, "learning_rate": 1.573552795130108e-05, "loss": 0.0957, "step": 36933 }, { "epoch": 0.6587593193735954, "grad_norm": 0.24991591274738312, "learning_rate": 1.573408228345919e-05, "loss": 0.1202, "step": 36934 }, { "epoch": 0.6587771554953091, "grad_norm": 0.21629846096038818, "learning_rate": 1.57326366515352e-05, "loss": 0.0837, "step": 36935 }, { "epoch": 0.6587949916170228, "grad_norm": 0.2798084616661072, "learning_rate": 1.5731191055534714e-05, "loss": 0.1531, "step": 36936 }, { "epoch": 0.6588128277387365, "grad_norm": 0.23518231511116028, "learning_rate": 1.5729745495463333e-05, "loss": 0.1435, "step": 36937 }, { "epoch": 0.6588306638604502, "grad_norm": 0.3445497453212738, "learning_rate": 1.572829997132666e-05, "loss": 0.1299, "step": 36938 }, { "epoch": 0.6588484999821639, "grad_norm": 0.3493834137916565, "learning_rate": 1.572685448313029e-05, "loss": 0.1336, "step": 36939 }, { "epoch": 0.6588663361038776, "grad_norm": 0.44469985365867615, "learning_rate": 1.5725409030879845e-05, "loss": 0.1948, "step": 36940 }, { "epoch": 0.6588841722255913, "grad_norm": 0.27621257305145264, "learning_rate": 1.5723963614580912e-05, "loss": 0.0811, "step": 36941 }, { "epoch": 0.658902008347305, "grad_norm": 0.23101843893527985, "learning_rate": 1.5722518234239104e-05, "loss": 0.1048, "step": 36942 }, { "epoch": 0.6589198444690186, "grad_norm": 0.2903430759906769, "learning_rate": 1.572107288986001e-05, "loss": 0.1957, "step": 36943 }, { "epoch": 0.6589376805907323, "grad_norm": 0.30021992325782776, "learning_rate": 1.5719627581449255e-05, "loss": 0.1363, "step": 36944 }, { "epoch": 0.658955516712446, "grad_norm": 0.2059231847524643, "learning_rate": 1.5718182309012427e-05, "loss": 0.0921, "step": 36945 }, { "epoch": 0.6589733528341597, "grad_norm": 0.24417108297348022, "learning_rate": 1.5716737072555127e-05, "loss": 0.0898, "step": 36946 }, { "epoch": 0.6589911889558734, "grad_norm": 0.2771771550178528, "learning_rate": 1.5715291872082958e-05, "loss": 0.1144, "step": 36947 }, { "epoch": 0.6590090250775872, "grad_norm": 0.23978090286254883, "learning_rate": 1.5713846707601527e-05, "loss": 0.0886, "step": 36948 }, { "epoch": 0.6590268611993009, "grad_norm": 0.22668437659740448, "learning_rate": 1.5712401579116434e-05, "loss": 0.1347, "step": 36949 }, { "epoch": 0.6590446973210146, "grad_norm": 0.3065158724784851, "learning_rate": 1.571095648663328e-05, "loss": 0.1225, "step": 36950 }, { "epoch": 0.6590625334427282, "grad_norm": 0.34733688831329346, "learning_rate": 1.5709511430157675e-05, "loss": 0.1057, "step": 36951 }, { "epoch": 0.6590803695644419, "grad_norm": 0.33316707611083984, "learning_rate": 1.5708066409695193e-05, "loss": 0.1937, "step": 36952 }, { "epoch": 0.6590982056861556, "grad_norm": 0.2104162871837616, "learning_rate": 1.5706621425251472e-05, "loss": 0.1233, "step": 36953 }, { "epoch": 0.6591160418078693, "grad_norm": 0.2924996614456177, "learning_rate": 1.570517647683209e-05, "loss": 0.1209, "step": 36954 }, { "epoch": 0.659133877929583, "grad_norm": 0.302206814289093, "learning_rate": 1.570373156444266e-05, "loss": 0.1504, "step": 36955 }, { "epoch": 0.6591517140512967, "grad_norm": 0.2522921860218048, "learning_rate": 1.5702286688088762e-05, "loss": 0.1255, "step": 36956 }, { "epoch": 0.6591695501730104, "grad_norm": 0.24559594690799713, "learning_rate": 1.5700841847776028e-05, "loss": 0.0931, "step": 36957 }, { "epoch": 0.6591873862947241, "grad_norm": 0.31043174862861633, "learning_rate": 1.569939704351004e-05, "loss": 0.1095, "step": 36958 }, { "epoch": 0.6592052224164378, "grad_norm": 0.3222244381904602, "learning_rate": 1.5697952275296397e-05, "loss": 0.1059, "step": 36959 }, { "epoch": 0.6592230585381514, "grad_norm": 0.2710152566432953, "learning_rate": 1.5696507543140698e-05, "loss": 0.1253, "step": 36960 }, { "epoch": 0.6592408946598651, "grad_norm": 0.2671167552471161, "learning_rate": 1.569506284704856e-05, "loss": 0.1323, "step": 36961 }, { "epoch": 0.6592587307815788, "grad_norm": 0.1917286366224289, "learning_rate": 1.569361818702557e-05, "loss": 0.111, "step": 36962 }, { "epoch": 0.6592765669032925, "grad_norm": 0.2842090427875519, "learning_rate": 1.569217356307733e-05, "loss": 0.1303, "step": 36963 }, { "epoch": 0.6592944030250062, "grad_norm": 0.2837993800640106, "learning_rate": 1.5690728975209444e-05, "loss": 0.1533, "step": 36964 }, { "epoch": 0.65931223914672, "grad_norm": 0.3480733633041382, "learning_rate": 1.5689284423427493e-05, "loss": 0.1632, "step": 36965 }, { "epoch": 0.6593300752684337, "grad_norm": 0.2921847999095917, "learning_rate": 1.5687839907737108e-05, "loss": 0.1025, "step": 36966 }, { "epoch": 0.6593479113901474, "grad_norm": 0.31307971477508545, "learning_rate": 1.5686395428143865e-05, "loss": 0.1128, "step": 36967 }, { "epoch": 0.6593657475118611, "grad_norm": 0.22050459682941437, "learning_rate": 1.5684950984653368e-05, "loss": 0.1043, "step": 36968 }, { "epoch": 0.6593835836335747, "grad_norm": 0.19954946637153625, "learning_rate": 1.5683506577271212e-05, "loss": 0.1127, "step": 36969 }, { "epoch": 0.6594014197552884, "grad_norm": 0.2509217858314514, "learning_rate": 1.5682062206003017e-05, "loss": 0.0912, "step": 36970 }, { "epoch": 0.6594192558770021, "grad_norm": 0.18361979722976685, "learning_rate": 1.5680617870854363e-05, "loss": 0.0719, "step": 36971 }, { "epoch": 0.6594370919987158, "grad_norm": 0.35508817434310913, "learning_rate": 1.5679173571830852e-05, "loss": 0.1202, "step": 36972 }, { "epoch": 0.6594549281204295, "grad_norm": 0.28412824869155884, "learning_rate": 1.5677729308938076e-05, "loss": 0.1375, "step": 36973 }, { "epoch": 0.6594727642421432, "grad_norm": 0.3273811340332031, "learning_rate": 1.567628508218165e-05, "loss": 0.1225, "step": 36974 }, { "epoch": 0.6594906003638569, "grad_norm": 0.47485023736953735, "learning_rate": 1.5674840891567162e-05, "loss": 0.126, "step": 36975 }, { "epoch": 0.6595084364855706, "grad_norm": 0.22489425539970398, "learning_rate": 1.567339673710021e-05, "loss": 0.1145, "step": 36976 }, { "epoch": 0.6595262726072842, "grad_norm": 0.31754279136657715, "learning_rate": 1.5671952618786398e-05, "loss": 0.1423, "step": 36977 }, { "epoch": 0.6595441087289979, "grad_norm": 0.2403276413679123, "learning_rate": 1.567050853663131e-05, "loss": 0.1292, "step": 36978 }, { "epoch": 0.6595619448507116, "grad_norm": 0.46837303042411804, "learning_rate": 1.5669064490640562e-05, "loss": 0.1654, "step": 36979 }, { "epoch": 0.6595797809724253, "grad_norm": 0.24127860367298126, "learning_rate": 1.566762048081974e-05, "loss": 0.1551, "step": 36980 }, { "epoch": 0.659597617094139, "grad_norm": 0.28468719124794006, "learning_rate": 1.5666176507174442e-05, "loss": 0.1218, "step": 36981 }, { "epoch": 0.6596154532158528, "grad_norm": 0.2550612986087799, "learning_rate": 1.5664732569710268e-05, "loss": 0.1352, "step": 36982 }, { "epoch": 0.6596332893375665, "grad_norm": 0.23893840610980988, "learning_rate": 1.566328866843281e-05, "loss": 0.1138, "step": 36983 }, { "epoch": 0.6596511254592802, "grad_norm": 0.2854664921760559, "learning_rate": 1.5661844803347684e-05, "loss": 0.1472, "step": 36984 }, { "epoch": 0.6596689615809939, "grad_norm": 0.3304499089717865, "learning_rate": 1.5660400974460465e-05, "loss": 0.1413, "step": 36985 }, { "epoch": 0.6596867977027076, "grad_norm": 0.2861810326576233, "learning_rate": 1.565895718177675e-05, "loss": 0.123, "step": 36986 }, { "epoch": 0.6597046338244212, "grad_norm": 0.1872393637895584, "learning_rate": 1.5657513425302157e-05, "loss": 0.1225, "step": 36987 }, { "epoch": 0.6597224699461349, "grad_norm": 0.2831312119960785, "learning_rate": 1.5656069705042266e-05, "loss": 0.1424, "step": 36988 }, { "epoch": 0.6597403060678486, "grad_norm": 0.27119460701942444, "learning_rate": 1.565462602100267e-05, "loss": 0.1293, "step": 36989 }, { "epoch": 0.6597581421895623, "grad_norm": 0.23875048756599426, "learning_rate": 1.5653182373188976e-05, "loss": 0.1001, "step": 36990 }, { "epoch": 0.659775978311276, "grad_norm": 0.2693100869655609, "learning_rate": 1.5651738761606766e-05, "loss": 0.1451, "step": 36991 }, { "epoch": 0.6597938144329897, "grad_norm": 0.24230840802192688, "learning_rate": 1.565029518626166e-05, "loss": 0.1177, "step": 36992 }, { "epoch": 0.6598116505547034, "grad_norm": 0.43695560097694397, "learning_rate": 1.564885164715923e-05, "loss": 0.1176, "step": 36993 }, { "epoch": 0.659829486676417, "grad_norm": 0.26083457469940186, "learning_rate": 1.5647408144305087e-05, "loss": 0.1224, "step": 36994 }, { "epoch": 0.6598473227981307, "grad_norm": 0.37520378828048706, "learning_rate": 1.564596467770481e-05, "loss": 0.0835, "step": 36995 }, { "epoch": 0.6598651589198444, "grad_norm": 0.2315322607755661, "learning_rate": 1.564452124736401e-05, "loss": 0.1009, "step": 36996 }, { "epoch": 0.6598829950415581, "grad_norm": 0.2620389461517334, "learning_rate": 1.564307785328828e-05, "loss": 0.1107, "step": 36997 }, { "epoch": 0.6599008311632718, "grad_norm": 0.28540104627609253, "learning_rate": 1.564163449548321e-05, "loss": 0.1022, "step": 36998 }, { "epoch": 0.6599186672849856, "grad_norm": 0.3569800555706024, "learning_rate": 1.5640191173954393e-05, "loss": 0.1158, "step": 36999 }, { "epoch": 0.6599365034066993, "grad_norm": 0.15573696792125702, "learning_rate": 1.5638747888707434e-05, "loss": 0.0525, "step": 37000 }, { "epoch": 0.6599365034066993, "eval_loss": 0.11902497708797455, "eval_runtime": 106.7536, "eval_samples_per_second": 9.592, "eval_steps_per_second": 1.602, "step": 37000 }, { "epoch": 0.659954339528413, "grad_norm": 0.25685834884643555, "learning_rate": 1.563730463974792e-05, "loss": 0.0974, "step": 37001 }, { "epoch": 0.6599721756501267, "grad_norm": 0.20308485627174377, "learning_rate": 1.5635861427081443e-05, "loss": 0.1196, "step": 37002 }, { "epoch": 0.6599900117718404, "grad_norm": 0.26290422677993774, "learning_rate": 1.563441825071361e-05, "loss": 0.0959, "step": 37003 }, { "epoch": 0.660007847893554, "grad_norm": 0.28677183389663696, "learning_rate": 1.563297511064999e-05, "loss": 0.1042, "step": 37004 }, { "epoch": 0.6600256840152677, "grad_norm": 0.30590012669563293, "learning_rate": 1.563153200689621e-05, "loss": 0.1691, "step": 37005 }, { "epoch": 0.6600435201369814, "grad_norm": 0.29537612199783325, "learning_rate": 1.5630088939457847e-05, "loss": 0.1578, "step": 37006 }, { "epoch": 0.6600613562586951, "grad_norm": 0.24706660211086273, "learning_rate": 1.5628645908340492e-05, "loss": 0.1418, "step": 37007 }, { "epoch": 0.6600791923804088, "grad_norm": 0.299190878868103, "learning_rate": 1.5627202913549734e-05, "loss": 0.1556, "step": 37008 }, { "epoch": 0.6600970285021225, "grad_norm": 0.23550699651241302, "learning_rate": 1.5625759955091183e-05, "loss": 0.119, "step": 37009 }, { "epoch": 0.6601148646238362, "grad_norm": 0.27453306317329407, "learning_rate": 1.5624317032970424e-05, "loss": 0.1393, "step": 37010 }, { "epoch": 0.6601327007455499, "grad_norm": 0.39263662695884705, "learning_rate": 1.562287414719305e-05, "loss": 0.1517, "step": 37011 }, { "epoch": 0.6601505368672635, "grad_norm": 0.2503941059112549, "learning_rate": 1.562143129776465e-05, "loss": 0.1196, "step": 37012 }, { "epoch": 0.6601683729889772, "grad_norm": 0.26782235503196716, "learning_rate": 1.561998848469083e-05, "loss": 0.1284, "step": 37013 }, { "epoch": 0.6601862091106909, "grad_norm": 0.36488696932792664, "learning_rate": 1.561854570797717e-05, "loss": 0.1557, "step": 37014 }, { "epoch": 0.6602040452324046, "grad_norm": 0.21011121571063995, "learning_rate": 1.5617102967629272e-05, "loss": 0.0765, "step": 37015 }, { "epoch": 0.6602218813541184, "grad_norm": 0.36573705077171326, "learning_rate": 1.5615660263652714e-05, "loss": 0.1066, "step": 37016 }, { "epoch": 0.6602397174758321, "grad_norm": 0.2442290037870407, "learning_rate": 1.56142175960531e-05, "loss": 0.1307, "step": 37017 }, { "epoch": 0.6602575535975458, "grad_norm": 0.23664802312850952, "learning_rate": 1.5612774964836026e-05, "loss": 0.1017, "step": 37018 }, { "epoch": 0.6602753897192595, "grad_norm": 0.18912160396575928, "learning_rate": 1.5611332370007076e-05, "loss": 0.0713, "step": 37019 }, { "epoch": 0.6602932258409732, "grad_norm": 0.2211618274450302, "learning_rate": 1.5609889811571846e-05, "loss": 0.1131, "step": 37020 }, { "epoch": 0.6603110619626869, "grad_norm": 0.31006449460983276, "learning_rate": 1.5608447289535915e-05, "loss": 0.1202, "step": 37021 }, { "epoch": 0.6603288980844005, "grad_norm": 0.2946932315826416, "learning_rate": 1.5607004803904902e-05, "loss": 0.1158, "step": 37022 }, { "epoch": 0.6603467342061142, "grad_norm": 0.3183962106704712, "learning_rate": 1.5605562354684372e-05, "loss": 0.1113, "step": 37023 }, { "epoch": 0.6603645703278279, "grad_norm": 0.3085103929042816, "learning_rate": 1.5604119941879936e-05, "loss": 0.0918, "step": 37024 }, { "epoch": 0.6603824064495416, "grad_norm": 0.22984778881072998, "learning_rate": 1.5602677565497166e-05, "loss": 0.0977, "step": 37025 }, { "epoch": 0.6604002425712553, "grad_norm": 0.2717246413230896, "learning_rate": 1.560123522554167e-05, "loss": 0.1374, "step": 37026 }, { "epoch": 0.660418078692969, "grad_norm": 0.21724514663219452, "learning_rate": 1.5599792922019037e-05, "loss": 0.1205, "step": 37027 }, { "epoch": 0.6604359148146827, "grad_norm": 0.25974664092063904, "learning_rate": 1.5598350654934846e-05, "loss": 0.1527, "step": 37028 }, { "epoch": 0.6604537509363964, "grad_norm": 0.3335629105567932, "learning_rate": 1.5596908424294695e-05, "loss": 0.1591, "step": 37029 }, { "epoch": 0.66047158705811, "grad_norm": 0.31999802589416504, "learning_rate": 1.5595466230104178e-05, "loss": 0.1653, "step": 37030 }, { "epoch": 0.6604894231798237, "grad_norm": 0.23707224428653717, "learning_rate": 1.5594024072368878e-05, "loss": 0.1449, "step": 37031 }, { "epoch": 0.6605072593015374, "grad_norm": 0.3162710666656494, "learning_rate": 1.5592581951094392e-05, "loss": 0.1135, "step": 37032 }, { "epoch": 0.6605250954232512, "grad_norm": 0.2938390076160431, "learning_rate": 1.5591139866286315e-05, "loss": 0.106, "step": 37033 }, { "epoch": 0.6605429315449649, "grad_norm": 0.28506210446357727, "learning_rate": 1.558969781795021e-05, "loss": 0.1314, "step": 37034 }, { "epoch": 0.6605607676666786, "grad_norm": 0.22351442277431488, "learning_rate": 1.5588255806091703e-05, "loss": 0.1647, "step": 37035 }, { "epoch": 0.6605786037883923, "grad_norm": 0.25973811745643616, "learning_rate": 1.558681383071637e-05, "loss": 0.1155, "step": 37036 }, { "epoch": 0.660596439910106, "grad_norm": 0.24580179154872894, "learning_rate": 1.5585371891829788e-05, "loss": 0.1224, "step": 37037 }, { "epoch": 0.6606142760318197, "grad_norm": 0.24619829654693604, "learning_rate": 1.5583929989437554e-05, "loss": 0.113, "step": 37038 }, { "epoch": 0.6606321121535333, "grad_norm": 0.22422625124454498, "learning_rate": 1.5582488123545264e-05, "loss": 0.1338, "step": 37039 }, { "epoch": 0.660649948275247, "grad_norm": 0.2383342683315277, "learning_rate": 1.5581046294158504e-05, "loss": 0.0967, "step": 37040 }, { "epoch": 0.6606677843969607, "grad_norm": 0.2628973424434662, "learning_rate": 1.5579604501282867e-05, "loss": 0.1096, "step": 37041 }, { "epoch": 0.6606856205186744, "grad_norm": 0.29809632897377014, "learning_rate": 1.5578162744923934e-05, "loss": 0.1553, "step": 37042 }, { "epoch": 0.6607034566403881, "grad_norm": 0.3029145300388336, "learning_rate": 1.5576721025087283e-05, "loss": 0.1229, "step": 37043 }, { "epoch": 0.6607212927621018, "grad_norm": 0.27769985795021057, "learning_rate": 1.5575279341778532e-05, "loss": 0.1145, "step": 37044 }, { "epoch": 0.6607391288838155, "grad_norm": 0.2558722198009491, "learning_rate": 1.5573837695003243e-05, "loss": 0.1277, "step": 37045 }, { "epoch": 0.6607569650055292, "grad_norm": 0.28375545144081116, "learning_rate": 1.557239608476702e-05, "loss": 0.1434, "step": 37046 }, { "epoch": 0.6607748011272429, "grad_norm": 0.2731107175350189, "learning_rate": 1.5570954511075443e-05, "loss": 0.1328, "step": 37047 }, { "epoch": 0.6607926372489565, "grad_norm": 0.2662769854068756, "learning_rate": 1.5569512973934106e-05, "loss": 0.0956, "step": 37048 }, { "epoch": 0.6608104733706702, "grad_norm": 0.34471702575683594, "learning_rate": 1.5568071473348596e-05, "loss": 0.1335, "step": 37049 }, { "epoch": 0.660828309492384, "grad_norm": 0.22524121403694153, "learning_rate": 1.55666300093245e-05, "loss": 0.071, "step": 37050 }, { "epoch": 0.6608461456140977, "grad_norm": 0.22285377979278564, "learning_rate": 1.556518858186739e-05, "loss": 0.1395, "step": 37051 }, { "epoch": 0.6608639817358114, "grad_norm": 0.27272310853004456, "learning_rate": 1.5563747190982877e-05, "loss": 0.1536, "step": 37052 }, { "epoch": 0.6608818178575251, "grad_norm": 0.2512317895889282, "learning_rate": 1.5562305836676545e-05, "loss": 0.1238, "step": 37053 }, { "epoch": 0.6608996539792388, "grad_norm": 0.20758996903896332, "learning_rate": 1.556086451895397e-05, "loss": 0.1195, "step": 37054 }, { "epoch": 0.6609174901009525, "grad_norm": 0.2207065373659134, "learning_rate": 1.555942323782075e-05, "loss": 0.0982, "step": 37055 }, { "epoch": 0.6609353262226662, "grad_norm": 0.22319954633712769, "learning_rate": 1.5557981993282453e-05, "loss": 0.1404, "step": 37056 }, { "epoch": 0.6609531623443798, "grad_norm": 0.2827005982398987, "learning_rate": 1.555654078534469e-05, "loss": 0.15, "step": 37057 }, { "epoch": 0.6609709984660935, "grad_norm": 0.2364160716533661, "learning_rate": 1.555509961401303e-05, "loss": 0.1656, "step": 37058 }, { "epoch": 0.6609888345878072, "grad_norm": 0.2683470845222473, "learning_rate": 1.5553658479293073e-05, "loss": 0.096, "step": 37059 }, { "epoch": 0.6610066707095209, "grad_norm": 0.23723892867565155, "learning_rate": 1.5552217381190386e-05, "loss": 0.1434, "step": 37060 }, { "epoch": 0.6610245068312346, "grad_norm": 0.30018407106399536, "learning_rate": 1.555077631971058e-05, "loss": 0.1035, "step": 37061 }, { "epoch": 0.6610423429529483, "grad_norm": 0.2841120660305023, "learning_rate": 1.554933529485923e-05, "loss": 0.0937, "step": 37062 }, { "epoch": 0.661060179074662, "grad_norm": 0.2536258101463318, "learning_rate": 1.554789430664192e-05, "loss": 0.1138, "step": 37063 }, { "epoch": 0.6610780151963757, "grad_norm": 0.24432630836963654, "learning_rate": 1.5546453355064223e-05, "loss": 0.0986, "step": 37064 }, { "epoch": 0.6610958513180893, "grad_norm": 0.315021276473999, "learning_rate": 1.554501244013175e-05, "loss": 0.1183, "step": 37065 }, { "epoch": 0.6611136874398031, "grad_norm": 0.25056859850883484, "learning_rate": 1.5543571561850068e-05, "loss": 0.1317, "step": 37066 }, { "epoch": 0.6611315235615168, "grad_norm": 0.2400984764099121, "learning_rate": 1.5542130720224773e-05, "loss": 0.1257, "step": 37067 }, { "epoch": 0.6611493596832305, "grad_norm": 0.197922483086586, "learning_rate": 1.554068991526145e-05, "loss": 0.0692, "step": 37068 }, { "epoch": 0.6611671958049442, "grad_norm": 0.26362791657447815, "learning_rate": 1.5539249146965667e-05, "loss": 0.0845, "step": 37069 }, { "epoch": 0.6611850319266579, "grad_norm": 0.3231179714202881, "learning_rate": 1.5537808415343033e-05, "loss": 0.1395, "step": 37070 }, { "epoch": 0.6612028680483716, "grad_norm": 0.36305803060531616, "learning_rate": 1.5536367720399122e-05, "loss": 0.0883, "step": 37071 }, { "epoch": 0.6612207041700853, "grad_norm": 0.2532045543193817, "learning_rate": 1.5534927062139516e-05, "loss": 0.1132, "step": 37072 }, { "epoch": 0.661238540291799, "grad_norm": 0.2202557772397995, "learning_rate": 1.553348644056979e-05, "loss": 0.0853, "step": 37073 }, { "epoch": 0.6612563764135126, "grad_norm": 0.27331575751304626, "learning_rate": 1.5532045855695553e-05, "loss": 0.0891, "step": 37074 }, { "epoch": 0.6612742125352263, "grad_norm": 0.2807004749774933, "learning_rate": 1.553060530752238e-05, "loss": 0.1072, "step": 37075 }, { "epoch": 0.66129204865694, "grad_norm": 0.2401042878627777, "learning_rate": 1.5529164796055847e-05, "loss": 0.1217, "step": 37076 }, { "epoch": 0.6613098847786537, "grad_norm": 0.24894565343856812, "learning_rate": 1.5527724321301534e-05, "loss": 0.114, "step": 37077 }, { "epoch": 0.6613277209003674, "grad_norm": 0.2939464747905731, "learning_rate": 1.5526283883265043e-05, "loss": 0.1005, "step": 37078 }, { "epoch": 0.6613455570220811, "grad_norm": 0.3336678147315979, "learning_rate": 1.5524843481951944e-05, "loss": 0.2024, "step": 37079 }, { "epoch": 0.6613633931437948, "grad_norm": 0.31239572167396545, "learning_rate": 1.552340311736783e-05, "loss": 0.1907, "step": 37080 }, { "epoch": 0.6613812292655085, "grad_norm": 0.32804518938064575, "learning_rate": 1.5521962789518273e-05, "loss": 0.1463, "step": 37081 }, { "epoch": 0.6613990653872222, "grad_norm": 0.24787935614585876, "learning_rate": 1.552052249840886e-05, "loss": 0.1263, "step": 37082 }, { "epoch": 0.661416901508936, "grad_norm": 0.2364760786294937, "learning_rate": 1.551908224404518e-05, "loss": 0.1311, "step": 37083 }, { "epoch": 0.6614347376306496, "grad_norm": 0.1966044157743454, "learning_rate": 1.5517642026432815e-05, "loss": 0.0789, "step": 37084 }, { "epoch": 0.6614525737523633, "grad_norm": 0.28517889976501465, "learning_rate": 1.551620184557734e-05, "loss": 0.0986, "step": 37085 }, { "epoch": 0.661470409874077, "grad_norm": 0.2824646830558777, "learning_rate": 1.5514761701484338e-05, "loss": 0.1125, "step": 37086 }, { "epoch": 0.6614882459957907, "grad_norm": 0.2853250801563263, "learning_rate": 1.5513321594159396e-05, "loss": 0.111, "step": 37087 }, { "epoch": 0.6615060821175044, "grad_norm": 0.2811721861362457, "learning_rate": 1.5511881523608105e-05, "loss": 0.1776, "step": 37088 }, { "epoch": 0.6615239182392181, "grad_norm": 0.2946011424064636, "learning_rate": 1.5510441489836035e-05, "loss": 0.1037, "step": 37089 }, { "epoch": 0.6615417543609318, "grad_norm": 0.42056822776794434, "learning_rate": 1.5509001492848763e-05, "loss": 0.1161, "step": 37090 }, { "epoch": 0.6615595904826455, "grad_norm": 0.19736723601818085, "learning_rate": 1.550756153265189e-05, "loss": 0.0645, "step": 37091 }, { "epoch": 0.6615774266043591, "grad_norm": 0.22733968496322632, "learning_rate": 1.5506121609250986e-05, "loss": 0.0844, "step": 37092 }, { "epoch": 0.6615952627260728, "grad_norm": 0.30161646008491516, "learning_rate": 1.5504681722651633e-05, "loss": 0.1301, "step": 37093 }, { "epoch": 0.6616130988477865, "grad_norm": 0.2754213511943817, "learning_rate": 1.5503241872859412e-05, "loss": 0.1739, "step": 37094 }, { "epoch": 0.6616309349695002, "grad_norm": 0.33259668946266174, "learning_rate": 1.5501802059879896e-05, "loss": 0.1814, "step": 37095 }, { "epoch": 0.6616487710912139, "grad_norm": 0.21730954945087433, "learning_rate": 1.5500362283718687e-05, "loss": 0.1063, "step": 37096 }, { "epoch": 0.6616666072129276, "grad_norm": 0.2618711590766907, "learning_rate": 1.5498922544381355e-05, "loss": 0.0734, "step": 37097 }, { "epoch": 0.6616844433346413, "grad_norm": 0.2211068868637085, "learning_rate": 1.5497482841873483e-05, "loss": 0.1394, "step": 37098 }, { "epoch": 0.661702279456355, "grad_norm": 0.26202213764190674, "learning_rate": 1.5496043176200637e-05, "loss": 0.1552, "step": 37099 }, { "epoch": 0.6617201155780688, "grad_norm": 0.33530083298683167, "learning_rate": 1.5494603547368415e-05, "loss": 0.1059, "step": 37100 }, { "epoch": 0.6617379516997824, "grad_norm": 0.28246310353279114, "learning_rate": 1.54931639553824e-05, "loss": 0.1759, "step": 37101 }, { "epoch": 0.6617557878214961, "grad_norm": 0.3158358335494995, "learning_rate": 1.5491724400248165e-05, "loss": 0.1016, "step": 37102 }, { "epoch": 0.6617736239432098, "grad_norm": 0.26427677273750305, "learning_rate": 1.549028488197128e-05, "loss": 0.0545, "step": 37103 }, { "epoch": 0.6617914600649235, "grad_norm": 0.2480769008398056, "learning_rate": 1.5488845400557344e-05, "loss": 0.1519, "step": 37104 }, { "epoch": 0.6618092961866372, "grad_norm": 0.2479245662689209, "learning_rate": 1.548740595601193e-05, "loss": 0.1457, "step": 37105 }, { "epoch": 0.6618271323083509, "grad_norm": 0.2954351007938385, "learning_rate": 1.548596654834061e-05, "loss": 0.1114, "step": 37106 }, { "epoch": 0.6618449684300646, "grad_norm": 0.20915383100509644, "learning_rate": 1.5484527177548973e-05, "loss": 0.0931, "step": 37107 }, { "epoch": 0.6618628045517783, "grad_norm": 0.24162811040878296, "learning_rate": 1.5483087843642585e-05, "loss": 0.133, "step": 37108 }, { "epoch": 0.661880640673492, "grad_norm": 0.23137404024600983, "learning_rate": 1.5481648546627046e-05, "loss": 0.0609, "step": 37109 }, { "epoch": 0.6618984767952056, "grad_norm": 0.3720117509365082, "learning_rate": 1.5480209286507928e-05, "loss": 0.1066, "step": 37110 }, { "epoch": 0.6619163129169193, "grad_norm": 0.242386132478714, "learning_rate": 1.5478770063290803e-05, "loss": 0.1062, "step": 37111 }, { "epoch": 0.661934149038633, "grad_norm": 0.28530818223953247, "learning_rate": 1.5477330876981248e-05, "loss": 0.1329, "step": 37112 }, { "epoch": 0.6619519851603467, "grad_norm": 0.32816416025161743, "learning_rate": 1.5475891727584853e-05, "loss": 0.1362, "step": 37113 }, { "epoch": 0.6619698212820604, "grad_norm": 0.32355543971061707, "learning_rate": 1.547445261510719e-05, "loss": 0.1703, "step": 37114 }, { "epoch": 0.6619876574037741, "grad_norm": 0.2863290011882782, "learning_rate": 1.5473013539553843e-05, "loss": 0.0849, "step": 37115 }, { "epoch": 0.6620054935254878, "grad_norm": 0.2806033194065094, "learning_rate": 1.5471574500930374e-05, "loss": 0.0936, "step": 37116 }, { "epoch": 0.6620233296472016, "grad_norm": 0.2882058918476105, "learning_rate": 1.5470135499242383e-05, "loss": 0.1492, "step": 37117 }, { "epoch": 0.6620411657689153, "grad_norm": 0.251399964094162, "learning_rate": 1.546869653449544e-05, "loss": 0.0901, "step": 37118 }, { "epoch": 0.6620590018906289, "grad_norm": 0.26312190294265747, "learning_rate": 1.546725760669512e-05, "loss": 0.1041, "step": 37119 }, { "epoch": 0.6620768380123426, "grad_norm": 0.25243645906448364, "learning_rate": 1.5465818715847e-05, "loss": 0.1312, "step": 37120 }, { "epoch": 0.6620946741340563, "grad_norm": 0.23666419088840485, "learning_rate": 1.5464379861956658e-05, "loss": 0.1287, "step": 37121 }, { "epoch": 0.66211251025577, "grad_norm": 0.2283872812986374, "learning_rate": 1.546294104502967e-05, "loss": 0.1346, "step": 37122 }, { "epoch": 0.6621303463774837, "grad_norm": 0.21539998054504395, "learning_rate": 1.5461502265071625e-05, "loss": 0.1354, "step": 37123 }, { "epoch": 0.6621481824991974, "grad_norm": 0.2578134536743164, "learning_rate": 1.5460063522088093e-05, "loss": 0.1418, "step": 37124 }, { "epoch": 0.6621660186209111, "grad_norm": 0.32014885544776917, "learning_rate": 1.5458624816084634e-05, "loss": 0.1148, "step": 37125 }, { "epoch": 0.6621838547426248, "grad_norm": 0.23078584671020508, "learning_rate": 1.5457186147066854e-05, "loss": 0.1244, "step": 37126 }, { "epoch": 0.6622016908643384, "grad_norm": 0.3438572287559509, "learning_rate": 1.545574751504032e-05, "loss": 0.1541, "step": 37127 }, { "epoch": 0.6622195269860521, "grad_norm": 0.2518502175807953, "learning_rate": 1.5454308920010596e-05, "loss": 0.1101, "step": 37128 }, { "epoch": 0.6622373631077658, "grad_norm": 0.388161838054657, "learning_rate": 1.5452870361983264e-05, "loss": 0.1676, "step": 37129 }, { "epoch": 0.6622551992294795, "grad_norm": 0.2901150584220886, "learning_rate": 1.5451431840963916e-05, "loss": 0.1485, "step": 37130 }, { "epoch": 0.6622730353511932, "grad_norm": 0.23931673169136047, "learning_rate": 1.544999335695811e-05, "loss": 0.071, "step": 37131 }, { "epoch": 0.6622908714729069, "grad_norm": 0.301135390996933, "learning_rate": 1.5448554909971433e-05, "loss": 0.1609, "step": 37132 }, { "epoch": 0.6623087075946206, "grad_norm": 0.266620397567749, "learning_rate": 1.544711650000945e-05, "loss": 0.1494, "step": 37133 }, { "epoch": 0.6623265437163344, "grad_norm": 0.18081001937389374, "learning_rate": 1.544567812707774e-05, "loss": 0.1484, "step": 37134 }, { "epoch": 0.6623443798380481, "grad_norm": 0.22067302465438843, "learning_rate": 1.5444239791181885e-05, "loss": 0.0966, "step": 37135 }, { "epoch": 0.6623622159597617, "grad_norm": 0.2599610388278961, "learning_rate": 1.5442801492327462e-05, "loss": 0.1028, "step": 37136 }, { "epoch": 0.6623800520814754, "grad_norm": 0.20911681652069092, "learning_rate": 1.5441363230520043e-05, "loss": 0.0843, "step": 37137 }, { "epoch": 0.6623978882031891, "grad_norm": 0.306350976228714, "learning_rate": 1.5439925005765187e-05, "loss": 0.1257, "step": 37138 }, { "epoch": 0.6624157243249028, "grad_norm": 0.24656684696674347, "learning_rate": 1.54384868180685e-05, "loss": 0.1086, "step": 37139 }, { "epoch": 0.6624335604466165, "grad_norm": 0.32594624161720276, "learning_rate": 1.5437048667435534e-05, "loss": 0.134, "step": 37140 }, { "epoch": 0.6624513965683302, "grad_norm": 0.5861173272132874, "learning_rate": 1.543561055387187e-05, "loss": 0.1653, "step": 37141 }, { "epoch": 0.6624692326900439, "grad_norm": 0.2312593162059784, "learning_rate": 1.543417247738308e-05, "loss": 0.0929, "step": 37142 }, { "epoch": 0.6624870688117576, "grad_norm": 0.3028632700443268, "learning_rate": 1.543273443797474e-05, "loss": 0.0984, "step": 37143 }, { "epoch": 0.6625049049334713, "grad_norm": 0.27155882120132446, "learning_rate": 1.543129643565243e-05, "loss": 0.1334, "step": 37144 }, { "epoch": 0.6625227410551849, "grad_norm": 0.29891037940979004, "learning_rate": 1.5429858470421727e-05, "loss": 0.132, "step": 37145 }, { "epoch": 0.6625405771768986, "grad_norm": 0.2779270112514496, "learning_rate": 1.5428420542288193e-05, "loss": 0.1685, "step": 37146 }, { "epoch": 0.6625584132986123, "grad_norm": 0.23913122713565826, "learning_rate": 1.54269826512574e-05, "loss": 0.0875, "step": 37147 }, { "epoch": 0.662576249420326, "grad_norm": 0.43862831592559814, "learning_rate": 1.5425544797334933e-05, "loss": 0.1914, "step": 37148 }, { "epoch": 0.6625940855420397, "grad_norm": 0.24144157767295837, "learning_rate": 1.542410698052636e-05, "loss": 0.1196, "step": 37149 }, { "epoch": 0.6626119216637534, "grad_norm": 0.2764016389846802, "learning_rate": 1.542266920083726e-05, "loss": 0.1084, "step": 37150 }, { "epoch": 0.6626297577854672, "grad_norm": 0.2363121211528778, "learning_rate": 1.5421231458273195e-05, "loss": 0.0701, "step": 37151 }, { "epoch": 0.6626475939071809, "grad_norm": 0.2862124443054199, "learning_rate": 1.541979375283975e-05, "loss": 0.1192, "step": 37152 }, { "epoch": 0.6626654300288946, "grad_norm": 0.19498895108699799, "learning_rate": 1.54183560845425e-05, "loss": 0.104, "step": 37153 }, { "epoch": 0.6626832661506082, "grad_norm": 0.2652069628238678, "learning_rate": 1.5416918453387006e-05, "loss": 0.1861, "step": 37154 }, { "epoch": 0.6627011022723219, "grad_norm": 0.22711940109729767, "learning_rate": 1.5415480859378836e-05, "loss": 0.131, "step": 37155 }, { "epoch": 0.6627189383940356, "grad_norm": 0.19824570417404175, "learning_rate": 1.541404330252358e-05, "loss": 0.1005, "step": 37156 }, { "epoch": 0.6627367745157493, "grad_norm": 0.1812756061553955, "learning_rate": 1.5412605782826805e-05, "loss": 0.1215, "step": 37157 }, { "epoch": 0.662754610637463, "grad_norm": 0.2489568591117859, "learning_rate": 1.5411168300294086e-05, "loss": 0.0906, "step": 37158 }, { "epoch": 0.6627724467591767, "grad_norm": 0.2969771921634674, "learning_rate": 1.540973085493099e-05, "loss": 0.191, "step": 37159 }, { "epoch": 0.6627902828808904, "grad_norm": 0.21314893662929535, "learning_rate": 1.5408293446743073e-05, "loss": 0.1119, "step": 37160 }, { "epoch": 0.6628081190026041, "grad_norm": 0.27313944697380066, "learning_rate": 1.5406856075735936e-05, "loss": 0.1526, "step": 37161 }, { "epoch": 0.6628259551243177, "grad_norm": 0.2530764043331146, "learning_rate": 1.5405418741915138e-05, "loss": 0.1398, "step": 37162 }, { "epoch": 0.6628437912460314, "grad_norm": 0.2006148099899292, "learning_rate": 1.5403981445286252e-05, "loss": 0.1064, "step": 37163 }, { "epoch": 0.6628616273677451, "grad_norm": 0.27970704436302185, "learning_rate": 1.540254418585484e-05, "loss": 0.1132, "step": 37164 }, { "epoch": 0.6628794634894588, "grad_norm": 0.27723613381385803, "learning_rate": 1.5401106963626487e-05, "loss": 0.1354, "step": 37165 }, { "epoch": 0.6628972996111725, "grad_norm": 0.24947862327098846, "learning_rate": 1.5399669778606766e-05, "loss": 0.0935, "step": 37166 }, { "epoch": 0.6629151357328863, "grad_norm": 0.3269851803779602, "learning_rate": 1.5398232630801233e-05, "loss": 0.1329, "step": 37167 }, { "epoch": 0.6629329718546, "grad_norm": 0.23876972496509552, "learning_rate": 1.5396795520215464e-05, "loss": 0.1363, "step": 37168 }, { "epoch": 0.6629508079763137, "grad_norm": 0.3513764441013336, "learning_rate": 1.539535844685504e-05, "loss": 0.1736, "step": 37169 }, { "epoch": 0.6629686440980274, "grad_norm": 0.28996118903160095, "learning_rate": 1.539392141072552e-05, "loss": 0.1251, "step": 37170 }, { "epoch": 0.662986480219741, "grad_norm": 0.2390926033258438, "learning_rate": 1.539248441183248e-05, "loss": 0.134, "step": 37171 }, { "epoch": 0.6630043163414547, "grad_norm": 0.30860915780067444, "learning_rate": 1.539104745018149e-05, "loss": 0.1434, "step": 37172 }, { "epoch": 0.6630221524631684, "grad_norm": 0.2948002815246582, "learning_rate": 1.5389610525778107e-05, "loss": 0.1352, "step": 37173 }, { "epoch": 0.6630399885848821, "grad_norm": 0.25885844230651855, "learning_rate": 1.538817363862793e-05, "loss": 0.0972, "step": 37174 }, { "epoch": 0.6630578247065958, "grad_norm": 0.1641605794429779, "learning_rate": 1.5386736788736506e-05, "loss": 0.0861, "step": 37175 }, { "epoch": 0.6630756608283095, "grad_norm": 0.2730233371257782, "learning_rate": 1.538529997610941e-05, "loss": 0.0921, "step": 37176 }, { "epoch": 0.6630934969500232, "grad_norm": 0.20238915085792542, "learning_rate": 1.5383863200752198e-05, "loss": 0.1117, "step": 37177 }, { "epoch": 0.6631113330717369, "grad_norm": 0.3468170762062073, "learning_rate": 1.538242646267047e-05, "loss": 0.1084, "step": 37178 }, { "epoch": 0.6631291691934506, "grad_norm": 0.1766020953655243, "learning_rate": 1.538098976186978e-05, "loss": 0.0938, "step": 37179 }, { "epoch": 0.6631470053151642, "grad_norm": 0.3839959502220154, "learning_rate": 1.5379553098355693e-05, "loss": 0.1266, "step": 37180 }, { "epoch": 0.6631648414368779, "grad_norm": 0.23723424971103668, "learning_rate": 1.5378116472133774e-05, "loss": 0.1335, "step": 37181 }, { "epoch": 0.6631826775585916, "grad_norm": 0.2310997098684311, "learning_rate": 1.5376679883209606e-05, "loss": 0.1107, "step": 37182 }, { "epoch": 0.6632005136803053, "grad_norm": 0.2645007073879242, "learning_rate": 1.5375243331588746e-05, "loss": 0.0872, "step": 37183 }, { "epoch": 0.6632183498020191, "grad_norm": 0.2963446378707886, "learning_rate": 1.5373806817276773e-05, "loss": 0.1009, "step": 37184 }, { "epoch": 0.6632361859237328, "grad_norm": 0.24584689736366272, "learning_rate": 1.537237034027925e-05, "loss": 0.1025, "step": 37185 }, { "epoch": 0.6632540220454465, "grad_norm": 0.24448126554489136, "learning_rate": 1.5370933900601732e-05, "loss": 0.1009, "step": 37186 }, { "epoch": 0.6632718581671602, "grad_norm": 0.23954446613788605, "learning_rate": 1.5369497498249812e-05, "loss": 0.1155, "step": 37187 }, { "epoch": 0.6632896942888739, "grad_norm": 0.23547305166721344, "learning_rate": 1.5368061133229046e-05, "loss": 0.1507, "step": 37188 }, { "epoch": 0.6633075304105875, "grad_norm": 0.25817275047302246, "learning_rate": 1.5366624805544998e-05, "loss": 0.0991, "step": 37189 }, { "epoch": 0.6633253665323012, "grad_norm": 0.24378524720668793, "learning_rate": 1.5365188515203237e-05, "loss": 0.1229, "step": 37190 }, { "epoch": 0.6633432026540149, "grad_norm": 0.2722286880016327, "learning_rate": 1.5363752262209334e-05, "loss": 0.0969, "step": 37191 }, { "epoch": 0.6633610387757286, "grad_norm": 0.2856522500514984, "learning_rate": 1.5362316046568866e-05, "loss": 0.1521, "step": 37192 }, { "epoch": 0.6633788748974423, "grad_norm": 0.28451889753341675, "learning_rate": 1.5360879868287382e-05, "loss": 0.1366, "step": 37193 }, { "epoch": 0.663396711019156, "grad_norm": 0.34682589769363403, "learning_rate": 1.535944372737045e-05, "loss": 0.134, "step": 37194 }, { "epoch": 0.6634145471408697, "grad_norm": 0.44659557938575745, "learning_rate": 1.5358007623823652e-05, "loss": 0.1414, "step": 37195 }, { "epoch": 0.6634323832625834, "grad_norm": 0.2704588770866394, "learning_rate": 1.535657155765255e-05, "loss": 0.1479, "step": 37196 }, { "epoch": 0.663450219384297, "grad_norm": 0.2523496150970459, "learning_rate": 1.53551355288627e-05, "loss": 0.1383, "step": 37197 }, { "epoch": 0.6634680555060107, "grad_norm": 0.2615732252597809, "learning_rate": 1.535369953745968e-05, "loss": 0.1094, "step": 37198 }, { "epoch": 0.6634858916277244, "grad_norm": 0.2392926961183548, "learning_rate": 1.5352263583449044e-05, "loss": 0.1184, "step": 37199 }, { "epoch": 0.6635037277494381, "grad_norm": 0.23568804562091827, "learning_rate": 1.5350827666836377e-05, "loss": 0.1109, "step": 37200 }, { "epoch": 0.6635215638711519, "grad_norm": 0.22187836468219757, "learning_rate": 1.534939178762723e-05, "loss": 0.1619, "step": 37201 }, { "epoch": 0.6635393999928656, "grad_norm": 0.26541996002197266, "learning_rate": 1.534795594582718e-05, "loss": 0.1248, "step": 37202 }, { "epoch": 0.6635572361145793, "grad_norm": 0.2507448196411133, "learning_rate": 1.5346520141441774e-05, "loss": 0.1064, "step": 37203 }, { "epoch": 0.663575072236293, "grad_norm": 0.45367196202278137, "learning_rate": 1.53450843744766e-05, "loss": 0.1273, "step": 37204 }, { "epoch": 0.6635929083580067, "grad_norm": 0.28770527243614197, "learning_rate": 1.534364864493721e-05, "loss": 0.0901, "step": 37205 }, { "epoch": 0.6636107444797203, "grad_norm": 0.2517922520637512, "learning_rate": 1.5342212952829178e-05, "loss": 0.1215, "step": 37206 }, { "epoch": 0.663628580601434, "grad_norm": 0.23865675926208496, "learning_rate": 1.534077729815805e-05, "loss": 0.1325, "step": 37207 }, { "epoch": 0.6636464167231477, "grad_norm": 0.27530112862586975, "learning_rate": 1.533934168092942e-05, "loss": 0.1339, "step": 37208 }, { "epoch": 0.6636642528448614, "grad_norm": 0.212025985121727, "learning_rate": 1.5337906101148837e-05, "loss": 0.1161, "step": 37209 }, { "epoch": 0.6636820889665751, "grad_norm": 0.2298341542482376, "learning_rate": 1.5336470558821865e-05, "loss": 0.1057, "step": 37210 }, { "epoch": 0.6636999250882888, "grad_norm": 0.20415493845939636, "learning_rate": 1.5335035053954068e-05, "loss": 0.1196, "step": 37211 }, { "epoch": 0.6637177612100025, "grad_norm": 0.24964316189289093, "learning_rate": 1.533359958655101e-05, "loss": 0.1169, "step": 37212 }, { "epoch": 0.6637355973317162, "grad_norm": 0.2127217799425125, "learning_rate": 1.5332164156618266e-05, "loss": 0.1157, "step": 37213 }, { "epoch": 0.6637534334534299, "grad_norm": 0.31767717003822327, "learning_rate": 1.5330728764161394e-05, "loss": 0.0895, "step": 37214 }, { "epoch": 0.6637712695751435, "grad_norm": 0.22804713249206543, "learning_rate": 1.5329293409185957e-05, "loss": 0.1074, "step": 37215 }, { "epoch": 0.6637891056968572, "grad_norm": 0.350273996591568, "learning_rate": 1.5327858091697508e-05, "loss": 0.1315, "step": 37216 }, { "epoch": 0.6638069418185709, "grad_norm": 0.26878586411476135, "learning_rate": 1.532642281170163e-05, "loss": 0.1079, "step": 37217 }, { "epoch": 0.6638247779402847, "grad_norm": 0.19158531725406647, "learning_rate": 1.5324987569203878e-05, "loss": 0.0865, "step": 37218 }, { "epoch": 0.6638426140619984, "grad_norm": 0.260093629360199, "learning_rate": 1.532355236420982e-05, "loss": 0.1128, "step": 37219 }, { "epoch": 0.6638604501837121, "grad_norm": 0.31091323494911194, "learning_rate": 1.5322117196725e-05, "loss": 0.1787, "step": 37220 }, { "epoch": 0.6638782863054258, "grad_norm": 0.2746206521987915, "learning_rate": 1.532068206675501e-05, "loss": 0.1502, "step": 37221 }, { "epoch": 0.6638961224271395, "grad_norm": 0.2346046268939972, "learning_rate": 1.53192469743054e-05, "loss": 0.1166, "step": 37222 }, { "epoch": 0.6639139585488532, "grad_norm": 0.3615010976791382, "learning_rate": 1.5317811919381732e-05, "loss": 0.1681, "step": 37223 }, { "epoch": 0.6639317946705668, "grad_norm": 0.22678299248218536, "learning_rate": 1.5316376901989565e-05, "loss": 0.0922, "step": 37224 }, { "epoch": 0.6639496307922805, "grad_norm": 0.22100037336349487, "learning_rate": 1.5314941922134463e-05, "loss": 0.1232, "step": 37225 }, { "epoch": 0.6639674669139942, "grad_norm": 0.3014891445636749, "learning_rate": 1.5313506979821996e-05, "loss": 0.0988, "step": 37226 }, { "epoch": 0.6639853030357079, "grad_norm": 0.31242236495018005, "learning_rate": 1.531207207505772e-05, "loss": 0.1717, "step": 37227 }, { "epoch": 0.6640031391574216, "grad_norm": 0.2907034754753113, "learning_rate": 1.5310637207847204e-05, "loss": 0.1076, "step": 37228 }, { "epoch": 0.6640209752791353, "grad_norm": 0.18815724551677704, "learning_rate": 1.530920237819599e-05, "loss": 0.1239, "step": 37229 }, { "epoch": 0.664038811400849, "grad_norm": 0.22786502540111542, "learning_rate": 1.530776758610967e-05, "loss": 0.0936, "step": 37230 }, { "epoch": 0.6640566475225627, "grad_norm": 0.20799139142036438, "learning_rate": 1.530633283159379e-05, "loss": 0.091, "step": 37231 }, { "epoch": 0.6640744836442763, "grad_norm": 0.3690548837184906, "learning_rate": 1.5304898114653903e-05, "loss": 0.1083, "step": 37232 }, { "epoch": 0.66409231976599, "grad_norm": 0.39600130915641785, "learning_rate": 1.5303463435295577e-05, "loss": 0.1529, "step": 37233 }, { "epoch": 0.6641101558877037, "grad_norm": 0.23733268678188324, "learning_rate": 1.5302028793524385e-05, "loss": 0.1042, "step": 37234 }, { "epoch": 0.6641279920094175, "grad_norm": 0.24712589383125305, "learning_rate": 1.5300594189345878e-05, "loss": 0.1648, "step": 37235 }, { "epoch": 0.6641458281311312, "grad_norm": 0.2119656801223755, "learning_rate": 1.5299159622765618e-05, "loss": 0.1097, "step": 37236 }, { "epoch": 0.6641636642528449, "grad_norm": 0.2934284806251526, "learning_rate": 1.5297725093789166e-05, "loss": 0.1696, "step": 37237 }, { "epoch": 0.6641815003745586, "grad_norm": 0.22951065003871918, "learning_rate": 1.5296290602422074e-05, "loss": 0.1099, "step": 37238 }, { "epoch": 0.6641993364962723, "grad_norm": 0.23003602027893066, "learning_rate": 1.5294856148669916e-05, "loss": 0.1209, "step": 37239 }, { "epoch": 0.664217172617986, "grad_norm": 0.2253616452217102, "learning_rate": 1.5293421732538256e-05, "loss": 0.1133, "step": 37240 }, { "epoch": 0.6642350087396997, "grad_norm": 0.32475483417510986, "learning_rate": 1.5291987354032643e-05, "loss": 0.1652, "step": 37241 }, { "epoch": 0.6642528448614133, "grad_norm": 0.3503333628177643, "learning_rate": 1.529055301315863e-05, "loss": 0.1639, "step": 37242 }, { "epoch": 0.664270680983127, "grad_norm": 0.20953485369682312, "learning_rate": 1.5289118709921794e-05, "loss": 0.0969, "step": 37243 }, { "epoch": 0.6642885171048407, "grad_norm": 0.22294431924819946, "learning_rate": 1.5287684444327693e-05, "loss": 0.1151, "step": 37244 }, { "epoch": 0.6643063532265544, "grad_norm": 0.28788459300994873, "learning_rate": 1.5286250216381874e-05, "loss": 0.1151, "step": 37245 }, { "epoch": 0.6643241893482681, "grad_norm": 0.37174153327941895, "learning_rate": 1.5284816026089906e-05, "loss": 0.1762, "step": 37246 }, { "epoch": 0.6643420254699818, "grad_norm": 0.3149736225605011, "learning_rate": 1.528338187345734e-05, "loss": 0.1407, "step": 37247 }, { "epoch": 0.6643598615916955, "grad_norm": 0.31120601296424866, "learning_rate": 1.5281947758489753e-05, "loss": 0.1104, "step": 37248 }, { "epoch": 0.6643776977134092, "grad_norm": 0.20940278470516205, "learning_rate": 1.5280513681192695e-05, "loss": 0.1242, "step": 37249 }, { "epoch": 0.6643955338351228, "grad_norm": 0.27586403489112854, "learning_rate": 1.5279079641571716e-05, "loss": 0.1004, "step": 37250 }, { "epoch": 0.6644133699568365, "grad_norm": 0.21651116013526917, "learning_rate": 1.5277645639632377e-05, "loss": 0.1126, "step": 37251 }, { "epoch": 0.6644312060785503, "grad_norm": 0.25119549036026, "learning_rate": 1.5276211675380248e-05, "loss": 0.1385, "step": 37252 }, { "epoch": 0.664449042200264, "grad_norm": 0.29774191975593567, "learning_rate": 1.5274777748820878e-05, "loss": 0.1543, "step": 37253 }, { "epoch": 0.6644668783219777, "grad_norm": 0.26036617159843445, "learning_rate": 1.5273343859959835e-05, "loss": 0.1102, "step": 37254 }, { "epoch": 0.6644847144436914, "grad_norm": 0.21600110828876495, "learning_rate": 1.527191000880266e-05, "loss": 0.086, "step": 37255 }, { "epoch": 0.6645025505654051, "grad_norm": 0.29409074783325195, "learning_rate": 1.527047619535493e-05, "loss": 0.1457, "step": 37256 }, { "epoch": 0.6645203866871188, "grad_norm": 0.22073373198509216, "learning_rate": 1.5269042419622196e-05, "loss": 0.0739, "step": 37257 }, { "epoch": 0.6645382228088325, "grad_norm": 0.25438931584358215, "learning_rate": 1.5267608681610013e-05, "loss": 0.0983, "step": 37258 }, { "epoch": 0.6645560589305461, "grad_norm": 0.2637360990047455, "learning_rate": 1.5266174981323934e-05, "loss": 0.1175, "step": 37259 }, { "epoch": 0.6645738950522598, "grad_norm": 0.2647033631801605, "learning_rate": 1.5264741318769522e-05, "loss": 0.1314, "step": 37260 }, { "epoch": 0.6645917311739735, "grad_norm": 0.39525842666625977, "learning_rate": 1.5263307693952344e-05, "loss": 0.1175, "step": 37261 }, { "epoch": 0.6646095672956872, "grad_norm": 0.35060861706733704, "learning_rate": 1.526187410687795e-05, "loss": 0.1691, "step": 37262 }, { "epoch": 0.6646274034174009, "grad_norm": 0.31796735525131226, "learning_rate": 1.5260440557551892e-05, "loss": 0.1187, "step": 37263 }, { "epoch": 0.6646452395391146, "grad_norm": 0.255008727312088, "learning_rate": 1.525900704597972e-05, "loss": 0.1312, "step": 37264 }, { "epoch": 0.6646630756608283, "grad_norm": 0.25454312562942505, "learning_rate": 1.5257573572167011e-05, "loss": 0.1523, "step": 37265 }, { "epoch": 0.664680911782542, "grad_norm": 0.23359286785125732, "learning_rate": 1.525614013611931e-05, "loss": 0.1236, "step": 37266 }, { "epoch": 0.6646987479042556, "grad_norm": 0.32658764719963074, "learning_rate": 1.5254706737842179e-05, "loss": 0.1456, "step": 37267 }, { "epoch": 0.6647165840259694, "grad_norm": 0.1909789741039276, "learning_rate": 1.525327337734116e-05, "loss": 0.0754, "step": 37268 }, { "epoch": 0.6647344201476831, "grad_norm": 0.23027926683425903, "learning_rate": 1.525184005462183e-05, "loss": 0.1032, "step": 37269 }, { "epoch": 0.6647522562693968, "grad_norm": 0.21528609097003937, "learning_rate": 1.5250406769689734e-05, "loss": 0.1168, "step": 37270 }, { "epoch": 0.6647700923911105, "grad_norm": 0.2575491666793823, "learning_rate": 1.5248973522550431e-05, "loss": 0.1291, "step": 37271 }, { "epoch": 0.6647879285128242, "grad_norm": 0.29947635531425476, "learning_rate": 1.524754031320946e-05, "loss": 0.1296, "step": 37272 }, { "epoch": 0.6648057646345379, "grad_norm": 0.2557018995285034, "learning_rate": 1.5246107141672405e-05, "loss": 0.146, "step": 37273 }, { "epoch": 0.6648236007562516, "grad_norm": 0.19982996582984924, "learning_rate": 1.52446740079448e-05, "loss": 0.1089, "step": 37274 }, { "epoch": 0.6648414368779653, "grad_norm": 0.3643823564052582, "learning_rate": 1.5243240912032214e-05, "loss": 0.1353, "step": 37275 }, { "epoch": 0.664859272999679, "grad_norm": 0.26143956184387207, "learning_rate": 1.5241807853940199e-05, "loss": 0.1463, "step": 37276 }, { "epoch": 0.6648771091213926, "grad_norm": 0.3711717128753662, "learning_rate": 1.5240374833674292e-05, "loss": 0.1471, "step": 37277 }, { "epoch": 0.6648949452431063, "grad_norm": 0.23246586322784424, "learning_rate": 1.523894185124008e-05, "loss": 0.0898, "step": 37278 }, { "epoch": 0.66491278136482, "grad_norm": 0.27794933319091797, "learning_rate": 1.5237508906643094e-05, "loss": 0.1653, "step": 37279 }, { "epoch": 0.6649306174865337, "grad_norm": 0.28819993138313293, "learning_rate": 1.5236075999888894e-05, "loss": 0.1708, "step": 37280 }, { "epoch": 0.6649484536082474, "grad_norm": 0.2146557718515396, "learning_rate": 1.5234643130983028e-05, "loss": 0.1228, "step": 37281 }, { "epoch": 0.6649662897299611, "grad_norm": 0.24644805490970612, "learning_rate": 1.5233210299931072e-05, "loss": 0.1597, "step": 37282 }, { "epoch": 0.6649841258516748, "grad_norm": 0.2855125069618225, "learning_rate": 1.5231777506738564e-05, "loss": 0.1579, "step": 37283 }, { "epoch": 0.6650019619733885, "grad_norm": 0.22150960564613342, "learning_rate": 1.5230344751411058e-05, "loss": 0.077, "step": 37284 }, { "epoch": 0.6650197980951023, "grad_norm": 0.25853249430656433, "learning_rate": 1.5228912033954104e-05, "loss": 0.1007, "step": 37285 }, { "epoch": 0.6650376342168159, "grad_norm": 0.19901502132415771, "learning_rate": 1.522747935437327e-05, "loss": 0.1035, "step": 37286 }, { "epoch": 0.6650554703385296, "grad_norm": 0.3312702775001526, "learning_rate": 1.5226046712674096e-05, "loss": 0.1569, "step": 37287 }, { "epoch": 0.6650733064602433, "grad_norm": 0.2932724952697754, "learning_rate": 1.5224614108862145e-05, "loss": 0.1242, "step": 37288 }, { "epoch": 0.665091142581957, "grad_norm": 0.34969285130500793, "learning_rate": 1.5223181542942969e-05, "loss": 0.0715, "step": 37289 }, { "epoch": 0.6651089787036707, "grad_norm": 0.3771221935749054, "learning_rate": 1.5221749014922104e-05, "loss": 0.1118, "step": 37290 }, { "epoch": 0.6651268148253844, "grad_norm": 0.26814377307891846, "learning_rate": 1.522031652480513e-05, "loss": 0.1423, "step": 37291 }, { "epoch": 0.6651446509470981, "grad_norm": 0.25429239869117737, "learning_rate": 1.5218884072597588e-05, "loss": 0.1601, "step": 37292 }, { "epoch": 0.6651624870688118, "grad_norm": 0.29808053374290466, "learning_rate": 1.521745165830502e-05, "loss": 0.1251, "step": 37293 }, { "epoch": 0.6651803231905254, "grad_norm": 0.19373822212219238, "learning_rate": 1.5216019281932994e-05, "loss": 0.1011, "step": 37294 }, { "epoch": 0.6651981593122391, "grad_norm": 0.22746190428733826, "learning_rate": 1.5214586943487052e-05, "loss": 0.1144, "step": 37295 }, { "epoch": 0.6652159954339528, "grad_norm": 0.3558097779750824, "learning_rate": 1.521315464297276e-05, "loss": 0.1155, "step": 37296 }, { "epoch": 0.6652338315556665, "grad_norm": 0.24452945590019226, "learning_rate": 1.5211722380395657e-05, "loss": 0.127, "step": 37297 }, { "epoch": 0.6652516676773802, "grad_norm": 0.3131833076477051, "learning_rate": 1.5210290155761292e-05, "loss": 0.128, "step": 37298 }, { "epoch": 0.6652695037990939, "grad_norm": 0.1862787902355194, "learning_rate": 1.520885796907523e-05, "loss": 0.105, "step": 37299 }, { "epoch": 0.6652873399208076, "grad_norm": 0.22851228713989258, "learning_rate": 1.5207425820343018e-05, "loss": 0.0992, "step": 37300 }, { "epoch": 0.6653051760425213, "grad_norm": 0.27868667244911194, "learning_rate": 1.5205993709570202e-05, "loss": 0.1431, "step": 37301 }, { "epoch": 0.6653230121642351, "grad_norm": 0.22867079079151154, "learning_rate": 1.5204561636762336e-05, "loss": 0.1396, "step": 37302 }, { "epoch": 0.6653408482859487, "grad_norm": 0.2716962695121765, "learning_rate": 1.520312960192497e-05, "loss": 0.1526, "step": 37303 }, { "epoch": 0.6653586844076624, "grad_norm": 0.23462536931037903, "learning_rate": 1.5201697605063667e-05, "loss": 0.1459, "step": 37304 }, { "epoch": 0.6653765205293761, "grad_norm": 0.3461110591888428, "learning_rate": 1.5200265646183965e-05, "loss": 0.1721, "step": 37305 }, { "epoch": 0.6653943566510898, "grad_norm": 0.20824572443962097, "learning_rate": 1.519883372529142e-05, "loss": 0.0859, "step": 37306 }, { "epoch": 0.6654121927728035, "grad_norm": 0.23570944368839264, "learning_rate": 1.5197401842391568e-05, "loss": 0.1198, "step": 37307 }, { "epoch": 0.6654300288945172, "grad_norm": 0.24916639924049377, "learning_rate": 1.5195969997489983e-05, "loss": 0.1244, "step": 37308 }, { "epoch": 0.6654478650162309, "grad_norm": 0.3105781078338623, "learning_rate": 1.5194538190592204e-05, "loss": 0.1539, "step": 37309 }, { "epoch": 0.6654657011379446, "grad_norm": 0.3340488076210022, "learning_rate": 1.519310642170378e-05, "loss": 0.1397, "step": 37310 }, { "epoch": 0.6654835372596583, "grad_norm": 0.27205079793930054, "learning_rate": 1.5191674690830258e-05, "loss": 0.1242, "step": 37311 }, { "epoch": 0.6655013733813719, "grad_norm": 0.2699366807937622, "learning_rate": 1.51902429979772e-05, "loss": 0.1558, "step": 37312 }, { "epoch": 0.6655192095030856, "grad_norm": 0.2082771509885788, "learning_rate": 1.518881134315015e-05, "loss": 0.0981, "step": 37313 }, { "epoch": 0.6655370456247993, "grad_norm": 0.39234793186187744, "learning_rate": 1.5187379726354651e-05, "loss": 0.1204, "step": 37314 }, { "epoch": 0.665554881746513, "grad_norm": 0.2821088135242462, "learning_rate": 1.5185948147596251e-05, "loss": 0.1681, "step": 37315 }, { "epoch": 0.6655727178682267, "grad_norm": 0.33678463101387024, "learning_rate": 1.5184516606880511e-05, "loss": 0.1437, "step": 37316 }, { "epoch": 0.6655905539899404, "grad_norm": 0.2640470266342163, "learning_rate": 1.518308510421298e-05, "loss": 0.1276, "step": 37317 }, { "epoch": 0.6656083901116541, "grad_norm": 0.2837303578853607, "learning_rate": 1.5181653639599202e-05, "loss": 0.1774, "step": 37318 }, { "epoch": 0.6656262262333679, "grad_norm": 0.2199104279279709, "learning_rate": 1.5180222213044725e-05, "loss": 0.1041, "step": 37319 }, { "epoch": 0.6656440623550816, "grad_norm": 0.231767475605011, "learning_rate": 1.5178790824555089e-05, "loss": 0.0982, "step": 37320 }, { "epoch": 0.6656618984767952, "grad_norm": 0.25228768587112427, "learning_rate": 1.517735947413586e-05, "loss": 0.1254, "step": 37321 }, { "epoch": 0.6656797345985089, "grad_norm": 0.173648864030838, "learning_rate": 1.5175928161792574e-05, "loss": 0.1081, "step": 37322 }, { "epoch": 0.6656975707202226, "grad_norm": 0.18578499555587769, "learning_rate": 1.517449688753079e-05, "loss": 0.0601, "step": 37323 }, { "epoch": 0.6657154068419363, "grad_norm": 0.27416977286338806, "learning_rate": 1.5173065651356039e-05, "loss": 0.1344, "step": 37324 }, { "epoch": 0.66573324296365, "grad_norm": 0.28001996874809265, "learning_rate": 1.517163445327389e-05, "loss": 0.0907, "step": 37325 }, { "epoch": 0.6657510790853637, "grad_norm": 0.28628695011138916, "learning_rate": 1.5170203293289878e-05, "loss": 0.1269, "step": 37326 }, { "epoch": 0.6657689152070774, "grad_norm": 0.3076476454734802, "learning_rate": 1.5168772171409556e-05, "loss": 0.1108, "step": 37327 }, { "epoch": 0.6657867513287911, "grad_norm": 0.17745445668697357, "learning_rate": 1.5167341087638457e-05, "loss": 0.1041, "step": 37328 }, { "epoch": 0.6658045874505047, "grad_norm": 0.4940611720085144, "learning_rate": 1.516591004198215e-05, "loss": 0.1069, "step": 37329 }, { "epoch": 0.6658224235722184, "grad_norm": 0.342750608921051, "learning_rate": 1.5164479034446166e-05, "loss": 0.1252, "step": 37330 }, { "epoch": 0.6658402596939321, "grad_norm": 0.3039979040622711, "learning_rate": 1.5163048065036067e-05, "loss": 0.1551, "step": 37331 }, { "epoch": 0.6658580958156458, "grad_norm": 0.2881886065006256, "learning_rate": 1.5161617133757389e-05, "loss": 0.111, "step": 37332 }, { "epoch": 0.6658759319373595, "grad_norm": 0.3448079824447632, "learning_rate": 1.516018624061567e-05, "loss": 0.1385, "step": 37333 }, { "epoch": 0.6658937680590732, "grad_norm": 0.2973722815513611, "learning_rate": 1.515875538561648e-05, "loss": 0.1542, "step": 37334 }, { "epoch": 0.6659116041807869, "grad_norm": 0.2840515077114105, "learning_rate": 1.5157324568765352e-05, "loss": 0.1621, "step": 37335 }, { "epoch": 0.6659294403025007, "grad_norm": 0.2529444694519043, "learning_rate": 1.5155893790067833e-05, "loss": 0.1333, "step": 37336 }, { "epoch": 0.6659472764242144, "grad_norm": 0.30991122126579285, "learning_rate": 1.515446304952946e-05, "loss": 0.1544, "step": 37337 }, { "epoch": 0.665965112545928, "grad_norm": 0.283700555562973, "learning_rate": 1.5153032347155799e-05, "loss": 0.143, "step": 37338 }, { "epoch": 0.6659829486676417, "grad_norm": 0.3248513638973236, "learning_rate": 1.5151601682952384e-05, "loss": 0.1698, "step": 37339 }, { "epoch": 0.6660007847893554, "grad_norm": 0.1941169798374176, "learning_rate": 1.5150171056924766e-05, "loss": 0.1146, "step": 37340 }, { "epoch": 0.6660186209110691, "grad_norm": 0.2569960355758667, "learning_rate": 1.5148740469078476e-05, "loss": 0.1083, "step": 37341 }, { "epoch": 0.6660364570327828, "grad_norm": 0.29788339138031006, "learning_rate": 1.5147309919419078e-05, "loss": 0.1055, "step": 37342 }, { "epoch": 0.6660542931544965, "grad_norm": 0.24948754906654358, "learning_rate": 1.5145879407952107e-05, "loss": 0.1125, "step": 37343 }, { "epoch": 0.6660721292762102, "grad_norm": 0.35152512788772583, "learning_rate": 1.5144448934683113e-05, "loss": 0.1285, "step": 37344 }, { "epoch": 0.6660899653979239, "grad_norm": 0.2097112536430359, "learning_rate": 1.5143018499617645e-05, "loss": 0.129, "step": 37345 }, { "epoch": 0.6661078015196376, "grad_norm": 0.24409887194633484, "learning_rate": 1.5141588102761229e-05, "loss": 0.1279, "step": 37346 }, { "epoch": 0.6661256376413512, "grad_norm": 0.2049250304698944, "learning_rate": 1.5140157744119432e-05, "loss": 0.0879, "step": 37347 }, { "epoch": 0.6661434737630649, "grad_norm": 0.2546696364879608, "learning_rate": 1.5138727423697788e-05, "loss": 0.1083, "step": 37348 }, { "epoch": 0.6661613098847786, "grad_norm": 0.3335992693901062, "learning_rate": 1.5137297141501838e-05, "loss": 0.1343, "step": 37349 }, { "epoch": 0.6661791460064923, "grad_norm": 0.25380873680114746, "learning_rate": 1.5135866897537131e-05, "loss": 0.1332, "step": 37350 }, { "epoch": 0.666196982128206, "grad_norm": 0.2732788026332855, "learning_rate": 1.5134436691809212e-05, "loss": 0.0794, "step": 37351 }, { "epoch": 0.6662148182499197, "grad_norm": 0.33003032207489014, "learning_rate": 1.5133006524323629e-05, "loss": 0.1209, "step": 37352 }, { "epoch": 0.6662326543716335, "grad_norm": 0.256030797958374, "learning_rate": 1.5131576395085916e-05, "loss": 0.1542, "step": 37353 }, { "epoch": 0.6662504904933472, "grad_norm": 0.23467621207237244, "learning_rate": 1.5130146304101616e-05, "loss": 0.1185, "step": 37354 }, { "epoch": 0.6662683266150609, "grad_norm": 0.22649657726287842, "learning_rate": 1.5128716251376285e-05, "loss": 0.0935, "step": 37355 }, { "epoch": 0.6662861627367745, "grad_norm": 0.24411751329898834, "learning_rate": 1.5127286236915461e-05, "loss": 0.127, "step": 37356 }, { "epoch": 0.6663039988584882, "grad_norm": 0.27348047494888306, "learning_rate": 1.512585626072468e-05, "loss": 0.1117, "step": 37357 }, { "epoch": 0.6663218349802019, "grad_norm": 0.2979251444339752, "learning_rate": 1.51244263228095e-05, "loss": 0.1199, "step": 37358 }, { "epoch": 0.6663396711019156, "grad_norm": 0.22357137501239777, "learning_rate": 1.512299642317544e-05, "loss": 0.0629, "step": 37359 }, { "epoch": 0.6663575072236293, "grad_norm": 0.299640029668808, "learning_rate": 1.5121566561828065e-05, "loss": 0.1337, "step": 37360 }, { "epoch": 0.666375343345343, "grad_norm": 0.2895599901676178, "learning_rate": 1.5120136738772914e-05, "loss": 0.171, "step": 37361 }, { "epoch": 0.6663931794670567, "grad_norm": 0.29385510087013245, "learning_rate": 1.5118706954015524e-05, "loss": 0.098, "step": 37362 }, { "epoch": 0.6664110155887704, "grad_norm": 0.2517750561237335, "learning_rate": 1.5117277207561428e-05, "loss": 0.1021, "step": 37363 }, { "epoch": 0.666428851710484, "grad_norm": 0.2462937831878662, "learning_rate": 1.5115847499416186e-05, "loss": 0.0917, "step": 37364 }, { "epoch": 0.6664466878321977, "grad_norm": 0.2444039285182953, "learning_rate": 1.5114417829585336e-05, "loss": 0.1111, "step": 37365 }, { "epoch": 0.6664645239539114, "grad_norm": 0.27546748518943787, "learning_rate": 1.5112988198074418e-05, "loss": 0.1172, "step": 37366 }, { "epoch": 0.6664823600756251, "grad_norm": 0.2604631781578064, "learning_rate": 1.511155860488896e-05, "loss": 0.1736, "step": 37367 }, { "epoch": 0.6665001961973388, "grad_norm": 0.45146656036376953, "learning_rate": 1.511012905003453e-05, "loss": 0.1038, "step": 37368 }, { "epoch": 0.6665180323190526, "grad_norm": 0.2320471704006195, "learning_rate": 1.5108699533516652e-05, "loss": 0.1395, "step": 37369 }, { "epoch": 0.6665358684407663, "grad_norm": 0.17088526487350464, "learning_rate": 1.5107270055340869e-05, "loss": 0.0862, "step": 37370 }, { "epoch": 0.66655370456248, "grad_norm": 0.18300753831863403, "learning_rate": 1.5105840615512728e-05, "loss": 0.1143, "step": 37371 }, { "epoch": 0.6665715406841937, "grad_norm": 0.3051344156265259, "learning_rate": 1.5104411214037756e-05, "loss": 0.1457, "step": 37372 }, { "epoch": 0.6665893768059074, "grad_norm": 0.21027489006519318, "learning_rate": 1.5102981850921515e-05, "loss": 0.1067, "step": 37373 }, { "epoch": 0.666607212927621, "grad_norm": 0.2476443201303482, "learning_rate": 1.5101552526169533e-05, "loss": 0.125, "step": 37374 }, { "epoch": 0.6666250490493347, "grad_norm": 0.2863208055496216, "learning_rate": 1.5100123239787352e-05, "loss": 0.1456, "step": 37375 }, { "epoch": 0.6666428851710484, "grad_norm": 0.19317860901355743, "learning_rate": 1.5098693991780504e-05, "loss": 0.0869, "step": 37376 }, { "epoch": 0.6666607212927621, "grad_norm": 0.2885204553604126, "learning_rate": 1.5097264782154547e-05, "loss": 0.1105, "step": 37377 }, { "epoch": 0.6666785574144758, "grad_norm": 0.26823538541793823, "learning_rate": 1.509583561091501e-05, "loss": 0.1138, "step": 37378 }, { "epoch": 0.6666963935361895, "grad_norm": 0.23470832407474518, "learning_rate": 1.5094406478067436e-05, "loss": 0.1386, "step": 37379 }, { "epoch": 0.6667142296579032, "grad_norm": 0.2544650137424469, "learning_rate": 1.5092977383617357e-05, "loss": 0.0663, "step": 37380 }, { "epoch": 0.6667320657796169, "grad_norm": 0.27774569392204285, "learning_rate": 1.5091548327570327e-05, "loss": 0.1469, "step": 37381 }, { "epoch": 0.6667499019013305, "grad_norm": 0.23503939807415009, "learning_rate": 1.5090119309931878e-05, "loss": 0.1154, "step": 37382 }, { "epoch": 0.6667677380230442, "grad_norm": 0.2723938226699829, "learning_rate": 1.508869033070755e-05, "loss": 0.1223, "step": 37383 }, { "epoch": 0.6667855741447579, "grad_norm": 0.2845105528831482, "learning_rate": 1.5087261389902876e-05, "loss": 0.1532, "step": 37384 }, { "epoch": 0.6668034102664716, "grad_norm": 0.24683833122253418, "learning_rate": 1.5085832487523399e-05, "loss": 0.1496, "step": 37385 }, { "epoch": 0.6668212463881854, "grad_norm": 0.28045058250427246, "learning_rate": 1.5084403623574664e-05, "loss": 0.1389, "step": 37386 }, { "epoch": 0.6668390825098991, "grad_norm": 0.3507380187511444, "learning_rate": 1.5082974798062208e-05, "loss": 0.1226, "step": 37387 }, { "epoch": 0.6668569186316128, "grad_norm": 0.2572086751461029, "learning_rate": 1.5081546010991566e-05, "loss": 0.1087, "step": 37388 }, { "epoch": 0.6668747547533265, "grad_norm": 0.2823340892791748, "learning_rate": 1.5080117262368269e-05, "loss": 0.154, "step": 37389 }, { "epoch": 0.6668925908750402, "grad_norm": 0.21687743067741394, "learning_rate": 1.5078688552197872e-05, "loss": 0.1105, "step": 37390 }, { "epoch": 0.6669104269967538, "grad_norm": 0.324459433555603, "learning_rate": 1.50772598804859e-05, "loss": 0.1171, "step": 37391 }, { "epoch": 0.6669282631184675, "grad_norm": 0.29453200101852417, "learning_rate": 1.5075831247237905e-05, "loss": 0.1638, "step": 37392 }, { "epoch": 0.6669460992401812, "grad_norm": 0.24993082880973816, "learning_rate": 1.5074402652459401e-05, "loss": 0.0735, "step": 37393 }, { "epoch": 0.6669639353618949, "grad_norm": 0.23562286794185638, "learning_rate": 1.5072974096155951e-05, "loss": 0.1473, "step": 37394 }, { "epoch": 0.6669817714836086, "grad_norm": 0.3867155611515045, "learning_rate": 1.5071545578333084e-05, "loss": 0.1459, "step": 37395 }, { "epoch": 0.6669996076053223, "grad_norm": 0.2661600708961487, "learning_rate": 1.5070117098996333e-05, "loss": 0.1373, "step": 37396 }, { "epoch": 0.667017443727036, "grad_norm": 0.24916689097881317, "learning_rate": 1.5068688658151236e-05, "loss": 0.113, "step": 37397 }, { "epoch": 0.6670352798487497, "grad_norm": 0.29195889830589294, "learning_rate": 1.5067260255803328e-05, "loss": 0.1025, "step": 37398 }, { "epoch": 0.6670531159704634, "grad_norm": 0.31373482942581177, "learning_rate": 1.5065831891958154e-05, "loss": 0.1264, "step": 37399 }, { "epoch": 0.667070952092177, "grad_norm": 0.3319110870361328, "learning_rate": 1.506440356662125e-05, "loss": 0.1112, "step": 37400 }, { "epoch": 0.6670887882138907, "grad_norm": 0.2719859480857849, "learning_rate": 1.5062975279798147e-05, "loss": 0.1612, "step": 37401 }, { "epoch": 0.6671066243356044, "grad_norm": 0.28437674045562744, "learning_rate": 1.5061547031494377e-05, "loss": 0.1255, "step": 37402 }, { "epoch": 0.6671244604573182, "grad_norm": 0.33990347385406494, "learning_rate": 1.5060118821715494e-05, "loss": 0.1567, "step": 37403 }, { "epoch": 0.6671422965790319, "grad_norm": 0.2851102352142334, "learning_rate": 1.5058690650467022e-05, "loss": 0.1356, "step": 37404 }, { "epoch": 0.6671601327007456, "grad_norm": 0.2860081195831299, "learning_rate": 1.5057262517754495e-05, "loss": 0.1412, "step": 37405 }, { "epoch": 0.6671779688224593, "grad_norm": 0.2555182874202728, "learning_rate": 1.505583442358345e-05, "loss": 0.1107, "step": 37406 }, { "epoch": 0.667195804944173, "grad_norm": 0.35433822870254517, "learning_rate": 1.5054406367959428e-05, "loss": 0.1064, "step": 37407 }, { "epoch": 0.6672136410658867, "grad_norm": 0.27691397070884705, "learning_rate": 1.5052978350887963e-05, "loss": 0.1711, "step": 37408 }, { "epoch": 0.6672314771876003, "grad_norm": 0.22487658262252808, "learning_rate": 1.5051550372374596e-05, "loss": 0.1222, "step": 37409 }, { "epoch": 0.667249313309314, "grad_norm": 0.23512691259384155, "learning_rate": 1.5050122432424852e-05, "loss": 0.162, "step": 37410 }, { "epoch": 0.6672671494310277, "grad_norm": 0.2749473452568054, "learning_rate": 1.5048694531044261e-05, "loss": 0.1995, "step": 37411 }, { "epoch": 0.6672849855527414, "grad_norm": 0.22493837773799896, "learning_rate": 1.5047266668238374e-05, "loss": 0.1129, "step": 37412 }, { "epoch": 0.6673028216744551, "grad_norm": 0.23057669401168823, "learning_rate": 1.5045838844012716e-05, "loss": 0.0628, "step": 37413 }, { "epoch": 0.6673206577961688, "grad_norm": 0.29819393157958984, "learning_rate": 1.5044411058372831e-05, "loss": 0.1287, "step": 37414 }, { "epoch": 0.6673384939178825, "grad_norm": 0.21266312897205353, "learning_rate": 1.5042983311324236e-05, "loss": 0.1094, "step": 37415 }, { "epoch": 0.6673563300395962, "grad_norm": 0.286541223526001, "learning_rate": 1.5041555602872487e-05, "loss": 0.1372, "step": 37416 }, { "epoch": 0.6673741661613098, "grad_norm": 0.25222474336624146, "learning_rate": 1.5040127933023108e-05, "loss": 0.1015, "step": 37417 }, { "epoch": 0.6673920022830235, "grad_norm": 0.24851714074611664, "learning_rate": 1.5038700301781627e-05, "loss": 0.1526, "step": 37418 }, { "epoch": 0.6674098384047372, "grad_norm": 0.26983392238616943, "learning_rate": 1.5037272709153587e-05, "loss": 0.1368, "step": 37419 }, { "epoch": 0.667427674526451, "grad_norm": 0.2393546849489212, "learning_rate": 1.5035845155144517e-05, "loss": 0.1037, "step": 37420 }, { "epoch": 0.6674455106481647, "grad_norm": 0.2689007520675659, "learning_rate": 1.5034417639759956e-05, "loss": 0.1096, "step": 37421 }, { "epoch": 0.6674633467698784, "grad_norm": 0.20499089360237122, "learning_rate": 1.5032990163005439e-05, "loss": 0.1121, "step": 37422 }, { "epoch": 0.6674811828915921, "grad_norm": 0.23303039371967316, "learning_rate": 1.503156272488649e-05, "loss": 0.1179, "step": 37423 }, { "epoch": 0.6674990190133058, "grad_norm": 0.24827894568443298, "learning_rate": 1.503013532540864e-05, "loss": 0.0918, "step": 37424 }, { "epoch": 0.6675168551350195, "grad_norm": 0.25609734654426575, "learning_rate": 1.5028707964577437e-05, "loss": 0.0972, "step": 37425 }, { "epoch": 0.6675346912567331, "grad_norm": 0.30394768714904785, "learning_rate": 1.5027280642398401e-05, "loss": 0.1581, "step": 37426 }, { "epoch": 0.6675525273784468, "grad_norm": 0.20294636487960815, "learning_rate": 1.5025853358877076e-05, "loss": 0.1018, "step": 37427 }, { "epoch": 0.6675703635001605, "grad_norm": 0.274728000164032, "learning_rate": 1.5024426114018977e-05, "loss": 0.126, "step": 37428 }, { "epoch": 0.6675881996218742, "grad_norm": 0.2742089331150055, "learning_rate": 1.5022998907829658e-05, "loss": 0.1272, "step": 37429 }, { "epoch": 0.6676060357435879, "grad_norm": 0.23590481281280518, "learning_rate": 1.5021571740314644e-05, "loss": 0.119, "step": 37430 }, { "epoch": 0.6676238718653016, "grad_norm": 0.3870883285999298, "learning_rate": 1.5020144611479458e-05, "loss": 0.1061, "step": 37431 }, { "epoch": 0.6676417079870153, "grad_norm": 0.21080902218818665, "learning_rate": 1.5018717521329633e-05, "loss": 0.1259, "step": 37432 }, { "epoch": 0.667659544108729, "grad_norm": 0.28261756896972656, "learning_rate": 1.5017290469870718e-05, "loss": 0.1083, "step": 37433 }, { "epoch": 0.6676773802304427, "grad_norm": 0.20457933843135834, "learning_rate": 1.5015863457108225e-05, "loss": 0.0788, "step": 37434 }, { "epoch": 0.6676952163521563, "grad_norm": 0.2747241258621216, "learning_rate": 1.50144364830477e-05, "loss": 0.1056, "step": 37435 }, { "epoch": 0.66771305247387, "grad_norm": 0.32505983114242554, "learning_rate": 1.5013009547694668e-05, "loss": 0.1596, "step": 37436 }, { "epoch": 0.6677308885955838, "grad_norm": 0.3339903652667999, "learning_rate": 1.501158265105465e-05, "loss": 0.1605, "step": 37437 }, { "epoch": 0.6677487247172975, "grad_norm": 0.23056331276893616, "learning_rate": 1.5010155793133196e-05, "loss": 0.1439, "step": 37438 }, { "epoch": 0.6677665608390112, "grad_norm": 0.3030514419078827, "learning_rate": 1.5008728973935832e-05, "loss": 0.1267, "step": 37439 }, { "epoch": 0.6677843969607249, "grad_norm": 0.2898868918418884, "learning_rate": 1.5007302193468076e-05, "loss": 0.1573, "step": 37440 }, { "epoch": 0.6678022330824386, "grad_norm": 0.24350489675998688, "learning_rate": 1.5005875451735466e-05, "loss": 0.0958, "step": 37441 }, { "epoch": 0.6678200692041523, "grad_norm": 0.3009876310825348, "learning_rate": 1.5004448748743543e-05, "loss": 0.1374, "step": 37442 }, { "epoch": 0.667837905325866, "grad_norm": 0.2699940800666809, "learning_rate": 1.500302208449783e-05, "loss": 0.1459, "step": 37443 }, { "epoch": 0.6678557414475796, "grad_norm": 0.3586570918560028, "learning_rate": 1.500159545900386e-05, "loss": 0.126, "step": 37444 }, { "epoch": 0.6678735775692933, "grad_norm": 0.2170565277338028, "learning_rate": 1.5000168872267143e-05, "loss": 0.0849, "step": 37445 }, { "epoch": 0.667891413691007, "grad_norm": 0.24756716191768646, "learning_rate": 1.499874232429324e-05, "loss": 0.1256, "step": 37446 }, { "epoch": 0.6679092498127207, "grad_norm": 0.263731449842453, "learning_rate": 1.4997315815087656e-05, "loss": 0.1356, "step": 37447 }, { "epoch": 0.6679270859344344, "grad_norm": 0.32832127809524536, "learning_rate": 1.4995889344655939e-05, "loss": 0.1379, "step": 37448 }, { "epoch": 0.6679449220561481, "grad_norm": 0.1897130012512207, "learning_rate": 1.499446291300361e-05, "loss": 0.0969, "step": 37449 }, { "epoch": 0.6679627581778618, "grad_norm": 0.25627386569976807, "learning_rate": 1.4993036520136189e-05, "loss": 0.1066, "step": 37450 }, { "epoch": 0.6679805942995755, "grad_norm": 0.29881152510643005, "learning_rate": 1.4991610166059222e-05, "loss": 0.0961, "step": 37451 }, { "epoch": 0.6679984304212891, "grad_norm": 0.2977713942527771, "learning_rate": 1.4990183850778233e-05, "loss": 0.149, "step": 37452 }, { "epoch": 0.6680162665430028, "grad_norm": 0.2646772563457489, "learning_rate": 1.4988757574298745e-05, "loss": 0.0877, "step": 37453 }, { "epoch": 0.6680341026647166, "grad_norm": 0.24207238852977753, "learning_rate": 1.498733133662629e-05, "loss": 0.1714, "step": 37454 }, { "epoch": 0.6680519387864303, "grad_norm": 0.17280913889408112, "learning_rate": 1.4985905137766396e-05, "loss": 0.071, "step": 37455 }, { "epoch": 0.668069774908144, "grad_norm": 0.21218106150627136, "learning_rate": 1.4984478977724598e-05, "loss": 0.1087, "step": 37456 }, { "epoch": 0.6680876110298577, "grad_norm": 0.32297825813293457, "learning_rate": 1.4983052856506419e-05, "loss": 0.1698, "step": 37457 }, { "epoch": 0.6681054471515714, "grad_norm": 0.2423536479473114, "learning_rate": 1.4981626774117375e-05, "loss": 0.1154, "step": 37458 }, { "epoch": 0.6681232832732851, "grad_norm": 0.26053690910339355, "learning_rate": 1.4980200730563016e-05, "loss": 0.1514, "step": 37459 }, { "epoch": 0.6681411193949988, "grad_norm": 0.29639753699302673, "learning_rate": 1.497877472584886e-05, "loss": 0.1771, "step": 37460 }, { "epoch": 0.6681589555167124, "grad_norm": 0.28842461109161377, "learning_rate": 1.4977348759980431e-05, "loss": 0.1221, "step": 37461 }, { "epoch": 0.6681767916384261, "grad_norm": 0.29308587312698364, "learning_rate": 1.4975922832963263e-05, "loss": 0.1149, "step": 37462 }, { "epoch": 0.6681946277601398, "grad_norm": 0.2799105644226074, "learning_rate": 1.4974496944802873e-05, "loss": 0.1297, "step": 37463 }, { "epoch": 0.6682124638818535, "grad_norm": 0.3396545350551605, "learning_rate": 1.4973071095504804e-05, "loss": 0.0983, "step": 37464 }, { "epoch": 0.6682303000035672, "grad_norm": 0.3781348168849945, "learning_rate": 1.4971645285074573e-05, "loss": 0.1468, "step": 37465 }, { "epoch": 0.6682481361252809, "grad_norm": 0.32027482986450195, "learning_rate": 1.497021951351771e-05, "loss": 0.1794, "step": 37466 }, { "epoch": 0.6682659722469946, "grad_norm": 0.2661987543106079, "learning_rate": 1.4968793780839729e-05, "loss": 0.127, "step": 37467 }, { "epoch": 0.6682838083687083, "grad_norm": 0.26616767048835754, "learning_rate": 1.4967368087046173e-05, "loss": 0.131, "step": 37468 }, { "epoch": 0.668301644490422, "grad_norm": 0.24877332150936127, "learning_rate": 1.496594243214257e-05, "loss": 0.1112, "step": 37469 }, { "epoch": 0.6683194806121358, "grad_norm": 0.4432615339756012, "learning_rate": 1.4964516816134443e-05, "loss": 0.1462, "step": 37470 }, { "epoch": 0.6683373167338494, "grad_norm": 0.2551150918006897, "learning_rate": 1.49630912390273e-05, "loss": 0.1285, "step": 37471 }, { "epoch": 0.6683551528555631, "grad_norm": 0.24343405663967133, "learning_rate": 1.4961665700826694e-05, "loss": 0.1049, "step": 37472 }, { "epoch": 0.6683729889772768, "grad_norm": 0.3006855547428131, "learning_rate": 1.496024020153814e-05, "loss": 0.1246, "step": 37473 }, { "epoch": 0.6683908250989905, "grad_norm": 0.2872680723667145, "learning_rate": 1.4958814741167154e-05, "loss": 0.1437, "step": 37474 }, { "epoch": 0.6684086612207042, "grad_norm": 0.24515847861766815, "learning_rate": 1.4957389319719278e-05, "loss": 0.1271, "step": 37475 }, { "epoch": 0.6684264973424179, "grad_norm": 0.2896585464477539, "learning_rate": 1.4955963937200019e-05, "loss": 0.1064, "step": 37476 }, { "epoch": 0.6684443334641316, "grad_norm": 0.2942967116832733, "learning_rate": 1.4954538593614922e-05, "loss": 0.1402, "step": 37477 }, { "epoch": 0.6684621695858453, "grad_norm": 0.264669805765152, "learning_rate": 1.4953113288969503e-05, "loss": 0.1538, "step": 37478 }, { "epoch": 0.6684800057075589, "grad_norm": 0.2579793632030487, "learning_rate": 1.495168802326929e-05, "loss": 0.1263, "step": 37479 }, { "epoch": 0.6684978418292726, "grad_norm": 0.23053547739982605, "learning_rate": 1.495026279651979e-05, "loss": 0.088, "step": 37480 }, { "epoch": 0.6685156779509863, "grad_norm": 0.2720099687576294, "learning_rate": 1.4948837608726558e-05, "loss": 0.1734, "step": 37481 }, { "epoch": 0.6685335140727, "grad_norm": 0.21285288035869598, "learning_rate": 1.4947412459895093e-05, "loss": 0.126, "step": 37482 }, { "epoch": 0.6685513501944137, "grad_norm": 0.23870287835597992, "learning_rate": 1.4945987350030934e-05, "loss": 0.0877, "step": 37483 }, { "epoch": 0.6685691863161274, "grad_norm": 0.2172452062368393, "learning_rate": 1.4944562279139596e-05, "loss": 0.0836, "step": 37484 }, { "epoch": 0.6685870224378411, "grad_norm": 0.27493658661842346, "learning_rate": 1.4943137247226614e-05, "loss": 0.1008, "step": 37485 }, { "epoch": 0.6686048585595548, "grad_norm": 0.2739734947681427, "learning_rate": 1.4941712254297504e-05, "loss": 0.1229, "step": 37486 }, { "epoch": 0.6686226946812686, "grad_norm": 0.21329782903194427, "learning_rate": 1.4940287300357794e-05, "loss": 0.0987, "step": 37487 }, { "epoch": 0.6686405308029822, "grad_norm": 0.34743237495422363, "learning_rate": 1.4938862385413e-05, "loss": 0.1747, "step": 37488 }, { "epoch": 0.6686583669246959, "grad_norm": 0.3650725185871124, "learning_rate": 1.4937437509468649e-05, "loss": 0.0952, "step": 37489 }, { "epoch": 0.6686762030464096, "grad_norm": 0.3573608100414276, "learning_rate": 1.4936012672530264e-05, "loss": 0.1652, "step": 37490 }, { "epoch": 0.6686940391681233, "grad_norm": 0.28686070442199707, "learning_rate": 1.4934587874603379e-05, "loss": 0.1206, "step": 37491 }, { "epoch": 0.668711875289837, "grad_norm": 0.2425878793001175, "learning_rate": 1.4933163115693505e-05, "loss": 0.0896, "step": 37492 }, { "epoch": 0.6687297114115507, "grad_norm": 0.2659091055393219, "learning_rate": 1.493173839580616e-05, "loss": 0.1164, "step": 37493 }, { "epoch": 0.6687475475332644, "grad_norm": 0.3583313822746277, "learning_rate": 1.4930313714946884e-05, "loss": 0.168, "step": 37494 }, { "epoch": 0.6687653836549781, "grad_norm": 0.2514447271823883, "learning_rate": 1.4928889073121183e-05, "loss": 0.0971, "step": 37495 }, { "epoch": 0.6687832197766918, "grad_norm": 0.25375911593437195, "learning_rate": 1.4927464470334592e-05, "loss": 0.1044, "step": 37496 }, { "epoch": 0.6688010558984054, "grad_norm": 0.21628263592720032, "learning_rate": 1.492603990659262e-05, "loss": 0.1277, "step": 37497 }, { "epoch": 0.6688188920201191, "grad_norm": 0.2946949005126953, "learning_rate": 1.4924615381900803e-05, "loss": 0.1275, "step": 37498 }, { "epoch": 0.6688367281418328, "grad_norm": 0.2594294250011444, "learning_rate": 1.4923190896264661e-05, "loss": 0.1148, "step": 37499 }, { "epoch": 0.6688545642635465, "grad_norm": 0.25827738642692566, "learning_rate": 1.4921766449689709e-05, "loss": 0.1141, "step": 37500 }, { "epoch": 0.6688724003852602, "grad_norm": 0.27559417486190796, "learning_rate": 1.4920342042181468e-05, "loss": 0.1147, "step": 37501 }, { "epoch": 0.6688902365069739, "grad_norm": 0.17399421334266663, "learning_rate": 1.4918917673745458e-05, "loss": 0.1275, "step": 37502 }, { "epoch": 0.6689080726286876, "grad_norm": 0.2777664065361023, "learning_rate": 1.491749334438721e-05, "loss": 0.1374, "step": 37503 }, { "epoch": 0.6689259087504014, "grad_norm": 0.2537831962108612, "learning_rate": 1.4916069054112242e-05, "loss": 0.1382, "step": 37504 }, { "epoch": 0.668943744872115, "grad_norm": 0.23989103734493256, "learning_rate": 1.4914644802926075e-05, "loss": 0.1101, "step": 37505 }, { "epoch": 0.6689615809938287, "grad_norm": 0.27971115708351135, "learning_rate": 1.4913220590834218e-05, "loss": 0.1088, "step": 37506 }, { "epoch": 0.6689794171155424, "grad_norm": 0.27331462502479553, "learning_rate": 1.4911796417842211e-05, "loss": 0.1273, "step": 37507 }, { "epoch": 0.6689972532372561, "grad_norm": 0.2122882902622223, "learning_rate": 1.4910372283955565e-05, "loss": 0.1375, "step": 37508 }, { "epoch": 0.6690150893589698, "grad_norm": 0.2488301545381546, "learning_rate": 1.49089481891798e-05, "loss": 0.135, "step": 37509 }, { "epoch": 0.6690329254806835, "grad_norm": 0.30871787667274475, "learning_rate": 1.490752413352043e-05, "loss": 0.1519, "step": 37510 }, { "epoch": 0.6690507616023972, "grad_norm": 0.276216596364975, "learning_rate": 1.4906100116982988e-05, "loss": 0.1311, "step": 37511 }, { "epoch": 0.6690685977241109, "grad_norm": 0.315306156873703, "learning_rate": 1.4904676139572992e-05, "loss": 0.1344, "step": 37512 }, { "epoch": 0.6690864338458246, "grad_norm": 0.29117345809936523, "learning_rate": 1.4903252201295959e-05, "loss": 0.1151, "step": 37513 }, { "epoch": 0.6691042699675382, "grad_norm": 0.3025839328765869, "learning_rate": 1.4901828302157406e-05, "loss": 0.1338, "step": 37514 }, { "epoch": 0.6691221060892519, "grad_norm": 0.3293962776660919, "learning_rate": 1.4900404442162843e-05, "loss": 0.1179, "step": 37515 }, { "epoch": 0.6691399422109656, "grad_norm": 0.3022843301296234, "learning_rate": 1.4898980621317813e-05, "loss": 0.0851, "step": 37516 }, { "epoch": 0.6691577783326793, "grad_norm": 0.20449146628379822, "learning_rate": 1.4897556839627818e-05, "loss": 0.0773, "step": 37517 }, { "epoch": 0.669175614454393, "grad_norm": 0.26335957646369934, "learning_rate": 1.4896133097098385e-05, "loss": 0.1209, "step": 37518 }, { "epoch": 0.6691934505761067, "grad_norm": 0.37627536058425903, "learning_rate": 1.489470939373502e-05, "loss": 0.1152, "step": 37519 }, { "epoch": 0.6692112866978204, "grad_norm": 0.21723400056362152, "learning_rate": 1.4893285729543263e-05, "loss": 0.0907, "step": 37520 }, { "epoch": 0.6692291228195342, "grad_norm": 0.25401490926742554, "learning_rate": 1.4891862104528619e-05, "loss": 0.0872, "step": 37521 }, { "epoch": 0.6692469589412479, "grad_norm": 0.2620532512664795, "learning_rate": 1.4890438518696608e-05, "loss": 0.127, "step": 37522 }, { "epoch": 0.6692647950629615, "grad_norm": 0.3761610984802246, "learning_rate": 1.4889014972052745e-05, "loss": 0.1748, "step": 37523 }, { "epoch": 0.6692826311846752, "grad_norm": 0.27166199684143066, "learning_rate": 1.4887591464602557e-05, "loss": 0.1638, "step": 37524 }, { "epoch": 0.6693004673063889, "grad_norm": 0.27220532298088074, "learning_rate": 1.4886167996351557e-05, "loss": 0.0843, "step": 37525 }, { "epoch": 0.6693183034281026, "grad_norm": 0.4786548614501953, "learning_rate": 1.4884744567305265e-05, "loss": 0.0943, "step": 37526 }, { "epoch": 0.6693361395498163, "grad_norm": 0.46628257632255554, "learning_rate": 1.4883321177469197e-05, "loss": 0.1853, "step": 37527 }, { "epoch": 0.66935397567153, "grad_norm": 0.2542572617530823, "learning_rate": 1.4881897826848861e-05, "loss": 0.0616, "step": 37528 }, { "epoch": 0.6693718117932437, "grad_norm": 0.21073196828365326, "learning_rate": 1.4880474515449794e-05, "loss": 0.0873, "step": 37529 }, { "epoch": 0.6693896479149574, "grad_norm": 0.2993996739387512, "learning_rate": 1.4879051243277497e-05, "loss": 0.1544, "step": 37530 }, { "epoch": 0.669407484036671, "grad_norm": 0.26914775371551514, "learning_rate": 1.4877628010337496e-05, "loss": 0.1671, "step": 37531 }, { "epoch": 0.6694253201583847, "grad_norm": 0.2967449426651001, "learning_rate": 1.4876204816635297e-05, "loss": 0.1226, "step": 37532 }, { "epoch": 0.6694431562800984, "grad_norm": 0.3515028655529022, "learning_rate": 1.4874781662176434e-05, "loss": 0.066, "step": 37533 }, { "epoch": 0.6694609924018121, "grad_norm": 0.3658161759376526, "learning_rate": 1.4873358546966415e-05, "loss": 0.164, "step": 37534 }, { "epoch": 0.6694788285235258, "grad_norm": 0.26576781272888184, "learning_rate": 1.4871935471010756e-05, "loss": 0.1076, "step": 37535 }, { "epoch": 0.6694966646452395, "grad_norm": 0.366319477558136, "learning_rate": 1.4870512434314965e-05, "loss": 0.1495, "step": 37536 }, { "epoch": 0.6695145007669532, "grad_norm": 0.2648712992668152, "learning_rate": 1.4869089436884576e-05, "loss": 0.0945, "step": 37537 }, { "epoch": 0.669532336888667, "grad_norm": 0.227285236120224, "learning_rate": 1.4867666478725087e-05, "loss": 0.129, "step": 37538 }, { "epoch": 0.6695501730103807, "grad_norm": 0.24719049036502838, "learning_rate": 1.486624355984203e-05, "loss": 0.1423, "step": 37539 }, { "epoch": 0.6695680091320944, "grad_norm": 0.3355978727340698, "learning_rate": 1.4864820680240913e-05, "loss": 0.1006, "step": 37540 }, { "epoch": 0.669585845253808, "grad_norm": 0.2794986069202423, "learning_rate": 1.486339783992724e-05, "loss": 0.0564, "step": 37541 }, { "epoch": 0.6696036813755217, "grad_norm": 0.3182423412799835, "learning_rate": 1.486197503890655e-05, "loss": 0.1518, "step": 37542 }, { "epoch": 0.6696215174972354, "grad_norm": 0.2631544768810272, "learning_rate": 1.4860552277184344e-05, "loss": 0.1127, "step": 37543 }, { "epoch": 0.6696393536189491, "grad_norm": 0.2074270248413086, "learning_rate": 1.4859129554766139e-05, "loss": 0.1505, "step": 37544 }, { "epoch": 0.6696571897406628, "grad_norm": 0.2757417857646942, "learning_rate": 1.485770687165744e-05, "loss": 0.0953, "step": 37545 }, { "epoch": 0.6696750258623765, "grad_norm": 0.3053838908672333, "learning_rate": 1.4856284227863784e-05, "loss": 0.1253, "step": 37546 }, { "epoch": 0.6696928619840902, "grad_norm": 0.32925012707710266, "learning_rate": 1.4854861623390676e-05, "loss": 0.0962, "step": 37547 }, { "epoch": 0.6697106981058039, "grad_norm": 0.23687401413917542, "learning_rate": 1.4853439058243626e-05, "loss": 0.1458, "step": 37548 }, { "epoch": 0.6697285342275175, "grad_norm": 0.2599889636039734, "learning_rate": 1.4852016532428143e-05, "loss": 0.1104, "step": 37549 }, { "epoch": 0.6697463703492312, "grad_norm": 0.35102471709251404, "learning_rate": 1.4850594045949757e-05, "loss": 0.1561, "step": 37550 }, { "epoch": 0.6697642064709449, "grad_norm": 0.24412554502487183, "learning_rate": 1.4849171598813966e-05, "loss": 0.1223, "step": 37551 }, { "epoch": 0.6697820425926586, "grad_norm": 0.228230282664299, "learning_rate": 1.48477491910263e-05, "loss": 0.1014, "step": 37552 }, { "epoch": 0.6697998787143723, "grad_norm": 0.3021867275238037, "learning_rate": 1.4846326822592265e-05, "loss": 0.091, "step": 37553 }, { "epoch": 0.669817714836086, "grad_norm": 0.2875548303127289, "learning_rate": 1.4844904493517364e-05, "loss": 0.1095, "step": 37554 }, { "epoch": 0.6698355509577998, "grad_norm": 0.29142051935195923, "learning_rate": 1.4843482203807132e-05, "loss": 0.0867, "step": 37555 }, { "epoch": 0.6698533870795135, "grad_norm": 0.4533658027648926, "learning_rate": 1.484205995346707e-05, "loss": 0.1635, "step": 37556 }, { "epoch": 0.6698712232012272, "grad_norm": 0.2554011344909668, "learning_rate": 1.4840637742502686e-05, "loss": 0.1144, "step": 37557 }, { "epoch": 0.6698890593229408, "grad_norm": 0.3522297143936157, "learning_rate": 1.4839215570919501e-05, "loss": 0.1675, "step": 37558 }, { "epoch": 0.6699068954446545, "grad_norm": 0.22275668382644653, "learning_rate": 1.4837793438723024e-05, "loss": 0.1349, "step": 37559 }, { "epoch": 0.6699247315663682, "grad_norm": 0.2554265558719635, "learning_rate": 1.4836371345918775e-05, "loss": 0.1304, "step": 37560 }, { "epoch": 0.6699425676880819, "grad_norm": 0.3709714710712433, "learning_rate": 1.4834949292512259e-05, "loss": 0.1173, "step": 37561 }, { "epoch": 0.6699604038097956, "grad_norm": 0.30835068225860596, "learning_rate": 1.4833527278508985e-05, "loss": 0.2022, "step": 37562 }, { "epoch": 0.6699782399315093, "grad_norm": 0.30311504006385803, "learning_rate": 1.4832105303914478e-05, "loss": 0.1439, "step": 37563 }, { "epoch": 0.669996076053223, "grad_norm": 0.2063443958759308, "learning_rate": 1.4830683368734244e-05, "loss": 0.1062, "step": 37564 }, { "epoch": 0.6700139121749367, "grad_norm": 0.43419355154037476, "learning_rate": 1.4829261472973784e-05, "loss": 0.1415, "step": 37565 }, { "epoch": 0.6700317482966504, "grad_norm": 0.28292039036750793, "learning_rate": 1.4827839616638628e-05, "loss": 0.0943, "step": 37566 }, { "epoch": 0.670049584418364, "grad_norm": 0.2872313857078552, "learning_rate": 1.482641779973427e-05, "loss": 0.1559, "step": 37567 }, { "epoch": 0.6700674205400777, "grad_norm": 0.2834457457065582, "learning_rate": 1.4824996022266242e-05, "loss": 0.1601, "step": 37568 }, { "epoch": 0.6700852566617914, "grad_norm": 0.21420888602733612, "learning_rate": 1.4823574284240037e-05, "loss": 0.0839, "step": 37569 }, { "epoch": 0.6701030927835051, "grad_norm": 0.3201926052570343, "learning_rate": 1.482215258566118e-05, "loss": 0.2149, "step": 37570 }, { "epoch": 0.6701209289052189, "grad_norm": 0.2816302180290222, "learning_rate": 1.4820730926535159e-05, "loss": 0.1251, "step": 37571 }, { "epoch": 0.6701387650269326, "grad_norm": 0.4552721679210663, "learning_rate": 1.481930930686751e-05, "loss": 0.1166, "step": 37572 }, { "epoch": 0.6701566011486463, "grad_norm": 0.2416018694639206, "learning_rate": 1.4817887726663736e-05, "loss": 0.112, "step": 37573 }, { "epoch": 0.67017443727036, "grad_norm": 0.30778974294662476, "learning_rate": 1.4816466185929351e-05, "loss": 0.1872, "step": 37574 }, { "epoch": 0.6701922733920737, "grad_norm": 0.2449854463338852, "learning_rate": 1.4815044684669848e-05, "loss": 0.0933, "step": 37575 }, { "epoch": 0.6702101095137873, "grad_norm": 0.21616952121257782, "learning_rate": 1.4813623222890758e-05, "loss": 0.12, "step": 37576 }, { "epoch": 0.670227945635501, "grad_norm": 0.24348503351211548, "learning_rate": 1.4812201800597583e-05, "loss": 0.1092, "step": 37577 }, { "epoch": 0.6702457817572147, "grad_norm": 0.37442681193351746, "learning_rate": 1.4810780417795827e-05, "loss": 0.1085, "step": 37578 }, { "epoch": 0.6702636178789284, "grad_norm": 0.28844305872917175, "learning_rate": 1.4809359074491014e-05, "loss": 0.1168, "step": 37579 }, { "epoch": 0.6702814540006421, "grad_norm": 0.29024919867515564, "learning_rate": 1.4807937770688632e-05, "loss": 0.1468, "step": 37580 }, { "epoch": 0.6702992901223558, "grad_norm": 0.34914064407348633, "learning_rate": 1.4806516506394214e-05, "loss": 0.1353, "step": 37581 }, { "epoch": 0.6703171262440695, "grad_norm": 0.232282817363739, "learning_rate": 1.4805095281613257e-05, "loss": 0.1102, "step": 37582 }, { "epoch": 0.6703349623657832, "grad_norm": 0.28244367241859436, "learning_rate": 1.4803674096351274e-05, "loss": 0.1173, "step": 37583 }, { "epoch": 0.6703527984874968, "grad_norm": 0.3104051947593689, "learning_rate": 1.4802252950613763e-05, "loss": 0.1219, "step": 37584 }, { "epoch": 0.6703706346092105, "grad_norm": 0.3874141573905945, "learning_rate": 1.480083184440625e-05, "loss": 0.1066, "step": 37585 }, { "epoch": 0.6703884707309242, "grad_norm": 0.26109880208969116, "learning_rate": 1.4799410777734229e-05, "loss": 0.1437, "step": 37586 }, { "epoch": 0.6704063068526379, "grad_norm": 0.33147236704826355, "learning_rate": 1.4797989750603223e-05, "loss": 0.1077, "step": 37587 }, { "epoch": 0.6704241429743517, "grad_norm": 0.28374454379081726, "learning_rate": 1.479656876301872e-05, "loss": 0.1403, "step": 37588 }, { "epoch": 0.6704419790960654, "grad_norm": 0.26651573181152344, "learning_rate": 1.4795147814986254e-05, "loss": 0.1233, "step": 37589 }, { "epoch": 0.6704598152177791, "grad_norm": 0.24789181351661682, "learning_rate": 1.4793726906511316e-05, "loss": 0.1185, "step": 37590 }, { "epoch": 0.6704776513394928, "grad_norm": 0.26695364713668823, "learning_rate": 1.4792306037599419e-05, "loss": 0.1285, "step": 37591 }, { "epoch": 0.6704954874612065, "grad_norm": 0.2959669828414917, "learning_rate": 1.4790885208256064e-05, "loss": 0.0921, "step": 37592 }, { "epoch": 0.6705133235829202, "grad_norm": 0.26789140701293945, "learning_rate": 1.4789464418486764e-05, "loss": 0.1107, "step": 37593 }, { "epoch": 0.6705311597046338, "grad_norm": 0.2850610017776489, "learning_rate": 1.4788043668297027e-05, "loss": 0.1426, "step": 37594 }, { "epoch": 0.6705489958263475, "grad_norm": 0.3007853925228119, "learning_rate": 1.4786622957692364e-05, "loss": 0.12, "step": 37595 }, { "epoch": 0.6705668319480612, "grad_norm": 0.24679973721504211, "learning_rate": 1.478520228667828e-05, "loss": 0.1015, "step": 37596 }, { "epoch": 0.6705846680697749, "grad_norm": 0.17484255135059357, "learning_rate": 1.478378165526027e-05, "loss": 0.0678, "step": 37597 }, { "epoch": 0.6706025041914886, "grad_norm": 0.3607724905014038, "learning_rate": 1.4782361063443858e-05, "loss": 0.1119, "step": 37598 }, { "epoch": 0.6706203403132023, "grad_norm": 0.25202685594558716, "learning_rate": 1.4780940511234542e-05, "loss": 0.1121, "step": 37599 }, { "epoch": 0.670638176434916, "grad_norm": 0.25669777393341064, "learning_rate": 1.4779519998637833e-05, "loss": 0.1373, "step": 37600 }, { "epoch": 0.6706560125566297, "grad_norm": 0.29675689339637756, "learning_rate": 1.4778099525659225e-05, "loss": 0.1726, "step": 37601 }, { "epoch": 0.6706738486783433, "grad_norm": 0.2819582223892212, "learning_rate": 1.4776679092304244e-05, "loss": 0.1096, "step": 37602 }, { "epoch": 0.670691684800057, "grad_norm": 0.26130354404449463, "learning_rate": 1.4775258698578388e-05, "loss": 0.1066, "step": 37603 }, { "epoch": 0.6707095209217707, "grad_norm": 0.28545647859573364, "learning_rate": 1.4773838344487156e-05, "loss": 0.1283, "step": 37604 }, { "epoch": 0.6707273570434845, "grad_norm": 0.24724625051021576, "learning_rate": 1.4772418030036056e-05, "loss": 0.1517, "step": 37605 }, { "epoch": 0.6707451931651982, "grad_norm": 0.2551341950893402, "learning_rate": 1.4770997755230598e-05, "loss": 0.1013, "step": 37606 }, { "epoch": 0.6707630292869119, "grad_norm": 0.3370712697505951, "learning_rate": 1.4769577520076284e-05, "loss": 0.1375, "step": 37607 }, { "epoch": 0.6707808654086256, "grad_norm": 0.35645297169685364, "learning_rate": 1.4768157324578624e-05, "loss": 0.1083, "step": 37608 }, { "epoch": 0.6707987015303393, "grad_norm": 0.36033329367637634, "learning_rate": 1.4766737168743122e-05, "loss": 0.1439, "step": 37609 }, { "epoch": 0.670816537652053, "grad_norm": 0.21240262687206268, "learning_rate": 1.4765317052575273e-05, "loss": 0.1099, "step": 37610 }, { "epoch": 0.6708343737737666, "grad_norm": 0.2624545395374298, "learning_rate": 1.4763896976080595e-05, "loss": 0.127, "step": 37611 }, { "epoch": 0.6708522098954803, "grad_norm": 0.30583328008651733, "learning_rate": 1.4762476939264592e-05, "loss": 0.1148, "step": 37612 }, { "epoch": 0.670870046017194, "grad_norm": 0.2538442611694336, "learning_rate": 1.476105694213276e-05, "loss": 0.1191, "step": 37613 }, { "epoch": 0.6708878821389077, "grad_norm": 0.26179274916648865, "learning_rate": 1.4759636984690606e-05, "loss": 0.1357, "step": 37614 }, { "epoch": 0.6709057182606214, "grad_norm": 0.22708620131015778, "learning_rate": 1.4758217066943636e-05, "loss": 0.1486, "step": 37615 }, { "epoch": 0.6709235543823351, "grad_norm": 0.22707833349704742, "learning_rate": 1.4756797188897359e-05, "loss": 0.133, "step": 37616 }, { "epoch": 0.6709413905040488, "grad_norm": 0.3958587348461151, "learning_rate": 1.4755377350557274e-05, "loss": 0.2082, "step": 37617 }, { "epoch": 0.6709592266257625, "grad_norm": 0.2120359241962433, "learning_rate": 1.4753957551928881e-05, "loss": 0.117, "step": 37618 }, { "epoch": 0.6709770627474761, "grad_norm": 0.28987663984298706, "learning_rate": 1.4752537793017684e-05, "loss": 0.1308, "step": 37619 }, { "epoch": 0.6709948988691898, "grad_norm": 0.21637216210365295, "learning_rate": 1.4751118073829195e-05, "loss": 0.1628, "step": 37620 }, { "epoch": 0.6710127349909035, "grad_norm": 0.30588361620903015, "learning_rate": 1.4749698394368911e-05, "loss": 0.1438, "step": 37621 }, { "epoch": 0.6710305711126173, "grad_norm": 0.24781139194965363, "learning_rate": 1.4748278754642341e-05, "loss": 0.1043, "step": 37622 }, { "epoch": 0.671048407234331, "grad_norm": 0.27530962228775024, "learning_rate": 1.474685915465497e-05, "loss": 0.1183, "step": 37623 }, { "epoch": 0.6710662433560447, "grad_norm": 0.315487265586853, "learning_rate": 1.4745439594412328e-05, "loss": 0.1818, "step": 37624 }, { "epoch": 0.6710840794777584, "grad_norm": 0.3093649744987488, "learning_rate": 1.47440200739199e-05, "loss": 0.1105, "step": 37625 }, { "epoch": 0.6711019155994721, "grad_norm": 0.21223153173923492, "learning_rate": 1.4742600593183192e-05, "loss": 0.1227, "step": 37626 }, { "epoch": 0.6711197517211858, "grad_norm": 0.23524074256420135, "learning_rate": 1.4741181152207702e-05, "loss": 0.0975, "step": 37627 }, { "epoch": 0.6711375878428995, "grad_norm": 0.20990169048309326, "learning_rate": 1.4739761750998943e-05, "loss": 0.1123, "step": 37628 }, { "epoch": 0.6711554239646131, "grad_norm": 0.31458204984664917, "learning_rate": 1.473834238956241e-05, "loss": 0.1226, "step": 37629 }, { "epoch": 0.6711732600863268, "grad_norm": 0.3979092538356781, "learning_rate": 1.473692306790361e-05, "loss": 0.18, "step": 37630 }, { "epoch": 0.6711910962080405, "grad_norm": 0.32381775975227356, "learning_rate": 1.473550378602804e-05, "loss": 0.1098, "step": 37631 }, { "epoch": 0.6712089323297542, "grad_norm": 0.2998827397823334, "learning_rate": 1.4734084543941192e-05, "loss": 0.139, "step": 37632 }, { "epoch": 0.6712267684514679, "grad_norm": 0.24762079119682312, "learning_rate": 1.4732665341648587e-05, "loss": 0.1503, "step": 37633 }, { "epoch": 0.6712446045731816, "grad_norm": 0.20519985258579254, "learning_rate": 1.4731246179155716e-05, "loss": 0.1049, "step": 37634 }, { "epoch": 0.6712624406948953, "grad_norm": 0.24733471870422363, "learning_rate": 1.4729827056468088e-05, "loss": 0.1011, "step": 37635 }, { "epoch": 0.671280276816609, "grad_norm": 0.3979268968105316, "learning_rate": 1.472840797359118e-05, "loss": 0.1097, "step": 37636 }, { "epoch": 0.6712981129383226, "grad_norm": 0.35445818305015564, "learning_rate": 1.4726988930530528e-05, "loss": 0.1076, "step": 37637 }, { "epoch": 0.6713159490600363, "grad_norm": 0.22412709891796112, "learning_rate": 1.472556992729161e-05, "loss": 0.1164, "step": 37638 }, { "epoch": 0.6713337851817501, "grad_norm": 0.2835678458213806, "learning_rate": 1.4724150963879935e-05, "loss": 0.1383, "step": 37639 }, { "epoch": 0.6713516213034638, "grad_norm": 0.26894861459732056, "learning_rate": 1.4722732040300985e-05, "loss": 0.1286, "step": 37640 }, { "epoch": 0.6713694574251775, "grad_norm": 0.29006555676460266, "learning_rate": 1.4721313156560291e-05, "loss": 0.1593, "step": 37641 }, { "epoch": 0.6713872935468912, "grad_norm": 0.2659665048122406, "learning_rate": 1.471989431266333e-05, "loss": 0.1596, "step": 37642 }, { "epoch": 0.6714051296686049, "grad_norm": 0.22495977580547333, "learning_rate": 1.4718475508615612e-05, "loss": 0.1216, "step": 37643 }, { "epoch": 0.6714229657903186, "grad_norm": 0.23820440471172333, "learning_rate": 1.4717056744422635e-05, "loss": 0.1599, "step": 37644 }, { "epoch": 0.6714408019120323, "grad_norm": 0.29072341322898865, "learning_rate": 1.471563802008989e-05, "loss": 0.102, "step": 37645 }, { "epoch": 0.671458638033746, "grad_norm": 0.2867191433906555, "learning_rate": 1.4714219335622891e-05, "loss": 0.1816, "step": 37646 }, { "epoch": 0.6714764741554596, "grad_norm": 0.4542645514011383, "learning_rate": 1.4712800691027135e-05, "loss": 0.1742, "step": 37647 }, { "epoch": 0.6714943102771733, "grad_norm": 0.33744609355926514, "learning_rate": 1.4711382086308109e-05, "loss": 0.0977, "step": 37648 }, { "epoch": 0.671512146398887, "grad_norm": 0.16860638558864594, "learning_rate": 1.4709963521471314e-05, "loss": 0.095, "step": 37649 }, { "epoch": 0.6715299825206007, "grad_norm": 0.244256392121315, "learning_rate": 1.4708544996522267e-05, "loss": 0.07, "step": 37650 }, { "epoch": 0.6715478186423144, "grad_norm": 0.1727474480867386, "learning_rate": 1.4707126511466452e-05, "loss": 0.1388, "step": 37651 }, { "epoch": 0.6715656547640281, "grad_norm": 0.27268147468566895, "learning_rate": 1.470570806630937e-05, "loss": 0.1342, "step": 37652 }, { "epoch": 0.6715834908857418, "grad_norm": 0.23442377150058746, "learning_rate": 1.4704289661056509e-05, "loss": 0.1139, "step": 37653 }, { "epoch": 0.6716013270074555, "grad_norm": 0.34102222323417664, "learning_rate": 1.4702871295713386e-05, "loss": 0.1338, "step": 37654 }, { "epoch": 0.6716191631291691, "grad_norm": 0.35570889711380005, "learning_rate": 1.4701452970285487e-05, "loss": 0.1748, "step": 37655 }, { "epoch": 0.6716369992508829, "grad_norm": 0.2313218116760254, "learning_rate": 1.4700034684778319e-05, "loss": 0.1355, "step": 37656 }, { "epoch": 0.6716548353725966, "grad_norm": 0.24994917213916779, "learning_rate": 1.4698616439197372e-05, "loss": 0.0958, "step": 37657 }, { "epoch": 0.6716726714943103, "grad_norm": 0.3116813600063324, "learning_rate": 1.4697198233548137e-05, "loss": 0.2071, "step": 37658 }, { "epoch": 0.671690507616024, "grad_norm": 0.2499520629644394, "learning_rate": 1.4695780067836129e-05, "loss": 0.0825, "step": 37659 }, { "epoch": 0.6717083437377377, "grad_norm": 0.24971351027488708, "learning_rate": 1.4694361942066836e-05, "loss": 0.0953, "step": 37660 }, { "epoch": 0.6717261798594514, "grad_norm": 0.2825527489185333, "learning_rate": 1.4692943856245752e-05, "loss": 0.1155, "step": 37661 }, { "epoch": 0.6717440159811651, "grad_norm": 0.26531845331192017, "learning_rate": 1.4691525810378379e-05, "loss": 0.1456, "step": 37662 }, { "epoch": 0.6717618521028788, "grad_norm": 0.26592448353767395, "learning_rate": 1.469010780447021e-05, "loss": 0.1072, "step": 37663 }, { "epoch": 0.6717796882245924, "grad_norm": 0.22369691729545593, "learning_rate": 1.4688689838526751e-05, "loss": 0.0864, "step": 37664 }, { "epoch": 0.6717975243463061, "grad_norm": 0.19722050428390503, "learning_rate": 1.468727191255349e-05, "loss": 0.0697, "step": 37665 }, { "epoch": 0.6718153604680198, "grad_norm": 0.2788572311401367, "learning_rate": 1.4685854026555915e-05, "loss": 0.2088, "step": 37666 }, { "epoch": 0.6718331965897335, "grad_norm": 0.20969094336032867, "learning_rate": 1.4684436180539543e-05, "loss": 0.096, "step": 37667 }, { "epoch": 0.6718510327114472, "grad_norm": 0.25735971331596375, "learning_rate": 1.4683018374509857e-05, "loss": 0.1556, "step": 37668 }, { "epoch": 0.6718688688331609, "grad_norm": 0.2810255289077759, "learning_rate": 1.4681600608472352e-05, "loss": 0.1574, "step": 37669 }, { "epoch": 0.6718867049548746, "grad_norm": 0.21014846861362457, "learning_rate": 1.4680182882432534e-05, "loss": 0.0914, "step": 37670 }, { "epoch": 0.6719045410765883, "grad_norm": 0.22909945249557495, "learning_rate": 1.4678765196395876e-05, "loss": 0.0959, "step": 37671 }, { "epoch": 0.6719223771983021, "grad_norm": 0.25569790601730347, "learning_rate": 1.4677347550367904e-05, "loss": 0.1828, "step": 37672 }, { "epoch": 0.6719402133200157, "grad_norm": 0.22032766044139862, "learning_rate": 1.4675929944354098e-05, "loss": 0.12, "step": 37673 }, { "epoch": 0.6719580494417294, "grad_norm": 0.3262254297733307, "learning_rate": 1.467451237835995e-05, "loss": 0.1647, "step": 37674 }, { "epoch": 0.6719758855634431, "grad_norm": 0.24804438650608063, "learning_rate": 1.4673094852390951e-05, "loss": 0.1138, "step": 37675 }, { "epoch": 0.6719937216851568, "grad_norm": 0.27605322003364563, "learning_rate": 1.4671677366452607e-05, "loss": 0.1046, "step": 37676 }, { "epoch": 0.6720115578068705, "grad_norm": 0.23179854452610016, "learning_rate": 1.4670259920550417e-05, "loss": 0.079, "step": 37677 }, { "epoch": 0.6720293939285842, "grad_norm": 0.23570826649665833, "learning_rate": 1.4668842514689863e-05, "loss": 0.1103, "step": 37678 }, { "epoch": 0.6720472300502979, "grad_norm": 0.21014539897441864, "learning_rate": 1.4667425148876435e-05, "loss": 0.1246, "step": 37679 }, { "epoch": 0.6720650661720116, "grad_norm": 0.2720472514629364, "learning_rate": 1.4666007823115646e-05, "loss": 0.1531, "step": 37680 }, { "epoch": 0.6720829022937252, "grad_norm": 0.2559182941913605, "learning_rate": 1.466459053741298e-05, "loss": 0.1738, "step": 37681 }, { "epoch": 0.6721007384154389, "grad_norm": 0.2380245476961136, "learning_rate": 1.4663173291773924e-05, "loss": 0.0951, "step": 37682 }, { "epoch": 0.6721185745371526, "grad_norm": 0.3904139995574951, "learning_rate": 1.4661756086203987e-05, "loss": 0.1241, "step": 37683 }, { "epoch": 0.6721364106588663, "grad_norm": 0.2362600564956665, "learning_rate": 1.4660338920708644e-05, "loss": 0.1296, "step": 37684 }, { "epoch": 0.67215424678058, "grad_norm": 0.22976341843605042, "learning_rate": 1.4658921795293407e-05, "loss": 0.0897, "step": 37685 }, { "epoch": 0.6721720829022937, "grad_norm": 0.3275347352027893, "learning_rate": 1.4657504709963759e-05, "loss": 0.1434, "step": 37686 }, { "epoch": 0.6721899190240074, "grad_norm": 0.27674782276153564, "learning_rate": 1.4656087664725199e-05, "loss": 0.1039, "step": 37687 }, { "epoch": 0.6722077551457211, "grad_norm": 0.2540251612663269, "learning_rate": 1.4654670659583203e-05, "loss": 0.1368, "step": 37688 }, { "epoch": 0.6722255912674349, "grad_norm": 0.23808301985263824, "learning_rate": 1.4653253694543283e-05, "loss": 0.1324, "step": 37689 }, { "epoch": 0.6722434273891486, "grad_norm": 0.32675492763519287, "learning_rate": 1.4651836769610927e-05, "loss": 0.1093, "step": 37690 }, { "epoch": 0.6722612635108622, "grad_norm": 0.24685119092464447, "learning_rate": 1.4650419884791628e-05, "loss": 0.1304, "step": 37691 }, { "epoch": 0.6722790996325759, "grad_norm": 0.2423398494720459, "learning_rate": 1.4649003040090867e-05, "loss": 0.1824, "step": 37692 }, { "epoch": 0.6722969357542896, "grad_norm": 0.39113450050354004, "learning_rate": 1.4647586235514155e-05, "loss": 0.1382, "step": 37693 }, { "epoch": 0.6723147718760033, "grad_norm": 0.27027618885040283, "learning_rate": 1.4646169471066973e-05, "loss": 0.0794, "step": 37694 }, { "epoch": 0.672332607997717, "grad_norm": 0.2594882547855377, "learning_rate": 1.4644752746754817e-05, "loss": 0.1707, "step": 37695 }, { "epoch": 0.6723504441194307, "grad_norm": 0.17667639255523682, "learning_rate": 1.4643336062583168e-05, "loss": 0.1104, "step": 37696 }, { "epoch": 0.6723682802411444, "grad_norm": 0.21792756021022797, "learning_rate": 1.464191941855753e-05, "loss": 0.1102, "step": 37697 }, { "epoch": 0.672386116362858, "grad_norm": 0.2616289556026459, "learning_rate": 1.4640502814683385e-05, "loss": 0.157, "step": 37698 }, { "epoch": 0.6724039524845717, "grad_norm": 0.23419159650802612, "learning_rate": 1.4639086250966238e-05, "loss": 0.0988, "step": 37699 }, { "epoch": 0.6724217886062854, "grad_norm": 0.2555391788482666, "learning_rate": 1.4637669727411569e-05, "loss": 0.167, "step": 37700 }, { "epoch": 0.6724396247279991, "grad_norm": 0.23203125596046448, "learning_rate": 1.4636253244024861e-05, "loss": 0.1292, "step": 37701 }, { "epoch": 0.6724574608497128, "grad_norm": 0.24973362684249878, "learning_rate": 1.4634836800811624e-05, "loss": 0.1655, "step": 37702 }, { "epoch": 0.6724752969714265, "grad_norm": 0.2770857512950897, "learning_rate": 1.4633420397777337e-05, "loss": 0.116, "step": 37703 }, { "epoch": 0.6724931330931402, "grad_norm": 0.28690508008003235, "learning_rate": 1.4632004034927496e-05, "loss": 0.0732, "step": 37704 }, { "epoch": 0.6725109692148539, "grad_norm": 0.30182111263275146, "learning_rate": 1.4630587712267579e-05, "loss": 0.1007, "step": 37705 }, { "epoch": 0.6725288053365677, "grad_norm": 0.20065154135227203, "learning_rate": 1.4629171429803096e-05, "loss": 0.1003, "step": 37706 }, { "epoch": 0.6725466414582814, "grad_norm": 0.25812432169914246, "learning_rate": 1.4627755187539526e-05, "loss": 0.114, "step": 37707 }, { "epoch": 0.672564477579995, "grad_norm": 0.32575470209121704, "learning_rate": 1.4626338985482363e-05, "loss": 0.0902, "step": 37708 }, { "epoch": 0.6725823137017087, "grad_norm": 0.3000944256782532, "learning_rate": 1.4624922823637077e-05, "loss": 0.109, "step": 37709 }, { "epoch": 0.6726001498234224, "grad_norm": 0.22485367953777313, "learning_rate": 1.4623506702009188e-05, "loss": 0.1388, "step": 37710 }, { "epoch": 0.6726179859451361, "grad_norm": 0.22892051935195923, "learning_rate": 1.4622090620604162e-05, "loss": 0.1014, "step": 37711 }, { "epoch": 0.6726358220668498, "grad_norm": 0.23906515538692474, "learning_rate": 1.4620674579427507e-05, "loss": 0.1079, "step": 37712 }, { "epoch": 0.6726536581885635, "grad_norm": 0.32567185163497925, "learning_rate": 1.4619258578484698e-05, "loss": 0.1644, "step": 37713 }, { "epoch": 0.6726714943102772, "grad_norm": 0.27001798152923584, "learning_rate": 1.461784261778122e-05, "loss": 0.0826, "step": 37714 }, { "epoch": 0.6726893304319909, "grad_norm": 0.28006112575531006, "learning_rate": 1.461642669732258e-05, "loss": 0.082, "step": 37715 }, { "epoch": 0.6727071665537045, "grad_norm": 0.2673995792865753, "learning_rate": 1.4615010817114255e-05, "loss": 0.1182, "step": 37716 }, { "epoch": 0.6727250026754182, "grad_norm": 0.3754531145095825, "learning_rate": 1.4613594977161732e-05, "loss": 0.1327, "step": 37717 }, { "epoch": 0.6727428387971319, "grad_norm": 0.24347876012325287, "learning_rate": 1.4612179177470504e-05, "loss": 0.1362, "step": 37718 }, { "epoch": 0.6727606749188456, "grad_norm": 0.2931426167488098, "learning_rate": 1.4610763418046053e-05, "loss": 0.0847, "step": 37719 }, { "epoch": 0.6727785110405593, "grad_norm": 0.26672571897506714, "learning_rate": 1.4609347698893877e-05, "loss": 0.1656, "step": 37720 }, { "epoch": 0.672796347162273, "grad_norm": 0.24935072660446167, "learning_rate": 1.460793202001946e-05, "loss": 0.0904, "step": 37721 }, { "epoch": 0.6728141832839867, "grad_norm": 0.25385987758636475, "learning_rate": 1.4606516381428276e-05, "loss": 0.1003, "step": 37722 }, { "epoch": 0.6728320194057005, "grad_norm": 0.23812651634216309, "learning_rate": 1.4605100783125836e-05, "loss": 0.1066, "step": 37723 }, { "epoch": 0.6728498555274142, "grad_norm": 0.2761055827140808, "learning_rate": 1.4603685225117614e-05, "loss": 0.0984, "step": 37724 }, { "epoch": 0.6728676916491279, "grad_norm": 0.3269663453102112, "learning_rate": 1.4602269707409094e-05, "loss": 0.1232, "step": 37725 }, { "epoch": 0.6728855277708415, "grad_norm": 0.2708067297935486, "learning_rate": 1.4600854230005775e-05, "loss": 0.0806, "step": 37726 }, { "epoch": 0.6729033638925552, "grad_norm": 0.31648701429367065, "learning_rate": 1.4599438792913126e-05, "loss": 0.1025, "step": 37727 }, { "epoch": 0.6729212000142689, "grad_norm": 0.3103322684764862, "learning_rate": 1.4598023396136653e-05, "loss": 0.1413, "step": 37728 }, { "epoch": 0.6729390361359826, "grad_norm": 0.3118903934955597, "learning_rate": 1.4596608039681835e-05, "loss": 0.1422, "step": 37729 }, { "epoch": 0.6729568722576963, "grad_norm": 0.28199806809425354, "learning_rate": 1.4595192723554158e-05, "loss": 0.183, "step": 37730 }, { "epoch": 0.67297470837941, "grad_norm": 0.23366296291351318, "learning_rate": 1.4593777447759096e-05, "loss": 0.1082, "step": 37731 }, { "epoch": 0.6729925445011237, "grad_norm": 0.19522027671337128, "learning_rate": 1.4592362212302158e-05, "loss": 0.1084, "step": 37732 }, { "epoch": 0.6730103806228374, "grad_norm": 0.25478604435920715, "learning_rate": 1.4590947017188819e-05, "loss": 0.137, "step": 37733 }, { "epoch": 0.673028216744551, "grad_norm": 0.31069138646125793, "learning_rate": 1.4589531862424555e-05, "loss": 0.1136, "step": 37734 }, { "epoch": 0.6730460528662647, "grad_norm": 0.30225273966789246, "learning_rate": 1.4588116748014869e-05, "loss": 0.1278, "step": 37735 }, { "epoch": 0.6730638889879784, "grad_norm": 0.4390554130077362, "learning_rate": 1.458670167396523e-05, "loss": 0.1379, "step": 37736 }, { "epoch": 0.6730817251096921, "grad_norm": 0.2897048890590668, "learning_rate": 1.4585286640281144e-05, "loss": 0.1609, "step": 37737 }, { "epoch": 0.6730995612314058, "grad_norm": 0.18407145142555237, "learning_rate": 1.4583871646968082e-05, "loss": 0.1144, "step": 37738 }, { "epoch": 0.6731173973531195, "grad_norm": 0.22237777709960938, "learning_rate": 1.4582456694031529e-05, "loss": 0.1183, "step": 37739 }, { "epoch": 0.6731352334748333, "grad_norm": 0.27603867650032043, "learning_rate": 1.4581041781476965e-05, "loss": 0.1045, "step": 37740 }, { "epoch": 0.673153069596547, "grad_norm": 0.23941335082054138, "learning_rate": 1.457962690930989e-05, "loss": 0.1036, "step": 37741 }, { "epoch": 0.6731709057182607, "grad_norm": 0.24371333420276642, "learning_rate": 1.457821207753578e-05, "loss": 0.1171, "step": 37742 }, { "epoch": 0.6731887418399743, "grad_norm": 0.2937003970146179, "learning_rate": 1.4576797286160121e-05, "loss": 0.1585, "step": 37743 }, { "epoch": 0.673206577961688, "grad_norm": 0.21572428941726685, "learning_rate": 1.4575382535188386e-05, "loss": 0.1314, "step": 37744 }, { "epoch": 0.6732244140834017, "grad_norm": 0.25045835971832275, "learning_rate": 1.4573967824626078e-05, "loss": 0.1522, "step": 37745 }, { "epoch": 0.6732422502051154, "grad_norm": 0.2256544828414917, "learning_rate": 1.4572553154478672e-05, "loss": 0.0705, "step": 37746 }, { "epoch": 0.6732600863268291, "grad_norm": 0.1936129331588745, "learning_rate": 1.4571138524751652e-05, "loss": 0.1348, "step": 37747 }, { "epoch": 0.6732779224485428, "grad_norm": 0.2671549916267395, "learning_rate": 1.4569723935450492e-05, "loss": 0.1277, "step": 37748 }, { "epoch": 0.6732957585702565, "grad_norm": 0.23808681964874268, "learning_rate": 1.4568309386580693e-05, "loss": 0.1205, "step": 37749 }, { "epoch": 0.6733135946919702, "grad_norm": 0.2457624226808548, "learning_rate": 1.4566894878147718e-05, "loss": 0.1246, "step": 37750 }, { "epoch": 0.6733314308136839, "grad_norm": 0.32891029119491577, "learning_rate": 1.4565480410157073e-05, "loss": 0.0401, "step": 37751 }, { "epoch": 0.6733492669353975, "grad_norm": 0.3560783565044403, "learning_rate": 1.456406598261423e-05, "loss": 0.1454, "step": 37752 }, { "epoch": 0.6733671030571112, "grad_norm": 0.24829766154289246, "learning_rate": 1.4562651595524662e-05, "loss": 0.1293, "step": 37753 }, { "epoch": 0.6733849391788249, "grad_norm": 0.348664790391922, "learning_rate": 1.4561237248893872e-05, "loss": 0.13, "step": 37754 }, { "epoch": 0.6734027753005386, "grad_norm": 0.28492653369903564, "learning_rate": 1.455982294272733e-05, "loss": 0.1264, "step": 37755 }, { "epoch": 0.6734206114222523, "grad_norm": 0.2991258502006531, "learning_rate": 1.4558408677030522e-05, "loss": 0.1346, "step": 37756 }, { "epoch": 0.6734384475439661, "grad_norm": 0.29029542207717896, "learning_rate": 1.4556994451808919e-05, "loss": 0.1613, "step": 37757 }, { "epoch": 0.6734562836656798, "grad_norm": 0.3601286709308624, "learning_rate": 1.4555580267068023e-05, "loss": 0.1574, "step": 37758 }, { "epoch": 0.6734741197873935, "grad_norm": 0.2240452915430069, "learning_rate": 1.4554166122813303e-05, "loss": 0.1322, "step": 37759 }, { "epoch": 0.6734919559091072, "grad_norm": 0.33050811290740967, "learning_rate": 1.4552752019050241e-05, "loss": 0.1286, "step": 37760 }, { "epoch": 0.6735097920308208, "grad_norm": 0.25536200404167175, "learning_rate": 1.4551337955784317e-05, "loss": 0.1477, "step": 37761 }, { "epoch": 0.6735276281525345, "grad_norm": 0.22343213856220245, "learning_rate": 1.454992393302102e-05, "loss": 0.0807, "step": 37762 }, { "epoch": 0.6735454642742482, "grad_norm": 0.2785812020301819, "learning_rate": 1.454850995076582e-05, "loss": 0.1293, "step": 37763 }, { "epoch": 0.6735633003959619, "grad_norm": 0.1965450942516327, "learning_rate": 1.4547096009024214e-05, "loss": 0.1142, "step": 37764 }, { "epoch": 0.6735811365176756, "grad_norm": 0.25995826721191406, "learning_rate": 1.4545682107801676e-05, "loss": 0.081, "step": 37765 }, { "epoch": 0.6735989726393893, "grad_norm": 0.27341073751449585, "learning_rate": 1.4544268247103673e-05, "loss": 0.1224, "step": 37766 }, { "epoch": 0.673616808761103, "grad_norm": 0.3620646297931671, "learning_rate": 1.4542854426935709e-05, "loss": 0.0664, "step": 37767 }, { "epoch": 0.6736346448828167, "grad_norm": 0.256929486989975, "learning_rate": 1.4541440647303251e-05, "loss": 0.1063, "step": 37768 }, { "epoch": 0.6736524810045303, "grad_norm": 0.20610018074512482, "learning_rate": 1.4540026908211785e-05, "loss": 0.1225, "step": 37769 }, { "epoch": 0.673670317126244, "grad_norm": 0.2695777416229248, "learning_rate": 1.4538613209666774e-05, "loss": 0.1223, "step": 37770 }, { "epoch": 0.6736881532479577, "grad_norm": 0.3033485412597656, "learning_rate": 1.4537199551673725e-05, "loss": 0.1521, "step": 37771 }, { "epoch": 0.6737059893696714, "grad_norm": 0.24626515805721283, "learning_rate": 1.4535785934238099e-05, "loss": 0.083, "step": 37772 }, { "epoch": 0.6737238254913852, "grad_norm": 0.24730589985847473, "learning_rate": 1.4534372357365383e-05, "loss": 0.1731, "step": 37773 }, { "epoch": 0.6737416616130989, "grad_norm": 0.22525490820407867, "learning_rate": 1.4532958821061047e-05, "loss": 0.0921, "step": 37774 }, { "epoch": 0.6737594977348126, "grad_norm": 0.4234084486961365, "learning_rate": 1.4531545325330587e-05, "loss": 0.1158, "step": 37775 }, { "epoch": 0.6737773338565263, "grad_norm": 0.2178460657596588, "learning_rate": 1.4530131870179469e-05, "loss": 0.0833, "step": 37776 }, { "epoch": 0.67379516997824, "grad_norm": 0.3677380084991455, "learning_rate": 1.4528718455613172e-05, "loss": 0.1582, "step": 37777 }, { "epoch": 0.6738130060999536, "grad_norm": 0.2249336689710617, "learning_rate": 1.4527305081637186e-05, "loss": 0.0623, "step": 37778 }, { "epoch": 0.6738308422216673, "grad_norm": 0.2222917079925537, "learning_rate": 1.452589174825697e-05, "loss": 0.0818, "step": 37779 }, { "epoch": 0.673848678343381, "grad_norm": 0.31255391240119934, "learning_rate": 1.452447845547803e-05, "loss": 0.0998, "step": 37780 }, { "epoch": 0.6738665144650947, "grad_norm": 0.2845156788825989, "learning_rate": 1.4523065203305828e-05, "loss": 0.1106, "step": 37781 }, { "epoch": 0.6738843505868084, "grad_norm": 0.2839960753917694, "learning_rate": 1.4521651991745844e-05, "loss": 0.1401, "step": 37782 }, { "epoch": 0.6739021867085221, "grad_norm": 0.3479841947555542, "learning_rate": 1.4520238820803545e-05, "loss": 0.1033, "step": 37783 }, { "epoch": 0.6739200228302358, "grad_norm": 0.2025499790906906, "learning_rate": 1.4518825690484427e-05, "loss": 0.1007, "step": 37784 }, { "epoch": 0.6739378589519495, "grad_norm": 0.22952230274677277, "learning_rate": 1.4517412600793966e-05, "loss": 0.1191, "step": 37785 }, { "epoch": 0.6739556950736632, "grad_norm": 0.24251607060432434, "learning_rate": 1.4515999551737633e-05, "loss": 0.1389, "step": 37786 }, { "epoch": 0.6739735311953768, "grad_norm": 0.2477579116821289, "learning_rate": 1.4514586543320897e-05, "loss": 0.1142, "step": 37787 }, { "epoch": 0.6739913673170905, "grad_norm": 0.34693047404289246, "learning_rate": 1.4513173575549255e-05, "loss": 0.1321, "step": 37788 }, { "epoch": 0.6740092034388042, "grad_norm": 0.25677019357681274, "learning_rate": 1.4511760648428171e-05, "loss": 0.131, "step": 37789 }, { "epoch": 0.674027039560518, "grad_norm": 0.2964639663696289, "learning_rate": 1.451034776196312e-05, "loss": 0.201, "step": 37790 }, { "epoch": 0.6740448756822317, "grad_norm": 0.28781330585479736, "learning_rate": 1.450893491615959e-05, "loss": 0.1566, "step": 37791 }, { "epoch": 0.6740627118039454, "grad_norm": 0.20691516995429993, "learning_rate": 1.4507522111023044e-05, "loss": 0.054, "step": 37792 }, { "epoch": 0.6740805479256591, "grad_norm": 0.22368700802326202, "learning_rate": 1.4506109346558976e-05, "loss": 0.125, "step": 37793 }, { "epoch": 0.6740983840473728, "grad_norm": 0.24548479914665222, "learning_rate": 1.4504696622772854e-05, "loss": 0.1542, "step": 37794 }, { "epoch": 0.6741162201690865, "grad_norm": 0.21965786814689636, "learning_rate": 1.4503283939670151e-05, "loss": 0.0587, "step": 37795 }, { "epoch": 0.6741340562908001, "grad_norm": 0.29775798320770264, "learning_rate": 1.4501871297256336e-05, "loss": 0.1096, "step": 37796 }, { "epoch": 0.6741518924125138, "grad_norm": 0.2897217869758606, "learning_rate": 1.45004586955369e-05, "loss": 0.0896, "step": 37797 }, { "epoch": 0.6741697285342275, "grad_norm": 0.2840767800807953, "learning_rate": 1.449904613451732e-05, "loss": 0.159, "step": 37798 }, { "epoch": 0.6741875646559412, "grad_norm": 0.31331580877304077, "learning_rate": 1.4497633614203057e-05, "loss": 0.1007, "step": 37799 }, { "epoch": 0.6742054007776549, "grad_norm": 0.2730228304862976, "learning_rate": 1.4496221134599586e-05, "loss": 0.1323, "step": 37800 }, { "epoch": 0.6742232368993686, "grad_norm": 0.3634718954563141, "learning_rate": 1.4494808695712401e-05, "loss": 0.1249, "step": 37801 }, { "epoch": 0.6742410730210823, "grad_norm": 0.2818793058395386, "learning_rate": 1.4493396297546964e-05, "loss": 0.1188, "step": 37802 }, { "epoch": 0.674258909142796, "grad_norm": 0.3337445557117462, "learning_rate": 1.4491983940108752e-05, "loss": 0.1132, "step": 37803 }, { "epoch": 0.6742767452645096, "grad_norm": 0.3687921464443207, "learning_rate": 1.4490571623403232e-05, "loss": 0.1138, "step": 37804 }, { "epoch": 0.6742945813862233, "grad_norm": 0.31602203845977783, "learning_rate": 1.4489159347435882e-05, "loss": 0.1033, "step": 37805 }, { "epoch": 0.674312417507937, "grad_norm": 0.26125404238700867, "learning_rate": 1.4487747112212196e-05, "loss": 0.1338, "step": 37806 }, { "epoch": 0.6743302536296508, "grad_norm": 0.18155883252620697, "learning_rate": 1.4486334917737629e-05, "loss": 0.1115, "step": 37807 }, { "epoch": 0.6743480897513645, "grad_norm": 0.1911650151014328, "learning_rate": 1.4484922764017661e-05, "loss": 0.1281, "step": 37808 }, { "epoch": 0.6743659258730782, "grad_norm": 0.30961698293685913, "learning_rate": 1.4483510651057752e-05, "loss": 0.1569, "step": 37809 }, { "epoch": 0.6743837619947919, "grad_norm": 0.23227068781852722, "learning_rate": 1.4482098578863401e-05, "loss": 0.1463, "step": 37810 }, { "epoch": 0.6744015981165056, "grad_norm": 0.2824059724807739, "learning_rate": 1.4480686547440067e-05, "loss": 0.1092, "step": 37811 }, { "epoch": 0.6744194342382193, "grad_norm": 0.27961429953575134, "learning_rate": 1.4479274556793226e-05, "loss": 0.1035, "step": 37812 }, { "epoch": 0.674437270359933, "grad_norm": 0.21328777074813843, "learning_rate": 1.4477862606928338e-05, "loss": 0.0839, "step": 37813 }, { "epoch": 0.6744551064816466, "grad_norm": 0.24824877083301544, "learning_rate": 1.4476450697850902e-05, "loss": 0.1318, "step": 37814 }, { "epoch": 0.6744729426033603, "grad_norm": 0.2871030569076538, "learning_rate": 1.447503882956638e-05, "loss": 0.0963, "step": 37815 }, { "epoch": 0.674490778725074, "grad_norm": 0.2195233702659607, "learning_rate": 1.4473627002080237e-05, "loss": 0.1216, "step": 37816 }, { "epoch": 0.6745086148467877, "grad_norm": 0.40571096539497375, "learning_rate": 1.4472215215397944e-05, "loss": 0.123, "step": 37817 }, { "epoch": 0.6745264509685014, "grad_norm": 0.3914256989955902, "learning_rate": 1.4470803469524991e-05, "loss": 0.0853, "step": 37818 }, { "epoch": 0.6745442870902151, "grad_norm": 0.2930600941181183, "learning_rate": 1.4469391764466828e-05, "loss": 0.1433, "step": 37819 }, { "epoch": 0.6745621232119288, "grad_norm": 0.3278384506702423, "learning_rate": 1.4467980100228951e-05, "loss": 0.1494, "step": 37820 }, { "epoch": 0.6745799593336425, "grad_norm": 0.41129058599472046, "learning_rate": 1.4466568476816822e-05, "loss": 0.1588, "step": 37821 }, { "epoch": 0.6745977954553561, "grad_norm": 0.2529033422470093, "learning_rate": 1.4465156894235904e-05, "loss": 0.1384, "step": 37822 }, { "epoch": 0.6746156315770698, "grad_norm": 0.2949357032775879, "learning_rate": 1.4463745352491682e-05, "loss": 0.1182, "step": 37823 }, { "epoch": 0.6746334676987836, "grad_norm": 0.25833311676979065, "learning_rate": 1.4462333851589624e-05, "loss": 0.13, "step": 37824 }, { "epoch": 0.6746513038204973, "grad_norm": 0.33982226252555847, "learning_rate": 1.4460922391535197e-05, "loss": 0.1381, "step": 37825 }, { "epoch": 0.674669139942211, "grad_norm": 0.32433032989501953, "learning_rate": 1.445951097233387e-05, "loss": 0.2395, "step": 37826 }, { "epoch": 0.6746869760639247, "grad_norm": 0.24779917299747467, "learning_rate": 1.4458099593991126e-05, "loss": 0.1379, "step": 37827 }, { "epoch": 0.6747048121856384, "grad_norm": 0.20598551630973816, "learning_rate": 1.445668825651243e-05, "loss": 0.0713, "step": 37828 }, { "epoch": 0.6747226483073521, "grad_norm": 0.1937335729598999, "learning_rate": 1.4455276959903252e-05, "loss": 0.1075, "step": 37829 }, { "epoch": 0.6747404844290658, "grad_norm": 0.28307947516441345, "learning_rate": 1.4453865704169062e-05, "loss": 0.1461, "step": 37830 }, { "epoch": 0.6747583205507794, "grad_norm": 0.2247610092163086, "learning_rate": 1.445245448931532e-05, "loss": 0.1144, "step": 37831 }, { "epoch": 0.6747761566724931, "grad_norm": 0.33952969312667847, "learning_rate": 1.4451043315347517e-05, "loss": 0.0988, "step": 37832 }, { "epoch": 0.6747939927942068, "grad_norm": 0.253804087638855, "learning_rate": 1.4449632182271106e-05, "loss": 0.1209, "step": 37833 }, { "epoch": 0.6748118289159205, "grad_norm": 0.253116637468338, "learning_rate": 1.4448221090091574e-05, "loss": 0.0978, "step": 37834 }, { "epoch": 0.6748296650376342, "grad_norm": 0.20662766695022583, "learning_rate": 1.4446810038814371e-05, "loss": 0.1137, "step": 37835 }, { "epoch": 0.6748475011593479, "grad_norm": 0.2772424817085266, "learning_rate": 1.4445399028444987e-05, "loss": 0.0877, "step": 37836 }, { "epoch": 0.6748653372810616, "grad_norm": 0.23932193219661713, "learning_rate": 1.4443988058988884e-05, "loss": 0.0926, "step": 37837 }, { "epoch": 0.6748831734027753, "grad_norm": 0.2180161029100418, "learning_rate": 1.4442577130451524e-05, "loss": 0.1122, "step": 37838 }, { "epoch": 0.674901009524489, "grad_norm": 0.2553662061691284, "learning_rate": 1.4441166242838378e-05, "loss": 0.0767, "step": 37839 }, { "epoch": 0.6749188456462026, "grad_norm": 0.390030175447464, "learning_rate": 1.4439755396154925e-05, "loss": 0.1513, "step": 37840 }, { "epoch": 0.6749366817679164, "grad_norm": 0.2173352986574173, "learning_rate": 1.4438344590406627e-05, "loss": 0.0845, "step": 37841 }, { "epoch": 0.6749545178896301, "grad_norm": 0.2620837688446045, "learning_rate": 1.4436933825598952e-05, "loss": 0.1291, "step": 37842 }, { "epoch": 0.6749723540113438, "grad_norm": 0.28236278891563416, "learning_rate": 1.4435523101737375e-05, "loss": 0.1094, "step": 37843 }, { "epoch": 0.6749901901330575, "grad_norm": 0.2069295197725296, "learning_rate": 1.4434112418827344e-05, "loss": 0.1052, "step": 37844 }, { "epoch": 0.6750080262547712, "grad_norm": 0.3842110335826874, "learning_rate": 1.4432701776874354e-05, "loss": 0.1411, "step": 37845 }, { "epoch": 0.6750258623764849, "grad_norm": 0.22237244248390198, "learning_rate": 1.4431291175883854e-05, "loss": 0.0957, "step": 37846 }, { "epoch": 0.6750436984981986, "grad_norm": 0.26728424429893494, "learning_rate": 1.442988061586133e-05, "loss": 0.1002, "step": 37847 }, { "epoch": 0.6750615346199123, "grad_norm": 0.22909174859523773, "learning_rate": 1.4428470096812224e-05, "loss": 0.0889, "step": 37848 }, { "epoch": 0.6750793707416259, "grad_norm": 0.23479020595550537, "learning_rate": 1.4427059618742034e-05, "loss": 0.121, "step": 37849 }, { "epoch": 0.6750972068633396, "grad_norm": 0.29722630977630615, "learning_rate": 1.4425649181656213e-05, "loss": 0.0984, "step": 37850 }, { "epoch": 0.6751150429850533, "grad_norm": 0.27301540970802307, "learning_rate": 1.4424238785560226e-05, "loss": 0.1244, "step": 37851 }, { "epoch": 0.675132879106767, "grad_norm": 0.28271132707595825, "learning_rate": 1.4422828430459533e-05, "loss": 0.1273, "step": 37852 }, { "epoch": 0.6751507152284807, "grad_norm": 0.2761266231536865, "learning_rate": 1.442141811635962e-05, "loss": 0.1695, "step": 37853 }, { "epoch": 0.6751685513501944, "grad_norm": 0.24770000576972961, "learning_rate": 1.4420007843265943e-05, "loss": 0.103, "step": 37854 }, { "epoch": 0.6751863874719081, "grad_norm": 0.197562575340271, "learning_rate": 1.4418597611183973e-05, "loss": 0.083, "step": 37855 }, { "epoch": 0.6752042235936218, "grad_norm": 0.4004027843475342, "learning_rate": 1.4417187420119171e-05, "loss": 0.1343, "step": 37856 }, { "epoch": 0.6752220597153354, "grad_norm": 0.2654779851436615, "learning_rate": 1.4415777270076996e-05, "loss": 0.1447, "step": 37857 }, { "epoch": 0.6752398958370492, "grad_norm": 0.21590454876422882, "learning_rate": 1.4414367161062936e-05, "loss": 0.0801, "step": 37858 }, { "epoch": 0.6752577319587629, "grad_norm": 0.2510049343109131, "learning_rate": 1.4412957093082441e-05, "loss": 0.0771, "step": 37859 }, { "epoch": 0.6752755680804766, "grad_norm": 0.34335780143737793, "learning_rate": 1.4411547066140974e-05, "loss": 0.1702, "step": 37860 }, { "epoch": 0.6752934042021903, "grad_norm": 0.2221246212720871, "learning_rate": 1.4410137080244007e-05, "loss": 0.1331, "step": 37861 }, { "epoch": 0.675311240323904, "grad_norm": 0.24388906359672546, "learning_rate": 1.4408727135397016e-05, "loss": 0.1557, "step": 37862 }, { "epoch": 0.6753290764456177, "grad_norm": 0.2664140462875366, "learning_rate": 1.4407317231605455e-05, "loss": 0.0898, "step": 37863 }, { "epoch": 0.6753469125673314, "grad_norm": 0.25646162033081055, "learning_rate": 1.4405907368874793e-05, "loss": 0.0945, "step": 37864 }, { "epoch": 0.6753647486890451, "grad_norm": 0.26063966751098633, "learning_rate": 1.4404497547210485e-05, "loss": 0.1266, "step": 37865 }, { "epoch": 0.6753825848107587, "grad_norm": 0.3244636356830597, "learning_rate": 1.4403087766618011e-05, "loss": 0.1215, "step": 37866 }, { "epoch": 0.6754004209324724, "grad_norm": 0.304988831281662, "learning_rate": 1.4401678027102833e-05, "loss": 0.1505, "step": 37867 }, { "epoch": 0.6754182570541861, "grad_norm": 0.28232863545417786, "learning_rate": 1.4400268328670407e-05, "loss": 0.1566, "step": 37868 }, { "epoch": 0.6754360931758998, "grad_norm": 0.2966096103191376, "learning_rate": 1.4398858671326203e-05, "loss": 0.1068, "step": 37869 }, { "epoch": 0.6754539292976135, "grad_norm": 0.24642762541770935, "learning_rate": 1.4397449055075674e-05, "loss": 0.1062, "step": 37870 }, { "epoch": 0.6754717654193272, "grad_norm": 0.38957732915878296, "learning_rate": 1.4396039479924307e-05, "loss": 0.1656, "step": 37871 }, { "epoch": 0.6754896015410409, "grad_norm": 0.24745139479637146, "learning_rate": 1.4394629945877552e-05, "loss": 0.1093, "step": 37872 }, { "epoch": 0.6755074376627546, "grad_norm": 0.2550305426120758, "learning_rate": 1.4393220452940864e-05, "loss": 0.0631, "step": 37873 }, { "epoch": 0.6755252737844682, "grad_norm": 0.22651255130767822, "learning_rate": 1.4391811001119727e-05, "loss": 0.067, "step": 37874 }, { "epoch": 0.675543109906182, "grad_norm": 0.3496115803718567, "learning_rate": 1.4390401590419584e-05, "loss": 0.1319, "step": 37875 }, { "epoch": 0.6755609460278957, "grad_norm": 0.23300904035568237, "learning_rate": 1.438899222084592e-05, "loss": 0.108, "step": 37876 }, { "epoch": 0.6755787821496094, "grad_norm": 0.36536699533462524, "learning_rate": 1.4387582892404184e-05, "loss": 0.1628, "step": 37877 }, { "epoch": 0.6755966182713231, "grad_norm": 0.2752687633037567, "learning_rate": 1.4386173605099835e-05, "loss": 0.1644, "step": 37878 }, { "epoch": 0.6756144543930368, "grad_norm": 0.2863720953464508, "learning_rate": 1.4384764358938351e-05, "loss": 0.1266, "step": 37879 }, { "epoch": 0.6756322905147505, "grad_norm": 0.25772207975387573, "learning_rate": 1.4383355153925191e-05, "loss": 0.1158, "step": 37880 }, { "epoch": 0.6756501266364642, "grad_norm": 0.19938026368618011, "learning_rate": 1.4381945990065809e-05, "loss": 0.1282, "step": 37881 }, { "epoch": 0.6756679627581779, "grad_norm": 0.2639893591403961, "learning_rate": 1.4380536867365674e-05, "loss": 0.116, "step": 37882 }, { "epoch": 0.6756857988798916, "grad_norm": 0.2264513075351715, "learning_rate": 1.4379127785830238e-05, "loss": 0.1034, "step": 37883 }, { "epoch": 0.6757036350016052, "grad_norm": 0.34920793771743774, "learning_rate": 1.4377718745464975e-05, "loss": 0.1295, "step": 37884 }, { "epoch": 0.6757214711233189, "grad_norm": 0.20631419122219086, "learning_rate": 1.4376309746275345e-05, "loss": 0.1468, "step": 37885 }, { "epoch": 0.6757393072450326, "grad_norm": 0.2549431622028351, "learning_rate": 1.437490078826681e-05, "loss": 0.0909, "step": 37886 }, { "epoch": 0.6757571433667463, "grad_norm": 0.2594625651836395, "learning_rate": 1.4373491871444822e-05, "loss": 0.0856, "step": 37887 }, { "epoch": 0.67577497948846, "grad_norm": 0.34404274821281433, "learning_rate": 1.4372082995814845e-05, "loss": 0.105, "step": 37888 }, { "epoch": 0.6757928156101737, "grad_norm": 0.30066806077957153, "learning_rate": 1.4370674161382355e-05, "loss": 0.1412, "step": 37889 }, { "epoch": 0.6758106517318874, "grad_norm": 0.3054562211036682, "learning_rate": 1.4369265368152803e-05, "loss": 0.1424, "step": 37890 }, { "epoch": 0.6758284878536012, "grad_norm": 0.23939236998558044, "learning_rate": 1.4367856616131642e-05, "loss": 0.1216, "step": 37891 }, { "epoch": 0.6758463239753149, "grad_norm": 0.23606866598129272, "learning_rate": 1.436644790532435e-05, "loss": 0.1296, "step": 37892 }, { "epoch": 0.6758641600970285, "grad_norm": 0.23873786628246307, "learning_rate": 1.4365039235736383e-05, "loss": 0.1733, "step": 37893 }, { "epoch": 0.6758819962187422, "grad_norm": 0.2883201837539673, "learning_rate": 1.436363060737319e-05, "loss": 0.1186, "step": 37894 }, { "epoch": 0.6758998323404559, "grad_norm": 0.23939408361911774, "learning_rate": 1.4362222020240243e-05, "loss": 0.1131, "step": 37895 }, { "epoch": 0.6759176684621696, "grad_norm": 0.2642723023891449, "learning_rate": 1.4360813474342988e-05, "loss": 0.0918, "step": 37896 }, { "epoch": 0.6759355045838833, "grad_norm": 0.25423380732536316, "learning_rate": 1.4359404969686902e-05, "loss": 0.0975, "step": 37897 }, { "epoch": 0.675953340705597, "grad_norm": 0.31318897008895874, "learning_rate": 1.4357996506277438e-05, "loss": 0.1159, "step": 37898 }, { "epoch": 0.6759711768273107, "grad_norm": 0.20078317821025848, "learning_rate": 1.4356588084120055e-05, "loss": 0.1083, "step": 37899 }, { "epoch": 0.6759890129490244, "grad_norm": 0.3042323887348175, "learning_rate": 1.4355179703220204e-05, "loss": 0.1653, "step": 37900 }, { "epoch": 0.676006849070738, "grad_norm": 0.25910013914108276, "learning_rate": 1.4353771363583362e-05, "loss": 0.1568, "step": 37901 }, { "epoch": 0.6760246851924517, "grad_norm": 0.31457364559173584, "learning_rate": 1.435236306521497e-05, "loss": 0.1314, "step": 37902 }, { "epoch": 0.6760425213141654, "grad_norm": 0.2601189613342285, "learning_rate": 1.4350954808120507e-05, "loss": 0.1316, "step": 37903 }, { "epoch": 0.6760603574358791, "grad_norm": 0.314890593290329, "learning_rate": 1.4349546592305408e-05, "loss": 0.1175, "step": 37904 }, { "epoch": 0.6760781935575928, "grad_norm": 0.26978427171707153, "learning_rate": 1.4348138417775159e-05, "loss": 0.1, "step": 37905 }, { "epoch": 0.6760960296793065, "grad_norm": 0.22783416509628296, "learning_rate": 1.4346730284535204e-05, "loss": 0.1043, "step": 37906 }, { "epoch": 0.6761138658010202, "grad_norm": 0.2718282639980316, "learning_rate": 1.4345322192591e-05, "loss": 0.069, "step": 37907 }, { "epoch": 0.676131701922734, "grad_norm": 0.33067601919174194, "learning_rate": 1.4343914141948006e-05, "loss": 0.1567, "step": 37908 }, { "epoch": 0.6761495380444477, "grad_norm": 0.2838038206100464, "learning_rate": 1.4342506132611675e-05, "loss": 0.1721, "step": 37909 }, { "epoch": 0.6761673741661614, "grad_norm": 0.2564777731895447, "learning_rate": 1.434109816458748e-05, "loss": 0.1374, "step": 37910 }, { "epoch": 0.676185210287875, "grad_norm": 0.3623526692390442, "learning_rate": 1.4339690237880869e-05, "loss": 0.1656, "step": 37911 }, { "epoch": 0.6762030464095887, "grad_norm": 0.22427743673324585, "learning_rate": 1.4338282352497301e-05, "loss": 0.1404, "step": 37912 }, { "epoch": 0.6762208825313024, "grad_norm": 0.26960861682891846, "learning_rate": 1.4336874508442222e-05, "loss": 0.1439, "step": 37913 }, { "epoch": 0.6762387186530161, "grad_norm": 0.2435963749885559, "learning_rate": 1.4335466705721113e-05, "loss": 0.1157, "step": 37914 }, { "epoch": 0.6762565547747298, "grad_norm": 0.2895001769065857, "learning_rate": 1.4334058944339403e-05, "loss": 0.1116, "step": 37915 }, { "epoch": 0.6762743908964435, "grad_norm": 0.2829170227050781, "learning_rate": 1.433265122430258e-05, "loss": 0.1549, "step": 37916 }, { "epoch": 0.6762922270181572, "grad_norm": 0.169818714261055, "learning_rate": 1.4331243545616075e-05, "loss": 0.0932, "step": 37917 }, { "epoch": 0.6763100631398709, "grad_norm": 0.30012017488479614, "learning_rate": 1.4329835908285361e-05, "loss": 0.1634, "step": 37918 }, { "epoch": 0.6763278992615845, "grad_norm": 0.2552509903907776, "learning_rate": 1.432842831231589e-05, "loss": 0.0841, "step": 37919 }, { "epoch": 0.6763457353832982, "grad_norm": 0.21990834176540375, "learning_rate": 1.4327020757713116e-05, "loss": 0.1197, "step": 37920 }, { "epoch": 0.6763635715050119, "grad_norm": 0.27054712176322937, "learning_rate": 1.43256132444825e-05, "loss": 0.1602, "step": 37921 }, { "epoch": 0.6763814076267256, "grad_norm": 0.27129170298576355, "learning_rate": 1.4324205772629481e-05, "loss": 0.1018, "step": 37922 }, { "epoch": 0.6763992437484393, "grad_norm": 0.24261508882045746, "learning_rate": 1.4322798342159537e-05, "loss": 0.1258, "step": 37923 }, { "epoch": 0.676417079870153, "grad_norm": 0.27131831645965576, "learning_rate": 1.4321390953078117e-05, "loss": 0.14, "step": 37924 }, { "epoch": 0.6764349159918668, "grad_norm": 0.31937697529792786, "learning_rate": 1.4319983605390672e-05, "loss": 0.1184, "step": 37925 }, { "epoch": 0.6764527521135805, "grad_norm": 0.27219003438949585, "learning_rate": 1.4318576299102648e-05, "loss": 0.0814, "step": 37926 }, { "epoch": 0.6764705882352942, "grad_norm": 0.31953704357147217, "learning_rate": 1.4317169034219524e-05, "loss": 0.1688, "step": 37927 }, { "epoch": 0.6764884243570078, "grad_norm": 0.2735213339328766, "learning_rate": 1.4315761810746741e-05, "loss": 0.1239, "step": 37928 }, { "epoch": 0.6765062604787215, "grad_norm": 0.30123355984687805, "learning_rate": 1.4314354628689746e-05, "loss": 0.1326, "step": 37929 }, { "epoch": 0.6765240966004352, "grad_norm": 0.22299256920814514, "learning_rate": 1.4312947488054013e-05, "loss": 0.112, "step": 37930 }, { "epoch": 0.6765419327221489, "grad_norm": 0.24233022332191467, "learning_rate": 1.431154038884498e-05, "loss": 0.0892, "step": 37931 }, { "epoch": 0.6765597688438626, "grad_norm": 0.29069089889526367, "learning_rate": 1.4310133331068112e-05, "loss": 0.1507, "step": 37932 }, { "epoch": 0.6765776049655763, "grad_norm": 0.19256910681724548, "learning_rate": 1.4308726314728863e-05, "loss": 0.094, "step": 37933 }, { "epoch": 0.67659544108729, "grad_norm": 0.3973245322704315, "learning_rate": 1.4307319339832681e-05, "loss": 0.0842, "step": 37934 }, { "epoch": 0.6766132772090037, "grad_norm": 0.23836465179920197, "learning_rate": 1.4305912406385016e-05, "loss": 0.1159, "step": 37935 }, { "epoch": 0.6766311133307173, "grad_norm": 0.23498216271400452, "learning_rate": 1.4304505514391337e-05, "loss": 0.1188, "step": 37936 }, { "epoch": 0.676648949452431, "grad_norm": 0.3663347065448761, "learning_rate": 1.430309866385709e-05, "loss": 0.1617, "step": 37937 }, { "epoch": 0.6766667855741447, "grad_norm": 0.22820499539375305, "learning_rate": 1.4301691854787725e-05, "loss": 0.1235, "step": 37938 }, { "epoch": 0.6766846216958584, "grad_norm": 0.1825525313615799, "learning_rate": 1.4300285087188686e-05, "loss": 0.0908, "step": 37939 }, { "epoch": 0.6767024578175721, "grad_norm": 0.22258968651294708, "learning_rate": 1.4298878361065449e-05, "loss": 0.1015, "step": 37940 }, { "epoch": 0.6767202939392858, "grad_norm": 0.2557908892631531, "learning_rate": 1.4297471676423458e-05, "loss": 0.1315, "step": 37941 }, { "epoch": 0.6767381300609996, "grad_norm": 0.31204521656036377, "learning_rate": 1.429606503326815e-05, "loss": 0.1506, "step": 37942 }, { "epoch": 0.6767559661827133, "grad_norm": 0.24505315721035004, "learning_rate": 1.4294658431605002e-05, "loss": 0.1057, "step": 37943 }, { "epoch": 0.676773802304427, "grad_norm": 0.26397737860679626, "learning_rate": 1.4293251871439445e-05, "loss": 0.1489, "step": 37944 }, { "epoch": 0.6767916384261407, "grad_norm": 0.3072804808616638, "learning_rate": 1.429184535277695e-05, "loss": 0.0872, "step": 37945 }, { "epoch": 0.6768094745478543, "grad_norm": 0.2764657139778137, "learning_rate": 1.4290438875622964e-05, "loss": 0.1518, "step": 37946 }, { "epoch": 0.676827310669568, "grad_norm": 0.37035417556762695, "learning_rate": 1.4289032439982936e-05, "loss": 0.1557, "step": 37947 }, { "epoch": 0.6768451467912817, "grad_norm": 0.26884639263153076, "learning_rate": 1.4287626045862307e-05, "loss": 0.1413, "step": 37948 }, { "epoch": 0.6768629829129954, "grad_norm": 0.40568098425865173, "learning_rate": 1.4286219693266551e-05, "loss": 0.158, "step": 37949 }, { "epoch": 0.6768808190347091, "grad_norm": 0.19726970791816711, "learning_rate": 1.4284813382201103e-05, "loss": 0.1037, "step": 37950 }, { "epoch": 0.6768986551564228, "grad_norm": 0.2574869394302368, "learning_rate": 1.4283407112671423e-05, "loss": 0.1276, "step": 37951 }, { "epoch": 0.6769164912781365, "grad_norm": 0.26717349886894226, "learning_rate": 1.4282000884682947e-05, "loss": 0.1491, "step": 37952 }, { "epoch": 0.6769343273998502, "grad_norm": 0.22183527052402496, "learning_rate": 1.4280594698241148e-05, "loss": 0.1354, "step": 37953 }, { "epoch": 0.6769521635215638, "grad_norm": 0.4951947331428528, "learning_rate": 1.4279188553351469e-05, "loss": 0.1491, "step": 37954 }, { "epoch": 0.6769699996432775, "grad_norm": 0.24742573499679565, "learning_rate": 1.4277782450019353e-05, "loss": 0.0809, "step": 37955 }, { "epoch": 0.6769878357649912, "grad_norm": 0.3553015887737274, "learning_rate": 1.4276376388250249e-05, "loss": 0.1096, "step": 37956 }, { "epoch": 0.6770056718867049, "grad_norm": 0.2534266412258148, "learning_rate": 1.4274970368049623e-05, "loss": 0.131, "step": 37957 }, { "epoch": 0.6770235080084186, "grad_norm": 0.3052847981452942, "learning_rate": 1.4273564389422906e-05, "loss": 0.1349, "step": 37958 }, { "epoch": 0.6770413441301324, "grad_norm": 0.30340835452079773, "learning_rate": 1.4272158452375568e-05, "loss": 0.1651, "step": 37959 }, { "epoch": 0.6770591802518461, "grad_norm": 0.24183939397335052, "learning_rate": 1.4270752556913047e-05, "loss": 0.1273, "step": 37960 }, { "epoch": 0.6770770163735598, "grad_norm": 0.3222278654575348, "learning_rate": 1.426934670304079e-05, "loss": 0.1409, "step": 37961 }, { "epoch": 0.6770948524952735, "grad_norm": 0.3161703050136566, "learning_rate": 1.426794089076426e-05, "loss": 0.1281, "step": 37962 }, { "epoch": 0.6771126886169871, "grad_norm": 0.222113236784935, "learning_rate": 1.4266535120088894e-05, "loss": 0.1509, "step": 37963 }, { "epoch": 0.6771305247387008, "grad_norm": 0.2669108510017395, "learning_rate": 1.4265129391020149e-05, "loss": 0.1033, "step": 37964 }, { "epoch": 0.6771483608604145, "grad_norm": 0.26555803418159485, "learning_rate": 1.4263723703563459e-05, "loss": 0.1142, "step": 37965 }, { "epoch": 0.6771661969821282, "grad_norm": 0.3308393955230713, "learning_rate": 1.4262318057724294e-05, "loss": 0.1154, "step": 37966 }, { "epoch": 0.6771840331038419, "grad_norm": 0.24701416492462158, "learning_rate": 1.4260912453508091e-05, "loss": 0.1173, "step": 37967 }, { "epoch": 0.6772018692255556, "grad_norm": 0.2714207172393799, "learning_rate": 1.4259506890920304e-05, "loss": 0.1447, "step": 37968 }, { "epoch": 0.6772197053472693, "grad_norm": 0.2641317844390869, "learning_rate": 1.4258101369966364e-05, "loss": 0.1211, "step": 37969 }, { "epoch": 0.677237541468983, "grad_norm": 0.2663409113883972, "learning_rate": 1.4256695890651747e-05, "loss": 0.1131, "step": 37970 }, { "epoch": 0.6772553775906967, "grad_norm": 0.2812822759151459, "learning_rate": 1.4255290452981877e-05, "loss": 0.132, "step": 37971 }, { "epoch": 0.6772732137124103, "grad_norm": 0.2187214493751526, "learning_rate": 1.4253885056962218e-05, "loss": 0.1357, "step": 37972 }, { "epoch": 0.677291049834124, "grad_norm": 0.1870632767677307, "learning_rate": 1.4252479702598218e-05, "loss": 0.0774, "step": 37973 }, { "epoch": 0.6773088859558377, "grad_norm": 0.27847856283187866, "learning_rate": 1.4251074389895305e-05, "loss": 0.1545, "step": 37974 }, { "epoch": 0.6773267220775514, "grad_norm": 0.17982667684555054, "learning_rate": 1.4249669118858952e-05, "loss": 0.0852, "step": 37975 }, { "epoch": 0.6773445581992652, "grad_norm": 0.4032396376132965, "learning_rate": 1.4248263889494595e-05, "loss": 0.1075, "step": 37976 }, { "epoch": 0.6773623943209789, "grad_norm": 0.36110034584999084, "learning_rate": 1.4246858701807677e-05, "loss": 0.1064, "step": 37977 }, { "epoch": 0.6773802304426926, "grad_norm": 0.2389470934867859, "learning_rate": 1.4245453555803642e-05, "loss": 0.1521, "step": 37978 }, { "epoch": 0.6773980665644063, "grad_norm": 0.2208462357521057, "learning_rate": 1.4244048451487952e-05, "loss": 0.1247, "step": 37979 }, { "epoch": 0.67741590268612, "grad_norm": 0.1915493905544281, "learning_rate": 1.4242643388866046e-05, "loss": 0.1028, "step": 37980 }, { "epoch": 0.6774337388078336, "grad_norm": 0.23680944740772247, "learning_rate": 1.4241238367943371e-05, "loss": 0.1201, "step": 37981 }, { "epoch": 0.6774515749295473, "grad_norm": 0.29377880692481995, "learning_rate": 1.423983338872536e-05, "loss": 0.1323, "step": 37982 }, { "epoch": 0.677469411051261, "grad_norm": 0.26315832138061523, "learning_rate": 1.423842845121748e-05, "loss": 0.122, "step": 37983 }, { "epoch": 0.6774872471729747, "grad_norm": 0.22639748454093933, "learning_rate": 1.4237023555425173e-05, "loss": 0.1052, "step": 37984 }, { "epoch": 0.6775050832946884, "grad_norm": 0.21553198993206024, "learning_rate": 1.4235618701353864e-05, "loss": 0.0991, "step": 37985 }, { "epoch": 0.6775229194164021, "grad_norm": 0.29160547256469727, "learning_rate": 1.423421388900903e-05, "loss": 0.1266, "step": 37986 }, { "epoch": 0.6775407555381158, "grad_norm": 0.2924550473690033, "learning_rate": 1.423280911839609e-05, "loss": 0.1159, "step": 37987 }, { "epoch": 0.6775585916598295, "grad_norm": 0.2519051730632782, "learning_rate": 1.4231404389520508e-05, "loss": 0.0973, "step": 37988 }, { "epoch": 0.6775764277815431, "grad_norm": 0.32179516553878784, "learning_rate": 1.4229999702387724e-05, "loss": 0.1296, "step": 37989 }, { "epoch": 0.6775942639032568, "grad_norm": 0.26298460364341736, "learning_rate": 1.422859505700318e-05, "loss": 0.1148, "step": 37990 }, { "epoch": 0.6776121000249705, "grad_norm": 0.3153688311576843, "learning_rate": 1.4227190453372314e-05, "loss": 0.0666, "step": 37991 }, { "epoch": 0.6776299361466843, "grad_norm": 0.19584353268146515, "learning_rate": 1.4225785891500587e-05, "loss": 0.093, "step": 37992 }, { "epoch": 0.677647772268398, "grad_norm": 0.23734787106513977, "learning_rate": 1.4224381371393436e-05, "loss": 0.1743, "step": 37993 }, { "epoch": 0.6776656083901117, "grad_norm": 0.24842730164527893, "learning_rate": 1.4222976893056306e-05, "loss": 0.1662, "step": 37994 }, { "epoch": 0.6776834445118254, "grad_norm": 0.25893813371658325, "learning_rate": 1.4221572456494629e-05, "loss": 0.1278, "step": 37995 }, { "epoch": 0.6777012806335391, "grad_norm": 0.2621501088142395, "learning_rate": 1.4220168061713867e-05, "loss": 0.1407, "step": 37996 }, { "epoch": 0.6777191167552528, "grad_norm": 0.2936629056930542, "learning_rate": 1.4218763708719457e-05, "loss": 0.1716, "step": 37997 }, { "epoch": 0.6777369528769664, "grad_norm": 0.28599438071250916, "learning_rate": 1.4217359397516838e-05, "loss": 0.0671, "step": 37998 }, { "epoch": 0.6777547889986801, "grad_norm": 0.2325516939163208, "learning_rate": 1.4215955128111463e-05, "loss": 0.1094, "step": 37999 }, { "epoch": 0.6777726251203938, "grad_norm": 0.43801024556159973, "learning_rate": 1.421455090050876e-05, "loss": 0.1363, "step": 38000 }, { "epoch": 0.6777726251203938, "eval_loss": 0.11776335537433624, "eval_runtime": 106.7526, "eval_samples_per_second": 9.592, "eval_steps_per_second": 1.602, "step": 38000 }, { "epoch": 0.6777904612421075, "grad_norm": 0.3258973956108093, "learning_rate": 1.4213146714714199e-05, "loss": 0.1173, "step": 38001 }, { "epoch": 0.6778082973638212, "grad_norm": 0.22279764711856842, "learning_rate": 1.42117425707332e-05, "loss": 0.0783, "step": 38002 }, { "epoch": 0.6778261334855349, "grad_norm": 0.23170334100723267, "learning_rate": 1.4210338468571216e-05, "loss": 0.129, "step": 38003 }, { "epoch": 0.6778439696072486, "grad_norm": 0.21238280832767487, "learning_rate": 1.4208934408233677e-05, "loss": 0.1474, "step": 38004 }, { "epoch": 0.6778618057289623, "grad_norm": 0.22141441702842712, "learning_rate": 1.4207530389726049e-05, "loss": 0.0939, "step": 38005 }, { "epoch": 0.677879641850676, "grad_norm": 0.276857852935791, "learning_rate": 1.4206126413053755e-05, "loss": 0.1432, "step": 38006 }, { "epoch": 0.6778974779723896, "grad_norm": 0.2408115118741989, "learning_rate": 1.4204722478222244e-05, "loss": 0.0811, "step": 38007 }, { "epoch": 0.6779153140941033, "grad_norm": 0.2695843279361725, "learning_rate": 1.4203318585236946e-05, "loss": 0.1055, "step": 38008 }, { "epoch": 0.6779331502158171, "grad_norm": 0.24992306530475616, "learning_rate": 1.4201914734103327e-05, "loss": 0.1303, "step": 38009 }, { "epoch": 0.6779509863375308, "grad_norm": 0.24770306050777435, "learning_rate": 1.4200510924826816e-05, "loss": 0.1166, "step": 38010 }, { "epoch": 0.6779688224592445, "grad_norm": 0.2797127366065979, "learning_rate": 1.4199107157412855e-05, "loss": 0.1381, "step": 38011 }, { "epoch": 0.6779866585809582, "grad_norm": 0.3100208044052124, "learning_rate": 1.4197703431866875e-05, "loss": 0.1612, "step": 38012 }, { "epoch": 0.6780044947026719, "grad_norm": 0.3306817412376404, "learning_rate": 1.4196299748194337e-05, "loss": 0.1283, "step": 38013 }, { "epoch": 0.6780223308243856, "grad_norm": 0.25073903799057007, "learning_rate": 1.4194896106400663e-05, "loss": 0.1237, "step": 38014 }, { "epoch": 0.6780401669460993, "grad_norm": 0.26074302196502686, "learning_rate": 1.4193492506491313e-05, "loss": 0.1351, "step": 38015 }, { "epoch": 0.6780580030678129, "grad_norm": 0.24974024295806885, "learning_rate": 1.4192088948471715e-05, "loss": 0.065, "step": 38016 }, { "epoch": 0.6780758391895266, "grad_norm": 0.2402353435754776, "learning_rate": 1.4190685432347309e-05, "loss": 0.1375, "step": 38017 }, { "epoch": 0.6780936753112403, "grad_norm": 0.2304908186197281, "learning_rate": 1.4189281958123545e-05, "loss": 0.0962, "step": 38018 }, { "epoch": 0.678111511432954, "grad_norm": 0.294159471988678, "learning_rate": 1.4187878525805865e-05, "loss": 0.1553, "step": 38019 }, { "epoch": 0.6781293475546677, "grad_norm": 0.22550486028194427, "learning_rate": 1.4186475135399696e-05, "loss": 0.1109, "step": 38020 }, { "epoch": 0.6781471836763814, "grad_norm": 0.247028186917305, "learning_rate": 1.4185071786910476e-05, "loss": 0.0728, "step": 38021 }, { "epoch": 0.6781650197980951, "grad_norm": 0.2688398063182831, "learning_rate": 1.4183668480343665e-05, "loss": 0.1187, "step": 38022 }, { "epoch": 0.6781828559198088, "grad_norm": 0.23364390432834625, "learning_rate": 1.4182265215704688e-05, "loss": 0.1033, "step": 38023 }, { "epoch": 0.6782006920415224, "grad_norm": 0.25726598501205444, "learning_rate": 1.4180861992998988e-05, "loss": 0.1493, "step": 38024 }, { "epoch": 0.6782185281632361, "grad_norm": 0.23520156741142273, "learning_rate": 1.4179458812231994e-05, "loss": 0.1569, "step": 38025 }, { "epoch": 0.6782363642849499, "grad_norm": 0.3169076144695282, "learning_rate": 1.4178055673409163e-05, "loss": 0.1064, "step": 38026 }, { "epoch": 0.6782542004066636, "grad_norm": 0.2397921085357666, "learning_rate": 1.4176652576535921e-05, "loss": 0.1575, "step": 38027 }, { "epoch": 0.6782720365283773, "grad_norm": 0.23026040196418762, "learning_rate": 1.4175249521617717e-05, "loss": 0.092, "step": 38028 }, { "epoch": 0.678289872650091, "grad_norm": 0.2579258680343628, "learning_rate": 1.4173846508659989e-05, "loss": 0.1083, "step": 38029 }, { "epoch": 0.6783077087718047, "grad_norm": 0.26648446917533875, "learning_rate": 1.4172443537668156e-05, "loss": 0.1199, "step": 38030 }, { "epoch": 0.6783255448935184, "grad_norm": 0.27058619260787964, "learning_rate": 1.4171040608647684e-05, "loss": 0.1478, "step": 38031 }, { "epoch": 0.6783433810152321, "grad_norm": 0.242412269115448, "learning_rate": 1.4169637721603999e-05, "loss": 0.0828, "step": 38032 }, { "epoch": 0.6783612171369457, "grad_norm": 0.2512744069099426, "learning_rate": 1.4168234876542539e-05, "loss": 0.1462, "step": 38033 }, { "epoch": 0.6783790532586594, "grad_norm": 0.28903698921203613, "learning_rate": 1.416683207346873e-05, "loss": 0.1101, "step": 38034 }, { "epoch": 0.6783968893803731, "grad_norm": 0.25134021043777466, "learning_rate": 1.4165429312388034e-05, "loss": 0.1253, "step": 38035 }, { "epoch": 0.6784147255020868, "grad_norm": 0.27152127027511597, "learning_rate": 1.4164026593305874e-05, "loss": 0.0893, "step": 38036 }, { "epoch": 0.6784325616238005, "grad_norm": 0.22014084458351135, "learning_rate": 1.416262391622769e-05, "loss": 0.1169, "step": 38037 }, { "epoch": 0.6784503977455142, "grad_norm": 0.23176322877407074, "learning_rate": 1.416122128115892e-05, "loss": 0.1251, "step": 38038 }, { "epoch": 0.6784682338672279, "grad_norm": 0.32257190346717834, "learning_rate": 1.4159818688104993e-05, "loss": 0.1129, "step": 38039 }, { "epoch": 0.6784860699889416, "grad_norm": 0.22879919409751892, "learning_rate": 1.4158416137071356e-05, "loss": 0.1629, "step": 38040 }, { "epoch": 0.6785039061106553, "grad_norm": 0.22757573425769806, "learning_rate": 1.4157013628063437e-05, "loss": 0.1161, "step": 38041 }, { "epoch": 0.6785217422323689, "grad_norm": 0.24358348548412323, "learning_rate": 1.4155611161086687e-05, "loss": 0.0933, "step": 38042 }, { "epoch": 0.6785395783540827, "grad_norm": 0.26805564761161804, "learning_rate": 1.4154208736146523e-05, "loss": 0.1064, "step": 38043 }, { "epoch": 0.6785574144757964, "grad_norm": 0.2571731209754944, "learning_rate": 1.4152806353248405e-05, "loss": 0.1105, "step": 38044 }, { "epoch": 0.6785752505975101, "grad_norm": 0.20159876346588135, "learning_rate": 1.4151404012397754e-05, "loss": 0.1263, "step": 38045 }, { "epoch": 0.6785930867192238, "grad_norm": 0.2972625195980072, "learning_rate": 1.4150001713600009e-05, "loss": 0.1471, "step": 38046 }, { "epoch": 0.6786109228409375, "grad_norm": 0.2562756836414337, "learning_rate": 1.4148599456860592e-05, "loss": 0.0902, "step": 38047 }, { "epoch": 0.6786287589626512, "grad_norm": 0.2638951241970062, "learning_rate": 1.4147197242184962e-05, "loss": 0.1221, "step": 38048 }, { "epoch": 0.6786465950843649, "grad_norm": 0.34354645013809204, "learning_rate": 1.4145795069578546e-05, "loss": 0.152, "step": 38049 }, { "epoch": 0.6786644312060786, "grad_norm": 0.2503903806209564, "learning_rate": 1.4144392939046774e-05, "loss": 0.1178, "step": 38050 }, { "epoch": 0.6786822673277922, "grad_norm": 0.30100035667419434, "learning_rate": 1.414299085059509e-05, "loss": 0.0988, "step": 38051 }, { "epoch": 0.6787001034495059, "grad_norm": 0.28491219878196716, "learning_rate": 1.414158880422891e-05, "loss": 0.1449, "step": 38052 }, { "epoch": 0.6787179395712196, "grad_norm": 0.4232577383518219, "learning_rate": 1.4140186799953691e-05, "loss": 0.1605, "step": 38053 }, { "epoch": 0.6787357756929333, "grad_norm": 0.31380370259284973, "learning_rate": 1.4138784837774849e-05, "loss": 0.1571, "step": 38054 }, { "epoch": 0.678753611814647, "grad_norm": 0.21956247091293335, "learning_rate": 1.4137382917697842e-05, "loss": 0.074, "step": 38055 }, { "epoch": 0.6787714479363607, "grad_norm": 0.30031442642211914, "learning_rate": 1.4135981039728078e-05, "loss": 0.1381, "step": 38056 }, { "epoch": 0.6787892840580744, "grad_norm": 0.30037590861320496, "learning_rate": 1.4134579203871013e-05, "loss": 0.1949, "step": 38057 }, { "epoch": 0.6788071201797881, "grad_norm": 0.24318231642246246, "learning_rate": 1.4133177410132073e-05, "loss": 0.1054, "step": 38058 }, { "epoch": 0.6788249563015017, "grad_norm": 0.2966897189617157, "learning_rate": 1.4131775658516689e-05, "loss": 0.0859, "step": 38059 }, { "epoch": 0.6788427924232155, "grad_norm": 0.26094427704811096, "learning_rate": 1.413037394903029e-05, "loss": 0.1251, "step": 38060 }, { "epoch": 0.6788606285449292, "grad_norm": 0.22348086535930634, "learning_rate": 1.4128972281678321e-05, "loss": 0.0888, "step": 38061 }, { "epoch": 0.6788784646666429, "grad_norm": 0.2923192083835602, "learning_rate": 1.4127570656466212e-05, "loss": 0.1512, "step": 38062 }, { "epoch": 0.6788963007883566, "grad_norm": 0.26692765951156616, "learning_rate": 1.4126169073399398e-05, "loss": 0.0908, "step": 38063 }, { "epoch": 0.6789141369100703, "grad_norm": 0.2778342664241791, "learning_rate": 1.4124767532483302e-05, "loss": 0.1414, "step": 38064 }, { "epoch": 0.678931973031784, "grad_norm": 0.23958182334899902, "learning_rate": 1.4123366033723356e-05, "loss": 0.1542, "step": 38065 }, { "epoch": 0.6789498091534977, "grad_norm": 0.2612308859825134, "learning_rate": 1.4121964577125014e-05, "loss": 0.0899, "step": 38066 }, { "epoch": 0.6789676452752114, "grad_norm": 0.24842728674411774, "learning_rate": 1.412056316269369e-05, "loss": 0.0868, "step": 38067 }, { "epoch": 0.678985481396925, "grad_norm": 0.25560522079467773, "learning_rate": 1.4119161790434809e-05, "loss": 0.1457, "step": 38068 }, { "epoch": 0.6790033175186387, "grad_norm": 0.23789770901203156, "learning_rate": 1.4117760460353819e-05, "loss": 0.1012, "step": 38069 }, { "epoch": 0.6790211536403524, "grad_norm": 0.21970634162425995, "learning_rate": 1.4116359172456156e-05, "loss": 0.1114, "step": 38070 }, { "epoch": 0.6790389897620661, "grad_norm": 0.2765413820743561, "learning_rate": 1.4114957926747247e-05, "loss": 0.1291, "step": 38071 }, { "epoch": 0.6790568258837798, "grad_norm": 0.21076099574565887, "learning_rate": 1.4113556723232519e-05, "loss": 0.0921, "step": 38072 }, { "epoch": 0.6790746620054935, "grad_norm": 0.2992746829986572, "learning_rate": 1.4112155561917395e-05, "loss": 0.1709, "step": 38073 }, { "epoch": 0.6790924981272072, "grad_norm": 0.2188761681318283, "learning_rate": 1.4110754442807325e-05, "loss": 0.1164, "step": 38074 }, { "epoch": 0.6791103342489209, "grad_norm": 0.2258126586675644, "learning_rate": 1.4109353365907732e-05, "loss": 0.0964, "step": 38075 }, { "epoch": 0.6791281703706346, "grad_norm": 0.2761717140674591, "learning_rate": 1.410795233122405e-05, "loss": 0.14, "step": 38076 }, { "epoch": 0.6791460064923484, "grad_norm": 0.3077312707901001, "learning_rate": 1.4106551338761704e-05, "loss": 0.1028, "step": 38077 }, { "epoch": 0.679163842614062, "grad_norm": 0.22233155369758606, "learning_rate": 1.4105150388526117e-05, "loss": 0.1004, "step": 38078 }, { "epoch": 0.6791816787357757, "grad_norm": 0.24543443322181702, "learning_rate": 1.410374948052274e-05, "loss": 0.091, "step": 38079 }, { "epoch": 0.6791995148574894, "grad_norm": 0.2750838100910187, "learning_rate": 1.4102348614756994e-05, "loss": 0.107, "step": 38080 }, { "epoch": 0.6792173509792031, "grad_norm": 0.29360508918762207, "learning_rate": 1.41009477912343e-05, "loss": 0.1414, "step": 38081 }, { "epoch": 0.6792351871009168, "grad_norm": 0.23823639750480652, "learning_rate": 1.4099547009960108e-05, "loss": 0.1546, "step": 38082 }, { "epoch": 0.6792530232226305, "grad_norm": 0.29856178164482117, "learning_rate": 1.4098146270939825e-05, "loss": 0.1086, "step": 38083 }, { "epoch": 0.6792708593443442, "grad_norm": 0.3999932110309601, "learning_rate": 1.40967455741789e-05, "loss": 0.1548, "step": 38084 }, { "epoch": 0.6792886954660579, "grad_norm": 0.24247169494628906, "learning_rate": 1.4095344919682757e-05, "loss": 0.1244, "step": 38085 }, { "epoch": 0.6793065315877715, "grad_norm": 0.2690492868423462, "learning_rate": 1.4093944307456813e-05, "loss": 0.1254, "step": 38086 }, { "epoch": 0.6793243677094852, "grad_norm": 0.16117660701274872, "learning_rate": 1.4092543737506519e-05, "loss": 0.0491, "step": 38087 }, { "epoch": 0.6793422038311989, "grad_norm": 0.23193974792957306, "learning_rate": 1.4091143209837294e-05, "loss": 0.1125, "step": 38088 }, { "epoch": 0.6793600399529126, "grad_norm": 0.2804194390773773, "learning_rate": 1.4089742724454564e-05, "loss": 0.1043, "step": 38089 }, { "epoch": 0.6793778760746263, "grad_norm": 0.25015395879745483, "learning_rate": 1.4088342281363759e-05, "loss": 0.0958, "step": 38090 }, { "epoch": 0.67939571219634, "grad_norm": 0.5646789073944092, "learning_rate": 1.4086941880570298e-05, "loss": 0.1782, "step": 38091 }, { "epoch": 0.6794135483180537, "grad_norm": 0.3682366907596588, "learning_rate": 1.4085541522079631e-05, "loss": 0.1553, "step": 38092 }, { "epoch": 0.6794313844397675, "grad_norm": 0.19553418457508087, "learning_rate": 1.4084141205897172e-05, "loss": 0.1314, "step": 38093 }, { "epoch": 0.6794492205614812, "grad_norm": 0.29568129777908325, "learning_rate": 1.4082740932028354e-05, "loss": 0.1239, "step": 38094 }, { "epoch": 0.6794670566831948, "grad_norm": 0.3364834189414978, "learning_rate": 1.4081340700478593e-05, "loss": 0.0928, "step": 38095 }, { "epoch": 0.6794848928049085, "grad_norm": 0.279713898897171, "learning_rate": 1.4079940511253325e-05, "loss": 0.0963, "step": 38096 }, { "epoch": 0.6795027289266222, "grad_norm": 0.24845629930496216, "learning_rate": 1.4078540364357989e-05, "loss": 0.1478, "step": 38097 }, { "epoch": 0.6795205650483359, "grad_norm": 0.3844321072101593, "learning_rate": 1.4077140259798006e-05, "loss": 0.1679, "step": 38098 }, { "epoch": 0.6795384011700496, "grad_norm": 0.23167596757411957, "learning_rate": 1.4075740197578788e-05, "loss": 0.087, "step": 38099 }, { "epoch": 0.6795562372917633, "grad_norm": 0.2533831298351288, "learning_rate": 1.4074340177705786e-05, "loss": 0.1426, "step": 38100 }, { "epoch": 0.679574073413477, "grad_norm": 0.3005765974521637, "learning_rate": 1.4072940200184412e-05, "loss": 0.1383, "step": 38101 }, { "epoch": 0.6795919095351907, "grad_norm": 0.2920003831386566, "learning_rate": 1.4071540265020094e-05, "loss": 0.1437, "step": 38102 }, { "epoch": 0.6796097456569044, "grad_norm": 0.24694103002548218, "learning_rate": 1.4070140372218255e-05, "loss": 0.1024, "step": 38103 }, { "epoch": 0.679627581778618, "grad_norm": 0.3437347114086151, "learning_rate": 1.4068740521784334e-05, "loss": 0.1831, "step": 38104 }, { "epoch": 0.6796454179003317, "grad_norm": 0.20567935705184937, "learning_rate": 1.4067340713723754e-05, "loss": 0.1015, "step": 38105 }, { "epoch": 0.6796632540220454, "grad_norm": 0.2873671352863312, "learning_rate": 1.4065940948041934e-05, "loss": 0.0762, "step": 38106 }, { "epoch": 0.6796810901437591, "grad_norm": 0.31024056673049927, "learning_rate": 1.4064541224744305e-05, "loss": 0.1175, "step": 38107 }, { "epoch": 0.6796989262654728, "grad_norm": 0.2765016555786133, "learning_rate": 1.4063141543836284e-05, "loss": 0.0913, "step": 38108 }, { "epoch": 0.6797167623871865, "grad_norm": 0.27351024746894836, "learning_rate": 1.4061741905323309e-05, "loss": 0.118, "step": 38109 }, { "epoch": 0.6797345985089003, "grad_norm": 0.29077038168907166, "learning_rate": 1.4060342309210792e-05, "loss": 0.1159, "step": 38110 }, { "epoch": 0.679752434630614, "grad_norm": 0.23768246173858643, "learning_rate": 1.4058942755504176e-05, "loss": 0.1086, "step": 38111 }, { "epoch": 0.6797702707523277, "grad_norm": 0.4070793688297272, "learning_rate": 1.4057543244208868e-05, "loss": 0.1351, "step": 38112 }, { "epoch": 0.6797881068740413, "grad_norm": 0.23685990273952484, "learning_rate": 1.405614377533031e-05, "loss": 0.1793, "step": 38113 }, { "epoch": 0.679805942995755, "grad_norm": 0.19624833762645721, "learning_rate": 1.4054744348873921e-05, "loss": 0.1395, "step": 38114 }, { "epoch": 0.6798237791174687, "grad_norm": 0.2356448769569397, "learning_rate": 1.4053344964845122e-05, "loss": 0.1261, "step": 38115 }, { "epoch": 0.6798416152391824, "grad_norm": 0.2942589819431305, "learning_rate": 1.405194562324933e-05, "loss": 0.0917, "step": 38116 }, { "epoch": 0.6798594513608961, "grad_norm": 0.2452307939529419, "learning_rate": 1.4050546324091985e-05, "loss": 0.109, "step": 38117 }, { "epoch": 0.6798772874826098, "grad_norm": 0.24454204738140106, "learning_rate": 1.4049147067378507e-05, "loss": 0.0803, "step": 38118 }, { "epoch": 0.6798951236043235, "grad_norm": 0.3530655801296234, "learning_rate": 1.4047747853114318e-05, "loss": 0.1368, "step": 38119 }, { "epoch": 0.6799129597260372, "grad_norm": 0.36142921447753906, "learning_rate": 1.4046348681304838e-05, "loss": 0.1726, "step": 38120 }, { "epoch": 0.6799307958477508, "grad_norm": 0.23010893166065216, "learning_rate": 1.4044949551955485e-05, "loss": 0.094, "step": 38121 }, { "epoch": 0.6799486319694645, "grad_norm": 0.24662913382053375, "learning_rate": 1.4043550465071704e-05, "loss": 0.1059, "step": 38122 }, { "epoch": 0.6799664680911782, "grad_norm": 0.2754177451133728, "learning_rate": 1.4042151420658891e-05, "loss": 0.1217, "step": 38123 }, { "epoch": 0.6799843042128919, "grad_norm": 0.3141807019710541, "learning_rate": 1.4040752418722497e-05, "loss": 0.149, "step": 38124 }, { "epoch": 0.6800021403346056, "grad_norm": 0.2542325556278229, "learning_rate": 1.4039353459267921e-05, "loss": 0.1596, "step": 38125 }, { "epoch": 0.6800199764563193, "grad_norm": 0.3023487329483032, "learning_rate": 1.4037954542300607e-05, "loss": 0.1192, "step": 38126 }, { "epoch": 0.6800378125780331, "grad_norm": 0.2309402972459793, "learning_rate": 1.4036555667825969e-05, "loss": 0.1029, "step": 38127 }, { "epoch": 0.6800556486997468, "grad_norm": 0.21685844659805298, "learning_rate": 1.4035156835849423e-05, "loss": 0.1038, "step": 38128 }, { "epoch": 0.6800734848214605, "grad_norm": 0.2672184705734253, "learning_rate": 1.4033758046376388e-05, "loss": 0.1442, "step": 38129 }, { "epoch": 0.6800913209431741, "grad_norm": 0.2531728446483612, "learning_rate": 1.4032359299412307e-05, "loss": 0.1109, "step": 38130 }, { "epoch": 0.6801091570648878, "grad_norm": 0.26913121342658997, "learning_rate": 1.4030960594962589e-05, "loss": 0.199, "step": 38131 }, { "epoch": 0.6801269931866015, "grad_norm": 0.2779064476490021, "learning_rate": 1.4029561933032653e-05, "loss": 0.1124, "step": 38132 }, { "epoch": 0.6801448293083152, "grad_norm": 0.35877105593681335, "learning_rate": 1.402816331362793e-05, "loss": 0.1724, "step": 38133 }, { "epoch": 0.6801626654300289, "grad_norm": 0.2864941656589508, "learning_rate": 1.402676473675382e-05, "loss": 0.1763, "step": 38134 }, { "epoch": 0.6801805015517426, "grad_norm": 0.26372548937797546, "learning_rate": 1.4025366202415772e-05, "loss": 0.0856, "step": 38135 }, { "epoch": 0.6801983376734563, "grad_norm": 0.2866208255290985, "learning_rate": 1.4023967710619195e-05, "loss": 0.1778, "step": 38136 }, { "epoch": 0.68021617379517, "grad_norm": 0.304635226726532, "learning_rate": 1.40225692613695e-05, "loss": 0.1284, "step": 38137 }, { "epoch": 0.6802340099168837, "grad_norm": 0.31587427854537964, "learning_rate": 1.4021170854672127e-05, "loss": 0.1373, "step": 38138 }, { "epoch": 0.6802518460385973, "grad_norm": 0.26686543226242065, "learning_rate": 1.401977249053248e-05, "loss": 0.0964, "step": 38139 }, { "epoch": 0.680269682160311, "grad_norm": 0.23689597845077515, "learning_rate": 1.4018374168955995e-05, "loss": 0.1093, "step": 38140 }, { "epoch": 0.6802875182820247, "grad_norm": 0.34281957149505615, "learning_rate": 1.4016975889948086e-05, "loss": 0.1621, "step": 38141 }, { "epoch": 0.6803053544037384, "grad_norm": 0.20335371792316437, "learning_rate": 1.4015577653514161e-05, "loss": 0.079, "step": 38142 }, { "epoch": 0.6803231905254521, "grad_norm": 0.22631576657295227, "learning_rate": 1.4014179459659662e-05, "loss": 0.1195, "step": 38143 }, { "epoch": 0.6803410266471659, "grad_norm": 0.28651463985443115, "learning_rate": 1.4012781308389997e-05, "loss": 0.1442, "step": 38144 }, { "epoch": 0.6803588627688796, "grad_norm": 0.28875383734703064, "learning_rate": 1.4011383199710587e-05, "loss": 0.1502, "step": 38145 }, { "epoch": 0.6803766988905933, "grad_norm": 0.23822379112243652, "learning_rate": 1.4009985133626853e-05, "loss": 0.1304, "step": 38146 }, { "epoch": 0.680394535012307, "grad_norm": 0.2789832353591919, "learning_rate": 1.40085871101442e-05, "loss": 0.1374, "step": 38147 }, { "epoch": 0.6804123711340206, "grad_norm": 0.30027708411216736, "learning_rate": 1.4007189129268067e-05, "loss": 0.1247, "step": 38148 }, { "epoch": 0.6804302072557343, "grad_norm": 0.35845959186553955, "learning_rate": 1.4005791191003869e-05, "loss": 0.1331, "step": 38149 }, { "epoch": 0.680448043377448, "grad_norm": 0.3312264084815979, "learning_rate": 1.4004393295357013e-05, "loss": 0.1669, "step": 38150 }, { "epoch": 0.6804658794991617, "grad_norm": 0.29060032963752747, "learning_rate": 1.4002995442332934e-05, "loss": 0.098, "step": 38151 }, { "epoch": 0.6804837156208754, "grad_norm": 0.18560989201068878, "learning_rate": 1.4001597631937036e-05, "loss": 0.0637, "step": 38152 }, { "epoch": 0.6805015517425891, "grad_norm": 0.31550469994544983, "learning_rate": 1.4000199864174752e-05, "loss": 0.1687, "step": 38153 }, { "epoch": 0.6805193878643028, "grad_norm": 0.3084012269973755, "learning_rate": 1.3998802139051493e-05, "loss": 0.1269, "step": 38154 }, { "epoch": 0.6805372239860165, "grad_norm": 0.2431219518184662, "learning_rate": 1.3997404456572663e-05, "loss": 0.0841, "step": 38155 }, { "epoch": 0.6805550601077301, "grad_norm": 0.2584455907344818, "learning_rate": 1.3996006816743706e-05, "loss": 0.079, "step": 38156 }, { "epoch": 0.6805728962294438, "grad_norm": 0.23360052704811096, "learning_rate": 1.399460921957003e-05, "loss": 0.1094, "step": 38157 }, { "epoch": 0.6805907323511575, "grad_norm": 0.2123311460018158, "learning_rate": 1.3993211665057046e-05, "loss": 0.1107, "step": 38158 }, { "epoch": 0.6806085684728712, "grad_norm": 0.2764964997768402, "learning_rate": 1.3991814153210175e-05, "loss": 0.1147, "step": 38159 }, { "epoch": 0.6806264045945849, "grad_norm": 0.1956997662782669, "learning_rate": 1.3990416684034829e-05, "loss": 0.1232, "step": 38160 }, { "epoch": 0.6806442407162987, "grad_norm": 0.2130974978208542, "learning_rate": 1.3989019257536437e-05, "loss": 0.091, "step": 38161 }, { "epoch": 0.6806620768380124, "grad_norm": 0.3136894106864929, "learning_rate": 1.3987621873720411e-05, "loss": 0.1488, "step": 38162 }, { "epoch": 0.6806799129597261, "grad_norm": 0.32723692059516907, "learning_rate": 1.3986224532592165e-05, "loss": 0.121, "step": 38163 }, { "epoch": 0.6806977490814398, "grad_norm": 0.21379972994327545, "learning_rate": 1.3984827234157106e-05, "loss": 0.1047, "step": 38164 }, { "epoch": 0.6807155852031535, "grad_norm": 0.36420774459838867, "learning_rate": 1.3983429978420673e-05, "loss": 0.1565, "step": 38165 }, { "epoch": 0.6807334213248671, "grad_norm": 0.24340316653251648, "learning_rate": 1.3982032765388258e-05, "loss": 0.081, "step": 38166 }, { "epoch": 0.6807512574465808, "grad_norm": 0.24933785200119019, "learning_rate": 1.3980635595065303e-05, "loss": 0.1187, "step": 38167 }, { "epoch": 0.6807690935682945, "grad_norm": 0.2732901871204376, "learning_rate": 1.3979238467457201e-05, "loss": 0.122, "step": 38168 }, { "epoch": 0.6807869296900082, "grad_norm": 0.246701180934906, "learning_rate": 1.3977841382569384e-05, "loss": 0.1467, "step": 38169 }, { "epoch": 0.6808047658117219, "grad_norm": 0.2657492458820343, "learning_rate": 1.397644434040726e-05, "loss": 0.0997, "step": 38170 }, { "epoch": 0.6808226019334356, "grad_norm": 0.25838497281074524, "learning_rate": 1.3975047340976246e-05, "loss": 0.1105, "step": 38171 }, { "epoch": 0.6808404380551493, "grad_norm": 0.3133302927017212, "learning_rate": 1.3973650384281758e-05, "loss": 0.1006, "step": 38172 }, { "epoch": 0.680858274176863, "grad_norm": 0.2004750370979309, "learning_rate": 1.39722534703292e-05, "loss": 0.0739, "step": 38173 }, { "epoch": 0.6808761102985766, "grad_norm": 0.29933902621269226, "learning_rate": 1.3970856599124005e-05, "loss": 0.0798, "step": 38174 }, { "epoch": 0.6808939464202903, "grad_norm": 0.28791409730911255, "learning_rate": 1.3969459770671579e-05, "loss": 0.1313, "step": 38175 }, { "epoch": 0.680911782542004, "grad_norm": 0.2405257672071457, "learning_rate": 1.3968062984977337e-05, "loss": 0.1042, "step": 38176 }, { "epoch": 0.6809296186637177, "grad_norm": 0.30018922686576843, "learning_rate": 1.3966666242046683e-05, "loss": 0.0747, "step": 38177 }, { "epoch": 0.6809474547854315, "grad_norm": 0.3320558965206146, "learning_rate": 1.396526954188505e-05, "loss": 0.11, "step": 38178 }, { "epoch": 0.6809652909071452, "grad_norm": 0.28587329387664795, "learning_rate": 1.3963872884497837e-05, "loss": 0.106, "step": 38179 }, { "epoch": 0.6809831270288589, "grad_norm": 0.3686194121837616, "learning_rate": 1.396247626989047e-05, "loss": 0.1475, "step": 38180 }, { "epoch": 0.6810009631505726, "grad_norm": 0.2667557895183563, "learning_rate": 1.396107969806835e-05, "loss": 0.1421, "step": 38181 }, { "epoch": 0.6810187992722863, "grad_norm": 0.3066580891609192, "learning_rate": 1.3959683169036908e-05, "loss": 0.1108, "step": 38182 }, { "epoch": 0.681036635394, "grad_norm": 0.2698015570640564, "learning_rate": 1.3958286682801546e-05, "loss": 0.1744, "step": 38183 }, { "epoch": 0.6810544715157136, "grad_norm": 0.2537696361541748, "learning_rate": 1.3956890239367678e-05, "loss": 0.0825, "step": 38184 }, { "epoch": 0.6810723076374273, "grad_norm": 0.22206906974315643, "learning_rate": 1.3955493838740718e-05, "loss": 0.1499, "step": 38185 }, { "epoch": 0.681090143759141, "grad_norm": 0.37641796469688416, "learning_rate": 1.395409748092607e-05, "loss": 0.1679, "step": 38186 }, { "epoch": 0.6811079798808547, "grad_norm": 0.24186834692955017, "learning_rate": 1.3952701165929166e-05, "loss": 0.1495, "step": 38187 }, { "epoch": 0.6811258160025684, "grad_norm": 0.2486027479171753, "learning_rate": 1.3951304893755407e-05, "loss": 0.109, "step": 38188 }, { "epoch": 0.6811436521242821, "grad_norm": 0.2330179363489151, "learning_rate": 1.3949908664410205e-05, "loss": 0.092, "step": 38189 }, { "epoch": 0.6811614882459958, "grad_norm": 0.5090229511260986, "learning_rate": 1.3948512477898965e-05, "loss": 0.1246, "step": 38190 }, { "epoch": 0.6811793243677094, "grad_norm": 0.2555474638938904, "learning_rate": 1.3947116334227117e-05, "loss": 0.1096, "step": 38191 }, { "epoch": 0.6811971604894231, "grad_norm": 0.3159734308719635, "learning_rate": 1.3945720233400065e-05, "loss": 0.1506, "step": 38192 }, { "epoch": 0.6812149966111368, "grad_norm": 0.2880256474018097, "learning_rate": 1.3944324175423207e-05, "loss": 0.1244, "step": 38193 }, { "epoch": 0.6812328327328506, "grad_norm": 0.27575504779815674, "learning_rate": 1.3942928160301976e-05, "loss": 0.1481, "step": 38194 }, { "epoch": 0.6812506688545643, "grad_norm": 0.2570909857749939, "learning_rate": 1.3941532188041768e-05, "loss": 0.1741, "step": 38195 }, { "epoch": 0.681268504976278, "grad_norm": 0.28382623195648193, "learning_rate": 1.3940136258648013e-05, "loss": 0.1365, "step": 38196 }, { "epoch": 0.6812863410979917, "grad_norm": 0.2137383222579956, "learning_rate": 1.3938740372126102e-05, "loss": 0.0733, "step": 38197 }, { "epoch": 0.6813041772197054, "grad_norm": 0.3670567274093628, "learning_rate": 1.393734452848146e-05, "loss": 0.1571, "step": 38198 }, { "epoch": 0.6813220133414191, "grad_norm": 0.19791445136070251, "learning_rate": 1.3935948727719478e-05, "loss": 0.1032, "step": 38199 }, { "epoch": 0.6813398494631328, "grad_norm": 0.2627556025981903, "learning_rate": 1.3934552969845593e-05, "loss": 0.079, "step": 38200 }, { "epoch": 0.6813576855848464, "grad_norm": 0.30776503682136536, "learning_rate": 1.39331572548652e-05, "loss": 0.1639, "step": 38201 }, { "epoch": 0.6813755217065601, "grad_norm": 0.23980633914470673, "learning_rate": 1.3931761582783714e-05, "loss": 0.1445, "step": 38202 }, { "epoch": 0.6813933578282738, "grad_norm": 0.28446152806282043, "learning_rate": 1.3930365953606533e-05, "loss": 0.1294, "step": 38203 }, { "epoch": 0.6814111939499875, "grad_norm": 0.2952460050582886, "learning_rate": 1.3928970367339083e-05, "loss": 0.118, "step": 38204 }, { "epoch": 0.6814290300717012, "grad_norm": 0.24420614540576935, "learning_rate": 1.3927574823986772e-05, "loss": 0.1224, "step": 38205 }, { "epoch": 0.6814468661934149, "grad_norm": 0.27015960216522217, "learning_rate": 1.3926179323554995e-05, "loss": 0.1035, "step": 38206 }, { "epoch": 0.6814647023151286, "grad_norm": 0.2446109801530838, "learning_rate": 1.3924783866049179e-05, "loss": 0.1311, "step": 38207 }, { "epoch": 0.6814825384368423, "grad_norm": 0.22301532328128815, "learning_rate": 1.3923388451474717e-05, "loss": 0.1199, "step": 38208 }, { "epoch": 0.6815003745585559, "grad_norm": 0.2690506875514984, "learning_rate": 1.3921993079837037e-05, "loss": 0.1173, "step": 38209 }, { "epoch": 0.6815182106802696, "grad_norm": 0.3371462821960449, "learning_rate": 1.392059775114154e-05, "loss": 0.1391, "step": 38210 }, { "epoch": 0.6815360468019834, "grad_norm": 0.26927849650382996, "learning_rate": 1.3919202465393633e-05, "loss": 0.1066, "step": 38211 }, { "epoch": 0.6815538829236971, "grad_norm": 0.2634313702583313, "learning_rate": 1.3917807222598712e-05, "loss": 0.097, "step": 38212 }, { "epoch": 0.6815717190454108, "grad_norm": 0.6263816356658936, "learning_rate": 1.391641202276221e-05, "loss": 0.0811, "step": 38213 }, { "epoch": 0.6815895551671245, "grad_norm": 0.2044709026813507, "learning_rate": 1.3915016865889519e-05, "loss": 0.1076, "step": 38214 }, { "epoch": 0.6816073912888382, "grad_norm": 0.209680438041687, "learning_rate": 1.3913621751986056e-05, "loss": 0.1426, "step": 38215 }, { "epoch": 0.6816252274105519, "grad_norm": 0.28347915410995483, "learning_rate": 1.391222668105721e-05, "loss": 0.1113, "step": 38216 }, { "epoch": 0.6816430635322656, "grad_norm": 0.23850694298744202, "learning_rate": 1.3910831653108417e-05, "loss": 0.0663, "step": 38217 }, { "epoch": 0.6816608996539792, "grad_norm": 0.3260178565979004, "learning_rate": 1.3909436668145069e-05, "loss": 0.1366, "step": 38218 }, { "epoch": 0.6816787357756929, "grad_norm": 0.34857356548309326, "learning_rate": 1.3908041726172574e-05, "loss": 0.1337, "step": 38219 }, { "epoch": 0.6816965718974066, "grad_norm": 0.25902673602104187, "learning_rate": 1.3906646827196333e-05, "loss": 0.0871, "step": 38220 }, { "epoch": 0.6817144080191203, "grad_norm": 0.29883676767349243, "learning_rate": 1.3905251971221767e-05, "loss": 0.1153, "step": 38221 }, { "epoch": 0.681732244140834, "grad_norm": 0.2750854194164276, "learning_rate": 1.3903857158254269e-05, "loss": 0.0991, "step": 38222 }, { "epoch": 0.6817500802625477, "grad_norm": 0.29598814249038696, "learning_rate": 1.3902462388299262e-05, "loss": 0.1161, "step": 38223 }, { "epoch": 0.6817679163842614, "grad_norm": 0.25487640500068665, "learning_rate": 1.3901067661362144e-05, "loss": 0.0974, "step": 38224 }, { "epoch": 0.6817857525059751, "grad_norm": 0.28686612844467163, "learning_rate": 1.3899672977448311e-05, "loss": 0.094, "step": 38225 }, { "epoch": 0.6818035886276888, "grad_norm": 0.2694266140460968, "learning_rate": 1.3898278336563189e-05, "loss": 0.1457, "step": 38226 }, { "epoch": 0.6818214247494024, "grad_norm": 0.3161659240722656, "learning_rate": 1.3896883738712174e-05, "loss": 0.1045, "step": 38227 }, { "epoch": 0.6818392608711162, "grad_norm": 0.2819994390010834, "learning_rate": 1.3895489183900673e-05, "loss": 0.1186, "step": 38228 }, { "epoch": 0.6818570969928299, "grad_norm": 0.30145150423049927, "learning_rate": 1.3894094672134084e-05, "loss": 0.1715, "step": 38229 }, { "epoch": 0.6818749331145436, "grad_norm": 0.2924537658691406, "learning_rate": 1.3892700203417827e-05, "loss": 0.1186, "step": 38230 }, { "epoch": 0.6818927692362573, "grad_norm": 0.20499379932880402, "learning_rate": 1.3891305777757301e-05, "loss": 0.0833, "step": 38231 }, { "epoch": 0.681910605357971, "grad_norm": 0.24573004245758057, "learning_rate": 1.3889911395157911e-05, "loss": 0.1002, "step": 38232 }, { "epoch": 0.6819284414796847, "grad_norm": 0.3260311782360077, "learning_rate": 1.3888517055625053e-05, "loss": 0.1284, "step": 38233 }, { "epoch": 0.6819462776013984, "grad_norm": 0.29326748847961426, "learning_rate": 1.3887122759164151e-05, "loss": 0.1006, "step": 38234 }, { "epoch": 0.681964113723112, "grad_norm": 0.18743669986724854, "learning_rate": 1.3885728505780588e-05, "loss": 0.0675, "step": 38235 }, { "epoch": 0.6819819498448257, "grad_norm": 0.3353661596775055, "learning_rate": 1.3884334295479786e-05, "loss": 0.14, "step": 38236 }, { "epoch": 0.6819997859665394, "grad_norm": 0.4249752163887024, "learning_rate": 1.3882940128267146e-05, "loss": 0.107, "step": 38237 }, { "epoch": 0.6820176220882531, "grad_norm": 0.22529543936252594, "learning_rate": 1.3881546004148063e-05, "loss": 0.124, "step": 38238 }, { "epoch": 0.6820354582099668, "grad_norm": 0.27800649404525757, "learning_rate": 1.3880151923127957e-05, "loss": 0.0919, "step": 38239 }, { "epoch": 0.6820532943316805, "grad_norm": 0.2507452964782715, "learning_rate": 1.3878757885212221e-05, "loss": 0.1155, "step": 38240 }, { "epoch": 0.6820711304533942, "grad_norm": 0.25226354598999023, "learning_rate": 1.3877363890406261e-05, "loss": 0.1264, "step": 38241 }, { "epoch": 0.6820889665751079, "grad_norm": 0.2639915645122528, "learning_rate": 1.3875969938715472e-05, "loss": 0.1114, "step": 38242 }, { "epoch": 0.6821068026968216, "grad_norm": 0.27226176857948303, "learning_rate": 1.3874576030145276e-05, "loss": 0.1542, "step": 38243 }, { "epoch": 0.6821246388185352, "grad_norm": 0.2571322023868561, "learning_rate": 1.387318216470106e-05, "loss": 0.1315, "step": 38244 }, { "epoch": 0.682142474940249, "grad_norm": 0.3140402138233185, "learning_rate": 1.3871788342388236e-05, "loss": 0.1089, "step": 38245 }, { "epoch": 0.6821603110619627, "grad_norm": 0.5863073468208313, "learning_rate": 1.3870394563212197e-05, "loss": 0.1168, "step": 38246 }, { "epoch": 0.6821781471836764, "grad_norm": 0.3086020052433014, "learning_rate": 1.3869000827178363e-05, "loss": 0.157, "step": 38247 }, { "epoch": 0.6821959833053901, "grad_norm": 0.27945834398269653, "learning_rate": 1.386760713429212e-05, "loss": 0.128, "step": 38248 }, { "epoch": 0.6822138194271038, "grad_norm": 0.28308427333831787, "learning_rate": 1.3866213484558874e-05, "loss": 0.1361, "step": 38249 }, { "epoch": 0.6822316555488175, "grad_norm": 0.2819865643978119, "learning_rate": 1.3864819877984036e-05, "loss": 0.1399, "step": 38250 }, { "epoch": 0.6822494916705312, "grad_norm": 0.29427313804626465, "learning_rate": 1.3863426314572991e-05, "loss": 0.1528, "step": 38251 }, { "epoch": 0.6822673277922449, "grad_norm": 0.2779558002948761, "learning_rate": 1.3862032794331165e-05, "loss": 0.125, "step": 38252 }, { "epoch": 0.6822851639139585, "grad_norm": 0.24587909877300262, "learning_rate": 1.3860639317263947e-05, "loss": 0.1035, "step": 38253 }, { "epoch": 0.6823030000356722, "grad_norm": 0.29528847336769104, "learning_rate": 1.3859245883376736e-05, "loss": 0.116, "step": 38254 }, { "epoch": 0.6823208361573859, "grad_norm": 0.21087324619293213, "learning_rate": 1.3857852492674927e-05, "loss": 0.0948, "step": 38255 }, { "epoch": 0.6823386722790996, "grad_norm": 0.31591928005218506, "learning_rate": 1.3856459145163942e-05, "loss": 0.1268, "step": 38256 }, { "epoch": 0.6823565084008133, "grad_norm": 0.34696435928344727, "learning_rate": 1.3855065840849168e-05, "loss": 0.1533, "step": 38257 }, { "epoch": 0.682374344522527, "grad_norm": 0.2870495617389679, "learning_rate": 1.385367257973601e-05, "loss": 0.1325, "step": 38258 }, { "epoch": 0.6823921806442407, "grad_norm": 0.27288681268692017, "learning_rate": 1.3852279361829853e-05, "loss": 0.119, "step": 38259 }, { "epoch": 0.6824100167659544, "grad_norm": 0.18147170543670654, "learning_rate": 1.3850886187136126e-05, "loss": 0.0966, "step": 38260 }, { "epoch": 0.682427852887668, "grad_norm": 0.31475985050201416, "learning_rate": 1.3849493055660211e-05, "loss": 0.139, "step": 38261 }, { "epoch": 0.6824456890093819, "grad_norm": 0.2707485556602478, "learning_rate": 1.3848099967407504e-05, "loss": 0.0828, "step": 38262 }, { "epoch": 0.6824635251310955, "grad_norm": 0.249025359749794, "learning_rate": 1.3846706922383423e-05, "loss": 0.1096, "step": 38263 }, { "epoch": 0.6824813612528092, "grad_norm": 0.20857711136341095, "learning_rate": 1.384531392059335e-05, "loss": 0.1103, "step": 38264 }, { "epoch": 0.6824991973745229, "grad_norm": 0.28961077332496643, "learning_rate": 1.38439209620427e-05, "loss": 0.0723, "step": 38265 }, { "epoch": 0.6825170334962366, "grad_norm": 0.3273436427116394, "learning_rate": 1.3842528046736869e-05, "loss": 0.1646, "step": 38266 }, { "epoch": 0.6825348696179503, "grad_norm": 0.2047920525074005, "learning_rate": 1.384113517468125e-05, "loss": 0.114, "step": 38267 }, { "epoch": 0.682552705739664, "grad_norm": 0.17825815081596375, "learning_rate": 1.3839742345881234e-05, "loss": 0.1186, "step": 38268 }, { "epoch": 0.6825705418613777, "grad_norm": 0.23689785599708557, "learning_rate": 1.3838349560342246e-05, "loss": 0.0824, "step": 38269 }, { "epoch": 0.6825883779830914, "grad_norm": 0.32322752475738525, "learning_rate": 1.3836956818069668e-05, "loss": 0.1087, "step": 38270 }, { "epoch": 0.682606214104805, "grad_norm": 0.2611806094646454, "learning_rate": 1.38355641190689e-05, "loss": 0.0745, "step": 38271 }, { "epoch": 0.6826240502265187, "grad_norm": 0.28862065076828003, "learning_rate": 1.383417146334533e-05, "loss": 0.1463, "step": 38272 }, { "epoch": 0.6826418863482324, "grad_norm": 0.29576367139816284, "learning_rate": 1.3832778850904381e-05, "loss": 0.1372, "step": 38273 }, { "epoch": 0.6826597224699461, "grad_norm": 0.4100019633769989, "learning_rate": 1.3831386281751438e-05, "loss": 0.1351, "step": 38274 }, { "epoch": 0.6826775585916598, "grad_norm": 0.23411719501018524, "learning_rate": 1.3829993755891898e-05, "loss": 0.1239, "step": 38275 }, { "epoch": 0.6826953947133735, "grad_norm": 0.244368314743042, "learning_rate": 1.3828601273331152e-05, "loss": 0.1082, "step": 38276 }, { "epoch": 0.6827132308350872, "grad_norm": 0.26957738399505615, "learning_rate": 1.3827208834074606e-05, "loss": 0.1175, "step": 38277 }, { "epoch": 0.6827310669568009, "grad_norm": 0.23769696056842804, "learning_rate": 1.3825816438127664e-05, "loss": 0.122, "step": 38278 }, { "epoch": 0.6827489030785147, "grad_norm": 0.36357536911964417, "learning_rate": 1.3824424085495718e-05, "loss": 0.1434, "step": 38279 }, { "epoch": 0.6827667392002283, "grad_norm": 0.31610673666000366, "learning_rate": 1.3823031776184167e-05, "loss": 0.105, "step": 38280 }, { "epoch": 0.682784575321942, "grad_norm": 0.24689598381519318, "learning_rate": 1.3821639510198395e-05, "loss": 0.1164, "step": 38281 }, { "epoch": 0.6828024114436557, "grad_norm": 0.24583686888217926, "learning_rate": 1.3820247287543817e-05, "loss": 0.0868, "step": 38282 }, { "epoch": 0.6828202475653694, "grad_norm": 0.1959419846534729, "learning_rate": 1.3818855108225823e-05, "loss": 0.1041, "step": 38283 }, { "epoch": 0.6828380836870831, "grad_norm": 0.2432994395494461, "learning_rate": 1.3817462972249806e-05, "loss": 0.1326, "step": 38284 }, { "epoch": 0.6828559198087968, "grad_norm": 0.20067833364009857, "learning_rate": 1.3816070879621157e-05, "loss": 0.125, "step": 38285 }, { "epoch": 0.6828737559305105, "grad_norm": 0.2468937337398529, "learning_rate": 1.381467883034529e-05, "loss": 0.0881, "step": 38286 }, { "epoch": 0.6828915920522242, "grad_norm": 0.29089730978012085, "learning_rate": 1.3813286824427593e-05, "loss": 0.1175, "step": 38287 }, { "epoch": 0.6829094281739378, "grad_norm": 0.20711497962474823, "learning_rate": 1.3811894861873458e-05, "loss": 0.1139, "step": 38288 }, { "epoch": 0.6829272642956515, "grad_norm": 0.27331846952438354, "learning_rate": 1.3810502942688274e-05, "loss": 0.1299, "step": 38289 }, { "epoch": 0.6829451004173652, "grad_norm": 0.31152036786079407, "learning_rate": 1.3809111066877454e-05, "loss": 0.1503, "step": 38290 }, { "epoch": 0.6829629365390789, "grad_norm": 0.35022714734077454, "learning_rate": 1.3807719234446375e-05, "loss": 0.1113, "step": 38291 }, { "epoch": 0.6829807726607926, "grad_norm": 0.26127803325653076, "learning_rate": 1.3806327445400452e-05, "loss": 0.1069, "step": 38292 }, { "epoch": 0.6829986087825063, "grad_norm": 0.31824514269828796, "learning_rate": 1.380493569974507e-05, "loss": 0.1369, "step": 38293 }, { "epoch": 0.68301644490422, "grad_norm": 0.2234506756067276, "learning_rate": 1.3803543997485613e-05, "loss": 0.1084, "step": 38294 }, { "epoch": 0.6830342810259338, "grad_norm": 0.18394304811954498, "learning_rate": 1.3802152338627498e-05, "loss": 0.0654, "step": 38295 }, { "epoch": 0.6830521171476475, "grad_norm": 0.2944035232067108, "learning_rate": 1.3800760723176106e-05, "loss": 0.1311, "step": 38296 }, { "epoch": 0.6830699532693612, "grad_norm": 0.23413337767124176, "learning_rate": 1.3799369151136836e-05, "loss": 0.0966, "step": 38297 }, { "epoch": 0.6830877893910748, "grad_norm": 0.23401190340518951, "learning_rate": 1.3797977622515068e-05, "loss": 0.1466, "step": 38298 }, { "epoch": 0.6831056255127885, "grad_norm": 0.29377564787864685, "learning_rate": 1.379658613731622e-05, "loss": 0.142, "step": 38299 }, { "epoch": 0.6831234616345022, "grad_norm": 0.2092708796262741, "learning_rate": 1.3795194695545672e-05, "loss": 0.0835, "step": 38300 }, { "epoch": 0.6831412977562159, "grad_norm": 0.3363591730594635, "learning_rate": 1.3793803297208818e-05, "loss": 0.093, "step": 38301 }, { "epoch": 0.6831591338779296, "grad_norm": 0.36219626665115356, "learning_rate": 1.3792411942311056e-05, "loss": 0.0986, "step": 38302 }, { "epoch": 0.6831769699996433, "grad_norm": 0.22186565399169922, "learning_rate": 1.3791020630857764e-05, "loss": 0.1252, "step": 38303 }, { "epoch": 0.683194806121357, "grad_norm": 0.3064078390598297, "learning_rate": 1.3789629362854351e-05, "loss": 0.1475, "step": 38304 }, { "epoch": 0.6832126422430707, "grad_norm": 0.22408241033554077, "learning_rate": 1.3788238138306211e-05, "loss": 0.1164, "step": 38305 }, { "epoch": 0.6832304783647843, "grad_norm": 0.3340977132320404, "learning_rate": 1.3786846957218736e-05, "loss": 0.1263, "step": 38306 }, { "epoch": 0.683248314486498, "grad_norm": 0.2569003403186798, "learning_rate": 1.3785455819597303e-05, "loss": 0.0868, "step": 38307 }, { "epoch": 0.6832661506082117, "grad_norm": 0.32586854696273804, "learning_rate": 1.378406472544733e-05, "loss": 0.1143, "step": 38308 }, { "epoch": 0.6832839867299254, "grad_norm": 0.34712693095207214, "learning_rate": 1.3782673674774193e-05, "loss": 0.1112, "step": 38309 }, { "epoch": 0.6833018228516391, "grad_norm": 0.23863783478736877, "learning_rate": 1.378128266758329e-05, "loss": 0.1336, "step": 38310 }, { "epoch": 0.6833196589733528, "grad_norm": 0.25527501106262207, "learning_rate": 1.3779891703879997e-05, "loss": 0.1086, "step": 38311 }, { "epoch": 0.6833374950950666, "grad_norm": 0.3521053194999695, "learning_rate": 1.3778500783669728e-05, "loss": 0.1005, "step": 38312 }, { "epoch": 0.6833553312167803, "grad_norm": 0.2253001183271408, "learning_rate": 1.3777109906957869e-05, "loss": 0.1092, "step": 38313 }, { "epoch": 0.683373167338494, "grad_norm": 0.3078317642211914, "learning_rate": 1.377571907374981e-05, "loss": 0.1387, "step": 38314 }, { "epoch": 0.6833910034602076, "grad_norm": 0.20839102566242218, "learning_rate": 1.3774328284050936e-05, "loss": 0.1106, "step": 38315 }, { "epoch": 0.6834088395819213, "grad_norm": 0.23857508599758148, "learning_rate": 1.3772937537866634e-05, "loss": 0.1405, "step": 38316 }, { "epoch": 0.683426675703635, "grad_norm": 0.3254587948322296, "learning_rate": 1.3771546835202315e-05, "loss": 0.1468, "step": 38317 }, { "epoch": 0.6834445118253487, "grad_norm": 0.31444141268730164, "learning_rate": 1.3770156176063347e-05, "loss": 0.1434, "step": 38318 }, { "epoch": 0.6834623479470624, "grad_norm": 0.3288553059101105, "learning_rate": 1.3768765560455144e-05, "loss": 0.1592, "step": 38319 }, { "epoch": 0.6834801840687761, "grad_norm": 0.2745189070701599, "learning_rate": 1.3767374988383075e-05, "loss": 0.1503, "step": 38320 }, { "epoch": 0.6834980201904898, "grad_norm": 0.26712653040885925, "learning_rate": 1.3765984459852548e-05, "loss": 0.1955, "step": 38321 }, { "epoch": 0.6835158563122035, "grad_norm": 0.38259992003440857, "learning_rate": 1.376459397486895e-05, "loss": 0.1046, "step": 38322 }, { "epoch": 0.6835336924339172, "grad_norm": 0.24853625893592834, "learning_rate": 1.3763203533437663e-05, "loss": 0.1463, "step": 38323 }, { "epoch": 0.6835515285556308, "grad_norm": 0.3358849585056305, "learning_rate": 1.3761813135564069e-05, "loss": 0.136, "step": 38324 }, { "epoch": 0.6835693646773445, "grad_norm": 0.21480616927146912, "learning_rate": 1.376042278125358e-05, "loss": 0.1159, "step": 38325 }, { "epoch": 0.6835872007990582, "grad_norm": 0.23901121318340302, "learning_rate": 1.3759032470511573e-05, "loss": 0.082, "step": 38326 }, { "epoch": 0.6836050369207719, "grad_norm": 0.2030390352010727, "learning_rate": 1.3757642203343444e-05, "loss": 0.1314, "step": 38327 }, { "epoch": 0.6836228730424856, "grad_norm": 0.27238669991493225, "learning_rate": 1.375625197975457e-05, "loss": 0.1052, "step": 38328 }, { "epoch": 0.6836407091641994, "grad_norm": 0.2330402433872223, "learning_rate": 1.3754861799750341e-05, "loss": 0.1505, "step": 38329 }, { "epoch": 0.6836585452859131, "grad_norm": 0.256360799074173, "learning_rate": 1.3753471663336158e-05, "loss": 0.0734, "step": 38330 }, { "epoch": 0.6836763814076268, "grad_norm": 0.16624684631824493, "learning_rate": 1.37520815705174e-05, "loss": 0.0455, "step": 38331 }, { "epoch": 0.6836942175293405, "grad_norm": 0.20241419970989227, "learning_rate": 1.3750691521299464e-05, "loss": 0.0987, "step": 38332 }, { "epoch": 0.6837120536510541, "grad_norm": 0.2972196340560913, "learning_rate": 1.3749301515687724e-05, "loss": 0.1215, "step": 38333 }, { "epoch": 0.6837298897727678, "grad_norm": 0.20276778936386108, "learning_rate": 1.3747911553687587e-05, "loss": 0.0566, "step": 38334 }, { "epoch": 0.6837477258944815, "grad_norm": 0.20941853523254395, "learning_rate": 1.374652163530443e-05, "loss": 0.1144, "step": 38335 }, { "epoch": 0.6837655620161952, "grad_norm": 0.3057221472263336, "learning_rate": 1.3745131760543643e-05, "loss": 0.1115, "step": 38336 }, { "epoch": 0.6837833981379089, "grad_norm": 0.22856928408145905, "learning_rate": 1.3743741929410604e-05, "loss": 0.128, "step": 38337 }, { "epoch": 0.6838012342596226, "grad_norm": 0.3734128773212433, "learning_rate": 1.3742352141910714e-05, "loss": 0.1469, "step": 38338 }, { "epoch": 0.6838190703813363, "grad_norm": 0.20477478206157684, "learning_rate": 1.374096239804936e-05, "loss": 0.1003, "step": 38339 }, { "epoch": 0.68383690650305, "grad_norm": 0.2997352182865143, "learning_rate": 1.3739572697831924e-05, "loss": 0.1249, "step": 38340 }, { "epoch": 0.6838547426247636, "grad_norm": 0.20431646704673767, "learning_rate": 1.3738183041263791e-05, "loss": 0.0743, "step": 38341 }, { "epoch": 0.6838725787464773, "grad_norm": 0.23744510114192963, "learning_rate": 1.3736793428350341e-05, "loss": 0.127, "step": 38342 }, { "epoch": 0.683890414868191, "grad_norm": 0.29310956597328186, "learning_rate": 1.3735403859096982e-05, "loss": 0.1529, "step": 38343 }, { "epoch": 0.6839082509899047, "grad_norm": 0.2078559547662735, "learning_rate": 1.3734014333509088e-05, "loss": 0.0802, "step": 38344 }, { "epoch": 0.6839260871116184, "grad_norm": 0.2983554005622864, "learning_rate": 1.3732624851592035e-05, "loss": 0.1148, "step": 38345 }, { "epoch": 0.6839439232333322, "grad_norm": 0.30653002858161926, "learning_rate": 1.3731235413351229e-05, "loss": 0.1195, "step": 38346 }, { "epoch": 0.6839617593550459, "grad_norm": 0.4141486585140228, "learning_rate": 1.3729846018792037e-05, "loss": 0.1093, "step": 38347 }, { "epoch": 0.6839795954767596, "grad_norm": 0.22888389229774475, "learning_rate": 1.3728456667919864e-05, "loss": 0.0718, "step": 38348 }, { "epoch": 0.6839974315984733, "grad_norm": 0.19211259484291077, "learning_rate": 1.3727067360740086e-05, "loss": 0.0865, "step": 38349 }, { "epoch": 0.684015267720187, "grad_norm": 0.26405784487724304, "learning_rate": 1.372567809725808e-05, "loss": 0.0814, "step": 38350 }, { "epoch": 0.6840331038419006, "grad_norm": 0.3216015696525574, "learning_rate": 1.3724288877479249e-05, "loss": 0.0942, "step": 38351 }, { "epoch": 0.6840509399636143, "grad_norm": 0.26392215490341187, "learning_rate": 1.372289970140897e-05, "loss": 0.1289, "step": 38352 }, { "epoch": 0.684068776085328, "grad_norm": 0.2547518312931061, "learning_rate": 1.3721510569052622e-05, "loss": 0.1365, "step": 38353 }, { "epoch": 0.6840866122070417, "grad_norm": 0.25171855092048645, "learning_rate": 1.3720121480415599e-05, "loss": 0.1182, "step": 38354 }, { "epoch": 0.6841044483287554, "grad_norm": 0.2351503223180771, "learning_rate": 1.3718732435503271e-05, "loss": 0.0804, "step": 38355 }, { "epoch": 0.6841222844504691, "grad_norm": 0.2107308954000473, "learning_rate": 1.3717343434321039e-05, "loss": 0.0991, "step": 38356 }, { "epoch": 0.6841401205721828, "grad_norm": 0.2591079771518707, "learning_rate": 1.3715954476874285e-05, "loss": 0.134, "step": 38357 }, { "epoch": 0.6841579566938965, "grad_norm": 0.23552291095256805, "learning_rate": 1.3714565563168386e-05, "loss": 0.139, "step": 38358 }, { "epoch": 0.6841757928156101, "grad_norm": 0.2855210602283478, "learning_rate": 1.371317669320872e-05, "loss": 0.1081, "step": 38359 }, { "epoch": 0.6841936289373238, "grad_norm": 0.17529477179050446, "learning_rate": 1.3711787867000681e-05, "loss": 0.073, "step": 38360 }, { "epoch": 0.6842114650590375, "grad_norm": 0.2917133867740631, "learning_rate": 1.3710399084549657e-05, "loss": 0.1084, "step": 38361 }, { "epoch": 0.6842293011807512, "grad_norm": 0.27219751477241516, "learning_rate": 1.370901034586103e-05, "loss": 0.1488, "step": 38362 }, { "epoch": 0.684247137302465, "grad_norm": 0.2616942822933197, "learning_rate": 1.3707621650940166e-05, "loss": 0.1259, "step": 38363 }, { "epoch": 0.6842649734241787, "grad_norm": 0.32580018043518066, "learning_rate": 1.370623299979247e-05, "loss": 0.1324, "step": 38364 }, { "epoch": 0.6842828095458924, "grad_norm": 0.4417336881160736, "learning_rate": 1.3704844392423315e-05, "loss": 0.1389, "step": 38365 }, { "epoch": 0.6843006456676061, "grad_norm": 0.3365018665790558, "learning_rate": 1.3703455828838089e-05, "loss": 0.1153, "step": 38366 }, { "epoch": 0.6843184817893198, "grad_norm": 0.3291773796081543, "learning_rate": 1.3702067309042167e-05, "loss": 0.1064, "step": 38367 }, { "epoch": 0.6843363179110334, "grad_norm": 0.33683764934539795, "learning_rate": 1.3700678833040926e-05, "loss": 0.1101, "step": 38368 }, { "epoch": 0.6843541540327471, "grad_norm": 0.33964017033576965, "learning_rate": 1.3699290400839762e-05, "loss": 0.1104, "step": 38369 }, { "epoch": 0.6843719901544608, "grad_norm": 0.2485402375459671, "learning_rate": 1.3697902012444053e-05, "loss": 0.1207, "step": 38370 }, { "epoch": 0.6843898262761745, "grad_norm": 0.25063398480415344, "learning_rate": 1.3696513667859181e-05, "loss": 0.1631, "step": 38371 }, { "epoch": 0.6844076623978882, "grad_norm": 0.2872093617916107, "learning_rate": 1.3695125367090517e-05, "loss": 0.1166, "step": 38372 }, { "epoch": 0.6844254985196019, "grad_norm": 0.3052406907081604, "learning_rate": 1.3693737110143462e-05, "loss": 0.1515, "step": 38373 }, { "epoch": 0.6844433346413156, "grad_norm": 0.2045532763004303, "learning_rate": 1.3692348897023374e-05, "loss": 0.1178, "step": 38374 }, { "epoch": 0.6844611707630293, "grad_norm": 0.27117741107940674, "learning_rate": 1.3690960727735659e-05, "loss": 0.1222, "step": 38375 }, { "epoch": 0.684479006884743, "grad_norm": 0.3235396444797516, "learning_rate": 1.3689572602285678e-05, "loss": 0.1283, "step": 38376 }, { "epoch": 0.6844968430064566, "grad_norm": 0.31171759963035583, "learning_rate": 1.3688184520678827e-05, "loss": 0.145, "step": 38377 }, { "epoch": 0.6845146791281703, "grad_norm": 0.27213242650032043, "learning_rate": 1.368679648292048e-05, "loss": 0.0772, "step": 38378 }, { "epoch": 0.684532515249884, "grad_norm": 0.2412949800491333, "learning_rate": 1.3685408489016017e-05, "loss": 0.1064, "step": 38379 }, { "epoch": 0.6845503513715978, "grad_norm": 0.21386411786079407, "learning_rate": 1.3684020538970821e-05, "loss": 0.1321, "step": 38380 }, { "epoch": 0.6845681874933115, "grad_norm": 0.22135698795318604, "learning_rate": 1.368263263279026e-05, "loss": 0.1181, "step": 38381 }, { "epoch": 0.6845860236150252, "grad_norm": 0.22843441367149353, "learning_rate": 1.3681244770479731e-05, "loss": 0.114, "step": 38382 }, { "epoch": 0.6846038597367389, "grad_norm": 0.25926870107650757, "learning_rate": 1.3679856952044606e-05, "loss": 0.1416, "step": 38383 }, { "epoch": 0.6846216958584526, "grad_norm": 0.24429389834403992, "learning_rate": 1.3678469177490268e-05, "loss": 0.0902, "step": 38384 }, { "epoch": 0.6846395319801662, "grad_norm": 0.27275997400283813, "learning_rate": 1.3677081446822085e-05, "loss": 0.1087, "step": 38385 }, { "epoch": 0.6846573681018799, "grad_norm": 0.23695170879364014, "learning_rate": 1.3675693760045451e-05, "loss": 0.1212, "step": 38386 }, { "epoch": 0.6846752042235936, "grad_norm": 0.27595528960227966, "learning_rate": 1.3674306117165733e-05, "loss": 0.1028, "step": 38387 }, { "epoch": 0.6846930403453073, "grad_norm": 0.2717142701148987, "learning_rate": 1.3672918518188326e-05, "loss": 0.1162, "step": 38388 }, { "epoch": 0.684710876467021, "grad_norm": 0.3610185980796814, "learning_rate": 1.3671530963118587e-05, "loss": 0.0841, "step": 38389 }, { "epoch": 0.6847287125887347, "grad_norm": 0.2965988218784332, "learning_rate": 1.3670143451961918e-05, "loss": 0.1139, "step": 38390 }, { "epoch": 0.6847465487104484, "grad_norm": 0.30460888147354126, "learning_rate": 1.3668755984723686e-05, "loss": 0.1133, "step": 38391 }, { "epoch": 0.6847643848321621, "grad_norm": 0.24197007715702057, "learning_rate": 1.3667368561409268e-05, "loss": 0.1507, "step": 38392 }, { "epoch": 0.6847822209538758, "grad_norm": 0.24504749476909637, "learning_rate": 1.3665981182024045e-05, "loss": 0.128, "step": 38393 }, { "epoch": 0.6848000570755894, "grad_norm": 0.24964097142219543, "learning_rate": 1.3664593846573385e-05, "loss": 0.0588, "step": 38394 }, { "epoch": 0.6848178931973031, "grad_norm": 0.3125212490558624, "learning_rate": 1.366320655506268e-05, "loss": 0.1828, "step": 38395 }, { "epoch": 0.6848357293190169, "grad_norm": 0.2686334550380707, "learning_rate": 1.3661819307497306e-05, "loss": 0.1087, "step": 38396 }, { "epoch": 0.6848535654407306, "grad_norm": 0.24931973218917847, "learning_rate": 1.3660432103882636e-05, "loss": 0.0604, "step": 38397 }, { "epoch": 0.6848714015624443, "grad_norm": 0.2861267328262329, "learning_rate": 1.3659044944224036e-05, "loss": 0.1116, "step": 38398 }, { "epoch": 0.684889237684158, "grad_norm": 0.22131329774856567, "learning_rate": 1.3657657828526907e-05, "loss": 0.1297, "step": 38399 }, { "epoch": 0.6849070738058717, "grad_norm": 0.25230592489242554, "learning_rate": 1.3656270756796613e-05, "loss": 0.1175, "step": 38400 }, { "epoch": 0.6849249099275854, "grad_norm": 0.3339843451976776, "learning_rate": 1.365488372903852e-05, "loss": 0.1094, "step": 38401 }, { "epoch": 0.6849427460492991, "grad_norm": 0.33061057329177856, "learning_rate": 1.3653496745258027e-05, "loss": 0.1825, "step": 38402 }, { "epoch": 0.6849605821710127, "grad_norm": 0.21584612131118774, "learning_rate": 1.365210980546049e-05, "loss": 0.1065, "step": 38403 }, { "epoch": 0.6849784182927264, "grad_norm": 0.296165406703949, "learning_rate": 1.3650722909651303e-05, "loss": 0.1768, "step": 38404 }, { "epoch": 0.6849962544144401, "grad_norm": 0.2808534502983093, "learning_rate": 1.3649336057835838e-05, "loss": 0.1602, "step": 38405 }, { "epoch": 0.6850140905361538, "grad_norm": 0.18003782629966736, "learning_rate": 1.3647949250019465e-05, "loss": 0.0663, "step": 38406 }, { "epoch": 0.6850319266578675, "grad_norm": 0.252089262008667, "learning_rate": 1.364656248620755e-05, "loss": 0.0969, "step": 38407 }, { "epoch": 0.6850497627795812, "grad_norm": 0.27056699991226196, "learning_rate": 1.3645175766405493e-05, "loss": 0.0966, "step": 38408 }, { "epoch": 0.6850675989012949, "grad_norm": 0.27369362115859985, "learning_rate": 1.3643789090618652e-05, "loss": 0.1212, "step": 38409 }, { "epoch": 0.6850854350230086, "grad_norm": 0.354015052318573, "learning_rate": 1.364240245885241e-05, "loss": 0.1306, "step": 38410 }, { "epoch": 0.6851032711447222, "grad_norm": 0.20980007946491241, "learning_rate": 1.3641015871112129e-05, "loss": 0.1456, "step": 38411 }, { "epoch": 0.6851211072664359, "grad_norm": 0.21975672245025635, "learning_rate": 1.3639629327403203e-05, "loss": 0.1267, "step": 38412 }, { "epoch": 0.6851389433881497, "grad_norm": 0.2790791988372803, "learning_rate": 1.3638242827730998e-05, "loss": 0.1295, "step": 38413 }, { "epoch": 0.6851567795098634, "grad_norm": 0.28701257705688477, "learning_rate": 1.3636856372100875e-05, "loss": 0.0835, "step": 38414 }, { "epoch": 0.6851746156315771, "grad_norm": 0.41110920906066895, "learning_rate": 1.3635469960518237e-05, "loss": 0.1311, "step": 38415 }, { "epoch": 0.6851924517532908, "grad_norm": 0.22192950546741486, "learning_rate": 1.3634083592988428e-05, "loss": 0.0983, "step": 38416 }, { "epoch": 0.6852102878750045, "grad_norm": 0.3859463036060333, "learning_rate": 1.363269726951685e-05, "loss": 0.1042, "step": 38417 }, { "epoch": 0.6852281239967182, "grad_norm": 0.26612839102745056, "learning_rate": 1.3631310990108862e-05, "loss": 0.1137, "step": 38418 }, { "epoch": 0.6852459601184319, "grad_norm": 0.2530892491340637, "learning_rate": 1.362992475476984e-05, "loss": 0.1498, "step": 38419 }, { "epoch": 0.6852637962401456, "grad_norm": 0.2104545384645462, "learning_rate": 1.3628538563505144e-05, "loss": 0.1032, "step": 38420 }, { "epoch": 0.6852816323618592, "grad_norm": 0.18383771181106567, "learning_rate": 1.362715241632017e-05, "loss": 0.0644, "step": 38421 }, { "epoch": 0.6852994684835729, "grad_norm": 0.27254554629325867, "learning_rate": 1.3625766313220285e-05, "loss": 0.1366, "step": 38422 }, { "epoch": 0.6853173046052866, "grad_norm": 0.23755870759487152, "learning_rate": 1.3624380254210855e-05, "loss": 0.1389, "step": 38423 }, { "epoch": 0.6853351407270003, "grad_norm": 0.22425946593284607, "learning_rate": 1.3622994239297248e-05, "loss": 0.0954, "step": 38424 }, { "epoch": 0.685352976848714, "grad_norm": 0.25088125467300415, "learning_rate": 1.3621608268484857e-05, "loss": 0.0972, "step": 38425 }, { "epoch": 0.6853708129704277, "grad_norm": 0.2909787893295288, "learning_rate": 1.3620222341779038e-05, "loss": 0.1341, "step": 38426 }, { "epoch": 0.6853886490921414, "grad_norm": 0.32008832693099976, "learning_rate": 1.361883645918517e-05, "loss": 0.1082, "step": 38427 }, { "epoch": 0.685406485213855, "grad_norm": 0.22382010519504547, "learning_rate": 1.3617450620708613e-05, "loss": 0.1136, "step": 38428 }, { "epoch": 0.6854243213355687, "grad_norm": 0.23693999648094177, "learning_rate": 1.3616064826354757e-05, "loss": 0.1355, "step": 38429 }, { "epoch": 0.6854421574572825, "grad_norm": 0.26569345593452454, "learning_rate": 1.3614679076128958e-05, "loss": 0.0922, "step": 38430 }, { "epoch": 0.6854599935789962, "grad_norm": 0.2200886756181717, "learning_rate": 1.3613293370036604e-05, "loss": 0.105, "step": 38431 }, { "epoch": 0.6854778297007099, "grad_norm": 0.43981683254241943, "learning_rate": 1.3611907708083057e-05, "loss": 0.0853, "step": 38432 }, { "epoch": 0.6854956658224236, "grad_norm": 0.2803592383861542, "learning_rate": 1.3610522090273681e-05, "loss": 0.1255, "step": 38433 }, { "epoch": 0.6855135019441373, "grad_norm": 0.3195376396179199, "learning_rate": 1.3609136516613863e-05, "loss": 0.1108, "step": 38434 }, { "epoch": 0.685531338065851, "grad_norm": 0.24816769361495972, "learning_rate": 1.3607750987108966e-05, "loss": 0.1501, "step": 38435 }, { "epoch": 0.6855491741875647, "grad_norm": 0.34660109877586365, "learning_rate": 1.3606365501764363e-05, "loss": 0.1284, "step": 38436 }, { "epoch": 0.6855670103092784, "grad_norm": 0.29819396138191223, "learning_rate": 1.3604980060585412e-05, "loss": 0.1359, "step": 38437 }, { "epoch": 0.685584846430992, "grad_norm": 0.2540644109249115, "learning_rate": 1.3603594663577501e-05, "loss": 0.0885, "step": 38438 }, { "epoch": 0.6856026825527057, "grad_norm": 0.2896649241447449, "learning_rate": 1.3602209310745998e-05, "loss": 0.1182, "step": 38439 }, { "epoch": 0.6856205186744194, "grad_norm": 0.2289072722196579, "learning_rate": 1.3600824002096265e-05, "loss": 0.1208, "step": 38440 }, { "epoch": 0.6856383547961331, "grad_norm": 0.2511662244796753, "learning_rate": 1.3599438737633668e-05, "loss": 0.1296, "step": 38441 }, { "epoch": 0.6856561909178468, "grad_norm": 0.20983365178108215, "learning_rate": 1.359805351736359e-05, "loss": 0.1108, "step": 38442 }, { "epoch": 0.6856740270395605, "grad_norm": 0.22050683200359344, "learning_rate": 1.3596668341291391e-05, "loss": 0.1295, "step": 38443 }, { "epoch": 0.6856918631612742, "grad_norm": 0.22735293209552765, "learning_rate": 1.3595283209422449e-05, "loss": 0.1064, "step": 38444 }, { "epoch": 0.6857096992829879, "grad_norm": 0.23720484972000122, "learning_rate": 1.3593898121762128e-05, "loss": 0.0864, "step": 38445 }, { "epoch": 0.6857275354047015, "grad_norm": 0.21472570300102234, "learning_rate": 1.3592513078315792e-05, "loss": 0.1094, "step": 38446 }, { "epoch": 0.6857453715264153, "grad_norm": 0.20450402796268463, "learning_rate": 1.3591128079088823e-05, "loss": 0.1393, "step": 38447 }, { "epoch": 0.685763207648129, "grad_norm": 0.22828178107738495, "learning_rate": 1.3589743124086578e-05, "loss": 0.0868, "step": 38448 }, { "epoch": 0.6857810437698427, "grad_norm": 0.20493710041046143, "learning_rate": 1.3588358213314433e-05, "loss": 0.0888, "step": 38449 }, { "epoch": 0.6857988798915564, "grad_norm": 0.27139800786972046, "learning_rate": 1.3586973346777745e-05, "loss": 0.1201, "step": 38450 }, { "epoch": 0.6858167160132701, "grad_norm": 0.21251267194747925, "learning_rate": 1.35855885244819e-05, "loss": 0.0965, "step": 38451 }, { "epoch": 0.6858345521349838, "grad_norm": 0.38066670298576355, "learning_rate": 1.3584203746432253e-05, "loss": 0.1166, "step": 38452 }, { "epoch": 0.6858523882566975, "grad_norm": 0.19657441973686218, "learning_rate": 1.3582819012634176e-05, "loss": 0.0917, "step": 38453 }, { "epoch": 0.6858702243784112, "grad_norm": 0.25900018215179443, "learning_rate": 1.3581434323093028e-05, "loss": 0.1246, "step": 38454 }, { "epoch": 0.6858880605001249, "grad_norm": 0.23921701312065125, "learning_rate": 1.3580049677814194e-05, "loss": 0.1083, "step": 38455 }, { "epoch": 0.6859058966218385, "grad_norm": 0.2900693118572235, "learning_rate": 1.3578665076803029e-05, "loss": 0.1344, "step": 38456 }, { "epoch": 0.6859237327435522, "grad_norm": 0.3682699203491211, "learning_rate": 1.3577280520064894e-05, "loss": 0.1887, "step": 38457 }, { "epoch": 0.6859415688652659, "grad_norm": 0.20448656380176544, "learning_rate": 1.3575896007605177e-05, "loss": 0.0877, "step": 38458 }, { "epoch": 0.6859594049869796, "grad_norm": 0.31979233026504517, "learning_rate": 1.3574511539429221e-05, "loss": 0.1047, "step": 38459 }, { "epoch": 0.6859772411086933, "grad_norm": 0.373167484998703, "learning_rate": 1.3573127115542417e-05, "loss": 0.1444, "step": 38460 }, { "epoch": 0.685995077230407, "grad_norm": 0.22824648022651672, "learning_rate": 1.3571742735950117e-05, "loss": 0.0922, "step": 38461 }, { "epoch": 0.6860129133521207, "grad_norm": 0.24930021166801453, "learning_rate": 1.357035840065769e-05, "loss": 0.1101, "step": 38462 }, { "epoch": 0.6860307494738344, "grad_norm": 0.23855338990688324, "learning_rate": 1.3568974109670491e-05, "loss": 0.1545, "step": 38463 }, { "epoch": 0.6860485855955482, "grad_norm": 0.2848866879940033, "learning_rate": 1.3567589862993906e-05, "loss": 0.1249, "step": 38464 }, { "epoch": 0.6860664217172618, "grad_norm": 0.3375401198863983, "learning_rate": 1.356620566063329e-05, "loss": 0.1366, "step": 38465 }, { "epoch": 0.6860842578389755, "grad_norm": 0.24678131937980652, "learning_rate": 1.3564821502594013e-05, "loss": 0.0966, "step": 38466 }, { "epoch": 0.6861020939606892, "grad_norm": 0.303362101316452, "learning_rate": 1.3563437388881428e-05, "loss": 0.1356, "step": 38467 }, { "epoch": 0.6861199300824029, "grad_norm": 0.28642475605010986, "learning_rate": 1.3562053319500917e-05, "loss": 0.1228, "step": 38468 }, { "epoch": 0.6861377662041166, "grad_norm": 0.2539806067943573, "learning_rate": 1.356066929445784e-05, "loss": 0.0862, "step": 38469 }, { "epoch": 0.6861556023258303, "grad_norm": 0.28071328997612, "learning_rate": 1.3559285313757548e-05, "loss": 0.1161, "step": 38470 }, { "epoch": 0.686173438447544, "grad_norm": 0.28178900480270386, "learning_rate": 1.355790137740543e-05, "loss": 0.1385, "step": 38471 }, { "epoch": 0.6861912745692577, "grad_norm": 0.28673335909843445, "learning_rate": 1.3556517485406824e-05, "loss": 0.1373, "step": 38472 }, { "epoch": 0.6862091106909713, "grad_norm": 0.25109684467315674, "learning_rate": 1.355513363776712e-05, "loss": 0.1262, "step": 38473 }, { "epoch": 0.686226946812685, "grad_norm": 0.20516128838062286, "learning_rate": 1.3553749834491675e-05, "loss": 0.0937, "step": 38474 }, { "epoch": 0.6862447829343987, "grad_norm": 0.3105356693267822, "learning_rate": 1.3552366075585845e-05, "loss": 0.0899, "step": 38475 }, { "epoch": 0.6862626190561124, "grad_norm": 0.4162426292896271, "learning_rate": 1.355098236105499e-05, "loss": 0.1084, "step": 38476 }, { "epoch": 0.6862804551778261, "grad_norm": 0.25846436619758606, "learning_rate": 1.3549598690904491e-05, "loss": 0.1701, "step": 38477 }, { "epoch": 0.6862982912995398, "grad_norm": 0.23674516379833221, "learning_rate": 1.3548215065139703e-05, "loss": 0.1402, "step": 38478 }, { "epoch": 0.6863161274212535, "grad_norm": 0.2504197359085083, "learning_rate": 1.3546831483765987e-05, "loss": 0.2215, "step": 38479 }, { "epoch": 0.6863339635429672, "grad_norm": 0.3130221962928772, "learning_rate": 1.3545447946788698e-05, "loss": 0.194, "step": 38480 }, { "epoch": 0.686351799664681, "grad_norm": 0.36830708384513855, "learning_rate": 1.354406445421322e-05, "loss": 0.1372, "step": 38481 }, { "epoch": 0.6863696357863946, "grad_norm": 0.2909860908985138, "learning_rate": 1.3542681006044904e-05, "loss": 0.1051, "step": 38482 }, { "epoch": 0.6863874719081083, "grad_norm": 0.28509071469306946, "learning_rate": 1.3541297602289115e-05, "loss": 0.1637, "step": 38483 }, { "epoch": 0.686405308029822, "grad_norm": 0.3719830811023712, "learning_rate": 1.3539914242951207e-05, "loss": 0.0584, "step": 38484 }, { "epoch": 0.6864231441515357, "grad_norm": 0.23574262857437134, "learning_rate": 1.3538530928036544e-05, "loss": 0.1236, "step": 38485 }, { "epoch": 0.6864409802732494, "grad_norm": 0.30539509654045105, "learning_rate": 1.3537147657550508e-05, "loss": 0.1274, "step": 38486 }, { "epoch": 0.6864588163949631, "grad_norm": 0.2478194534778595, "learning_rate": 1.3535764431498443e-05, "loss": 0.1198, "step": 38487 }, { "epoch": 0.6864766525166768, "grad_norm": 0.4017721712589264, "learning_rate": 1.3534381249885719e-05, "loss": 0.1046, "step": 38488 }, { "epoch": 0.6864944886383905, "grad_norm": 0.4043966233730316, "learning_rate": 1.353299811271768e-05, "loss": 0.1052, "step": 38489 }, { "epoch": 0.6865123247601042, "grad_norm": 0.19515439867973328, "learning_rate": 1.353161501999971e-05, "loss": 0.1162, "step": 38490 }, { "epoch": 0.6865301608818178, "grad_norm": 0.27910029888153076, "learning_rate": 1.3530231971737164e-05, "loss": 0.1105, "step": 38491 }, { "epoch": 0.6865479970035315, "grad_norm": 0.20086170732975006, "learning_rate": 1.35288489679354e-05, "loss": 0.0981, "step": 38492 }, { "epoch": 0.6865658331252452, "grad_norm": 0.28241032361984253, "learning_rate": 1.352746600859977e-05, "loss": 0.1237, "step": 38493 }, { "epoch": 0.6865836692469589, "grad_norm": 0.23580418527126312, "learning_rate": 1.3526083093735654e-05, "loss": 0.1228, "step": 38494 }, { "epoch": 0.6866015053686726, "grad_norm": 0.220314159989357, "learning_rate": 1.3524700223348402e-05, "loss": 0.1004, "step": 38495 }, { "epoch": 0.6866193414903863, "grad_norm": 0.22770333290100098, "learning_rate": 1.3523317397443374e-05, "loss": 0.1139, "step": 38496 }, { "epoch": 0.6866371776121001, "grad_norm": 0.2761106491088867, "learning_rate": 1.3521934616025922e-05, "loss": 0.111, "step": 38497 }, { "epoch": 0.6866550137338138, "grad_norm": 0.3761462867259979, "learning_rate": 1.3520551879101428e-05, "loss": 0.1025, "step": 38498 }, { "epoch": 0.6866728498555275, "grad_norm": 0.33922600746154785, "learning_rate": 1.351916918667523e-05, "loss": 0.138, "step": 38499 }, { "epoch": 0.6866906859772411, "grad_norm": 0.2886148691177368, "learning_rate": 1.3517786538752705e-05, "loss": 0.1147, "step": 38500 }, { "epoch": 0.6867085220989548, "grad_norm": 0.3000772297382355, "learning_rate": 1.3516403935339206e-05, "loss": 0.1564, "step": 38501 }, { "epoch": 0.6867263582206685, "grad_norm": 0.3180055022239685, "learning_rate": 1.3515021376440084e-05, "loss": 0.1327, "step": 38502 }, { "epoch": 0.6867441943423822, "grad_norm": 0.33008190989494324, "learning_rate": 1.3513638862060712e-05, "loss": 0.1085, "step": 38503 }, { "epoch": 0.6867620304640959, "grad_norm": 0.2843044698238373, "learning_rate": 1.3512256392206445e-05, "loss": 0.0941, "step": 38504 }, { "epoch": 0.6867798665858096, "grad_norm": 0.26780855655670166, "learning_rate": 1.3510873966882642e-05, "loss": 0.1185, "step": 38505 }, { "epoch": 0.6867977027075233, "grad_norm": 0.3334581255912781, "learning_rate": 1.3509491586094646e-05, "loss": 0.1329, "step": 38506 }, { "epoch": 0.686815538829237, "grad_norm": 0.2519510090351105, "learning_rate": 1.3508109249847845e-05, "loss": 0.0916, "step": 38507 }, { "epoch": 0.6868333749509506, "grad_norm": 0.28137853741645813, "learning_rate": 1.3506726958147575e-05, "loss": 0.1208, "step": 38508 }, { "epoch": 0.6868512110726643, "grad_norm": 0.2665141522884369, "learning_rate": 1.3505344710999205e-05, "loss": 0.0902, "step": 38509 }, { "epoch": 0.686869047194378, "grad_norm": 0.315267950296402, "learning_rate": 1.3503962508408076e-05, "loss": 0.1557, "step": 38510 }, { "epoch": 0.6868868833160917, "grad_norm": 0.2413836121559143, "learning_rate": 1.3502580350379573e-05, "loss": 0.0974, "step": 38511 }, { "epoch": 0.6869047194378054, "grad_norm": 0.31613633036613464, "learning_rate": 1.3501198236919039e-05, "loss": 0.1211, "step": 38512 }, { "epoch": 0.6869225555595191, "grad_norm": 0.18966291844844818, "learning_rate": 1.349981616803182e-05, "loss": 0.1067, "step": 38513 }, { "epoch": 0.6869403916812329, "grad_norm": 0.19223885238170624, "learning_rate": 1.3498434143723293e-05, "loss": 0.0995, "step": 38514 }, { "epoch": 0.6869582278029466, "grad_norm": 0.2586239278316498, "learning_rate": 1.3497052163998803e-05, "loss": 0.132, "step": 38515 }, { "epoch": 0.6869760639246603, "grad_norm": 0.27736833691596985, "learning_rate": 1.3495670228863721e-05, "loss": 0.1148, "step": 38516 }, { "epoch": 0.686993900046374, "grad_norm": 0.33557626605033875, "learning_rate": 1.3494288338323392e-05, "loss": 0.1689, "step": 38517 }, { "epoch": 0.6870117361680876, "grad_norm": 0.286602258682251, "learning_rate": 1.3492906492383179e-05, "loss": 0.168, "step": 38518 }, { "epoch": 0.6870295722898013, "grad_norm": 0.2325119525194168, "learning_rate": 1.3491524691048422e-05, "loss": 0.1071, "step": 38519 }, { "epoch": 0.687047408411515, "grad_norm": 0.3034343719482422, "learning_rate": 1.3490142934324502e-05, "loss": 0.1629, "step": 38520 }, { "epoch": 0.6870652445332287, "grad_norm": 0.23229782283306122, "learning_rate": 1.3488761222216761e-05, "loss": 0.0948, "step": 38521 }, { "epoch": 0.6870830806549424, "grad_norm": 0.26982471346855164, "learning_rate": 1.3487379554730556e-05, "loss": 0.1357, "step": 38522 }, { "epoch": 0.6871009167766561, "grad_norm": 0.22699788212776184, "learning_rate": 1.3485997931871236e-05, "loss": 0.1045, "step": 38523 }, { "epoch": 0.6871187528983698, "grad_norm": 0.28402313590049744, "learning_rate": 1.3484616353644177e-05, "loss": 0.1647, "step": 38524 }, { "epoch": 0.6871365890200835, "grad_norm": 0.26465165615081787, "learning_rate": 1.3483234820054722e-05, "loss": 0.0935, "step": 38525 }, { "epoch": 0.6871544251417971, "grad_norm": 0.31918010115623474, "learning_rate": 1.3481853331108213e-05, "loss": 0.1325, "step": 38526 }, { "epoch": 0.6871722612635108, "grad_norm": 0.26944291591644287, "learning_rate": 1.3480471886810031e-05, "loss": 0.1303, "step": 38527 }, { "epoch": 0.6871900973852245, "grad_norm": 0.21865399181842804, "learning_rate": 1.3479090487165511e-05, "loss": 0.0884, "step": 38528 }, { "epoch": 0.6872079335069382, "grad_norm": 0.41744452714920044, "learning_rate": 1.3477709132180023e-05, "loss": 0.0937, "step": 38529 }, { "epoch": 0.6872257696286519, "grad_norm": 0.30443131923675537, "learning_rate": 1.3476327821858913e-05, "loss": 0.1469, "step": 38530 }, { "epoch": 0.6872436057503657, "grad_norm": 0.2349502444267273, "learning_rate": 1.347494655620754e-05, "loss": 0.1587, "step": 38531 }, { "epoch": 0.6872614418720794, "grad_norm": 0.2678750157356262, "learning_rate": 1.3473565335231241e-05, "loss": 0.1344, "step": 38532 }, { "epoch": 0.6872792779937931, "grad_norm": 0.2812378406524658, "learning_rate": 1.3472184158935396e-05, "loss": 0.0911, "step": 38533 }, { "epoch": 0.6872971141155068, "grad_norm": 0.3774079978466034, "learning_rate": 1.3470803027325345e-05, "loss": 0.1354, "step": 38534 }, { "epoch": 0.6873149502372204, "grad_norm": 0.23949551582336426, "learning_rate": 1.3469421940406445e-05, "loss": 0.124, "step": 38535 }, { "epoch": 0.6873327863589341, "grad_norm": 0.26348742842674255, "learning_rate": 1.3468040898184042e-05, "loss": 0.0877, "step": 38536 }, { "epoch": 0.6873506224806478, "grad_norm": 0.3536117374897003, "learning_rate": 1.34666599006635e-05, "loss": 0.149, "step": 38537 }, { "epoch": 0.6873684586023615, "grad_norm": 0.20122745633125305, "learning_rate": 1.3465278947850169e-05, "loss": 0.1203, "step": 38538 }, { "epoch": 0.6873862947240752, "grad_norm": 0.2623310387134552, "learning_rate": 1.34638980397494e-05, "loss": 0.1135, "step": 38539 }, { "epoch": 0.6874041308457889, "grad_norm": 0.25378933548927307, "learning_rate": 1.346251717636654e-05, "loss": 0.1029, "step": 38540 }, { "epoch": 0.6874219669675026, "grad_norm": 0.265180379152298, "learning_rate": 1.3461136357706944e-05, "loss": 0.1229, "step": 38541 }, { "epoch": 0.6874398030892163, "grad_norm": 0.2195325642824173, "learning_rate": 1.345975558377598e-05, "loss": 0.1174, "step": 38542 }, { "epoch": 0.68745763921093, "grad_norm": 0.18819034099578857, "learning_rate": 1.3458374854578993e-05, "loss": 0.0931, "step": 38543 }, { "epoch": 0.6874754753326436, "grad_norm": 0.3780516982078552, "learning_rate": 1.3456994170121326e-05, "loss": 0.1421, "step": 38544 }, { "epoch": 0.6874933114543573, "grad_norm": 0.2784189283847809, "learning_rate": 1.345561353040833e-05, "loss": 0.1254, "step": 38545 }, { "epoch": 0.687511147576071, "grad_norm": 0.2901017963886261, "learning_rate": 1.345423293544537e-05, "loss": 0.088, "step": 38546 }, { "epoch": 0.6875289836977847, "grad_norm": 0.23261678218841553, "learning_rate": 1.3452852385237796e-05, "loss": 0.0897, "step": 38547 }, { "epoch": 0.6875468198194985, "grad_norm": 0.3297034204006195, "learning_rate": 1.345147187979095e-05, "loss": 0.1303, "step": 38548 }, { "epoch": 0.6875646559412122, "grad_norm": 0.37196218967437744, "learning_rate": 1.3450091419110178e-05, "loss": 0.1222, "step": 38549 }, { "epoch": 0.6875824920629259, "grad_norm": 0.27657291293144226, "learning_rate": 1.3448711003200853e-05, "loss": 0.1868, "step": 38550 }, { "epoch": 0.6876003281846396, "grad_norm": 0.2765156924724579, "learning_rate": 1.344733063206831e-05, "loss": 0.1653, "step": 38551 }, { "epoch": 0.6876181643063533, "grad_norm": 0.23933175206184387, "learning_rate": 1.3445950305717909e-05, "loss": 0.1293, "step": 38552 }, { "epoch": 0.6876360004280669, "grad_norm": 0.21674111485481262, "learning_rate": 1.3444570024154984e-05, "loss": 0.0883, "step": 38553 }, { "epoch": 0.6876538365497806, "grad_norm": 0.20923608541488647, "learning_rate": 1.3443189787384904e-05, "loss": 0.1312, "step": 38554 }, { "epoch": 0.6876716726714943, "grad_norm": 0.371404767036438, "learning_rate": 1.3441809595413005e-05, "loss": 0.2429, "step": 38555 }, { "epoch": 0.687689508793208, "grad_norm": 0.2733886241912842, "learning_rate": 1.3440429448244652e-05, "loss": 0.1407, "step": 38556 }, { "epoch": 0.6877073449149217, "grad_norm": 0.35108649730682373, "learning_rate": 1.3439049345885188e-05, "loss": 0.0941, "step": 38557 }, { "epoch": 0.6877251810366354, "grad_norm": 0.28271621465682983, "learning_rate": 1.343766928833995e-05, "loss": 0.0989, "step": 38558 }, { "epoch": 0.6877430171583491, "grad_norm": 0.3597377836704254, "learning_rate": 1.3436289275614311e-05, "loss": 0.1225, "step": 38559 }, { "epoch": 0.6877608532800628, "grad_norm": 0.22796548902988434, "learning_rate": 1.3434909307713609e-05, "loss": 0.1444, "step": 38560 }, { "epoch": 0.6877786894017764, "grad_norm": 0.21248580515384674, "learning_rate": 1.3433529384643193e-05, "loss": 0.1422, "step": 38561 }, { "epoch": 0.6877965255234901, "grad_norm": 0.21324306726455688, "learning_rate": 1.34321495064084e-05, "loss": 0.1153, "step": 38562 }, { "epoch": 0.6878143616452038, "grad_norm": 0.259677529335022, "learning_rate": 1.3430769673014604e-05, "loss": 0.1398, "step": 38563 }, { "epoch": 0.6878321977669175, "grad_norm": 0.18702569603919983, "learning_rate": 1.3429389884467137e-05, "loss": 0.117, "step": 38564 }, { "epoch": 0.6878500338886313, "grad_norm": 0.30660855770111084, "learning_rate": 1.3428010140771351e-05, "loss": 0.105, "step": 38565 }, { "epoch": 0.687867870010345, "grad_norm": 0.19275590777397156, "learning_rate": 1.34266304419326e-05, "loss": 0.1139, "step": 38566 }, { "epoch": 0.6878857061320587, "grad_norm": 0.2929665744304657, "learning_rate": 1.3425250787956212e-05, "loss": 0.11, "step": 38567 }, { "epoch": 0.6879035422537724, "grad_norm": 0.3055505156517029, "learning_rate": 1.3423871178847552e-05, "loss": 0.1565, "step": 38568 }, { "epoch": 0.6879213783754861, "grad_norm": 0.2522900402545929, "learning_rate": 1.3422491614611976e-05, "loss": 0.2286, "step": 38569 }, { "epoch": 0.6879392144971997, "grad_norm": 0.37942132353782654, "learning_rate": 1.3421112095254817e-05, "loss": 0.0962, "step": 38570 }, { "epoch": 0.6879570506189134, "grad_norm": 0.26580050587654114, "learning_rate": 1.3419732620781422e-05, "loss": 0.1521, "step": 38571 }, { "epoch": 0.6879748867406271, "grad_norm": 0.3553601801395416, "learning_rate": 1.341835319119715e-05, "loss": 0.1485, "step": 38572 }, { "epoch": 0.6879927228623408, "grad_norm": 0.3606205880641937, "learning_rate": 1.3416973806507344e-05, "loss": 0.1924, "step": 38573 }, { "epoch": 0.6880105589840545, "grad_norm": 0.2929568588733673, "learning_rate": 1.3415594466717345e-05, "loss": 0.1098, "step": 38574 }, { "epoch": 0.6880283951057682, "grad_norm": 0.2457340657711029, "learning_rate": 1.3414215171832496e-05, "loss": 0.1448, "step": 38575 }, { "epoch": 0.6880462312274819, "grad_norm": 0.29361286759376526, "learning_rate": 1.3412835921858158e-05, "loss": 0.1171, "step": 38576 }, { "epoch": 0.6880640673491956, "grad_norm": 0.24348865449428558, "learning_rate": 1.341145671679967e-05, "loss": 0.0837, "step": 38577 }, { "epoch": 0.6880819034709093, "grad_norm": 0.25722333788871765, "learning_rate": 1.3410077556662376e-05, "loss": 0.0769, "step": 38578 }, { "epoch": 0.6880997395926229, "grad_norm": 0.3594392240047455, "learning_rate": 1.3408698441451628e-05, "loss": 0.0985, "step": 38579 }, { "epoch": 0.6881175757143366, "grad_norm": 0.36963778734207153, "learning_rate": 1.3407319371172761e-05, "loss": 0.102, "step": 38580 }, { "epoch": 0.6881354118360503, "grad_norm": 0.2212645709514618, "learning_rate": 1.3405940345831134e-05, "loss": 0.0988, "step": 38581 }, { "epoch": 0.6881532479577641, "grad_norm": 0.2580206096172333, "learning_rate": 1.3404561365432077e-05, "loss": 0.0906, "step": 38582 }, { "epoch": 0.6881710840794778, "grad_norm": 0.2727537453174591, "learning_rate": 1.3403182429980954e-05, "loss": 0.1276, "step": 38583 }, { "epoch": 0.6881889202011915, "grad_norm": 0.21269245445728302, "learning_rate": 1.3401803539483093e-05, "loss": 0.1241, "step": 38584 }, { "epoch": 0.6882067563229052, "grad_norm": 0.23468634486198425, "learning_rate": 1.3400424693943858e-05, "loss": 0.1134, "step": 38585 }, { "epoch": 0.6882245924446189, "grad_norm": 0.24647195637226105, "learning_rate": 1.3399045893368583e-05, "loss": 0.0794, "step": 38586 }, { "epoch": 0.6882424285663326, "grad_norm": 0.3625646233558655, "learning_rate": 1.3397667137762613e-05, "loss": 0.1101, "step": 38587 }, { "epoch": 0.6882602646880462, "grad_norm": 0.23308062553405762, "learning_rate": 1.3396288427131281e-05, "loss": 0.1035, "step": 38588 }, { "epoch": 0.6882781008097599, "grad_norm": 0.25125983357429504, "learning_rate": 1.3394909761479954e-05, "loss": 0.0882, "step": 38589 }, { "epoch": 0.6882959369314736, "grad_norm": 0.342476487159729, "learning_rate": 1.3393531140813964e-05, "loss": 0.1397, "step": 38590 }, { "epoch": 0.6883137730531873, "grad_norm": 0.2825044095516205, "learning_rate": 1.3392152565138657e-05, "loss": 0.1188, "step": 38591 }, { "epoch": 0.688331609174901, "grad_norm": 0.22510160505771637, "learning_rate": 1.3390774034459377e-05, "loss": 0.0778, "step": 38592 }, { "epoch": 0.6883494452966147, "grad_norm": 0.257340669631958, "learning_rate": 1.3389395548781456e-05, "loss": 0.0837, "step": 38593 }, { "epoch": 0.6883672814183284, "grad_norm": 0.27798953652381897, "learning_rate": 1.3388017108110257e-05, "loss": 0.0816, "step": 38594 }, { "epoch": 0.6883851175400421, "grad_norm": 0.1957148015499115, "learning_rate": 1.3386638712451105e-05, "loss": 0.0766, "step": 38595 }, { "epoch": 0.6884029536617557, "grad_norm": 0.28676503896713257, "learning_rate": 1.3385260361809362e-05, "loss": 0.1078, "step": 38596 }, { "epoch": 0.6884207897834694, "grad_norm": 0.3771875202655792, "learning_rate": 1.338388205619035e-05, "loss": 0.0817, "step": 38597 }, { "epoch": 0.6884386259051832, "grad_norm": 0.36797305941581726, "learning_rate": 1.3382503795599439e-05, "loss": 0.1272, "step": 38598 }, { "epoch": 0.6884564620268969, "grad_norm": 0.26945650577545166, "learning_rate": 1.3381125580041948e-05, "loss": 0.0722, "step": 38599 }, { "epoch": 0.6884742981486106, "grad_norm": 0.27191072702407837, "learning_rate": 1.3379747409523232e-05, "loss": 0.0882, "step": 38600 }, { "epoch": 0.6884921342703243, "grad_norm": 0.21759861707687378, "learning_rate": 1.337836928404862e-05, "loss": 0.1179, "step": 38601 }, { "epoch": 0.688509970392038, "grad_norm": 0.2839098870754242, "learning_rate": 1.3376991203623467e-05, "loss": 0.1058, "step": 38602 }, { "epoch": 0.6885278065137517, "grad_norm": 0.42385223507881165, "learning_rate": 1.3375613168253115e-05, "loss": 0.1014, "step": 38603 }, { "epoch": 0.6885456426354654, "grad_norm": 0.2928169071674347, "learning_rate": 1.3374235177942902e-05, "loss": 0.1439, "step": 38604 }, { "epoch": 0.688563478757179, "grad_norm": 0.22677674889564514, "learning_rate": 1.3372857232698166e-05, "loss": 0.1047, "step": 38605 }, { "epoch": 0.6885813148788927, "grad_norm": 0.2896255850791931, "learning_rate": 1.3371479332524239e-05, "loss": 0.0797, "step": 38606 }, { "epoch": 0.6885991510006064, "grad_norm": 0.24892179667949677, "learning_rate": 1.3370101477426489e-05, "loss": 0.0903, "step": 38607 }, { "epoch": 0.6886169871223201, "grad_norm": 0.2192019820213318, "learning_rate": 1.3368723667410243e-05, "loss": 0.0762, "step": 38608 }, { "epoch": 0.6886348232440338, "grad_norm": 0.27080243825912476, "learning_rate": 1.3367345902480826e-05, "loss": 0.1309, "step": 38609 }, { "epoch": 0.6886526593657475, "grad_norm": 0.30355319380760193, "learning_rate": 1.336596818264361e-05, "loss": 0.0979, "step": 38610 }, { "epoch": 0.6886704954874612, "grad_norm": 0.3359048664569855, "learning_rate": 1.3364590507903906e-05, "loss": 0.1014, "step": 38611 }, { "epoch": 0.6886883316091749, "grad_norm": 0.2574026584625244, "learning_rate": 1.3363212878267078e-05, "loss": 0.1451, "step": 38612 }, { "epoch": 0.6887061677308886, "grad_norm": 0.462470680475235, "learning_rate": 1.3361835293738458e-05, "loss": 0.1282, "step": 38613 }, { "epoch": 0.6887240038526022, "grad_norm": 0.2831890881061554, "learning_rate": 1.3360457754323374e-05, "loss": 0.0837, "step": 38614 }, { "epoch": 0.688741839974316, "grad_norm": 0.2723994851112366, "learning_rate": 1.3359080260027184e-05, "loss": 0.1498, "step": 38615 }, { "epoch": 0.6887596760960297, "grad_norm": 0.2538544237613678, "learning_rate": 1.3357702810855221e-05, "loss": 0.1147, "step": 38616 }, { "epoch": 0.6887775122177434, "grad_norm": 0.2999408543109894, "learning_rate": 1.3356325406812826e-05, "loss": 0.0972, "step": 38617 }, { "epoch": 0.6887953483394571, "grad_norm": 0.24906311929225922, "learning_rate": 1.335494804790533e-05, "loss": 0.1305, "step": 38618 }, { "epoch": 0.6888131844611708, "grad_norm": 0.24978883564472198, "learning_rate": 1.3353570734138072e-05, "loss": 0.0811, "step": 38619 }, { "epoch": 0.6888310205828845, "grad_norm": 0.3234630525112152, "learning_rate": 1.3352193465516402e-05, "loss": 0.1275, "step": 38620 }, { "epoch": 0.6888488567045982, "grad_norm": 0.3482932150363922, "learning_rate": 1.3350816242045655e-05, "loss": 0.1179, "step": 38621 }, { "epoch": 0.6888666928263119, "grad_norm": 0.2004636973142624, "learning_rate": 1.3349439063731157e-05, "loss": 0.078, "step": 38622 }, { "epoch": 0.6888845289480255, "grad_norm": 0.33838585019111633, "learning_rate": 1.334806193057827e-05, "loss": 0.1443, "step": 38623 }, { "epoch": 0.6889023650697392, "grad_norm": 0.2522510886192322, "learning_rate": 1.3346684842592306e-05, "loss": 0.1129, "step": 38624 }, { "epoch": 0.6889202011914529, "grad_norm": 0.27040895819664, "learning_rate": 1.3345307799778627e-05, "loss": 0.087, "step": 38625 }, { "epoch": 0.6889380373131666, "grad_norm": 0.22355616092681885, "learning_rate": 1.3343930802142562e-05, "loss": 0.088, "step": 38626 }, { "epoch": 0.6889558734348803, "grad_norm": 0.25743889808654785, "learning_rate": 1.3342553849689437e-05, "loss": 0.0766, "step": 38627 }, { "epoch": 0.688973709556594, "grad_norm": 0.343999445438385, "learning_rate": 1.334117694242461e-05, "loss": 0.1553, "step": 38628 }, { "epoch": 0.6889915456783077, "grad_norm": 0.3359681963920593, "learning_rate": 1.3339800080353407e-05, "loss": 0.1461, "step": 38629 }, { "epoch": 0.6890093818000214, "grad_norm": 0.24476896226406097, "learning_rate": 1.3338423263481164e-05, "loss": 0.0896, "step": 38630 }, { "epoch": 0.689027217921735, "grad_norm": 0.539818525314331, "learning_rate": 1.3337046491813223e-05, "loss": 0.0683, "step": 38631 }, { "epoch": 0.6890450540434488, "grad_norm": 0.31362634897232056, "learning_rate": 1.3335669765354907e-05, "loss": 0.167, "step": 38632 }, { "epoch": 0.6890628901651625, "grad_norm": 0.335553377866745, "learning_rate": 1.3334293084111576e-05, "loss": 0.1183, "step": 38633 }, { "epoch": 0.6890807262868762, "grad_norm": 0.2639838755130768, "learning_rate": 1.333291644808855e-05, "loss": 0.0856, "step": 38634 }, { "epoch": 0.6890985624085899, "grad_norm": 0.26502877473831177, "learning_rate": 1.3331539857291175e-05, "loss": 0.1418, "step": 38635 }, { "epoch": 0.6891163985303036, "grad_norm": 0.21990294754505157, "learning_rate": 1.3330163311724766e-05, "loss": 0.0943, "step": 38636 }, { "epoch": 0.6891342346520173, "grad_norm": 0.24346661567687988, "learning_rate": 1.3328786811394688e-05, "loss": 0.0991, "step": 38637 }, { "epoch": 0.689152070773731, "grad_norm": 0.2155342549085617, "learning_rate": 1.3327410356306252e-05, "loss": 0.0941, "step": 38638 }, { "epoch": 0.6891699068954447, "grad_norm": 0.2636861205101013, "learning_rate": 1.3326033946464816e-05, "loss": 0.1109, "step": 38639 }, { "epoch": 0.6891877430171583, "grad_norm": 0.2761094868183136, "learning_rate": 1.3324657581875694e-05, "loss": 0.148, "step": 38640 }, { "epoch": 0.689205579138872, "grad_norm": 0.41489556431770325, "learning_rate": 1.3323281262544243e-05, "loss": 0.1009, "step": 38641 }, { "epoch": 0.6892234152605857, "grad_norm": 0.32335516810417175, "learning_rate": 1.3321904988475787e-05, "loss": 0.1108, "step": 38642 }, { "epoch": 0.6892412513822994, "grad_norm": 0.3143722116947174, "learning_rate": 1.3320528759675657e-05, "loss": 0.1467, "step": 38643 }, { "epoch": 0.6892590875040131, "grad_norm": 0.23518264293670654, "learning_rate": 1.3319152576149197e-05, "loss": 0.1462, "step": 38644 }, { "epoch": 0.6892769236257268, "grad_norm": 0.2167765498161316, "learning_rate": 1.331777643790172e-05, "loss": 0.1344, "step": 38645 }, { "epoch": 0.6892947597474405, "grad_norm": 0.30202847719192505, "learning_rate": 1.331640034493859e-05, "loss": 0.1713, "step": 38646 }, { "epoch": 0.6893125958691542, "grad_norm": 0.31204459071159363, "learning_rate": 1.3315024297265128e-05, "loss": 0.1355, "step": 38647 }, { "epoch": 0.6893304319908679, "grad_norm": 0.2837125062942505, "learning_rate": 1.3313648294886666e-05, "loss": 0.1919, "step": 38648 }, { "epoch": 0.6893482681125817, "grad_norm": 0.2383567988872528, "learning_rate": 1.331227233780853e-05, "loss": 0.1573, "step": 38649 }, { "epoch": 0.6893661042342953, "grad_norm": 0.30589544773101807, "learning_rate": 1.3310896426036074e-05, "loss": 0.1526, "step": 38650 }, { "epoch": 0.689383940356009, "grad_norm": 0.2975884974002838, "learning_rate": 1.3309520559574612e-05, "loss": 0.1717, "step": 38651 }, { "epoch": 0.6894017764777227, "grad_norm": 0.2432190328836441, "learning_rate": 1.3308144738429492e-05, "loss": 0.1002, "step": 38652 }, { "epoch": 0.6894196125994364, "grad_norm": 0.21989594399929047, "learning_rate": 1.3306768962606034e-05, "loss": 0.0824, "step": 38653 }, { "epoch": 0.6894374487211501, "grad_norm": 0.26220694184303284, "learning_rate": 1.3305393232109586e-05, "loss": 0.0981, "step": 38654 }, { "epoch": 0.6894552848428638, "grad_norm": 0.3841293752193451, "learning_rate": 1.3304017546945478e-05, "loss": 0.2178, "step": 38655 }, { "epoch": 0.6894731209645775, "grad_norm": 0.29004284739494324, "learning_rate": 1.3302641907119034e-05, "loss": 0.1911, "step": 38656 }, { "epoch": 0.6894909570862912, "grad_norm": 0.40998777747154236, "learning_rate": 1.3301266312635591e-05, "loss": 0.0948, "step": 38657 }, { "epoch": 0.6895087932080048, "grad_norm": 0.28436097502708435, "learning_rate": 1.329989076350047e-05, "loss": 0.1785, "step": 38658 }, { "epoch": 0.6895266293297185, "grad_norm": 0.23973765969276428, "learning_rate": 1.3298515259719024e-05, "loss": 0.0837, "step": 38659 }, { "epoch": 0.6895444654514322, "grad_norm": 0.24880464375019073, "learning_rate": 1.3297139801296572e-05, "loss": 0.1216, "step": 38660 }, { "epoch": 0.6895623015731459, "grad_norm": 0.23176822066307068, "learning_rate": 1.3295764388238451e-05, "loss": 0.1508, "step": 38661 }, { "epoch": 0.6895801376948596, "grad_norm": 0.26152318716049194, "learning_rate": 1.329438902054998e-05, "loss": 0.1593, "step": 38662 }, { "epoch": 0.6895979738165733, "grad_norm": 0.2617315948009491, "learning_rate": 1.3293013698236506e-05, "loss": 0.072, "step": 38663 }, { "epoch": 0.689615809938287, "grad_norm": 0.25507086515426636, "learning_rate": 1.329163842130336e-05, "loss": 0.1327, "step": 38664 }, { "epoch": 0.6896336460600007, "grad_norm": 0.3612326979637146, "learning_rate": 1.3290263189755852e-05, "loss": 0.0904, "step": 38665 }, { "epoch": 0.6896514821817145, "grad_norm": 0.2828935384750366, "learning_rate": 1.3288888003599342e-05, "loss": 0.1363, "step": 38666 }, { "epoch": 0.6896693183034281, "grad_norm": 0.2619461715221405, "learning_rate": 1.3287512862839135e-05, "loss": 0.1204, "step": 38667 }, { "epoch": 0.6896871544251418, "grad_norm": 0.2610267102718353, "learning_rate": 1.3286137767480586e-05, "loss": 0.164, "step": 38668 }, { "epoch": 0.6897049905468555, "grad_norm": 0.2898586690425873, "learning_rate": 1.3284762717529009e-05, "loss": 0.1287, "step": 38669 }, { "epoch": 0.6897228266685692, "grad_norm": 0.260958731174469, "learning_rate": 1.3283387712989743e-05, "loss": 0.1225, "step": 38670 }, { "epoch": 0.6897406627902829, "grad_norm": 0.268326997756958, "learning_rate": 1.32820127538681e-05, "loss": 0.1388, "step": 38671 }, { "epoch": 0.6897584989119966, "grad_norm": 0.29754361510276794, "learning_rate": 1.3280637840169433e-05, "loss": 0.1159, "step": 38672 }, { "epoch": 0.6897763350337103, "grad_norm": 0.3323124051094055, "learning_rate": 1.3279262971899062e-05, "loss": 0.1482, "step": 38673 }, { "epoch": 0.689794171155424, "grad_norm": 0.3237851560115814, "learning_rate": 1.3277888149062314e-05, "loss": 0.1397, "step": 38674 }, { "epoch": 0.6898120072771377, "grad_norm": 0.3286314606666565, "learning_rate": 1.3276513371664511e-05, "loss": 0.1028, "step": 38675 }, { "epoch": 0.6898298433988513, "grad_norm": 0.3324311673641205, "learning_rate": 1.3275138639711005e-05, "loss": 0.1802, "step": 38676 }, { "epoch": 0.689847679520565, "grad_norm": 0.26701226830482483, "learning_rate": 1.3273763953207108e-05, "loss": 0.1286, "step": 38677 }, { "epoch": 0.6898655156422787, "grad_norm": 0.32240742444992065, "learning_rate": 1.3272389312158143e-05, "loss": 0.1026, "step": 38678 }, { "epoch": 0.6898833517639924, "grad_norm": 0.28120702505111694, "learning_rate": 1.3271014716569457e-05, "loss": 0.1434, "step": 38679 }, { "epoch": 0.6899011878857061, "grad_norm": 0.26151809096336365, "learning_rate": 1.3269640166446357e-05, "loss": 0.0963, "step": 38680 }, { "epoch": 0.6899190240074198, "grad_norm": 0.3740496039390564, "learning_rate": 1.3268265661794196e-05, "loss": 0.2064, "step": 38681 }, { "epoch": 0.6899368601291335, "grad_norm": 0.2708732485771179, "learning_rate": 1.326689120261829e-05, "loss": 0.1053, "step": 38682 }, { "epoch": 0.6899546962508473, "grad_norm": 0.3348347246646881, "learning_rate": 1.3265516788923965e-05, "loss": 0.1317, "step": 38683 }, { "epoch": 0.689972532372561, "grad_norm": 0.24740757048130035, "learning_rate": 1.326414242071654e-05, "loss": 0.1029, "step": 38684 }, { "epoch": 0.6899903684942746, "grad_norm": 0.23616833984851837, "learning_rate": 1.326276809800136e-05, "loss": 0.1084, "step": 38685 }, { "epoch": 0.6900082046159883, "grad_norm": 0.3803771138191223, "learning_rate": 1.3261393820783746e-05, "loss": 0.1391, "step": 38686 }, { "epoch": 0.690026040737702, "grad_norm": 0.28437551856040955, "learning_rate": 1.3260019589069028e-05, "loss": 0.1541, "step": 38687 }, { "epoch": 0.6900438768594157, "grad_norm": 0.2313799113035202, "learning_rate": 1.3258645402862512e-05, "loss": 0.1137, "step": 38688 }, { "epoch": 0.6900617129811294, "grad_norm": 0.28760936856269836, "learning_rate": 1.3257271262169557e-05, "loss": 0.1112, "step": 38689 }, { "epoch": 0.6900795491028431, "grad_norm": 0.22957073152065277, "learning_rate": 1.3255897166995474e-05, "loss": 0.1069, "step": 38690 }, { "epoch": 0.6900973852245568, "grad_norm": 0.3053368031978607, "learning_rate": 1.3254523117345591e-05, "loss": 0.0807, "step": 38691 }, { "epoch": 0.6901152213462705, "grad_norm": 0.2762482762336731, "learning_rate": 1.325314911322522e-05, "loss": 0.1368, "step": 38692 }, { "epoch": 0.6901330574679841, "grad_norm": 0.30173760652542114, "learning_rate": 1.3251775154639713e-05, "loss": 0.1591, "step": 38693 }, { "epoch": 0.6901508935896978, "grad_norm": 0.23821094632148743, "learning_rate": 1.3250401241594368e-05, "loss": 0.1377, "step": 38694 }, { "epoch": 0.6901687297114115, "grad_norm": 0.2922179698944092, "learning_rate": 1.324902737409454e-05, "loss": 0.1424, "step": 38695 }, { "epoch": 0.6901865658331252, "grad_norm": 0.32731255888938904, "learning_rate": 1.3247653552145538e-05, "loss": 0.131, "step": 38696 }, { "epoch": 0.6902044019548389, "grad_norm": 0.3112957179546356, "learning_rate": 1.3246279775752685e-05, "loss": 0.1418, "step": 38697 }, { "epoch": 0.6902222380765526, "grad_norm": 0.24798676371574402, "learning_rate": 1.3244906044921317e-05, "loss": 0.1323, "step": 38698 }, { "epoch": 0.6902400741982663, "grad_norm": 0.23981525003910065, "learning_rate": 1.3243532359656754e-05, "loss": 0.1388, "step": 38699 }, { "epoch": 0.6902579103199801, "grad_norm": 0.28346219658851624, "learning_rate": 1.324215871996432e-05, "loss": 0.0753, "step": 38700 }, { "epoch": 0.6902757464416938, "grad_norm": 0.37999963760375977, "learning_rate": 1.324078512584933e-05, "loss": 0.1345, "step": 38701 }, { "epoch": 0.6902935825634074, "grad_norm": 0.2923862934112549, "learning_rate": 1.3239411577317129e-05, "loss": 0.0996, "step": 38702 }, { "epoch": 0.6903114186851211, "grad_norm": 0.19239555299282074, "learning_rate": 1.3238038074373033e-05, "loss": 0.1096, "step": 38703 }, { "epoch": 0.6903292548068348, "grad_norm": 0.29090577363967896, "learning_rate": 1.323666461702236e-05, "loss": 0.1518, "step": 38704 }, { "epoch": 0.6903470909285485, "grad_norm": 0.23829719424247742, "learning_rate": 1.3235291205270427e-05, "loss": 0.1234, "step": 38705 }, { "epoch": 0.6903649270502622, "grad_norm": 0.2725439667701721, "learning_rate": 1.3233917839122583e-05, "loss": 0.1262, "step": 38706 }, { "epoch": 0.6903827631719759, "grad_norm": 0.3757858872413635, "learning_rate": 1.3232544518584122e-05, "loss": 0.1297, "step": 38707 }, { "epoch": 0.6904005992936896, "grad_norm": 0.3936978876590729, "learning_rate": 1.3231171243660398e-05, "loss": 0.1558, "step": 38708 }, { "epoch": 0.6904184354154033, "grad_norm": 0.2817973494529724, "learning_rate": 1.3229798014356717e-05, "loss": 0.1122, "step": 38709 }, { "epoch": 0.690436271537117, "grad_norm": 0.30825358629226685, "learning_rate": 1.3228424830678394e-05, "loss": 0.1178, "step": 38710 }, { "epoch": 0.6904541076588306, "grad_norm": 0.26283615827560425, "learning_rate": 1.322705169263077e-05, "loss": 0.0772, "step": 38711 }, { "epoch": 0.6904719437805443, "grad_norm": 0.22746725380420685, "learning_rate": 1.3225678600219165e-05, "loss": 0.1399, "step": 38712 }, { "epoch": 0.690489779902258, "grad_norm": 0.28221169114112854, "learning_rate": 1.3224305553448893e-05, "loss": 0.1152, "step": 38713 }, { "epoch": 0.6905076160239717, "grad_norm": 0.2240988165140152, "learning_rate": 1.3222932552325271e-05, "loss": 0.1146, "step": 38714 }, { "epoch": 0.6905254521456854, "grad_norm": 0.2932063341140747, "learning_rate": 1.3221559596853638e-05, "loss": 0.0592, "step": 38715 }, { "epoch": 0.6905432882673992, "grad_norm": 0.48504117131233215, "learning_rate": 1.322018668703931e-05, "loss": 0.2323, "step": 38716 }, { "epoch": 0.6905611243891129, "grad_norm": 0.20611225068569183, "learning_rate": 1.3218813822887607e-05, "loss": 0.0792, "step": 38717 }, { "epoch": 0.6905789605108266, "grad_norm": 0.28257375955581665, "learning_rate": 1.3217441004403842e-05, "loss": 0.1441, "step": 38718 }, { "epoch": 0.6905967966325403, "grad_norm": 0.2722945213317871, "learning_rate": 1.3216068231593354e-05, "loss": 0.1559, "step": 38719 }, { "epoch": 0.6906146327542539, "grad_norm": 0.2578687071800232, "learning_rate": 1.3214695504461455e-05, "loss": 0.1135, "step": 38720 }, { "epoch": 0.6906324688759676, "grad_norm": 0.2690184712409973, "learning_rate": 1.3213322823013457e-05, "loss": 0.1213, "step": 38721 }, { "epoch": 0.6906503049976813, "grad_norm": 0.26990264654159546, "learning_rate": 1.3211950187254702e-05, "loss": 0.1799, "step": 38722 }, { "epoch": 0.690668141119395, "grad_norm": 0.2215307056903839, "learning_rate": 1.3210577597190489e-05, "loss": 0.1521, "step": 38723 }, { "epoch": 0.6906859772411087, "grad_norm": 0.2137995809316635, "learning_rate": 1.3209205052826158e-05, "loss": 0.1239, "step": 38724 }, { "epoch": 0.6907038133628224, "grad_norm": 0.26220327615737915, "learning_rate": 1.3207832554167021e-05, "loss": 0.1089, "step": 38725 }, { "epoch": 0.6907216494845361, "grad_norm": 0.3278980255126953, "learning_rate": 1.3206460101218396e-05, "loss": 0.1691, "step": 38726 }, { "epoch": 0.6907394856062498, "grad_norm": 0.28485000133514404, "learning_rate": 1.3205087693985596e-05, "loss": 0.1744, "step": 38727 }, { "epoch": 0.6907573217279634, "grad_norm": 0.2428922802209854, "learning_rate": 1.3203715332473962e-05, "loss": 0.1063, "step": 38728 }, { "epoch": 0.6907751578496771, "grad_norm": 0.2448032945394516, "learning_rate": 1.32023430166888e-05, "loss": 0.0901, "step": 38729 }, { "epoch": 0.6907929939713908, "grad_norm": 0.262446790933609, "learning_rate": 1.3200970746635432e-05, "loss": 0.1308, "step": 38730 }, { "epoch": 0.6908108300931045, "grad_norm": 0.28618475794792175, "learning_rate": 1.3199598522319168e-05, "loss": 0.1172, "step": 38731 }, { "epoch": 0.6908286662148182, "grad_norm": 0.16633619368076324, "learning_rate": 1.3198226343745343e-05, "loss": 0.1072, "step": 38732 }, { "epoch": 0.690846502336532, "grad_norm": 0.3581560552120209, "learning_rate": 1.319685421091927e-05, "loss": 0.1796, "step": 38733 }, { "epoch": 0.6908643384582457, "grad_norm": 0.30605944991111755, "learning_rate": 1.319548212384626e-05, "loss": 0.1467, "step": 38734 }, { "epoch": 0.6908821745799594, "grad_norm": 0.32501140236854553, "learning_rate": 1.3194110082531643e-05, "loss": 0.1252, "step": 38735 }, { "epoch": 0.6909000107016731, "grad_norm": 0.2701601982116699, "learning_rate": 1.3192738086980726e-05, "loss": 0.1618, "step": 38736 }, { "epoch": 0.6909178468233867, "grad_norm": 0.22822974622249603, "learning_rate": 1.3191366137198843e-05, "loss": 0.1315, "step": 38737 }, { "epoch": 0.6909356829451004, "grad_norm": 0.2660581171512604, "learning_rate": 1.3189994233191305e-05, "loss": 0.166, "step": 38738 }, { "epoch": 0.6909535190668141, "grad_norm": 0.27075493335723877, "learning_rate": 1.3188622374963428e-05, "loss": 0.1236, "step": 38739 }, { "epoch": 0.6909713551885278, "grad_norm": 0.2880050241947174, "learning_rate": 1.318725056252052e-05, "loss": 0.094, "step": 38740 }, { "epoch": 0.6909891913102415, "grad_norm": 0.27749377489089966, "learning_rate": 1.3185878795867917e-05, "loss": 0.0964, "step": 38741 }, { "epoch": 0.6910070274319552, "grad_norm": 0.3644750714302063, "learning_rate": 1.318450707501093e-05, "loss": 0.1608, "step": 38742 }, { "epoch": 0.6910248635536689, "grad_norm": 0.3186701834201813, "learning_rate": 1.3183135399954873e-05, "loss": 0.1587, "step": 38743 }, { "epoch": 0.6910426996753826, "grad_norm": 0.27826863527297974, "learning_rate": 1.3181763770705059e-05, "loss": 0.1583, "step": 38744 }, { "epoch": 0.6910605357970963, "grad_norm": 0.26755356788635254, "learning_rate": 1.3180392187266816e-05, "loss": 0.1184, "step": 38745 }, { "epoch": 0.6910783719188099, "grad_norm": 0.3084157705307007, "learning_rate": 1.3179020649645458e-05, "loss": 0.0934, "step": 38746 }, { "epoch": 0.6910962080405236, "grad_norm": 0.28145831823349, "learning_rate": 1.3177649157846295e-05, "loss": 0.1106, "step": 38747 }, { "epoch": 0.6911140441622373, "grad_norm": 0.2863115072250366, "learning_rate": 1.317627771187464e-05, "loss": 0.1371, "step": 38748 }, { "epoch": 0.691131880283951, "grad_norm": 0.2876375913619995, "learning_rate": 1.3174906311735815e-05, "loss": 0.1029, "step": 38749 }, { "epoch": 0.6911497164056648, "grad_norm": 0.4812483787536621, "learning_rate": 1.3173534957435149e-05, "loss": 0.1443, "step": 38750 }, { "epoch": 0.6911675525273785, "grad_norm": 0.30926138162612915, "learning_rate": 1.3172163648977948e-05, "loss": 0.1617, "step": 38751 }, { "epoch": 0.6911853886490922, "grad_norm": 0.32044175267219543, "learning_rate": 1.3170792386369521e-05, "loss": 0.1216, "step": 38752 }, { "epoch": 0.6912032247708059, "grad_norm": 0.33879876136779785, "learning_rate": 1.3169421169615182e-05, "loss": 0.1066, "step": 38753 }, { "epoch": 0.6912210608925196, "grad_norm": 0.36022135615348816, "learning_rate": 1.316804999872026e-05, "loss": 0.1167, "step": 38754 }, { "epoch": 0.6912388970142332, "grad_norm": 0.2358321100473404, "learning_rate": 1.3166678873690064e-05, "loss": 0.133, "step": 38755 }, { "epoch": 0.6912567331359469, "grad_norm": 0.20222005248069763, "learning_rate": 1.3165307794529908e-05, "loss": 0.0655, "step": 38756 }, { "epoch": 0.6912745692576606, "grad_norm": 0.32699280977249146, "learning_rate": 1.31639367612451e-05, "loss": 0.1246, "step": 38757 }, { "epoch": 0.6912924053793743, "grad_norm": 0.3404202461242676, "learning_rate": 1.3162565773840968e-05, "loss": 0.1455, "step": 38758 }, { "epoch": 0.691310241501088, "grad_norm": 0.36428165435791016, "learning_rate": 1.3161194832322818e-05, "loss": 0.1672, "step": 38759 }, { "epoch": 0.6913280776228017, "grad_norm": 0.19613413512706757, "learning_rate": 1.3159823936695967e-05, "loss": 0.1127, "step": 38760 }, { "epoch": 0.6913459137445154, "grad_norm": 0.3601176142692566, "learning_rate": 1.3158453086965716e-05, "loss": 0.1582, "step": 38761 }, { "epoch": 0.6913637498662291, "grad_norm": 0.2544485330581665, "learning_rate": 1.3157082283137406e-05, "loss": 0.1126, "step": 38762 }, { "epoch": 0.6913815859879427, "grad_norm": 0.34593045711517334, "learning_rate": 1.3155711525216321e-05, "loss": 0.099, "step": 38763 }, { "epoch": 0.6913994221096564, "grad_norm": 0.25740107893943787, "learning_rate": 1.31543408132078e-05, "loss": 0.1298, "step": 38764 }, { "epoch": 0.6914172582313701, "grad_norm": 0.2705414593219757, "learning_rate": 1.3152970147117147e-05, "loss": 0.1046, "step": 38765 }, { "epoch": 0.6914350943530838, "grad_norm": 0.3260417580604553, "learning_rate": 1.3151599526949663e-05, "loss": 0.1163, "step": 38766 }, { "epoch": 0.6914529304747976, "grad_norm": 0.18829238414764404, "learning_rate": 1.315022895271068e-05, "loss": 0.0902, "step": 38767 }, { "epoch": 0.6914707665965113, "grad_norm": 0.2498045563697815, "learning_rate": 1.3148858424405503e-05, "loss": 0.1008, "step": 38768 }, { "epoch": 0.691488602718225, "grad_norm": 0.3177548944950104, "learning_rate": 1.3147487942039444e-05, "loss": 0.1328, "step": 38769 }, { "epoch": 0.6915064388399387, "grad_norm": 0.2703242301940918, "learning_rate": 1.3146117505617806e-05, "loss": 0.1058, "step": 38770 }, { "epoch": 0.6915242749616524, "grad_norm": 0.2493327409029007, "learning_rate": 1.3144747115145923e-05, "loss": 0.1358, "step": 38771 }, { "epoch": 0.691542111083366, "grad_norm": 0.3020995259284973, "learning_rate": 1.314337677062909e-05, "loss": 0.1759, "step": 38772 }, { "epoch": 0.6915599472050797, "grad_norm": 0.24847684800624847, "learning_rate": 1.3142006472072626e-05, "loss": 0.1063, "step": 38773 }, { "epoch": 0.6915777833267934, "grad_norm": 0.23036056756973267, "learning_rate": 1.3140636219481838e-05, "loss": 0.0956, "step": 38774 }, { "epoch": 0.6915956194485071, "grad_norm": 0.24159425497055054, "learning_rate": 1.3139266012862034e-05, "loss": 0.1446, "step": 38775 }, { "epoch": 0.6916134555702208, "grad_norm": 0.21169061958789825, "learning_rate": 1.3137895852218532e-05, "loss": 0.0944, "step": 38776 }, { "epoch": 0.6916312916919345, "grad_norm": 0.34019920229911804, "learning_rate": 1.3136525737556648e-05, "loss": 0.1528, "step": 38777 }, { "epoch": 0.6916491278136482, "grad_norm": 0.2619572877883911, "learning_rate": 1.3135155668881694e-05, "loss": 0.1197, "step": 38778 }, { "epoch": 0.6916669639353619, "grad_norm": 0.2956441640853882, "learning_rate": 1.3133785646198959e-05, "loss": 0.1284, "step": 38779 }, { "epoch": 0.6916848000570756, "grad_norm": 0.1832607537508011, "learning_rate": 1.3132415669513784e-05, "loss": 0.0871, "step": 38780 }, { "epoch": 0.6917026361787892, "grad_norm": 0.25110000371932983, "learning_rate": 1.313104573883146e-05, "loss": 0.1316, "step": 38781 }, { "epoch": 0.6917204723005029, "grad_norm": 0.27382761240005493, "learning_rate": 1.3129675854157306e-05, "loss": 0.0686, "step": 38782 }, { "epoch": 0.6917383084222166, "grad_norm": 0.2729042172431946, "learning_rate": 1.3128306015496616e-05, "loss": 0.1271, "step": 38783 }, { "epoch": 0.6917561445439304, "grad_norm": 0.5074036717414856, "learning_rate": 1.3126936222854724e-05, "loss": 0.1655, "step": 38784 }, { "epoch": 0.6917739806656441, "grad_norm": 0.34796684980392456, "learning_rate": 1.3125566476236928e-05, "loss": 0.1095, "step": 38785 }, { "epoch": 0.6917918167873578, "grad_norm": 0.27179834246635437, "learning_rate": 1.3124196775648534e-05, "loss": 0.1074, "step": 38786 }, { "epoch": 0.6918096529090715, "grad_norm": 0.26308971643447876, "learning_rate": 1.312282712109486e-05, "loss": 0.1336, "step": 38787 }, { "epoch": 0.6918274890307852, "grad_norm": 0.229720339179039, "learning_rate": 1.3121457512581197e-05, "loss": 0.115, "step": 38788 }, { "epoch": 0.6918453251524989, "grad_norm": 0.24698026478290558, "learning_rate": 1.312008795011288e-05, "loss": 0.1204, "step": 38789 }, { "epoch": 0.6918631612742125, "grad_norm": 0.20926906168460846, "learning_rate": 1.3118718433695194e-05, "loss": 0.0895, "step": 38790 }, { "epoch": 0.6918809973959262, "grad_norm": 0.266743004322052, "learning_rate": 1.3117348963333468e-05, "loss": 0.1147, "step": 38791 }, { "epoch": 0.6918988335176399, "grad_norm": 0.32903027534484863, "learning_rate": 1.3115979539032991e-05, "loss": 0.1259, "step": 38792 }, { "epoch": 0.6919166696393536, "grad_norm": 0.23266242444515228, "learning_rate": 1.3114610160799095e-05, "loss": 0.1716, "step": 38793 }, { "epoch": 0.6919345057610673, "grad_norm": 0.23769375681877136, "learning_rate": 1.3113240828637075e-05, "loss": 0.1282, "step": 38794 }, { "epoch": 0.691952341882781, "grad_norm": 0.2318408042192459, "learning_rate": 1.3111871542552234e-05, "loss": 0.1678, "step": 38795 }, { "epoch": 0.6919701780044947, "grad_norm": 0.2351434975862503, "learning_rate": 1.3110502302549882e-05, "loss": 0.1349, "step": 38796 }, { "epoch": 0.6919880141262084, "grad_norm": 0.27161964774131775, "learning_rate": 1.3109133108635335e-05, "loss": 0.1088, "step": 38797 }, { "epoch": 0.692005850247922, "grad_norm": 0.29230543971061707, "learning_rate": 1.3107763960813896e-05, "loss": 0.0948, "step": 38798 }, { "epoch": 0.6920236863696357, "grad_norm": 0.2311091125011444, "learning_rate": 1.310639485909087e-05, "loss": 0.0996, "step": 38799 }, { "epoch": 0.6920415224913494, "grad_norm": 0.30151063203811646, "learning_rate": 1.3105025803471565e-05, "loss": 0.1031, "step": 38800 }, { "epoch": 0.6920593586130632, "grad_norm": 0.265648752450943, "learning_rate": 1.3103656793961282e-05, "loss": 0.0795, "step": 38801 }, { "epoch": 0.6920771947347769, "grad_norm": 0.24831724166870117, "learning_rate": 1.310228783056534e-05, "loss": 0.1713, "step": 38802 }, { "epoch": 0.6920950308564906, "grad_norm": 0.31847429275512695, "learning_rate": 1.3100918913289034e-05, "loss": 0.1954, "step": 38803 }, { "epoch": 0.6921128669782043, "grad_norm": 0.23262082040309906, "learning_rate": 1.3099550042137684e-05, "loss": 0.0996, "step": 38804 }, { "epoch": 0.692130703099918, "grad_norm": 0.3489309251308441, "learning_rate": 1.3098181217116578e-05, "loss": 0.1453, "step": 38805 }, { "epoch": 0.6921485392216317, "grad_norm": 0.26697972416877747, "learning_rate": 1.3096812438231043e-05, "loss": 0.128, "step": 38806 }, { "epoch": 0.6921663753433454, "grad_norm": 0.22062751650810242, "learning_rate": 1.3095443705486377e-05, "loss": 0.0871, "step": 38807 }, { "epoch": 0.692184211465059, "grad_norm": 0.21080462634563446, "learning_rate": 1.3094075018887878e-05, "loss": 0.1084, "step": 38808 }, { "epoch": 0.6922020475867727, "grad_norm": 0.30563896894454956, "learning_rate": 1.309270637844085e-05, "loss": 0.1437, "step": 38809 }, { "epoch": 0.6922198837084864, "grad_norm": 0.22260555624961853, "learning_rate": 1.309133778415061e-05, "loss": 0.1298, "step": 38810 }, { "epoch": 0.6922377198302001, "grad_norm": 0.31000182032585144, "learning_rate": 1.3089969236022461e-05, "loss": 0.0621, "step": 38811 }, { "epoch": 0.6922555559519138, "grad_norm": 0.28880545496940613, "learning_rate": 1.3088600734061707e-05, "loss": 0.1326, "step": 38812 }, { "epoch": 0.6922733920736275, "grad_norm": 0.24089579284191132, "learning_rate": 1.3087232278273649e-05, "loss": 0.1515, "step": 38813 }, { "epoch": 0.6922912281953412, "grad_norm": 0.31125473976135254, "learning_rate": 1.308586386866358e-05, "loss": 0.1443, "step": 38814 }, { "epoch": 0.6923090643170549, "grad_norm": 0.3251326084136963, "learning_rate": 1.3084495505236833e-05, "loss": 0.1168, "step": 38815 }, { "epoch": 0.6923269004387685, "grad_norm": 0.18446235358715057, "learning_rate": 1.3083127187998692e-05, "loss": 0.1071, "step": 38816 }, { "epoch": 0.6923447365604823, "grad_norm": 0.20737780630588531, "learning_rate": 1.3081758916954456e-05, "loss": 0.0971, "step": 38817 }, { "epoch": 0.692362572682196, "grad_norm": 0.1935952752828598, "learning_rate": 1.3080390692109451e-05, "loss": 0.1014, "step": 38818 }, { "epoch": 0.6923804088039097, "grad_norm": 0.3421616554260254, "learning_rate": 1.307902251346896e-05, "loss": 0.1399, "step": 38819 }, { "epoch": 0.6923982449256234, "grad_norm": 0.20943127572536469, "learning_rate": 1.3077654381038304e-05, "loss": 0.086, "step": 38820 }, { "epoch": 0.6924160810473371, "grad_norm": 0.27631518244743347, "learning_rate": 1.3076286294822776e-05, "loss": 0.1575, "step": 38821 }, { "epoch": 0.6924339171690508, "grad_norm": 0.22087550163269043, "learning_rate": 1.3074918254827673e-05, "loss": 0.1045, "step": 38822 }, { "epoch": 0.6924517532907645, "grad_norm": 0.26400530338287354, "learning_rate": 1.3073550261058315e-05, "loss": 0.121, "step": 38823 }, { "epoch": 0.6924695894124782, "grad_norm": 0.24591878056526184, "learning_rate": 1.3072182313519993e-05, "loss": 0.1487, "step": 38824 }, { "epoch": 0.6924874255341918, "grad_norm": 0.32643479108810425, "learning_rate": 1.3070814412218017e-05, "loss": 0.1432, "step": 38825 }, { "epoch": 0.6925052616559055, "grad_norm": 0.2526044547557831, "learning_rate": 1.306944655715768e-05, "loss": 0.0962, "step": 38826 }, { "epoch": 0.6925230977776192, "grad_norm": 0.23792684078216553, "learning_rate": 1.306807874834428e-05, "loss": 0.0721, "step": 38827 }, { "epoch": 0.6925409338993329, "grad_norm": 0.270885169506073, "learning_rate": 1.3066710985783136e-05, "loss": 0.1018, "step": 38828 }, { "epoch": 0.6925587700210466, "grad_norm": 0.374444842338562, "learning_rate": 1.3065343269479547e-05, "loss": 0.1365, "step": 38829 }, { "epoch": 0.6925766061427603, "grad_norm": 0.22914782166481018, "learning_rate": 1.3063975599438797e-05, "loss": 0.1283, "step": 38830 }, { "epoch": 0.692594442264474, "grad_norm": 0.2915685772895813, "learning_rate": 1.3062607975666208e-05, "loss": 0.1056, "step": 38831 }, { "epoch": 0.6926122783861877, "grad_norm": 0.21610741317272186, "learning_rate": 1.3061240398167069e-05, "loss": 0.1367, "step": 38832 }, { "epoch": 0.6926301145079014, "grad_norm": 0.27557480335235596, "learning_rate": 1.305987286694669e-05, "loss": 0.1422, "step": 38833 }, { "epoch": 0.6926479506296152, "grad_norm": 0.27477696537971497, "learning_rate": 1.305850538201037e-05, "loss": 0.1325, "step": 38834 }, { "epoch": 0.6926657867513288, "grad_norm": 0.23252776265144348, "learning_rate": 1.3057137943363396e-05, "loss": 0.0904, "step": 38835 }, { "epoch": 0.6926836228730425, "grad_norm": 0.2752423584461212, "learning_rate": 1.3055770551011093e-05, "loss": 0.1081, "step": 38836 }, { "epoch": 0.6927014589947562, "grad_norm": 0.29582762718200684, "learning_rate": 1.3054403204958749e-05, "loss": 0.1395, "step": 38837 }, { "epoch": 0.6927192951164699, "grad_norm": 0.2734878659248352, "learning_rate": 1.305303590521166e-05, "loss": 0.1415, "step": 38838 }, { "epoch": 0.6927371312381836, "grad_norm": 0.285553902387619, "learning_rate": 1.3051668651775134e-05, "loss": 0.1734, "step": 38839 }, { "epoch": 0.6927549673598973, "grad_norm": 0.24469208717346191, "learning_rate": 1.3050301444654456e-05, "loss": 0.1272, "step": 38840 }, { "epoch": 0.692772803481611, "grad_norm": 0.2895817458629608, "learning_rate": 1.3048934283854946e-05, "loss": 0.108, "step": 38841 }, { "epoch": 0.6927906396033247, "grad_norm": 0.3367748558521271, "learning_rate": 1.3047567169381897e-05, "loss": 0.1599, "step": 38842 }, { "epoch": 0.6928084757250383, "grad_norm": 0.25952938199043274, "learning_rate": 1.3046200101240602e-05, "loss": 0.1119, "step": 38843 }, { "epoch": 0.692826311846752, "grad_norm": 0.2831670045852661, "learning_rate": 1.3044833079436359e-05, "loss": 0.0843, "step": 38844 }, { "epoch": 0.6928441479684657, "grad_norm": 0.37477907538414, "learning_rate": 1.3043466103974478e-05, "loss": 0.1078, "step": 38845 }, { "epoch": 0.6928619840901794, "grad_norm": 0.3395490348339081, "learning_rate": 1.3042099174860242e-05, "loss": 0.0994, "step": 38846 }, { "epoch": 0.6928798202118931, "grad_norm": 0.24265864491462708, "learning_rate": 1.3040732292098973e-05, "loss": 0.1704, "step": 38847 }, { "epoch": 0.6928976563336068, "grad_norm": 0.2586183249950409, "learning_rate": 1.3039365455695943e-05, "loss": 0.1481, "step": 38848 }, { "epoch": 0.6929154924553205, "grad_norm": 0.3862290382385254, "learning_rate": 1.3037998665656475e-05, "loss": 0.1619, "step": 38849 }, { "epoch": 0.6929333285770342, "grad_norm": 0.2527387738227844, "learning_rate": 1.3036631921985854e-05, "loss": 0.1338, "step": 38850 }, { "epoch": 0.692951164698748, "grad_norm": 0.26722294092178345, "learning_rate": 1.303526522468938e-05, "loss": 0.1484, "step": 38851 }, { "epoch": 0.6929690008204616, "grad_norm": 0.26204484701156616, "learning_rate": 1.3033898573772355e-05, "loss": 0.139, "step": 38852 }, { "epoch": 0.6929868369421753, "grad_norm": 0.2863484025001526, "learning_rate": 1.3032531969240058e-05, "loss": 0.108, "step": 38853 }, { "epoch": 0.693004673063889, "grad_norm": 0.19015048444271088, "learning_rate": 1.3031165411097813e-05, "loss": 0.0938, "step": 38854 }, { "epoch": 0.6930225091856027, "grad_norm": 0.35718753933906555, "learning_rate": 1.3029798899350904e-05, "loss": 0.1072, "step": 38855 }, { "epoch": 0.6930403453073164, "grad_norm": 0.203440323472023, "learning_rate": 1.3028432434004625e-05, "loss": 0.0937, "step": 38856 }, { "epoch": 0.6930581814290301, "grad_norm": 0.1964787393808365, "learning_rate": 1.302706601506427e-05, "loss": 0.1316, "step": 38857 }, { "epoch": 0.6930760175507438, "grad_norm": 0.26423177123069763, "learning_rate": 1.3025699642535152e-05, "loss": 0.1303, "step": 38858 }, { "epoch": 0.6930938536724575, "grad_norm": 0.3048575818538666, "learning_rate": 1.302433331642255e-05, "loss": 0.1029, "step": 38859 }, { "epoch": 0.6931116897941711, "grad_norm": 0.2601907253265381, "learning_rate": 1.3022967036731775e-05, "loss": 0.0932, "step": 38860 }, { "epoch": 0.6931295259158848, "grad_norm": 0.32533133029937744, "learning_rate": 1.3021600803468109e-05, "loss": 0.1552, "step": 38861 }, { "epoch": 0.6931473620375985, "grad_norm": 0.25909945368766785, "learning_rate": 1.3020234616636864e-05, "loss": 0.1218, "step": 38862 }, { "epoch": 0.6931651981593122, "grad_norm": 0.26075488328933716, "learning_rate": 1.3018868476243328e-05, "loss": 0.0956, "step": 38863 }, { "epoch": 0.6931830342810259, "grad_norm": 0.2390093356370926, "learning_rate": 1.3017502382292795e-05, "loss": 0.1588, "step": 38864 }, { "epoch": 0.6932008704027396, "grad_norm": 0.2447652667760849, "learning_rate": 1.3016136334790563e-05, "loss": 0.0589, "step": 38865 }, { "epoch": 0.6932187065244533, "grad_norm": 0.3241710960865021, "learning_rate": 1.3014770333741915e-05, "loss": 0.1923, "step": 38866 }, { "epoch": 0.693236542646167, "grad_norm": 0.25113940238952637, "learning_rate": 1.3013404379152167e-05, "loss": 0.0962, "step": 38867 }, { "epoch": 0.6932543787678808, "grad_norm": 0.25811827182769775, "learning_rate": 1.30120384710266e-05, "loss": 0.1222, "step": 38868 }, { "epoch": 0.6932722148895945, "grad_norm": 0.44056758284568787, "learning_rate": 1.3010672609370517e-05, "loss": 0.1556, "step": 38869 }, { "epoch": 0.6932900510113081, "grad_norm": 0.3130090832710266, "learning_rate": 1.3009306794189197e-05, "loss": 0.1001, "step": 38870 }, { "epoch": 0.6933078871330218, "grad_norm": 0.27695757150650024, "learning_rate": 1.3007941025487955e-05, "loss": 0.101, "step": 38871 }, { "epoch": 0.6933257232547355, "grad_norm": 0.3238756060600281, "learning_rate": 1.3006575303272076e-05, "loss": 0.0817, "step": 38872 }, { "epoch": 0.6933435593764492, "grad_norm": 0.33694007992744446, "learning_rate": 1.3005209627546844e-05, "loss": 0.1778, "step": 38873 }, { "epoch": 0.6933613954981629, "grad_norm": 0.45675989985466003, "learning_rate": 1.3003843998317568e-05, "loss": 0.1893, "step": 38874 }, { "epoch": 0.6933792316198766, "grad_norm": 0.19218678772449493, "learning_rate": 1.300247841558953e-05, "loss": 0.1107, "step": 38875 }, { "epoch": 0.6933970677415903, "grad_norm": 0.29433581233024597, "learning_rate": 1.3001112879368038e-05, "loss": 0.1027, "step": 38876 }, { "epoch": 0.693414903863304, "grad_norm": 0.2407170534133911, "learning_rate": 1.2999747389658378e-05, "loss": 0.0948, "step": 38877 }, { "epoch": 0.6934327399850176, "grad_norm": 0.25088241696357727, "learning_rate": 1.2998381946465842e-05, "loss": 0.1291, "step": 38878 }, { "epoch": 0.6934505761067313, "grad_norm": 0.2478271871805191, "learning_rate": 1.2997016549795713e-05, "loss": 0.0933, "step": 38879 }, { "epoch": 0.693468412228445, "grad_norm": 0.321100115776062, "learning_rate": 1.2995651199653302e-05, "loss": 0.1406, "step": 38880 }, { "epoch": 0.6934862483501587, "grad_norm": 0.314493864774704, "learning_rate": 1.2994285896043896e-05, "loss": 0.11, "step": 38881 }, { "epoch": 0.6935040844718724, "grad_norm": 0.2880284786224365, "learning_rate": 1.2992920638972777e-05, "loss": 0.1704, "step": 38882 }, { "epoch": 0.6935219205935861, "grad_norm": 0.3332328796386719, "learning_rate": 1.2991555428445243e-05, "loss": 0.1212, "step": 38883 }, { "epoch": 0.6935397567152998, "grad_norm": 0.3110034167766571, "learning_rate": 1.2990190264466596e-05, "loss": 0.1397, "step": 38884 }, { "epoch": 0.6935575928370136, "grad_norm": 0.24640539288520813, "learning_rate": 1.2988825147042116e-05, "loss": 0.0876, "step": 38885 }, { "epoch": 0.6935754289587273, "grad_norm": 0.23988419771194458, "learning_rate": 1.2987460076177091e-05, "loss": 0.1482, "step": 38886 }, { "epoch": 0.693593265080441, "grad_norm": 0.28728044033050537, "learning_rate": 1.2986095051876828e-05, "loss": 0.1273, "step": 38887 }, { "epoch": 0.6936111012021546, "grad_norm": 0.2788718342781067, "learning_rate": 1.2984730074146603e-05, "loss": 0.1374, "step": 38888 }, { "epoch": 0.6936289373238683, "grad_norm": 0.20722664892673492, "learning_rate": 1.298336514299172e-05, "loss": 0.1402, "step": 38889 }, { "epoch": 0.693646773445582, "grad_norm": 0.23695799708366394, "learning_rate": 1.298200025841747e-05, "loss": 0.1399, "step": 38890 }, { "epoch": 0.6936646095672957, "grad_norm": 0.32240235805511475, "learning_rate": 1.2980635420429133e-05, "loss": 0.1583, "step": 38891 }, { "epoch": 0.6936824456890094, "grad_norm": 0.20412838459014893, "learning_rate": 1.2979270629031997e-05, "loss": 0.1178, "step": 38892 }, { "epoch": 0.6937002818107231, "grad_norm": 0.25681132078170776, "learning_rate": 1.2977905884231367e-05, "loss": 0.0894, "step": 38893 }, { "epoch": 0.6937181179324368, "grad_norm": 0.34713780879974365, "learning_rate": 1.297654118603253e-05, "loss": 0.1234, "step": 38894 }, { "epoch": 0.6937359540541504, "grad_norm": 0.22974295914173126, "learning_rate": 1.297517653444077e-05, "loss": 0.1347, "step": 38895 }, { "epoch": 0.6937537901758641, "grad_norm": 0.32635554671287537, "learning_rate": 1.2973811929461372e-05, "loss": 0.1592, "step": 38896 }, { "epoch": 0.6937716262975778, "grad_norm": 0.21161885559558868, "learning_rate": 1.2972447371099639e-05, "loss": 0.1234, "step": 38897 }, { "epoch": 0.6937894624192915, "grad_norm": 0.3161948323249817, "learning_rate": 1.2971082859360854e-05, "loss": 0.0781, "step": 38898 }, { "epoch": 0.6938072985410052, "grad_norm": 0.26616141200065613, "learning_rate": 1.2969718394250308e-05, "loss": 0.0947, "step": 38899 }, { "epoch": 0.6938251346627189, "grad_norm": 0.2649553120136261, "learning_rate": 1.2968353975773279e-05, "loss": 0.1061, "step": 38900 }, { "epoch": 0.6938429707844326, "grad_norm": 0.291670560836792, "learning_rate": 1.296698960393507e-05, "loss": 0.0881, "step": 38901 }, { "epoch": 0.6938608069061464, "grad_norm": 0.31786757707595825, "learning_rate": 1.2965625278740962e-05, "loss": 0.1248, "step": 38902 }, { "epoch": 0.6938786430278601, "grad_norm": 0.29399222135543823, "learning_rate": 1.2964261000196255e-05, "loss": 0.1122, "step": 38903 }, { "epoch": 0.6938964791495738, "grad_norm": 0.3356807231903076, "learning_rate": 1.2962896768306229e-05, "loss": 0.1648, "step": 38904 }, { "epoch": 0.6939143152712874, "grad_norm": 0.3318850100040436, "learning_rate": 1.2961532583076163e-05, "loss": 0.1707, "step": 38905 }, { "epoch": 0.6939321513930011, "grad_norm": 0.23528605699539185, "learning_rate": 1.2960168444511367e-05, "loss": 0.1054, "step": 38906 }, { "epoch": 0.6939499875147148, "grad_norm": 0.18605659902095795, "learning_rate": 1.2958804352617114e-05, "loss": 0.0833, "step": 38907 }, { "epoch": 0.6939678236364285, "grad_norm": 0.23467952013015747, "learning_rate": 1.2957440307398694e-05, "loss": 0.1071, "step": 38908 }, { "epoch": 0.6939856597581422, "grad_norm": 0.39236992597579956, "learning_rate": 1.2956076308861385e-05, "loss": 0.1232, "step": 38909 }, { "epoch": 0.6940034958798559, "grad_norm": 0.21431805193424225, "learning_rate": 1.2954712357010494e-05, "loss": 0.0985, "step": 38910 }, { "epoch": 0.6940213320015696, "grad_norm": 0.3000572919845581, "learning_rate": 1.29533484518513e-05, "loss": 0.1362, "step": 38911 }, { "epoch": 0.6940391681232833, "grad_norm": 0.29470011591911316, "learning_rate": 1.2951984593389082e-05, "loss": 0.1309, "step": 38912 }, { "epoch": 0.694057004244997, "grad_norm": 0.22623272240161896, "learning_rate": 1.295062078162913e-05, "loss": 0.0798, "step": 38913 }, { "epoch": 0.6940748403667106, "grad_norm": 0.2676866948604584, "learning_rate": 1.294925701657674e-05, "loss": 0.1128, "step": 38914 }, { "epoch": 0.6940926764884243, "grad_norm": 0.2591826319694519, "learning_rate": 1.2947893298237185e-05, "loss": 0.1032, "step": 38915 }, { "epoch": 0.694110512610138, "grad_norm": 0.23538632690906525, "learning_rate": 1.2946529626615767e-05, "loss": 0.1331, "step": 38916 }, { "epoch": 0.6941283487318517, "grad_norm": 0.26029500365257263, "learning_rate": 1.2945166001717752e-05, "loss": 0.1133, "step": 38917 }, { "epoch": 0.6941461848535655, "grad_norm": 0.2218397855758667, "learning_rate": 1.2943802423548446e-05, "loss": 0.1238, "step": 38918 }, { "epoch": 0.6941640209752792, "grad_norm": 0.256335586309433, "learning_rate": 1.2942438892113129e-05, "loss": 0.1185, "step": 38919 }, { "epoch": 0.6941818570969929, "grad_norm": 0.26803240180015564, "learning_rate": 1.294107540741708e-05, "loss": 0.132, "step": 38920 }, { "epoch": 0.6941996932187066, "grad_norm": 0.3438301980495453, "learning_rate": 1.2939711969465589e-05, "loss": 0.1485, "step": 38921 }, { "epoch": 0.6942175293404202, "grad_norm": 0.19693708419799805, "learning_rate": 1.2938348578263934e-05, "loss": 0.0992, "step": 38922 }, { "epoch": 0.6942353654621339, "grad_norm": 0.46287044882774353, "learning_rate": 1.2936985233817411e-05, "loss": 0.1429, "step": 38923 }, { "epoch": 0.6942532015838476, "grad_norm": 0.24437500536441803, "learning_rate": 1.2935621936131304e-05, "loss": 0.1195, "step": 38924 }, { "epoch": 0.6942710377055613, "grad_norm": 0.3722871243953705, "learning_rate": 1.2934258685210887e-05, "loss": 0.0954, "step": 38925 }, { "epoch": 0.694288873827275, "grad_norm": 0.4296792447566986, "learning_rate": 1.2932895481061447e-05, "loss": 0.153, "step": 38926 }, { "epoch": 0.6943067099489887, "grad_norm": 0.21547015011310577, "learning_rate": 1.2931532323688278e-05, "loss": 0.0986, "step": 38927 }, { "epoch": 0.6943245460707024, "grad_norm": 0.3268570899963379, "learning_rate": 1.293016921309666e-05, "loss": 0.138, "step": 38928 }, { "epoch": 0.6943423821924161, "grad_norm": 0.30655694007873535, "learning_rate": 1.2928806149291865e-05, "loss": 0.1203, "step": 38929 }, { "epoch": 0.6943602183141298, "grad_norm": 0.2059771716594696, "learning_rate": 1.2927443132279186e-05, "loss": 0.1134, "step": 38930 }, { "epoch": 0.6943780544358434, "grad_norm": 0.20445942878723145, "learning_rate": 1.2926080162063917e-05, "loss": 0.0763, "step": 38931 }, { "epoch": 0.6943958905575571, "grad_norm": 0.2293536514043808, "learning_rate": 1.292471723865133e-05, "loss": 0.1181, "step": 38932 }, { "epoch": 0.6944137266792708, "grad_norm": 0.27545976638793945, "learning_rate": 1.2923354362046711e-05, "loss": 0.1101, "step": 38933 }, { "epoch": 0.6944315628009845, "grad_norm": 0.258241206407547, "learning_rate": 1.2921991532255343e-05, "loss": 0.1236, "step": 38934 }, { "epoch": 0.6944493989226983, "grad_norm": 0.2016286998987198, "learning_rate": 1.2920628749282495e-05, "loss": 0.0911, "step": 38935 }, { "epoch": 0.694467235044412, "grad_norm": 0.29968076944351196, "learning_rate": 1.2919266013133475e-05, "loss": 0.0789, "step": 38936 }, { "epoch": 0.6944850711661257, "grad_norm": 0.2846897840499878, "learning_rate": 1.2917903323813549e-05, "loss": 0.1421, "step": 38937 }, { "epoch": 0.6945029072878394, "grad_norm": 0.2914271056652069, "learning_rate": 1.2916540681328004e-05, "loss": 0.1312, "step": 38938 }, { "epoch": 0.694520743409553, "grad_norm": 0.2852572500705719, "learning_rate": 1.2915178085682112e-05, "loss": 0.096, "step": 38939 }, { "epoch": 0.6945385795312667, "grad_norm": 0.2819148302078247, "learning_rate": 1.2913815536881174e-05, "loss": 0.0864, "step": 38940 }, { "epoch": 0.6945564156529804, "grad_norm": 0.2402840554714203, "learning_rate": 1.291245303493046e-05, "loss": 0.1043, "step": 38941 }, { "epoch": 0.6945742517746941, "grad_norm": 0.3452276587486267, "learning_rate": 1.2911090579835244e-05, "loss": 0.1598, "step": 38942 }, { "epoch": 0.6945920878964078, "grad_norm": 0.17727108299732208, "learning_rate": 1.2909728171600824e-05, "loss": 0.0892, "step": 38943 }, { "epoch": 0.6946099240181215, "grad_norm": 0.2905954122543335, "learning_rate": 1.2908365810232465e-05, "loss": 0.1601, "step": 38944 }, { "epoch": 0.6946277601398352, "grad_norm": 0.32615017890930176, "learning_rate": 1.2907003495735467e-05, "loss": 0.1255, "step": 38945 }, { "epoch": 0.6946455962615489, "grad_norm": 0.31594735383987427, "learning_rate": 1.2905641228115101e-05, "loss": 0.1319, "step": 38946 }, { "epoch": 0.6946634323832626, "grad_norm": 0.3191177546977997, "learning_rate": 1.2904279007376644e-05, "loss": 0.1912, "step": 38947 }, { "epoch": 0.6946812685049762, "grad_norm": 0.19566045701503754, "learning_rate": 1.2902916833525369e-05, "loss": 0.0796, "step": 38948 }, { "epoch": 0.6946991046266899, "grad_norm": 0.27914050221443176, "learning_rate": 1.2901554706566581e-05, "loss": 0.1313, "step": 38949 }, { "epoch": 0.6947169407484036, "grad_norm": 0.3291770815849304, "learning_rate": 1.2900192626505541e-05, "loss": 0.1646, "step": 38950 }, { "epoch": 0.6947347768701173, "grad_norm": 0.2316390722990036, "learning_rate": 1.2898830593347538e-05, "loss": 0.1279, "step": 38951 }, { "epoch": 0.6947526129918311, "grad_norm": 0.2263980209827423, "learning_rate": 1.2897468607097835e-05, "loss": 0.0637, "step": 38952 }, { "epoch": 0.6947704491135448, "grad_norm": 0.2116321623325348, "learning_rate": 1.2896106667761732e-05, "loss": 0.1148, "step": 38953 }, { "epoch": 0.6947882852352585, "grad_norm": 0.3965723514556885, "learning_rate": 1.2894744775344503e-05, "loss": 0.1399, "step": 38954 }, { "epoch": 0.6948061213569722, "grad_norm": 0.2113034874200821, "learning_rate": 1.2893382929851423e-05, "loss": 0.1431, "step": 38955 }, { "epoch": 0.6948239574786859, "grad_norm": 0.21018700301647186, "learning_rate": 1.2892021131287763e-05, "loss": 0.0907, "step": 38956 }, { "epoch": 0.6948417936003995, "grad_norm": 0.35073137283325195, "learning_rate": 1.289065937965881e-05, "loss": 0.1327, "step": 38957 }, { "epoch": 0.6948596297221132, "grad_norm": 0.2525736689567566, "learning_rate": 1.2889297674969853e-05, "loss": 0.1081, "step": 38958 }, { "epoch": 0.6948774658438269, "grad_norm": 0.3054443895816803, "learning_rate": 1.2887936017226159e-05, "loss": 0.1325, "step": 38959 }, { "epoch": 0.6948953019655406, "grad_norm": 0.22958900034427643, "learning_rate": 1.2886574406433014e-05, "loss": 0.0847, "step": 38960 }, { "epoch": 0.6949131380872543, "grad_norm": 0.4076766073703766, "learning_rate": 1.2885212842595678e-05, "loss": 0.1571, "step": 38961 }, { "epoch": 0.694930974208968, "grad_norm": 0.23542045056819916, "learning_rate": 1.288385132571945e-05, "loss": 0.1066, "step": 38962 }, { "epoch": 0.6949488103306817, "grad_norm": 0.22955989837646484, "learning_rate": 1.2882489855809602e-05, "loss": 0.1229, "step": 38963 }, { "epoch": 0.6949666464523954, "grad_norm": 0.21638444066047668, "learning_rate": 1.2881128432871406e-05, "loss": 0.1171, "step": 38964 }, { "epoch": 0.694984482574109, "grad_norm": 0.21046389639377594, "learning_rate": 1.2879767056910133e-05, "loss": 0.083, "step": 38965 }, { "epoch": 0.6950023186958227, "grad_norm": 0.22104981541633606, "learning_rate": 1.2878405727931078e-05, "loss": 0.1171, "step": 38966 }, { "epoch": 0.6950201548175364, "grad_norm": 0.2039240151643753, "learning_rate": 1.287704444593951e-05, "loss": 0.071, "step": 38967 }, { "epoch": 0.6950379909392501, "grad_norm": 0.25528669357299805, "learning_rate": 1.2875683210940704e-05, "loss": 0.1463, "step": 38968 }, { "epoch": 0.6950558270609639, "grad_norm": 0.6063457727432251, "learning_rate": 1.2874322022939927e-05, "loss": 0.1915, "step": 38969 }, { "epoch": 0.6950736631826776, "grad_norm": 0.27676084637641907, "learning_rate": 1.2872960881942481e-05, "loss": 0.1479, "step": 38970 }, { "epoch": 0.6950914993043913, "grad_norm": 0.21087561547756195, "learning_rate": 1.2871599787953612e-05, "loss": 0.0934, "step": 38971 }, { "epoch": 0.695109335426105, "grad_norm": 0.4068029820919037, "learning_rate": 1.2870238740978623e-05, "loss": 0.1315, "step": 38972 }, { "epoch": 0.6951271715478187, "grad_norm": 0.28620705008506775, "learning_rate": 1.2868877741022778e-05, "loss": 0.1258, "step": 38973 }, { "epoch": 0.6951450076695324, "grad_norm": 0.2266935408115387, "learning_rate": 1.2867516788091342e-05, "loss": 0.1244, "step": 38974 }, { "epoch": 0.695162843791246, "grad_norm": 0.28696364164352417, "learning_rate": 1.2866155882189613e-05, "loss": 0.1277, "step": 38975 }, { "epoch": 0.6951806799129597, "grad_norm": 0.24171146750450134, "learning_rate": 1.2864795023322851e-05, "loss": 0.0879, "step": 38976 }, { "epoch": 0.6951985160346734, "grad_norm": 0.1903069168329239, "learning_rate": 1.2863434211496339e-05, "loss": 0.0619, "step": 38977 }, { "epoch": 0.6952163521563871, "grad_norm": 0.3499768078327179, "learning_rate": 1.2862073446715333e-05, "loss": 0.1452, "step": 38978 }, { "epoch": 0.6952341882781008, "grad_norm": 0.31532683968544006, "learning_rate": 1.2860712728985137e-05, "loss": 0.097, "step": 38979 }, { "epoch": 0.6952520243998145, "grad_norm": 0.4439140856266022, "learning_rate": 1.2859352058311011e-05, "loss": 0.1453, "step": 38980 }, { "epoch": 0.6952698605215282, "grad_norm": 0.21643291413784027, "learning_rate": 1.285799143469823e-05, "loss": 0.1435, "step": 38981 }, { "epoch": 0.6952876966432419, "grad_norm": 0.2755575180053711, "learning_rate": 1.2856630858152056e-05, "loss": 0.1594, "step": 38982 }, { "epoch": 0.6953055327649555, "grad_norm": 0.3381027579307556, "learning_rate": 1.2855270328677784e-05, "loss": 0.1605, "step": 38983 }, { "epoch": 0.6953233688866692, "grad_norm": 0.2376745492219925, "learning_rate": 1.285390984628067e-05, "loss": 0.1269, "step": 38984 }, { "epoch": 0.6953412050083829, "grad_norm": 0.2377690225839615, "learning_rate": 1.2852549410966009e-05, "loss": 0.1513, "step": 38985 }, { "epoch": 0.6953590411300967, "grad_norm": 0.3392479419708252, "learning_rate": 1.285118902273906e-05, "loss": 0.1363, "step": 38986 }, { "epoch": 0.6953768772518104, "grad_norm": 0.28568512201309204, "learning_rate": 1.2849828681605089e-05, "loss": 0.1166, "step": 38987 }, { "epoch": 0.6953947133735241, "grad_norm": 0.27334463596343994, "learning_rate": 1.2848468387569388e-05, "loss": 0.1503, "step": 38988 }, { "epoch": 0.6954125494952378, "grad_norm": 0.2065826803445816, "learning_rate": 1.284710814063722e-05, "loss": 0.1009, "step": 38989 }, { "epoch": 0.6954303856169515, "grad_norm": 0.24931828677654266, "learning_rate": 1.2845747940813857e-05, "loss": 0.0686, "step": 38990 }, { "epoch": 0.6954482217386652, "grad_norm": 0.3238942325115204, "learning_rate": 1.2844387788104567e-05, "loss": 0.1065, "step": 38991 }, { "epoch": 0.6954660578603789, "grad_norm": 0.2807333469390869, "learning_rate": 1.2843027682514635e-05, "loss": 0.1324, "step": 38992 }, { "epoch": 0.6954838939820925, "grad_norm": 0.3370315730571747, "learning_rate": 1.2841667624049331e-05, "loss": 0.1382, "step": 38993 }, { "epoch": 0.6955017301038062, "grad_norm": 0.237172931432724, "learning_rate": 1.2840307612713917e-05, "loss": 0.1084, "step": 38994 }, { "epoch": 0.6955195662255199, "grad_norm": 0.38796600699424744, "learning_rate": 1.2838947648513663e-05, "loss": 0.123, "step": 38995 }, { "epoch": 0.6955374023472336, "grad_norm": 0.28400012850761414, "learning_rate": 1.283758773145386e-05, "loss": 0.1085, "step": 38996 }, { "epoch": 0.6955552384689473, "grad_norm": 0.2858881652355194, "learning_rate": 1.2836227861539762e-05, "loss": 0.0826, "step": 38997 }, { "epoch": 0.695573074590661, "grad_norm": 0.4085495173931122, "learning_rate": 1.2834868038776644e-05, "loss": 0.1512, "step": 38998 }, { "epoch": 0.6955909107123747, "grad_norm": 0.3054777979850769, "learning_rate": 1.2833508263169783e-05, "loss": 0.179, "step": 38999 }, { "epoch": 0.6956087468340884, "grad_norm": 0.277476042509079, "learning_rate": 1.2832148534724439e-05, "loss": 0.0871, "step": 39000 }, { "epoch": 0.6956087468340884, "eval_loss": 0.11643356084823608, "eval_runtime": 106.9618, "eval_samples_per_second": 9.574, "eval_steps_per_second": 1.599, "step": 39000 }, { "epoch": 0.695626582955802, "grad_norm": 0.25121405720710754, "learning_rate": 1.28307888534459e-05, "loss": 0.1163, "step": 39001 }, { "epoch": 0.6956444190775157, "grad_norm": 0.21921661496162415, "learning_rate": 1.2829429219339423e-05, "loss": 0.1413, "step": 39002 }, { "epoch": 0.6956622551992295, "grad_norm": 0.20281681418418884, "learning_rate": 1.2828069632410286e-05, "loss": 0.0809, "step": 39003 }, { "epoch": 0.6956800913209432, "grad_norm": 0.21603518724441528, "learning_rate": 1.2826710092663747e-05, "loss": 0.133, "step": 39004 }, { "epoch": 0.6956979274426569, "grad_norm": 0.3089606463909149, "learning_rate": 1.2825350600105088e-05, "loss": 0.1543, "step": 39005 }, { "epoch": 0.6957157635643706, "grad_norm": 0.2746087312698364, "learning_rate": 1.2823991154739579e-05, "loss": 0.1284, "step": 39006 }, { "epoch": 0.6957335996860843, "grad_norm": 0.302846223115921, "learning_rate": 1.2822631756572484e-05, "loss": 0.0938, "step": 39007 }, { "epoch": 0.695751435807798, "grad_norm": 0.23596924543380737, "learning_rate": 1.2821272405609064e-05, "loss": 0.0848, "step": 39008 }, { "epoch": 0.6957692719295117, "grad_norm": 0.2683382034301758, "learning_rate": 1.281991310185461e-05, "loss": 0.0836, "step": 39009 }, { "epoch": 0.6957871080512253, "grad_norm": 0.21829615533351898, "learning_rate": 1.281855384531438e-05, "loss": 0.102, "step": 39010 }, { "epoch": 0.695804944172939, "grad_norm": 0.26865509152412415, "learning_rate": 1.281719463599364e-05, "loss": 0.1183, "step": 39011 }, { "epoch": 0.6958227802946527, "grad_norm": 0.3482833504676819, "learning_rate": 1.2815835473897655e-05, "loss": 0.1298, "step": 39012 }, { "epoch": 0.6958406164163664, "grad_norm": 0.27884677052497864, "learning_rate": 1.28144763590317e-05, "loss": 0.1173, "step": 39013 }, { "epoch": 0.6958584525380801, "grad_norm": 0.21725775301456451, "learning_rate": 1.281311729140105e-05, "loss": 0.1192, "step": 39014 }, { "epoch": 0.6958762886597938, "grad_norm": 0.27511894702911377, "learning_rate": 1.2811758271010968e-05, "loss": 0.1132, "step": 39015 }, { "epoch": 0.6958941247815075, "grad_norm": 0.23363026976585388, "learning_rate": 1.2810399297866721e-05, "loss": 0.0774, "step": 39016 }, { "epoch": 0.6959119609032212, "grad_norm": 0.25134822726249695, "learning_rate": 1.2809040371973568e-05, "loss": 0.1259, "step": 39017 }, { "epoch": 0.6959297970249348, "grad_norm": 0.25535765290260315, "learning_rate": 1.2807681493336792e-05, "loss": 0.1113, "step": 39018 }, { "epoch": 0.6959476331466486, "grad_norm": 0.2510998845100403, "learning_rate": 1.2806322661961656e-05, "loss": 0.0755, "step": 39019 }, { "epoch": 0.6959654692683623, "grad_norm": 0.281025230884552, "learning_rate": 1.280496387785342e-05, "loss": 0.0903, "step": 39020 }, { "epoch": 0.695983305390076, "grad_norm": 0.3614897131919861, "learning_rate": 1.2803605141017354e-05, "loss": 0.0657, "step": 39021 }, { "epoch": 0.6960011415117897, "grad_norm": 0.2231587916612625, "learning_rate": 1.2802246451458732e-05, "loss": 0.1364, "step": 39022 }, { "epoch": 0.6960189776335034, "grad_norm": 0.2773556709289551, "learning_rate": 1.2800887809182815e-05, "loss": 0.1211, "step": 39023 }, { "epoch": 0.6960368137552171, "grad_norm": 0.21469847857952118, "learning_rate": 1.2799529214194872e-05, "loss": 0.1084, "step": 39024 }, { "epoch": 0.6960546498769308, "grad_norm": 0.29153093695640564, "learning_rate": 1.2798170666500158e-05, "loss": 0.1268, "step": 39025 }, { "epoch": 0.6960724859986445, "grad_norm": 0.24175620079040527, "learning_rate": 1.2796812166103961e-05, "loss": 0.1118, "step": 39026 }, { "epoch": 0.6960903221203582, "grad_norm": 0.26647093892097473, "learning_rate": 1.2795453713011525e-05, "loss": 0.1282, "step": 39027 }, { "epoch": 0.6961081582420718, "grad_norm": 0.23232296109199524, "learning_rate": 1.279409530722813e-05, "loss": 0.1094, "step": 39028 }, { "epoch": 0.6961259943637855, "grad_norm": 0.2349260449409485, "learning_rate": 1.279273694875904e-05, "loss": 0.1065, "step": 39029 }, { "epoch": 0.6961438304854992, "grad_norm": 0.2272673398256302, "learning_rate": 1.2791378637609508e-05, "loss": 0.1101, "step": 39030 }, { "epoch": 0.6961616666072129, "grad_norm": 0.3893534243106842, "learning_rate": 1.2790020373784822e-05, "loss": 0.1434, "step": 39031 }, { "epoch": 0.6961795027289266, "grad_norm": 0.24964436888694763, "learning_rate": 1.2788662157290231e-05, "loss": 0.1003, "step": 39032 }, { "epoch": 0.6961973388506403, "grad_norm": 0.2861902713775635, "learning_rate": 1.2787303988131003e-05, "loss": 0.083, "step": 39033 }, { "epoch": 0.696215174972354, "grad_norm": 0.17898684740066528, "learning_rate": 1.2785945866312393e-05, "loss": 0.0843, "step": 39034 }, { "epoch": 0.6962330110940677, "grad_norm": 0.30016759037971497, "learning_rate": 1.2784587791839686e-05, "loss": 0.0726, "step": 39035 }, { "epoch": 0.6962508472157815, "grad_norm": 0.3075457811355591, "learning_rate": 1.2783229764718136e-05, "loss": 0.091, "step": 39036 }, { "epoch": 0.6962686833374951, "grad_norm": 0.4590596854686737, "learning_rate": 1.278187178495301e-05, "loss": 0.1639, "step": 39037 }, { "epoch": 0.6962865194592088, "grad_norm": 0.3507436513900757, "learning_rate": 1.2780513852549564e-05, "loss": 0.1888, "step": 39038 }, { "epoch": 0.6963043555809225, "grad_norm": 0.18821682035923004, "learning_rate": 1.2779155967513057e-05, "loss": 0.1158, "step": 39039 }, { "epoch": 0.6963221917026362, "grad_norm": 0.25792309641838074, "learning_rate": 1.2777798129848768e-05, "loss": 0.1029, "step": 39040 }, { "epoch": 0.6963400278243499, "grad_norm": 0.2089913934469223, "learning_rate": 1.2776440339561957e-05, "loss": 0.0993, "step": 39041 }, { "epoch": 0.6963578639460636, "grad_norm": 0.2833325266838074, "learning_rate": 1.2775082596657889e-05, "loss": 0.114, "step": 39042 }, { "epoch": 0.6963757000677773, "grad_norm": 0.27349862456321716, "learning_rate": 1.2773724901141815e-05, "loss": 0.109, "step": 39043 }, { "epoch": 0.696393536189491, "grad_norm": 0.34258368611335754, "learning_rate": 1.2772367253019014e-05, "loss": 0.1112, "step": 39044 }, { "epoch": 0.6964113723112046, "grad_norm": 0.2869550287723541, "learning_rate": 1.2771009652294741e-05, "loss": 0.0941, "step": 39045 }, { "epoch": 0.6964292084329183, "grad_norm": 0.31704646348953247, "learning_rate": 1.2769652098974261e-05, "loss": 0.1056, "step": 39046 }, { "epoch": 0.696447044554632, "grad_norm": 0.33068206906318665, "learning_rate": 1.2768294593062818e-05, "loss": 0.1395, "step": 39047 }, { "epoch": 0.6964648806763457, "grad_norm": 0.29258713126182556, "learning_rate": 1.2766937134565704e-05, "loss": 0.1912, "step": 39048 }, { "epoch": 0.6964827167980594, "grad_norm": 0.2919904887676239, "learning_rate": 1.2765579723488167e-05, "loss": 0.0935, "step": 39049 }, { "epoch": 0.6965005529197731, "grad_norm": 0.31889808177948, "learning_rate": 1.2764222359835468e-05, "loss": 0.1509, "step": 39050 }, { "epoch": 0.6965183890414868, "grad_norm": 0.2491060197353363, "learning_rate": 1.2762865043612868e-05, "loss": 0.0383, "step": 39051 }, { "epoch": 0.6965362251632005, "grad_norm": 0.20252561569213867, "learning_rate": 1.276150777482562e-05, "loss": 0.0704, "step": 39052 }, { "epoch": 0.6965540612849143, "grad_norm": 0.2772665321826935, "learning_rate": 1.2760150553479006e-05, "loss": 0.1465, "step": 39053 }, { "epoch": 0.696571897406628, "grad_norm": 0.24536865949630737, "learning_rate": 1.2758793379578266e-05, "loss": 0.1314, "step": 39054 }, { "epoch": 0.6965897335283416, "grad_norm": 0.26596805453300476, "learning_rate": 1.2757436253128679e-05, "loss": 0.1249, "step": 39055 }, { "epoch": 0.6966075696500553, "grad_norm": 0.17847612500190735, "learning_rate": 1.2756079174135489e-05, "loss": 0.0683, "step": 39056 }, { "epoch": 0.696625405771769, "grad_norm": 0.2210547775030136, "learning_rate": 1.2754722142603975e-05, "loss": 0.0879, "step": 39057 }, { "epoch": 0.6966432418934827, "grad_norm": 0.35463643074035645, "learning_rate": 1.2753365158539387e-05, "loss": 0.0852, "step": 39058 }, { "epoch": 0.6966610780151964, "grad_norm": 0.36180663108825684, "learning_rate": 1.2752008221946987e-05, "loss": 0.1528, "step": 39059 }, { "epoch": 0.6966789141369101, "grad_norm": 0.22196823358535767, "learning_rate": 1.2750651332832019e-05, "loss": 0.1089, "step": 39060 }, { "epoch": 0.6966967502586238, "grad_norm": 0.32768359780311584, "learning_rate": 1.274929449119977e-05, "loss": 0.1207, "step": 39061 }, { "epoch": 0.6967145863803375, "grad_norm": 0.30993857979774475, "learning_rate": 1.274793769705549e-05, "loss": 0.1051, "step": 39062 }, { "epoch": 0.6967324225020511, "grad_norm": 0.42698532342910767, "learning_rate": 1.274658095040443e-05, "loss": 0.0899, "step": 39063 }, { "epoch": 0.6967502586237648, "grad_norm": 0.26853567361831665, "learning_rate": 1.2745224251251858e-05, "loss": 0.1185, "step": 39064 }, { "epoch": 0.6967680947454785, "grad_norm": 0.24525515735149384, "learning_rate": 1.2743867599603019e-05, "loss": 0.1257, "step": 39065 }, { "epoch": 0.6967859308671922, "grad_norm": 0.21994318068027496, "learning_rate": 1.2742510995463192e-05, "loss": 0.0964, "step": 39066 }, { "epoch": 0.6968037669889059, "grad_norm": 0.3300603926181793, "learning_rate": 1.2741154438837616e-05, "loss": 0.1464, "step": 39067 }, { "epoch": 0.6968216031106196, "grad_norm": 0.18020690977573395, "learning_rate": 1.2739797929731567e-05, "loss": 0.0966, "step": 39068 }, { "epoch": 0.6968394392323333, "grad_norm": 0.2755752503871918, "learning_rate": 1.2738441468150286e-05, "loss": 0.1517, "step": 39069 }, { "epoch": 0.6968572753540471, "grad_norm": 0.26277220249176025, "learning_rate": 1.2737085054099055e-05, "loss": 0.1213, "step": 39070 }, { "epoch": 0.6968751114757608, "grad_norm": 0.2911570966243744, "learning_rate": 1.2735728687583116e-05, "loss": 0.1182, "step": 39071 }, { "epoch": 0.6968929475974744, "grad_norm": 0.2323385626077652, "learning_rate": 1.2734372368607728e-05, "loss": 0.0992, "step": 39072 }, { "epoch": 0.6969107837191881, "grad_norm": 0.30016231536865234, "learning_rate": 1.2733016097178139e-05, "loss": 0.147, "step": 39073 }, { "epoch": 0.6969286198409018, "grad_norm": 0.27335840463638306, "learning_rate": 1.2731659873299625e-05, "loss": 0.0779, "step": 39074 }, { "epoch": 0.6969464559626155, "grad_norm": 0.23359227180480957, "learning_rate": 1.2730303696977436e-05, "loss": 0.1245, "step": 39075 }, { "epoch": 0.6969642920843292, "grad_norm": 0.4167681038379669, "learning_rate": 1.2728947568216828e-05, "loss": 0.1173, "step": 39076 }, { "epoch": 0.6969821282060429, "grad_norm": 0.33337992429733276, "learning_rate": 1.2727591487023057e-05, "loss": 0.1451, "step": 39077 }, { "epoch": 0.6969999643277566, "grad_norm": 0.2162632793188095, "learning_rate": 1.2726235453401369e-05, "loss": 0.1221, "step": 39078 }, { "epoch": 0.6970178004494703, "grad_norm": 0.2172084003686905, "learning_rate": 1.2724879467357046e-05, "loss": 0.1473, "step": 39079 }, { "epoch": 0.697035636571184, "grad_norm": 0.2695470154285431, "learning_rate": 1.2723523528895327e-05, "loss": 0.1108, "step": 39080 }, { "epoch": 0.6970534726928976, "grad_norm": 0.260147362947464, "learning_rate": 1.272216763802146e-05, "loss": 0.1053, "step": 39081 }, { "epoch": 0.6970713088146113, "grad_norm": 0.37548601627349854, "learning_rate": 1.2720811794740723e-05, "loss": 0.1014, "step": 39082 }, { "epoch": 0.697089144936325, "grad_norm": 0.3185540437698364, "learning_rate": 1.2719455999058349e-05, "loss": 0.1172, "step": 39083 }, { "epoch": 0.6971069810580387, "grad_norm": 0.23268212378025055, "learning_rate": 1.2718100250979619e-05, "loss": 0.1018, "step": 39084 }, { "epoch": 0.6971248171797524, "grad_norm": 0.33350467681884766, "learning_rate": 1.271674455050977e-05, "loss": 0.1951, "step": 39085 }, { "epoch": 0.6971426533014661, "grad_norm": 0.2379380464553833, "learning_rate": 1.2715388897654058e-05, "loss": 0.1579, "step": 39086 }, { "epoch": 0.6971604894231799, "grad_norm": 0.22094188630580902, "learning_rate": 1.2714033292417743e-05, "loss": 0.0904, "step": 39087 }, { "epoch": 0.6971783255448936, "grad_norm": 0.1776876002550125, "learning_rate": 1.2712677734806082e-05, "loss": 0.0871, "step": 39088 }, { "epoch": 0.6971961616666073, "grad_norm": 0.2052459567785263, "learning_rate": 1.271132222482433e-05, "loss": 0.0219, "step": 39089 }, { "epoch": 0.6972139977883209, "grad_norm": 0.3054584264755249, "learning_rate": 1.2709966762477731e-05, "loss": 0.1223, "step": 39090 }, { "epoch": 0.6972318339100346, "grad_norm": 0.25725704431533813, "learning_rate": 1.270861134777154e-05, "loss": 0.1078, "step": 39091 }, { "epoch": 0.6972496700317483, "grad_norm": 0.28233176469802856, "learning_rate": 1.2707255980711024e-05, "loss": 0.0899, "step": 39092 }, { "epoch": 0.697267506153462, "grad_norm": 0.2727392911911011, "learning_rate": 1.2705900661301434e-05, "loss": 0.1366, "step": 39093 }, { "epoch": 0.6972853422751757, "grad_norm": 0.28299516439437866, "learning_rate": 1.2704545389548006e-05, "loss": 0.1351, "step": 39094 }, { "epoch": 0.6973031783968894, "grad_norm": 0.24756821990013123, "learning_rate": 1.2703190165456016e-05, "loss": 0.1183, "step": 39095 }, { "epoch": 0.6973210145186031, "grad_norm": 0.2845204174518585, "learning_rate": 1.27018349890307e-05, "loss": 0.1456, "step": 39096 }, { "epoch": 0.6973388506403168, "grad_norm": 0.2900078594684601, "learning_rate": 1.2700479860277328e-05, "loss": 0.1525, "step": 39097 }, { "epoch": 0.6973566867620304, "grad_norm": 0.2275385856628418, "learning_rate": 1.2699124779201144e-05, "loss": 0.1277, "step": 39098 }, { "epoch": 0.6973745228837441, "grad_norm": 0.25815024971961975, "learning_rate": 1.2697769745807391e-05, "loss": 0.118, "step": 39099 }, { "epoch": 0.6973923590054578, "grad_norm": 0.23723527789115906, "learning_rate": 1.2696414760101344e-05, "loss": 0.1031, "step": 39100 }, { "epoch": 0.6974101951271715, "grad_norm": 0.24545933306217194, "learning_rate": 1.269505982208824e-05, "loss": 0.1293, "step": 39101 }, { "epoch": 0.6974280312488852, "grad_norm": 0.39338332414627075, "learning_rate": 1.2693704931773336e-05, "loss": 0.1102, "step": 39102 }, { "epoch": 0.6974458673705989, "grad_norm": 0.2605094313621521, "learning_rate": 1.269235008916188e-05, "loss": 0.1322, "step": 39103 }, { "epoch": 0.6974637034923127, "grad_norm": 0.28584545850753784, "learning_rate": 1.2690995294259118e-05, "loss": 0.1563, "step": 39104 }, { "epoch": 0.6974815396140264, "grad_norm": 0.2810380160808563, "learning_rate": 1.268964054707032e-05, "loss": 0.0751, "step": 39105 }, { "epoch": 0.6974993757357401, "grad_norm": 0.22144712507724762, "learning_rate": 1.2688285847600726e-05, "loss": 0.135, "step": 39106 }, { "epoch": 0.6975172118574537, "grad_norm": 0.2636159360408783, "learning_rate": 1.2686931195855587e-05, "loss": 0.1024, "step": 39107 }, { "epoch": 0.6975350479791674, "grad_norm": 0.20967800915241241, "learning_rate": 1.2685576591840149e-05, "loss": 0.0919, "step": 39108 }, { "epoch": 0.6975528841008811, "grad_norm": 0.1997174620628357, "learning_rate": 1.2684222035559676e-05, "loss": 0.0903, "step": 39109 }, { "epoch": 0.6975707202225948, "grad_norm": 0.35322344303131104, "learning_rate": 1.2682867527019404e-05, "loss": 0.1153, "step": 39110 }, { "epoch": 0.6975885563443085, "grad_norm": 0.2915917634963989, "learning_rate": 1.2681513066224598e-05, "loss": 0.1418, "step": 39111 }, { "epoch": 0.6976063924660222, "grad_norm": 0.3704468011856079, "learning_rate": 1.2680158653180497e-05, "loss": 0.1629, "step": 39112 }, { "epoch": 0.6976242285877359, "grad_norm": 0.28715503215789795, "learning_rate": 1.2678804287892366e-05, "loss": 0.1047, "step": 39113 }, { "epoch": 0.6976420647094496, "grad_norm": 0.2410965859889984, "learning_rate": 1.2677449970365441e-05, "loss": 0.1601, "step": 39114 }, { "epoch": 0.6976599008311632, "grad_norm": 0.3165033459663391, "learning_rate": 1.2676095700604978e-05, "loss": 0.1081, "step": 39115 }, { "epoch": 0.6976777369528769, "grad_norm": 0.21846450865268707, "learning_rate": 1.2674741478616226e-05, "loss": 0.1206, "step": 39116 }, { "epoch": 0.6976955730745906, "grad_norm": 0.25390514731407166, "learning_rate": 1.2673387304404421e-05, "loss": 0.1171, "step": 39117 }, { "epoch": 0.6977134091963043, "grad_norm": 0.2805967330932617, "learning_rate": 1.2672033177974834e-05, "loss": 0.1486, "step": 39118 }, { "epoch": 0.697731245318018, "grad_norm": 0.25417935848236084, "learning_rate": 1.2670679099332707e-05, "loss": 0.0773, "step": 39119 }, { "epoch": 0.6977490814397318, "grad_norm": 0.23024597764015198, "learning_rate": 1.2669325068483284e-05, "loss": 0.1247, "step": 39120 }, { "epoch": 0.6977669175614455, "grad_norm": 0.25610220432281494, "learning_rate": 1.266797108543181e-05, "loss": 0.1756, "step": 39121 }, { "epoch": 0.6977847536831592, "grad_norm": 0.27678313851356506, "learning_rate": 1.2666617150183546e-05, "loss": 0.0853, "step": 39122 }, { "epoch": 0.6978025898048729, "grad_norm": 0.23821666836738586, "learning_rate": 1.2665263262743724e-05, "loss": 0.1092, "step": 39123 }, { "epoch": 0.6978204259265866, "grad_norm": 0.20323744416236877, "learning_rate": 1.266390942311761e-05, "loss": 0.1208, "step": 39124 }, { "epoch": 0.6978382620483002, "grad_norm": 0.23441927134990692, "learning_rate": 1.2662555631310435e-05, "loss": 0.1169, "step": 39125 }, { "epoch": 0.6978560981700139, "grad_norm": 0.23083360493183136, "learning_rate": 1.2661201887327468e-05, "loss": 0.107, "step": 39126 }, { "epoch": 0.6978739342917276, "grad_norm": 0.27246028184890747, "learning_rate": 1.2659848191173942e-05, "loss": 0.1234, "step": 39127 }, { "epoch": 0.6978917704134413, "grad_norm": 0.3090742826461792, "learning_rate": 1.265849454285511e-05, "loss": 0.0892, "step": 39128 }, { "epoch": 0.697909606535155, "grad_norm": 0.25338998436927795, "learning_rate": 1.2657140942376211e-05, "loss": 0.1229, "step": 39129 }, { "epoch": 0.6979274426568687, "grad_norm": 0.28995487093925476, "learning_rate": 1.2655787389742488e-05, "loss": 0.134, "step": 39130 }, { "epoch": 0.6979452787785824, "grad_norm": 0.34188976883888245, "learning_rate": 1.2654433884959207e-05, "loss": 0.0916, "step": 39131 }, { "epoch": 0.697963114900296, "grad_norm": 0.30309998989105225, "learning_rate": 1.2653080428031604e-05, "loss": 0.0736, "step": 39132 }, { "epoch": 0.6979809510220097, "grad_norm": 0.2233080267906189, "learning_rate": 1.2651727018964925e-05, "loss": 0.0697, "step": 39133 }, { "epoch": 0.6979987871437234, "grad_norm": 0.2173866331577301, "learning_rate": 1.2650373657764409e-05, "loss": 0.1052, "step": 39134 }, { "epoch": 0.6980166232654371, "grad_norm": 0.23665744066238403, "learning_rate": 1.264902034443532e-05, "loss": 0.1151, "step": 39135 }, { "epoch": 0.6980344593871508, "grad_norm": 0.2823314964771271, "learning_rate": 1.2647667078982894e-05, "loss": 0.1351, "step": 39136 }, { "epoch": 0.6980522955088646, "grad_norm": 0.2626916766166687, "learning_rate": 1.2646313861412368e-05, "loss": 0.1009, "step": 39137 }, { "epoch": 0.6980701316305783, "grad_norm": 0.23058198392391205, "learning_rate": 1.2644960691728991e-05, "loss": 0.1272, "step": 39138 }, { "epoch": 0.698087967752292, "grad_norm": 0.25708097219467163, "learning_rate": 1.2643607569938029e-05, "loss": 0.0992, "step": 39139 }, { "epoch": 0.6981058038740057, "grad_norm": 0.40056949853897095, "learning_rate": 1.2642254496044708e-05, "loss": 0.0999, "step": 39140 }, { "epoch": 0.6981236399957194, "grad_norm": 0.2038673758506775, "learning_rate": 1.2640901470054279e-05, "loss": 0.0929, "step": 39141 }, { "epoch": 0.698141476117433, "grad_norm": 0.3673347532749176, "learning_rate": 1.2639548491971987e-05, "loss": 0.1172, "step": 39142 }, { "epoch": 0.6981593122391467, "grad_norm": 0.4534304440021515, "learning_rate": 1.2638195561803062e-05, "loss": 0.2537, "step": 39143 }, { "epoch": 0.6981771483608604, "grad_norm": 0.2370782345533371, "learning_rate": 1.2636842679552769e-05, "loss": 0.1275, "step": 39144 }, { "epoch": 0.6981949844825741, "grad_norm": 0.3435683846473694, "learning_rate": 1.2635489845226344e-05, "loss": 0.1769, "step": 39145 }, { "epoch": 0.6982128206042878, "grad_norm": 0.27369722723960876, "learning_rate": 1.2634137058829031e-05, "loss": 0.1079, "step": 39146 }, { "epoch": 0.6982306567260015, "grad_norm": 0.27731940150260925, "learning_rate": 1.2632784320366065e-05, "loss": 0.1104, "step": 39147 }, { "epoch": 0.6982484928477152, "grad_norm": 0.2214890867471695, "learning_rate": 1.263143162984271e-05, "loss": 0.1021, "step": 39148 }, { "epoch": 0.6982663289694289, "grad_norm": 0.30670398473739624, "learning_rate": 1.2630078987264196e-05, "loss": 0.1756, "step": 39149 }, { "epoch": 0.6982841650911426, "grad_norm": 0.29138460755348206, "learning_rate": 1.2628726392635759e-05, "loss": 0.1317, "step": 39150 }, { "epoch": 0.6983020012128562, "grad_norm": 0.2378193736076355, "learning_rate": 1.2627373845962658e-05, "loss": 0.1328, "step": 39151 }, { "epoch": 0.6983198373345699, "grad_norm": 0.17932955920696259, "learning_rate": 1.2626021347250125e-05, "loss": 0.1007, "step": 39152 }, { "epoch": 0.6983376734562836, "grad_norm": 0.29061880707740784, "learning_rate": 1.2624668896503414e-05, "loss": 0.1483, "step": 39153 }, { "epoch": 0.6983555095779974, "grad_norm": 0.31485599279403687, "learning_rate": 1.262331649372776e-05, "loss": 0.1508, "step": 39154 }, { "epoch": 0.6983733456997111, "grad_norm": 0.22489584982395172, "learning_rate": 1.2621964138928408e-05, "loss": 0.1051, "step": 39155 }, { "epoch": 0.6983911818214248, "grad_norm": 0.2558549642562866, "learning_rate": 1.2620611832110587e-05, "loss": 0.1241, "step": 39156 }, { "epoch": 0.6984090179431385, "grad_norm": 0.25634926557540894, "learning_rate": 1.261925957327956e-05, "loss": 0.1136, "step": 39157 }, { "epoch": 0.6984268540648522, "grad_norm": 0.29607006907463074, "learning_rate": 1.2617907362440562e-05, "loss": 0.127, "step": 39158 }, { "epoch": 0.6984446901865659, "grad_norm": 0.2850937843322754, "learning_rate": 1.2616555199598829e-05, "loss": 0.1123, "step": 39159 }, { "epoch": 0.6984625263082795, "grad_norm": 0.30544018745422363, "learning_rate": 1.2615203084759594e-05, "loss": 0.1169, "step": 39160 }, { "epoch": 0.6984803624299932, "grad_norm": 0.3253500759601593, "learning_rate": 1.2613851017928119e-05, "loss": 0.1083, "step": 39161 }, { "epoch": 0.6984981985517069, "grad_norm": 0.4971618056297302, "learning_rate": 1.261249899910964e-05, "loss": 0.1339, "step": 39162 }, { "epoch": 0.6985160346734206, "grad_norm": 0.4263477623462677, "learning_rate": 1.2611147028309388e-05, "loss": 0.099, "step": 39163 }, { "epoch": 0.6985338707951343, "grad_norm": 0.27318835258483887, "learning_rate": 1.2609795105532604e-05, "loss": 0.1003, "step": 39164 }, { "epoch": 0.698551706916848, "grad_norm": 0.27858197689056396, "learning_rate": 1.2608443230784539e-05, "loss": 0.1171, "step": 39165 }, { "epoch": 0.6985695430385617, "grad_norm": 0.247537761926651, "learning_rate": 1.2607091404070424e-05, "loss": 0.0919, "step": 39166 }, { "epoch": 0.6985873791602754, "grad_norm": 0.22903065383434296, "learning_rate": 1.2605739625395508e-05, "loss": 0.0576, "step": 39167 }, { "epoch": 0.698605215281989, "grad_norm": 0.29690563678741455, "learning_rate": 1.2604387894765026e-05, "loss": 0.1427, "step": 39168 }, { "epoch": 0.6986230514037027, "grad_norm": 0.2974201440811157, "learning_rate": 1.2603036212184211e-05, "loss": 0.1356, "step": 39169 }, { "epoch": 0.6986408875254164, "grad_norm": 0.21565239131450653, "learning_rate": 1.2601684577658318e-05, "loss": 0.1257, "step": 39170 }, { "epoch": 0.6986587236471302, "grad_norm": 0.25304117798805237, "learning_rate": 1.2600332991192576e-05, "loss": 0.1522, "step": 39171 }, { "epoch": 0.6986765597688439, "grad_norm": 0.1776355504989624, "learning_rate": 1.259898145279223e-05, "loss": 0.1016, "step": 39172 }, { "epoch": 0.6986943958905576, "grad_norm": 0.19958031177520752, "learning_rate": 1.2597629962462502e-05, "loss": 0.1285, "step": 39173 }, { "epoch": 0.6987122320122713, "grad_norm": 0.27183884382247925, "learning_rate": 1.2596278520208654e-05, "loss": 0.1274, "step": 39174 }, { "epoch": 0.698730068133985, "grad_norm": 0.2655390202999115, "learning_rate": 1.2594927126035917e-05, "loss": 0.1208, "step": 39175 }, { "epoch": 0.6987479042556987, "grad_norm": 0.2636301517486572, "learning_rate": 1.2593575779949524e-05, "loss": 0.1032, "step": 39176 }, { "epoch": 0.6987657403774123, "grad_norm": 0.25221994519233704, "learning_rate": 1.2592224481954707e-05, "loss": 0.135, "step": 39177 }, { "epoch": 0.698783576499126, "grad_norm": 0.18371526896953583, "learning_rate": 1.2590873232056724e-05, "loss": 0.1019, "step": 39178 }, { "epoch": 0.6988014126208397, "grad_norm": 0.27536481618881226, "learning_rate": 1.2589522030260791e-05, "loss": 0.0696, "step": 39179 }, { "epoch": 0.6988192487425534, "grad_norm": 0.2518484890460968, "learning_rate": 1.258817087657217e-05, "loss": 0.1101, "step": 39180 }, { "epoch": 0.6988370848642671, "grad_norm": 0.29254433512687683, "learning_rate": 1.2586819770996083e-05, "loss": 0.114, "step": 39181 }, { "epoch": 0.6988549209859808, "grad_norm": 0.29313719272613525, "learning_rate": 1.2585468713537762e-05, "loss": 0.1204, "step": 39182 }, { "epoch": 0.6988727571076945, "grad_norm": 0.2611317038536072, "learning_rate": 1.2584117704202459e-05, "loss": 0.1307, "step": 39183 }, { "epoch": 0.6988905932294082, "grad_norm": 0.28403565287590027, "learning_rate": 1.2582766742995405e-05, "loss": 0.1517, "step": 39184 }, { "epoch": 0.6989084293511219, "grad_norm": 0.3277135193347931, "learning_rate": 1.2581415829921839e-05, "loss": 0.102, "step": 39185 }, { "epoch": 0.6989262654728355, "grad_norm": 0.28685420751571655, "learning_rate": 1.2580064964986981e-05, "loss": 0.1076, "step": 39186 }, { "epoch": 0.6989441015945492, "grad_norm": 0.27156829833984375, "learning_rate": 1.2578714148196092e-05, "loss": 0.1213, "step": 39187 }, { "epoch": 0.698961937716263, "grad_norm": 0.26629018783569336, "learning_rate": 1.2577363379554396e-05, "loss": 0.151, "step": 39188 }, { "epoch": 0.6989797738379767, "grad_norm": 0.24965910613536835, "learning_rate": 1.2576012659067133e-05, "loss": 0.1081, "step": 39189 }, { "epoch": 0.6989976099596904, "grad_norm": 0.26446905732154846, "learning_rate": 1.2574661986739528e-05, "loss": 0.1183, "step": 39190 }, { "epoch": 0.6990154460814041, "grad_norm": 0.26350221037864685, "learning_rate": 1.2573311362576828e-05, "loss": 0.1316, "step": 39191 }, { "epoch": 0.6990332822031178, "grad_norm": 0.28374266624450684, "learning_rate": 1.257196078658427e-05, "loss": 0.1443, "step": 39192 }, { "epoch": 0.6990511183248315, "grad_norm": 0.19795873761177063, "learning_rate": 1.2570610258767073e-05, "loss": 0.1089, "step": 39193 }, { "epoch": 0.6990689544465452, "grad_norm": 0.20379579067230225, "learning_rate": 1.2569259779130493e-05, "loss": 0.0769, "step": 39194 }, { "epoch": 0.6990867905682588, "grad_norm": 0.25196146965026855, "learning_rate": 1.2567909347679745e-05, "loss": 0.1179, "step": 39195 }, { "epoch": 0.6991046266899725, "grad_norm": 0.23913459479808807, "learning_rate": 1.2566558964420088e-05, "loss": 0.1201, "step": 39196 }, { "epoch": 0.6991224628116862, "grad_norm": 0.2760443687438965, "learning_rate": 1.256520862935674e-05, "loss": 0.1376, "step": 39197 }, { "epoch": 0.6991402989333999, "grad_norm": 0.22437980771064758, "learning_rate": 1.2563858342494938e-05, "loss": 0.0982, "step": 39198 }, { "epoch": 0.6991581350551136, "grad_norm": 0.3428071141242981, "learning_rate": 1.2562508103839908e-05, "loss": 0.2368, "step": 39199 }, { "epoch": 0.6991759711768273, "grad_norm": 0.2954291105270386, "learning_rate": 1.2561157913396898e-05, "loss": 0.1451, "step": 39200 }, { "epoch": 0.699193807298541, "grad_norm": 0.29232439398765564, "learning_rate": 1.2559807771171139e-05, "loss": 0.1573, "step": 39201 }, { "epoch": 0.6992116434202547, "grad_norm": 0.26184868812561035, "learning_rate": 1.2558457677167865e-05, "loss": 0.1501, "step": 39202 }, { "epoch": 0.6992294795419683, "grad_norm": 0.23924040794372559, "learning_rate": 1.2557107631392292e-05, "loss": 0.1196, "step": 39203 }, { "epoch": 0.699247315663682, "grad_norm": 0.3905337452888489, "learning_rate": 1.255575763384968e-05, "loss": 0.1632, "step": 39204 }, { "epoch": 0.6992651517853958, "grad_norm": 0.31102848052978516, "learning_rate": 1.255440768454525e-05, "loss": 0.1394, "step": 39205 }, { "epoch": 0.6992829879071095, "grad_norm": 0.2870670557022095, "learning_rate": 1.255305778348422e-05, "loss": 0.078, "step": 39206 }, { "epoch": 0.6993008240288232, "grad_norm": 0.20866712927818298, "learning_rate": 1.2551707930671852e-05, "loss": 0.0957, "step": 39207 }, { "epoch": 0.6993186601505369, "grad_norm": 0.2362719178199768, "learning_rate": 1.255035812611335e-05, "loss": 0.1078, "step": 39208 }, { "epoch": 0.6993364962722506, "grad_norm": 0.24133490025997162, "learning_rate": 1.254900836981397e-05, "loss": 0.1107, "step": 39209 }, { "epoch": 0.6993543323939643, "grad_norm": 0.3059801757335663, "learning_rate": 1.2547658661778937e-05, "loss": 0.1035, "step": 39210 }, { "epoch": 0.699372168515678, "grad_norm": 0.22538863122463226, "learning_rate": 1.2546309002013479e-05, "loss": 0.0685, "step": 39211 }, { "epoch": 0.6993900046373916, "grad_norm": 0.2707521319389343, "learning_rate": 1.2544959390522815e-05, "loss": 0.1392, "step": 39212 }, { "epoch": 0.6994078407591053, "grad_norm": 0.21140368282794952, "learning_rate": 1.2543609827312203e-05, "loss": 0.0977, "step": 39213 }, { "epoch": 0.699425676880819, "grad_norm": 0.26786288619041443, "learning_rate": 1.2542260312386861e-05, "loss": 0.1379, "step": 39214 }, { "epoch": 0.6994435130025327, "grad_norm": 0.2541566491127014, "learning_rate": 1.2540910845752025e-05, "loss": 0.1169, "step": 39215 }, { "epoch": 0.6994613491242464, "grad_norm": 0.2691229581832886, "learning_rate": 1.2539561427412904e-05, "loss": 0.1108, "step": 39216 }, { "epoch": 0.6994791852459601, "grad_norm": 0.298031747341156, "learning_rate": 1.2538212057374765e-05, "loss": 0.1111, "step": 39217 }, { "epoch": 0.6994970213676738, "grad_norm": 0.21651975810527802, "learning_rate": 1.2536862735642812e-05, "loss": 0.1161, "step": 39218 }, { "epoch": 0.6995148574893875, "grad_norm": 0.25400853157043457, "learning_rate": 1.2535513462222289e-05, "loss": 0.1181, "step": 39219 }, { "epoch": 0.6995326936111012, "grad_norm": 0.3091680407524109, "learning_rate": 1.2534164237118413e-05, "loss": 0.1871, "step": 39220 }, { "epoch": 0.699550529732815, "grad_norm": 0.2525642216205597, "learning_rate": 1.2532815060336417e-05, "loss": 0.1367, "step": 39221 }, { "epoch": 0.6995683658545286, "grad_norm": 0.25376302003860474, "learning_rate": 1.2531465931881548e-05, "loss": 0.1557, "step": 39222 }, { "epoch": 0.6995862019762423, "grad_norm": 0.22580528259277344, "learning_rate": 1.2530116851759021e-05, "loss": 0.1387, "step": 39223 }, { "epoch": 0.699604038097956, "grad_norm": 0.22035124897956848, "learning_rate": 1.2528767819974072e-05, "loss": 0.1301, "step": 39224 }, { "epoch": 0.6996218742196697, "grad_norm": 0.35134023427963257, "learning_rate": 1.2527418836531913e-05, "loss": 0.161, "step": 39225 }, { "epoch": 0.6996397103413834, "grad_norm": 0.2618481516838074, "learning_rate": 1.2526069901437798e-05, "loss": 0.095, "step": 39226 }, { "epoch": 0.6996575464630971, "grad_norm": 0.211517795920372, "learning_rate": 1.2524721014696945e-05, "loss": 0.0997, "step": 39227 }, { "epoch": 0.6996753825848108, "grad_norm": 0.24371327459812164, "learning_rate": 1.252337217631458e-05, "loss": 0.1307, "step": 39228 }, { "epoch": 0.6996932187065245, "grad_norm": 0.19230906665325165, "learning_rate": 1.2522023386295928e-05, "loss": 0.1067, "step": 39229 }, { "epoch": 0.6997110548282381, "grad_norm": 0.2598012685775757, "learning_rate": 1.252067464464623e-05, "loss": 0.1581, "step": 39230 }, { "epoch": 0.6997288909499518, "grad_norm": 0.25412869453430176, "learning_rate": 1.2519325951370709e-05, "loss": 0.1246, "step": 39231 }, { "epoch": 0.6997467270716655, "grad_norm": 0.40220728516578674, "learning_rate": 1.2517977306474587e-05, "loss": 0.178, "step": 39232 }, { "epoch": 0.6997645631933792, "grad_norm": 0.18672268092632294, "learning_rate": 1.251662870996309e-05, "loss": 0.0781, "step": 39233 }, { "epoch": 0.6997823993150929, "grad_norm": 0.30113136768341064, "learning_rate": 1.2515280161841461e-05, "loss": 0.1077, "step": 39234 }, { "epoch": 0.6998002354368066, "grad_norm": 0.2578449547290802, "learning_rate": 1.2513931662114908e-05, "loss": 0.0871, "step": 39235 }, { "epoch": 0.6998180715585203, "grad_norm": 0.22928424179553986, "learning_rate": 1.2512583210788678e-05, "loss": 0.1166, "step": 39236 }, { "epoch": 0.699835907680234, "grad_norm": 0.33365604281425476, "learning_rate": 1.2511234807867988e-05, "loss": 0.1636, "step": 39237 }, { "epoch": 0.6998537438019478, "grad_norm": 0.19825081527233124, "learning_rate": 1.2509886453358058e-05, "loss": 0.1199, "step": 39238 }, { "epoch": 0.6998715799236614, "grad_norm": 0.30040496587753296, "learning_rate": 1.250853814726413e-05, "loss": 0.1744, "step": 39239 }, { "epoch": 0.6998894160453751, "grad_norm": 0.2596457004547119, "learning_rate": 1.2507189889591422e-05, "loss": 0.0891, "step": 39240 }, { "epoch": 0.6999072521670888, "grad_norm": 0.3632284998893738, "learning_rate": 1.2505841680345163e-05, "loss": 0.1023, "step": 39241 }, { "epoch": 0.6999250882888025, "grad_norm": 0.3388392925262451, "learning_rate": 1.2504493519530563e-05, "loss": 0.1145, "step": 39242 }, { "epoch": 0.6999429244105162, "grad_norm": 0.21876616775989532, "learning_rate": 1.2503145407152878e-05, "loss": 0.0655, "step": 39243 }, { "epoch": 0.6999607605322299, "grad_norm": 0.22531850636005402, "learning_rate": 1.2501797343217314e-05, "loss": 0.0841, "step": 39244 }, { "epoch": 0.6999785966539436, "grad_norm": 0.24790534377098083, "learning_rate": 1.25004493277291e-05, "loss": 0.0704, "step": 39245 }, { "epoch": 0.6999964327756573, "grad_norm": 0.2611728608608246, "learning_rate": 1.2499101360693461e-05, "loss": 0.1389, "step": 39246 }, { "epoch": 0.700014268897371, "grad_norm": 0.26006975769996643, "learning_rate": 1.2497753442115615e-05, "loss": 0.1344, "step": 39247 }, { "epoch": 0.7000321050190846, "grad_norm": 0.23054149746894836, "learning_rate": 1.2496405572000793e-05, "loss": 0.0992, "step": 39248 }, { "epoch": 0.7000499411407983, "grad_norm": 0.24358099699020386, "learning_rate": 1.2495057750354234e-05, "loss": 0.136, "step": 39249 }, { "epoch": 0.700067777262512, "grad_norm": 0.3306012749671936, "learning_rate": 1.2493709977181149e-05, "loss": 0.0956, "step": 39250 }, { "epoch": 0.7000856133842257, "grad_norm": 0.27693748474121094, "learning_rate": 1.2492362252486752e-05, "loss": 0.1181, "step": 39251 }, { "epoch": 0.7001034495059394, "grad_norm": 0.24711725115776062, "learning_rate": 1.2491014576276288e-05, "loss": 0.0543, "step": 39252 }, { "epoch": 0.7001212856276531, "grad_norm": 0.3434850871562958, "learning_rate": 1.2489666948554976e-05, "loss": 0.1315, "step": 39253 }, { "epoch": 0.7001391217493668, "grad_norm": 0.3840778172016144, "learning_rate": 1.2488319369328033e-05, "loss": 0.0924, "step": 39254 }, { "epoch": 0.7001569578710806, "grad_norm": 0.37275633215904236, "learning_rate": 1.2486971838600678e-05, "loss": 0.208, "step": 39255 }, { "epoch": 0.7001747939927943, "grad_norm": 0.24376462399959564, "learning_rate": 1.248562435637815e-05, "loss": 0.0912, "step": 39256 }, { "epoch": 0.7001926301145079, "grad_norm": 0.24473705887794495, "learning_rate": 1.2484276922665664e-05, "loss": 0.1066, "step": 39257 }, { "epoch": 0.7002104662362216, "grad_norm": 0.329258531332016, "learning_rate": 1.2482929537468444e-05, "loss": 0.1293, "step": 39258 }, { "epoch": 0.7002283023579353, "grad_norm": 0.23452426493167877, "learning_rate": 1.2481582200791713e-05, "loss": 0.1427, "step": 39259 }, { "epoch": 0.700246138479649, "grad_norm": 0.3388354182243347, "learning_rate": 1.2480234912640685e-05, "loss": 0.1479, "step": 39260 }, { "epoch": 0.7002639746013627, "grad_norm": 0.2526503801345825, "learning_rate": 1.2478887673020595e-05, "loss": 0.1164, "step": 39261 }, { "epoch": 0.7002818107230764, "grad_norm": 0.28935933113098145, "learning_rate": 1.2477540481936656e-05, "loss": 0.1432, "step": 39262 }, { "epoch": 0.7002996468447901, "grad_norm": 0.24720443785190582, "learning_rate": 1.2476193339394105e-05, "loss": 0.1397, "step": 39263 }, { "epoch": 0.7003174829665038, "grad_norm": 0.30163803696632385, "learning_rate": 1.2474846245398142e-05, "loss": 0.1022, "step": 39264 }, { "epoch": 0.7003353190882174, "grad_norm": 0.2327345609664917, "learning_rate": 1.2473499199954012e-05, "loss": 0.0975, "step": 39265 }, { "epoch": 0.7003531552099311, "grad_norm": 0.2702726125717163, "learning_rate": 1.2472152203066926e-05, "loss": 0.145, "step": 39266 }, { "epoch": 0.7003709913316448, "grad_norm": 0.3468043804168701, "learning_rate": 1.2470805254742104e-05, "loss": 0.1001, "step": 39267 }, { "epoch": 0.7003888274533585, "grad_norm": 0.25190499424934387, "learning_rate": 1.2469458354984759e-05, "loss": 0.1523, "step": 39268 }, { "epoch": 0.7004066635750722, "grad_norm": 0.22685116529464722, "learning_rate": 1.2468111503800132e-05, "loss": 0.0873, "step": 39269 }, { "epoch": 0.7004244996967859, "grad_norm": 0.32708802819252014, "learning_rate": 1.246676470119343e-05, "loss": 0.0693, "step": 39270 }, { "epoch": 0.7004423358184996, "grad_norm": 0.2807897627353668, "learning_rate": 1.246541794716988e-05, "loss": 0.0866, "step": 39271 }, { "epoch": 0.7004601719402134, "grad_norm": 0.23111185431480408, "learning_rate": 1.2464071241734698e-05, "loss": 0.0917, "step": 39272 }, { "epoch": 0.7004780080619271, "grad_norm": 0.3054521083831787, "learning_rate": 1.2462724584893095e-05, "loss": 0.1099, "step": 39273 }, { "epoch": 0.7004958441836407, "grad_norm": 0.3203429877758026, "learning_rate": 1.2461377976650316e-05, "loss": 0.159, "step": 39274 }, { "epoch": 0.7005136803053544, "grad_norm": 0.319185733795166, "learning_rate": 1.2460031417011553e-05, "loss": 0.097, "step": 39275 }, { "epoch": 0.7005315164270681, "grad_norm": 0.2993345260620117, "learning_rate": 1.2458684905982049e-05, "loss": 0.0811, "step": 39276 }, { "epoch": 0.7005493525487818, "grad_norm": 0.2945885956287384, "learning_rate": 1.2457338443567005e-05, "loss": 0.1221, "step": 39277 }, { "epoch": 0.7005671886704955, "grad_norm": 0.22599923610687256, "learning_rate": 1.245599202977166e-05, "loss": 0.1122, "step": 39278 }, { "epoch": 0.7005850247922092, "grad_norm": 0.2429341822862625, "learning_rate": 1.245464566460122e-05, "loss": 0.1281, "step": 39279 }, { "epoch": 0.7006028609139229, "grad_norm": 0.282914936542511, "learning_rate": 1.2453299348060909e-05, "loss": 0.1413, "step": 39280 }, { "epoch": 0.7006206970356366, "grad_norm": 0.19204550981521606, "learning_rate": 1.245195308015593e-05, "loss": 0.1148, "step": 39281 }, { "epoch": 0.7006385331573503, "grad_norm": 0.2714342176914215, "learning_rate": 1.2450606860891528e-05, "loss": 0.1295, "step": 39282 }, { "epoch": 0.7006563692790639, "grad_norm": 0.30962443351745605, "learning_rate": 1.2449260690272907e-05, "loss": 0.1417, "step": 39283 }, { "epoch": 0.7006742054007776, "grad_norm": 0.22967122495174408, "learning_rate": 1.2447914568305289e-05, "loss": 0.0838, "step": 39284 }, { "epoch": 0.7006920415224913, "grad_norm": 0.3372592628002167, "learning_rate": 1.2446568494993885e-05, "loss": 0.1295, "step": 39285 }, { "epoch": 0.700709877644205, "grad_norm": 0.21744464337825775, "learning_rate": 1.2445222470343912e-05, "loss": 0.1222, "step": 39286 }, { "epoch": 0.7007277137659187, "grad_norm": 0.2446707785129547, "learning_rate": 1.2443876494360599e-05, "loss": 0.1053, "step": 39287 }, { "epoch": 0.7007455498876324, "grad_norm": 0.3213288486003876, "learning_rate": 1.244253056704916e-05, "loss": 0.1265, "step": 39288 }, { "epoch": 0.7007633860093462, "grad_norm": 0.20614628493785858, "learning_rate": 1.2441184688414798e-05, "loss": 0.1004, "step": 39289 }, { "epoch": 0.7007812221310599, "grad_norm": 0.29540812969207764, "learning_rate": 1.2439838858462752e-05, "loss": 0.1, "step": 39290 }, { "epoch": 0.7007990582527736, "grad_norm": 0.31829172372817993, "learning_rate": 1.243849307719822e-05, "loss": 0.2475, "step": 39291 }, { "epoch": 0.7008168943744872, "grad_norm": 0.31675219535827637, "learning_rate": 1.2437147344626437e-05, "loss": 0.1735, "step": 39292 }, { "epoch": 0.7008347304962009, "grad_norm": 0.36702772974967957, "learning_rate": 1.2435801660752611e-05, "loss": 0.1177, "step": 39293 }, { "epoch": 0.7008525666179146, "grad_norm": 0.21705499291419983, "learning_rate": 1.2434456025581944e-05, "loss": 0.1356, "step": 39294 }, { "epoch": 0.7008704027396283, "grad_norm": 0.23931188881397247, "learning_rate": 1.2433110439119678e-05, "loss": 0.1212, "step": 39295 }, { "epoch": 0.700888238861342, "grad_norm": 0.2305675595998764, "learning_rate": 1.2431764901371015e-05, "loss": 0.1195, "step": 39296 }, { "epoch": 0.7009060749830557, "grad_norm": 0.3326476812362671, "learning_rate": 1.2430419412341174e-05, "loss": 0.077, "step": 39297 }, { "epoch": 0.7009239111047694, "grad_norm": 0.2987017333507538, "learning_rate": 1.2429073972035368e-05, "loss": 0.0823, "step": 39298 }, { "epoch": 0.7009417472264831, "grad_norm": 0.2243385910987854, "learning_rate": 1.2427728580458803e-05, "loss": 0.142, "step": 39299 }, { "epoch": 0.7009595833481967, "grad_norm": 0.21158157289028168, "learning_rate": 1.2426383237616711e-05, "loss": 0.0718, "step": 39300 }, { "epoch": 0.7009774194699104, "grad_norm": 0.21682368218898773, "learning_rate": 1.2425037943514306e-05, "loss": 0.1136, "step": 39301 }, { "epoch": 0.7009952555916241, "grad_norm": 0.2618319094181061, "learning_rate": 1.2423692698156786e-05, "loss": 0.0955, "step": 39302 }, { "epoch": 0.7010130917133378, "grad_norm": 0.21428616344928741, "learning_rate": 1.2422347501549384e-05, "loss": 0.1243, "step": 39303 }, { "epoch": 0.7010309278350515, "grad_norm": 0.2124176174402237, "learning_rate": 1.24210023536973e-05, "loss": 0.0585, "step": 39304 }, { "epoch": 0.7010487639567652, "grad_norm": 0.22856761515140533, "learning_rate": 1.2419657254605762e-05, "loss": 0.1112, "step": 39305 }, { "epoch": 0.701066600078479, "grad_norm": 0.28618964552879333, "learning_rate": 1.2418312204279983e-05, "loss": 0.1455, "step": 39306 }, { "epoch": 0.7010844362001927, "grad_norm": 0.5303351283073425, "learning_rate": 1.2416967202725157e-05, "loss": 0.1534, "step": 39307 }, { "epoch": 0.7011022723219064, "grad_norm": 0.2856936752796173, "learning_rate": 1.2415622249946523e-05, "loss": 0.1111, "step": 39308 }, { "epoch": 0.70112010844362, "grad_norm": 0.24779370427131653, "learning_rate": 1.2414277345949285e-05, "loss": 0.1341, "step": 39309 }, { "epoch": 0.7011379445653337, "grad_norm": 0.401278018951416, "learning_rate": 1.2412932490738655e-05, "loss": 0.185, "step": 39310 }, { "epoch": 0.7011557806870474, "grad_norm": 0.2862904369831085, "learning_rate": 1.2411587684319845e-05, "loss": 0.1352, "step": 39311 }, { "epoch": 0.7011736168087611, "grad_norm": 0.2385338842868805, "learning_rate": 1.241024292669806e-05, "loss": 0.1217, "step": 39312 }, { "epoch": 0.7011914529304748, "grad_norm": 0.24269017577171326, "learning_rate": 1.2408898217878531e-05, "loss": 0.1621, "step": 39313 }, { "epoch": 0.7012092890521885, "grad_norm": 0.23597021400928497, "learning_rate": 1.2407553557866461e-05, "loss": 0.0994, "step": 39314 }, { "epoch": 0.7012271251739022, "grad_norm": 0.2812851667404175, "learning_rate": 1.2406208946667065e-05, "loss": 0.1587, "step": 39315 }, { "epoch": 0.7012449612956159, "grad_norm": 0.32705435156822205, "learning_rate": 1.2404864384285538e-05, "loss": 0.1086, "step": 39316 }, { "epoch": 0.7012627974173296, "grad_norm": 0.25162845849990845, "learning_rate": 1.2403519870727121e-05, "loss": 0.0738, "step": 39317 }, { "epoch": 0.7012806335390432, "grad_norm": 0.17822778224945068, "learning_rate": 1.2402175405997003e-05, "loss": 0.0828, "step": 39318 }, { "epoch": 0.7012984696607569, "grad_norm": 0.24295833706855774, "learning_rate": 1.2400830990100409e-05, "loss": 0.0965, "step": 39319 }, { "epoch": 0.7013163057824706, "grad_norm": 0.2563144862651825, "learning_rate": 1.2399486623042539e-05, "loss": 0.1077, "step": 39320 }, { "epoch": 0.7013341419041843, "grad_norm": 0.2714223861694336, "learning_rate": 1.2398142304828622e-05, "loss": 0.0835, "step": 39321 }, { "epoch": 0.7013519780258981, "grad_norm": 0.24917560815811157, "learning_rate": 1.2396798035463856e-05, "loss": 0.0939, "step": 39322 }, { "epoch": 0.7013698141476118, "grad_norm": 0.24004727602005005, "learning_rate": 1.2395453814953453e-05, "loss": 0.1343, "step": 39323 }, { "epoch": 0.7013876502693255, "grad_norm": 0.24072277545928955, "learning_rate": 1.2394109643302618e-05, "loss": 0.0923, "step": 39324 }, { "epoch": 0.7014054863910392, "grad_norm": 0.2789347767829895, "learning_rate": 1.2392765520516575e-05, "loss": 0.1566, "step": 39325 }, { "epoch": 0.7014233225127529, "grad_norm": 0.2381190061569214, "learning_rate": 1.2391421446600526e-05, "loss": 0.1195, "step": 39326 }, { "epoch": 0.7014411586344665, "grad_norm": 0.29626700282096863, "learning_rate": 1.2390077421559684e-05, "loss": 0.1393, "step": 39327 }, { "epoch": 0.7014589947561802, "grad_norm": 0.3041497766971588, "learning_rate": 1.2388733445399259e-05, "loss": 0.115, "step": 39328 }, { "epoch": 0.7014768308778939, "grad_norm": 0.1709960401058197, "learning_rate": 1.2387389518124447e-05, "loss": 0.0938, "step": 39329 }, { "epoch": 0.7014946669996076, "grad_norm": 0.28282129764556885, "learning_rate": 1.2386045639740481e-05, "loss": 0.1308, "step": 39330 }, { "epoch": 0.7015125031213213, "grad_norm": 0.2282441407442093, "learning_rate": 1.2384701810252547e-05, "loss": 0.1246, "step": 39331 }, { "epoch": 0.701530339243035, "grad_norm": 0.2657516300678253, "learning_rate": 1.2383358029665878e-05, "loss": 0.1293, "step": 39332 }, { "epoch": 0.7015481753647487, "grad_norm": 0.29586759209632874, "learning_rate": 1.2382014297985658e-05, "loss": 0.0941, "step": 39333 }, { "epoch": 0.7015660114864624, "grad_norm": 0.3080667555332184, "learning_rate": 1.2380670615217122e-05, "loss": 0.0832, "step": 39334 }, { "epoch": 0.701583847608176, "grad_norm": 0.3187499940395355, "learning_rate": 1.2379326981365464e-05, "loss": 0.101, "step": 39335 }, { "epoch": 0.7016016837298897, "grad_norm": 0.21686674654483795, "learning_rate": 1.2377983396435891e-05, "loss": 0.1519, "step": 39336 }, { "epoch": 0.7016195198516034, "grad_norm": 0.3235342502593994, "learning_rate": 1.237663986043361e-05, "loss": 0.1125, "step": 39337 }, { "epoch": 0.7016373559733171, "grad_norm": 0.23311492800712585, "learning_rate": 1.2375296373363836e-05, "loss": 0.1109, "step": 39338 }, { "epoch": 0.7016551920950309, "grad_norm": 0.2622036635875702, "learning_rate": 1.2373952935231778e-05, "loss": 0.0813, "step": 39339 }, { "epoch": 0.7016730282167446, "grad_norm": 0.29204609990119934, "learning_rate": 1.2372609546042638e-05, "loss": 0.1833, "step": 39340 }, { "epoch": 0.7016908643384583, "grad_norm": 0.26663222908973694, "learning_rate": 1.2371266205801623e-05, "loss": 0.114, "step": 39341 }, { "epoch": 0.701708700460172, "grad_norm": 0.38094082474708557, "learning_rate": 1.2369922914513935e-05, "loss": 0.1232, "step": 39342 }, { "epoch": 0.7017265365818857, "grad_norm": 0.19940249621868134, "learning_rate": 1.2368579672184796e-05, "loss": 0.082, "step": 39343 }, { "epoch": 0.7017443727035994, "grad_norm": 0.23590822517871857, "learning_rate": 1.2367236478819408e-05, "loss": 0.0979, "step": 39344 }, { "epoch": 0.701762208825313, "grad_norm": 0.3120937943458557, "learning_rate": 1.2365893334422962e-05, "loss": 0.1377, "step": 39345 }, { "epoch": 0.7017800449470267, "grad_norm": 0.24797527492046356, "learning_rate": 1.2364550239000688e-05, "loss": 0.1149, "step": 39346 }, { "epoch": 0.7017978810687404, "grad_norm": 0.3228990137577057, "learning_rate": 1.2363207192557772e-05, "loss": 0.127, "step": 39347 }, { "epoch": 0.7018157171904541, "grad_norm": 0.25563138723373413, "learning_rate": 1.2361864195099437e-05, "loss": 0.1181, "step": 39348 }, { "epoch": 0.7018335533121678, "grad_norm": 0.24893851578235626, "learning_rate": 1.2360521246630882e-05, "loss": 0.1232, "step": 39349 }, { "epoch": 0.7018513894338815, "grad_norm": 0.22202596068382263, "learning_rate": 1.2359178347157306e-05, "loss": 0.1206, "step": 39350 }, { "epoch": 0.7018692255555952, "grad_norm": 0.23783749341964722, "learning_rate": 1.2357835496683926e-05, "loss": 0.1204, "step": 39351 }, { "epoch": 0.7018870616773089, "grad_norm": 0.36229702830314636, "learning_rate": 1.2356492695215943e-05, "loss": 0.1143, "step": 39352 }, { "epoch": 0.7019048977990225, "grad_norm": 0.3061220645904541, "learning_rate": 1.2355149942758562e-05, "loss": 0.1282, "step": 39353 }, { "epoch": 0.7019227339207362, "grad_norm": 0.3575322926044464, "learning_rate": 1.2353807239316988e-05, "loss": 0.1051, "step": 39354 }, { "epoch": 0.7019405700424499, "grad_norm": 0.25187069177627563, "learning_rate": 1.2352464584896415e-05, "loss": 0.1323, "step": 39355 }, { "epoch": 0.7019584061641637, "grad_norm": 0.20016524195671082, "learning_rate": 1.2351121979502066e-05, "loss": 0.0799, "step": 39356 }, { "epoch": 0.7019762422858774, "grad_norm": 0.22806833684444427, "learning_rate": 1.2349779423139138e-05, "loss": 0.1682, "step": 39357 }, { "epoch": 0.7019940784075911, "grad_norm": 0.28366509079933167, "learning_rate": 1.2348436915812824e-05, "loss": 0.133, "step": 39358 }, { "epoch": 0.7020119145293048, "grad_norm": 0.2176230102777481, "learning_rate": 1.2347094457528349e-05, "loss": 0.0906, "step": 39359 }, { "epoch": 0.7020297506510185, "grad_norm": 0.28068429231643677, "learning_rate": 1.2345752048290895e-05, "loss": 0.1347, "step": 39360 }, { "epoch": 0.7020475867727322, "grad_norm": 0.2743104100227356, "learning_rate": 1.2344409688105688e-05, "loss": 0.1093, "step": 39361 }, { "epoch": 0.7020654228944458, "grad_norm": 0.21048085391521454, "learning_rate": 1.234306737697792e-05, "loss": 0.0636, "step": 39362 }, { "epoch": 0.7020832590161595, "grad_norm": 0.2708156406879425, "learning_rate": 1.2341725114912783e-05, "loss": 0.1382, "step": 39363 }, { "epoch": 0.7021010951378732, "grad_norm": 0.2513867914676666, "learning_rate": 1.2340382901915504e-05, "loss": 0.1177, "step": 39364 }, { "epoch": 0.7021189312595869, "grad_norm": 0.2737678289413452, "learning_rate": 1.233904073799127e-05, "loss": 0.1223, "step": 39365 }, { "epoch": 0.7021367673813006, "grad_norm": 0.25893157720565796, "learning_rate": 1.2337698623145288e-05, "loss": 0.1446, "step": 39366 }, { "epoch": 0.7021546035030143, "grad_norm": 0.2867906987667084, "learning_rate": 1.233635655738276e-05, "loss": 0.1371, "step": 39367 }, { "epoch": 0.702172439624728, "grad_norm": 0.20060017704963684, "learning_rate": 1.2335014540708879e-05, "loss": 0.1195, "step": 39368 }, { "epoch": 0.7021902757464417, "grad_norm": 0.23535068333148956, "learning_rate": 1.2333672573128866e-05, "loss": 0.1107, "step": 39369 }, { "epoch": 0.7022081118681553, "grad_norm": 0.3587065041065216, "learning_rate": 1.2332330654647912e-05, "loss": 0.111, "step": 39370 }, { "epoch": 0.702225947989869, "grad_norm": 0.38629475235939026, "learning_rate": 1.233098878527122e-05, "loss": 0.1376, "step": 39371 }, { "epoch": 0.7022437841115827, "grad_norm": 0.26195722818374634, "learning_rate": 1.232964696500398e-05, "loss": 0.1428, "step": 39372 }, { "epoch": 0.7022616202332965, "grad_norm": 0.20094266533851624, "learning_rate": 1.2328305193851414e-05, "loss": 0.0899, "step": 39373 }, { "epoch": 0.7022794563550102, "grad_norm": 0.2695089876651764, "learning_rate": 1.2326963471818707e-05, "loss": 0.1036, "step": 39374 }, { "epoch": 0.7022972924767239, "grad_norm": 0.3112042546272278, "learning_rate": 1.2325621798911074e-05, "loss": 0.0974, "step": 39375 }, { "epoch": 0.7023151285984376, "grad_norm": 0.5171024203300476, "learning_rate": 1.2324280175133701e-05, "loss": 0.1584, "step": 39376 }, { "epoch": 0.7023329647201513, "grad_norm": 0.23663710057735443, "learning_rate": 1.2322938600491805e-05, "loss": 0.1178, "step": 39377 }, { "epoch": 0.702350800841865, "grad_norm": 0.22713212668895721, "learning_rate": 1.2321597074990575e-05, "loss": 0.078, "step": 39378 }, { "epoch": 0.7023686369635787, "grad_norm": 0.30640026926994324, "learning_rate": 1.2320255598635216e-05, "loss": 0.1312, "step": 39379 }, { "epoch": 0.7023864730852923, "grad_norm": 0.30451124906539917, "learning_rate": 1.2318914171430925e-05, "loss": 0.1218, "step": 39380 }, { "epoch": 0.702404309207006, "grad_norm": 0.2545153796672821, "learning_rate": 1.2317572793382892e-05, "loss": 0.1276, "step": 39381 }, { "epoch": 0.7024221453287197, "grad_norm": 0.25689148902893066, "learning_rate": 1.2316231464496338e-05, "loss": 0.0972, "step": 39382 }, { "epoch": 0.7024399814504334, "grad_norm": 0.1817709505558014, "learning_rate": 1.2314890184776454e-05, "loss": 0.0812, "step": 39383 }, { "epoch": 0.7024578175721471, "grad_norm": 0.2498323917388916, "learning_rate": 1.2313548954228432e-05, "loss": 0.1062, "step": 39384 }, { "epoch": 0.7024756536938608, "grad_norm": 0.2677672505378723, "learning_rate": 1.231220777285747e-05, "loss": 0.1226, "step": 39385 }, { "epoch": 0.7024934898155745, "grad_norm": 0.3736957907676697, "learning_rate": 1.2310866640668784e-05, "loss": 0.108, "step": 39386 }, { "epoch": 0.7025113259372882, "grad_norm": 0.29187342524528503, "learning_rate": 1.2309525557667547e-05, "loss": 0.1136, "step": 39387 }, { "epoch": 0.7025291620590018, "grad_norm": 0.2804716229438782, "learning_rate": 1.2308184523858985e-05, "loss": 0.1182, "step": 39388 }, { "epoch": 0.7025469981807155, "grad_norm": 0.2445303499698639, "learning_rate": 1.2306843539248272e-05, "loss": 0.1022, "step": 39389 }, { "epoch": 0.7025648343024293, "grad_norm": 0.2529180347919464, "learning_rate": 1.230550260384063e-05, "loss": 0.1109, "step": 39390 }, { "epoch": 0.702582670424143, "grad_norm": 0.2855203151702881, "learning_rate": 1.2304161717641241e-05, "loss": 0.1209, "step": 39391 }, { "epoch": 0.7026005065458567, "grad_norm": 0.39952555298805237, "learning_rate": 1.2302820880655308e-05, "loss": 0.0753, "step": 39392 }, { "epoch": 0.7026183426675704, "grad_norm": 0.24199889600276947, "learning_rate": 1.2301480092888026e-05, "loss": 0.1087, "step": 39393 }, { "epoch": 0.7026361787892841, "grad_norm": 0.24643494188785553, "learning_rate": 1.2300139354344586e-05, "loss": 0.1237, "step": 39394 }, { "epoch": 0.7026540149109978, "grad_norm": 0.35643017292022705, "learning_rate": 1.2298798665030198e-05, "loss": 0.1379, "step": 39395 }, { "epoch": 0.7026718510327115, "grad_norm": 0.2550109922885895, "learning_rate": 1.2297458024950057e-05, "loss": 0.1142, "step": 39396 }, { "epoch": 0.7026896871544251, "grad_norm": 0.3141452372074127, "learning_rate": 1.2296117434109353e-05, "loss": 0.128, "step": 39397 }, { "epoch": 0.7027075232761388, "grad_norm": 0.21773818135261536, "learning_rate": 1.2294776892513277e-05, "loss": 0.0962, "step": 39398 }, { "epoch": 0.7027253593978525, "grad_norm": 0.3088330030441284, "learning_rate": 1.2293436400167043e-05, "loss": 0.1411, "step": 39399 }, { "epoch": 0.7027431955195662, "grad_norm": 0.31708624958992004, "learning_rate": 1.2292095957075841e-05, "loss": 0.1083, "step": 39400 }, { "epoch": 0.7027610316412799, "grad_norm": 0.25620579719543457, "learning_rate": 1.2290755563244851e-05, "loss": 0.1004, "step": 39401 }, { "epoch": 0.7027788677629936, "grad_norm": 0.3027898371219635, "learning_rate": 1.2289415218679284e-05, "loss": 0.1421, "step": 39402 }, { "epoch": 0.7027967038847073, "grad_norm": 0.26583635807037354, "learning_rate": 1.2288074923384344e-05, "loss": 0.135, "step": 39403 }, { "epoch": 0.702814540006421, "grad_norm": 0.3299161195755005, "learning_rate": 1.2286734677365214e-05, "loss": 0.114, "step": 39404 }, { "epoch": 0.7028323761281347, "grad_norm": 0.2883865237236023, "learning_rate": 1.2285394480627094e-05, "loss": 0.1144, "step": 39405 }, { "epoch": 0.7028502122498483, "grad_norm": 0.26030004024505615, "learning_rate": 1.2284054333175174e-05, "loss": 0.1194, "step": 39406 }, { "epoch": 0.7028680483715621, "grad_norm": 0.26997897028923035, "learning_rate": 1.2282714235014641e-05, "loss": 0.1487, "step": 39407 }, { "epoch": 0.7028858844932758, "grad_norm": 0.33512449264526367, "learning_rate": 1.2281374186150713e-05, "loss": 0.1284, "step": 39408 }, { "epoch": 0.7029037206149895, "grad_norm": 0.19745367765426636, "learning_rate": 1.228003418658857e-05, "loss": 0.11, "step": 39409 }, { "epoch": 0.7029215567367032, "grad_norm": 0.314870148897171, "learning_rate": 1.2278694236333407e-05, "loss": 0.1632, "step": 39410 }, { "epoch": 0.7029393928584169, "grad_norm": 0.2699345052242279, "learning_rate": 1.2277354335390411e-05, "loss": 0.1547, "step": 39411 }, { "epoch": 0.7029572289801306, "grad_norm": 0.31500157713890076, "learning_rate": 1.2276014483764791e-05, "loss": 0.1133, "step": 39412 }, { "epoch": 0.7029750651018443, "grad_norm": 0.2887321412563324, "learning_rate": 1.2274674681461737e-05, "loss": 0.1375, "step": 39413 }, { "epoch": 0.702992901223558, "grad_norm": 0.3022030293941498, "learning_rate": 1.2273334928486427e-05, "loss": 0.1185, "step": 39414 }, { "epoch": 0.7030107373452716, "grad_norm": 0.48569798469543457, "learning_rate": 1.2271995224844076e-05, "loss": 0.079, "step": 39415 }, { "epoch": 0.7030285734669853, "grad_norm": 0.29759669303894043, "learning_rate": 1.227065557053986e-05, "loss": 0.0821, "step": 39416 }, { "epoch": 0.703046409588699, "grad_norm": 0.21454553306102753, "learning_rate": 1.2269315965578987e-05, "loss": 0.0806, "step": 39417 }, { "epoch": 0.7030642457104127, "grad_norm": 0.26078954339027405, "learning_rate": 1.2267976409966645e-05, "loss": 0.1735, "step": 39418 }, { "epoch": 0.7030820818321264, "grad_norm": 0.2663392424583435, "learning_rate": 1.226663690370802e-05, "loss": 0.1109, "step": 39419 }, { "epoch": 0.7030999179538401, "grad_norm": 0.35911014676094055, "learning_rate": 1.2265297446808302e-05, "loss": 0.1285, "step": 39420 }, { "epoch": 0.7031177540755538, "grad_norm": 0.276168555021286, "learning_rate": 1.22639580392727e-05, "loss": 0.1165, "step": 39421 }, { "epoch": 0.7031355901972675, "grad_norm": 0.2792278826236725, "learning_rate": 1.2262618681106392e-05, "loss": 0.1732, "step": 39422 }, { "epoch": 0.7031534263189813, "grad_norm": 0.29941728711128235, "learning_rate": 1.2261279372314574e-05, "loss": 0.0968, "step": 39423 }, { "epoch": 0.7031712624406949, "grad_norm": 0.2718508541584015, "learning_rate": 1.225994011290243e-05, "loss": 0.1376, "step": 39424 }, { "epoch": 0.7031890985624086, "grad_norm": 0.2902662754058838, "learning_rate": 1.2258600902875165e-05, "loss": 0.0983, "step": 39425 }, { "epoch": 0.7032069346841223, "grad_norm": 0.3752191364765167, "learning_rate": 1.2257261742237965e-05, "loss": 0.1252, "step": 39426 }, { "epoch": 0.703224770805836, "grad_norm": 0.24731576442718506, "learning_rate": 1.225592263099602e-05, "loss": 0.1336, "step": 39427 }, { "epoch": 0.7032426069275497, "grad_norm": 0.3346899449825287, "learning_rate": 1.225458356915451e-05, "loss": 0.1678, "step": 39428 }, { "epoch": 0.7032604430492634, "grad_norm": 0.30468907952308655, "learning_rate": 1.2253244556718637e-05, "loss": 0.1603, "step": 39429 }, { "epoch": 0.7032782791709771, "grad_norm": 0.27630361914634705, "learning_rate": 1.22519055936936e-05, "loss": 0.0937, "step": 39430 }, { "epoch": 0.7032961152926908, "grad_norm": 0.25730952620506287, "learning_rate": 1.2250566680084579e-05, "loss": 0.1094, "step": 39431 }, { "epoch": 0.7033139514144044, "grad_norm": 0.29234641790390015, "learning_rate": 1.2249227815896767e-05, "loss": 0.1349, "step": 39432 }, { "epoch": 0.7033317875361181, "grad_norm": 0.22435252368450165, "learning_rate": 1.2247889001135343e-05, "loss": 0.1198, "step": 39433 }, { "epoch": 0.7033496236578318, "grad_norm": 0.312931090593338, "learning_rate": 1.2246550235805513e-05, "loss": 0.1234, "step": 39434 }, { "epoch": 0.7033674597795455, "grad_norm": 0.23169714212417603, "learning_rate": 1.2245211519912458e-05, "loss": 0.0715, "step": 39435 }, { "epoch": 0.7033852959012592, "grad_norm": 0.37830817699432373, "learning_rate": 1.2243872853461372e-05, "loss": 0.1038, "step": 39436 }, { "epoch": 0.7034031320229729, "grad_norm": 0.33053845167160034, "learning_rate": 1.224253423645743e-05, "loss": 0.1499, "step": 39437 }, { "epoch": 0.7034209681446866, "grad_norm": 0.2584063708782196, "learning_rate": 1.2241195668905839e-05, "loss": 0.0891, "step": 39438 }, { "epoch": 0.7034388042664003, "grad_norm": 0.2626304626464844, "learning_rate": 1.2239857150811782e-05, "loss": 0.0735, "step": 39439 }, { "epoch": 0.7034566403881141, "grad_norm": 0.2601485848426819, "learning_rate": 1.2238518682180446e-05, "loss": 0.1255, "step": 39440 }, { "epoch": 0.7034744765098278, "grad_norm": 0.2184794694185257, "learning_rate": 1.2237180263017009e-05, "loss": 0.0729, "step": 39441 }, { "epoch": 0.7034923126315414, "grad_norm": 0.27454760670661926, "learning_rate": 1.223584189332668e-05, "loss": 0.142, "step": 39442 }, { "epoch": 0.7035101487532551, "grad_norm": 0.2277342975139618, "learning_rate": 1.2234503573114628e-05, "loss": 0.0954, "step": 39443 }, { "epoch": 0.7035279848749688, "grad_norm": 0.3067512512207031, "learning_rate": 1.2233165302386057e-05, "loss": 0.1175, "step": 39444 }, { "epoch": 0.7035458209966825, "grad_norm": 0.2913004159927368, "learning_rate": 1.2231827081146147e-05, "loss": 0.0935, "step": 39445 }, { "epoch": 0.7035636571183962, "grad_norm": 0.26625627279281616, "learning_rate": 1.2230488909400076e-05, "loss": 0.1264, "step": 39446 }, { "epoch": 0.7035814932401099, "grad_norm": 0.2908312678337097, "learning_rate": 1.2229150787153049e-05, "loss": 0.1131, "step": 39447 }, { "epoch": 0.7035993293618236, "grad_norm": 0.2886578142642975, "learning_rate": 1.2227812714410244e-05, "loss": 0.1327, "step": 39448 }, { "epoch": 0.7036171654835373, "grad_norm": 0.25169724225997925, "learning_rate": 1.2226474691176852e-05, "loss": 0.143, "step": 39449 }, { "epoch": 0.7036350016052509, "grad_norm": 0.24674318730831146, "learning_rate": 1.2225136717458041e-05, "loss": 0.14, "step": 39450 }, { "epoch": 0.7036528377269646, "grad_norm": 0.24743840098381042, "learning_rate": 1.2223798793259026e-05, "loss": 0.0917, "step": 39451 }, { "epoch": 0.7036706738486783, "grad_norm": 0.2742423713207245, "learning_rate": 1.2222460918584977e-05, "loss": 0.1268, "step": 39452 }, { "epoch": 0.703688509970392, "grad_norm": 0.2715749740600586, "learning_rate": 1.2221123093441087e-05, "loss": 0.1562, "step": 39453 }, { "epoch": 0.7037063460921057, "grad_norm": 0.2605179250240326, "learning_rate": 1.2219785317832525e-05, "loss": 0.1122, "step": 39454 }, { "epoch": 0.7037241822138194, "grad_norm": 0.21195285022258759, "learning_rate": 1.2218447591764498e-05, "loss": 0.1308, "step": 39455 }, { "epoch": 0.7037420183355331, "grad_norm": 0.2376292645931244, "learning_rate": 1.2217109915242173e-05, "loss": 0.1094, "step": 39456 }, { "epoch": 0.7037598544572469, "grad_norm": 0.27844753861427307, "learning_rate": 1.2215772288270754e-05, "loss": 0.1126, "step": 39457 }, { "epoch": 0.7037776905789606, "grad_norm": 0.29905635118484497, "learning_rate": 1.2214434710855422e-05, "loss": 0.0924, "step": 39458 }, { "epoch": 0.7037955267006742, "grad_norm": 0.3172041177749634, "learning_rate": 1.2213097183001343e-05, "loss": 0.1333, "step": 39459 }, { "epoch": 0.7038133628223879, "grad_norm": 0.28696951270103455, "learning_rate": 1.2211759704713726e-05, "loss": 0.1156, "step": 39460 }, { "epoch": 0.7038311989441016, "grad_norm": 0.33109989762306213, "learning_rate": 1.2210422275997747e-05, "loss": 0.1221, "step": 39461 }, { "epoch": 0.7038490350658153, "grad_norm": 0.2560708820819855, "learning_rate": 1.2209084896858586e-05, "loss": 0.1148, "step": 39462 }, { "epoch": 0.703866871187529, "grad_norm": 0.2727649211883545, "learning_rate": 1.2207747567301423e-05, "loss": 0.1355, "step": 39463 }, { "epoch": 0.7038847073092427, "grad_norm": 0.22568921744823456, "learning_rate": 1.2206410287331458e-05, "loss": 0.14, "step": 39464 }, { "epoch": 0.7039025434309564, "grad_norm": 0.46994584798812866, "learning_rate": 1.2205073056953864e-05, "loss": 0.0991, "step": 39465 }, { "epoch": 0.7039203795526701, "grad_norm": 0.19712673127651215, "learning_rate": 1.2203735876173825e-05, "loss": 0.0513, "step": 39466 }, { "epoch": 0.7039382156743837, "grad_norm": 0.37030521035194397, "learning_rate": 1.2202398744996519e-05, "loss": 0.18, "step": 39467 }, { "epoch": 0.7039560517960974, "grad_norm": 0.23003219068050385, "learning_rate": 1.2201061663427144e-05, "loss": 0.1192, "step": 39468 }, { "epoch": 0.7039738879178111, "grad_norm": 0.2154010534286499, "learning_rate": 1.2199724631470874e-05, "loss": 0.1354, "step": 39469 }, { "epoch": 0.7039917240395248, "grad_norm": 0.3016541600227356, "learning_rate": 1.2198387649132884e-05, "loss": 0.1309, "step": 39470 }, { "epoch": 0.7040095601612385, "grad_norm": 0.28513240814208984, "learning_rate": 1.2197050716418373e-05, "loss": 0.1045, "step": 39471 }, { "epoch": 0.7040273962829522, "grad_norm": 0.2755373418331146, "learning_rate": 1.2195713833332506e-05, "loss": 0.1074, "step": 39472 }, { "epoch": 0.7040452324046659, "grad_norm": 0.2639329433441162, "learning_rate": 1.2194376999880484e-05, "loss": 0.1108, "step": 39473 }, { "epoch": 0.7040630685263797, "grad_norm": 0.3147179186344147, "learning_rate": 1.219304021606748e-05, "loss": 0.0998, "step": 39474 }, { "epoch": 0.7040809046480934, "grad_norm": 0.2252577692270279, "learning_rate": 1.2191703481898676e-05, "loss": 0.0922, "step": 39475 }, { "epoch": 0.704098740769807, "grad_norm": 0.26773354411125183, "learning_rate": 1.2190366797379244e-05, "loss": 0.0921, "step": 39476 }, { "epoch": 0.7041165768915207, "grad_norm": 0.19199728965759277, "learning_rate": 1.2189030162514384e-05, "loss": 0.086, "step": 39477 }, { "epoch": 0.7041344130132344, "grad_norm": 0.26217150688171387, "learning_rate": 1.2187693577309267e-05, "loss": 0.1318, "step": 39478 }, { "epoch": 0.7041522491349481, "grad_norm": 0.40364983677864075, "learning_rate": 1.2186357041769075e-05, "loss": 0.0938, "step": 39479 }, { "epoch": 0.7041700852566618, "grad_norm": 0.2648116946220398, "learning_rate": 1.218502055589898e-05, "loss": 0.1234, "step": 39480 }, { "epoch": 0.7041879213783755, "grad_norm": 0.21393592655658722, "learning_rate": 1.2183684119704181e-05, "loss": 0.0978, "step": 39481 }, { "epoch": 0.7042057575000892, "grad_norm": 0.5880461931228638, "learning_rate": 1.218234773318985e-05, "loss": 0.164, "step": 39482 }, { "epoch": 0.7042235936218029, "grad_norm": 0.22025704383850098, "learning_rate": 1.2181011396361152e-05, "loss": 0.1547, "step": 39483 }, { "epoch": 0.7042414297435166, "grad_norm": 0.320406049489975, "learning_rate": 1.2179675109223296e-05, "loss": 0.1678, "step": 39484 }, { "epoch": 0.7042592658652302, "grad_norm": 0.38594451546669006, "learning_rate": 1.2178338871781436e-05, "loss": 0.1789, "step": 39485 }, { "epoch": 0.7042771019869439, "grad_norm": 0.31850072741508484, "learning_rate": 1.2177002684040773e-05, "loss": 0.1568, "step": 39486 }, { "epoch": 0.7042949381086576, "grad_norm": 0.2897709012031555, "learning_rate": 1.2175666546006475e-05, "loss": 0.1545, "step": 39487 }, { "epoch": 0.7043127742303713, "grad_norm": 0.2247619777917862, "learning_rate": 1.2174330457683724e-05, "loss": 0.1233, "step": 39488 }, { "epoch": 0.704330610352085, "grad_norm": 0.2698691487312317, "learning_rate": 1.2172994419077688e-05, "loss": 0.1577, "step": 39489 }, { "epoch": 0.7043484464737987, "grad_norm": 0.197042316198349, "learning_rate": 1.2171658430193566e-05, "loss": 0.08, "step": 39490 }, { "epoch": 0.7043662825955125, "grad_norm": 0.28770431876182556, "learning_rate": 1.2170322491036529e-05, "loss": 0.1744, "step": 39491 }, { "epoch": 0.7043841187172262, "grad_norm": 0.2561630606651306, "learning_rate": 1.2168986601611747e-05, "loss": 0.1417, "step": 39492 }, { "epoch": 0.7044019548389399, "grad_norm": 0.250017911195755, "learning_rate": 1.2167650761924402e-05, "loss": 0.1235, "step": 39493 }, { "epoch": 0.7044197909606535, "grad_norm": 0.2779890298843384, "learning_rate": 1.2166314971979681e-05, "loss": 0.1395, "step": 39494 }, { "epoch": 0.7044376270823672, "grad_norm": 0.3253113329410553, "learning_rate": 1.2164979231782755e-05, "loss": 0.156, "step": 39495 }, { "epoch": 0.7044554632040809, "grad_norm": 0.6006177067756653, "learning_rate": 1.2163643541338803e-05, "loss": 0.1355, "step": 39496 }, { "epoch": 0.7044732993257946, "grad_norm": 0.2981247305870056, "learning_rate": 1.2162307900652994e-05, "loss": 0.1408, "step": 39497 }, { "epoch": 0.7044911354475083, "grad_norm": 0.3281283676624298, "learning_rate": 1.2160972309730522e-05, "loss": 0.1117, "step": 39498 }, { "epoch": 0.704508971569222, "grad_norm": 0.31892332434654236, "learning_rate": 1.2159636768576546e-05, "loss": 0.1264, "step": 39499 }, { "epoch": 0.7045268076909357, "grad_norm": 0.26161426305770874, "learning_rate": 1.2158301277196263e-05, "loss": 0.1437, "step": 39500 }, { "epoch": 0.7045446438126494, "grad_norm": 0.2609124779701233, "learning_rate": 1.2156965835594841e-05, "loss": 0.1264, "step": 39501 }, { "epoch": 0.704562479934363, "grad_norm": 0.2709504961967468, "learning_rate": 1.2155630443777444e-05, "loss": 0.116, "step": 39502 }, { "epoch": 0.7045803160560767, "grad_norm": 0.21923157572746277, "learning_rate": 1.215429510174927e-05, "loss": 0.0948, "step": 39503 }, { "epoch": 0.7045981521777904, "grad_norm": 0.35085877776145935, "learning_rate": 1.2152959809515483e-05, "loss": 0.1545, "step": 39504 }, { "epoch": 0.7046159882995041, "grad_norm": 0.3090458810329437, "learning_rate": 1.2151624567081263e-05, "loss": 0.1022, "step": 39505 }, { "epoch": 0.7046338244212178, "grad_norm": 0.24669237434864044, "learning_rate": 1.2150289374451773e-05, "loss": 0.1294, "step": 39506 }, { "epoch": 0.7046516605429315, "grad_norm": 0.2640063762664795, "learning_rate": 1.2148954231632212e-05, "loss": 0.1388, "step": 39507 }, { "epoch": 0.7046694966646453, "grad_norm": 0.3696783483028412, "learning_rate": 1.2147619138627739e-05, "loss": 0.1119, "step": 39508 }, { "epoch": 0.704687332786359, "grad_norm": 0.3612895905971527, "learning_rate": 1.2146284095443536e-05, "loss": 0.1096, "step": 39509 }, { "epoch": 0.7047051689080727, "grad_norm": 0.24098873138427734, "learning_rate": 1.2144949102084774e-05, "loss": 0.0826, "step": 39510 }, { "epoch": 0.7047230050297864, "grad_norm": 0.3146705627441406, "learning_rate": 1.2143614158556621e-05, "loss": 0.153, "step": 39511 }, { "epoch": 0.7047408411515, "grad_norm": 0.28849712014198303, "learning_rate": 1.214227926486426e-05, "loss": 0.1159, "step": 39512 }, { "epoch": 0.7047586772732137, "grad_norm": 0.28125548362731934, "learning_rate": 1.2140944421012873e-05, "loss": 0.132, "step": 39513 }, { "epoch": 0.7047765133949274, "grad_norm": 0.382479727268219, "learning_rate": 1.2139609627007628e-05, "loss": 0.0839, "step": 39514 }, { "epoch": 0.7047943495166411, "grad_norm": 0.20573905110359192, "learning_rate": 1.213827488285369e-05, "loss": 0.0919, "step": 39515 }, { "epoch": 0.7048121856383548, "grad_norm": 0.269321084022522, "learning_rate": 1.2136940188556248e-05, "loss": 0.1145, "step": 39516 }, { "epoch": 0.7048300217600685, "grad_norm": 0.342955619096756, "learning_rate": 1.2135605544120469e-05, "loss": 0.1631, "step": 39517 }, { "epoch": 0.7048478578817822, "grad_norm": 0.3823602795600891, "learning_rate": 1.2134270949551526e-05, "loss": 0.157, "step": 39518 }, { "epoch": 0.7048656940034959, "grad_norm": 0.24534699320793152, "learning_rate": 1.2132936404854583e-05, "loss": 0.1119, "step": 39519 }, { "epoch": 0.7048835301252095, "grad_norm": 0.32959961891174316, "learning_rate": 1.2131601910034835e-05, "loss": 0.1327, "step": 39520 }, { "epoch": 0.7049013662469232, "grad_norm": 0.21906019747257233, "learning_rate": 1.2130267465097439e-05, "loss": 0.0624, "step": 39521 }, { "epoch": 0.7049192023686369, "grad_norm": 0.2751816213130951, "learning_rate": 1.2128933070047572e-05, "loss": 0.0849, "step": 39522 }, { "epoch": 0.7049370384903506, "grad_norm": 0.2616502046585083, "learning_rate": 1.2127598724890407e-05, "loss": 0.1097, "step": 39523 }, { "epoch": 0.7049548746120643, "grad_norm": 0.2105613797903061, "learning_rate": 1.2126264429631104e-05, "loss": 0.0955, "step": 39524 }, { "epoch": 0.7049727107337781, "grad_norm": 0.3224189281463623, "learning_rate": 1.2124930184274857e-05, "loss": 0.1294, "step": 39525 }, { "epoch": 0.7049905468554918, "grad_norm": 0.22643953561782837, "learning_rate": 1.212359598882682e-05, "loss": 0.1383, "step": 39526 }, { "epoch": 0.7050083829772055, "grad_norm": 0.3114086985588074, "learning_rate": 1.212226184329218e-05, "loss": 0.1441, "step": 39527 }, { "epoch": 0.7050262190989192, "grad_norm": 0.2591535151004791, "learning_rate": 1.2120927747676093e-05, "loss": 0.1313, "step": 39528 }, { "epoch": 0.7050440552206328, "grad_norm": 0.26897308230400085, "learning_rate": 1.2119593701983745e-05, "loss": 0.1032, "step": 39529 }, { "epoch": 0.7050618913423465, "grad_norm": 0.2618337869644165, "learning_rate": 1.2118259706220303e-05, "loss": 0.1687, "step": 39530 }, { "epoch": 0.7050797274640602, "grad_norm": 0.32848429679870605, "learning_rate": 1.2116925760390934e-05, "loss": 0.1022, "step": 39531 }, { "epoch": 0.7050975635857739, "grad_norm": 0.4262397885322571, "learning_rate": 1.21155918645008e-05, "loss": 0.1925, "step": 39532 }, { "epoch": 0.7051153997074876, "grad_norm": 0.26168233156204224, "learning_rate": 1.2114258018555094e-05, "loss": 0.107, "step": 39533 }, { "epoch": 0.7051332358292013, "grad_norm": 0.30384665727615356, "learning_rate": 1.2112924222558975e-05, "loss": 0.1858, "step": 39534 }, { "epoch": 0.705151071950915, "grad_norm": 0.24300090968608856, "learning_rate": 1.2111590476517609e-05, "loss": 0.1479, "step": 39535 }, { "epoch": 0.7051689080726287, "grad_norm": 0.31717199087142944, "learning_rate": 1.2110256780436174e-05, "loss": 0.1077, "step": 39536 }, { "epoch": 0.7051867441943424, "grad_norm": 0.2913479804992676, "learning_rate": 1.2108923134319825e-05, "loss": 0.1318, "step": 39537 }, { "epoch": 0.705204580316056, "grad_norm": 0.33571234345436096, "learning_rate": 1.210758953817375e-05, "loss": 0.0726, "step": 39538 }, { "epoch": 0.7052224164377697, "grad_norm": 0.24905113875865936, "learning_rate": 1.2106255992003102e-05, "loss": 0.1174, "step": 39539 }, { "epoch": 0.7052402525594834, "grad_norm": 0.3410506844520569, "learning_rate": 1.210492249581307e-05, "loss": 0.1413, "step": 39540 }, { "epoch": 0.7052580886811972, "grad_norm": 0.35247164964675903, "learning_rate": 1.2103589049608802e-05, "loss": 0.1353, "step": 39541 }, { "epoch": 0.7052759248029109, "grad_norm": 0.22630973160266876, "learning_rate": 1.2102255653395486e-05, "loss": 0.0838, "step": 39542 }, { "epoch": 0.7052937609246246, "grad_norm": 0.3605957329273224, "learning_rate": 1.2100922307178284e-05, "loss": 0.0771, "step": 39543 }, { "epoch": 0.7053115970463383, "grad_norm": 0.23125700652599335, "learning_rate": 1.2099589010962358e-05, "loss": 0.1499, "step": 39544 }, { "epoch": 0.705329433168052, "grad_norm": 0.2557556927204132, "learning_rate": 1.2098255764752874e-05, "loss": 0.1322, "step": 39545 }, { "epoch": 0.7053472692897657, "grad_norm": 0.23560579121112823, "learning_rate": 1.2096922568555016e-05, "loss": 0.092, "step": 39546 }, { "epoch": 0.7053651054114793, "grad_norm": 0.23158732056617737, "learning_rate": 1.2095589422373946e-05, "loss": 0.1317, "step": 39547 }, { "epoch": 0.705382941533193, "grad_norm": 0.3390738368034363, "learning_rate": 1.2094256326214823e-05, "loss": 0.1297, "step": 39548 }, { "epoch": 0.7054007776549067, "grad_norm": 0.3284175992012024, "learning_rate": 1.2092923280082823e-05, "loss": 0.0942, "step": 39549 }, { "epoch": 0.7054186137766204, "grad_norm": 0.18790553510189056, "learning_rate": 1.20915902839831e-05, "loss": 0.0931, "step": 39550 }, { "epoch": 0.7054364498983341, "grad_norm": 0.23232364654541016, "learning_rate": 1.209025733792084e-05, "loss": 0.1161, "step": 39551 }, { "epoch": 0.7054542860200478, "grad_norm": 0.2700973153114319, "learning_rate": 1.2088924441901203e-05, "loss": 0.1284, "step": 39552 }, { "epoch": 0.7054721221417615, "grad_norm": 0.20915038883686066, "learning_rate": 1.2087591595929345e-05, "loss": 0.082, "step": 39553 }, { "epoch": 0.7054899582634752, "grad_norm": 0.4089609384536743, "learning_rate": 1.208625880001045e-05, "loss": 0.1622, "step": 39554 }, { "epoch": 0.7055077943851888, "grad_norm": 0.37525680661201477, "learning_rate": 1.2084926054149667e-05, "loss": 0.1201, "step": 39555 }, { "epoch": 0.7055256305069025, "grad_norm": 0.3497820794582367, "learning_rate": 1.2083593358352182e-05, "loss": 0.1389, "step": 39556 }, { "epoch": 0.7055434666286162, "grad_norm": 0.2753267288208008, "learning_rate": 1.208226071262315e-05, "loss": 0.0948, "step": 39557 }, { "epoch": 0.70556130275033, "grad_norm": 0.2556132972240448, "learning_rate": 1.2080928116967726e-05, "loss": 0.096, "step": 39558 }, { "epoch": 0.7055791388720437, "grad_norm": 0.22126372158527374, "learning_rate": 1.2079595571391098e-05, "loss": 0.122, "step": 39559 }, { "epoch": 0.7055969749937574, "grad_norm": 0.1990654319524765, "learning_rate": 1.2078263075898419e-05, "loss": 0.0653, "step": 39560 }, { "epoch": 0.7056148111154711, "grad_norm": 0.19901281595230103, "learning_rate": 1.2076930630494856e-05, "loss": 0.066, "step": 39561 }, { "epoch": 0.7056326472371848, "grad_norm": 0.30381327867507935, "learning_rate": 1.2075598235185574e-05, "loss": 0.1236, "step": 39562 }, { "epoch": 0.7056504833588985, "grad_norm": 0.24971404671669006, "learning_rate": 1.2074265889975725e-05, "loss": 0.0918, "step": 39563 }, { "epoch": 0.7056683194806121, "grad_norm": 0.3000890016555786, "learning_rate": 1.2072933594870498e-05, "loss": 0.1471, "step": 39564 }, { "epoch": 0.7056861556023258, "grad_norm": 0.25084424018859863, "learning_rate": 1.2071601349875045e-05, "loss": 0.1427, "step": 39565 }, { "epoch": 0.7057039917240395, "grad_norm": 0.3690944015979767, "learning_rate": 1.2070269154994517e-05, "loss": 0.1524, "step": 39566 }, { "epoch": 0.7057218278457532, "grad_norm": 0.2730376422405243, "learning_rate": 1.2068937010234105e-05, "loss": 0.0929, "step": 39567 }, { "epoch": 0.7057396639674669, "grad_norm": 0.27159979939460754, "learning_rate": 1.2067604915598952e-05, "loss": 0.1036, "step": 39568 }, { "epoch": 0.7057575000891806, "grad_norm": 0.25176775455474854, "learning_rate": 1.2066272871094233e-05, "loss": 0.133, "step": 39569 }, { "epoch": 0.7057753362108943, "grad_norm": 0.19935861229896545, "learning_rate": 1.2064940876725111e-05, "loss": 0.1033, "step": 39570 }, { "epoch": 0.705793172332608, "grad_norm": 0.30074551701545715, "learning_rate": 1.2063608932496736e-05, "loss": 0.175, "step": 39571 }, { "epoch": 0.7058110084543217, "grad_norm": 0.2840253412723541, "learning_rate": 1.2062277038414291e-05, "loss": 0.1232, "step": 39572 }, { "epoch": 0.7058288445760353, "grad_norm": 0.4243912398815155, "learning_rate": 1.2060945194482925e-05, "loss": 0.1485, "step": 39573 }, { "epoch": 0.705846680697749, "grad_norm": 0.232137992978096, "learning_rate": 1.2059613400707809e-05, "loss": 0.1552, "step": 39574 }, { "epoch": 0.7058645168194628, "grad_norm": 0.22122007608413696, "learning_rate": 1.2058281657094097e-05, "loss": 0.0808, "step": 39575 }, { "epoch": 0.7058823529411765, "grad_norm": 0.20192213356494904, "learning_rate": 1.2056949963646948e-05, "loss": 0.0747, "step": 39576 }, { "epoch": 0.7059001890628902, "grad_norm": 0.3498326539993286, "learning_rate": 1.2055618320371541e-05, "loss": 0.1185, "step": 39577 }, { "epoch": 0.7059180251846039, "grad_norm": 0.31498607993125916, "learning_rate": 1.2054286727273028e-05, "loss": 0.1426, "step": 39578 }, { "epoch": 0.7059358613063176, "grad_norm": 0.4067002832889557, "learning_rate": 1.205295518435657e-05, "loss": 0.1821, "step": 39579 }, { "epoch": 0.7059536974280313, "grad_norm": 0.29944872856140137, "learning_rate": 1.2051623691627322e-05, "loss": 0.103, "step": 39580 }, { "epoch": 0.705971533549745, "grad_norm": 0.2615627646446228, "learning_rate": 1.2050292249090462e-05, "loss": 0.1034, "step": 39581 }, { "epoch": 0.7059893696714586, "grad_norm": 0.38268721103668213, "learning_rate": 1.2048960856751132e-05, "loss": 0.1638, "step": 39582 }, { "epoch": 0.7060072057931723, "grad_norm": 0.3006829619407654, "learning_rate": 1.2047629514614512e-05, "loss": 0.087, "step": 39583 }, { "epoch": 0.706025041914886, "grad_norm": 0.2751002013683319, "learning_rate": 1.2046298222685743e-05, "loss": 0.1336, "step": 39584 }, { "epoch": 0.7060428780365997, "grad_norm": 0.21429263055324554, "learning_rate": 1.2044966980970008e-05, "loss": 0.0848, "step": 39585 }, { "epoch": 0.7060607141583134, "grad_norm": 0.3600725531578064, "learning_rate": 1.2043635789472452e-05, "loss": 0.157, "step": 39586 }, { "epoch": 0.7060785502800271, "grad_norm": 0.2642713785171509, "learning_rate": 1.204230464819824e-05, "loss": 0.128, "step": 39587 }, { "epoch": 0.7060963864017408, "grad_norm": 0.2573707699775696, "learning_rate": 1.2040973557152533e-05, "loss": 0.1207, "step": 39588 }, { "epoch": 0.7061142225234545, "grad_norm": 0.23546798527240753, "learning_rate": 1.2039642516340477e-05, "loss": 0.0956, "step": 39589 }, { "epoch": 0.7061320586451681, "grad_norm": 0.3157248795032501, "learning_rate": 1.2038311525767252e-05, "loss": 0.1373, "step": 39590 }, { "epoch": 0.7061498947668818, "grad_norm": 0.2501681447029114, "learning_rate": 1.203698058543801e-05, "loss": 0.1755, "step": 39591 }, { "epoch": 0.7061677308885956, "grad_norm": 0.2777828872203827, "learning_rate": 1.2035649695357906e-05, "loss": 0.0528, "step": 39592 }, { "epoch": 0.7061855670103093, "grad_norm": 0.2489694356918335, "learning_rate": 1.2034318855532095e-05, "loss": 0.1044, "step": 39593 }, { "epoch": 0.706203403132023, "grad_norm": 0.24624349176883698, "learning_rate": 1.203298806596575e-05, "loss": 0.0771, "step": 39594 }, { "epoch": 0.7062212392537367, "grad_norm": 0.27731943130493164, "learning_rate": 1.2031657326664014e-05, "loss": 0.1418, "step": 39595 }, { "epoch": 0.7062390753754504, "grad_norm": 0.3589498996734619, "learning_rate": 1.2030326637632061e-05, "loss": 0.1626, "step": 39596 }, { "epoch": 0.7062569114971641, "grad_norm": 0.42085835337638855, "learning_rate": 1.202899599887503e-05, "loss": 0.1956, "step": 39597 }, { "epoch": 0.7062747476188778, "grad_norm": 0.23246483504772186, "learning_rate": 1.2027665410398106e-05, "loss": 0.1078, "step": 39598 }, { "epoch": 0.7062925837405915, "grad_norm": 0.24317991733551025, "learning_rate": 1.2026334872206426e-05, "loss": 0.1329, "step": 39599 }, { "epoch": 0.7063104198623051, "grad_norm": 0.23957769572734833, "learning_rate": 1.2025004384305155e-05, "loss": 0.1369, "step": 39600 }, { "epoch": 0.7063282559840188, "grad_norm": 0.23834265768527985, "learning_rate": 1.2023673946699451e-05, "loss": 0.0935, "step": 39601 }, { "epoch": 0.7063460921057325, "grad_norm": 0.1824650913476944, "learning_rate": 1.2022343559394456e-05, "loss": 0.1157, "step": 39602 }, { "epoch": 0.7063639282274462, "grad_norm": 0.4296053647994995, "learning_rate": 1.202101322239535e-05, "loss": 0.1839, "step": 39603 }, { "epoch": 0.7063817643491599, "grad_norm": 0.2947930693626404, "learning_rate": 1.201968293570728e-05, "loss": 0.0864, "step": 39604 }, { "epoch": 0.7063996004708736, "grad_norm": 0.21025703847408295, "learning_rate": 1.20183526993354e-05, "loss": 0.127, "step": 39605 }, { "epoch": 0.7064174365925873, "grad_norm": 0.3835030496120453, "learning_rate": 1.2017022513284862e-05, "loss": 0.1938, "step": 39606 }, { "epoch": 0.706435272714301, "grad_norm": 0.39272934198379517, "learning_rate": 1.2015692377560836e-05, "loss": 0.1223, "step": 39607 }, { "epoch": 0.7064531088360146, "grad_norm": 0.2779324948787689, "learning_rate": 1.2014362292168475e-05, "loss": 0.1832, "step": 39608 }, { "epoch": 0.7064709449577284, "grad_norm": 0.29282549023628235, "learning_rate": 1.2013032257112917e-05, "loss": 0.0701, "step": 39609 }, { "epoch": 0.7064887810794421, "grad_norm": 0.2887338697910309, "learning_rate": 1.2011702272399334e-05, "loss": 0.0793, "step": 39610 }, { "epoch": 0.7065066172011558, "grad_norm": 0.26466473937034607, "learning_rate": 1.2010372338032885e-05, "loss": 0.1165, "step": 39611 }, { "epoch": 0.7065244533228695, "grad_norm": 0.3904156982898712, "learning_rate": 1.2009042454018724e-05, "loss": 0.1558, "step": 39612 }, { "epoch": 0.7065422894445832, "grad_norm": 0.26360198855400085, "learning_rate": 1.2007712620361999e-05, "loss": 0.1097, "step": 39613 }, { "epoch": 0.7065601255662969, "grad_norm": 0.21179459989070892, "learning_rate": 1.2006382837067867e-05, "loss": 0.0915, "step": 39614 }, { "epoch": 0.7065779616880106, "grad_norm": 0.23796197772026062, "learning_rate": 1.2005053104141475e-05, "loss": 0.117, "step": 39615 }, { "epoch": 0.7065957978097243, "grad_norm": 0.28057971596717834, "learning_rate": 1.2003723421587993e-05, "loss": 0.1409, "step": 39616 }, { "epoch": 0.706613633931438, "grad_norm": 0.3126096725463867, "learning_rate": 1.200239378941257e-05, "loss": 0.1275, "step": 39617 }, { "epoch": 0.7066314700531516, "grad_norm": 0.23418393731117249, "learning_rate": 1.2001064207620355e-05, "loss": 0.189, "step": 39618 }, { "epoch": 0.7066493061748653, "grad_norm": 0.2520259916782379, "learning_rate": 1.1999734676216498e-05, "loss": 0.1263, "step": 39619 }, { "epoch": 0.706667142296579, "grad_norm": 0.280765563249588, "learning_rate": 1.199840519520617e-05, "loss": 0.145, "step": 39620 }, { "epoch": 0.7066849784182927, "grad_norm": 0.2633095681667328, "learning_rate": 1.199707576459451e-05, "loss": 0.1255, "step": 39621 }, { "epoch": 0.7067028145400064, "grad_norm": 0.3299943208694458, "learning_rate": 1.1995746384386669e-05, "loss": 0.1497, "step": 39622 }, { "epoch": 0.7067206506617201, "grad_norm": 0.3245101273059845, "learning_rate": 1.1994417054587814e-05, "loss": 0.1527, "step": 39623 }, { "epoch": 0.7067384867834338, "grad_norm": 0.2601618766784668, "learning_rate": 1.1993087775203083e-05, "loss": 0.0991, "step": 39624 }, { "epoch": 0.7067563229051474, "grad_norm": 0.19852134585380554, "learning_rate": 1.1991758546237644e-05, "loss": 0.1041, "step": 39625 }, { "epoch": 0.7067741590268612, "grad_norm": 0.25954705476760864, "learning_rate": 1.1990429367696642e-05, "loss": 0.1074, "step": 39626 }, { "epoch": 0.7067919951485749, "grad_norm": 0.4318484663963318, "learning_rate": 1.198910023958523e-05, "loss": 0.0962, "step": 39627 }, { "epoch": 0.7068098312702886, "grad_norm": 0.264016717672348, "learning_rate": 1.198777116190855e-05, "loss": 0.1099, "step": 39628 }, { "epoch": 0.7068276673920023, "grad_norm": 0.2937481105327606, "learning_rate": 1.1986442134671772e-05, "loss": 0.1449, "step": 39629 }, { "epoch": 0.706845503513716, "grad_norm": 0.18546999990940094, "learning_rate": 1.198511315788004e-05, "loss": 0.1302, "step": 39630 }, { "epoch": 0.7068633396354297, "grad_norm": 0.2516763210296631, "learning_rate": 1.1983784231538502e-05, "loss": 0.1221, "step": 39631 }, { "epoch": 0.7068811757571434, "grad_norm": 0.19915767014026642, "learning_rate": 1.1982455355652305e-05, "loss": 0.105, "step": 39632 }, { "epoch": 0.7068990118788571, "grad_norm": 0.47600650787353516, "learning_rate": 1.1981126530226617e-05, "loss": 0.1699, "step": 39633 }, { "epoch": 0.7069168480005708, "grad_norm": 0.2385549247264862, "learning_rate": 1.1979797755266579e-05, "loss": 0.1292, "step": 39634 }, { "epoch": 0.7069346841222844, "grad_norm": 0.2580333948135376, "learning_rate": 1.197846903077734e-05, "loss": 0.1122, "step": 39635 }, { "epoch": 0.7069525202439981, "grad_norm": 0.24852822721004486, "learning_rate": 1.1977140356764044e-05, "loss": 0.1258, "step": 39636 }, { "epoch": 0.7069703563657118, "grad_norm": 0.3117390275001526, "learning_rate": 1.1975811733231851e-05, "loss": 0.1282, "step": 39637 }, { "epoch": 0.7069881924874255, "grad_norm": 0.23703408241271973, "learning_rate": 1.1974483160185918e-05, "loss": 0.1276, "step": 39638 }, { "epoch": 0.7070060286091392, "grad_norm": 0.2623916268348694, "learning_rate": 1.1973154637631386e-05, "loss": 0.1578, "step": 39639 }, { "epoch": 0.7070238647308529, "grad_norm": 0.21476981043815613, "learning_rate": 1.197182616557341e-05, "loss": 0.0866, "step": 39640 }, { "epoch": 0.7070417008525666, "grad_norm": 0.22601446509361267, "learning_rate": 1.1970497744017122e-05, "loss": 0.1166, "step": 39641 }, { "epoch": 0.7070595369742804, "grad_norm": 0.3203946650028229, "learning_rate": 1.1969169372967698e-05, "loss": 0.129, "step": 39642 }, { "epoch": 0.707077373095994, "grad_norm": 0.19800713658332825, "learning_rate": 1.1967841052430274e-05, "loss": 0.109, "step": 39643 }, { "epoch": 0.7070952092177077, "grad_norm": 0.24219807982444763, "learning_rate": 1.1966512782409998e-05, "loss": 0.0712, "step": 39644 }, { "epoch": 0.7071130453394214, "grad_norm": 0.23886223137378693, "learning_rate": 1.1965184562912008e-05, "loss": 0.1404, "step": 39645 }, { "epoch": 0.7071308814611351, "grad_norm": 0.29603704810142517, "learning_rate": 1.1963856393941478e-05, "loss": 0.1343, "step": 39646 }, { "epoch": 0.7071487175828488, "grad_norm": 0.23609548807144165, "learning_rate": 1.196252827550354e-05, "loss": 0.136, "step": 39647 }, { "epoch": 0.7071665537045625, "grad_norm": 0.27406346797943115, "learning_rate": 1.1961200207603349e-05, "loss": 0.141, "step": 39648 }, { "epoch": 0.7071843898262762, "grad_norm": 0.28046584129333496, "learning_rate": 1.1959872190246035e-05, "loss": 0.1485, "step": 39649 }, { "epoch": 0.7072022259479899, "grad_norm": 0.2155359834432602, "learning_rate": 1.1958544223436774e-05, "loss": 0.1326, "step": 39650 }, { "epoch": 0.7072200620697036, "grad_norm": 0.3944588303565979, "learning_rate": 1.1957216307180691e-05, "loss": 0.1379, "step": 39651 }, { "epoch": 0.7072378981914172, "grad_norm": 0.22116807103157043, "learning_rate": 1.195588844148295e-05, "loss": 0.1046, "step": 39652 }, { "epoch": 0.7072557343131309, "grad_norm": 0.1930345892906189, "learning_rate": 1.195456062634869e-05, "loss": 0.0922, "step": 39653 }, { "epoch": 0.7072735704348446, "grad_norm": 0.2768682837486267, "learning_rate": 1.195323286178305e-05, "loss": 0.1324, "step": 39654 }, { "epoch": 0.7072914065565583, "grad_norm": 0.7779185175895691, "learning_rate": 1.1951905147791195e-05, "loss": 0.1455, "step": 39655 }, { "epoch": 0.707309242678272, "grad_norm": 0.21959508955478668, "learning_rate": 1.1950577484378263e-05, "loss": 0.1162, "step": 39656 }, { "epoch": 0.7073270787999857, "grad_norm": 0.28785791993141174, "learning_rate": 1.1949249871549401e-05, "loss": 0.0646, "step": 39657 }, { "epoch": 0.7073449149216994, "grad_norm": 0.2897997200489044, "learning_rate": 1.1947922309309742e-05, "loss": 0.1517, "step": 39658 }, { "epoch": 0.7073627510434132, "grad_norm": 0.24086059629917145, "learning_rate": 1.1946594797664454e-05, "loss": 0.1447, "step": 39659 }, { "epoch": 0.7073805871651269, "grad_norm": 0.3726377487182617, "learning_rate": 1.1945267336618673e-05, "loss": 0.1547, "step": 39660 }, { "epoch": 0.7073984232868405, "grad_norm": 0.33043307065963745, "learning_rate": 1.1943939926177547e-05, "loss": 0.1545, "step": 39661 }, { "epoch": 0.7074162594085542, "grad_norm": 0.31618979573249817, "learning_rate": 1.194261256634621e-05, "loss": 0.1758, "step": 39662 }, { "epoch": 0.7074340955302679, "grad_norm": 0.26249876618385315, "learning_rate": 1.1941285257129822e-05, "loss": 0.082, "step": 39663 }, { "epoch": 0.7074519316519816, "grad_norm": 0.22928814589977264, "learning_rate": 1.1939957998533528e-05, "loss": 0.0806, "step": 39664 }, { "epoch": 0.7074697677736953, "grad_norm": 0.2218640297651291, "learning_rate": 1.1938630790562451e-05, "loss": 0.1145, "step": 39665 }, { "epoch": 0.707487603895409, "grad_norm": 0.2665131390094757, "learning_rate": 1.1937303633221768e-05, "loss": 0.0862, "step": 39666 }, { "epoch": 0.7075054400171227, "grad_norm": 0.30140218138694763, "learning_rate": 1.1935976526516596e-05, "loss": 0.102, "step": 39667 }, { "epoch": 0.7075232761388364, "grad_norm": 0.30023959279060364, "learning_rate": 1.19346494704521e-05, "loss": 0.1228, "step": 39668 }, { "epoch": 0.70754111226055, "grad_norm": 0.3467610776424408, "learning_rate": 1.1933322465033417e-05, "loss": 0.1695, "step": 39669 }, { "epoch": 0.7075589483822637, "grad_norm": 0.2699323892593384, "learning_rate": 1.193199551026569e-05, "loss": 0.1543, "step": 39670 }, { "epoch": 0.7075767845039774, "grad_norm": 0.1974875032901764, "learning_rate": 1.193066860615405e-05, "loss": 0.0803, "step": 39671 }, { "epoch": 0.7075946206256911, "grad_norm": 0.33543792366981506, "learning_rate": 1.1929341752703663e-05, "loss": 0.1515, "step": 39672 }, { "epoch": 0.7076124567474048, "grad_norm": 0.249459370970726, "learning_rate": 1.192801494991966e-05, "loss": 0.0951, "step": 39673 }, { "epoch": 0.7076302928691185, "grad_norm": 0.37445738911628723, "learning_rate": 1.192668819780719e-05, "loss": 0.1051, "step": 39674 }, { "epoch": 0.7076481289908322, "grad_norm": 0.2494664192199707, "learning_rate": 1.192536149637138e-05, "loss": 0.1093, "step": 39675 }, { "epoch": 0.707665965112546, "grad_norm": 0.33687901496887207, "learning_rate": 1.1924034845617394e-05, "loss": 0.1461, "step": 39676 }, { "epoch": 0.7076838012342597, "grad_norm": 0.30348601937294006, "learning_rate": 1.1922708245550366e-05, "loss": 0.1372, "step": 39677 }, { "epoch": 0.7077016373559734, "grad_norm": 0.3136875629425049, "learning_rate": 1.1921381696175426e-05, "loss": 0.1277, "step": 39678 }, { "epoch": 0.707719473477687, "grad_norm": 0.25206974148750305, "learning_rate": 1.1920055197497739e-05, "loss": 0.1149, "step": 39679 }, { "epoch": 0.7077373095994007, "grad_norm": 0.25457724928855896, "learning_rate": 1.1918728749522426e-05, "loss": 0.1241, "step": 39680 }, { "epoch": 0.7077551457211144, "grad_norm": 0.29711389541625977, "learning_rate": 1.1917402352254647e-05, "loss": 0.1822, "step": 39681 }, { "epoch": 0.7077729818428281, "grad_norm": 0.2780214548110962, "learning_rate": 1.1916076005699536e-05, "loss": 0.1076, "step": 39682 }, { "epoch": 0.7077908179645418, "grad_norm": 0.3048097789287567, "learning_rate": 1.1914749709862235e-05, "loss": 0.1427, "step": 39683 }, { "epoch": 0.7078086540862555, "grad_norm": 0.3513798415660858, "learning_rate": 1.1913423464747872e-05, "loss": 0.0911, "step": 39684 }, { "epoch": 0.7078264902079692, "grad_norm": 0.2893765866756439, "learning_rate": 1.1912097270361611e-05, "loss": 0.1, "step": 39685 }, { "epoch": 0.7078443263296829, "grad_norm": 0.22490662336349487, "learning_rate": 1.191077112670858e-05, "loss": 0.1265, "step": 39686 }, { "epoch": 0.7078621624513965, "grad_norm": 0.20559197664260864, "learning_rate": 1.190944503379392e-05, "loss": 0.0745, "step": 39687 }, { "epoch": 0.7078799985731102, "grad_norm": 0.2870340645313263, "learning_rate": 1.1908118991622765e-05, "loss": 0.1494, "step": 39688 }, { "epoch": 0.7078978346948239, "grad_norm": 0.3129982054233551, "learning_rate": 1.190679300020027e-05, "loss": 0.2129, "step": 39689 }, { "epoch": 0.7079156708165376, "grad_norm": 0.26966583728790283, "learning_rate": 1.1905467059531569e-05, "loss": 0.0832, "step": 39690 }, { "epoch": 0.7079335069382513, "grad_norm": 0.2385096698999405, "learning_rate": 1.1904141169621802e-05, "loss": 0.1534, "step": 39691 }, { "epoch": 0.707951343059965, "grad_norm": 0.26734763383865356, "learning_rate": 1.1902815330476094e-05, "loss": 0.1456, "step": 39692 }, { "epoch": 0.7079691791816788, "grad_norm": 0.3229265511035919, "learning_rate": 1.19014895420996e-05, "loss": 0.155, "step": 39693 }, { "epoch": 0.7079870153033925, "grad_norm": 0.23967275023460388, "learning_rate": 1.1900163804497464e-05, "loss": 0.1118, "step": 39694 }, { "epoch": 0.7080048514251062, "grad_norm": 0.20419557392597198, "learning_rate": 1.1898838117674819e-05, "loss": 0.1304, "step": 39695 }, { "epoch": 0.7080226875468199, "grad_norm": 0.27684780955314636, "learning_rate": 1.1897512481636802e-05, "loss": 0.1084, "step": 39696 }, { "epoch": 0.7080405236685335, "grad_norm": 0.27113083004951477, "learning_rate": 1.1896186896388542e-05, "loss": 0.1187, "step": 39697 }, { "epoch": 0.7080583597902472, "grad_norm": 0.19479693472385406, "learning_rate": 1.18948613619352e-05, "loss": 0.0853, "step": 39698 }, { "epoch": 0.7080761959119609, "grad_norm": 0.33110857009887695, "learning_rate": 1.1893535878281898e-05, "loss": 0.1238, "step": 39699 }, { "epoch": 0.7080940320336746, "grad_norm": 0.25000420212745667, "learning_rate": 1.189221044543378e-05, "loss": 0.1321, "step": 39700 }, { "epoch": 0.7081118681553883, "grad_norm": 0.34915584325790405, "learning_rate": 1.1890885063395971e-05, "loss": 0.0949, "step": 39701 }, { "epoch": 0.708129704277102, "grad_norm": 0.3054065704345703, "learning_rate": 1.188955973217363e-05, "loss": 0.1013, "step": 39702 }, { "epoch": 0.7081475403988157, "grad_norm": 0.24145790934562683, "learning_rate": 1.1888234451771882e-05, "loss": 0.0958, "step": 39703 }, { "epoch": 0.7081653765205294, "grad_norm": 0.4281059205532074, "learning_rate": 1.1886909222195866e-05, "loss": 0.1507, "step": 39704 }, { "epoch": 0.708183212642243, "grad_norm": 0.31464439630508423, "learning_rate": 1.1885584043450711e-05, "loss": 0.145, "step": 39705 }, { "epoch": 0.7082010487639567, "grad_norm": 0.4476488530635834, "learning_rate": 1.1884258915541571e-05, "loss": 0.1952, "step": 39706 }, { "epoch": 0.7082188848856704, "grad_norm": 0.24803683161735535, "learning_rate": 1.1882933838473562e-05, "loss": 0.1896, "step": 39707 }, { "epoch": 0.7082367210073841, "grad_norm": 0.2919997572898865, "learning_rate": 1.1881608812251843e-05, "loss": 0.0733, "step": 39708 }, { "epoch": 0.7082545571290978, "grad_norm": 0.28765708208084106, "learning_rate": 1.188028383688154e-05, "loss": 0.1271, "step": 39709 }, { "epoch": 0.7082723932508116, "grad_norm": 0.24165499210357666, "learning_rate": 1.1878958912367778e-05, "loss": 0.1478, "step": 39710 }, { "epoch": 0.7082902293725253, "grad_norm": 0.27377569675445557, "learning_rate": 1.1877634038715712e-05, "loss": 0.0797, "step": 39711 }, { "epoch": 0.708308065494239, "grad_norm": 0.2816867232322693, "learning_rate": 1.187630921593047e-05, "loss": 0.0988, "step": 39712 }, { "epoch": 0.7083259016159527, "grad_norm": 0.2299519032239914, "learning_rate": 1.1874984444017183e-05, "loss": 0.0867, "step": 39713 }, { "epoch": 0.7083437377376663, "grad_norm": 0.3553406894207001, "learning_rate": 1.1873659722980985e-05, "loss": 0.1144, "step": 39714 }, { "epoch": 0.70836157385938, "grad_norm": 0.29719212651252747, "learning_rate": 1.1872335052827021e-05, "loss": 0.1036, "step": 39715 }, { "epoch": 0.7083794099810937, "grad_norm": 0.3521648347377777, "learning_rate": 1.1871010433560422e-05, "loss": 0.0983, "step": 39716 }, { "epoch": 0.7083972461028074, "grad_norm": 0.27065762877464294, "learning_rate": 1.1869685865186322e-05, "loss": 0.0778, "step": 39717 }, { "epoch": 0.7084150822245211, "grad_norm": 0.23846198618412018, "learning_rate": 1.1868361347709843e-05, "loss": 0.1031, "step": 39718 }, { "epoch": 0.7084329183462348, "grad_norm": 0.28253084421157837, "learning_rate": 1.1867036881136142e-05, "loss": 0.1451, "step": 39719 }, { "epoch": 0.7084507544679485, "grad_norm": 0.2843198776245117, "learning_rate": 1.1865712465470336e-05, "loss": 0.1103, "step": 39720 }, { "epoch": 0.7084685905896622, "grad_norm": 0.3050706088542938, "learning_rate": 1.1864388100717569e-05, "loss": 0.0992, "step": 39721 }, { "epoch": 0.7084864267113758, "grad_norm": 0.30785882472991943, "learning_rate": 1.1863063786882971e-05, "loss": 0.1558, "step": 39722 }, { "epoch": 0.7085042628330895, "grad_norm": 0.20489034056663513, "learning_rate": 1.1861739523971668e-05, "loss": 0.1242, "step": 39723 }, { "epoch": 0.7085220989548032, "grad_norm": 0.30929896235466003, "learning_rate": 1.186041531198881e-05, "loss": 0.2091, "step": 39724 }, { "epoch": 0.7085399350765169, "grad_norm": 0.26772361993789673, "learning_rate": 1.185909115093952e-05, "loss": 0.0658, "step": 39725 }, { "epoch": 0.7085577711982306, "grad_norm": 0.2231772392988205, "learning_rate": 1.185776704082893e-05, "loss": 0.0826, "step": 39726 }, { "epoch": 0.7085756073199444, "grad_norm": 0.2768242657184601, "learning_rate": 1.1856442981662167e-05, "loss": 0.1506, "step": 39727 }, { "epoch": 0.7085934434416581, "grad_norm": 0.2602427005767822, "learning_rate": 1.1855118973444377e-05, "loss": 0.1084, "step": 39728 }, { "epoch": 0.7086112795633718, "grad_norm": 0.2349170595407486, "learning_rate": 1.1853795016180689e-05, "loss": 0.1455, "step": 39729 }, { "epoch": 0.7086291156850855, "grad_norm": 0.29324156045913696, "learning_rate": 1.1852471109876229e-05, "loss": 0.0987, "step": 39730 }, { "epoch": 0.7086469518067992, "grad_norm": 0.26818355917930603, "learning_rate": 1.1851147254536124e-05, "loss": 0.1432, "step": 39731 }, { "epoch": 0.7086647879285128, "grad_norm": 0.25412893295288086, "learning_rate": 1.1849823450165524e-05, "loss": 0.1282, "step": 39732 }, { "epoch": 0.7086826240502265, "grad_norm": 0.2679944932460785, "learning_rate": 1.1848499696769549e-05, "loss": 0.1136, "step": 39733 }, { "epoch": 0.7087004601719402, "grad_norm": 0.2822827994823456, "learning_rate": 1.1847175994353324e-05, "loss": 0.1348, "step": 39734 }, { "epoch": 0.7087182962936539, "grad_norm": 0.24108313024044037, "learning_rate": 1.1845852342921995e-05, "loss": 0.1556, "step": 39735 }, { "epoch": 0.7087361324153676, "grad_norm": 0.24654445052146912, "learning_rate": 1.1844528742480677e-05, "loss": 0.0782, "step": 39736 }, { "epoch": 0.7087539685370813, "grad_norm": 0.2935083210468292, "learning_rate": 1.184320519303452e-05, "loss": 0.1154, "step": 39737 }, { "epoch": 0.708771804658795, "grad_norm": 0.27358415722846985, "learning_rate": 1.1841881694588642e-05, "loss": 0.1145, "step": 39738 }, { "epoch": 0.7087896407805087, "grad_norm": 0.2414022982120514, "learning_rate": 1.1840558247148176e-05, "loss": 0.0737, "step": 39739 }, { "epoch": 0.7088074769022223, "grad_norm": 0.3104711174964905, "learning_rate": 1.1839234850718242e-05, "loss": 0.108, "step": 39740 }, { "epoch": 0.708825313023936, "grad_norm": 0.3439747095108032, "learning_rate": 1.1837911505303989e-05, "loss": 0.1208, "step": 39741 }, { "epoch": 0.7088431491456497, "grad_norm": 0.2752928137779236, "learning_rate": 1.1836588210910535e-05, "loss": 0.1049, "step": 39742 }, { "epoch": 0.7088609852673635, "grad_norm": 0.24843448400497437, "learning_rate": 1.1835264967543013e-05, "loss": 0.0936, "step": 39743 }, { "epoch": 0.7088788213890772, "grad_norm": 0.3079076409339905, "learning_rate": 1.1833941775206541e-05, "loss": 0.1277, "step": 39744 }, { "epoch": 0.7088966575107909, "grad_norm": 0.24981015920639038, "learning_rate": 1.183261863390627e-05, "loss": 0.1374, "step": 39745 }, { "epoch": 0.7089144936325046, "grad_norm": 0.25197288393974304, "learning_rate": 1.1831295543647317e-05, "loss": 0.1414, "step": 39746 }, { "epoch": 0.7089323297542183, "grad_norm": 0.29533651471138, "learning_rate": 1.1829972504434797e-05, "loss": 0.1064, "step": 39747 }, { "epoch": 0.708950165875932, "grad_norm": 0.2694208025932312, "learning_rate": 1.1828649516273865e-05, "loss": 0.093, "step": 39748 }, { "epoch": 0.7089680019976456, "grad_norm": 0.23937560617923737, "learning_rate": 1.1827326579169629e-05, "loss": 0.1072, "step": 39749 }, { "epoch": 0.7089858381193593, "grad_norm": 0.2120543271303177, "learning_rate": 1.182600369312723e-05, "loss": 0.1117, "step": 39750 }, { "epoch": 0.709003674241073, "grad_norm": 0.2478790581226349, "learning_rate": 1.1824680858151794e-05, "loss": 0.1495, "step": 39751 }, { "epoch": 0.7090215103627867, "grad_norm": 0.3088122308254242, "learning_rate": 1.1823358074248444e-05, "loss": 0.1022, "step": 39752 }, { "epoch": 0.7090393464845004, "grad_norm": 0.35148707032203674, "learning_rate": 1.18220353414223e-05, "loss": 0.1098, "step": 39753 }, { "epoch": 0.7090571826062141, "grad_norm": 0.24480833113193512, "learning_rate": 1.1820712659678507e-05, "loss": 0.0812, "step": 39754 }, { "epoch": 0.7090750187279278, "grad_norm": 0.37409868836402893, "learning_rate": 1.1819390029022186e-05, "loss": 0.1252, "step": 39755 }, { "epoch": 0.7090928548496415, "grad_norm": 0.24978578090667725, "learning_rate": 1.1818067449458461e-05, "loss": 0.1082, "step": 39756 }, { "epoch": 0.7091106909713552, "grad_norm": 0.26101842522621155, "learning_rate": 1.1816744920992448e-05, "loss": 0.0933, "step": 39757 }, { "epoch": 0.7091285270930688, "grad_norm": 0.28741735219955444, "learning_rate": 1.1815422443629299e-05, "loss": 0.1096, "step": 39758 }, { "epoch": 0.7091463632147825, "grad_norm": 0.309205561876297, "learning_rate": 1.1814100017374122e-05, "loss": 0.1131, "step": 39759 }, { "epoch": 0.7091641993364963, "grad_norm": 0.3159785568714142, "learning_rate": 1.1812777642232048e-05, "loss": 0.1239, "step": 39760 }, { "epoch": 0.70918203545821, "grad_norm": 0.1896720677614212, "learning_rate": 1.1811455318208195e-05, "loss": 0.0665, "step": 39761 }, { "epoch": 0.7091998715799237, "grad_norm": 0.2209632843732834, "learning_rate": 1.1810133045307705e-05, "loss": 0.1297, "step": 39762 }, { "epoch": 0.7092177077016374, "grad_norm": 0.22897344827651978, "learning_rate": 1.1808810823535684e-05, "loss": 0.0881, "step": 39763 }, { "epoch": 0.7092355438233511, "grad_norm": 0.37251970171928406, "learning_rate": 1.180748865289728e-05, "loss": 0.1761, "step": 39764 }, { "epoch": 0.7092533799450648, "grad_norm": 0.37440046668052673, "learning_rate": 1.1806166533397605e-05, "loss": 0.128, "step": 39765 }, { "epoch": 0.7092712160667785, "grad_norm": 0.3115336298942566, "learning_rate": 1.1804844465041779e-05, "loss": 0.1063, "step": 39766 }, { "epoch": 0.7092890521884921, "grad_norm": 0.26212194561958313, "learning_rate": 1.1803522447834942e-05, "loss": 0.0902, "step": 39767 }, { "epoch": 0.7093068883102058, "grad_norm": 0.3410329222679138, "learning_rate": 1.1802200481782208e-05, "loss": 0.1235, "step": 39768 }, { "epoch": 0.7093247244319195, "grad_norm": 0.2521723210811615, "learning_rate": 1.1800878566888705e-05, "loss": 0.1204, "step": 39769 }, { "epoch": 0.7093425605536332, "grad_norm": 0.19752517342567444, "learning_rate": 1.1799556703159542e-05, "loss": 0.0982, "step": 39770 }, { "epoch": 0.7093603966753469, "grad_norm": 0.2872680425643921, "learning_rate": 1.1798234890599868e-05, "loss": 0.1722, "step": 39771 }, { "epoch": 0.7093782327970606, "grad_norm": 0.29151684045791626, "learning_rate": 1.1796913129214798e-05, "loss": 0.1192, "step": 39772 }, { "epoch": 0.7093960689187743, "grad_norm": 0.25677651166915894, "learning_rate": 1.179559141900945e-05, "loss": 0.1354, "step": 39773 }, { "epoch": 0.709413905040488, "grad_norm": 0.31532707810401917, "learning_rate": 1.1794269759988943e-05, "loss": 0.0877, "step": 39774 }, { "epoch": 0.7094317411622016, "grad_norm": 0.2908805012702942, "learning_rate": 1.1792948152158417e-05, "loss": 0.1285, "step": 39775 }, { "epoch": 0.7094495772839153, "grad_norm": 0.26030611991882324, "learning_rate": 1.1791626595522973e-05, "loss": 0.175, "step": 39776 }, { "epoch": 0.7094674134056291, "grad_norm": 0.2896346151828766, "learning_rate": 1.1790305090087758e-05, "loss": 0.1509, "step": 39777 }, { "epoch": 0.7094852495273428, "grad_norm": 0.25536879897117615, "learning_rate": 1.1788983635857884e-05, "loss": 0.0908, "step": 39778 }, { "epoch": 0.7095030856490565, "grad_norm": 0.23476728796958923, "learning_rate": 1.1787662232838461e-05, "loss": 0.1214, "step": 39779 }, { "epoch": 0.7095209217707702, "grad_norm": 0.23477321863174438, "learning_rate": 1.1786340881034632e-05, "loss": 0.1035, "step": 39780 }, { "epoch": 0.7095387578924839, "grad_norm": 0.22442224621772766, "learning_rate": 1.1785019580451511e-05, "loss": 0.1068, "step": 39781 }, { "epoch": 0.7095565940141976, "grad_norm": 0.3498179316520691, "learning_rate": 1.1783698331094217e-05, "loss": 0.1358, "step": 39782 }, { "epoch": 0.7095744301359113, "grad_norm": 0.2949180006980896, "learning_rate": 1.1782377132967865e-05, "loss": 0.1224, "step": 39783 }, { "epoch": 0.709592266257625, "grad_norm": 0.25757506489753723, "learning_rate": 1.1781055986077593e-05, "loss": 0.0951, "step": 39784 }, { "epoch": 0.7096101023793386, "grad_norm": 0.28989556431770325, "learning_rate": 1.1779734890428515e-05, "loss": 0.1323, "step": 39785 }, { "epoch": 0.7096279385010523, "grad_norm": 0.4818069338798523, "learning_rate": 1.1778413846025748e-05, "loss": 0.162, "step": 39786 }, { "epoch": 0.709645774622766, "grad_norm": 0.2923320531845093, "learning_rate": 1.177709285287442e-05, "loss": 0.1014, "step": 39787 }, { "epoch": 0.7096636107444797, "grad_norm": 0.32751449942588806, "learning_rate": 1.1775771910979633e-05, "loss": 0.1181, "step": 39788 }, { "epoch": 0.7096814468661934, "grad_norm": 0.2230789214372635, "learning_rate": 1.1774451020346532e-05, "loss": 0.0865, "step": 39789 }, { "epoch": 0.7096992829879071, "grad_norm": 0.23723924160003662, "learning_rate": 1.1773130180980218e-05, "loss": 0.1024, "step": 39790 }, { "epoch": 0.7097171191096208, "grad_norm": 0.251668781042099, "learning_rate": 1.177180939288583e-05, "loss": 0.0987, "step": 39791 }, { "epoch": 0.7097349552313345, "grad_norm": 0.279910147190094, "learning_rate": 1.1770488656068469e-05, "loss": 0.1595, "step": 39792 }, { "epoch": 0.7097527913530481, "grad_norm": 0.19495919346809387, "learning_rate": 1.176916797053327e-05, "loss": 0.0992, "step": 39793 }, { "epoch": 0.7097706274747619, "grad_norm": 0.1858009248971939, "learning_rate": 1.1767847336285348e-05, "loss": 0.0774, "step": 39794 }, { "epoch": 0.7097884635964756, "grad_norm": 0.21037927269935608, "learning_rate": 1.1766526753329818e-05, "loss": 0.1213, "step": 39795 }, { "epoch": 0.7098062997181893, "grad_norm": 0.24584011733531952, "learning_rate": 1.1765206221671792e-05, "loss": 0.0834, "step": 39796 }, { "epoch": 0.709824135839903, "grad_norm": 0.24783062934875488, "learning_rate": 1.176388574131641e-05, "loss": 0.1438, "step": 39797 }, { "epoch": 0.7098419719616167, "grad_norm": 0.8838186264038086, "learning_rate": 1.1762565312268775e-05, "loss": 0.0986, "step": 39798 }, { "epoch": 0.7098598080833304, "grad_norm": 0.23214347660541534, "learning_rate": 1.1761244934534011e-05, "loss": 0.1209, "step": 39799 }, { "epoch": 0.7098776442050441, "grad_norm": 0.30486711859703064, "learning_rate": 1.1759924608117235e-05, "loss": 0.0851, "step": 39800 }, { "epoch": 0.7098954803267578, "grad_norm": 0.3394198715686798, "learning_rate": 1.1758604333023552e-05, "loss": 0.1192, "step": 39801 }, { "epoch": 0.7099133164484714, "grad_norm": 0.2624277174472809, "learning_rate": 1.1757284109258102e-05, "loss": 0.1015, "step": 39802 }, { "epoch": 0.7099311525701851, "grad_norm": 0.3305242657661438, "learning_rate": 1.1755963936825984e-05, "loss": 0.1544, "step": 39803 }, { "epoch": 0.7099489886918988, "grad_norm": 0.30331698060035706, "learning_rate": 1.1754643815732336e-05, "loss": 0.1486, "step": 39804 }, { "epoch": 0.7099668248136125, "grad_norm": 0.5515194535255432, "learning_rate": 1.175332374598225e-05, "loss": 0.1323, "step": 39805 }, { "epoch": 0.7099846609353262, "grad_norm": 0.3562612235546112, "learning_rate": 1.1752003727580868e-05, "loss": 0.1481, "step": 39806 }, { "epoch": 0.7100024970570399, "grad_norm": 0.25684693455696106, "learning_rate": 1.1750683760533293e-05, "loss": 0.1298, "step": 39807 }, { "epoch": 0.7100203331787536, "grad_norm": 0.2579234838485718, "learning_rate": 1.1749363844844646e-05, "loss": 0.0977, "step": 39808 }, { "epoch": 0.7100381693004673, "grad_norm": 0.20638930797576904, "learning_rate": 1.1748043980520032e-05, "loss": 0.1049, "step": 39809 }, { "epoch": 0.710056005422181, "grad_norm": 0.22392813861370087, "learning_rate": 1.1746724167564585e-05, "loss": 0.1192, "step": 39810 }, { "epoch": 0.7100738415438947, "grad_norm": 0.2495158463716507, "learning_rate": 1.1745404405983412e-05, "loss": 0.1034, "step": 39811 }, { "epoch": 0.7100916776656084, "grad_norm": 0.2554972767829895, "learning_rate": 1.1744084695781633e-05, "loss": 0.0995, "step": 39812 }, { "epoch": 0.7101095137873221, "grad_norm": 0.20457206666469574, "learning_rate": 1.1742765036964357e-05, "loss": 0.0971, "step": 39813 }, { "epoch": 0.7101273499090358, "grad_norm": 0.24943438172340393, "learning_rate": 1.1741445429536693e-05, "loss": 0.0921, "step": 39814 }, { "epoch": 0.7101451860307495, "grad_norm": 0.31734800338745117, "learning_rate": 1.1740125873503777e-05, "loss": 0.0791, "step": 39815 }, { "epoch": 0.7101630221524632, "grad_norm": 0.2341710925102234, "learning_rate": 1.173880636887071e-05, "loss": 0.0865, "step": 39816 }, { "epoch": 0.7101808582741769, "grad_norm": 0.27869102358818054, "learning_rate": 1.1737486915642603e-05, "loss": 0.1206, "step": 39817 }, { "epoch": 0.7101986943958906, "grad_norm": 0.35360386967658997, "learning_rate": 1.1736167513824578e-05, "loss": 0.1438, "step": 39818 }, { "epoch": 0.7102165305176042, "grad_norm": 0.2731833755970001, "learning_rate": 1.1734848163421757e-05, "loss": 0.102, "step": 39819 }, { "epoch": 0.7102343666393179, "grad_norm": 0.27589675784111023, "learning_rate": 1.1733528864439248e-05, "loss": 0.1358, "step": 39820 }, { "epoch": 0.7102522027610316, "grad_norm": 0.3466830551624298, "learning_rate": 1.1732209616882161e-05, "loss": 0.1429, "step": 39821 }, { "epoch": 0.7102700388827453, "grad_norm": 0.26109829545021057, "learning_rate": 1.1730890420755603e-05, "loss": 0.1045, "step": 39822 }, { "epoch": 0.710287875004459, "grad_norm": 0.19484354555606842, "learning_rate": 1.1729571276064708e-05, "loss": 0.1031, "step": 39823 }, { "epoch": 0.7103057111261727, "grad_norm": 0.24178259074687958, "learning_rate": 1.1728252182814575e-05, "loss": 0.1298, "step": 39824 }, { "epoch": 0.7103235472478864, "grad_norm": 0.2804674506187439, "learning_rate": 1.1726933141010325e-05, "loss": 0.08, "step": 39825 }, { "epoch": 0.7103413833696001, "grad_norm": 0.22235791385173798, "learning_rate": 1.1725614150657061e-05, "loss": 0.1146, "step": 39826 }, { "epoch": 0.7103592194913138, "grad_norm": 0.21689453721046448, "learning_rate": 1.1724295211759896e-05, "loss": 0.0964, "step": 39827 }, { "epoch": 0.7103770556130276, "grad_norm": 0.24945567548274994, "learning_rate": 1.1722976324323956e-05, "loss": 0.0886, "step": 39828 }, { "epoch": 0.7103948917347412, "grad_norm": 0.281645268201828, "learning_rate": 1.1721657488354346e-05, "loss": 0.156, "step": 39829 }, { "epoch": 0.7104127278564549, "grad_norm": 0.2604235112667084, "learning_rate": 1.1720338703856169e-05, "loss": 0.1375, "step": 39830 }, { "epoch": 0.7104305639781686, "grad_norm": 0.2349165380001068, "learning_rate": 1.1719019970834552e-05, "loss": 0.0829, "step": 39831 }, { "epoch": 0.7104484000998823, "grad_norm": 0.3044160306453705, "learning_rate": 1.1717701289294593e-05, "loss": 0.104, "step": 39832 }, { "epoch": 0.710466236221596, "grad_norm": 0.288135826587677, "learning_rate": 1.171638265924142e-05, "loss": 0.116, "step": 39833 }, { "epoch": 0.7104840723433097, "grad_norm": 0.33671990036964417, "learning_rate": 1.1715064080680138e-05, "loss": 0.1006, "step": 39834 }, { "epoch": 0.7105019084650234, "grad_norm": 0.3152539134025574, "learning_rate": 1.1713745553615846e-05, "loss": 0.1163, "step": 39835 }, { "epoch": 0.7105197445867371, "grad_norm": 0.3200022578239441, "learning_rate": 1.1712427078053675e-05, "loss": 0.1194, "step": 39836 }, { "epoch": 0.7105375807084507, "grad_norm": 0.202154278755188, "learning_rate": 1.1711108653998725e-05, "loss": 0.1031, "step": 39837 }, { "epoch": 0.7105554168301644, "grad_norm": 0.2518230080604553, "learning_rate": 1.170979028145611e-05, "loss": 0.1014, "step": 39838 }, { "epoch": 0.7105732529518781, "grad_norm": 0.32290545105934143, "learning_rate": 1.1708471960430934e-05, "loss": 0.1175, "step": 39839 }, { "epoch": 0.7105910890735918, "grad_norm": 0.3526301085948944, "learning_rate": 1.1707153690928304e-05, "loss": 0.1249, "step": 39840 }, { "epoch": 0.7106089251953055, "grad_norm": 0.18469084799289703, "learning_rate": 1.1705835472953346e-05, "loss": 0.088, "step": 39841 }, { "epoch": 0.7106267613170192, "grad_norm": 0.23163259029388428, "learning_rate": 1.1704517306511165e-05, "loss": 0.1381, "step": 39842 }, { "epoch": 0.7106445974387329, "grad_norm": 0.20498667657375336, "learning_rate": 1.1703199191606865e-05, "loss": 0.1183, "step": 39843 }, { "epoch": 0.7106624335604467, "grad_norm": 0.2499837428331375, "learning_rate": 1.1701881128245545e-05, "loss": 0.137, "step": 39844 }, { "epoch": 0.7106802696821604, "grad_norm": 0.3577694296836853, "learning_rate": 1.1700563116432339e-05, "loss": 0.1609, "step": 39845 }, { "epoch": 0.710698105803874, "grad_norm": 0.388089656829834, "learning_rate": 1.1699245156172336e-05, "loss": 0.1371, "step": 39846 }, { "epoch": 0.7107159419255877, "grad_norm": 0.23920665681362152, "learning_rate": 1.1697927247470661e-05, "loss": 0.1068, "step": 39847 }, { "epoch": 0.7107337780473014, "grad_norm": 0.27356773614883423, "learning_rate": 1.1696609390332403e-05, "loss": 0.1641, "step": 39848 }, { "epoch": 0.7107516141690151, "grad_norm": 0.2568212151527405, "learning_rate": 1.1695291584762693e-05, "loss": 0.1534, "step": 39849 }, { "epoch": 0.7107694502907288, "grad_norm": 0.3869524896144867, "learning_rate": 1.1693973830766628e-05, "loss": 0.1894, "step": 39850 }, { "epoch": 0.7107872864124425, "grad_norm": 0.27857154607772827, "learning_rate": 1.1692656128349316e-05, "loss": 0.1019, "step": 39851 }, { "epoch": 0.7108051225341562, "grad_norm": 0.28598251938819885, "learning_rate": 1.1691338477515864e-05, "loss": 0.1058, "step": 39852 }, { "epoch": 0.7108229586558699, "grad_norm": 0.27139079570770264, "learning_rate": 1.1690020878271371e-05, "loss": 0.1023, "step": 39853 }, { "epoch": 0.7108407947775836, "grad_norm": 0.3517839312553406, "learning_rate": 1.1688703330620965e-05, "loss": 0.1114, "step": 39854 }, { "epoch": 0.7108586308992972, "grad_norm": 0.3066236674785614, "learning_rate": 1.1687385834569745e-05, "loss": 0.1831, "step": 39855 }, { "epoch": 0.7108764670210109, "grad_norm": 0.28191259503364563, "learning_rate": 1.1686068390122812e-05, "loss": 0.1158, "step": 39856 }, { "epoch": 0.7108943031427246, "grad_norm": 0.27088141441345215, "learning_rate": 1.1684750997285267e-05, "loss": 0.1334, "step": 39857 }, { "epoch": 0.7109121392644383, "grad_norm": 0.27961984276771545, "learning_rate": 1.1683433656062237e-05, "loss": 0.0987, "step": 39858 }, { "epoch": 0.710929975386152, "grad_norm": 0.3439388573169708, "learning_rate": 1.1682116366458806e-05, "loss": 0.0858, "step": 39859 }, { "epoch": 0.7109478115078657, "grad_norm": 0.24691370129585266, "learning_rate": 1.1680799128480103e-05, "loss": 0.1199, "step": 39860 }, { "epoch": 0.7109656476295795, "grad_norm": 0.280831515789032, "learning_rate": 1.1679481942131212e-05, "loss": 0.145, "step": 39861 }, { "epoch": 0.7109834837512932, "grad_norm": 0.15910178422927856, "learning_rate": 1.1678164807417261e-05, "loss": 0.0848, "step": 39862 }, { "epoch": 0.7110013198730069, "grad_norm": 0.2672363519668579, "learning_rate": 1.1676847724343345e-05, "loss": 0.1092, "step": 39863 }, { "epoch": 0.7110191559947205, "grad_norm": 0.26946932077407837, "learning_rate": 1.1675530692914566e-05, "loss": 0.132, "step": 39864 }, { "epoch": 0.7110369921164342, "grad_norm": 0.26551803946495056, "learning_rate": 1.1674213713136034e-05, "loss": 0.0819, "step": 39865 }, { "epoch": 0.7110548282381479, "grad_norm": 0.2769637703895569, "learning_rate": 1.1672896785012843e-05, "loss": 0.1223, "step": 39866 }, { "epoch": 0.7110726643598616, "grad_norm": 0.2650698721408844, "learning_rate": 1.1671579908550117e-05, "loss": 0.1504, "step": 39867 }, { "epoch": 0.7110905004815753, "grad_norm": 0.2823314964771271, "learning_rate": 1.167026308375295e-05, "loss": 0.1384, "step": 39868 }, { "epoch": 0.711108336603289, "grad_norm": 0.2580188512802124, "learning_rate": 1.1668946310626447e-05, "loss": 0.1126, "step": 39869 }, { "epoch": 0.7111261727250027, "grad_norm": 0.28970351815223694, "learning_rate": 1.1667629589175702e-05, "loss": 0.0927, "step": 39870 }, { "epoch": 0.7111440088467164, "grad_norm": 0.33309268951416016, "learning_rate": 1.1666312919405841e-05, "loss": 0.0797, "step": 39871 }, { "epoch": 0.71116184496843, "grad_norm": 0.25777631998062134, "learning_rate": 1.1664996301321957e-05, "loss": 0.1216, "step": 39872 }, { "epoch": 0.7111796810901437, "grad_norm": 0.22438335418701172, "learning_rate": 1.1663679734929139e-05, "loss": 0.1171, "step": 39873 }, { "epoch": 0.7111975172118574, "grad_norm": 0.22949950397014618, "learning_rate": 1.166236322023251e-05, "loss": 0.114, "step": 39874 }, { "epoch": 0.7112153533335711, "grad_norm": 0.2608383595943451, "learning_rate": 1.1661046757237173e-05, "loss": 0.1599, "step": 39875 }, { "epoch": 0.7112331894552848, "grad_norm": 0.21117225289344788, "learning_rate": 1.165973034594823e-05, "loss": 0.1242, "step": 39876 }, { "epoch": 0.7112510255769985, "grad_norm": 0.3097367286682129, "learning_rate": 1.1658413986370777e-05, "loss": 0.1309, "step": 39877 }, { "epoch": 0.7112688616987123, "grad_norm": 0.294981449842453, "learning_rate": 1.165709767850992e-05, "loss": 0.1319, "step": 39878 }, { "epoch": 0.711286697820426, "grad_norm": 0.26491573452949524, "learning_rate": 1.165578142237075e-05, "loss": 0.1454, "step": 39879 }, { "epoch": 0.7113045339421397, "grad_norm": 0.23283720016479492, "learning_rate": 1.1654465217958394e-05, "loss": 0.1231, "step": 39880 }, { "epoch": 0.7113223700638533, "grad_norm": 0.19688691198825836, "learning_rate": 1.1653149065277935e-05, "loss": 0.1029, "step": 39881 }, { "epoch": 0.711340206185567, "grad_norm": 0.25711968541145325, "learning_rate": 1.1651832964334483e-05, "loss": 0.1094, "step": 39882 }, { "epoch": 0.7113580423072807, "grad_norm": 0.24089841544628143, "learning_rate": 1.1650516915133127e-05, "loss": 0.113, "step": 39883 }, { "epoch": 0.7113758784289944, "grad_norm": 0.30157706141471863, "learning_rate": 1.1649200917678987e-05, "loss": 0.1229, "step": 39884 }, { "epoch": 0.7113937145507081, "grad_norm": 0.21007725596427917, "learning_rate": 1.1647884971977158e-05, "loss": 0.1437, "step": 39885 }, { "epoch": 0.7114115506724218, "grad_norm": 0.18124797940254211, "learning_rate": 1.1646569078032727e-05, "loss": 0.1455, "step": 39886 }, { "epoch": 0.7114293867941355, "grad_norm": 0.33128172159194946, "learning_rate": 1.1645253235850815e-05, "loss": 0.1075, "step": 39887 }, { "epoch": 0.7114472229158492, "grad_norm": 0.23498903214931488, "learning_rate": 1.1643937445436506e-05, "loss": 0.0759, "step": 39888 }, { "epoch": 0.7114650590375629, "grad_norm": 0.2730211615562439, "learning_rate": 1.1642621706794915e-05, "loss": 0.1404, "step": 39889 }, { "epoch": 0.7114828951592765, "grad_norm": 0.28579044342041016, "learning_rate": 1.1641306019931139e-05, "loss": 0.0753, "step": 39890 }, { "epoch": 0.7115007312809902, "grad_norm": 0.28043386340141296, "learning_rate": 1.1639990384850275e-05, "loss": 0.1019, "step": 39891 }, { "epoch": 0.7115185674027039, "grad_norm": 0.3845950663089752, "learning_rate": 1.1638674801557412e-05, "loss": 0.1637, "step": 39892 }, { "epoch": 0.7115364035244176, "grad_norm": 0.20041073858737946, "learning_rate": 1.1637359270057669e-05, "loss": 0.0995, "step": 39893 }, { "epoch": 0.7115542396461313, "grad_norm": 0.23606139421463013, "learning_rate": 1.1636043790356137e-05, "loss": 0.1469, "step": 39894 }, { "epoch": 0.7115720757678451, "grad_norm": 0.2565153241157532, "learning_rate": 1.1634728362457916e-05, "loss": 0.0997, "step": 39895 }, { "epoch": 0.7115899118895588, "grad_norm": 0.16047993302345276, "learning_rate": 1.1633412986368094e-05, "loss": 0.082, "step": 39896 }, { "epoch": 0.7116077480112725, "grad_norm": 0.27130910754203796, "learning_rate": 1.163209766209179e-05, "loss": 0.1164, "step": 39897 }, { "epoch": 0.7116255841329862, "grad_norm": 0.2683762311935425, "learning_rate": 1.1630782389634093e-05, "loss": 0.1208, "step": 39898 }, { "epoch": 0.7116434202546998, "grad_norm": 0.2111593335866928, "learning_rate": 1.1629467169000099e-05, "loss": 0.1147, "step": 39899 }, { "epoch": 0.7116612563764135, "grad_norm": 0.32923048734664917, "learning_rate": 1.1628152000194901e-05, "loss": 0.1633, "step": 39900 }, { "epoch": 0.7116790924981272, "grad_norm": 0.2416684925556183, "learning_rate": 1.1626836883223604e-05, "loss": 0.1471, "step": 39901 }, { "epoch": 0.7116969286198409, "grad_norm": 0.33641648292541504, "learning_rate": 1.1625521818091315e-05, "loss": 0.1562, "step": 39902 }, { "epoch": 0.7117147647415546, "grad_norm": 0.2838899791240692, "learning_rate": 1.1624206804803123e-05, "loss": 0.2105, "step": 39903 }, { "epoch": 0.7117326008632683, "grad_norm": 0.2801453173160553, "learning_rate": 1.1622891843364126e-05, "loss": 0.1225, "step": 39904 }, { "epoch": 0.711750436984982, "grad_norm": 0.28423693776130676, "learning_rate": 1.162157693377941e-05, "loss": 0.1148, "step": 39905 }, { "epoch": 0.7117682731066957, "grad_norm": 0.252823144197464, "learning_rate": 1.1620262076054093e-05, "loss": 0.1486, "step": 39906 }, { "epoch": 0.7117861092284093, "grad_norm": 0.30582118034362793, "learning_rate": 1.161894727019326e-05, "loss": 0.1409, "step": 39907 }, { "epoch": 0.711803945350123, "grad_norm": 0.25981634855270386, "learning_rate": 1.161763251620201e-05, "loss": 0.1006, "step": 39908 }, { "epoch": 0.7118217814718367, "grad_norm": 0.24681849777698517, "learning_rate": 1.1616317814085428e-05, "loss": 0.1185, "step": 39909 }, { "epoch": 0.7118396175935504, "grad_norm": 0.2377425879240036, "learning_rate": 1.1615003163848632e-05, "loss": 0.1336, "step": 39910 }, { "epoch": 0.7118574537152641, "grad_norm": 0.3168492913246155, "learning_rate": 1.1613688565496705e-05, "loss": 0.1671, "step": 39911 }, { "epoch": 0.7118752898369779, "grad_norm": 0.2868969142436981, "learning_rate": 1.1612374019034744e-05, "loss": 0.1415, "step": 39912 }, { "epoch": 0.7118931259586916, "grad_norm": 0.25970104336738586, "learning_rate": 1.1611059524467838e-05, "loss": 0.1523, "step": 39913 }, { "epoch": 0.7119109620804053, "grad_norm": 0.3548128008842468, "learning_rate": 1.1609745081801099e-05, "loss": 0.1249, "step": 39914 }, { "epoch": 0.711928798202119, "grad_norm": 0.35793355107307434, "learning_rate": 1.1608430691039601e-05, "loss": 0.1323, "step": 39915 }, { "epoch": 0.7119466343238327, "grad_norm": 0.2647632658481598, "learning_rate": 1.1607116352188463e-05, "loss": 0.135, "step": 39916 }, { "epoch": 0.7119644704455463, "grad_norm": 0.2214607149362564, "learning_rate": 1.1605802065252769e-05, "loss": 0.0931, "step": 39917 }, { "epoch": 0.71198230656726, "grad_norm": 0.2038196325302124, "learning_rate": 1.1604487830237598e-05, "loss": 0.1203, "step": 39918 }, { "epoch": 0.7120001426889737, "grad_norm": 0.32778993248939514, "learning_rate": 1.1603173647148071e-05, "loss": 0.1506, "step": 39919 }, { "epoch": 0.7120179788106874, "grad_norm": 0.24346116185188293, "learning_rate": 1.1601859515989272e-05, "loss": 0.1426, "step": 39920 }, { "epoch": 0.7120358149324011, "grad_norm": 0.23992453515529633, "learning_rate": 1.1600545436766291e-05, "loss": 0.1065, "step": 39921 }, { "epoch": 0.7120536510541148, "grad_norm": 0.30495685338974, "learning_rate": 1.1599231409484215e-05, "loss": 0.1074, "step": 39922 }, { "epoch": 0.7120714871758285, "grad_norm": 0.2628614902496338, "learning_rate": 1.1597917434148156e-05, "loss": 0.1456, "step": 39923 }, { "epoch": 0.7120893232975422, "grad_norm": 0.21645644307136536, "learning_rate": 1.1596603510763196e-05, "loss": 0.1357, "step": 39924 }, { "epoch": 0.7121071594192558, "grad_norm": 0.243134006857872, "learning_rate": 1.1595289639334433e-05, "loss": 0.0405, "step": 39925 }, { "epoch": 0.7121249955409695, "grad_norm": 0.28979218006134033, "learning_rate": 1.1593975819866945e-05, "loss": 0.063, "step": 39926 }, { "epoch": 0.7121428316626832, "grad_norm": 0.258320689201355, "learning_rate": 1.159266205236585e-05, "loss": 0.147, "step": 39927 }, { "epoch": 0.7121606677843969, "grad_norm": 0.29123276472091675, "learning_rate": 1.1591348336836216e-05, "loss": 0.1222, "step": 39928 }, { "epoch": 0.7121785039061107, "grad_norm": 0.19829247891902924, "learning_rate": 1.1590034673283156e-05, "loss": 0.1132, "step": 39929 }, { "epoch": 0.7121963400278244, "grad_norm": 0.35753124952316284, "learning_rate": 1.1588721061711755e-05, "loss": 0.1115, "step": 39930 }, { "epoch": 0.7122141761495381, "grad_norm": 0.27448055148124695, "learning_rate": 1.1587407502127094e-05, "loss": 0.131, "step": 39931 }, { "epoch": 0.7122320122712518, "grad_norm": 0.22233644127845764, "learning_rate": 1.158609399453428e-05, "loss": 0.0915, "step": 39932 }, { "epoch": 0.7122498483929655, "grad_norm": 0.38981086015701294, "learning_rate": 1.1584780538938402e-05, "loss": 0.0979, "step": 39933 }, { "epoch": 0.7122676845146791, "grad_norm": 0.29726698994636536, "learning_rate": 1.1583467135344547e-05, "loss": 0.1086, "step": 39934 }, { "epoch": 0.7122855206363928, "grad_norm": 0.2556772828102112, "learning_rate": 1.15821537837578e-05, "loss": 0.1252, "step": 39935 }, { "epoch": 0.7123033567581065, "grad_norm": 0.2642151713371277, "learning_rate": 1.1580840484183264e-05, "loss": 0.1046, "step": 39936 }, { "epoch": 0.7123211928798202, "grad_norm": 0.28623437881469727, "learning_rate": 1.1579527236626028e-05, "loss": 0.0988, "step": 39937 }, { "epoch": 0.7123390290015339, "grad_norm": 0.24815405905246735, "learning_rate": 1.1578214041091178e-05, "loss": 0.1295, "step": 39938 }, { "epoch": 0.7123568651232476, "grad_norm": 0.5346922874450684, "learning_rate": 1.15769008975838e-05, "loss": 0.2474, "step": 39939 }, { "epoch": 0.7123747012449613, "grad_norm": 0.3100970685482025, "learning_rate": 1.1575587806108999e-05, "loss": 0.1292, "step": 39940 }, { "epoch": 0.712392537366675, "grad_norm": 0.41137516498565674, "learning_rate": 1.1574274766671856e-05, "loss": 0.2248, "step": 39941 }, { "epoch": 0.7124103734883886, "grad_norm": 0.288207471370697, "learning_rate": 1.157296177927745e-05, "loss": 0.167, "step": 39942 }, { "epoch": 0.7124282096101023, "grad_norm": 0.25861960649490356, "learning_rate": 1.1571648843930891e-05, "loss": 0.1279, "step": 39943 }, { "epoch": 0.712446045731816, "grad_norm": 0.405437707901001, "learning_rate": 1.1570335960637253e-05, "loss": 0.1383, "step": 39944 }, { "epoch": 0.7124638818535298, "grad_norm": 0.2419949769973755, "learning_rate": 1.1569023129401639e-05, "loss": 0.1305, "step": 39945 }, { "epoch": 0.7124817179752435, "grad_norm": 0.2325814664363861, "learning_rate": 1.1567710350229127e-05, "loss": 0.0656, "step": 39946 }, { "epoch": 0.7124995540969572, "grad_norm": 0.2678578794002533, "learning_rate": 1.156639762312481e-05, "loss": 0.1239, "step": 39947 }, { "epoch": 0.7125173902186709, "grad_norm": 0.2614659070968628, "learning_rate": 1.1565084948093771e-05, "loss": 0.1209, "step": 39948 }, { "epoch": 0.7125352263403846, "grad_norm": 0.2686004340648651, "learning_rate": 1.156377232514111e-05, "loss": 0.1155, "step": 39949 }, { "epoch": 0.7125530624620983, "grad_norm": 0.3356996476650238, "learning_rate": 1.1562459754271907e-05, "loss": 0.0829, "step": 39950 }, { "epoch": 0.712570898583812, "grad_norm": 0.2051440328359604, "learning_rate": 1.1561147235491252e-05, "loss": 0.0791, "step": 39951 }, { "epoch": 0.7125887347055256, "grad_norm": 0.2531212270259857, "learning_rate": 1.155983476880422e-05, "loss": 0.1063, "step": 39952 }, { "epoch": 0.7126065708272393, "grad_norm": 0.29687801003456116, "learning_rate": 1.1558522354215922e-05, "loss": 0.1162, "step": 39953 }, { "epoch": 0.712624406948953, "grad_norm": 0.258180171251297, "learning_rate": 1.1557209991731435e-05, "loss": 0.1244, "step": 39954 }, { "epoch": 0.7126422430706667, "grad_norm": 0.289081335067749, "learning_rate": 1.1555897681355834e-05, "loss": 0.2352, "step": 39955 }, { "epoch": 0.7126600791923804, "grad_norm": 0.2513437271118164, "learning_rate": 1.1554585423094228e-05, "loss": 0.0876, "step": 39956 }, { "epoch": 0.7126779153140941, "grad_norm": 0.30848774313926697, "learning_rate": 1.1553273216951682e-05, "loss": 0.0932, "step": 39957 }, { "epoch": 0.7126957514358078, "grad_norm": 0.2955001890659332, "learning_rate": 1.1551961062933305e-05, "loss": 0.1078, "step": 39958 }, { "epoch": 0.7127135875575215, "grad_norm": 0.24780118465423584, "learning_rate": 1.1550648961044169e-05, "loss": 0.1196, "step": 39959 }, { "epoch": 0.7127314236792351, "grad_norm": 0.2631162106990814, "learning_rate": 1.1549336911289366e-05, "loss": 0.1821, "step": 39960 }, { "epoch": 0.7127492598009488, "grad_norm": 0.24644921720027924, "learning_rate": 1.1548024913673967e-05, "loss": 0.1472, "step": 39961 }, { "epoch": 0.7127670959226626, "grad_norm": 0.22369952499866486, "learning_rate": 1.1546712968203077e-05, "loss": 0.0859, "step": 39962 }, { "epoch": 0.7127849320443763, "grad_norm": 0.2760312259197235, "learning_rate": 1.1545401074881779e-05, "loss": 0.1494, "step": 39963 }, { "epoch": 0.71280276816609, "grad_norm": 0.2596503794193268, "learning_rate": 1.154408923371515e-05, "loss": 0.1037, "step": 39964 }, { "epoch": 0.7128206042878037, "grad_norm": 0.29482051730155945, "learning_rate": 1.154277744470827e-05, "loss": 0.1716, "step": 39965 }, { "epoch": 0.7128384404095174, "grad_norm": 0.2661428451538086, "learning_rate": 1.1541465707866243e-05, "loss": 0.1037, "step": 39966 }, { "epoch": 0.7128562765312311, "grad_norm": 0.24982422590255737, "learning_rate": 1.1540154023194141e-05, "loss": 0.1035, "step": 39967 }, { "epoch": 0.7128741126529448, "grad_norm": 0.2818349301815033, "learning_rate": 1.1538842390697056e-05, "loss": 0.1576, "step": 39968 }, { "epoch": 0.7128919487746584, "grad_norm": 0.5475735664367676, "learning_rate": 1.153753081038005e-05, "loss": 0.1002, "step": 39969 }, { "epoch": 0.7129097848963721, "grad_norm": 0.2658008635044098, "learning_rate": 1.1536219282248239e-05, "loss": 0.1066, "step": 39970 }, { "epoch": 0.7129276210180858, "grad_norm": 0.30838435888290405, "learning_rate": 1.1534907806306683e-05, "loss": 0.1119, "step": 39971 }, { "epoch": 0.7129454571397995, "grad_norm": 0.25189292430877686, "learning_rate": 1.1533596382560482e-05, "loss": 0.1031, "step": 39972 }, { "epoch": 0.7129632932615132, "grad_norm": 0.2695024311542511, "learning_rate": 1.1532285011014715e-05, "loss": 0.0975, "step": 39973 }, { "epoch": 0.7129811293832269, "grad_norm": 0.21420122683048248, "learning_rate": 1.1530973691674455e-05, "loss": 0.1134, "step": 39974 }, { "epoch": 0.7129989655049406, "grad_norm": 0.29684752225875854, "learning_rate": 1.1529662424544799e-05, "loss": 0.1073, "step": 39975 }, { "epoch": 0.7130168016266543, "grad_norm": 0.26985830068588257, "learning_rate": 1.1528351209630824e-05, "loss": 0.079, "step": 39976 }, { "epoch": 0.713034637748368, "grad_norm": 0.3190481960773468, "learning_rate": 1.1527040046937615e-05, "loss": 0.1801, "step": 39977 }, { "epoch": 0.7130524738700816, "grad_norm": 0.30620959401130676, "learning_rate": 1.152572893647024e-05, "loss": 0.1292, "step": 39978 }, { "epoch": 0.7130703099917954, "grad_norm": 0.21564139425754547, "learning_rate": 1.1524417878233803e-05, "loss": 0.0849, "step": 39979 }, { "epoch": 0.7130881461135091, "grad_norm": 0.20728722214698792, "learning_rate": 1.152310687223338e-05, "loss": 0.119, "step": 39980 }, { "epoch": 0.7131059822352228, "grad_norm": 0.2679125964641571, "learning_rate": 1.1521795918474046e-05, "loss": 0.0938, "step": 39981 }, { "epoch": 0.7131238183569365, "grad_norm": 0.2899567782878876, "learning_rate": 1.1520485016960881e-05, "loss": 0.1134, "step": 39982 }, { "epoch": 0.7131416544786502, "grad_norm": 0.32243776321411133, "learning_rate": 1.1519174167698976e-05, "loss": 0.1538, "step": 39983 }, { "epoch": 0.7131594906003639, "grad_norm": 0.24156616628170013, "learning_rate": 1.1517863370693403e-05, "loss": 0.1025, "step": 39984 }, { "epoch": 0.7131773267220776, "grad_norm": 0.3430239260196686, "learning_rate": 1.1516552625949253e-05, "loss": 0.1352, "step": 39985 }, { "epoch": 0.7131951628437913, "grad_norm": 0.29557710886001587, "learning_rate": 1.1515241933471607e-05, "loss": 0.1177, "step": 39986 }, { "epoch": 0.7132129989655049, "grad_norm": 0.2616858184337616, "learning_rate": 1.1513931293265529e-05, "loss": 0.1078, "step": 39987 }, { "epoch": 0.7132308350872186, "grad_norm": 0.28580084443092346, "learning_rate": 1.151262070533612e-05, "loss": 0.1238, "step": 39988 }, { "epoch": 0.7132486712089323, "grad_norm": 0.29453325271606445, "learning_rate": 1.1511310169688452e-05, "loss": 0.1496, "step": 39989 }, { "epoch": 0.713266507330646, "grad_norm": 0.22635002434253693, "learning_rate": 1.1509999686327604e-05, "loss": 0.1202, "step": 39990 }, { "epoch": 0.7132843434523597, "grad_norm": 0.39565804600715637, "learning_rate": 1.1508689255258648e-05, "loss": 0.1752, "step": 39991 }, { "epoch": 0.7133021795740734, "grad_norm": 0.28307363390922546, "learning_rate": 1.1507378876486682e-05, "loss": 0.1115, "step": 39992 }, { "epoch": 0.7133200156957871, "grad_norm": 0.2626785635948181, "learning_rate": 1.1506068550016774e-05, "loss": 0.0977, "step": 39993 }, { "epoch": 0.7133378518175008, "grad_norm": 0.31536778807640076, "learning_rate": 1.1504758275854008e-05, "loss": 0.1066, "step": 39994 }, { "epoch": 0.7133556879392144, "grad_norm": 0.2317369282245636, "learning_rate": 1.1503448054003457e-05, "loss": 0.1246, "step": 39995 }, { "epoch": 0.7133735240609282, "grad_norm": 0.361154168844223, "learning_rate": 1.1502137884470197e-05, "loss": 0.0999, "step": 39996 }, { "epoch": 0.7133913601826419, "grad_norm": 0.28814399242401123, "learning_rate": 1.1500827767259317e-05, "loss": 0.0934, "step": 39997 }, { "epoch": 0.7134091963043556, "grad_norm": 0.31533944606781006, "learning_rate": 1.1499517702375887e-05, "loss": 0.1712, "step": 39998 }, { "epoch": 0.7134270324260693, "grad_norm": 0.22883932292461395, "learning_rate": 1.1498207689824995e-05, "loss": 0.0722, "step": 39999 }, { "epoch": 0.713444868547783, "grad_norm": 0.1899060159921646, "learning_rate": 1.1496897729611706e-05, "loss": 0.0477, "step": 40000 }, { "epoch": 0.713444868547783, "eval_loss": 0.11541074514389038, "eval_runtime": 107.4664, "eval_samples_per_second": 9.529, "eval_steps_per_second": 1.591, "step": 40000 }, { "epoch": 0.7134627046694967, "grad_norm": 0.21473735570907593, "learning_rate": 1.1495587821741113e-05, "loss": 0.1387, "step": 40001 }, { "epoch": 0.7134805407912104, "grad_norm": 0.22912606596946716, "learning_rate": 1.1494277966218287e-05, "loss": 0.1427, "step": 40002 }, { "epoch": 0.7134983769129241, "grad_norm": 0.22071535885334015, "learning_rate": 1.1492968163048302e-05, "loss": 0.0871, "step": 40003 }, { "epoch": 0.7135162130346377, "grad_norm": 0.3121480941772461, "learning_rate": 1.1491658412236231e-05, "loss": 0.118, "step": 40004 }, { "epoch": 0.7135340491563514, "grad_norm": 0.24430778622627258, "learning_rate": 1.1490348713787167e-05, "loss": 0.1204, "step": 40005 }, { "epoch": 0.7135518852780651, "grad_norm": 0.2277761995792389, "learning_rate": 1.1489039067706176e-05, "loss": 0.1101, "step": 40006 }, { "epoch": 0.7135697213997788, "grad_norm": 0.23791418969631195, "learning_rate": 1.1487729473998336e-05, "loss": 0.1104, "step": 40007 }, { "epoch": 0.7135875575214925, "grad_norm": 0.34284713864326477, "learning_rate": 1.1486419932668725e-05, "loss": 0.173, "step": 40008 }, { "epoch": 0.7136053936432062, "grad_norm": 0.27452951669692993, "learning_rate": 1.1485110443722404e-05, "loss": 0.1048, "step": 40009 }, { "epoch": 0.7136232297649199, "grad_norm": 0.30748361349105835, "learning_rate": 1.1483801007164477e-05, "loss": 0.0894, "step": 40010 }, { "epoch": 0.7136410658866336, "grad_norm": 0.2984600067138672, "learning_rate": 1.1482491622999996e-05, "loss": 0.11, "step": 40011 }, { "epoch": 0.7136589020083473, "grad_norm": 0.35017356276512146, "learning_rate": 1.1481182291234054e-05, "loss": 0.1131, "step": 40012 }, { "epoch": 0.713676738130061, "grad_norm": 0.18534281849861145, "learning_rate": 1.1479873011871712e-05, "loss": 0.1286, "step": 40013 }, { "epoch": 0.7136945742517747, "grad_norm": 0.22484050691127777, "learning_rate": 1.1478563784918059e-05, "loss": 0.125, "step": 40014 }, { "epoch": 0.7137124103734884, "grad_norm": 0.21328525245189667, "learning_rate": 1.1477254610378163e-05, "loss": 0.1349, "step": 40015 }, { "epoch": 0.7137302464952021, "grad_norm": 0.4013546407222748, "learning_rate": 1.1475945488257097e-05, "loss": 0.203, "step": 40016 }, { "epoch": 0.7137480826169158, "grad_norm": 0.31322768330574036, "learning_rate": 1.147463641855993e-05, "loss": 0.1318, "step": 40017 }, { "epoch": 0.7137659187386295, "grad_norm": 0.29802224040031433, "learning_rate": 1.1473327401291753e-05, "loss": 0.1095, "step": 40018 }, { "epoch": 0.7137837548603432, "grad_norm": 0.2667621970176697, "learning_rate": 1.1472018436457632e-05, "loss": 0.1236, "step": 40019 }, { "epoch": 0.7138015909820569, "grad_norm": 0.26889848709106445, "learning_rate": 1.147070952406264e-05, "loss": 0.098, "step": 40020 }, { "epoch": 0.7138194271037706, "grad_norm": 0.33743032813072205, "learning_rate": 1.1469400664111848e-05, "loss": 0.1376, "step": 40021 }, { "epoch": 0.7138372632254842, "grad_norm": 0.25778719782829285, "learning_rate": 1.1468091856610325e-05, "loss": 0.1337, "step": 40022 }, { "epoch": 0.7138550993471979, "grad_norm": 0.27928870916366577, "learning_rate": 1.146678310156316e-05, "loss": 0.0992, "step": 40023 }, { "epoch": 0.7138729354689116, "grad_norm": 0.22588548064231873, "learning_rate": 1.1465474398975418e-05, "loss": 0.1425, "step": 40024 }, { "epoch": 0.7138907715906253, "grad_norm": 0.2689093351364136, "learning_rate": 1.1464165748852166e-05, "loss": 0.1281, "step": 40025 }, { "epoch": 0.713908607712339, "grad_norm": 0.23568901419639587, "learning_rate": 1.1462857151198485e-05, "loss": 0.1234, "step": 40026 }, { "epoch": 0.7139264438340527, "grad_norm": 0.3280787765979767, "learning_rate": 1.1461548606019443e-05, "loss": 0.1369, "step": 40027 }, { "epoch": 0.7139442799557664, "grad_norm": 0.2050517201423645, "learning_rate": 1.146024011332012e-05, "loss": 0.0956, "step": 40028 }, { "epoch": 0.7139621160774801, "grad_norm": 0.3899628221988678, "learning_rate": 1.1458931673105585e-05, "loss": 0.1645, "step": 40029 }, { "epoch": 0.7139799521991939, "grad_norm": 0.2577247619628906, "learning_rate": 1.1457623285380897e-05, "loss": 0.1137, "step": 40030 }, { "epoch": 0.7139977883209075, "grad_norm": 0.4088238775730133, "learning_rate": 1.1456314950151147e-05, "loss": 0.1057, "step": 40031 }, { "epoch": 0.7140156244426212, "grad_norm": 0.2634453773498535, "learning_rate": 1.14550066674214e-05, "loss": 0.109, "step": 40032 }, { "epoch": 0.7140334605643349, "grad_norm": 0.27273663878440857, "learning_rate": 1.1453698437196725e-05, "loss": 0.094, "step": 40033 }, { "epoch": 0.7140512966860486, "grad_norm": 0.19101014733314514, "learning_rate": 1.1452390259482192e-05, "loss": 0.0899, "step": 40034 }, { "epoch": 0.7140691328077623, "grad_norm": 0.27890104055404663, "learning_rate": 1.1451082134282868e-05, "loss": 0.1329, "step": 40035 }, { "epoch": 0.714086968929476, "grad_norm": 0.22313927114009857, "learning_rate": 1.1449774061603834e-05, "loss": 0.1013, "step": 40036 }, { "epoch": 0.7141048050511897, "grad_norm": 0.25917425751686096, "learning_rate": 1.144846604145016e-05, "loss": 0.1282, "step": 40037 }, { "epoch": 0.7141226411729034, "grad_norm": 0.265948086977005, "learning_rate": 1.14471580738269e-05, "loss": 0.118, "step": 40038 }, { "epoch": 0.714140477294617, "grad_norm": 0.2603774964809418, "learning_rate": 1.1445850158739146e-05, "loss": 0.1173, "step": 40039 }, { "epoch": 0.7141583134163307, "grad_norm": 0.24207022786140442, "learning_rate": 1.1444542296191952e-05, "loss": 0.1585, "step": 40040 }, { "epoch": 0.7141761495380444, "grad_norm": 0.2568919062614441, "learning_rate": 1.1443234486190399e-05, "loss": 0.1488, "step": 40041 }, { "epoch": 0.7141939856597581, "grad_norm": 0.2640952467918396, "learning_rate": 1.1441926728739552e-05, "loss": 0.086, "step": 40042 }, { "epoch": 0.7142118217814718, "grad_norm": 0.2652968466281891, "learning_rate": 1.1440619023844473e-05, "loss": 0.1742, "step": 40043 }, { "epoch": 0.7142296579031855, "grad_norm": 0.27271100878715515, "learning_rate": 1.1439311371510244e-05, "loss": 0.1068, "step": 40044 }, { "epoch": 0.7142474940248992, "grad_norm": 0.36714431643486023, "learning_rate": 1.1438003771741931e-05, "loss": 0.1012, "step": 40045 }, { "epoch": 0.714265330146613, "grad_norm": 0.2434958517551422, "learning_rate": 1.14366962245446e-05, "loss": 0.0795, "step": 40046 }, { "epoch": 0.7142831662683267, "grad_norm": 0.2660449147224426, "learning_rate": 1.1435388729923317e-05, "loss": 0.0609, "step": 40047 }, { "epoch": 0.7143010023900404, "grad_norm": 0.353059858083725, "learning_rate": 1.1434081287883142e-05, "loss": 0.1114, "step": 40048 }, { "epoch": 0.714318838511754, "grad_norm": 0.20561105012893677, "learning_rate": 1.1432773898429165e-05, "loss": 0.0941, "step": 40049 }, { "epoch": 0.7143366746334677, "grad_norm": 0.2738844156265259, "learning_rate": 1.1431466561566443e-05, "loss": 0.1327, "step": 40050 }, { "epoch": 0.7143545107551814, "grad_norm": 0.2646847665309906, "learning_rate": 1.143015927730004e-05, "loss": 0.108, "step": 40051 }, { "epoch": 0.7143723468768951, "grad_norm": 0.2548307478427887, "learning_rate": 1.1428852045635018e-05, "loss": 0.1079, "step": 40052 }, { "epoch": 0.7143901829986088, "grad_norm": 0.2920530438423157, "learning_rate": 1.1427544866576465e-05, "loss": 0.0767, "step": 40053 }, { "epoch": 0.7144080191203225, "grad_norm": 0.24720653891563416, "learning_rate": 1.1426237740129422e-05, "loss": 0.1095, "step": 40054 }, { "epoch": 0.7144258552420362, "grad_norm": 0.3039284646511078, "learning_rate": 1.1424930666298983e-05, "loss": 0.0825, "step": 40055 }, { "epoch": 0.7144436913637499, "grad_norm": 0.37439247965812683, "learning_rate": 1.1423623645090189e-05, "loss": 0.1319, "step": 40056 }, { "epoch": 0.7144615274854635, "grad_norm": 0.2087412029504776, "learning_rate": 1.142231667650813e-05, "loss": 0.0853, "step": 40057 }, { "epoch": 0.7144793636071772, "grad_norm": 0.1958671361207962, "learning_rate": 1.142100976055786e-05, "loss": 0.0937, "step": 40058 }, { "epoch": 0.7144971997288909, "grad_norm": 0.20561863481998444, "learning_rate": 1.141970289724445e-05, "loss": 0.0943, "step": 40059 }, { "epoch": 0.7145150358506046, "grad_norm": 0.32751795649528503, "learning_rate": 1.1418396086572957e-05, "loss": 0.1361, "step": 40060 }, { "epoch": 0.7145328719723183, "grad_norm": 0.3036060929298401, "learning_rate": 1.1417089328548443e-05, "loss": 0.1272, "step": 40061 }, { "epoch": 0.714550708094032, "grad_norm": 0.2874866724014282, "learning_rate": 1.1415782623175994e-05, "loss": 0.1459, "step": 40062 }, { "epoch": 0.7145685442157458, "grad_norm": 0.43067842721939087, "learning_rate": 1.1414475970460664e-05, "loss": 0.1653, "step": 40063 }, { "epoch": 0.7145863803374595, "grad_norm": 0.26317596435546875, "learning_rate": 1.1413169370407514e-05, "loss": 0.1353, "step": 40064 }, { "epoch": 0.7146042164591732, "grad_norm": 0.28594955801963806, "learning_rate": 1.1411862823021607e-05, "loss": 0.129, "step": 40065 }, { "epoch": 0.7146220525808868, "grad_norm": 0.26611292362213135, "learning_rate": 1.1410556328308019e-05, "loss": 0.0854, "step": 40066 }, { "epoch": 0.7146398887026005, "grad_norm": 0.3274479806423187, "learning_rate": 1.14092498862718e-05, "loss": 0.1513, "step": 40067 }, { "epoch": 0.7146577248243142, "grad_norm": 0.235755056142807, "learning_rate": 1.1407943496918034e-05, "loss": 0.1307, "step": 40068 }, { "epoch": 0.7146755609460279, "grad_norm": 0.22338080406188965, "learning_rate": 1.1406637160251759e-05, "loss": 0.0735, "step": 40069 }, { "epoch": 0.7146933970677416, "grad_norm": 0.3805863559246063, "learning_rate": 1.1405330876278067e-05, "loss": 0.1381, "step": 40070 }, { "epoch": 0.7147112331894553, "grad_norm": 0.3719305992126465, "learning_rate": 1.1404024645002007e-05, "loss": 0.1122, "step": 40071 }, { "epoch": 0.714729069311169, "grad_norm": 0.30759960412979126, "learning_rate": 1.1402718466428641e-05, "loss": 0.1614, "step": 40072 }, { "epoch": 0.7147469054328827, "grad_norm": 0.30556634068489075, "learning_rate": 1.1401412340563039e-05, "loss": 0.2038, "step": 40073 }, { "epoch": 0.7147647415545964, "grad_norm": 0.25647082924842834, "learning_rate": 1.1400106267410245e-05, "loss": 0.1004, "step": 40074 }, { "epoch": 0.71478257767631, "grad_norm": 0.2774975895881653, "learning_rate": 1.139880024697535e-05, "loss": 0.1405, "step": 40075 }, { "epoch": 0.7148004137980237, "grad_norm": 0.3357665240764618, "learning_rate": 1.13974942792634e-05, "loss": 0.1111, "step": 40076 }, { "epoch": 0.7148182499197374, "grad_norm": 0.2735438942909241, "learning_rate": 1.1396188364279465e-05, "loss": 0.088, "step": 40077 }, { "epoch": 0.7148360860414511, "grad_norm": 0.3286462724208832, "learning_rate": 1.139488250202859e-05, "loss": 0.1005, "step": 40078 }, { "epoch": 0.7148539221631648, "grad_norm": 0.35431915521621704, "learning_rate": 1.1393576692515859e-05, "loss": 0.1261, "step": 40079 }, { "epoch": 0.7148717582848786, "grad_norm": 0.34066176414489746, "learning_rate": 1.1392270935746324e-05, "loss": 0.1066, "step": 40080 }, { "epoch": 0.7148895944065923, "grad_norm": 0.22965796291828156, "learning_rate": 1.1390965231725037e-05, "loss": 0.1157, "step": 40081 }, { "epoch": 0.714907430528306, "grad_norm": 0.21479332447052002, "learning_rate": 1.1389659580457069e-05, "loss": 0.1143, "step": 40082 }, { "epoch": 0.7149252666500197, "grad_norm": 0.2500707507133484, "learning_rate": 1.1388353981947492e-05, "loss": 0.1036, "step": 40083 }, { "epoch": 0.7149431027717333, "grad_norm": 0.20943470299243927, "learning_rate": 1.1387048436201355e-05, "loss": 0.1164, "step": 40084 }, { "epoch": 0.714960938893447, "grad_norm": 0.22885392606258392, "learning_rate": 1.1385742943223721e-05, "loss": 0.0761, "step": 40085 }, { "epoch": 0.7149787750151607, "grad_norm": 0.3258139193058014, "learning_rate": 1.1384437503019649e-05, "loss": 0.1669, "step": 40086 }, { "epoch": 0.7149966111368744, "grad_norm": 0.26969093084335327, "learning_rate": 1.1383132115594192e-05, "loss": 0.0865, "step": 40087 }, { "epoch": 0.7150144472585881, "grad_norm": 0.22007393836975098, "learning_rate": 1.1381826780952425e-05, "loss": 0.1036, "step": 40088 }, { "epoch": 0.7150322833803018, "grad_norm": 0.23283235728740692, "learning_rate": 1.1380521499099403e-05, "loss": 0.1211, "step": 40089 }, { "epoch": 0.7150501195020155, "grad_norm": 0.2629624605178833, "learning_rate": 1.1379216270040183e-05, "loss": 0.1628, "step": 40090 }, { "epoch": 0.7150679556237292, "grad_norm": 0.26915791630744934, "learning_rate": 1.1377911093779814e-05, "loss": 0.0987, "step": 40091 }, { "epoch": 0.7150857917454428, "grad_norm": 0.2294517159461975, "learning_rate": 1.137660597032338e-05, "loss": 0.1329, "step": 40092 }, { "epoch": 0.7151036278671565, "grad_norm": 0.24991875886917114, "learning_rate": 1.1375300899675922e-05, "loss": 0.1512, "step": 40093 }, { "epoch": 0.7151214639888702, "grad_norm": 0.27796271443367004, "learning_rate": 1.1373995881842498e-05, "loss": 0.1089, "step": 40094 }, { "epoch": 0.7151393001105839, "grad_norm": 0.22614654898643494, "learning_rate": 1.137269091682818e-05, "loss": 0.1391, "step": 40095 }, { "epoch": 0.7151571362322976, "grad_norm": 0.2226305902004242, "learning_rate": 1.137138600463801e-05, "loss": 0.1133, "step": 40096 }, { "epoch": 0.7151749723540114, "grad_norm": 0.2698943614959717, "learning_rate": 1.1370081145277061e-05, "loss": 0.1434, "step": 40097 }, { "epoch": 0.7151928084757251, "grad_norm": 0.2680029571056366, "learning_rate": 1.136877633875039e-05, "loss": 0.1185, "step": 40098 }, { "epoch": 0.7152106445974388, "grad_norm": 0.2352083921432495, "learning_rate": 1.1367471585063048e-05, "loss": 0.1281, "step": 40099 }, { "epoch": 0.7152284807191525, "grad_norm": 0.28520306944847107, "learning_rate": 1.1366166884220084e-05, "loss": 0.145, "step": 40100 }, { "epoch": 0.7152463168408661, "grad_norm": 0.20300179719924927, "learning_rate": 1.1364862236226575e-05, "loss": 0.1157, "step": 40101 }, { "epoch": 0.7152641529625798, "grad_norm": 0.27353599667549133, "learning_rate": 1.1363557641087572e-05, "loss": 0.1059, "step": 40102 }, { "epoch": 0.7152819890842935, "grad_norm": 0.19483867287635803, "learning_rate": 1.1362253098808129e-05, "loss": 0.098, "step": 40103 }, { "epoch": 0.7152998252060072, "grad_norm": 0.2075476348400116, "learning_rate": 1.1360948609393293e-05, "loss": 0.0954, "step": 40104 }, { "epoch": 0.7153176613277209, "grad_norm": 0.4020407497882843, "learning_rate": 1.1359644172848141e-05, "loss": 0.1407, "step": 40105 }, { "epoch": 0.7153354974494346, "grad_norm": 0.2157728523015976, "learning_rate": 1.1358339789177718e-05, "loss": 0.0919, "step": 40106 }, { "epoch": 0.7153533335711483, "grad_norm": 0.27376970648765564, "learning_rate": 1.1357035458387083e-05, "loss": 0.1435, "step": 40107 }, { "epoch": 0.715371169692862, "grad_norm": 0.26904457807540894, "learning_rate": 1.1355731180481283e-05, "loss": 0.1198, "step": 40108 }, { "epoch": 0.7153890058145757, "grad_norm": 0.19689197838306427, "learning_rate": 1.135442695546538e-05, "loss": 0.1275, "step": 40109 }, { "epoch": 0.7154068419362893, "grad_norm": 0.3161015808582306, "learning_rate": 1.1353122783344438e-05, "loss": 0.1388, "step": 40110 }, { "epoch": 0.715424678058003, "grad_norm": 0.26607078313827515, "learning_rate": 1.1351818664123509e-05, "loss": 0.1361, "step": 40111 }, { "epoch": 0.7154425141797167, "grad_norm": 0.26398906111717224, "learning_rate": 1.1350514597807644e-05, "loss": 0.1146, "step": 40112 }, { "epoch": 0.7154603503014304, "grad_norm": 0.21497918665409088, "learning_rate": 1.134921058440189e-05, "loss": 0.1222, "step": 40113 }, { "epoch": 0.7154781864231442, "grad_norm": 0.2684513032436371, "learning_rate": 1.1347906623911316e-05, "loss": 0.1392, "step": 40114 }, { "epoch": 0.7154960225448579, "grad_norm": 0.29163411259651184, "learning_rate": 1.1346602716340976e-05, "loss": 0.1364, "step": 40115 }, { "epoch": 0.7155138586665716, "grad_norm": 0.32668426632881165, "learning_rate": 1.1345298861695917e-05, "loss": 0.1681, "step": 40116 }, { "epoch": 0.7155316947882853, "grad_norm": 0.2838995158672333, "learning_rate": 1.1343995059981188e-05, "loss": 0.1326, "step": 40117 }, { "epoch": 0.715549530909999, "grad_norm": 0.2582722008228302, "learning_rate": 1.1342691311201859e-05, "loss": 0.0842, "step": 40118 }, { "epoch": 0.7155673670317126, "grad_norm": 0.4089448153972626, "learning_rate": 1.1341387615362976e-05, "loss": 0.1377, "step": 40119 }, { "epoch": 0.7155852031534263, "grad_norm": 0.2817435562610626, "learning_rate": 1.1340083972469592e-05, "loss": 0.0972, "step": 40120 }, { "epoch": 0.71560303927514, "grad_norm": 0.22558709979057312, "learning_rate": 1.1338780382526751e-05, "loss": 0.0877, "step": 40121 }, { "epoch": 0.7156208753968537, "grad_norm": 0.24946123361587524, "learning_rate": 1.1337476845539524e-05, "loss": 0.0978, "step": 40122 }, { "epoch": 0.7156387115185674, "grad_norm": 0.3986477851867676, "learning_rate": 1.1336173361512946e-05, "loss": 0.143, "step": 40123 }, { "epoch": 0.7156565476402811, "grad_norm": 0.290179044008255, "learning_rate": 1.1334869930452093e-05, "loss": 0.1776, "step": 40124 }, { "epoch": 0.7156743837619948, "grad_norm": 0.30848413705825806, "learning_rate": 1.133356655236199e-05, "loss": 0.1863, "step": 40125 }, { "epoch": 0.7156922198837085, "grad_norm": 0.26101839542388916, "learning_rate": 1.1332263227247717e-05, "loss": 0.1065, "step": 40126 }, { "epoch": 0.7157100560054221, "grad_norm": 0.2588322162628174, "learning_rate": 1.1330959955114309e-05, "loss": 0.1127, "step": 40127 }, { "epoch": 0.7157278921271358, "grad_norm": 0.20499493181705475, "learning_rate": 1.1329656735966823e-05, "loss": 0.0862, "step": 40128 }, { "epoch": 0.7157457282488495, "grad_norm": 0.25368282198905945, "learning_rate": 1.1328353569810307e-05, "loss": 0.1409, "step": 40129 }, { "epoch": 0.7157635643705632, "grad_norm": 0.199868306517601, "learning_rate": 1.1327050456649807e-05, "loss": 0.1121, "step": 40130 }, { "epoch": 0.715781400492277, "grad_norm": 0.38141289353370667, "learning_rate": 1.132574739649039e-05, "loss": 0.1169, "step": 40131 }, { "epoch": 0.7157992366139907, "grad_norm": 0.24573341012001038, "learning_rate": 1.13244443893371e-05, "loss": 0.1218, "step": 40132 }, { "epoch": 0.7158170727357044, "grad_norm": 0.27360770106315613, "learning_rate": 1.1323141435194987e-05, "loss": 0.1932, "step": 40133 }, { "epoch": 0.7158349088574181, "grad_norm": 0.2645307183265686, "learning_rate": 1.1321838534069092e-05, "loss": 0.1524, "step": 40134 }, { "epoch": 0.7158527449791318, "grad_norm": 0.24422770738601685, "learning_rate": 1.1320535685964484e-05, "loss": 0.1253, "step": 40135 }, { "epoch": 0.7158705811008454, "grad_norm": 0.21716158092021942, "learning_rate": 1.1319232890886197e-05, "loss": 0.1071, "step": 40136 }, { "epoch": 0.7158884172225591, "grad_norm": 0.303070992231369, "learning_rate": 1.1317930148839295e-05, "loss": 0.146, "step": 40137 }, { "epoch": 0.7159062533442728, "grad_norm": 0.2110755443572998, "learning_rate": 1.1316627459828813e-05, "loss": 0.1275, "step": 40138 }, { "epoch": 0.7159240894659865, "grad_norm": 0.21533270180225372, "learning_rate": 1.1315324823859819e-05, "loss": 0.1129, "step": 40139 }, { "epoch": 0.7159419255877002, "grad_norm": 0.3444138169288635, "learning_rate": 1.1314022240937352e-05, "loss": 0.1747, "step": 40140 }, { "epoch": 0.7159597617094139, "grad_norm": 0.22958432137966156, "learning_rate": 1.1312719711066463e-05, "loss": 0.0426, "step": 40141 }, { "epoch": 0.7159775978311276, "grad_norm": 0.5284539461135864, "learning_rate": 1.1311417234252201e-05, "loss": 0.0995, "step": 40142 }, { "epoch": 0.7159954339528413, "grad_norm": 0.1922261267900467, "learning_rate": 1.13101148104996e-05, "loss": 0.0755, "step": 40143 }, { "epoch": 0.716013270074555, "grad_norm": 0.2351703643798828, "learning_rate": 1.1308812439813735e-05, "loss": 0.112, "step": 40144 }, { "epoch": 0.7160311061962686, "grad_norm": 0.3151635229587555, "learning_rate": 1.1307510122199641e-05, "loss": 0.1527, "step": 40145 }, { "epoch": 0.7160489423179823, "grad_norm": 0.22693036496639252, "learning_rate": 1.1306207857662365e-05, "loss": 0.1397, "step": 40146 }, { "epoch": 0.7160667784396961, "grad_norm": 0.24674342572689056, "learning_rate": 1.130490564620695e-05, "loss": 0.0998, "step": 40147 }, { "epoch": 0.7160846145614098, "grad_norm": 0.22183175384998322, "learning_rate": 1.130360348783846e-05, "loss": 0.1104, "step": 40148 }, { "epoch": 0.7161024506831235, "grad_norm": 0.24917374551296234, "learning_rate": 1.1302301382561934e-05, "loss": 0.0851, "step": 40149 }, { "epoch": 0.7161202868048372, "grad_norm": 0.35170769691467285, "learning_rate": 1.1300999330382409e-05, "loss": 0.113, "step": 40150 }, { "epoch": 0.7161381229265509, "grad_norm": 0.2818054258823395, "learning_rate": 1.1299697331304952e-05, "loss": 0.1032, "step": 40151 }, { "epoch": 0.7161559590482646, "grad_norm": 0.34978222846984863, "learning_rate": 1.1298395385334586e-05, "loss": 0.1637, "step": 40152 }, { "epoch": 0.7161737951699783, "grad_norm": 0.21895340085029602, "learning_rate": 1.1297093492476387e-05, "loss": 0.0675, "step": 40153 }, { "epoch": 0.7161916312916919, "grad_norm": 0.3641444444656372, "learning_rate": 1.1295791652735383e-05, "loss": 0.1401, "step": 40154 }, { "epoch": 0.7162094674134056, "grad_norm": 0.21930791437625885, "learning_rate": 1.1294489866116625e-05, "loss": 0.0834, "step": 40155 }, { "epoch": 0.7162273035351193, "grad_norm": 0.29976096749305725, "learning_rate": 1.1293188132625149e-05, "loss": 0.0945, "step": 40156 }, { "epoch": 0.716245139656833, "grad_norm": 0.2277435064315796, "learning_rate": 1.129188645226602e-05, "loss": 0.1119, "step": 40157 }, { "epoch": 0.7162629757785467, "grad_norm": 0.26885366439819336, "learning_rate": 1.1290584825044273e-05, "loss": 0.0742, "step": 40158 }, { "epoch": 0.7162808119002604, "grad_norm": 0.233631432056427, "learning_rate": 1.1289283250964955e-05, "loss": 0.1256, "step": 40159 }, { "epoch": 0.7162986480219741, "grad_norm": 0.2285805344581604, "learning_rate": 1.12879817300331e-05, "loss": 0.1258, "step": 40160 }, { "epoch": 0.7163164841436878, "grad_norm": 0.21788142621517181, "learning_rate": 1.1286680262253774e-05, "loss": 0.1005, "step": 40161 }, { "epoch": 0.7163343202654014, "grad_norm": 0.21417208015918732, "learning_rate": 1.1285378847632012e-05, "loss": 0.0881, "step": 40162 }, { "epoch": 0.7163521563871151, "grad_norm": 0.281019002199173, "learning_rate": 1.1284077486172847e-05, "loss": 0.1276, "step": 40163 }, { "epoch": 0.7163699925088289, "grad_norm": 0.2836749255657196, "learning_rate": 1.1282776177881346e-05, "loss": 0.2059, "step": 40164 }, { "epoch": 0.7163878286305426, "grad_norm": 0.2243744432926178, "learning_rate": 1.1281474922762534e-05, "loss": 0.0766, "step": 40165 }, { "epoch": 0.7164056647522563, "grad_norm": 0.27451154589653015, "learning_rate": 1.128017372082147e-05, "loss": 0.1142, "step": 40166 }, { "epoch": 0.71642350087397, "grad_norm": 0.23932093381881714, "learning_rate": 1.1278872572063196e-05, "loss": 0.1255, "step": 40167 }, { "epoch": 0.7164413369956837, "grad_norm": 0.24328601360321045, "learning_rate": 1.1277571476492746e-05, "loss": 0.1154, "step": 40168 }, { "epoch": 0.7164591731173974, "grad_norm": 0.23491215705871582, "learning_rate": 1.127627043411516e-05, "loss": 0.0947, "step": 40169 }, { "epoch": 0.7164770092391111, "grad_norm": 0.2488335818052292, "learning_rate": 1.12749694449355e-05, "loss": 0.1221, "step": 40170 }, { "epoch": 0.7164948453608248, "grad_norm": 0.21399731934070587, "learning_rate": 1.12736685089588e-05, "loss": 0.0813, "step": 40171 }, { "epoch": 0.7165126814825384, "grad_norm": 0.280479371547699, "learning_rate": 1.1272367626190103e-05, "loss": 0.1527, "step": 40172 }, { "epoch": 0.7165305176042521, "grad_norm": 0.16917753219604492, "learning_rate": 1.127106679663444e-05, "loss": 0.0683, "step": 40173 }, { "epoch": 0.7165483537259658, "grad_norm": 0.2876274883747101, "learning_rate": 1.1269766020296872e-05, "loss": 0.1005, "step": 40174 }, { "epoch": 0.7165661898476795, "grad_norm": 0.27877625823020935, "learning_rate": 1.1268465297182431e-05, "loss": 0.0939, "step": 40175 }, { "epoch": 0.7165840259693932, "grad_norm": 0.2975919246673584, "learning_rate": 1.1267164627296164e-05, "loss": 0.1339, "step": 40176 }, { "epoch": 0.7166018620911069, "grad_norm": 0.2593155801296234, "learning_rate": 1.1265864010643101e-05, "loss": 0.1245, "step": 40177 }, { "epoch": 0.7166196982128206, "grad_norm": 0.24974867701530457, "learning_rate": 1.1264563447228301e-05, "loss": 0.1462, "step": 40178 }, { "epoch": 0.7166375343345343, "grad_norm": 0.2695964276790619, "learning_rate": 1.126326293705679e-05, "loss": 0.1151, "step": 40179 }, { "epoch": 0.7166553704562479, "grad_norm": 0.2947452962398529, "learning_rate": 1.1261962480133626e-05, "loss": 0.0953, "step": 40180 }, { "epoch": 0.7166732065779617, "grad_norm": 0.25085800886154175, "learning_rate": 1.1260662076463837e-05, "loss": 0.1332, "step": 40181 }, { "epoch": 0.7166910426996754, "grad_norm": 0.33529868721961975, "learning_rate": 1.1259361726052459e-05, "loss": 0.1467, "step": 40182 }, { "epoch": 0.7167088788213891, "grad_norm": 0.3045383393764496, "learning_rate": 1.1258061428904552e-05, "loss": 0.1027, "step": 40183 }, { "epoch": 0.7167267149431028, "grad_norm": 0.29902833700180054, "learning_rate": 1.1256761185025142e-05, "loss": 0.1416, "step": 40184 }, { "epoch": 0.7167445510648165, "grad_norm": 0.23123466968536377, "learning_rate": 1.1255460994419275e-05, "loss": 0.1472, "step": 40185 }, { "epoch": 0.7167623871865302, "grad_norm": 0.23703597486019135, "learning_rate": 1.1254160857091977e-05, "loss": 0.1014, "step": 40186 }, { "epoch": 0.7167802233082439, "grad_norm": 0.29561474919319153, "learning_rate": 1.125286077304831e-05, "loss": 0.1523, "step": 40187 }, { "epoch": 0.7167980594299576, "grad_norm": 0.22643722593784332, "learning_rate": 1.1251560742293305e-05, "loss": 0.1118, "step": 40188 }, { "epoch": 0.7168158955516712, "grad_norm": 0.3007449805736542, "learning_rate": 1.1250260764831993e-05, "loss": 0.1288, "step": 40189 }, { "epoch": 0.7168337316733849, "grad_norm": 0.21088233590126038, "learning_rate": 1.1248960840669415e-05, "loss": 0.1391, "step": 40190 }, { "epoch": 0.7168515677950986, "grad_norm": 0.26974600553512573, "learning_rate": 1.1247660969810622e-05, "loss": 0.1323, "step": 40191 }, { "epoch": 0.7168694039168123, "grad_norm": 0.28961166739463806, "learning_rate": 1.1246361152260636e-05, "loss": 0.1363, "step": 40192 }, { "epoch": 0.716887240038526, "grad_norm": 0.2893197238445282, "learning_rate": 1.1245061388024514e-05, "loss": 0.1365, "step": 40193 }, { "epoch": 0.7169050761602397, "grad_norm": 0.29267364740371704, "learning_rate": 1.1243761677107285e-05, "loss": 0.1177, "step": 40194 }, { "epoch": 0.7169229122819534, "grad_norm": 0.34230703115463257, "learning_rate": 1.1242462019513978e-05, "loss": 0.1039, "step": 40195 }, { "epoch": 0.7169407484036671, "grad_norm": 0.27069658041000366, "learning_rate": 1.1241162415249648e-05, "loss": 0.111, "step": 40196 }, { "epoch": 0.7169585845253807, "grad_norm": 0.23034998774528503, "learning_rate": 1.1239862864319326e-05, "loss": 0.1025, "step": 40197 }, { "epoch": 0.7169764206470945, "grad_norm": 0.20744100213050842, "learning_rate": 1.1238563366728044e-05, "loss": 0.1237, "step": 40198 }, { "epoch": 0.7169942567688082, "grad_norm": 0.29052066802978516, "learning_rate": 1.1237263922480839e-05, "loss": 0.1711, "step": 40199 }, { "epoch": 0.7170120928905219, "grad_norm": 0.23305657505989075, "learning_rate": 1.123596453158276e-05, "loss": 0.0902, "step": 40200 }, { "epoch": 0.7170299290122356, "grad_norm": 0.1973293125629425, "learning_rate": 1.1234665194038838e-05, "loss": 0.1053, "step": 40201 }, { "epoch": 0.7170477651339493, "grad_norm": 0.2828347980976105, "learning_rate": 1.1233365909854104e-05, "loss": 0.1023, "step": 40202 }, { "epoch": 0.717065601255663, "grad_norm": 0.33049723505973816, "learning_rate": 1.1232066679033592e-05, "loss": 0.1449, "step": 40203 }, { "epoch": 0.7170834373773767, "grad_norm": 0.2718612551689148, "learning_rate": 1.1230767501582356e-05, "loss": 0.1625, "step": 40204 }, { "epoch": 0.7171012734990904, "grad_norm": 0.4590749740600586, "learning_rate": 1.1229468377505418e-05, "loss": 0.1576, "step": 40205 }, { "epoch": 0.717119109620804, "grad_norm": 0.2407943159341812, "learning_rate": 1.1228169306807809e-05, "loss": 0.0804, "step": 40206 }, { "epoch": 0.7171369457425177, "grad_norm": 0.27960464358329773, "learning_rate": 1.1226870289494581e-05, "loss": 0.1319, "step": 40207 }, { "epoch": 0.7171547818642314, "grad_norm": 0.23117999732494354, "learning_rate": 1.1225571325570753e-05, "loss": 0.0736, "step": 40208 }, { "epoch": 0.7171726179859451, "grad_norm": 0.2947548031806946, "learning_rate": 1.1224272415041376e-05, "loss": 0.1804, "step": 40209 }, { "epoch": 0.7171904541076588, "grad_norm": 0.31633463501930237, "learning_rate": 1.1222973557911477e-05, "loss": 0.1715, "step": 40210 }, { "epoch": 0.7172082902293725, "grad_norm": 0.25023147463798523, "learning_rate": 1.122167475418609e-05, "loss": 0.1319, "step": 40211 }, { "epoch": 0.7172261263510862, "grad_norm": 0.19847945868968964, "learning_rate": 1.1220376003870242e-05, "loss": 0.0821, "step": 40212 }, { "epoch": 0.7172439624727999, "grad_norm": 0.18981978297233582, "learning_rate": 1.1219077306968987e-05, "loss": 0.0881, "step": 40213 }, { "epoch": 0.7172617985945136, "grad_norm": 0.33658257126808167, "learning_rate": 1.1217778663487346e-05, "loss": 0.1334, "step": 40214 }, { "epoch": 0.7172796347162274, "grad_norm": 0.23656687140464783, "learning_rate": 1.1216480073430356e-05, "loss": 0.1568, "step": 40215 }, { "epoch": 0.717297470837941, "grad_norm": 0.31604477763175964, "learning_rate": 1.1215181536803041e-05, "loss": 0.0778, "step": 40216 }, { "epoch": 0.7173153069596547, "grad_norm": 0.29235780239105225, "learning_rate": 1.1213883053610453e-05, "loss": 0.1157, "step": 40217 }, { "epoch": 0.7173331430813684, "grad_norm": 0.258181631565094, "learning_rate": 1.1212584623857614e-05, "loss": 0.1042, "step": 40218 }, { "epoch": 0.7173509792030821, "grad_norm": 0.24919407069683075, "learning_rate": 1.1211286247549549e-05, "loss": 0.1175, "step": 40219 }, { "epoch": 0.7173688153247958, "grad_norm": 0.223277747631073, "learning_rate": 1.1209987924691312e-05, "loss": 0.095, "step": 40220 }, { "epoch": 0.7173866514465095, "grad_norm": 0.3121904134750366, "learning_rate": 1.1208689655287915e-05, "loss": 0.1349, "step": 40221 }, { "epoch": 0.7174044875682232, "grad_norm": 0.38540464639663696, "learning_rate": 1.1207391439344412e-05, "loss": 0.1131, "step": 40222 }, { "epoch": 0.7174223236899369, "grad_norm": 0.25548434257507324, "learning_rate": 1.120609327686582e-05, "loss": 0.1365, "step": 40223 }, { "epoch": 0.7174401598116505, "grad_norm": 0.2876202464103699, "learning_rate": 1.1204795167857176e-05, "loss": 0.1263, "step": 40224 }, { "epoch": 0.7174579959333642, "grad_norm": 0.2345791757106781, "learning_rate": 1.1203497112323501e-05, "loss": 0.1163, "step": 40225 }, { "epoch": 0.7174758320550779, "grad_norm": 0.33381974697113037, "learning_rate": 1.1202199110269847e-05, "loss": 0.0909, "step": 40226 }, { "epoch": 0.7174936681767916, "grad_norm": 0.30950236320495605, "learning_rate": 1.1200901161701235e-05, "loss": 0.1571, "step": 40227 }, { "epoch": 0.7175115042985053, "grad_norm": 0.2546635568141937, "learning_rate": 1.1199603266622696e-05, "loss": 0.1043, "step": 40228 }, { "epoch": 0.717529340420219, "grad_norm": 0.3141431212425232, "learning_rate": 1.1198305425039252e-05, "loss": 0.0736, "step": 40229 }, { "epoch": 0.7175471765419327, "grad_norm": 0.39999258518218994, "learning_rate": 1.1197007636955953e-05, "loss": 0.133, "step": 40230 }, { "epoch": 0.7175650126636464, "grad_norm": 0.25507768988609314, "learning_rate": 1.1195709902377819e-05, "loss": 0.1267, "step": 40231 }, { "epoch": 0.7175828487853602, "grad_norm": 0.2966732382774353, "learning_rate": 1.119441222130988e-05, "loss": 0.1423, "step": 40232 }, { "epoch": 0.7176006849070738, "grad_norm": 0.26241880655288696, "learning_rate": 1.119311459375716e-05, "loss": 0.1083, "step": 40233 }, { "epoch": 0.7176185210287875, "grad_norm": 0.25376299023628235, "learning_rate": 1.1191817019724704e-05, "loss": 0.1143, "step": 40234 }, { "epoch": 0.7176363571505012, "grad_norm": 0.32532167434692383, "learning_rate": 1.1190519499217525e-05, "loss": 0.1215, "step": 40235 }, { "epoch": 0.7176541932722149, "grad_norm": 0.26989617943763733, "learning_rate": 1.1189222032240672e-05, "loss": 0.0771, "step": 40236 }, { "epoch": 0.7176720293939286, "grad_norm": 0.21981695294380188, "learning_rate": 1.1187924618799165e-05, "loss": 0.0744, "step": 40237 }, { "epoch": 0.7176898655156423, "grad_norm": 0.22327512502670288, "learning_rate": 1.1186627258898025e-05, "loss": 0.1102, "step": 40238 }, { "epoch": 0.717707701637356, "grad_norm": 0.2637295126914978, "learning_rate": 1.1185329952542296e-05, "loss": 0.1365, "step": 40239 }, { "epoch": 0.7177255377590697, "grad_norm": 0.23144227266311646, "learning_rate": 1.1184032699736998e-05, "loss": 0.1464, "step": 40240 }, { "epoch": 0.7177433738807834, "grad_norm": 0.24773211777210236, "learning_rate": 1.1182735500487162e-05, "loss": 0.1303, "step": 40241 }, { "epoch": 0.717761210002497, "grad_norm": 0.24347743391990662, "learning_rate": 1.118143835479781e-05, "loss": 0.1105, "step": 40242 }, { "epoch": 0.7177790461242107, "grad_norm": 0.24755750596523285, "learning_rate": 1.1180141262673982e-05, "loss": 0.0882, "step": 40243 }, { "epoch": 0.7177968822459244, "grad_norm": 0.3391305208206177, "learning_rate": 1.1178844224120699e-05, "loss": 0.1061, "step": 40244 }, { "epoch": 0.7178147183676381, "grad_norm": 0.2461908459663391, "learning_rate": 1.1177547239142991e-05, "loss": 0.11, "step": 40245 }, { "epoch": 0.7178325544893518, "grad_norm": 0.2127000242471695, "learning_rate": 1.1176250307745875e-05, "loss": 0.0986, "step": 40246 }, { "epoch": 0.7178503906110655, "grad_norm": 0.2532394826412201, "learning_rate": 1.1174953429934395e-05, "loss": 0.145, "step": 40247 }, { "epoch": 0.7178682267327793, "grad_norm": 0.2500765025615692, "learning_rate": 1.1173656605713561e-05, "loss": 0.1241, "step": 40248 }, { "epoch": 0.717886062854493, "grad_norm": 0.27345430850982666, "learning_rate": 1.117235983508842e-05, "loss": 0.0909, "step": 40249 }, { "epoch": 0.7179038989762067, "grad_norm": 0.2972443103790283, "learning_rate": 1.1171063118063988e-05, "loss": 0.1465, "step": 40250 }, { "epoch": 0.7179217350979203, "grad_norm": 0.25764113664627075, "learning_rate": 1.1169766454645283e-05, "loss": 0.1082, "step": 40251 }, { "epoch": 0.717939571219634, "grad_norm": 0.2752144932746887, "learning_rate": 1.1168469844837348e-05, "loss": 0.1096, "step": 40252 }, { "epoch": 0.7179574073413477, "grad_norm": 0.2440723180770874, "learning_rate": 1.1167173288645203e-05, "loss": 0.1321, "step": 40253 }, { "epoch": 0.7179752434630614, "grad_norm": 0.24367555975914001, "learning_rate": 1.116587678607387e-05, "loss": 0.1277, "step": 40254 }, { "epoch": 0.7179930795847751, "grad_norm": 0.3189479410648346, "learning_rate": 1.116458033712837e-05, "loss": 0.1129, "step": 40255 }, { "epoch": 0.7180109157064888, "grad_norm": 0.31693968176841736, "learning_rate": 1.1163283941813742e-05, "loss": 0.1074, "step": 40256 }, { "epoch": 0.7180287518282025, "grad_norm": 0.2833858132362366, "learning_rate": 1.1161987600135005e-05, "loss": 0.1063, "step": 40257 }, { "epoch": 0.7180465879499162, "grad_norm": 0.24045118689537048, "learning_rate": 1.1160691312097186e-05, "loss": 0.1226, "step": 40258 }, { "epoch": 0.7180644240716298, "grad_norm": 0.17967744171619415, "learning_rate": 1.1159395077705303e-05, "loss": 0.077, "step": 40259 }, { "epoch": 0.7180822601933435, "grad_norm": 0.386762410402298, "learning_rate": 1.1158098896964378e-05, "loss": 0.142, "step": 40260 }, { "epoch": 0.7181000963150572, "grad_norm": 0.2182648628950119, "learning_rate": 1.1156802769879452e-05, "loss": 0.1361, "step": 40261 }, { "epoch": 0.7181179324367709, "grad_norm": 0.23098449409008026, "learning_rate": 1.115550669645553e-05, "loss": 0.1094, "step": 40262 }, { "epoch": 0.7181357685584846, "grad_norm": 0.28771594166755676, "learning_rate": 1.1154210676697657e-05, "loss": 0.1472, "step": 40263 }, { "epoch": 0.7181536046801983, "grad_norm": 0.2458367645740509, "learning_rate": 1.1152914710610835e-05, "loss": 0.136, "step": 40264 }, { "epoch": 0.7181714408019121, "grad_norm": 0.2774643003940582, "learning_rate": 1.1151618798200106e-05, "loss": 0.1222, "step": 40265 }, { "epoch": 0.7181892769236258, "grad_norm": 0.3647003769874573, "learning_rate": 1.1150322939470487e-05, "loss": 0.1391, "step": 40266 }, { "epoch": 0.7182071130453395, "grad_norm": 0.20595820248126984, "learning_rate": 1.1149027134427e-05, "loss": 0.1039, "step": 40267 }, { "epoch": 0.7182249491670532, "grad_norm": 0.27136728167533875, "learning_rate": 1.1147731383074658e-05, "loss": 0.0996, "step": 40268 }, { "epoch": 0.7182427852887668, "grad_norm": 0.3267781436443329, "learning_rate": 1.1146435685418501e-05, "loss": 0.0876, "step": 40269 }, { "epoch": 0.7182606214104805, "grad_norm": 0.2590530812740326, "learning_rate": 1.1145140041463547e-05, "loss": 0.1312, "step": 40270 }, { "epoch": 0.7182784575321942, "grad_norm": 0.22409094870090485, "learning_rate": 1.1143844451214816e-05, "loss": 0.0969, "step": 40271 }, { "epoch": 0.7182962936539079, "grad_norm": 0.29956942796707153, "learning_rate": 1.1142548914677329e-05, "loss": 0.0724, "step": 40272 }, { "epoch": 0.7183141297756216, "grad_norm": 0.24620793759822845, "learning_rate": 1.1141253431856097e-05, "loss": 0.1015, "step": 40273 }, { "epoch": 0.7183319658973353, "grad_norm": 0.2617422640323639, "learning_rate": 1.1139958002756166e-05, "loss": 0.1041, "step": 40274 }, { "epoch": 0.718349802019049, "grad_norm": 0.37246477603912354, "learning_rate": 1.1138662627382535e-05, "loss": 0.13, "step": 40275 }, { "epoch": 0.7183676381407627, "grad_norm": 0.34235435724258423, "learning_rate": 1.1137367305740243e-05, "loss": 0.1141, "step": 40276 }, { "epoch": 0.7183854742624763, "grad_norm": 0.2520189881324768, "learning_rate": 1.1136072037834294e-05, "loss": 0.1329, "step": 40277 }, { "epoch": 0.71840331038419, "grad_norm": 0.33708319067955017, "learning_rate": 1.1134776823669727e-05, "loss": 0.1748, "step": 40278 }, { "epoch": 0.7184211465059037, "grad_norm": 0.3198677897453308, "learning_rate": 1.1133481663251556e-05, "loss": 0.1373, "step": 40279 }, { "epoch": 0.7184389826276174, "grad_norm": 0.21848969161510468, "learning_rate": 1.1132186556584797e-05, "loss": 0.0704, "step": 40280 }, { "epoch": 0.7184568187493311, "grad_norm": 0.33849579095840454, "learning_rate": 1.1130891503674465e-05, "loss": 0.1301, "step": 40281 }, { "epoch": 0.7184746548710449, "grad_norm": 0.25301992893218994, "learning_rate": 1.1129596504525597e-05, "loss": 0.1467, "step": 40282 }, { "epoch": 0.7184924909927586, "grad_norm": 0.23240190744400024, "learning_rate": 1.1128301559143204e-05, "loss": 0.1475, "step": 40283 }, { "epoch": 0.7185103271144723, "grad_norm": 0.3365060091018677, "learning_rate": 1.1127006667532305e-05, "loss": 0.1757, "step": 40284 }, { "epoch": 0.718528163236186, "grad_norm": 0.2211749404668808, "learning_rate": 1.112571182969792e-05, "loss": 0.0343, "step": 40285 }, { "epoch": 0.7185459993578996, "grad_norm": 0.31455111503601074, "learning_rate": 1.1124417045645055e-05, "loss": 0.0993, "step": 40286 }, { "epoch": 0.7185638354796133, "grad_norm": 0.3140621781349182, "learning_rate": 1.1123122315378756e-05, "loss": 0.1227, "step": 40287 }, { "epoch": 0.718581671601327, "grad_norm": 0.3453923463821411, "learning_rate": 1.1121827638904026e-05, "loss": 0.2112, "step": 40288 }, { "epoch": 0.7185995077230407, "grad_norm": 0.26226431131362915, "learning_rate": 1.1120533016225877e-05, "loss": 0.1317, "step": 40289 }, { "epoch": 0.7186173438447544, "grad_norm": 0.2679152488708496, "learning_rate": 1.1119238447349334e-05, "loss": 0.1146, "step": 40290 }, { "epoch": 0.7186351799664681, "grad_norm": 0.2937815189361572, "learning_rate": 1.1117943932279429e-05, "loss": 0.1312, "step": 40291 }, { "epoch": 0.7186530160881818, "grad_norm": 0.19895359873771667, "learning_rate": 1.1116649471021165e-05, "loss": 0.1218, "step": 40292 }, { "epoch": 0.7186708522098955, "grad_norm": 0.35976168513298035, "learning_rate": 1.1115355063579566e-05, "loss": 0.125, "step": 40293 }, { "epoch": 0.7186886883316091, "grad_norm": 0.29435932636260986, "learning_rate": 1.1114060709959635e-05, "loss": 0.1581, "step": 40294 }, { "epoch": 0.7187065244533228, "grad_norm": 0.2642267048358917, "learning_rate": 1.1112766410166411e-05, "loss": 0.1293, "step": 40295 }, { "epoch": 0.7187243605750365, "grad_norm": 0.25632554292678833, "learning_rate": 1.11114721642049e-05, "loss": 0.1607, "step": 40296 }, { "epoch": 0.7187421966967502, "grad_norm": 0.3137747645378113, "learning_rate": 1.1110177972080121e-05, "loss": 0.1254, "step": 40297 }, { "epoch": 0.7187600328184639, "grad_norm": 0.17836590111255646, "learning_rate": 1.1108883833797088e-05, "loss": 0.1025, "step": 40298 }, { "epoch": 0.7187778689401777, "grad_norm": 0.27142930030822754, "learning_rate": 1.110758974936081e-05, "loss": 0.1639, "step": 40299 }, { "epoch": 0.7187957050618914, "grad_norm": 0.2555834650993347, "learning_rate": 1.1106295718776322e-05, "loss": 0.1029, "step": 40300 }, { "epoch": 0.7188135411836051, "grad_norm": 0.28176093101501465, "learning_rate": 1.1105001742048632e-05, "loss": 0.1062, "step": 40301 }, { "epoch": 0.7188313773053188, "grad_norm": 0.27635452151298523, "learning_rate": 1.1103707819182744e-05, "loss": 0.1035, "step": 40302 }, { "epoch": 0.7188492134270325, "grad_norm": 0.21727557480335236, "learning_rate": 1.1102413950183692e-05, "loss": 0.0906, "step": 40303 }, { "epoch": 0.7188670495487461, "grad_norm": 0.27734559774398804, "learning_rate": 1.1101120135056479e-05, "loss": 0.1495, "step": 40304 }, { "epoch": 0.7188848856704598, "grad_norm": 0.20844654738903046, "learning_rate": 1.109982637380613e-05, "loss": 0.0864, "step": 40305 }, { "epoch": 0.7189027217921735, "grad_norm": 0.2925710678100586, "learning_rate": 1.1098532666437655e-05, "loss": 0.0967, "step": 40306 }, { "epoch": 0.7189205579138872, "grad_norm": 0.20547480881214142, "learning_rate": 1.109723901295606e-05, "loss": 0.0975, "step": 40307 }, { "epoch": 0.7189383940356009, "grad_norm": 0.23280969262123108, "learning_rate": 1.109594541336638e-05, "loss": 0.0827, "step": 40308 }, { "epoch": 0.7189562301573146, "grad_norm": 0.24231484532356262, "learning_rate": 1.1094651867673614e-05, "loss": 0.0923, "step": 40309 }, { "epoch": 0.7189740662790283, "grad_norm": 0.24786196649074554, "learning_rate": 1.1093358375882781e-05, "loss": 0.1084, "step": 40310 }, { "epoch": 0.718991902400742, "grad_norm": 0.2697654664516449, "learning_rate": 1.1092064937998897e-05, "loss": 0.1326, "step": 40311 }, { "epoch": 0.7190097385224556, "grad_norm": 0.22581391036510468, "learning_rate": 1.109077155402696e-05, "loss": 0.1144, "step": 40312 }, { "epoch": 0.7190275746441693, "grad_norm": 0.2712780237197876, "learning_rate": 1.1089478223972007e-05, "loss": 0.1329, "step": 40313 }, { "epoch": 0.719045410765883, "grad_norm": 0.24814161658287048, "learning_rate": 1.1088184947839043e-05, "loss": 0.1095, "step": 40314 }, { "epoch": 0.7190632468875967, "grad_norm": 0.22884242236614227, "learning_rate": 1.1086891725633078e-05, "loss": 0.0957, "step": 40315 }, { "epoch": 0.7190810830093105, "grad_norm": 0.2308569699525833, "learning_rate": 1.1085598557359117e-05, "loss": 0.0858, "step": 40316 }, { "epoch": 0.7190989191310242, "grad_norm": 0.2363572120666504, "learning_rate": 1.108430544302218e-05, "loss": 0.0823, "step": 40317 }, { "epoch": 0.7191167552527379, "grad_norm": 0.2792021930217743, "learning_rate": 1.1083012382627295e-05, "loss": 0.0868, "step": 40318 }, { "epoch": 0.7191345913744516, "grad_norm": 0.283976674079895, "learning_rate": 1.1081719376179456e-05, "loss": 0.1306, "step": 40319 }, { "epoch": 0.7191524274961653, "grad_norm": 0.2579628527164459, "learning_rate": 1.1080426423683674e-05, "loss": 0.1174, "step": 40320 }, { "epoch": 0.719170263617879, "grad_norm": 0.3998417556285858, "learning_rate": 1.1079133525144975e-05, "loss": 0.1084, "step": 40321 }, { "epoch": 0.7191880997395926, "grad_norm": 0.3299851417541504, "learning_rate": 1.1077840680568361e-05, "loss": 0.147, "step": 40322 }, { "epoch": 0.7192059358613063, "grad_norm": 0.24805738031864166, "learning_rate": 1.1076547889958846e-05, "loss": 0.1008, "step": 40323 }, { "epoch": 0.71922377198302, "grad_norm": 0.257362961769104, "learning_rate": 1.107525515332144e-05, "loss": 0.1068, "step": 40324 }, { "epoch": 0.7192416081047337, "grad_norm": 0.23313094675540924, "learning_rate": 1.1073962470661147e-05, "loss": 0.0942, "step": 40325 }, { "epoch": 0.7192594442264474, "grad_norm": 0.26476219296455383, "learning_rate": 1.1072669841982995e-05, "loss": 0.0917, "step": 40326 }, { "epoch": 0.7192772803481611, "grad_norm": 0.266126811504364, "learning_rate": 1.1071377267291983e-05, "loss": 0.0904, "step": 40327 }, { "epoch": 0.7192951164698748, "grad_norm": 0.24406197667121887, "learning_rate": 1.1070084746593124e-05, "loss": 0.1125, "step": 40328 }, { "epoch": 0.7193129525915885, "grad_norm": 0.2179514616727829, "learning_rate": 1.1068792279891419e-05, "loss": 0.1357, "step": 40329 }, { "epoch": 0.7193307887133021, "grad_norm": 0.2856987416744232, "learning_rate": 1.1067499867191894e-05, "loss": 0.1222, "step": 40330 }, { "epoch": 0.7193486248350158, "grad_norm": 0.26420894265174866, "learning_rate": 1.1066207508499547e-05, "loss": 0.0908, "step": 40331 }, { "epoch": 0.7193664609567295, "grad_norm": 0.20407073199748993, "learning_rate": 1.1064915203819396e-05, "loss": 0.1206, "step": 40332 }, { "epoch": 0.7193842970784433, "grad_norm": 0.3084770441055298, "learning_rate": 1.1063622953156442e-05, "loss": 0.1834, "step": 40333 }, { "epoch": 0.719402133200157, "grad_norm": 0.1914638727903366, "learning_rate": 1.1062330756515704e-05, "loss": 0.0983, "step": 40334 }, { "epoch": 0.7194199693218707, "grad_norm": 0.38346391916275024, "learning_rate": 1.1061038613902189e-05, "loss": 0.1813, "step": 40335 }, { "epoch": 0.7194378054435844, "grad_norm": 0.28849998116493225, "learning_rate": 1.1059746525320902e-05, "loss": 0.0853, "step": 40336 }, { "epoch": 0.7194556415652981, "grad_norm": 0.22317031025886536, "learning_rate": 1.1058454490776851e-05, "loss": 0.0988, "step": 40337 }, { "epoch": 0.7194734776870118, "grad_norm": 0.21158041059970856, "learning_rate": 1.1057162510275037e-05, "loss": 0.1033, "step": 40338 }, { "epoch": 0.7194913138087254, "grad_norm": 0.24587425589561462, "learning_rate": 1.1055870583820488e-05, "loss": 0.0759, "step": 40339 }, { "epoch": 0.7195091499304391, "grad_norm": 0.2673921287059784, "learning_rate": 1.1054578711418197e-05, "loss": 0.0978, "step": 40340 }, { "epoch": 0.7195269860521528, "grad_norm": 0.297579288482666, "learning_rate": 1.105328689307318e-05, "loss": 0.0778, "step": 40341 }, { "epoch": 0.7195448221738665, "grad_norm": 0.23593173921108246, "learning_rate": 1.105199512879043e-05, "loss": 0.1001, "step": 40342 }, { "epoch": 0.7195626582955802, "grad_norm": 0.2672853171825409, "learning_rate": 1.105070341857497e-05, "loss": 0.1103, "step": 40343 }, { "epoch": 0.7195804944172939, "grad_norm": 0.22973723709583282, "learning_rate": 1.1049411762431804e-05, "loss": 0.1147, "step": 40344 }, { "epoch": 0.7195983305390076, "grad_norm": 0.292667031288147, "learning_rate": 1.1048120160365927e-05, "loss": 0.1133, "step": 40345 }, { "epoch": 0.7196161666607213, "grad_norm": 0.25625690817832947, "learning_rate": 1.1046828612382353e-05, "loss": 0.0992, "step": 40346 }, { "epoch": 0.719634002782435, "grad_norm": 0.2784973084926605, "learning_rate": 1.1045537118486105e-05, "loss": 0.1333, "step": 40347 }, { "epoch": 0.7196518389041486, "grad_norm": 0.2758970260620117, "learning_rate": 1.1044245678682169e-05, "loss": 0.1686, "step": 40348 }, { "epoch": 0.7196696750258624, "grad_norm": 0.2847234606742859, "learning_rate": 1.104295429297556e-05, "loss": 0.123, "step": 40349 }, { "epoch": 0.7196875111475761, "grad_norm": 0.2947148084640503, "learning_rate": 1.104166296137128e-05, "loss": 0.1427, "step": 40350 }, { "epoch": 0.7197053472692898, "grad_norm": 0.32902270555496216, "learning_rate": 1.1040371683874326e-05, "loss": 0.1188, "step": 40351 }, { "epoch": 0.7197231833910035, "grad_norm": 0.22397801280021667, "learning_rate": 1.1039080460489724e-05, "loss": 0.0989, "step": 40352 }, { "epoch": 0.7197410195127172, "grad_norm": 0.2822009027004242, "learning_rate": 1.1037789291222466e-05, "loss": 0.0844, "step": 40353 }, { "epoch": 0.7197588556344309, "grad_norm": 0.2531472444534302, "learning_rate": 1.1036498176077558e-05, "loss": 0.0958, "step": 40354 }, { "epoch": 0.7197766917561446, "grad_norm": 0.21755728125572205, "learning_rate": 1.103520711506e-05, "loss": 0.1121, "step": 40355 }, { "epoch": 0.7197945278778582, "grad_norm": 0.24625883996486664, "learning_rate": 1.103391610817481e-05, "loss": 0.1189, "step": 40356 }, { "epoch": 0.7198123639995719, "grad_norm": 0.28660720586776733, "learning_rate": 1.1032625155426985e-05, "loss": 0.1818, "step": 40357 }, { "epoch": 0.7198302001212856, "grad_norm": 0.23317407071590424, "learning_rate": 1.1031334256821521e-05, "loss": 0.1241, "step": 40358 }, { "epoch": 0.7198480362429993, "grad_norm": 0.335793137550354, "learning_rate": 1.1030043412363439e-05, "loss": 0.1087, "step": 40359 }, { "epoch": 0.719865872364713, "grad_norm": 0.2195972353219986, "learning_rate": 1.1028752622057722e-05, "loss": 0.094, "step": 40360 }, { "epoch": 0.7198837084864267, "grad_norm": 0.392493337392807, "learning_rate": 1.1027461885909395e-05, "loss": 0.1382, "step": 40361 }, { "epoch": 0.7199015446081404, "grad_norm": 0.34013909101486206, "learning_rate": 1.1026171203923455e-05, "loss": 0.1696, "step": 40362 }, { "epoch": 0.7199193807298541, "grad_norm": 0.2574087381362915, "learning_rate": 1.1024880576104899e-05, "loss": 0.129, "step": 40363 }, { "epoch": 0.7199372168515678, "grad_norm": 0.32612553238868713, "learning_rate": 1.1023590002458725e-05, "loss": 0.1347, "step": 40364 }, { "epoch": 0.7199550529732814, "grad_norm": 0.27196574211120605, "learning_rate": 1.102229948298995e-05, "loss": 0.1251, "step": 40365 }, { "epoch": 0.7199728890949952, "grad_norm": 0.25601544976234436, "learning_rate": 1.1021009017703573e-05, "loss": 0.0676, "step": 40366 }, { "epoch": 0.7199907252167089, "grad_norm": 0.3186188042163849, "learning_rate": 1.1019718606604593e-05, "loss": 0.1648, "step": 40367 }, { "epoch": 0.7200085613384226, "grad_norm": 0.27588871121406555, "learning_rate": 1.1018428249698e-05, "loss": 0.1284, "step": 40368 }, { "epoch": 0.7200263974601363, "grad_norm": 0.19100086390972137, "learning_rate": 1.101713794698882e-05, "loss": 0.0657, "step": 40369 }, { "epoch": 0.72004423358185, "grad_norm": 0.2826154828071594, "learning_rate": 1.1015847698482043e-05, "loss": 0.1151, "step": 40370 }, { "epoch": 0.7200620697035637, "grad_norm": 0.28423142433166504, "learning_rate": 1.101455750418267e-05, "loss": 0.125, "step": 40371 }, { "epoch": 0.7200799058252774, "grad_norm": 0.30817824602127075, "learning_rate": 1.1013267364095694e-05, "loss": 0.1409, "step": 40372 }, { "epoch": 0.720097741946991, "grad_norm": 0.2409517616033554, "learning_rate": 1.1011977278226124e-05, "loss": 0.119, "step": 40373 }, { "epoch": 0.7201155780687047, "grad_norm": 0.21675218641757965, "learning_rate": 1.101068724657897e-05, "loss": 0.0839, "step": 40374 }, { "epoch": 0.7201334141904184, "grad_norm": 0.293465793132782, "learning_rate": 1.1009397269159224e-05, "loss": 0.1013, "step": 40375 }, { "epoch": 0.7201512503121321, "grad_norm": 0.3176478445529938, "learning_rate": 1.1008107345971888e-05, "loss": 0.1251, "step": 40376 }, { "epoch": 0.7201690864338458, "grad_norm": 0.29263970255851746, "learning_rate": 1.100681747702195e-05, "loss": 0.1116, "step": 40377 }, { "epoch": 0.7201869225555595, "grad_norm": 0.2971602976322174, "learning_rate": 1.100552766231443e-05, "loss": 0.123, "step": 40378 }, { "epoch": 0.7202047586772732, "grad_norm": 0.2378644496202469, "learning_rate": 1.1004237901854317e-05, "loss": 0.1223, "step": 40379 }, { "epoch": 0.7202225947989869, "grad_norm": 0.22653701901435852, "learning_rate": 1.1002948195646612e-05, "loss": 0.0922, "step": 40380 }, { "epoch": 0.7202404309207006, "grad_norm": 0.23711735010147095, "learning_rate": 1.1001658543696305e-05, "loss": 0.0916, "step": 40381 }, { "epoch": 0.7202582670424142, "grad_norm": 0.31709030270576477, "learning_rate": 1.1000368946008417e-05, "loss": 0.1273, "step": 40382 }, { "epoch": 0.720276103164128, "grad_norm": 0.19927071034908295, "learning_rate": 1.0999079402587931e-05, "loss": 0.1338, "step": 40383 }, { "epoch": 0.7202939392858417, "grad_norm": 0.2005443125963211, "learning_rate": 1.0997789913439849e-05, "loss": 0.0952, "step": 40384 }, { "epoch": 0.7203117754075554, "grad_norm": 0.22356563806533813, "learning_rate": 1.099650047856916e-05, "loss": 0.1296, "step": 40385 }, { "epoch": 0.7203296115292691, "grad_norm": 0.24035263061523438, "learning_rate": 1.0995211097980885e-05, "loss": 0.1028, "step": 40386 }, { "epoch": 0.7203474476509828, "grad_norm": 0.22749945521354675, "learning_rate": 1.0993921771679996e-05, "loss": 0.069, "step": 40387 }, { "epoch": 0.7203652837726965, "grad_norm": 0.2557878792285919, "learning_rate": 1.0992632499671513e-05, "loss": 0.0995, "step": 40388 }, { "epoch": 0.7203831198944102, "grad_norm": 0.2746986448764801, "learning_rate": 1.0991343281960426e-05, "loss": 0.118, "step": 40389 }, { "epoch": 0.7204009560161239, "grad_norm": 0.2301800549030304, "learning_rate": 1.0990054118551722e-05, "loss": 0.063, "step": 40390 }, { "epoch": 0.7204187921378375, "grad_norm": 0.23514443635940552, "learning_rate": 1.0988765009450414e-05, "loss": 0.1085, "step": 40391 }, { "epoch": 0.7204366282595512, "grad_norm": 0.513597846031189, "learning_rate": 1.0987475954661491e-05, "loss": 0.134, "step": 40392 }, { "epoch": 0.7204544643812649, "grad_norm": 0.3090693950653076, "learning_rate": 1.0986186954189955e-05, "loss": 0.1597, "step": 40393 }, { "epoch": 0.7204723005029786, "grad_norm": 0.24443571269512177, "learning_rate": 1.0984898008040787e-05, "loss": 0.1121, "step": 40394 }, { "epoch": 0.7204901366246923, "grad_norm": 0.3139670193195343, "learning_rate": 1.0983609116219004e-05, "loss": 0.1141, "step": 40395 }, { "epoch": 0.720507972746406, "grad_norm": 0.17939960956573486, "learning_rate": 1.0982320278729596e-05, "loss": 0.0721, "step": 40396 }, { "epoch": 0.7205258088681197, "grad_norm": 0.36835068464279175, "learning_rate": 1.0981031495577554e-05, "loss": 0.1435, "step": 40397 }, { "epoch": 0.7205436449898334, "grad_norm": 0.23543259501457214, "learning_rate": 1.0979742766767867e-05, "loss": 0.1134, "step": 40398 }, { "epoch": 0.720561481111547, "grad_norm": 0.21438445150852203, "learning_rate": 1.0978454092305548e-05, "loss": 0.1163, "step": 40399 }, { "epoch": 0.7205793172332609, "grad_norm": 0.303114116191864, "learning_rate": 1.0977165472195575e-05, "loss": 0.1259, "step": 40400 }, { "epoch": 0.7205971533549745, "grad_norm": 0.25703418254852295, "learning_rate": 1.0975876906442961e-05, "loss": 0.1084, "step": 40401 }, { "epoch": 0.7206149894766882, "grad_norm": 0.45374831557273865, "learning_rate": 1.0974588395052693e-05, "loss": 0.1496, "step": 40402 }, { "epoch": 0.7206328255984019, "grad_norm": 0.3314122259616852, "learning_rate": 1.0973299938029755e-05, "loss": 0.1332, "step": 40403 }, { "epoch": 0.7206506617201156, "grad_norm": 0.2612239122390747, "learning_rate": 1.097201153537916e-05, "loss": 0.1724, "step": 40404 }, { "epoch": 0.7206684978418293, "grad_norm": 0.26297783851623535, "learning_rate": 1.0970723187105891e-05, "loss": 0.1136, "step": 40405 }, { "epoch": 0.720686333963543, "grad_norm": 0.26427575945854187, "learning_rate": 1.0969434893214947e-05, "loss": 0.1085, "step": 40406 }, { "epoch": 0.7207041700852567, "grad_norm": 0.23110325634479523, "learning_rate": 1.0968146653711311e-05, "loss": 0.1345, "step": 40407 }, { "epoch": 0.7207220062069704, "grad_norm": 0.24362215399742126, "learning_rate": 1.0966858468599994e-05, "loss": 0.1322, "step": 40408 }, { "epoch": 0.720739842328684, "grad_norm": 0.23025809228420258, "learning_rate": 1.0965570337885978e-05, "loss": 0.1313, "step": 40409 }, { "epoch": 0.7207576784503977, "grad_norm": 0.3628450334072113, "learning_rate": 1.096428226157426e-05, "loss": 0.1012, "step": 40410 }, { "epoch": 0.7207755145721114, "grad_norm": 0.20489072799682617, "learning_rate": 1.0962994239669824e-05, "loss": 0.0814, "step": 40411 }, { "epoch": 0.7207933506938251, "grad_norm": 0.2180626541376114, "learning_rate": 1.096170627217768e-05, "loss": 0.1273, "step": 40412 }, { "epoch": 0.7208111868155388, "grad_norm": 0.25559237599372864, "learning_rate": 1.0960418359102808e-05, "loss": 0.1111, "step": 40413 }, { "epoch": 0.7208290229372525, "grad_norm": 0.32958081364631653, "learning_rate": 1.0959130500450196e-05, "loss": 0.1275, "step": 40414 }, { "epoch": 0.7208468590589662, "grad_norm": 0.2854267358779907, "learning_rate": 1.0957842696224854e-05, "loss": 0.1454, "step": 40415 }, { "epoch": 0.7208646951806799, "grad_norm": 0.29579558968544006, "learning_rate": 1.0956554946431754e-05, "loss": 0.1297, "step": 40416 }, { "epoch": 0.7208825313023937, "grad_norm": 0.23838843405246735, "learning_rate": 1.0955267251075907e-05, "loss": 0.0826, "step": 40417 }, { "epoch": 0.7209003674241073, "grad_norm": 0.25677451491355896, "learning_rate": 1.0953979610162294e-05, "loss": 0.1393, "step": 40418 }, { "epoch": 0.720918203545821, "grad_norm": 0.2448953092098236, "learning_rate": 1.0952692023695907e-05, "loss": 0.121, "step": 40419 }, { "epoch": 0.7209360396675347, "grad_norm": 0.27708911895751953, "learning_rate": 1.095140449168173e-05, "loss": 0.1408, "step": 40420 }, { "epoch": 0.7209538757892484, "grad_norm": 0.29170405864715576, "learning_rate": 1.095011701412477e-05, "loss": 0.1072, "step": 40421 }, { "epoch": 0.7209717119109621, "grad_norm": 0.23069795966148376, "learning_rate": 1.0948829591030007e-05, "loss": 0.0875, "step": 40422 }, { "epoch": 0.7209895480326758, "grad_norm": 0.33636805415153503, "learning_rate": 1.0947542222402435e-05, "loss": 0.1609, "step": 40423 }, { "epoch": 0.7210073841543895, "grad_norm": 0.2755163013935089, "learning_rate": 1.0946254908247034e-05, "loss": 0.1603, "step": 40424 }, { "epoch": 0.7210252202761032, "grad_norm": 0.2104751169681549, "learning_rate": 1.0944967648568814e-05, "loss": 0.0767, "step": 40425 }, { "epoch": 0.7210430563978169, "grad_norm": 0.24717546999454498, "learning_rate": 1.094368044337275e-05, "loss": 0.1541, "step": 40426 }, { "epoch": 0.7210608925195305, "grad_norm": 0.3137086033821106, "learning_rate": 1.094239329266383e-05, "loss": 0.1309, "step": 40427 }, { "epoch": 0.7210787286412442, "grad_norm": 0.2632220387458801, "learning_rate": 1.0941106196447057e-05, "loss": 0.1383, "step": 40428 }, { "epoch": 0.7210965647629579, "grad_norm": 0.35149380564689636, "learning_rate": 1.0939819154727402e-05, "loss": 0.1152, "step": 40429 }, { "epoch": 0.7211144008846716, "grad_norm": 0.3171868920326233, "learning_rate": 1.0938532167509874e-05, "loss": 0.1387, "step": 40430 }, { "epoch": 0.7211322370063853, "grad_norm": 0.22164089977741241, "learning_rate": 1.0937245234799451e-05, "loss": 0.0849, "step": 40431 }, { "epoch": 0.721150073128099, "grad_norm": 0.2974955439567566, "learning_rate": 1.0935958356601123e-05, "loss": 0.0982, "step": 40432 }, { "epoch": 0.7211679092498127, "grad_norm": 0.27351701259613037, "learning_rate": 1.0934671532919869e-05, "loss": 0.1175, "step": 40433 }, { "epoch": 0.7211857453715265, "grad_norm": 0.25590378046035767, "learning_rate": 1.0933384763760695e-05, "loss": 0.1601, "step": 40434 }, { "epoch": 0.7212035814932402, "grad_norm": 0.28437310457229614, "learning_rate": 1.0932098049128581e-05, "loss": 0.1603, "step": 40435 }, { "epoch": 0.7212214176149538, "grad_norm": 0.28727272152900696, "learning_rate": 1.0930811389028515e-05, "loss": 0.1036, "step": 40436 }, { "epoch": 0.7212392537366675, "grad_norm": 0.26963332295417786, "learning_rate": 1.0929524783465472e-05, "loss": 0.1292, "step": 40437 }, { "epoch": 0.7212570898583812, "grad_norm": 0.3264870047569275, "learning_rate": 1.092823823244446e-05, "loss": 0.1402, "step": 40438 }, { "epoch": 0.7212749259800949, "grad_norm": 0.3137264549732208, "learning_rate": 1.0926951735970458e-05, "loss": 0.1318, "step": 40439 }, { "epoch": 0.7212927621018086, "grad_norm": 0.25432273745536804, "learning_rate": 1.092566529404845e-05, "loss": 0.1109, "step": 40440 }, { "epoch": 0.7213105982235223, "grad_norm": 0.3099328279495239, "learning_rate": 1.092437890668342e-05, "loss": 0.0764, "step": 40441 }, { "epoch": 0.721328434345236, "grad_norm": 0.3787504732608795, "learning_rate": 1.0923092573880362e-05, "loss": 0.1364, "step": 40442 }, { "epoch": 0.7213462704669497, "grad_norm": 0.34405717253685, "learning_rate": 1.0921806295644254e-05, "loss": 0.1224, "step": 40443 }, { "epoch": 0.7213641065886633, "grad_norm": 0.2813001275062561, "learning_rate": 1.0920520071980095e-05, "loss": 0.1151, "step": 40444 }, { "epoch": 0.721381942710377, "grad_norm": 0.22539572417736053, "learning_rate": 1.0919233902892862e-05, "loss": 0.1009, "step": 40445 }, { "epoch": 0.7213997788320907, "grad_norm": 0.28210368752479553, "learning_rate": 1.0917947788387533e-05, "loss": 0.127, "step": 40446 }, { "epoch": 0.7214176149538044, "grad_norm": 0.32122141122817993, "learning_rate": 1.0916661728469112e-05, "loss": 0.11, "step": 40447 }, { "epoch": 0.7214354510755181, "grad_norm": 0.28604593873023987, "learning_rate": 1.0915375723142577e-05, "loss": 0.1338, "step": 40448 }, { "epoch": 0.7214532871972318, "grad_norm": 0.2551625669002533, "learning_rate": 1.0914089772412905e-05, "loss": 0.0836, "step": 40449 }, { "epoch": 0.7214711233189455, "grad_norm": 0.2863278388977051, "learning_rate": 1.0912803876285079e-05, "loss": 0.1174, "step": 40450 }, { "epoch": 0.7214889594406593, "grad_norm": 0.35364723205566406, "learning_rate": 1.0911518034764104e-05, "loss": 0.086, "step": 40451 }, { "epoch": 0.721506795562373, "grad_norm": 0.28567206859588623, "learning_rate": 1.0910232247854946e-05, "loss": 0.0906, "step": 40452 }, { "epoch": 0.7215246316840866, "grad_norm": 0.5889822840690613, "learning_rate": 1.0908946515562596e-05, "loss": 0.1936, "step": 40453 }, { "epoch": 0.7215424678058003, "grad_norm": 0.25866588950157166, "learning_rate": 1.0907660837892028e-05, "loss": 0.0875, "step": 40454 }, { "epoch": 0.721560303927514, "grad_norm": 0.22260944545269012, "learning_rate": 1.0906375214848241e-05, "loss": 0.1044, "step": 40455 }, { "epoch": 0.7215781400492277, "grad_norm": 0.19718752801418304, "learning_rate": 1.0905089646436204e-05, "loss": 0.085, "step": 40456 }, { "epoch": 0.7215959761709414, "grad_norm": 0.3765561580657959, "learning_rate": 1.0903804132660916e-05, "loss": 0.1543, "step": 40457 }, { "epoch": 0.7216138122926551, "grad_norm": 0.2711127698421478, "learning_rate": 1.0902518673527354e-05, "loss": 0.1377, "step": 40458 }, { "epoch": 0.7216316484143688, "grad_norm": 0.3439038097858429, "learning_rate": 1.0901233269040487e-05, "loss": 0.1109, "step": 40459 }, { "epoch": 0.7216494845360825, "grad_norm": 0.20267468690872192, "learning_rate": 1.089994791920532e-05, "loss": 0.1131, "step": 40460 }, { "epoch": 0.7216673206577962, "grad_norm": 0.2884863615036011, "learning_rate": 1.0898662624026823e-05, "loss": 0.127, "step": 40461 }, { "epoch": 0.7216851567795098, "grad_norm": 0.2465682029724121, "learning_rate": 1.0897377383509983e-05, "loss": 0.1645, "step": 40462 }, { "epoch": 0.7217029929012235, "grad_norm": 0.3834037780761719, "learning_rate": 1.0896092197659765e-05, "loss": 0.0685, "step": 40463 }, { "epoch": 0.7217208290229372, "grad_norm": 0.2938390374183655, "learning_rate": 1.089480706648118e-05, "loss": 0.1478, "step": 40464 }, { "epoch": 0.7217386651446509, "grad_norm": 0.2629544734954834, "learning_rate": 1.089352198997919e-05, "loss": 0.1092, "step": 40465 }, { "epoch": 0.7217565012663646, "grad_norm": 0.27181199193000793, "learning_rate": 1.0892236968158783e-05, "loss": 0.0874, "step": 40466 }, { "epoch": 0.7217743373880784, "grad_norm": 0.22509315609931946, "learning_rate": 1.089095200102494e-05, "loss": 0.139, "step": 40467 }, { "epoch": 0.7217921735097921, "grad_norm": 0.2534896433353424, "learning_rate": 1.0889667088582628e-05, "loss": 0.15, "step": 40468 }, { "epoch": 0.7218100096315058, "grad_norm": 0.28012070059776306, "learning_rate": 1.0888382230836852e-05, "loss": 0.1062, "step": 40469 }, { "epoch": 0.7218278457532195, "grad_norm": 0.3172220289707184, "learning_rate": 1.088709742779257e-05, "loss": 0.12, "step": 40470 }, { "epoch": 0.7218456818749331, "grad_norm": 0.2582438886165619, "learning_rate": 1.088581267945478e-05, "loss": 0.1392, "step": 40471 }, { "epoch": 0.7218635179966468, "grad_norm": 0.29339727759361267, "learning_rate": 1.0884527985828446e-05, "loss": 0.1497, "step": 40472 }, { "epoch": 0.7218813541183605, "grad_norm": 0.40829694271087646, "learning_rate": 1.088324334691857e-05, "loss": 0.0991, "step": 40473 }, { "epoch": 0.7218991902400742, "grad_norm": 0.2584668695926666, "learning_rate": 1.0881958762730116e-05, "loss": 0.1468, "step": 40474 }, { "epoch": 0.7219170263617879, "grad_norm": 0.2535378932952881, "learning_rate": 1.0880674233268065e-05, "loss": 0.114, "step": 40475 }, { "epoch": 0.7219348624835016, "grad_norm": 0.3629578649997711, "learning_rate": 1.0879389758537389e-05, "loss": 0.1616, "step": 40476 }, { "epoch": 0.7219526986052153, "grad_norm": 0.33013126254081726, "learning_rate": 1.0878105338543084e-05, "loss": 0.1264, "step": 40477 }, { "epoch": 0.721970534726929, "grad_norm": 0.23822833597660065, "learning_rate": 1.0876820973290122e-05, "loss": 0.1513, "step": 40478 }, { "epoch": 0.7219883708486426, "grad_norm": 0.3148685097694397, "learning_rate": 1.0875536662783479e-05, "loss": 0.1493, "step": 40479 }, { "epoch": 0.7220062069703563, "grad_norm": 0.32281985878944397, "learning_rate": 1.0874252407028134e-05, "loss": 0.1188, "step": 40480 }, { "epoch": 0.72202404309207, "grad_norm": 0.27260199189186096, "learning_rate": 1.0872968206029055e-05, "loss": 0.1202, "step": 40481 }, { "epoch": 0.7220418792137837, "grad_norm": 0.3343993127346039, "learning_rate": 1.0871684059791243e-05, "loss": 0.1536, "step": 40482 }, { "epoch": 0.7220597153354974, "grad_norm": 0.3231378495693207, "learning_rate": 1.0870399968319653e-05, "loss": 0.1169, "step": 40483 }, { "epoch": 0.7220775514572112, "grad_norm": 0.28927361965179443, "learning_rate": 1.086911593161928e-05, "loss": 0.0672, "step": 40484 }, { "epoch": 0.7220953875789249, "grad_norm": 0.23475094139575958, "learning_rate": 1.0867831949695086e-05, "loss": 0.1357, "step": 40485 }, { "epoch": 0.7221132237006386, "grad_norm": 0.23237106204032898, "learning_rate": 1.0866548022552067e-05, "loss": 0.0898, "step": 40486 }, { "epoch": 0.7221310598223523, "grad_norm": 0.2454371303319931, "learning_rate": 1.086526415019519e-05, "loss": 0.1216, "step": 40487 }, { "epoch": 0.722148895944066, "grad_norm": 0.2840881049633026, "learning_rate": 1.0863980332629428e-05, "loss": 0.1277, "step": 40488 }, { "epoch": 0.7221667320657796, "grad_norm": 0.2540324926376343, "learning_rate": 1.0862696569859754e-05, "loss": 0.1076, "step": 40489 }, { "epoch": 0.7221845681874933, "grad_norm": 0.2851029336452484, "learning_rate": 1.0861412861891159e-05, "loss": 0.1674, "step": 40490 }, { "epoch": 0.722202404309207, "grad_norm": 0.28207066655158997, "learning_rate": 1.0860129208728612e-05, "loss": 0.1401, "step": 40491 }, { "epoch": 0.7222202404309207, "grad_norm": 0.27512624859809875, "learning_rate": 1.0858845610377088e-05, "loss": 0.1177, "step": 40492 }, { "epoch": 0.7222380765526344, "grad_norm": 0.2700352966785431, "learning_rate": 1.0857562066841562e-05, "loss": 0.0869, "step": 40493 }, { "epoch": 0.7222559126743481, "grad_norm": 0.2876301109790802, "learning_rate": 1.0856278578127e-05, "loss": 0.11, "step": 40494 }, { "epoch": 0.7222737487960618, "grad_norm": 0.2155667096376419, "learning_rate": 1.0854995144238398e-05, "loss": 0.1309, "step": 40495 }, { "epoch": 0.7222915849177755, "grad_norm": 0.20146290957927704, "learning_rate": 1.0853711765180719e-05, "loss": 0.1505, "step": 40496 }, { "epoch": 0.7223094210394891, "grad_norm": 0.26354163885116577, "learning_rate": 1.0852428440958931e-05, "loss": 0.0961, "step": 40497 }, { "epoch": 0.7223272571612028, "grad_norm": 0.38613587617874146, "learning_rate": 1.0851145171578026e-05, "loss": 0.1145, "step": 40498 }, { "epoch": 0.7223450932829165, "grad_norm": 0.24673140048980713, "learning_rate": 1.0849861957042962e-05, "loss": 0.096, "step": 40499 }, { "epoch": 0.7223629294046302, "grad_norm": 0.2833535671234131, "learning_rate": 1.0848578797358725e-05, "loss": 0.1165, "step": 40500 }, { "epoch": 0.722380765526344, "grad_norm": 0.2744133472442627, "learning_rate": 1.0847295692530287e-05, "loss": 0.0772, "step": 40501 }, { "epoch": 0.7223986016480577, "grad_norm": 0.2528727054595947, "learning_rate": 1.084601264256261e-05, "loss": 0.1081, "step": 40502 }, { "epoch": 0.7224164377697714, "grad_norm": 0.291473388671875, "learning_rate": 1.0844729647460686e-05, "loss": 0.1133, "step": 40503 }, { "epoch": 0.7224342738914851, "grad_norm": 0.24958087503910065, "learning_rate": 1.0843446707229476e-05, "loss": 0.079, "step": 40504 }, { "epoch": 0.7224521100131988, "grad_norm": 0.2474784106016159, "learning_rate": 1.084216382187396e-05, "loss": 0.1399, "step": 40505 }, { "epoch": 0.7224699461349124, "grad_norm": 0.2323588877916336, "learning_rate": 1.0840880991399105e-05, "loss": 0.1125, "step": 40506 }, { "epoch": 0.7224877822566261, "grad_norm": 0.2776666581630707, "learning_rate": 1.0839598215809877e-05, "loss": 0.1157, "step": 40507 }, { "epoch": 0.7225056183783398, "grad_norm": 0.2597182095050812, "learning_rate": 1.0838315495111265e-05, "loss": 0.079, "step": 40508 }, { "epoch": 0.7225234545000535, "grad_norm": 0.24726997315883636, "learning_rate": 1.0837032829308236e-05, "loss": 0.0916, "step": 40509 }, { "epoch": 0.7225412906217672, "grad_norm": 0.19278456270694733, "learning_rate": 1.0835750218405748e-05, "loss": 0.088, "step": 40510 }, { "epoch": 0.7225591267434809, "grad_norm": 0.31380847096443176, "learning_rate": 1.0834467662408795e-05, "loss": 0.1582, "step": 40511 }, { "epoch": 0.7225769628651946, "grad_norm": 0.26442664861679077, "learning_rate": 1.083318516132233e-05, "loss": 0.0915, "step": 40512 }, { "epoch": 0.7225947989869083, "grad_norm": 0.26641327142715454, "learning_rate": 1.0831902715151338e-05, "loss": 0.1137, "step": 40513 }, { "epoch": 0.722612635108622, "grad_norm": 0.2733730971813202, "learning_rate": 1.0830620323900787e-05, "loss": 0.1403, "step": 40514 }, { "epoch": 0.7226304712303356, "grad_norm": 0.35424867272377014, "learning_rate": 1.0829337987575636e-05, "loss": 0.1551, "step": 40515 }, { "epoch": 0.7226483073520493, "grad_norm": 0.24846625328063965, "learning_rate": 1.0828055706180873e-05, "loss": 0.0955, "step": 40516 }, { "epoch": 0.722666143473763, "grad_norm": 0.29416537284851074, "learning_rate": 1.082677347972146e-05, "loss": 0.1417, "step": 40517 }, { "epoch": 0.7226839795954768, "grad_norm": 0.3694874048233032, "learning_rate": 1.0825491308202374e-05, "loss": 0.1717, "step": 40518 }, { "epoch": 0.7227018157171905, "grad_norm": 0.26925182342529297, "learning_rate": 1.0824209191628573e-05, "loss": 0.0901, "step": 40519 }, { "epoch": 0.7227196518389042, "grad_norm": 0.3647941052913666, "learning_rate": 1.0822927130005029e-05, "loss": 0.1004, "step": 40520 }, { "epoch": 0.7227374879606179, "grad_norm": 0.27682557702064514, "learning_rate": 1.0821645123336725e-05, "loss": 0.1158, "step": 40521 }, { "epoch": 0.7227553240823316, "grad_norm": 0.25689277052879333, "learning_rate": 1.0820363171628617e-05, "loss": 0.1296, "step": 40522 }, { "epoch": 0.7227731602040453, "grad_norm": 0.2692350149154663, "learning_rate": 1.081908127488568e-05, "loss": 0.1255, "step": 40523 }, { "epoch": 0.7227909963257589, "grad_norm": 0.3036006987094879, "learning_rate": 1.0817799433112876e-05, "loss": 0.1323, "step": 40524 }, { "epoch": 0.7228088324474726, "grad_norm": 0.37668895721435547, "learning_rate": 1.0816517646315189e-05, "loss": 0.1071, "step": 40525 }, { "epoch": 0.7228266685691863, "grad_norm": 0.26455116271972656, "learning_rate": 1.0815235914497565e-05, "loss": 0.1027, "step": 40526 }, { "epoch": 0.7228445046909, "grad_norm": 0.2779602110385895, "learning_rate": 1.0813954237665e-05, "loss": 0.1196, "step": 40527 }, { "epoch": 0.7228623408126137, "grad_norm": 0.33256569504737854, "learning_rate": 1.0812672615822437e-05, "loss": 0.1579, "step": 40528 }, { "epoch": 0.7228801769343274, "grad_norm": 0.4240020215511322, "learning_rate": 1.0811391048974862e-05, "loss": 0.1397, "step": 40529 }, { "epoch": 0.7228980130560411, "grad_norm": 0.27562037110328674, "learning_rate": 1.0810109537127239e-05, "loss": 0.1411, "step": 40530 }, { "epoch": 0.7229158491777548, "grad_norm": 0.39022308588027954, "learning_rate": 1.0808828080284531e-05, "loss": 0.1093, "step": 40531 }, { "epoch": 0.7229336852994684, "grad_norm": 0.3457445502281189, "learning_rate": 1.0807546678451697e-05, "loss": 0.1304, "step": 40532 }, { "epoch": 0.7229515214211821, "grad_norm": 0.2470165193080902, "learning_rate": 1.0806265331633722e-05, "loss": 0.1228, "step": 40533 }, { "epoch": 0.7229693575428958, "grad_norm": 0.2204384058713913, "learning_rate": 1.080498403983557e-05, "loss": 0.0901, "step": 40534 }, { "epoch": 0.7229871936646096, "grad_norm": 0.35831955075263977, "learning_rate": 1.0803702803062198e-05, "loss": 0.1543, "step": 40535 }, { "epoch": 0.7230050297863233, "grad_norm": 0.2470790296792984, "learning_rate": 1.0802421621318578e-05, "loss": 0.1104, "step": 40536 }, { "epoch": 0.723022865908037, "grad_norm": 0.33692437410354614, "learning_rate": 1.0801140494609668e-05, "loss": 0.1954, "step": 40537 }, { "epoch": 0.7230407020297507, "grad_norm": 0.3157949149608612, "learning_rate": 1.079985942294045e-05, "loss": 0.1204, "step": 40538 }, { "epoch": 0.7230585381514644, "grad_norm": 0.18929168581962585, "learning_rate": 1.079857840631587e-05, "loss": 0.0997, "step": 40539 }, { "epoch": 0.7230763742731781, "grad_norm": 0.28917384147644043, "learning_rate": 1.0797297444740916e-05, "loss": 0.128, "step": 40540 }, { "epoch": 0.7230942103948917, "grad_norm": 0.32336491346359253, "learning_rate": 1.0796016538220535e-05, "loss": 0.0972, "step": 40541 }, { "epoch": 0.7231120465166054, "grad_norm": 0.2406318336725235, "learning_rate": 1.0794735686759708e-05, "loss": 0.1301, "step": 40542 }, { "epoch": 0.7231298826383191, "grad_norm": 0.18333131074905396, "learning_rate": 1.0793454890363392e-05, "loss": 0.1107, "step": 40543 }, { "epoch": 0.7231477187600328, "grad_norm": 0.23352345824241638, "learning_rate": 1.0792174149036549e-05, "loss": 0.1302, "step": 40544 }, { "epoch": 0.7231655548817465, "grad_norm": 0.3373039960861206, "learning_rate": 1.0790893462784141e-05, "loss": 0.128, "step": 40545 }, { "epoch": 0.7231833910034602, "grad_norm": 0.23998256027698517, "learning_rate": 1.0789612831611146e-05, "loss": 0.059, "step": 40546 }, { "epoch": 0.7232012271251739, "grad_norm": 0.2694459855556488, "learning_rate": 1.0788332255522519e-05, "loss": 0.0576, "step": 40547 }, { "epoch": 0.7232190632468876, "grad_norm": 0.2804769277572632, "learning_rate": 1.0787051734523224e-05, "loss": 0.1157, "step": 40548 }, { "epoch": 0.7232368993686012, "grad_norm": 0.3122931122779846, "learning_rate": 1.0785771268618225e-05, "loss": 0.1223, "step": 40549 }, { "epoch": 0.7232547354903149, "grad_norm": 0.32900938391685486, "learning_rate": 1.0784490857812476e-05, "loss": 0.1307, "step": 40550 }, { "epoch": 0.7232725716120286, "grad_norm": 0.204051673412323, "learning_rate": 1.0783210502110963e-05, "loss": 0.1052, "step": 40551 }, { "epoch": 0.7232904077337424, "grad_norm": 0.2570563554763794, "learning_rate": 1.0781930201518633e-05, "loss": 0.1057, "step": 40552 }, { "epoch": 0.7233082438554561, "grad_norm": 0.24053695797920227, "learning_rate": 1.0780649956040445e-05, "loss": 0.1338, "step": 40553 }, { "epoch": 0.7233260799771698, "grad_norm": 0.27540016174316406, "learning_rate": 1.077936976568137e-05, "loss": 0.142, "step": 40554 }, { "epoch": 0.7233439160988835, "grad_norm": 0.2392880916595459, "learning_rate": 1.0778089630446378e-05, "loss": 0.1149, "step": 40555 }, { "epoch": 0.7233617522205972, "grad_norm": 0.3592996597290039, "learning_rate": 1.077680955034042e-05, "loss": 0.1115, "step": 40556 }, { "epoch": 0.7233795883423109, "grad_norm": 0.25523144006729126, "learning_rate": 1.0775529525368463e-05, "loss": 0.1355, "step": 40557 }, { "epoch": 0.7233974244640246, "grad_norm": 0.38922280073165894, "learning_rate": 1.0774249555535457e-05, "loss": 0.1277, "step": 40558 }, { "epoch": 0.7234152605857382, "grad_norm": 0.37434661388397217, "learning_rate": 1.0772969640846383e-05, "loss": 0.113, "step": 40559 }, { "epoch": 0.7234330967074519, "grad_norm": 0.3323158025741577, "learning_rate": 1.0771689781306191e-05, "loss": 0.1389, "step": 40560 }, { "epoch": 0.7234509328291656, "grad_norm": 0.30588456988334656, "learning_rate": 1.0770409976919846e-05, "loss": 0.1445, "step": 40561 }, { "epoch": 0.7234687689508793, "grad_norm": 0.3162062466144562, "learning_rate": 1.0769130227692304e-05, "loss": 0.1005, "step": 40562 }, { "epoch": 0.723486605072593, "grad_norm": 0.40629053115844727, "learning_rate": 1.0767850533628521e-05, "loss": 0.1586, "step": 40563 }, { "epoch": 0.7235044411943067, "grad_norm": 0.20779992640018463, "learning_rate": 1.0766570894733475e-05, "loss": 0.0905, "step": 40564 }, { "epoch": 0.7235222773160204, "grad_norm": 0.36102133989334106, "learning_rate": 1.0765291311012113e-05, "loss": 0.1687, "step": 40565 }, { "epoch": 0.7235401134377341, "grad_norm": 0.25982508063316345, "learning_rate": 1.0764011782469391e-05, "loss": 0.1206, "step": 40566 }, { "epoch": 0.7235579495594477, "grad_norm": 0.35117021203041077, "learning_rate": 1.0762732309110288e-05, "loss": 0.1556, "step": 40567 }, { "epoch": 0.7235757856811615, "grad_norm": 0.269761323928833, "learning_rate": 1.0761452890939738e-05, "loss": 0.1089, "step": 40568 }, { "epoch": 0.7235936218028752, "grad_norm": 0.24880672991275787, "learning_rate": 1.0760173527962728e-05, "loss": 0.1257, "step": 40569 }, { "epoch": 0.7236114579245889, "grad_norm": 0.3262292444705963, "learning_rate": 1.0758894220184202e-05, "loss": 0.1206, "step": 40570 }, { "epoch": 0.7236292940463026, "grad_norm": 0.36932623386383057, "learning_rate": 1.0757614967609112e-05, "loss": 0.1227, "step": 40571 }, { "epoch": 0.7236471301680163, "grad_norm": 0.2558470070362091, "learning_rate": 1.0756335770242432e-05, "loss": 0.1116, "step": 40572 }, { "epoch": 0.72366496628973, "grad_norm": 0.27696770429611206, "learning_rate": 1.0755056628089116e-05, "loss": 0.1346, "step": 40573 }, { "epoch": 0.7236828024114437, "grad_norm": 0.22319665551185608, "learning_rate": 1.075377754115412e-05, "loss": 0.127, "step": 40574 }, { "epoch": 0.7237006385331574, "grad_norm": 0.23782727122306824, "learning_rate": 1.0752498509442405e-05, "loss": 0.1033, "step": 40575 }, { "epoch": 0.723718474654871, "grad_norm": 0.23157502710819244, "learning_rate": 1.0751219532958914e-05, "loss": 0.086, "step": 40576 }, { "epoch": 0.7237363107765847, "grad_norm": 0.24546676874160767, "learning_rate": 1.074994061170863e-05, "loss": 0.1039, "step": 40577 }, { "epoch": 0.7237541468982984, "grad_norm": 0.21519996225833893, "learning_rate": 1.0748661745696498e-05, "loss": 0.0951, "step": 40578 }, { "epoch": 0.7237719830200121, "grad_norm": 0.3266916871070862, "learning_rate": 1.0747382934927471e-05, "loss": 0.0963, "step": 40579 }, { "epoch": 0.7237898191417258, "grad_norm": 0.48786115646362305, "learning_rate": 1.0746104179406504e-05, "loss": 0.1567, "step": 40580 }, { "epoch": 0.7238076552634395, "grad_norm": 0.5185796618461609, "learning_rate": 1.0744825479138562e-05, "loss": 0.1389, "step": 40581 }, { "epoch": 0.7238254913851532, "grad_norm": 0.3125360906124115, "learning_rate": 1.074354683412861e-05, "loss": 0.1037, "step": 40582 }, { "epoch": 0.7238433275068669, "grad_norm": 0.2962605357170105, "learning_rate": 1.074226824438159e-05, "loss": 0.1441, "step": 40583 }, { "epoch": 0.7238611636285806, "grad_norm": 0.25355395674705505, "learning_rate": 1.0740989709902458e-05, "loss": 0.0825, "step": 40584 }, { "epoch": 0.7238789997502943, "grad_norm": 0.3399258255958557, "learning_rate": 1.073971123069618e-05, "loss": 0.1233, "step": 40585 }, { "epoch": 0.723896835872008, "grad_norm": 0.2795931100845337, "learning_rate": 1.073843280676771e-05, "loss": 0.0977, "step": 40586 }, { "epoch": 0.7239146719937217, "grad_norm": 0.27127134799957275, "learning_rate": 1.0737154438121997e-05, "loss": 0.1235, "step": 40587 }, { "epoch": 0.7239325081154354, "grad_norm": 0.23472371697425842, "learning_rate": 1.0735876124764002e-05, "loss": 0.1222, "step": 40588 }, { "epoch": 0.7239503442371491, "grad_norm": 0.24967920780181885, "learning_rate": 1.0734597866698668e-05, "loss": 0.1536, "step": 40589 }, { "epoch": 0.7239681803588628, "grad_norm": 0.34597447514533997, "learning_rate": 1.0733319663930968e-05, "loss": 0.131, "step": 40590 }, { "epoch": 0.7239860164805765, "grad_norm": 0.2651764154434204, "learning_rate": 1.073204151646585e-05, "loss": 0.1245, "step": 40591 }, { "epoch": 0.7240038526022902, "grad_norm": 0.26230162382125854, "learning_rate": 1.0730763424308266e-05, "loss": 0.1035, "step": 40592 }, { "epoch": 0.7240216887240039, "grad_norm": 0.3079476058483124, "learning_rate": 1.0729485387463162e-05, "loss": 0.1275, "step": 40593 }, { "epoch": 0.7240395248457175, "grad_norm": 0.3106514811515808, "learning_rate": 1.0728207405935511e-05, "loss": 0.1038, "step": 40594 }, { "epoch": 0.7240573609674312, "grad_norm": 0.20632345974445343, "learning_rate": 1.0726929479730249e-05, "loss": 0.0733, "step": 40595 }, { "epoch": 0.7240751970891449, "grad_norm": 0.34770768880844116, "learning_rate": 1.0725651608852347e-05, "loss": 0.1419, "step": 40596 }, { "epoch": 0.7240930332108586, "grad_norm": 0.21071726083755493, "learning_rate": 1.0724373793306741e-05, "loss": 0.098, "step": 40597 }, { "epoch": 0.7241108693325723, "grad_norm": 0.23060128092765808, "learning_rate": 1.0723096033098402e-05, "loss": 0.1092, "step": 40598 }, { "epoch": 0.724128705454286, "grad_norm": 0.2908787727355957, "learning_rate": 1.0721818328232273e-05, "loss": 0.1609, "step": 40599 }, { "epoch": 0.7241465415759997, "grad_norm": 0.31680330634117126, "learning_rate": 1.0720540678713306e-05, "loss": 0.0566, "step": 40600 }, { "epoch": 0.7241643776977134, "grad_norm": 0.2705176770687103, "learning_rate": 1.0719263084546458e-05, "loss": 0.1056, "step": 40601 }, { "epoch": 0.7241822138194272, "grad_norm": 0.21999453008174896, "learning_rate": 1.0717985545736669e-05, "loss": 0.1007, "step": 40602 }, { "epoch": 0.7242000499411408, "grad_norm": 0.32170572876930237, "learning_rate": 1.0716708062288908e-05, "loss": 0.086, "step": 40603 }, { "epoch": 0.7242178860628545, "grad_norm": 0.24259944260120392, "learning_rate": 1.0715430634208123e-05, "loss": 0.0788, "step": 40604 }, { "epoch": 0.7242357221845682, "grad_norm": 0.2975025475025177, "learning_rate": 1.071415326149926e-05, "loss": 0.1239, "step": 40605 }, { "epoch": 0.7242535583062819, "grad_norm": 0.2233697474002838, "learning_rate": 1.0712875944167267e-05, "loss": 0.124, "step": 40606 }, { "epoch": 0.7242713944279956, "grad_norm": 0.36306843161582947, "learning_rate": 1.0711598682217109e-05, "loss": 0.098, "step": 40607 }, { "epoch": 0.7242892305497093, "grad_norm": 0.22403933107852936, "learning_rate": 1.0710321475653717e-05, "loss": 0.1243, "step": 40608 }, { "epoch": 0.724307066671423, "grad_norm": 0.21132799983024597, "learning_rate": 1.0709044324482068e-05, "loss": 0.0862, "step": 40609 }, { "epoch": 0.7243249027931367, "grad_norm": 0.34940826892852783, "learning_rate": 1.0707767228707089e-05, "loss": 0.1009, "step": 40610 }, { "epoch": 0.7243427389148503, "grad_norm": 0.33472365140914917, "learning_rate": 1.0706490188333748e-05, "loss": 0.1405, "step": 40611 }, { "epoch": 0.724360575036564, "grad_norm": 0.27359768748283386, "learning_rate": 1.070521320336699e-05, "loss": 0.1242, "step": 40612 }, { "epoch": 0.7243784111582777, "grad_norm": 0.2877182066440582, "learning_rate": 1.0703936273811763e-05, "loss": 0.1489, "step": 40613 }, { "epoch": 0.7243962472799914, "grad_norm": 0.28104862570762634, "learning_rate": 1.0702659399673016e-05, "loss": 0.0638, "step": 40614 }, { "epoch": 0.7244140834017051, "grad_norm": 0.2790651321411133, "learning_rate": 1.070138258095569e-05, "loss": 0.0561, "step": 40615 }, { "epoch": 0.7244319195234188, "grad_norm": 0.2962120473384857, "learning_rate": 1.0700105817664751e-05, "loss": 0.137, "step": 40616 }, { "epoch": 0.7244497556451325, "grad_norm": 0.1861022263765335, "learning_rate": 1.0698829109805144e-05, "loss": 0.0939, "step": 40617 }, { "epoch": 0.7244675917668462, "grad_norm": 0.3007505238056183, "learning_rate": 1.0697552457381812e-05, "loss": 0.1757, "step": 40618 }, { "epoch": 0.72448542788856, "grad_norm": 0.2994762063026428, "learning_rate": 1.0696275860399701e-05, "loss": 0.1122, "step": 40619 }, { "epoch": 0.7245032640102737, "grad_norm": 0.25408273935317993, "learning_rate": 1.0694999318863772e-05, "loss": 0.1507, "step": 40620 }, { "epoch": 0.7245211001319873, "grad_norm": 0.2956840693950653, "learning_rate": 1.0693722832778966e-05, "loss": 0.1386, "step": 40621 }, { "epoch": 0.724538936253701, "grad_norm": 0.2609332501888275, "learning_rate": 1.0692446402150225e-05, "loss": 0.1291, "step": 40622 }, { "epoch": 0.7245567723754147, "grad_norm": 0.5064058303833008, "learning_rate": 1.0691170026982509e-05, "loss": 0.2121, "step": 40623 }, { "epoch": 0.7245746084971284, "grad_norm": 0.19119513034820557, "learning_rate": 1.0689893707280754e-05, "loss": 0.1208, "step": 40624 }, { "epoch": 0.7245924446188421, "grad_norm": 0.2072117179632187, "learning_rate": 1.068861744304992e-05, "loss": 0.1115, "step": 40625 }, { "epoch": 0.7246102807405558, "grad_norm": 0.43047600984573364, "learning_rate": 1.0687341234294948e-05, "loss": 0.1051, "step": 40626 }, { "epoch": 0.7246281168622695, "grad_norm": 0.3276810646057129, "learning_rate": 1.0686065081020789e-05, "loss": 0.1101, "step": 40627 }, { "epoch": 0.7246459529839832, "grad_norm": 0.43187955021858215, "learning_rate": 1.0684788983232371e-05, "loss": 0.1586, "step": 40628 }, { "epoch": 0.7246637891056968, "grad_norm": 0.36013561487197876, "learning_rate": 1.0683512940934667e-05, "loss": 0.114, "step": 40629 }, { "epoch": 0.7246816252274105, "grad_norm": 0.27758336067199707, "learning_rate": 1.0682236954132612e-05, "loss": 0.1399, "step": 40630 }, { "epoch": 0.7246994613491242, "grad_norm": 0.2186107337474823, "learning_rate": 1.0680961022831151e-05, "loss": 0.1134, "step": 40631 }, { "epoch": 0.7247172974708379, "grad_norm": 0.15892332792282104, "learning_rate": 1.067968514703522e-05, "loss": 0.0742, "step": 40632 }, { "epoch": 0.7247351335925516, "grad_norm": 0.24363532662391663, "learning_rate": 1.0678409326749789e-05, "loss": 0.1263, "step": 40633 }, { "epoch": 0.7247529697142653, "grad_norm": 0.24483613669872284, "learning_rate": 1.0677133561979786e-05, "loss": 0.1235, "step": 40634 }, { "epoch": 0.724770805835979, "grad_norm": 0.22202663123607635, "learning_rate": 1.0675857852730151e-05, "loss": 0.0976, "step": 40635 }, { "epoch": 0.7247886419576928, "grad_norm": 0.2127566933631897, "learning_rate": 1.0674582199005851e-05, "loss": 0.1012, "step": 40636 }, { "epoch": 0.7248064780794065, "grad_norm": 0.2635959982872009, "learning_rate": 1.0673306600811806e-05, "loss": 0.1092, "step": 40637 }, { "epoch": 0.7248243142011201, "grad_norm": 0.29407212138175964, "learning_rate": 1.0672031058152981e-05, "loss": 0.1878, "step": 40638 }, { "epoch": 0.7248421503228338, "grad_norm": 0.325975239276886, "learning_rate": 1.0670755571034315e-05, "loss": 0.1079, "step": 40639 }, { "epoch": 0.7248599864445475, "grad_norm": 0.27838611602783203, "learning_rate": 1.066948013946075e-05, "loss": 0.093, "step": 40640 }, { "epoch": 0.7248778225662612, "grad_norm": 0.5459676384925842, "learning_rate": 1.066820476343722e-05, "loss": 0.0752, "step": 40641 }, { "epoch": 0.7248956586879749, "grad_norm": 0.21756823360919952, "learning_rate": 1.0666929442968687e-05, "loss": 0.1384, "step": 40642 }, { "epoch": 0.7249134948096886, "grad_norm": 0.3354736566543579, "learning_rate": 1.0665654178060086e-05, "loss": 0.1396, "step": 40643 }, { "epoch": 0.7249313309314023, "grad_norm": 0.3089248538017273, "learning_rate": 1.066437896871636e-05, "loss": 0.1311, "step": 40644 }, { "epoch": 0.724949167053116, "grad_norm": 0.279687762260437, "learning_rate": 1.0663103814942443e-05, "loss": 0.116, "step": 40645 }, { "epoch": 0.7249670031748296, "grad_norm": 0.26352062821388245, "learning_rate": 1.0661828716743297e-05, "loss": 0.1238, "step": 40646 }, { "epoch": 0.7249848392965433, "grad_norm": 0.20644158124923706, "learning_rate": 1.0660553674123855e-05, "loss": 0.1263, "step": 40647 }, { "epoch": 0.725002675418257, "grad_norm": 0.3081561028957367, "learning_rate": 1.0659278687089059e-05, "loss": 0.0916, "step": 40648 }, { "epoch": 0.7250205115399707, "grad_norm": 0.2747787833213806, "learning_rate": 1.0658003755643844e-05, "loss": 0.0912, "step": 40649 }, { "epoch": 0.7250383476616844, "grad_norm": 0.2721705138683319, "learning_rate": 1.0656728879793169e-05, "loss": 0.1419, "step": 40650 }, { "epoch": 0.7250561837833981, "grad_norm": 0.23173055052757263, "learning_rate": 1.0655454059541958e-05, "loss": 0.1055, "step": 40651 }, { "epoch": 0.7250740199051118, "grad_norm": 0.24216590821743011, "learning_rate": 1.0654179294895169e-05, "loss": 0.1174, "step": 40652 }, { "epoch": 0.7250918560268256, "grad_norm": 0.22711877524852753, "learning_rate": 1.0652904585857736e-05, "loss": 0.134, "step": 40653 }, { "epoch": 0.7251096921485393, "grad_norm": 0.25260481238365173, "learning_rate": 1.0651629932434593e-05, "loss": 0.1071, "step": 40654 }, { "epoch": 0.725127528270253, "grad_norm": 0.30491986870765686, "learning_rate": 1.0650355334630695e-05, "loss": 0.1162, "step": 40655 }, { "epoch": 0.7251453643919666, "grad_norm": 0.26913368701934814, "learning_rate": 1.064908079245098e-05, "loss": 0.1327, "step": 40656 }, { "epoch": 0.7251632005136803, "grad_norm": 0.23623481392860413, "learning_rate": 1.0647806305900382e-05, "loss": 0.0936, "step": 40657 }, { "epoch": 0.725181036635394, "grad_norm": 0.2867516577243805, "learning_rate": 1.0646531874983834e-05, "loss": 0.133, "step": 40658 }, { "epoch": 0.7251988727571077, "grad_norm": 0.29122912883758545, "learning_rate": 1.0645257499706296e-05, "loss": 0.1006, "step": 40659 }, { "epoch": 0.7252167088788214, "grad_norm": 0.34155797958374023, "learning_rate": 1.0643983180072698e-05, "loss": 0.1386, "step": 40660 }, { "epoch": 0.7252345450005351, "grad_norm": 0.3489937484264374, "learning_rate": 1.0642708916087978e-05, "loss": 0.0961, "step": 40661 }, { "epoch": 0.7252523811222488, "grad_norm": 0.2931669056415558, "learning_rate": 1.064143470775707e-05, "loss": 0.1012, "step": 40662 }, { "epoch": 0.7252702172439625, "grad_norm": 0.24757510423660278, "learning_rate": 1.0640160555084926e-05, "loss": 0.1219, "step": 40663 }, { "epoch": 0.7252880533656761, "grad_norm": 0.2876999080181122, "learning_rate": 1.0638886458076474e-05, "loss": 0.1219, "step": 40664 }, { "epoch": 0.7253058894873898, "grad_norm": 0.3704746961593628, "learning_rate": 1.0637612416736667e-05, "loss": 0.1097, "step": 40665 }, { "epoch": 0.7253237256091035, "grad_norm": 0.28447043895721436, "learning_rate": 1.0636338431070433e-05, "loss": 0.1298, "step": 40666 }, { "epoch": 0.7253415617308172, "grad_norm": 0.27934443950653076, "learning_rate": 1.0635064501082707e-05, "loss": 0.1568, "step": 40667 }, { "epoch": 0.7253593978525309, "grad_norm": 0.3355010449886322, "learning_rate": 1.0633790626778437e-05, "loss": 0.13, "step": 40668 }, { "epoch": 0.7253772339742447, "grad_norm": 0.2632060945034027, "learning_rate": 1.0632516808162557e-05, "loss": 0.1216, "step": 40669 }, { "epoch": 0.7253950700959584, "grad_norm": 0.2881837487220764, "learning_rate": 1.0631243045240008e-05, "loss": 0.1411, "step": 40670 }, { "epoch": 0.7254129062176721, "grad_norm": 0.2924255430698395, "learning_rate": 1.0629969338015711e-05, "loss": 0.1128, "step": 40671 }, { "epoch": 0.7254307423393858, "grad_norm": 0.26867061853408813, "learning_rate": 1.0628695686494627e-05, "loss": 0.1479, "step": 40672 }, { "epoch": 0.7254485784610994, "grad_norm": 0.2628811001777649, "learning_rate": 1.0627422090681683e-05, "loss": 0.1058, "step": 40673 }, { "epoch": 0.7254664145828131, "grad_norm": 0.3317461609840393, "learning_rate": 1.0626148550581813e-05, "loss": 0.1674, "step": 40674 }, { "epoch": 0.7254842507045268, "grad_norm": 0.25950726866722107, "learning_rate": 1.0624875066199947e-05, "loss": 0.0797, "step": 40675 }, { "epoch": 0.7255020868262405, "grad_norm": 0.2664039134979248, "learning_rate": 1.0623601637541039e-05, "loss": 0.1654, "step": 40676 }, { "epoch": 0.7255199229479542, "grad_norm": 0.2518742084503174, "learning_rate": 1.0622328264610018e-05, "loss": 0.0821, "step": 40677 }, { "epoch": 0.7255377590696679, "grad_norm": 0.37814977765083313, "learning_rate": 1.0621054947411806e-05, "loss": 0.1256, "step": 40678 }, { "epoch": 0.7255555951913816, "grad_norm": 0.2022005021572113, "learning_rate": 1.0619781685951366e-05, "loss": 0.0784, "step": 40679 }, { "epoch": 0.7255734313130953, "grad_norm": 0.2952006757259369, "learning_rate": 1.0618508480233607e-05, "loss": 0.1243, "step": 40680 }, { "epoch": 0.725591267434809, "grad_norm": 0.19452205300331116, "learning_rate": 1.0617235330263484e-05, "loss": 0.1003, "step": 40681 }, { "epoch": 0.7256091035565226, "grad_norm": 0.2793303430080414, "learning_rate": 1.0615962236045926e-05, "loss": 0.1871, "step": 40682 }, { "epoch": 0.7256269396782363, "grad_norm": 0.22673064470291138, "learning_rate": 1.0614689197585866e-05, "loss": 0.0921, "step": 40683 }, { "epoch": 0.72564477579995, "grad_norm": 0.23677125573158264, "learning_rate": 1.061341621488823e-05, "loss": 0.1017, "step": 40684 }, { "epoch": 0.7256626119216637, "grad_norm": 0.17663273215293884, "learning_rate": 1.0612143287957974e-05, "loss": 0.0726, "step": 40685 }, { "epoch": 0.7256804480433775, "grad_norm": 0.2574593126773834, "learning_rate": 1.0610870416800017e-05, "loss": 0.1483, "step": 40686 }, { "epoch": 0.7256982841650912, "grad_norm": 0.2850920855998993, "learning_rate": 1.0609597601419296e-05, "loss": 0.1657, "step": 40687 }, { "epoch": 0.7257161202868049, "grad_norm": 0.2715839743614197, "learning_rate": 1.0608324841820735e-05, "loss": 0.1042, "step": 40688 }, { "epoch": 0.7257339564085186, "grad_norm": 0.2802274227142334, "learning_rate": 1.0607052138009288e-05, "loss": 0.0786, "step": 40689 }, { "epoch": 0.7257517925302323, "grad_norm": 0.2860008478164673, "learning_rate": 1.0605779489989878e-05, "loss": 0.1347, "step": 40690 }, { "epoch": 0.7257696286519459, "grad_norm": 0.3361153304576874, "learning_rate": 1.060450689776743e-05, "loss": 0.1278, "step": 40691 }, { "epoch": 0.7257874647736596, "grad_norm": 0.2440606951713562, "learning_rate": 1.0603234361346895e-05, "loss": 0.1144, "step": 40692 }, { "epoch": 0.7258053008953733, "grad_norm": 0.28858011960983276, "learning_rate": 1.0601961880733186e-05, "loss": 0.1271, "step": 40693 }, { "epoch": 0.725823137017087, "grad_norm": 0.26654544472694397, "learning_rate": 1.0600689455931254e-05, "loss": 0.1427, "step": 40694 }, { "epoch": 0.7258409731388007, "grad_norm": 0.31179121136665344, "learning_rate": 1.0599417086946023e-05, "loss": 0.1155, "step": 40695 }, { "epoch": 0.7258588092605144, "grad_norm": 0.24408067762851715, "learning_rate": 1.0598144773782425e-05, "loss": 0.1096, "step": 40696 }, { "epoch": 0.7258766453822281, "grad_norm": 0.27092304825782776, "learning_rate": 1.0596872516445385e-05, "loss": 0.0897, "step": 40697 }, { "epoch": 0.7258944815039418, "grad_norm": 0.27914175391197205, "learning_rate": 1.0595600314939849e-05, "loss": 0.1244, "step": 40698 }, { "epoch": 0.7259123176256554, "grad_norm": 0.3074016571044922, "learning_rate": 1.0594328169270742e-05, "loss": 0.0982, "step": 40699 }, { "epoch": 0.7259301537473691, "grad_norm": 0.22875364124774933, "learning_rate": 1.0593056079442995e-05, "loss": 0.1224, "step": 40700 }, { "epoch": 0.7259479898690828, "grad_norm": 0.30135592818260193, "learning_rate": 1.0591784045461531e-05, "loss": 0.1081, "step": 40701 }, { "epoch": 0.7259658259907965, "grad_norm": 0.3393057584762573, "learning_rate": 1.0590512067331294e-05, "loss": 0.1704, "step": 40702 }, { "epoch": 0.7259836621125103, "grad_norm": 0.26212579011917114, "learning_rate": 1.058924014505721e-05, "loss": 0.1682, "step": 40703 }, { "epoch": 0.726001498234224, "grad_norm": 0.25574398040771484, "learning_rate": 1.0587968278644212e-05, "loss": 0.1011, "step": 40704 }, { "epoch": 0.7260193343559377, "grad_norm": 0.30615395307540894, "learning_rate": 1.0586696468097212e-05, "loss": 0.1297, "step": 40705 }, { "epoch": 0.7260371704776514, "grad_norm": 0.23416419327259064, "learning_rate": 1.0585424713421168e-05, "loss": 0.0811, "step": 40706 }, { "epoch": 0.7260550065993651, "grad_norm": 0.2481381595134735, "learning_rate": 1.0584153014620984e-05, "loss": 0.11, "step": 40707 }, { "epoch": 0.7260728427210787, "grad_norm": 0.2731077969074249, "learning_rate": 1.0582881371701615e-05, "loss": 0.1172, "step": 40708 }, { "epoch": 0.7260906788427924, "grad_norm": 0.22165630757808685, "learning_rate": 1.0581609784667976e-05, "loss": 0.1274, "step": 40709 }, { "epoch": 0.7261085149645061, "grad_norm": 0.28758513927459717, "learning_rate": 1.0580338253524988e-05, "loss": 0.1464, "step": 40710 }, { "epoch": 0.7261263510862198, "grad_norm": 0.1941150724887848, "learning_rate": 1.0579066778277596e-05, "loss": 0.1186, "step": 40711 }, { "epoch": 0.7261441872079335, "grad_norm": 0.33894768357276917, "learning_rate": 1.0577795358930725e-05, "loss": 0.1361, "step": 40712 }, { "epoch": 0.7261620233296472, "grad_norm": 0.27559563517570496, "learning_rate": 1.0576523995489299e-05, "loss": 0.1109, "step": 40713 }, { "epoch": 0.7261798594513609, "grad_norm": 0.27855145931243896, "learning_rate": 1.057525268795824e-05, "loss": 0.0911, "step": 40714 }, { "epoch": 0.7261976955730746, "grad_norm": 0.3512547016143799, "learning_rate": 1.0573981436342493e-05, "loss": 0.0715, "step": 40715 }, { "epoch": 0.7262155316947883, "grad_norm": 0.20762325823307037, "learning_rate": 1.0572710240646973e-05, "loss": 0.1055, "step": 40716 }, { "epoch": 0.7262333678165019, "grad_norm": 0.2569544315338135, "learning_rate": 1.0571439100876615e-05, "loss": 0.1005, "step": 40717 }, { "epoch": 0.7262512039382156, "grad_norm": 0.3328148424625397, "learning_rate": 1.0570168017036333e-05, "loss": 0.1147, "step": 40718 }, { "epoch": 0.7262690400599293, "grad_norm": 0.27908775210380554, "learning_rate": 1.0568896989131071e-05, "loss": 0.1455, "step": 40719 }, { "epoch": 0.7262868761816431, "grad_norm": 0.2935066521167755, "learning_rate": 1.056762601716574e-05, "loss": 0.1492, "step": 40720 }, { "epoch": 0.7263047123033568, "grad_norm": 0.23233112692832947, "learning_rate": 1.0566355101145286e-05, "loss": 0.1116, "step": 40721 }, { "epoch": 0.7263225484250705, "grad_norm": 0.22923393547534943, "learning_rate": 1.0565084241074624e-05, "loss": 0.0935, "step": 40722 }, { "epoch": 0.7263403845467842, "grad_norm": 0.264287531375885, "learning_rate": 1.0563813436958672e-05, "loss": 0.0897, "step": 40723 }, { "epoch": 0.7263582206684979, "grad_norm": 0.2624832093715668, "learning_rate": 1.0562542688802374e-05, "loss": 0.1543, "step": 40724 }, { "epoch": 0.7263760567902116, "grad_norm": 0.5052750110626221, "learning_rate": 1.0561271996610647e-05, "loss": 0.1483, "step": 40725 }, { "epoch": 0.7263938929119252, "grad_norm": 0.21963505446910858, "learning_rate": 1.0560001360388416e-05, "loss": 0.1085, "step": 40726 }, { "epoch": 0.7264117290336389, "grad_norm": 0.43002766370773315, "learning_rate": 1.0558730780140599e-05, "loss": 0.1681, "step": 40727 }, { "epoch": 0.7264295651553526, "grad_norm": 0.3028855621814728, "learning_rate": 1.055746025587214e-05, "loss": 0.1142, "step": 40728 }, { "epoch": 0.7264474012770663, "grad_norm": 0.25368496775627136, "learning_rate": 1.055618978758795e-05, "loss": 0.113, "step": 40729 }, { "epoch": 0.72646523739878, "grad_norm": 0.3520154356956482, "learning_rate": 1.055491937529296e-05, "loss": 0.1274, "step": 40730 }, { "epoch": 0.7264830735204937, "grad_norm": 0.25041234493255615, "learning_rate": 1.055364901899209e-05, "loss": 0.1123, "step": 40731 }, { "epoch": 0.7265009096422074, "grad_norm": 0.2312353104352951, "learning_rate": 1.0552378718690257e-05, "loss": 0.1384, "step": 40732 }, { "epoch": 0.7265187457639211, "grad_norm": 0.3082735538482666, "learning_rate": 1.0551108474392405e-05, "loss": 0.0955, "step": 40733 }, { "epoch": 0.7265365818856347, "grad_norm": 0.286031037569046, "learning_rate": 1.0549838286103436e-05, "loss": 0.1003, "step": 40734 }, { "epoch": 0.7265544180073484, "grad_norm": 0.19576328992843628, "learning_rate": 1.0548568153828292e-05, "loss": 0.1212, "step": 40735 }, { "epoch": 0.7265722541290621, "grad_norm": 0.38110560178756714, "learning_rate": 1.0547298077571885e-05, "loss": 0.1363, "step": 40736 }, { "epoch": 0.7265900902507759, "grad_norm": 0.3293739855289459, "learning_rate": 1.0546028057339147e-05, "loss": 0.1305, "step": 40737 }, { "epoch": 0.7266079263724896, "grad_norm": 0.3222125768661499, "learning_rate": 1.0544758093134999e-05, "loss": 0.1791, "step": 40738 }, { "epoch": 0.7266257624942033, "grad_norm": 0.3354884088039398, "learning_rate": 1.0543488184964363e-05, "loss": 0.1107, "step": 40739 }, { "epoch": 0.726643598615917, "grad_norm": 0.32864633202552795, "learning_rate": 1.0542218332832147e-05, "loss": 0.1082, "step": 40740 }, { "epoch": 0.7266614347376307, "grad_norm": 0.2713450491428375, "learning_rate": 1.0540948536743297e-05, "loss": 0.1111, "step": 40741 }, { "epoch": 0.7266792708593444, "grad_norm": 0.31537148356437683, "learning_rate": 1.0539678796702724e-05, "loss": 0.1238, "step": 40742 }, { "epoch": 0.726697106981058, "grad_norm": 0.24227231740951538, "learning_rate": 1.053840911271535e-05, "loss": 0.1306, "step": 40743 }, { "epoch": 0.7267149431027717, "grad_norm": 0.27465173602104187, "learning_rate": 1.0537139484786098e-05, "loss": 0.1109, "step": 40744 }, { "epoch": 0.7267327792244854, "grad_norm": 0.31403836607933044, "learning_rate": 1.0535869912919882e-05, "loss": 0.1262, "step": 40745 }, { "epoch": 0.7267506153461991, "grad_norm": 0.2626326382160187, "learning_rate": 1.0534600397121635e-05, "loss": 0.0917, "step": 40746 }, { "epoch": 0.7267684514679128, "grad_norm": 0.3095690608024597, "learning_rate": 1.0533330937396265e-05, "loss": 0.1595, "step": 40747 }, { "epoch": 0.7267862875896265, "grad_norm": 0.3072819411754608, "learning_rate": 1.0532061533748713e-05, "loss": 0.106, "step": 40748 }, { "epoch": 0.7268041237113402, "grad_norm": 0.2610625922679901, "learning_rate": 1.0530792186183875e-05, "loss": 0.1397, "step": 40749 }, { "epoch": 0.7268219598330539, "grad_norm": 0.24871870875358582, "learning_rate": 1.0529522894706696e-05, "loss": 0.0998, "step": 40750 }, { "epoch": 0.7268397959547676, "grad_norm": 0.28653693199157715, "learning_rate": 1.0528253659322083e-05, "loss": 0.156, "step": 40751 }, { "epoch": 0.7268576320764812, "grad_norm": 0.25087928771972656, "learning_rate": 1.0526984480034954e-05, "loss": 0.1453, "step": 40752 }, { "epoch": 0.7268754681981949, "grad_norm": 0.3090744912624359, "learning_rate": 1.0525715356850226e-05, "loss": 0.0843, "step": 40753 }, { "epoch": 0.7268933043199087, "grad_norm": 0.2786547839641571, "learning_rate": 1.0524446289772832e-05, "loss": 0.0893, "step": 40754 }, { "epoch": 0.7269111404416224, "grad_norm": 0.29326432943344116, "learning_rate": 1.0523177278807686e-05, "loss": 0.0916, "step": 40755 }, { "epoch": 0.7269289765633361, "grad_norm": 0.3040871322154999, "learning_rate": 1.0521908323959703e-05, "loss": 0.1159, "step": 40756 }, { "epoch": 0.7269468126850498, "grad_norm": 0.24024918675422668, "learning_rate": 1.0520639425233805e-05, "loss": 0.121, "step": 40757 }, { "epoch": 0.7269646488067635, "grad_norm": 0.21338613331317902, "learning_rate": 1.0519370582634902e-05, "loss": 0.0546, "step": 40758 }, { "epoch": 0.7269824849284772, "grad_norm": 0.2051226645708084, "learning_rate": 1.0518101796167929e-05, "loss": 0.1154, "step": 40759 }, { "epoch": 0.7270003210501909, "grad_norm": 0.2698860168457031, "learning_rate": 1.0516833065837792e-05, "loss": 0.1341, "step": 40760 }, { "epoch": 0.7270181571719045, "grad_norm": 0.2239181101322174, "learning_rate": 1.0515564391649405e-05, "loss": 0.0772, "step": 40761 }, { "epoch": 0.7270359932936182, "grad_norm": 0.26052507758140564, "learning_rate": 1.0514295773607693e-05, "loss": 0.1295, "step": 40762 }, { "epoch": 0.7270538294153319, "grad_norm": 0.3422616720199585, "learning_rate": 1.0513027211717582e-05, "loss": 0.1196, "step": 40763 }, { "epoch": 0.7270716655370456, "grad_norm": 0.21924665570259094, "learning_rate": 1.0511758705983984e-05, "loss": 0.1107, "step": 40764 }, { "epoch": 0.7270895016587593, "grad_norm": 0.23988090455532074, "learning_rate": 1.0510490256411811e-05, "loss": 0.0641, "step": 40765 }, { "epoch": 0.727107337780473, "grad_norm": 0.33719107508659363, "learning_rate": 1.0509221863005974e-05, "loss": 0.1443, "step": 40766 }, { "epoch": 0.7271251739021867, "grad_norm": 0.2508661150932312, "learning_rate": 1.0507953525771407e-05, "loss": 0.0873, "step": 40767 }, { "epoch": 0.7271430100239004, "grad_norm": 0.2774602472782135, "learning_rate": 1.0506685244713019e-05, "loss": 0.1042, "step": 40768 }, { "epoch": 0.727160846145614, "grad_norm": 0.18078641593456268, "learning_rate": 1.0505417019835722e-05, "loss": 0.0813, "step": 40769 }, { "epoch": 0.7271786822673278, "grad_norm": 0.2852545380592346, "learning_rate": 1.0504148851144438e-05, "loss": 0.1705, "step": 40770 }, { "epoch": 0.7271965183890415, "grad_norm": 0.24146457016468048, "learning_rate": 1.050288073864407e-05, "loss": 0.0811, "step": 40771 }, { "epoch": 0.7272143545107552, "grad_norm": 0.2205086201429367, "learning_rate": 1.0501612682339551e-05, "loss": 0.1028, "step": 40772 }, { "epoch": 0.7272321906324689, "grad_norm": 0.2953667938709259, "learning_rate": 1.0500344682235788e-05, "loss": 0.1413, "step": 40773 }, { "epoch": 0.7272500267541826, "grad_norm": 0.2932448387145996, "learning_rate": 1.049907673833769e-05, "loss": 0.1228, "step": 40774 }, { "epoch": 0.7272678628758963, "grad_norm": 0.2763751745223999, "learning_rate": 1.0497808850650188e-05, "loss": 0.1222, "step": 40775 }, { "epoch": 0.72728569899761, "grad_norm": 0.3213701844215393, "learning_rate": 1.0496541019178177e-05, "loss": 0.1331, "step": 40776 }, { "epoch": 0.7273035351193237, "grad_norm": 0.3039744198322296, "learning_rate": 1.0495273243926592e-05, "loss": 0.1382, "step": 40777 }, { "epoch": 0.7273213712410374, "grad_norm": 0.2129533588886261, "learning_rate": 1.0494005524900338e-05, "loss": 0.1229, "step": 40778 }, { "epoch": 0.727339207362751, "grad_norm": 0.2732669413089752, "learning_rate": 1.0492737862104321e-05, "loss": 0.1163, "step": 40779 }, { "epoch": 0.7273570434844647, "grad_norm": 0.2482658326625824, "learning_rate": 1.049147025554347e-05, "loss": 0.1309, "step": 40780 }, { "epoch": 0.7273748796061784, "grad_norm": 0.2273804396390915, "learning_rate": 1.049020270522269e-05, "loss": 0.1273, "step": 40781 }, { "epoch": 0.7273927157278921, "grad_norm": 0.2663029134273529, "learning_rate": 1.04889352111469e-05, "loss": 0.1417, "step": 40782 }, { "epoch": 0.7274105518496058, "grad_norm": 0.2137305587530136, "learning_rate": 1.0487667773321005e-05, "loss": 0.0898, "step": 40783 }, { "epoch": 0.7274283879713195, "grad_norm": 0.27498704195022583, "learning_rate": 1.0486400391749918e-05, "loss": 0.1329, "step": 40784 }, { "epoch": 0.7274462240930332, "grad_norm": 0.3028446137905121, "learning_rate": 1.048513306643856e-05, "loss": 0.1083, "step": 40785 }, { "epoch": 0.7274640602147469, "grad_norm": 0.29380154609680176, "learning_rate": 1.0483865797391842e-05, "loss": 0.1379, "step": 40786 }, { "epoch": 0.7274818963364607, "grad_norm": 0.27396297454833984, "learning_rate": 1.0482598584614673e-05, "loss": 0.0995, "step": 40787 }, { "epoch": 0.7274997324581743, "grad_norm": 0.314533531665802, "learning_rate": 1.0481331428111957e-05, "loss": 0.1104, "step": 40788 }, { "epoch": 0.727517568579888, "grad_norm": 0.2690560221672058, "learning_rate": 1.0480064327888617e-05, "loss": 0.0946, "step": 40789 }, { "epoch": 0.7275354047016017, "grad_norm": 0.29971808195114136, "learning_rate": 1.0478797283949568e-05, "loss": 0.1952, "step": 40790 }, { "epoch": 0.7275532408233154, "grad_norm": 0.304678738117218, "learning_rate": 1.047753029629972e-05, "loss": 0.1086, "step": 40791 }, { "epoch": 0.7275710769450291, "grad_norm": 0.27599942684173584, "learning_rate": 1.047626336494397e-05, "loss": 0.1004, "step": 40792 }, { "epoch": 0.7275889130667428, "grad_norm": 0.24337460100650787, "learning_rate": 1.0474996489887246e-05, "loss": 0.1079, "step": 40793 }, { "epoch": 0.7276067491884565, "grad_norm": 0.2940039336681366, "learning_rate": 1.0473729671134455e-05, "loss": 0.1528, "step": 40794 }, { "epoch": 0.7276245853101702, "grad_norm": 0.26683154702186584, "learning_rate": 1.0472462908690506e-05, "loss": 0.1346, "step": 40795 }, { "epoch": 0.7276424214318838, "grad_norm": 0.3066383898258209, "learning_rate": 1.0471196202560307e-05, "loss": 0.1323, "step": 40796 }, { "epoch": 0.7276602575535975, "grad_norm": 0.2884158790111542, "learning_rate": 1.0469929552748758e-05, "loss": 0.1404, "step": 40797 }, { "epoch": 0.7276780936753112, "grad_norm": 0.3149704337120056, "learning_rate": 1.0468662959260794e-05, "loss": 0.1565, "step": 40798 }, { "epoch": 0.7276959297970249, "grad_norm": 0.38092759251594543, "learning_rate": 1.0467396422101308e-05, "loss": 0.1892, "step": 40799 }, { "epoch": 0.7277137659187386, "grad_norm": 0.24065916240215302, "learning_rate": 1.0466129941275216e-05, "loss": 0.1419, "step": 40800 }, { "epoch": 0.7277316020404523, "grad_norm": 0.3000841736793518, "learning_rate": 1.0464863516787413e-05, "loss": 0.1303, "step": 40801 }, { "epoch": 0.727749438162166, "grad_norm": 0.2805483639240265, "learning_rate": 1.0463597148642828e-05, "loss": 0.1247, "step": 40802 }, { "epoch": 0.7277672742838797, "grad_norm": 0.24328021705150604, "learning_rate": 1.0462330836846355e-05, "loss": 0.1913, "step": 40803 }, { "epoch": 0.7277851104055935, "grad_norm": 0.1977032572031021, "learning_rate": 1.0461064581402916e-05, "loss": 0.0774, "step": 40804 }, { "epoch": 0.7278029465273071, "grad_norm": 0.2659285068511963, "learning_rate": 1.0459798382317402e-05, "loss": 0.0917, "step": 40805 }, { "epoch": 0.7278207826490208, "grad_norm": 0.3462757170200348, "learning_rate": 1.0458532239594742e-05, "loss": 0.1178, "step": 40806 }, { "epoch": 0.7278386187707345, "grad_norm": 0.259512722492218, "learning_rate": 1.0457266153239833e-05, "loss": 0.1129, "step": 40807 }, { "epoch": 0.7278564548924482, "grad_norm": 0.3088579773902893, "learning_rate": 1.0456000123257584e-05, "loss": 0.1272, "step": 40808 }, { "epoch": 0.7278742910141619, "grad_norm": 0.2899314761161804, "learning_rate": 1.04547341496529e-05, "loss": 0.1091, "step": 40809 }, { "epoch": 0.7278921271358756, "grad_norm": 0.21103590726852417, "learning_rate": 1.0453468232430682e-05, "loss": 0.1318, "step": 40810 }, { "epoch": 0.7279099632575893, "grad_norm": 0.3091026544570923, "learning_rate": 1.0452202371595856e-05, "loss": 0.1021, "step": 40811 }, { "epoch": 0.727927799379303, "grad_norm": 0.2287903130054474, "learning_rate": 1.0450936567153316e-05, "loss": 0.1329, "step": 40812 }, { "epoch": 0.7279456355010167, "grad_norm": 0.18485434353351593, "learning_rate": 1.0449670819107974e-05, "loss": 0.0547, "step": 40813 }, { "epoch": 0.7279634716227303, "grad_norm": 0.26258590817451477, "learning_rate": 1.0448405127464722e-05, "loss": 0.1122, "step": 40814 }, { "epoch": 0.727981307744444, "grad_norm": 0.2268001139163971, "learning_rate": 1.0447139492228486e-05, "loss": 0.1335, "step": 40815 }, { "epoch": 0.7279991438661577, "grad_norm": 0.2697557806968689, "learning_rate": 1.0445873913404156e-05, "loss": 0.103, "step": 40816 }, { "epoch": 0.7280169799878714, "grad_norm": 0.22228126227855682, "learning_rate": 1.0444608390996655e-05, "loss": 0.0726, "step": 40817 }, { "epoch": 0.7280348161095851, "grad_norm": 0.2938465476036072, "learning_rate": 1.0443342925010868e-05, "loss": 0.0857, "step": 40818 }, { "epoch": 0.7280526522312988, "grad_norm": 0.2011396884918213, "learning_rate": 1.0442077515451724e-05, "loss": 0.0534, "step": 40819 }, { "epoch": 0.7280704883530125, "grad_norm": 0.27762192487716675, "learning_rate": 1.0440812162324113e-05, "loss": 0.1058, "step": 40820 }, { "epoch": 0.7280883244747263, "grad_norm": 0.25490036606788635, "learning_rate": 1.0439546865632943e-05, "loss": 0.0927, "step": 40821 }, { "epoch": 0.72810616059644, "grad_norm": 0.36661574244499207, "learning_rate": 1.043828162538312e-05, "loss": 0.1161, "step": 40822 }, { "epoch": 0.7281239967181536, "grad_norm": 0.24220754206180573, "learning_rate": 1.0437016441579537e-05, "loss": 0.1752, "step": 40823 }, { "epoch": 0.7281418328398673, "grad_norm": 0.28342440724372864, "learning_rate": 1.0435751314227116e-05, "loss": 0.106, "step": 40824 }, { "epoch": 0.728159668961581, "grad_norm": 0.26885533332824707, "learning_rate": 1.0434486243330754e-05, "loss": 0.1524, "step": 40825 }, { "epoch": 0.7281775050832947, "grad_norm": 0.24018530547618866, "learning_rate": 1.0433221228895354e-05, "loss": 0.1057, "step": 40826 }, { "epoch": 0.7281953412050084, "grad_norm": 0.20767271518707275, "learning_rate": 1.0431956270925811e-05, "loss": 0.1448, "step": 40827 }, { "epoch": 0.7282131773267221, "grad_norm": 0.3597570061683655, "learning_rate": 1.0430691369427045e-05, "loss": 0.1453, "step": 40828 }, { "epoch": 0.7282310134484358, "grad_norm": 0.23094920814037323, "learning_rate": 1.0429426524403954e-05, "loss": 0.0905, "step": 40829 }, { "epoch": 0.7282488495701495, "grad_norm": 0.29736557602882385, "learning_rate": 1.0428161735861428e-05, "loss": 0.1724, "step": 40830 }, { "epoch": 0.7282666856918631, "grad_norm": 0.2172999233007431, "learning_rate": 1.0426897003804386e-05, "loss": 0.1056, "step": 40831 }, { "epoch": 0.7282845218135768, "grad_norm": 0.2563237249851227, "learning_rate": 1.042563232823772e-05, "loss": 0.1209, "step": 40832 }, { "epoch": 0.7283023579352905, "grad_norm": 0.30967509746551514, "learning_rate": 1.0424367709166344e-05, "loss": 0.1121, "step": 40833 }, { "epoch": 0.7283201940570042, "grad_norm": 0.2564481496810913, "learning_rate": 1.0423103146595154e-05, "loss": 0.0843, "step": 40834 }, { "epoch": 0.7283380301787179, "grad_norm": 0.3186921775341034, "learning_rate": 1.042183864052905e-05, "loss": 0.1328, "step": 40835 }, { "epoch": 0.7283558663004316, "grad_norm": 0.31604063510894775, "learning_rate": 1.0420574190972926e-05, "loss": 0.1429, "step": 40836 }, { "epoch": 0.7283737024221453, "grad_norm": 0.25721925497055054, "learning_rate": 1.0419309797931701e-05, "loss": 0.0837, "step": 40837 }, { "epoch": 0.7283915385438591, "grad_norm": 0.23972263932228088, "learning_rate": 1.0418045461410263e-05, "loss": 0.136, "step": 40838 }, { "epoch": 0.7284093746655728, "grad_norm": 0.2746901214122772, "learning_rate": 1.0416781181413523e-05, "loss": 0.1381, "step": 40839 }, { "epoch": 0.7284272107872864, "grad_norm": 0.29209262132644653, "learning_rate": 1.0415516957946366e-05, "loss": 0.1472, "step": 40840 }, { "epoch": 0.7284450469090001, "grad_norm": 0.23592689633369446, "learning_rate": 1.041425279101371e-05, "loss": 0.0621, "step": 40841 }, { "epoch": 0.7284628830307138, "grad_norm": 0.34710782766342163, "learning_rate": 1.041298868062045e-05, "loss": 0.1461, "step": 40842 }, { "epoch": 0.7284807191524275, "grad_norm": 0.35806378722190857, "learning_rate": 1.0411724626771482e-05, "loss": 0.188, "step": 40843 }, { "epoch": 0.7284985552741412, "grad_norm": 0.22632494568824768, "learning_rate": 1.04104606294717e-05, "loss": 0.0753, "step": 40844 }, { "epoch": 0.7285163913958549, "grad_norm": 0.2383161336183548, "learning_rate": 1.040919668872601e-05, "loss": 0.1159, "step": 40845 }, { "epoch": 0.7285342275175686, "grad_norm": 0.3089994788169861, "learning_rate": 1.0407932804539325e-05, "loss": 0.1064, "step": 40846 }, { "epoch": 0.7285520636392823, "grad_norm": 0.27490413188934326, "learning_rate": 1.0406668976916531e-05, "loss": 0.1006, "step": 40847 }, { "epoch": 0.728569899760996, "grad_norm": 0.31490302085876465, "learning_rate": 1.0405405205862529e-05, "loss": 0.1508, "step": 40848 }, { "epoch": 0.7285877358827096, "grad_norm": 0.2667716443538666, "learning_rate": 1.0404141491382208e-05, "loss": 0.1151, "step": 40849 }, { "epoch": 0.7286055720044233, "grad_norm": 0.20112928748130798, "learning_rate": 1.0402877833480485e-05, "loss": 0.1066, "step": 40850 }, { "epoch": 0.728623408126137, "grad_norm": 0.28737518191337585, "learning_rate": 1.0401614232162246e-05, "loss": 0.1369, "step": 40851 }, { "epoch": 0.7286412442478507, "grad_norm": 0.3094600439071655, "learning_rate": 1.0400350687432395e-05, "loss": 0.169, "step": 40852 }, { "epoch": 0.7286590803695644, "grad_norm": 0.21896794438362122, "learning_rate": 1.0399087199295819e-05, "loss": 0.1649, "step": 40853 }, { "epoch": 0.7286769164912781, "grad_norm": 0.27871015667915344, "learning_rate": 1.0397823767757433e-05, "loss": 0.1124, "step": 40854 }, { "epoch": 0.7286947526129919, "grad_norm": 0.22240017354488373, "learning_rate": 1.0396560392822124e-05, "loss": 0.0806, "step": 40855 }, { "epoch": 0.7287125887347056, "grad_norm": 0.43458759784698486, "learning_rate": 1.0395297074494791e-05, "loss": 0.1663, "step": 40856 }, { "epoch": 0.7287304248564193, "grad_norm": 0.20958521962165833, "learning_rate": 1.0394033812780323e-05, "loss": 0.0499, "step": 40857 }, { "epoch": 0.728748260978133, "grad_norm": 0.3532405495643616, "learning_rate": 1.0392770607683633e-05, "loss": 0.1398, "step": 40858 }, { "epoch": 0.7287660970998466, "grad_norm": 0.2999124228954315, "learning_rate": 1.0391507459209601e-05, "loss": 0.1586, "step": 40859 }, { "epoch": 0.7287839332215603, "grad_norm": 0.22298569977283478, "learning_rate": 1.0390244367363139e-05, "loss": 0.0801, "step": 40860 }, { "epoch": 0.728801769343274, "grad_norm": 0.21821489930152893, "learning_rate": 1.0388981332149138e-05, "loss": 0.1066, "step": 40861 }, { "epoch": 0.7288196054649877, "grad_norm": 0.2001788467168808, "learning_rate": 1.0387718353572482e-05, "loss": 0.07, "step": 40862 }, { "epoch": 0.7288374415867014, "grad_norm": 0.2745591104030609, "learning_rate": 1.0386455431638085e-05, "loss": 0.1416, "step": 40863 }, { "epoch": 0.7288552777084151, "grad_norm": 0.32037249207496643, "learning_rate": 1.0385192566350835e-05, "loss": 0.1115, "step": 40864 }, { "epoch": 0.7288731138301288, "grad_norm": 0.2541063725948334, "learning_rate": 1.0383929757715623e-05, "loss": 0.0856, "step": 40865 }, { "epoch": 0.7288909499518424, "grad_norm": 0.3164190649986267, "learning_rate": 1.0382667005737343e-05, "loss": 0.1198, "step": 40866 }, { "epoch": 0.7289087860735561, "grad_norm": 0.23438000679016113, "learning_rate": 1.0381404310420903e-05, "loss": 0.1285, "step": 40867 }, { "epoch": 0.7289266221952698, "grad_norm": 0.2584449350833893, "learning_rate": 1.0380141671771188e-05, "loss": 0.1283, "step": 40868 }, { "epoch": 0.7289444583169835, "grad_norm": 0.27657485008239746, "learning_rate": 1.0378879089793093e-05, "loss": 0.1061, "step": 40869 }, { "epoch": 0.7289622944386972, "grad_norm": 0.2460792362689972, "learning_rate": 1.0377616564491502e-05, "loss": 0.0803, "step": 40870 }, { "epoch": 0.728980130560411, "grad_norm": 0.2133624404668808, "learning_rate": 1.0376354095871333e-05, "loss": 0.126, "step": 40871 }, { "epoch": 0.7289979666821247, "grad_norm": 0.229832723736763, "learning_rate": 1.0375091683937455e-05, "loss": 0.1056, "step": 40872 }, { "epoch": 0.7290158028038384, "grad_norm": 0.3540733754634857, "learning_rate": 1.037382932869478e-05, "loss": 0.1215, "step": 40873 }, { "epoch": 0.7290336389255521, "grad_norm": 0.35751286149024963, "learning_rate": 1.0372567030148194e-05, "loss": 0.0879, "step": 40874 }, { "epoch": 0.7290514750472658, "grad_norm": 0.28189775347709656, "learning_rate": 1.0371304788302586e-05, "loss": 0.0382, "step": 40875 }, { "epoch": 0.7290693111689794, "grad_norm": 0.2872788906097412, "learning_rate": 1.0370042603162858e-05, "loss": 0.151, "step": 40876 }, { "epoch": 0.7290871472906931, "grad_norm": 0.22581401467323303, "learning_rate": 1.03687804747339e-05, "loss": 0.1398, "step": 40877 }, { "epoch": 0.7291049834124068, "grad_norm": 0.32426077127456665, "learning_rate": 1.0367518403020602e-05, "loss": 0.1541, "step": 40878 }, { "epoch": 0.7291228195341205, "grad_norm": 0.2865886390209198, "learning_rate": 1.0366256388027849e-05, "loss": 0.109, "step": 40879 }, { "epoch": 0.7291406556558342, "grad_norm": 0.20413914322853088, "learning_rate": 1.0364994429760547e-05, "loss": 0.0886, "step": 40880 }, { "epoch": 0.7291584917775479, "grad_norm": 0.2653484642505646, "learning_rate": 1.036373252822358e-05, "loss": 0.091, "step": 40881 }, { "epoch": 0.7291763278992616, "grad_norm": 0.3649868369102478, "learning_rate": 1.0362470683421843e-05, "loss": 0.1198, "step": 40882 }, { "epoch": 0.7291941640209753, "grad_norm": 0.3148685097694397, "learning_rate": 1.0361208895360216e-05, "loss": 0.1691, "step": 40883 }, { "epoch": 0.7292120001426889, "grad_norm": 0.22477386891841888, "learning_rate": 1.035994716404361e-05, "loss": 0.0829, "step": 40884 }, { "epoch": 0.7292298362644026, "grad_norm": 0.28982433676719666, "learning_rate": 1.0358685489476905e-05, "loss": 0.1606, "step": 40885 }, { "epoch": 0.7292476723861163, "grad_norm": 0.25430357456207275, "learning_rate": 1.0357423871664982e-05, "loss": 0.1298, "step": 40886 }, { "epoch": 0.72926550850783, "grad_norm": 0.2837600111961365, "learning_rate": 1.0356162310612749e-05, "loss": 0.1094, "step": 40887 }, { "epoch": 0.7292833446295438, "grad_norm": 0.23365193605422974, "learning_rate": 1.0354900806325082e-05, "loss": 0.1287, "step": 40888 }, { "epoch": 0.7293011807512575, "grad_norm": 0.4007009267807007, "learning_rate": 1.0353639358806885e-05, "loss": 0.1036, "step": 40889 }, { "epoch": 0.7293190168729712, "grad_norm": 0.22579741477966309, "learning_rate": 1.0352377968063042e-05, "loss": 0.1404, "step": 40890 }, { "epoch": 0.7293368529946849, "grad_norm": 0.2736065685749054, "learning_rate": 1.0351116634098443e-05, "loss": 0.1069, "step": 40891 }, { "epoch": 0.7293546891163986, "grad_norm": 0.29125985503196716, "learning_rate": 1.0349855356917962e-05, "loss": 0.1204, "step": 40892 }, { "epoch": 0.7293725252381122, "grad_norm": 0.458539217710495, "learning_rate": 1.0348594136526513e-05, "loss": 0.1399, "step": 40893 }, { "epoch": 0.7293903613598259, "grad_norm": 0.26064369082450867, "learning_rate": 1.0347332972928975e-05, "loss": 0.0511, "step": 40894 }, { "epoch": 0.7294081974815396, "grad_norm": 0.21638596057891846, "learning_rate": 1.0346071866130233e-05, "loss": 0.1067, "step": 40895 }, { "epoch": 0.7294260336032533, "grad_norm": 0.25604212284088135, "learning_rate": 1.034481081613517e-05, "loss": 0.118, "step": 40896 }, { "epoch": 0.729443869724967, "grad_norm": 0.19671796262264252, "learning_rate": 1.034354982294869e-05, "loss": 0.0783, "step": 40897 }, { "epoch": 0.7294617058466807, "grad_norm": 0.3067481815814972, "learning_rate": 1.0342288886575674e-05, "loss": 0.1624, "step": 40898 }, { "epoch": 0.7294795419683944, "grad_norm": 0.3336236774921417, "learning_rate": 1.0341028007020998e-05, "loss": 0.0645, "step": 40899 }, { "epoch": 0.7294973780901081, "grad_norm": 0.18999546766281128, "learning_rate": 1.033976718428957e-05, "loss": 0.0913, "step": 40900 }, { "epoch": 0.7295152142118217, "grad_norm": 0.23737022280693054, "learning_rate": 1.033850641838626e-05, "loss": 0.1412, "step": 40901 }, { "epoch": 0.7295330503335354, "grad_norm": 0.19350391626358032, "learning_rate": 1.033724570931597e-05, "loss": 0.1176, "step": 40902 }, { "epoch": 0.7295508864552491, "grad_norm": 0.3428206145763397, "learning_rate": 1.0335985057083583e-05, "loss": 0.0995, "step": 40903 }, { "epoch": 0.7295687225769628, "grad_norm": 0.23868238925933838, "learning_rate": 1.0334724461693982e-05, "loss": 0.1299, "step": 40904 }, { "epoch": 0.7295865586986766, "grad_norm": 0.2400166541337967, "learning_rate": 1.0333463923152045e-05, "loss": 0.1075, "step": 40905 }, { "epoch": 0.7296043948203903, "grad_norm": 0.22942416369915009, "learning_rate": 1.0332203441462677e-05, "loss": 0.0926, "step": 40906 }, { "epoch": 0.729622230942104, "grad_norm": 0.2808311879634857, "learning_rate": 1.0330943016630754e-05, "loss": 0.1169, "step": 40907 }, { "epoch": 0.7296400670638177, "grad_norm": 0.2582831382751465, "learning_rate": 1.032968264866116e-05, "loss": 0.1333, "step": 40908 }, { "epoch": 0.7296579031855314, "grad_norm": 0.2564980387687683, "learning_rate": 1.0328422337558777e-05, "loss": 0.0955, "step": 40909 }, { "epoch": 0.729675739307245, "grad_norm": 0.31204789876937866, "learning_rate": 1.0327162083328504e-05, "loss": 0.1235, "step": 40910 }, { "epoch": 0.7296935754289587, "grad_norm": 0.20017872750759125, "learning_rate": 1.032590188597522e-05, "loss": 0.0743, "step": 40911 }, { "epoch": 0.7297114115506724, "grad_norm": 0.18407808244228363, "learning_rate": 1.0324641745503807e-05, "loss": 0.0993, "step": 40912 }, { "epoch": 0.7297292476723861, "grad_norm": 0.26999562978744507, "learning_rate": 1.0323381661919143e-05, "loss": 0.1504, "step": 40913 }, { "epoch": 0.7297470837940998, "grad_norm": 0.25350305438041687, "learning_rate": 1.0322121635226128e-05, "loss": 0.1258, "step": 40914 }, { "epoch": 0.7297649199158135, "grad_norm": 0.2944730818271637, "learning_rate": 1.0320861665429635e-05, "loss": 0.1009, "step": 40915 }, { "epoch": 0.7297827560375272, "grad_norm": 0.3151612877845764, "learning_rate": 1.0319601752534558e-05, "loss": 0.113, "step": 40916 }, { "epoch": 0.7298005921592409, "grad_norm": 0.28373289108276367, "learning_rate": 1.0318341896545775e-05, "loss": 0.1113, "step": 40917 }, { "epoch": 0.7298184282809546, "grad_norm": 0.21229694783687592, "learning_rate": 1.0317082097468158e-05, "loss": 0.0744, "step": 40918 }, { "epoch": 0.7298362644026682, "grad_norm": 0.33448320627212524, "learning_rate": 1.0315822355306615e-05, "loss": 0.107, "step": 40919 }, { "epoch": 0.7298541005243819, "grad_norm": 0.24119000136852264, "learning_rate": 1.0314562670066017e-05, "loss": 0.1291, "step": 40920 }, { "epoch": 0.7298719366460956, "grad_norm": 0.31554439663887024, "learning_rate": 1.0313303041751243e-05, "loss": 0.0911, "step": 40921 }, { "epoch": 0.7298897727678094, "grad_norm": 0.27390286326408386, "learning_rate": 1.0312043470367172e-05, "loss": 0.1041, "step": 40922 }, { "epoch": 0.7299076088895231, "grad_norm": 0.3328604996204376, "learning_rate": 1.0310783955918702e-05, "loss": 0.1559, "step": 40923 }, { "epoch": 0.7299254450112368, "grad_norm": 0.27093109488487244, "learning_rate": 1.0309524498410707e-05, "loss": 0.1299, "step": 40924 }, { "epoch": 0.7299432811329505, "grad_norm": 0.18972952663898468, "learning_rate": 1.0308265097848069e-05, "loss": 0.0853, "step": 40925 }, { "epoch": 0.7299611172546642, "grad_norm": 0.2934782803058624, "learning_rate": 1.030700575423566e-05, "loss": 0.1114, "step": 40926 }, { "epoch": 0.7299789533763779, "grad_norm": 0.33054137229919434, "learning_rate": 1.0305746467578382e-05, "loss": 0.1745, "step": 40927 }, { "epoch": 0.7299967894980915, "grad_norm": 0.26389992237091064, "learning_rate": 1.0304487237881098e-05, "loss": 0.1264, "step": 40928 }, { "epoch": 0.7300146256198052, "grad_norm": 0.16462776064872742, "learning_rate": 1.0303228065148704e-05, "loss": 0.0866, "step": 40929 }, { "epoch": 0.7300324617415189, "grad_norm": 0.29492875933647156, "learning_rate": 1.0301968949386074e-05, "loss": 0.1468, "step": 40930 }, { "epoch": 0.7300502978632326, "grad_norm": 0.24353215098381042, "learning_rate": 1.0300709890598079e-05, "loss": 0.1122, "step": 40931 }, { "epoch": 0.7300681339849463, "grad_norm": 0.2887917459011078, "learning_rate": 1.0299450888789618e-05, "loss": 0.1339, "step": 40932 }, { "epoch": 0.73008597010666, "grad_norm": 0.2835650146007538, "learning_rate": 1.0298191943965566e-05, "loss": 0.0888, "step": 40933 }, { "epoch": 0.7301038062283737, "grad_norm": 0.36773163080215454, "learning_rate": 1.02969330561308e-05, "loss": 0.1469, "step": 40934 }, { "epoch": 0.7301216423500874, "grad_norm": 0.20941372215747833, "learning_rate": 1.0295674225290189e-05, "loss": 0.0865, "step": 40935 }, { "epoch": 0.730139478471801, "grad_norm": 0.2645230293273926, "learning_rate": 1.0294415451448632e-05, "loss": 0.0798, "step": 40936 }, { "epoch": 0.7301573145935147, "grad_norm": 0.3101343810558319, "learning_rate": 1.0293156734611e-05, "loss": 0.0962, "step": 40937 }, { "epoch": 0.7301751507152284, "grad_norm": 0.4218254089355469, "learning_rate": 1.0291898074782172e-05, "loss": 0.1678, "step": 40938 }, { "epoch": 0.7301929868369422, "grad_norm": 0.23557502031326294, "learning_rate": 1.029063947196702e-05, "loss": 0.0933, "step": 40939 }, { "epoch": 0.7302108229586559, "grad_norm": 0.21256248652935028, "learning_rate": 1.0289380926170436e-05, "loss": 0.0951, "step": 40940 }, { "epoch": 0.7302286590803696, "grad_norm": 0.27800583839416504, "learning_rate": 1.0288122437397296e-05, "loss": 0.1106, "step": 40941 }, { "epoch": 0.7302464952020833, "grad_norm": 0.31588152050971985, "learning_rate": 1.0286864005652464e-05, "loss": 0.145, "step": 40942 }, { "epoch": 0.730264331323797, "grad_norm": 0.3558647632598877, "learning_rate": 1.0285605630940836e-05, "loss": 0.1338, "step": 40943 }, { "epoch": 0.7302821674455107, "grad_norm": 0.32653316855430603, "learning_rate": 1.0284347313267276e-05, "loss": 0.1439, "step": 40944 }, { "epoch": 0.7303000035672244, "grad_norm": 0.2534255385398865, "learning_rate": 1.0283089052636677e-05, "loss": 0.0873, "step": 40945 }, { "epoch": 0.730317839688938, "grad_norm": 0.21963676810264587, "learning_rate": 1.0281830849053906e-05, "loss": 0.1158, "step": 40946 }, { "epoch": 0.7303356758106517, "grad_norm": 0.2415035218000412, "learning_rate": 1.0280572702523844e-05, "loss": 0.0955, "step": 40947 }, { "epoch": 0.7303535119323654, "grad_norm": 0.31052109599113464, "learning_rate": 1.0279314613051358e-05, "loss": 0.1055, "step": 40948 }, { "epoch": 0.7303713480540791, "grad_norm": 0.2785680294036865, "learning_rate": 1.027805658064134e-05, "loss": 0.1033, "step": 40949 }, { "epoch": 0.7303891841757928, "grad_norm": 0.24477392435073853, "learning_rate": 1.0276798605298659e-05, "loss": 0.1073, "step": 40950 }, { "epoch": 0.7304070202975065, "grad_norm": 0.4114842414855957, "learning_rate": 1.0275540687028196e-05, "loss": 0.1648, "step": 40951 }, { "epoch": 0.7304248564192202, "grad_norm": 0.2071211189031601, "learning_rate": 1.0274282825834811e-05, "loss": 0.1072, "step": 40952 }, { "epoch": 0.7304426925409339, "grad_norm": 0.20276851952075958, "learning_rate": 1.0273025021723398e-05, "loss": 0.0716, "step": 40953 }, { "epoch": 0.7304605286626475, "grad_norm": 0.31820905208587646, "learning_rate": 1.0271767274698831e-05, "loss": 0.1527, "step": 40954 }, { "epoch": 0.7304783647843612, "grad_norm": 0.3157261312007904, "learning_rate": 1.0270509584765969e-05, "loss": 0.1635, "step": 40955 }, { "epoch": 0.730496200906075, "grad_norm": 0.2503087818622589, "learning_rate": 1.0269251951929712e-05, "loss": 0.0973, "step": 40956 }, { "epoch": 0.7305140370277887, "grad_norm": 0.3021951913833618, "learning_rate": 1.0267994376194909e-05, "loss": 0.1577, "step": 40957 }, { "epoch": 0.7305318731495024, "grad_norm": 0.2571439743041992, "learning_rate": 1.026673685756646e-05, "loss": 0.1029, "step": 40958 }, { "epoch": 0.7305497092712161, "grad_norm": 0.22617734968662262, "learning_rate": 1.0265479396049227e-05, "loss": 0.1126, "step": 40959 }, { "epoch": 0.7305675453929298, "grad_norm": 0.2502675950527191, "learning_rate": 1.0264221991648085e-05, "loss": 0.1002, "step": 40960 }, { "epoch": 0.7305853815146435, "grad_norm": 0.27962726354599, "learning_rate": 1.0262964644367898e-05, "loss": 0.1123, "step": 40961 }, { "epoch": 0.7306032176363572, "grad_norm": 0.32183629274368286, "learning_rate": 1.0261707354213559e-05, "loss": 0.2326, "step": 40962 }, { "epoch": 0.7306210537580708, "grad_norm": 0.2995951771736145, "learning_rate": 1.0260450121189935e-05, "loss": 0.1263, "step": 40963 }, { "epoch": 0.7306388898797845, "grad_norm": 0.26489582657814026, "learning_rate": 1.0259192945301896e-05, "loss": 0.1101, "step": 40964 }, { "epoch": 0.7306567260014982, "grad_norm": 0.2925983965396881, "learning_rate": 1.0257935826554307e-05, "loss": 0.1158, "step": 40965 }, { "epoch": 0.7306745621232119, "grad_norm": 0.2978723645210266, "learning_rate": 1.025667876495206e-05, "loss": 0.1565, "step": 40966 }, { "epoch": 0.7306923982449256, "grad_norm": 0.2961837351322174, "learning_rate": 1.0255421760500017e-05, "loss": 0.1452, "step": 40967 }, { "epoch": 0.7307102343666393, "grad_norm": 0.26765406131744385, "learning_rate": 1.0254164813203055e-05, "loss": 0.1378, "step": 40968 }, { "epoch": 0.730728070488353, "grad_norm": 0.2946990430355072, "learning_rate": 1.025290792306603e-05, "loss": 0.1435, "step": 40969 }, { "epoch": 0.7307459066100667, "grad_norm": 0.24788260459899902, "learning_rate": 1.025165109009383e-05, "loss": 0.0772, "step": 40970 }, { "epoch": 0.7307637427317804, "grad_norm": 0.3516397774219513, "learning_rate": 1.0250394314291334e-05, "loss": 0.0872, "step": 40971 }, { "epoch": 0.7307815788534942, "grad_norm": 0.44780948758125305, "learning_rate": 1.0249137595663402e-05, "loss": 0.1377, "step": 40972 }, { "epoch": 0.7307994149752078, "grad_norm": 0.38909098505973816, "learning_rate": 1.024788093421491e-05, "loss": 0.1216, "step": 40973 }, { "epoch": 0.7308172510969215, "grad_norm": 0.2733481526374817, "learning_rate": 1.0246624329950712e-05, "loss": 0.1354, "step": 40974 }, { "epoch": 0.7308350872186352, "grad_norm": 0.2599705159664154, "learning_rate": 1.0245367782875709e-05, "loss": 0.1649, "step": 40975 }, { "epoch": 0.7308529233403489, "grad_norm": 0.1773041933774948, "learning_rate": 1.0244111292994754e-05, "loss": 0.0812, "step": 40976 }, { "epoch": 0.7308707594620626, "grad_norm": 0.29490283131599426, "learning_rate": 1.024285486031272e-05, "loss": 0.0811, "step": 40977 }, { "epoch": 0.7308885955837763, "grad_norm": 0.328452467918396, "learning_rate": 1.024159848483447e-05, "loss": 0.1474, "step": 40978 }, { "epoch": 0.73090643170549, "grad_norm": 0.3337807059288025, "learning_rate": 1.024034216656489e-05, "loss": 0.086, "step": 40979 }, { "epoch": 0.7309242678272037, "grad_norm": 0.27943354845046997, "learning_rate": 1.0239085905508841e-05, "loss": 0.126, "step": 40980 }, { "epoch": 0.7309421039489173, "grad_norm": 0.2145722359418869, "learning_rate": 1.0237829701671192e-05, "loss": 0.0931, "step": 40981 }, { "epoch": 0.730959940070631, "grad_norm": 0.24666835367679596, "learning_rate": 1.0236573555056808e-05, "loss": 0.1162, "step": 40982 }, { "epoch": 0.7309777761923447, "grad_norm": 0.36359351873397827, "learning_rate": 1.023531746567057e-05, "loss": 0.1141, "step": 40983 }, { "epoch": 0.7309956123140584, "grad_norm": 0.2992902398109436, "learning_rate": 1.0234061433517333e-05, "loss": 0.1467, "step": 40984 }, { "epoch": 0.7310134484357721, "grad_norm": 0.24065323173999786, "learning_rate": 1.0232805458601984e-05, "loss": 0.1072, "step": 40985 }, { "epoch": 0.7310312845574858, "grad_norm": 0.2959069609642029, "learning_rate": 1.023154954092938e-05, "loss": 0.1256, "step": 40986 }, { "epoch": 0.7310491206791995, "grad_norm": 0.3075346350669861, "learning_rate": 1.0230293680504383e-05, "loss": 0.1162, "step": 40987 }, { "epoch": 0.7310669568009132, "grad_norm": 0.28486353158950806, "learning_rate": 1.022903787733188e-05, "loss": 0.1266, "step": 40988 }, { "epoch": 0.731084792922627, "grad_norm": 0.3788246512413025, "learning_rate": 1.0227782131416724e-05, "loss": 0.172, "step": 40989 }, { "epoch": 0.7311026290443406, "grad_norm": 0.28341367840766907, "learning_rate": 1.0226526442763787e-05, "loss": 0.0893, "step": 40990 }, { "epoch": 0.7311204651660543, "grad_norm": 0.2282809019088745, "learning_rate": 1.0225270811377929e-05, "loss": 0.1008, "step": 40991 }, { "epoch": 0.731138301287768, "grad_norm": 0.3333247900009155, "learning_rate": 1.0224015237264032e-05, "loss": 0.1929, "step": 40992 }, { "epoch": 0.7311561374094817, "grad_norm": 0.3236623704433441, "learning_rate": 1.0222759720426956e-05, "loss": 0.1044, "step": 40993 }, { "epoch": 0.7311739735311954, "grad_norm": 0.2413376122713089, "learning_rate": 1.0221504260871566e-05, "loss": 0.111, "step": 40994 }, { "epoch": 0.7311918096529091, "grad_norm": 0.2764434516429901, "learning_rate": 1.0220248858602732e-05, "loss": 0.1255, "step": 40995 }, { "epoch": 0.7312096457746228, "grad_norm": 0.20417995750904083, "learning_rate": 1.0218993513625308e-05, "loss": 0.1282, "step": 40996 }, { "epoch": 0.7312274818963365, "grad_norm": 0.228545680642128, "learning_rate": 1.021773822594418e-05, "loss": 0.1604, "step": 40997 }, { "epoch": 0.7312453180180501, "grad_norm": 0.22014886140823364, "learning_rate": 1.0216482995564195e-05, "loss": 0.0728, "step": 40998 }, { "epoch": 0.7312631541397638, "grad_norm": 0.22016765177249908, "learning_rate": 1.0215227822490236e-05, "loss": 0.0811, "step": 40999 }, { "epoch": 0.7312809902614775, "grad_norm": 0.348111093044281, "learning_rate": 1.0213972706727151e-05, "loss": 0.148, "step": 41000 }, { "epoch": 0.7312809902614775, "eval_loss": 0.11357378959655762, "eval_runtime": 107.9055, "eval_samples_per_second": 9.49, "eval_steps_per_second": 1.585, "step": 41000 }, { "epoch": 0.7312988263831912, "grad_norm": 0.2562297284603119, "learning_rate": 1.0212717648279827e-05, "loss": 0.1199, "step": 41001 }, { "epoch": 0.7313166625049049, "grad_norm": 0.41424325108528137, "learning_rate": 1.0211462647153114e-05, "loss": 0.133, "step": 41002 }, { "epoch": 0.7313344986266186, "grad_norm": 0.3072645664215088, "learning_rate": 1.0210207703351878e-05, "loss": 0.0798, "step": 41003 }, { "epoch": 0.7313523347483323, "grad_norm": 0.2379826009273529, "learning_rate": 1.0208952816880982e-05, "loss": 0.0798, "step": 41004 }, { "epoch": 0.731370170870046, "grad_norm": 0.23669207096099854, "learning_rate": 1.0207697987745297e-05, "loss": 0.1258, "step": 41005 }, { "epoch": 0.7313880069917598, "grad_norm": 0.297948956489563, "learning_rate": 1.0206443215949685e-05, "loss": 0.1424, "step": 41006 }, { "epoch": 0.7314058431134735, "grad_norm": 0.2605789005756378, "learning_rate": 1.020518850149901e-05, "loss": 0.1239, "step": 41007 }, { "epoch": 0.7314236792351871, "grad_norm": 0.3622311055660248, "learning_rate": 1.0203933844398134e-05, "loss": 0.1041, "step": 41008 }, { "epoch": 0.7314415153569008, "grad_norm": 0.18975771963596344, "learning_rate": 1.0202679244651912e-05, "loss": 0.0953, "step": 41009 }, { "epoch": 0.7314593514786145, "grad_norm": 0.23391392827033997, "learning_rate": 1.0201424702265224e-05, "loss": 0.1148, "step": 41010 }, { "epoch": 0.7314771876003282, "grad_norm": 0.30333054065704346, "learning_rate": 1.0200170217242916e-05, "loss": 0.0878, "step": 41011 }, { "epoch": 0.7314950237220419, "grad_norm": 0.2512120008468628, "learning_rate": 1.0198915789589869e-05, "loss": 0.1514, "step": 41012 }, { "epoch": 0.7315128598437556, "grad_norm": 0.3056817054748535, "learning_rate": 1.0197661419310927e-05, "loss": 0.1051, "step": 41013 }, { "epoch": 0.7315306959654693, "grad_norm": 0.25697413086891174, "learning_rate": 1.0196407106410974e-05, "loss": 0.1144, "step": 41014 }, { "epoch": 0.731548532087183, "grad_norm": 0.2934715747833252, "learning_rate": 1.0195152850894857e-05, "loss": 0.1328, "step": 41015 }, { "epoch": 0.7315663682088966, "grad_norm": 0.30051085352897644, "learning_rate": 1.0193898652767442e-05, "loss": 0.1066, "step": 41016 }, { "epoch": 0.7315842043306103, "grad_norm": 0.3459209203720093, "learning_rate": 1.019264451203358e-05, "loss": 0.0937, "step": 41017 }, { "epoch": 0.731602040452324, "grad_norm": 0.28906819224357605, "learning_rate": 1.0191390428698153e-05, "loss": 0.1227, "step": 41018 }, { "epoch": 0.7316198765740377, "grad_norm": 0.7798620462417603, "learning_rate": 1.0190136402766009e-05, "loss": 0.1106, "step": 41019 }, { "epoch": 0.7316377126957514, "grad_norm": 0.3055686056613922, "learning_rate": 1.0188882434242014e-05, "loss": 0.0851, "step": 41020 }, { "epoch": 0.7316555488174651, "grad_norm": 0.25559568405151367, "learning_rate": 1.0187628523131027e-05, "loss": 0.1293, "step": 41021 }, { "epoch": 0.7316733849391788, "grad_norm": 0.29998308420181274, "learning_rate": 1.0186374669437896e-05, "loss": 0.1609, "step": 41022 }, { "epoch": 0.7316912210608926, "grad_norm": 0.2601812779903412, "learning_rate": 1.0185120873167503e-05, "loss": 0.1152, "step": 41023 }, { "epoch": 0.7317090571826063, "grad_norm": 0.20285411179065704, "learning_rate": 1.0183867134324698e-05, "loss": 0.1171, "step": 41024 }, { "epoch": 0.73172689330432, "grad_norm": 0.296916663646698, "learning_rate": 1.0182613452914333e-05, "loss": 0.1314, "step": 41025 }, { "epoch": 0.7317447294260336, "grad_norm": 0.22588127851486206, "learning_rate": 1.0181359828941276e-05, "loss": 0.1063, "step": 41026 }, { "epoch": 0.7317625655477473, "grad_norm": 0.2552788555622101, "learning_rate": 1.0180106262410394e-05, "loss": 0.1262, "step": 41027 }, { "epoch": 0.731780401669461, "grad_norm": 0.2651049792766571, "learning_rate": 1.017885275332654e-05, "loss": 0.1098, "step": 41028 }, { "epoch": 0.7317982377911747, "grad_norm": 0.3077028691768646, "learning_rate": 1.0177599301694573e-05, "loss": 0.1278, "step": 41029 }, { "epoch": 0.7318160739128884, "grad_norm": 0.24880871176719666, "learning_rate": 1.017634590751934e-05, "loss": 0.1185, "step": 41030 }, { "epoch": 0.7318339100346021, "grad_norm": 0.22988176345825195, "learning_rate": 1.017509257080572e-05, "loss": 0.1206, "step": 41031 }, { "epoch": 0.7318517461563158, "grad_norm": 0.23562580347061157, "learning_rate": 1.0173839291558562e-05, "loss": 0.1333, "step": 41032 }, { "epoch": 0.7318695822780295, "grad_norm": 0.3394968807697296, "learning_rate": 1.0172586069782721e-05, "loss": 0.158, "step": 41033 }, { "epoch": 0.7318874183997431, "grad_norm": 0.23663291335105896, "learning_rate": 1.017133290548306e-05, "loss": 0.1118, "step": 41034 }, { "epoch": 0.7319052545214568, "grad_norm": 0.29905152320861816, "learning_rate": 1.0170079798664425e-05, "loss": 0.1271, "step": 41035 }, { "epoch": 0.7319230906431705, "grad_norm": 0.26523861289024353, "learning_rate": 1.0168826749331692e-05, "loss": 0.1027, "step": 41036 }, { "epoch": 0.7319409267648842, "grad_norm": 0.2506278455257416, "learning_rate": 1.016757375748971e-05, "loss": 0.1435, "step": 41037 }, { "epoch": 0.7319587628865979, "grad_norm": 0.3426370918750763, "learning_rate": 1.0166320823143324e-05, "loss": 0.0802, "step": 41038 }, { "epoch": 0.7319765990083116, "grad_norm": 0.3216326832771301, "learning_rate": 1.0165067946297411e-05, "loss": 0.1575, "step": 41039 }, { "epoch": 0.7319944351300254, "grad_norm": 0.24299614131450653, "learning_rate": 1.016381512695681e-05, "loss": 0.1217, "step": 41040 }, { "epoch": 0.7320122712517391, "grad_norm": 0.2927909195423126, "learning_rate": 1.0162562365126396e-05, "loss": 0.1448, "step": 41041 }, { "epoch": 0.7320301073734528, "grad_norm": 0.3751700818538666, "learning_rate": 1.0161309660811014e-05, "loss": 0.0862, "step": 41042 }, { "epoch": 0.7320479434951664, "grad_norm": 0.40837520360946655, "learning_rate": 1.0160057014015512e-05, "loss": 0.1305, "step": 41043 }, { "epoch": 0.7320657796168801, "grad_norm": 0.27980414032936096, "learning_rate": 1.015880442474476e-05, "loss": 0.1072, "step": 41044 }, { "epoch": 0.7320836157385938, "grad_norm": 0.3306896686553955, "learning_rate": 1.0157551893003614e-05, "loss": 0.153, "step": 41045 }, { "epoch": 0.7321014518603075, "grad_norm": 0.21825826168060303, "learning_rate": 1.015629941879692e-05, "loss": 0.0947, "step": 41046 }, { "epoch": 0.7321192879820212, "grad_norm": 0.18993951380252838, "learning_rate": 1.0155047002129536e-05, "loss": 0.0822, "step": 41047 }, { "epoch": 0.7321371241037349, "grad_norm": 0.275922566652298, "learning_rate": 1.015379464300631e-05, "loss": 0.0526, "step": 41048 }, { "epoch": 0.7321549602254486, "grad_norm": 0.3374969959259033, "learning_rate": 1.0152542341432111e-05, "loss": 0.116, "step": 41049 }, { "epoch": 0.7321727963471623, "grad_norm": 0.34595680236816406, "learning_rate": 1.0151290097411789e-05, "loss": 0.1263, "step": 41050 }, { "epoch": 0.732190632468876, "grad_norm": 0.2317531853914261, "learning_rate": 1.0150037910950191e-05, "loss": 0.1506, "step": 41051 }, { "epoch": 0.7322084685905896, "grad_norm": 0.29938170313835144, "learning_rate": 1.014878578205217e-05, "loss": 0.1511, "step": 41052 }, { "epoch": 0.7322263047123033, "grad_norm": 0.2171143889427185, "learning_rate": 1.0147533710722582e-05, "loss": 0.0922, "step": 41053 }, { "epoch": 0.732244140834017, "grad_norm": 0.31345969438552856, "learning_rate": 1.0146281696966293e-05, "loss": 0.1202, "step": 41054 }, { "epoch": 0.7322619769557307, "grad_norm": 0.18728560209274292, "learning_rate": 1.0145029740788146e-05, "loss": 0.0778, "step": 41055 }, { "epoch": 0.7322798130774444, "grad_norm": 0.23062537610530853, "learning_rate": 1.0143777842192986e-05, "loss": 0.1022, "step": 41056 }, { "epoch": 0.7322976491991582, "grad_norm": 0.29825496673583984, "learning_rate": 1.0142526001185681e-05, "loss": 0.1435, "step": 41057 }, { "epoch": 0.7323154853208719, "grad_norm": 0.28086090087890625, "learning_rate": 1.014127421777108e-05, "loss": 0.1842, "step": 41058 }, { "epoch": 0.7323333214425856, "grad_norm": 0.36868149042129517, "learning_rate": 1.0140022491954032e-05, "loss": 0.1198, "step": 41059 }, { "epoch": 0.7323511575642992, "grad_norm": 0.17740115523338318, "learning_rate": 1.0138770823739389e-05, "loss": 0.1123, "step": 41060 }, { "epoch": 0.7323689936860129, "grad_norm": 0.2053452730178833, "learning_rate": 1.0137519213131993e-05, "loss": 0.1298, "step": 41061 }, { "epoch": 0.7323868298077266, "grad_norm": 0.23524436354637146, "learning_rate": 1.0136267660136716e-05, "loss": 0.1166, "step": 41062 }, { "epoch": 0.7324046659294403, "grad_norm": 0.26659414172172546, "learning_rate": 1.0135016164758399e-05, "loss": 0.1337, "step": 41063 }, { "epoch": 0.732422502051154, "grad_norm": 0.22963832318782806, "learning_rate": 1.0133764727001893e-05, "loss": 0.1145, "step": 41064 }, { "epoch": 0.7324403381728677, "grad_norm": 0.42937949299812317, "learning_rate": 1.013251334687204e-05, "loss": 0.1807, "step": 41065 }, { "epoch": 0.7324581742945814, "grad_norm": 0.2890181243419647, "learning_rate": 1.013126202437371e-05, "loss": 0.0552, "step": 41066 }, { "epoch": 0.7324760104162951, "grad_norm": 0.34323248267173767, "learning_rate": 1.0130010759511735e-05, "loss": 0.1006, "step": 41067 }, { "epoch": 0.7324938465380088, "grad_norm": 0.2769702076911926, "learning_rate": 1.0128759552290984e-05, "loss": 0.1589, "step": 41068 }, { "epoch": 0.7325116826597224, "grad_norm": 0.2968181371688843, "learning_rate": 1.0127508402716288e-05, "loss": 0.1367, "step": 41069 }, { "epoch": 0.7325295187814361, "grad_norm": 0.5595503449440002, "learning_rate": 1.0126257310792515e-05, "loss": 0.1397, "step": 41070 }, { "epoch": 0.7325473549031498, "grad_norm": 0.2284081131219864, "learning_rate": 1.0125006276524507e-05, "loss": 0.1302, "step": 41071 }, { "epoch": 0.7325651910248635, "grad_norm": 0.28476080298423767, "learning_rate": 1.0123755299917109e-05, "loss": 0.1251, "step": 41072 }, { "epoch": 0.7325830271465773, "grad_norm": 0.2432744950056076, "learning_rate": 1.0122504380975175e-05, "loss": 0.1361, "step": 41073 }, { "epoch": 0.732600863268291, "grad_norm": 0.24936284124851227, "learning_rate": 1.0121253519703543e-05, "loss": 0.1177, "step": 41074 }, { "epoch": 0.7326186993900047, "grad_norm": 0.3589072525501251, "learning_rate": 1.0120002716107083e-05, "loss": 0.1277, "step": 41075 }, { "epoch": 0.7326365355117184, "grad_norm": 0.24981731176376343, "learning_rate": 1.0118751970190631e-05, "loss": 0.1307, "step": 41076 }, { "epoch": 0.732654371633432, "grad_norm": 0.28184929490089417, "learning_rate": 1.0117501281959035e-05, "loss": 0.1369, "step": 41077 }, { "epoch": 0.7326722077551457, "grad_norm": 0.22753465175628662, "learning_rate": 1.0116250651417136e-05, "loss": 0.0609, "step": 41078 }, { "epoch": 0.7326900438768594, "grad_norm": 0.327316015958786, "learning_rate": 1.0115000078569801e-05, "loss": 0.1269, "step": 41079 }, { "epoch": 0.7327078799985731, "grad_norm": 0.21712535619735718, "learning_rate": 1.0113749563421854e-05, "loss": 0.1208, "step": 41080 }, { "epoch": 0.7327257161202868, "grad_norm": 0.2620106637477875, "learning_rate": 1.0112499105978166e-05, "loss": 0.1104, "step": 41081 }, { "epoch": 0.7327435522420005, "grad_norm": 0.26524466276168823, "learning_rate": 1.0111248706243564e-05, "loss": 0.1464, "step": 41082 }, { "epoch": 0.7327613883637142, "grad_norm": 0.3362818956375122, "learning_rate": 1.0109998364222915e-05, "loss": 0.1502, "step": 41083 }, { "epoch": 0.7327792244854279, "grad_norm": 0.19853998720645905, "learning_rate": 1.0108748079921055e-05, "loss": 0.0858, "step": 41084 }, { "epoch": 0.7327970606071416, "grad_norm": 0.2885602116584778, "learning_rate": 1.0107497853342829e-05, "loss": 0.0986, "step": 41085 }, { "epoch": 0.7328148967288552, "grad_norm": 0.29427123069763184, "learning_rate": 1.0106247684493086e-05, "loss": 0.0819, "step": 41086 }, { "epoch": 0.7328327328505689, "grad_norm": 0.28571197390556335, "learning_rate": 1.0104997573376662e-05, "loss": 0.1078, "step": 41087 }, { "epoch": 0.7328505689722826, "grad_norm": 0.24130010604858398, "learning_rate": 1.0103747519998422e-05, "loss": 0.1199, "step": 41088 }, { "epoch": 0.7328684050939963, "grad_norm": 0.2578444182872772, "learning_rate": 1.01024975243632e-05, "loss": 0.1214, "step": 41089 }, { "epoch": 0.7328862412157101, "grad_norm": 0.30509063601493835, "learning_rate": 1.0101247586475842e-05, "loss": 0.1885, "step": 41090 }, { "epoch": 0.7329040773374238, "grad_norm": 0.30987098813056946, "learning_rate": 1.0099997706341188e-05, "loss": 0.1045, "step": 41091 }, { "epoch": 0.7329219134591375, "grad_norm": 0.24301046133041382, "learning_rate": 1.0098747883964096e-05, "loss": 0.0904, "step": 41092 }, { "epoch": 0.7329397495808512, "grad_norm": 0.2671217918395996, "learning_rate": 1.0097498119349404e-05, "loss": 0.0925, "step": 41093 }, { "epoch": 0.7329575857025649, "grad_norm": 0.2784178555011749, "learning_rate": 1.009624841250195e-05, "loss": 0.101, "step": 41094 }, { "epoch": 0.7329754218242786, "grad_norm": 0.2802357077598572, "learning_rate": 1.0094998763426591e-05, "loss": 0.091, "step": 41095 }, { "epoch": 0.7329932579459922, "grad_norm": 0.26100000739097595, "learning_rate": 1.0093749172128159e-05, "loss": 0.1087, "step": 41096 }, { "epoch": 0.7330110940677059, "grad_norm": 0.27671751379966736, "learning_rate": 1.0092499638611508e-05, "loss": 0.1341, "step": 41097 }, { "epoch": 0.7330289301894196, "grad_norm": 0.2316170036792755, "learning_rate": 1.009125016288148e-05, "loss": 0.0509, "step": 41098 }, { "epoch": 0.7330467663111333, "grad_norm": 0.22340913116931915, "learning_rate": 1.0090000744942915e-05, "loss": 0.1036, "step": 41099 }, { "epoch": 0.733064602432847, "grad_norm": 0.26249369978904724, "learning_rate": 1.0088751384800649e-05, "loss": 0.1486, "step": 41100 }, { "epoch": 0.7330824385545607, "grad_norm": 0.3222165107727051, "learning_rate": 1.008750208245954e-05, "loss": 0.1064, "step": 41101 }, { "epoch": 0.7331002746762744, "grad_norm": 0.2129303365945816, "learning_rate": 1.0086252837924427e-05, "loss": 0.0652, "step": 41102 }, { "epoch": 0.733118110797988, "grad_norm": 0.2269088327884674, "learning_rate": 1.0085003651200145e-05, "loss": 0.1037, "step": 41103 }, { "epoch": 0.7331359469197017, "grad_norm": 0.26634708046913147, "learning_rate": 1.0083754522291536e-05, "loss": 0.0898, "step": 41104 }, { "epoch": 0.7331537830414154, "grad_norm": 0.2972703278064728, "learning_rate": 1.008250545120345e-05, "loss": 0.1297, "step": 41105 }, { "epoch": 0.7331716191631291, "grad_norm": 0.3696639835834503, "learning_rate": 1.0081256437940729e-05, "loss": 0.1236, "step": 41106 }, { "epoch": 0.7331894552848429, "grad_norm": 0.28319859504699707, "learning_rate": 1.00800074825082e-05, "loss": 0.1528, "step": 41107 }, { "epoch": 0.7332072914065566, "grad_norm": 0.3253980576992035, "learning_rate": 1.0078758584910725e-05, "loss": 0.105, "step": 41108 }, { "epoch": 0.7332251275282703, "grad_norm": 0.23751689493656158, "learning_rate": 1.0077509745153126e-05, "loss": 0.0912, "step": 41109 }, { "epoch": 0.733242963649984, "grad_norm": 0.4059242308139801, "learning_rate": 1.0076260963240261e-05, "loss": 0.0917, "step": 41110 }, { "epoch": 0.7332607997716977, "grad_norm": 0.26670950651168823, "learning_rate": 1.0075012239176965e-05, "loss": 0.0896, "step": 41111 }, { "epoch": 0.7332786358934114, "grad_norm": 0.23575033247470856, "learning_rate": 1.0073763572968075e-05, "loss": 0.1076, "step": 41112 }, { "epoch": 0.733296472015125, "grad_norm": 0.3413923680782318, "learning_rate": 1.0072514964618426e-05, "loss": 0.1249, "step": 41113 }, { "epoch": 0.7333143081368387, "grad_norm": 0.20009158551692963, "learning_rate": 1.0071266414132871e-05, "loss": 0.0838, "step": 41114 }, { "epoch": 0.7333321442585524, "grad_norm": 0.3031262159347534, "learning_rate": 1.0070017921516247e-05, "loss": 0.1437, "step": 41115 }, { "epoch": 0.7333499803802661, "grad_norm": 0.29012876749038696, "learning_rate": 1.0068769486773388e-05, "loss": 0.0875, "step": 41116 }, { "epoch": 0.7333678165019798, "grad_norm": 0.3280128240585327, "learning_rate": 1.0067521109909126e-05, "loss": 0.1141, "step": 41117 }, { "epoch": 0.7333856526236935, "grad_norm": 0.26290854811668396, "learning_rate": 1.006627279092832e-05, "loss": 0.14, "step": 41118 }, { "epoch": 0.7334034887454072, "grad_norm": 0.33645305037498474, "learning_rate": 1.0065024529835798e-05, "loss": 0.1314, "step": 41119 }, { "epoch": 0.7334213248671209, "grad_norm": 0.2620290517807007, "learning_rate": 1.0063776326636399e-05, "loss": 0.1362, "step": 41120 }, { "epoch": 0.7334391609888345, "grad_norm": 0.3068559169769287, "learning_rate": 1.0062528181334954e-05, "loss": 0.0972, "step": 41121 }, { "epoch": 0.7334569971105482, "grad_norm": 0.34216317534446716, "learning_rate": 1.0061280093936315e-05, "loss": 0.1097, "step": 41122 }, { "epoch": 0.7334748332322619, "grad_norm": 0.28030675649642944, "learning_rate": 1.0060032064445307e-05, "loss": 0.1638, "step": 41123 }, { "epoch": 0.7334926693539757, "grad_norm": 0.2601567208766937, "learning_rate": 1.0058784092866785e-05, "loss": 0.1102, "step": 41124 }, { "epoch": 0.7335105054756894, "grad_norm": 0.29961302876472473, "learning_rate": 1.0057536179205576e-05, "loss": 0.1235, "step": 41125 }, { "epoch": 0.7335283415974031, "grad_norm": 0.23860082030296326, "learning_rate": 1.0056288323466507e-05, "loss": 0.131, "step": 41126 }, { "epoch": 0.7335461777191168, "grad_norm": 0.2541649639606476, "learning_rate": 1.0055040525654436e-05, "loss": 0.1805, "step": 41127 }, { "epoch": 0.7335640138408305, "grad_norm": 0.31603550910949707, "learning_rate": 1.0053792785774191e-05, "loss": 0.1106, "step": 41128 }, { "epoch": 0.7335818499625442, "grad_norm": 0.2143966406583786, "learning_rate": 1.0052545103830605e-05, "loss": 0.1204, "step": 41129 }, { "epoch": 0.7335996860842579, "grad_norm": 0.345138281583786, "learning_rate": 1.0051297479828508e-05, "loss": 0.1677, "step": 41130 }, { "epoch": 0.7336175222059715, "grad_norm": 0.4226834774017334, "learning_rate": 1.0050049913772755e-05, "loss": 0.2135, "step": 41131 }, { "epoch": 0.7336353583276852, "grad_norm": 0.2573096752166748, "learning_rate": 1.0048802405668173e-05, "loss": 0.0939, "step": 41132 }, { "epoch": 0.7336531944493989, "grad_norm": 0.2890351712703705, "learning_rate": 1.0047554955519595e-05, "loss": 0.1128, "step": 41133 }, { "epoch": 0.7336710305711126, "grad_norm": 0.25333184003829956, "learning_rate": 1.004630756333185e-05, "loss": 0.1037, "step": 41134 }, { "epoch": 0.7336888666928263, "grad_norm": 0.3420916199684143, "learning_rate": 1.0045060229109793e-05, "loss": 0.098, "step": 41135 }, { "epoch": 0.73370670281454, "grad_norm": 0.29470258951187134, "learning_rate": 1.0043812952858236e-05, "loss": 0.0796, "step": 41136 }, { "epoch": 0.7337245389362537, "grad_norm": 0.31416720151901245, "learning_rate": 1.0042565734582036e-05, "loss": 0.1404, "step": 41137 }, { "epoch": 0.7337423750579674, "grad_norm": 0.3004606068134308, "learning_rate": 1.0041318574286015e-05, "loss": 0.0993, "step": 41138 }, { "epoch": 0.733760211179681, "grad_norm": 0.21731922030448914, "learning_rate": 1.0040071471975005e-05, "loss": 0.0785, "step": 41139 }, { "epoch": 0.7337780473013947, "grad_norm": 0.2686507999897003, "learning_rate": 1.003882442765385e-05, "loss": 0.0812, "step": 41140 }, { "epoch": 0.7337958834231085, "grad_norm": 0.19689592719078064, "learning_rate": 1.0037577441327382e-05, "loss": 0.1281, "step": 41141 }, { "epoch": 0.7338137195448222, "grad_norm": 0.32062211632728577, "learning_rate": 1.003633051300043e-05, "loss": 0.1415, "step": 41142 }, { "epoch": 0.7338315556665359, "grad_norm": 0.3154888153076172, "learning_rate": 1.003508364267782e-05, "loss": 0.1299, "step": 41143 }, { "epoch": 0.7338493917882496, "grad_norm": 0.25007688999176025, "learning_rate": 1.0033836830364405e-05, "loss": 0.1201, "step": 41144 }, { "epoch": 0.7338672279099633, "grad_norm": 0.3194383382797241, "learning_rate": 1.0032590076065004e-05, "loss": 0.0947, "step": 41145 }, { "epoch": 0.733885064031677, "grad_norm": 0.2792467176914215, "learning_rate": 1.0031343379784458e-05, "loss": 0.1429, "step": 41146 }, { "epoch": 0.7339029001533907, "grad_norm": 0.24085290729999542, "learning_rate": 1.0030096741527584e-05, "loss": 0.1012, "step": 41147 }, { "epoch": 0.7339207362751043, "grad_norm": 0.21871492266654968, "learning_rate": 1.0028850161299234e-05, "loss": 0.1118, "step": 41148 }, { "epoch": 0.733938572396818, "grad_norm": 0.26251277327537537, "learning_rate": 1.0027603639104233e-05, "loss": 0.1202, "step": 41149 }, { "epoch": 0.7339564085185317, "grad_norm": 0.30111393332481384, "learning_rate": 1.0026357174947401e-05, "loss": 0.1149, "step": 41150 }, { "epoch": 0.7339742446402454, "grad_norm": 0.23938579857349396, "learning_rate": 1.002511076883359e-05, "loss": 0.0954, "step": 41151 }, { "epoch": 0.7339920807619591, "grad_norm": 0.2065141648054123, "learning_rate": 1.0023864420767612e-05, "loss": 0.0999, "step": 41152 }, { "epoch": 0.7340099168836728, "grad_norm": 0.25619766116142273, "learning_rate": 1.0022618130754318e-05, "loss": 0.0944, "step": 41153 }, { "epoch": 0.7340277530053865, "grad_norm": 0.21290558576583862, "learning_rate": 1.0021371898798532e-05, "loss": 0.1121, "step": 41154 }, { "epoch": 0.7340455891271002, "grad_norm": 0.33529436588287354, "learning_rate": 1.0020125724905076e-05, "loss": 0.1294, "step": 41155 }, { "epoch": 0.7340634252488139, "grad_norm": 0.2827434837818146, "learning_rate": 1.0018879609078782e-05, "loss": 0.1738, "step": 41156 }, { "epoch": 0.7340812613705275, "grad_norm": 0.2743864059448242, "learning_rate": 1.0017633551324493e-05, "loss": 0.0758, "step": 41157 }, { "epoch": 0.7340990974922413, "grad_norm": 0.29576677083969116, "learning_rate": 1.001638755164703e-05, "loss": 0.1413, "step": 41158 }, { "epoch": 0.734116933613955, "grad_norm": 0.33710238337516785, "learning_rate": 1.0015141610051227e-05, "loss": 0.1361, "step": 41159 }, { "epoch": 0.7341347697356687, "grad_norm": 0.23217138648033142, "learning_rate": 1.0013895726541898e-05, "loss": 0.1208, "step": 41160 }, { "epoch": 0.7341526058573824, "grad_norm": 0.23373915255069733, "learning_rate": 1.0012649901123897e-05, "loss": 0.0675, "step": 41161 }, { "epoch": 0.7341704419790961, "grad_norm": 0.3140864968299866, "learning_rate": 1.0011404133802039e-05, "loss": 0.1318, "step": 41162 }, { "epoch": 0.7341882781008098, "grad_norm": 0.2737288177013397, "learning_rate": 1.0010158424581148e-05, "loss": 0.0972, "step": 41163 }, { "epoch": 0.7342061142225235, "grad_norm": 0.24355921149253845, "learning_rate": 1.000891277346607e-05, "loss": 0.1201, "step": 41164 }, { "epoch": 0.7342239503442372, "grad_norm": 0.3785707950592041, "learning_rate": 1.0007667180461613e-05, "loss": 0.1537, "step": 41165 }, { "epoch": 0.7342417864659508, "grad_norm": 0.27761510014533997, "learning_rate": 1.0006421645572626e-05, "loss": 0.1022, "step": 41166 }, { "epoch": 0.7342596225876645, "grad_norm": 0.4070633351802826, "learning_rate": 1.0005176168803925e-05, "loss": 0.1119, "step": 41167 }, { "epoch": 0.7342774587093782, "grad_norm": 0.29173749685287476, "learning_rate": 1.0003930750160345e-05, "loss": 0.1105, "step": 41168 }, { "epoch": 0.7342952948310919, "grad_norm": 0.25865909457206726, "learning_rate": 1.0002685389646696e-05, "loss": 0.134, "step": 41169 }, { "epoch": 0.7343131309528056, "grad_norm": 0.23558352887630463, "learning_rate": 1.000144008726783e-05, "loss": 0.1048, "step": 41170 }, { "epoch": 0.7343309670745193, "grad_norm": 0.32705268263816833, "learning_rate": 1.000019484302856e-05, "loss": 0.1845, "step": 41171 }, { "epoch": 0.734348803196233, "grad_norm": 0.40526434779167175, "learning_rate": 9.998949656933718e-06, "loss": 0.1598, "step": 41172 }, { "epoch": 0.7343666393179467, "grad_norm": 0.2830185294151306, "learning_rate": 9.997704528988117e-06, "loss": 0.1132, "step": 41173 }, { "epoch": 0.7343844754396605, "grad_norm": 0.2461305558681488, "learning_rate": 9.996459459196606e-06, "loss": 0.1153, "step": 41174 }, { "epoch": 0.7344023115613741, "grad_norm": 0.23248641192913055, "learning_rate": 9.995214447564e-06, "loss": 0.114, "step": 41175 }, { "epoch": 0.7344201476830878, "grad_norm": 0.42728564143180847, "learning_rate": 9.993969494095123e-06, "loss": 0.1718, "step": 41176 }, { "epoch": 0.7344379838048015, "grad_norm": 0.31020474433898926, "learning_rate": 9.992724598794794e-06, "loss": 0.1451, "step": 41177 }, { "epoch": 0.7344558199265152, "grad_norm": 0.21588747203350067, "learning_rate": 9.99147976166786e-06, "loss": 0.1217, "step": 41178 }, { "epoch": 0.7344736560482289, "grad_norm": 0.29261553287506104, "learning_rate": 9.990234982719124e-06, "loss": 0.116, "step": 41179 }, { "epoch": 0.7344914921699426, "grad_norm": 0.33601871132850647, "learning_rate": 9.98899026195343e-06, "loss": 0.1776, "step": 41180 }, { "epoch": 0.7345093282916563, "grad_norm": 0.4971127212047577, "learning_rate": 9.987745599375595e-06, "loss": 0.1256, "step": 41181 }, { "epoch": 0.73452716441337, "grad_norm": 0.37492135167121887, "learning_rate": 9.986500994990433e-06, "loss": 0.1535, "step": 41182 }, { "epoch": 0.7345450005350836, "grad_norm": 0.34187525510787964, "learning_rate": 9.985256448802788e-06, "loss": 0.121, "step": 41183 }, { "epoch": 0.7345628366567973, "grad_norm": 0.22877824306488037, "learning_rate": 9.984011960817475e-06, "loss": 0.1085, "step": 41184 }, { "epoch": 0.734580672778511, "grad_norm": 0.2493501454591751, "learning_rate": 9.982767531039319e-06, "loss": 0.0967, "step": 41185 }, { "epoch": 0.7345985089002247, "grad_norm": 0.26916593313217163, "learning_rate": 9.981523159473133e-06, "loss": 0.1105, "step": 41186 }, { "epoch": 0.7346163450219384, "grad_norm": 0.26236462593078613, "learning_rate": 9.980278846123759e-06, "loss": 0.1087, "step": 41187 }, { "epoch": 0.7346341811436521, "grad_norm": 0.29075196385383606, "learning_rate": 9.97903459099601e-06, "loss": 0.1085, "step": 41188 }, { "epoch": 0.7346520172653658, "grad_norm": 0.25500744581222534, "learning_rate": 9.977790394094715e-06, "loss": 0.1043, "step": 41189 }, { "epoch": 0.7346698533870795, "grad_norm": 0.2520645558834076, "learning_rate": 9.976546255424682e-06, "loss": 0.1459, "step": 41190 }, { "epoch": 0.7346876895087933, "grad_norm": 0.24114584922790527, "learning_rate": 9.975302174990755e-06, "loss": 0.1004, "step": 41191 }, { "epoch": 0.734705525630507, "grad_norm": 0.3239428400993347, "learning_rate": 9.974058152797735e-06, "loss": 0.1165, "step": 41192 }, { "epoch": 0.7347233617522206, "grad_norm": 0.3203233480453491, "learning_rate": 9.972814188850465e-06, "loss": 0.1493, "step": 41193 }, { "epoch": 0.7347411978739343, "grad_norm": 0.263615220785141, "learning_rate": 9.971570283153759e-06, "loss": 0.1275, "step": 41194 }, { "epoch": 0.734759033995648, "grad_norm": 0.2871703505516052, "learning_rate": 9.970326435712426e-06, "loss": 0.1181, "step": 41195 }, { "epoch": 0.7347768701173617, "grad_norm": 0.34165987372398376, "learning_rate": 9.96908264653131e-06, "loss": 0.0979, "step": 41196 }, { "epoch": 0.7347947062390754, "grad_norm": 0.28180333971977234, "learning_rate": 9.967838915615218e-06, "loss": 0.0749, "step": 41197 }, { "epoch": 0.7348125423607891, "grad_norm": 0.36381253600120544, "learning_rate": 9.966595242968978e-06, "loss": 0.1456, "step": 41198 }, { "epoch": 0.7348303784825028, "grad_norm": 0.3145157992839813, "learning_rate": 9.965351628597398e-06, "loss": 0.1065, "step": 41199 }, { "epoch": 0.7348482146042165, "grad_norm": 0.2439817190170288, "learning_rate": 9.964108072505316e-06, "loss": 0.1636, "step": 41200 }, { "epoch": 0.7348660507259301, "grad_norm": 0.17858165502548218, "learning_rate": 9.962864574697542e-06, "loss": 0.1137, "step": 41201 }, { "epoch": 0.7348838868476438, "grad_norm": 0.18529586493968964, "learning_rate": 9.9616211351789e-06, "loss": 0.0369, "step": 41202 }, { "epoch": 0.7349017229693575, "grad_norm": 0.256927490234375, "learning_rate": 9.960377753954208e-06, "loss": 0.0958, "step": 41203 }, { "epoch": 0.7349195590910712, "grad_norm": 0.232254296541214, "learning_rate": 9.959134431028281e-06, "loss": 0.0878, "step": 41204 }, { "epoch": 0.7349373952127849, "grad_norm": 0.2753954231739044, "learning_rate": 9.95789116640595e-06, "loss": 0.1557, "step": 41205 }, { "epoch": 0.7349552313344986, "grad_norm": 0.2848125100135803, "learning_rate": 9.956647960092019e-06, "loss": 0.1579, "step": 41206 }, { "epoch": 0.7349730674562123, "grad_norm": 0.30386704206466675, "learning_rate": 9.955404812091324e-06, "loss": 0.114, "step": 41207 }, { "epoch": 0.7349909035779261, "grad_norm": 0.3053191006183624, "learning_rate": 9.95416172240867e-06, "loss": 0.1416, "step": 41208 }, { "epoch": 0.7350087396996398, "grad_norm": 0.24196003377437592, "learning_rate": 9.952918691048891e-06, "loss": 0.0772, "step": 41209 }, { "epoch": 0.7350265758213534, "grad_norm": 0.25430259108543396, "learning_rate": 9.951675718016792e-06, "loss": 0.0858, "step": 41210 }, { "epoch": 0.7350444119430671, "grad_norm": 0.31533169746398926, "learning_rate": 9.9504328033172e-06, "loss": 0.1907, "step": 41211 }, { "epoch": 0.7350622480647808, "grad_norm": 0.2254267930984497, "learning_rate": 9.949189946954918e-06, "loss": 0.0928, "step": 41212 }, { "epoch": 0.7350800841864945, "grad_norm": 0.2716812789440155, "learning_rate": 9.947947148934783e-06, "loss": 0.0872, "step": 41213 }, { "epoch": 0.7350979203082082, "grad_norm": 0.28400975465774536, "learning_rate": 9.946704409261604e-06, "loss": 0.1271, "step": 41214 }, { "epoch": 0.7351157564299219, "grad_norm": 0.34416598081588745, "learning_rate": 9.945461727940198e-06, "loss": 0.0915, "step": 41215 }, { "epoch": 0.7351335925516356, "grad_norm": 0.3939386308193207, "learning_rate": 9.94421910497538e-06, "loss": 0.1359, "step": 41216 }, { "epoch": 0.7351514286733493, "grad_norm": 0.21759319305419922, "learning_rate": 9.942976540371962e-06, "loss": 0.1305, "step": 41217 }, { "epoch": 0.735169264795063, "grad_norm": 0.2846537232398987, "learning_rate": 9.941734034134775e-06, "loss": 0.1152, "step": 41218 }, { "epoch": 0.7351871009167766, "grad_norm": 0.27095213532447815, "learning_rate": 9.940491586268621e-06, "loss": 0.1142, "step": 41219 }, { "epoch": 0.7352049370384903, "grad_norm": 0.3897612690925598, "learning_rate": 9.93924919677833e-06, "loss": 0.1112, "step": 41220 }, { "epoch": 0.735222773160204, "grad_norm": 0.36748987436294556, "learning_rate": 9.938006865668704e-06, "loss": 0.1001, "step": 41221 }, { "epoch": 0.7352406092819177, "grad_norm": 0.23544777929782867, "learning_rate": 9.936764592944572e-06, "loss": 0.084, "step": 41222 }, { "epoch": 0.7352584454036314, "grad_norm": 0.23080258071422577, "learning_rate": 9.935522378610746e-06, "loss": 0.1423, "step": 41223 }, { "epoch": 0.7352762815253451, "grad_norm": 0.21409931778907776, "learning_rate": 9.934280222672035e-06, "loss": 0.1433, "step": 41224 }, { "epoch": 0.7352941176470589, "grad_norm": 0.20853962004184723, "learning_rate": 9.933038125133252e-06, "loss": 0.0971, "step": 41225 }, { "epoch": 0.7353119537687726, "grad_norm": 0.2655574083328247, "learning_rate": 9.931796085999222e-06, "loss": 0.1062, "step": 41226 }, { "epoch": 0.7353297898904863, "grad_norm": 0.2580946385860443, "learning_rate": 9.930554105274759e-06, "loss": 0.099, "step": 41227 }, { "epoch": 0.7353476260121999, "grad_norm": 0.32619747519493103, "learning_rate": 9.929312182964671e-06, "loss": 0.1373, "step": 41228 }, { "epoch": 0.7353654621339136, "grad_norm": 0.32713520526885986, "learning_rate": 9.928070319073773e-06, "loss": 0.1593, "step": 41229 }, { "epoch": 0.7353832982556273, "grad_norm": 0.2641415297985077, "learning_rate": 9.926828513606874e-06, "loss": 0.1066, "step": 41230 }, { "epoch": 0.735401134377341, "grad_norm": 0.2015880048274994, "learning_rate": 9.9255867665688e-06, "loss": 0.1058, "step": 41231 }, { "epoch": 0.7354189704990547, "grad_norm": 0.281227707862854, "learning_rate": 9.924345077964359e-06, "loss": 0.1086, "step": 41232 }, { "epoch": 0.7354368066207684, "grad_norm": 0.5230772495269775, "learning_rate": 9.923103447798357e-06, "loss": 0.1017, "step": 41233 }, { "epoch": 0.7354546427424821, "grad_norm": 0.3981606066226959, "learning_rate": 9.92186187607561e-06, "loss": 0.1324, "step": 41234 }, { "epoch": 0.7354724788641958, "grad_norm": 0.23892194032669067, "learning_rate": 9.920620362800945e-06, "loss": 0.1277, "step": 41235 }, { "epoch": 0.7354903149859094, "grad_norm": 0.3244689404964447, "learning_rate": 9.919378907979163e-06, "loss": 0.1264, "step": 41236 }, { "epoch": 0.7355081511076231, "grad_norm": 0.3191315233707428, "learning_rate": 9.918137511615078e-06, "loss": 0.1446, "step": 41237 }, { "epoch": 0.7355259872293368, "grad_norm": 0.3100046217441559, "learning_rate": 9.916896173713492e-06, "loss": 0.1196, "step": 41238 }, { "epoch": 0.7355438233510505, "grad_norm": 0.29578694701194763, "learning_rate": 9.915654894279234e-06, "loss": 0.1501, "step": 41239 }, { "epoch": 0.7355616594727642, "grad_norm": 0.23093993961811066, "learning_rate": 9.914413673317108e-06, "loss": 0.1164, "step": 41240 }, { "epoch": 0.7355794955944779, "grad_norm": 0.25504809617996216, "learning_rate": 9.913172510831925e-06, "loss": 0.0783, "step": 41241 }, { "epoch": 0.7355973317161917, "grad_norm": 0.2667956054210663, "learning_rate": 9.911931406828495e-06, "loss": 0.1214, "step": 41242 }, { "epoch": 0.7356151678379054, "grad_norm": 0.31752878427505493, "learning_rate": 9.910690361311623e-06, "loss": 0.1053, "step": 41243 }, { "epoch": 0.7356330039596191, "grad_norm": 0.246660515666008, "learning_rate": 9.909449374286137e-06, "loss": 0.0931, "step": 41244 }, { "epoch": 0.7356508400813327, "grad_norm": 0.35422033071517944, "learning_rate": 9.908208445756834e-06, "loss": 0.1364, "step": 41245 }, { "epoch": 0.7356686762030464, "grad_norm": 0.31301769614219666, "learning_rate": 9.906967575728521e-06, "loss": 0.1116, "step": 41246 }, { "epoch": 0.7356865123247601, "grad_norm": 0.31700021028518677, "learning_rate": 9.905726764206022e-06, "loss": 0.15, "step": 41247 }, { "epoch": 0.7357043484464738, "grad_norm": 0.21303242444992065, "learning_rate": 9.904486011194131e-06, "loss": 0.0974, "step": 41248 }, { "epoch": 0.7357221845681875, "grad_norm": 0.2768758535385132, "learning_rate": 9.903245316697676e-06, "loss": 0.1695, "step": 41249 }, { "epoch": 0.7357400206899012, "grad_norm": 0.2156328409910202, "learning_rate": 9.902004680721455e-06, "loss": 0.1148, "step": 41250 }, { "epoch": 0.7357578568116149, "grad_norm": 0.3599184453487396, "learning_rate": 9.900764103270272e-06, "loss": 0.1301, "step": 41251 }, { "epoch": 0.7357756929333286, "grad_norm": 0.25073978304862976, "learning_rate": 9.899523584348947e-06, "loss": 0.1078, "step": 41252 }, { "epoch": 0.7357935290550423, "grad_norm": 0.22071605920791626, "learning_rate": 9.898283123962287e-06, "loss": 0.0655, "step": 41253 }, { "epoch": 0.7358113651767559, "grad_norm": 0.21694721281528473, "learning_rate": 9.897042722115098e-06, "loss": 0.0712, "step": 41254 }, { "epoch": 0.7358292012984696, "grad_norm": 0.27649515867233276, "learning_rate": 9.895802378812185e-06, "loss": 0.132, "step": 41255 }, { "epoch": 0.7358470374201833, "grad_norm": 0.2669910788536072, "learning_rate": 9.894562094058355e-06, "loss": 0.1205, "step": 41256 }, { "epoch": 0.735864873541897, "grad_norm": 0.32782435417175293, "learning_rate": 9.893321867858424e-06, "loss": 0.1631, "step": 41257 }, { "epoch": 0.7358827096636107, "grad_norm": 0.2619032561779022, "learning_rate": 9.892081700217199e-06, "loss": 0.0974, "step": 41258 }, { "epoch": 0.7359005457853245, "grad_norm": 0.3018103241920471, "learning_rate": 9.89084159113948e-06, "loss": 0.1293, "step": 41259 }, { "epoch": 0.7359183819070382, "grad_norm": 0.23306900262832642, "learning_rate": 9.88960154063007e-06, "loss": 0.116, "step": 41260 }, { "epoch": 0.7359362180287519, "grad_norm": 0.29921168088912964, "learning_rate": 9.888361548693781e-06, "loss": 0.1494, "step": 41261 }, { "epoch": 0.7359540541504656, "grad_norm": 0.5553643107414246, "learning_rate": 9.887121615335434e-06, "loss": 0.1846, "step": 41262 }, { "epoch": 0.7359718902721792, "grad_norm": 0.29240524768829346, "learning_rate": 9.885881740559821e-06, "loss": 0.1424, "step": 41263 }, { "epoch": 0.7359897263938929, "grad_norm": 0.29825636744499207, "learning_rate": 9.884641924371745e-06, "loss": 0.1885, "step": 41264 }, { "epoch": 0.7360075625156066, "grad_norm": 0.22996903955936432, "learning_rate": 9.883402166776023e-06, "loss": 0.1063, "step": 41265 }, { "epoch": 0.7360253986373203, "grad_norm": 0.5425612330436707, "learning_rate": 9.882162467777458e-06, "loss": 0.1794, "step": 41266 }, { "epoch": 0.736043234759034, "grad_norm": 0.2286163866519928, "learning_rate": 9.88092282738085e-06, "loss": 0.1112, "step": 41267 }, { "epoch": 0.7360610708807477, "grad_norm": 0.21221517026424408, "learning_rate": 9.87968324559101e-06, "loss": 0.1621, "step": 41268 }, { "epoch": 0.7360789070024614, "grad_norm": 0.33377403020858765, "learning_rate": 9.878443722412731e-06, "loss": 0.1457, "step": 41269 }, { "epoch": 0.7360967431241751, "grad_norm": 0.28944864869117737, "learning_rate": 9.877204257850834e-06, "loss": 0.1175, "step": 41270 }, { "epoch": 0.7361145792458887, "grad_norm": 0.21595631539821625, "learning_rate": 9.875964851910119e-06, "loss": 0.0879, "step": 41271 }, { "epoch": 0.7361324153676024, "grad_norm": 0.24051351845264435, "learning_rate": 9.874725504595386e-06, "loss": 0.0932, "step": 41272 }, { "epoch": 0.7361502514893161, "grad_norm": 0.22883793711662292, "learning_rate": 9.873486215911432e-06, "loss": 0.153, "step": 41273 }, { "epoch": 0.7361680876110298, "grad_norm": 0.35602131485939026, "learning_rate": 9.87224698586308e-06, "loss": 0.1328, "step": 41274 }, { "epoch": 0.7361859237327435, "grad_norm": 0.2803347706794739, "learning_rate": 9.871007814455113e-06, "loss": 0.0929, "step": 41275 }, { "epoch": 0.7362037598544573, "grad_norm": 0.3134915232658386, "learning_rate": 9.869768701692355e-06, "loss": 0.1371, "step": 41276 }, { "epoch": 0.736221595976171, "grad_norm": 0.2598631680011749, "learning_rate": 9.868529647579592e-06, "loss": 0.1169, "step": 41277 }, { "epoch": 0.7362394320978847, "grad_norm": 0.19712214171886444, "learning_rate": 9.86729065212164e-06, "loss": 0.0689, "step": 41278 }, { "epoch": 0.7362572682195984, "grad_norm": 0.26780885457992554, "learning_rate": 9.8660517153233e-06, "loss": 0.1358, "step": 41279 }, { "epoch": 0.736275104341312, "grad_norm": 0.264740914106369, "learning_rate": 9.864812837189366e-06, "loss": 0.1412, "step": 41280 }, { "epoch": 0.7362929404630257, "grad_norm": 0.21488817036151886, "learning_rate": 9.86357401772465e-06, "loss": 0.0913, "step": 41281 }, { "epoch": 0.7363107765847394, "grad_norm": 0.2211741805076599, "learning_rate": 9.862335256933936e-06, "loss": 0.1101, "step": 41282 }, { "epoch": 0.7363286127064531, "grad_norm": 0.18428611755371094, "learning_rate": 9.86109655482205e-06, "loss": 0.0828, "step": 41283 }, { "epoch": 0.7363464488281668, "grad_norm": 0.32277899980545044, "learning_rate": 9.859857911393783e-06, "loss": 0.141, "step": 41284 }, { "epoch": 0.7363642849498805, "grad_norm": 0.2758120596408844, "learning_rate": 9.858619326653934e-06, "loss": 0.0877, "step": 41285 }, { "epoch": 0.7363821210715942, "grad_norm": 0.26657819747924805, "learning_rate": 9.857380800607299e-06, "loss": 0.1283, "step": 41286 }, { "epoch": 0.7363999571933079, "grad_norm": 0.36549943685531616, "learning_rate": 9.856142333258695e-06, "loss": 0.0982, "step": 41287 }, { "epoch": 0.7364177933150216, "grad_norm": 0.2409859597682953, "learning_rate": 9.854903924612901e-06, "loss": 0.1204, "step": 41288 }, { "epoch": 0.7364356294367352, "grad_norm": 0.3054850995540619, "learning_rate": 9.853665574674744e-06, "loss": 0.118, "step": 41289 }, { "epoch": 0.7364534655584489, "grad_norm": 0.3015710115432739, "learning_rate": 9.852427283449001e-06, "loss": 0.1545, "step": 41290 }, { "epoch": 0.7364713016801626, "grad_norm": 0.30109190940856934, "learning_rate": 9.85118905094049e-06, "loss": 0.167, "step": 41291 }, { "epoch": 0.7364891378018764, "grad_norm": 0.2459445595741272, "learning_rate": 9.849950877154002e-06, "loss": 0.0904, "step": 41292 }, { "epoch": 0.7365069739235901, "grad_norm": 0.3391425311565399, "learning_rate": 9.848712762094337e-06, "loss": 0.129, "step": 41293 }, { "epoch": 0.7365248100453038, "grad_norm": 0.23951072990894318, "learning_rate": 9.847474705766294e-06, "loss": 0.1369, "step": 41294 }, { "epoch": 0.7365426461670175, "grad_norm": 0.32919031381607056, "learning_rate": 9.846236708174663e-06, "loss": 0.1044, "step": 41295 }, { "epoch": 0.7365604822887312, "grad_norm": 0.23353825509548187, "learning_rate": 9.844998769324265e-06, "loss": 0.1165, "step": 41296 }, { "epoch": 0.7365783184104449, "grad_norm": 0.35395652055740356, "learning_rate": 9.843760889219883e-06, "loss": 0.1528, "step": 41297 }, { "epoch": 0.7365961545321585, "grad_norm": 0.26342421770095825, "learning_rate": 9.842523067866318e-06, "loss": 0.1178, "step": 41298 }, { "epoch": 0.7366139906538722, "grad_norm": 0.3448401689529419, "learning_rate": 9.841285305268364e-06, "loss": 0.1047, "step": 41299 }, { "epoch": 0.7366318267755859, "grad_norm": 0.24445965886116028, "learning_rate": 9.840047601430829e-06, "loss": 0.1253, "step": 41300 }, { "epoch": 0.7366496628972996, "grad_norm": 0.22575442492961884, "learning_rate": 9.838809956358505e-06, "loss": 0.1364, "step": 41301 }, { "epoch": 0.7366674990190133, "grad_norm": 0.27268749475479126, "learning_rate": 9.837572370056183e-06, "loss": 0.1396, "step": 41302 }, { "epoch": 0.736685335140727, "grad_norm": 0.2491791993379593, "learning_rate": 9.836334842528677e-06, "loss": 0.1246, "step": 41303 }, { "epoch": 0.7367031712624407, "grad_norm": 0.28346988558769226, "learning_rate": 9.835097373780766e-06, "loss": 0.1428, "step": 41304 }, { "epoch": 0.7367210073841544, "grad_norm": 0.26530852913856506, "learning_rate": 9.833859963817263e-06, "loss": 0.1386, "step": 41305 }, { "epoch": 0.736738843505868, "grad_norm": 0.31037837266921997, "learning_rate": 9.832622612642956e-06, "loss": 0.1025, "step": 41306 }, { "epoch": 0.7367566796275817, "grad_norm": 0.27824413776397705, "learning_rate": 9.831385320262643e-06, "loss": 0.1625, "step": 41307 }, { "epoch": 0.7367745157492954, "grad_norm": 0.3469172418117523, "learning_rate": 9.830148086681112e-06, "loss": 0.1169, "step": 41308 }, { "epoch": 0.7367923518710092, "grad_norm": 0.30491894483566284, "learning_rate": 9.828910911903175e-06, "loss": 0.1054, "step": 41309 }, { "epoch": 0.7368101879927229, "grad_norm": 0.22883087396621704, "learning_rate": 9.827673795933618e-06, "loss": 0.0977, "step": 41310 }, { "epoch": 0.7368280241144366, "grad_norm": 0.2847735285758972, "learning_rate": 9.826436738777237e-06, "loss": 0.0708, "step": 41311 }, { "epoch": 0.7368458602361503, "grad_norm": 0.3282276690006256, "learning_rate": 9.82519974043882e-06, "loss": 0.1307, "step": 41312 }, { "epoch": 0.736863696357864, "grad_norm": 0.35099318623542786, "learning_rate": 9.823962800923181e-06, "loss": 0.1349, "step": 41313 }, { "epoch": 0.7368815324795777, "grad_norm": 0.28953787684440613, "learning_rate": 9.822725920235102e-06, "loss": 0.1509, "step": 41314 }, { "epoch": 0.7368993686012913, "grad_norm": 0.23363667726516724, "learning_rate": 9.821489098379372e-06, "loss": 0.0719, "step": 41315 }, { "epoch": 0.736917204723005, "grad_norm": 0.2640970051288605, "learning_rate": 9.8202523353608e-06, "loss": 0.1172, "step": 41316 }, { "epoch": 0.7369350408447187, "grad_norm": 0.22964787483215332, "learning_rate": 9.819015631184164e-06, "loss": 0.1352, "step": 41317 }, { "epoch": 0.7369528769664324, "grad_norm": 0.29829102754592896, "learning_rate": 9.817778985854276e-06, "loss": 0.1285, "step": 41318 }, { "epoch": 0.7369707130881461, "grad_norm": 0.27778011560440063, "learning_rate": 9.81654239937592e-06, "loss": 0.1213, "step": 41319 }, { "epoch": 0.7369885492098598, "grad_norm": 0.3028642535209656, "learning_rate": 9.815305871753891e-06, "loss": 0.0751, "step": 41320 }, { "epoch": 0.7370063853315735, "grad_norm": 0.29142358899116516, "learning_rate": 9.81406940299297e-06, "loss": 0.1571, "step": 41321 }, { "epoch": 0.7370242214532872, "grad_norm": 0.24036431312561035, "learning_rate": 9.812832993097973e-06, "loss": 0.1054, "step": 41322 }, { "epoch": 0.7370420575750009, "grad_norm": 0.31236696243286133, "learning_rate": 9.811596642073678e-06, "loss": 0.1085, "step": 41323 }, { "epoch": 0.7370598936967145, "grad_norm": 0.2720494270324707, "learning_rate": 9.810360349924883e-06, "loss": 0.1317, "step": 41324 }, { "epoch": 0.7370777298184282, "grad_norm": 0.2666541635990143, "learning_rate": 9.809124116656365e-06, "loss": 0.1219, "step": 41325 }, { "epoch": 0.737095565940142, "grad_norm": 0.19292420148849487, "learning_rate": 9.807887942272941e-06, "loss": 0.0919, "step": 41326 }, { "epoch": 0.7371134020618557, "grad_norm": 0.23057663440704346, "learning_rate": 9.80665182677939e-06, "loss": 0.0924, "step": 41327 }, { "epoch": 0.7371312381835694, "grad_norm": 0.3075406551361084, "learning_rate": 9.805415770180503e-06, "loss": 0.117, "step": 41328 }, { "epoch": 0.7371490743052831, "grad_norm": 0.3134598135948181, "learning_rate": 9.804179772481064e-06, "loss": 0.0958, "step": 41329 }, { "epoch": 0.7371669104269968, "grad_norm": 0.24785654246807098, "learning_rate": 9.802943833685881e-06, "loss": 0.1259, "step": 41330 }, { "epoch": 0.7371847465487105, "grad_norm": 0.23358578979969025, "learning_rate": 9.80170795379973e-06, "loss": 0.1315, "step": 41331 }, { "epoch": 0.7372025826704242, "grad_norm": 0.2687293291091919, "learning_rate": 9.800472132827415e-06, "loss": 0.1012, "step": 41332 }, { "epoch": 0.7372204187921378, "grad_norm": 0.23413026332855225, "learning_rate": 9.799236370773712e-06, "loss": 0.1316, "step": 41333 }, { "epoch": 0.7372382549138515, "grad_norm": 0.2845030725002289, "learning_rate": 9.798000667643426e-06, "loss": 0.1515, "step": 41334 }, { "epoch": 0.7372560910355652, "grad_norm": 0.24666526913642883, "learning_rate": 9.796765023441342e-06, "loss": 0.1009, "step": 41335 }, { "epoch": 0.7372739271572789, "grad_norm": 0.26149916648864746, "learning_rate": 9.795529438172247e-06, "loss": 0.1522, "step": 41336 }, { "epoch": 0.7372917632789926, "grad_norm": 0.310781329870224, "learning_rate": 9.794293911840932e-06, "loss": 0.1356, "step": 41337 }, { "epoch": 0.7373095994007063, "grad_norm": 0.27617835998535156, "learning_rate": 9.793058444452175e-06, "loss": 0.1151, "step": 41338 }, { "epoch": 0.73732743552242, "grad_norm": 0.2620704174041748, "learning_rate": 9.791823036010785e-06, "loss": 0.1065, "step": 41339 }, { "epoch": 0.7373452716441337, "grad_norm": 0.21987231075763702, "learning_rate": 9.790587686521544e-06, "loss": 0.1243, "step": 41340 }, { "epoch": 0.7373631077658473, "grad_norm": 0.2871493995189667, "learning_rate": 9.789352395989238e-06, "loss": 0.1247, "step": 41341 }, { "epoch": 0.737380943887561, "grad_norm": 0.19367468357086182, "learning_rate": 9.788117164418645e-06, "loss": 0.0685, "step": 41342 }, { "epoch": 0.7373987800092748, "grad_norm": 0.23209679126739502, "learning_rate": 9.786881991814572e-06, "loss": 0.0928, "step": 41343 }, { "epoch": 0.7374166161309885, "grad_norm": 0.3238326609134674, "learning_rate": 9.785646878181792e-06, "loss": 0.1184, "step": 41344 }, { "epoch": 0.7374344522527022, "grad_norm": 0.19274313747882843, "learning_rate": 9.784411823525109e-06, "loss": 0.1528, "step": 41345 }, { "epoch": 0.7374522883744159, "grad_norm": 0.3104701638221741, "learning_rate": 9.783176827849292e-06, "loss": 0.0909, "step": 41346 }, { "epoch": 0.7374701244961296, "grad_norm": 0.34914323687553406, "learning_rate": 9.781941891159143e-06, "loss": 0.143, "step": 41347 }, { "epoch": 0.7374879606178433, "grad_norm": 0.19293087720870972, "learning_rate": 9.780707013459447e-06, "loss": 0.1033, "step": 41348 }, { "epoch": 0.737505796739557, "grad_norm": 0.28701385855674744, "learning_rate": 9.779472194754986e-06, "loss": 0.109, "step": 41349 }, { "epoch": 0.7375236328612707, "grad_norm": 0.38134685158729553, "learning_rate": 9.778237435050547e-06, "loss": 0.151, "step": 41350 }, { "epoch": 0.7375414689829843, "grad_norm": 0.2430017739534378, "learning_rate": 9.77700273435091e-06, "loss": 0.1528, "step": 41351 }, { "epoch": 0.737559305104698, "grad_norm": 0.31125009059906006, "learning_rate": 9.775768092660876e-06, "loss": 0.1459, "step": 41352 }, { "epoch": 0.7375771412264117, "grad_norm": 0.2849879562854767, "learning_rate": 9.77453350998522e-06, "loss": 0.1533, "step": 41353 }, { "epoch": 0.7375949773481254, "grad_norm": 0.2242075651884079, "learning_rate": 9.773298986328735e-06, "loss": 0.0902, "step": 41354 }, { "epoch": 0.7376128134698391, "grad_norm": 0.17803502082824707, "learning_rate": 9.772064521696192e-06, "loss": 0.0798, "step": 41355 }, { "epoch": 0.7376306495915528, "grad_norm": 0.28858867287635803, "learning_rate": 9.770830116092398e-06, "loss": 0.129, "step": 41356 }, { "epoch": 0.7376484857132665, "grad_norm": 0.5552446246147156, "learning_rate": 9.769595769522121e-06, "loss": 0.159, "step": 41357 }, { "epoch": 0.7376663218349802, "grad_norm": 0.26760727167129517, "learning_rate": 9.76836148199015e-06, "loss": 0.1145, "step": 41358 }, { "epoch": 0.7376841579566938, "grad_norm": 0.3317415118217468, "learning_rate": 9.767127253501273e-06, "loss": 0.1462, "step": 41359 }, { "epoch": 0.7377019940784076, "grad_norm": 0.2822422981262207, "learning_rate": 9.765893084060265e-06, "loss": 0.1271, "step": 41360 }, { "epoch": 0.7377198302001213, "grad_norm": 0.3395298719406128, "learning_rate": 9.764658973671925e-06, "loss": 0.1073, "step": 41361 }, { "epoch": 0.737737666321835, "grad_norm": 0.22872154414653778, "learning_rate": 9.76342492234103e-06, "loss": 0.1116, "step": 41362 }, { "epoch": 0.7377555024435487, "grad_norm": 0.22709661722183228, "learning_rate": 9.762190930072365e-06, "loss": 0.0925, "step": 41363 }, { "epoch": 0.7377733385652624, "grad_norm": 0.27168264985084534, "learning_rate": 9.760956996870699e-06, "loss": 0.1351, "step": 41364 }, { "epoch": 0.7377911746869761, "grad_norm": 0.2419309765100479, "learning_rate": 9.759723122740836e-06, "loss": 0.1066, "step": 41365 }, { "epoch": 0.7378090108086898, "grad_norm": 0.24501577019691467, "learning_rate": 9.758489307687552e-06, "loss": 0.1011, "step": 41366 }, { "epoch": 0.7378268469304035, "grad_norm": 0.3001067340373993, "learning_rate": 9.757255551715624e-06, "loss": 0.1522, "step": 41367 }, { "epoch": 0.7378446830521171, "grad_norm": 0.21468831598758698, "learning_rate": 9.756021854829833e-06, "loss": 0.0719, "step": 41368 }, { "epoch": 0.7378625191738308, "grad_norm": 0.3272707760334015, "learning_rate": 9.754788217034975e-06, "loss": 0.1008, "step": 41369 }, { "epoch": 0.7378803552955445, "grad_norm": 0.3034740388393402, "learning_rate": 9.753554638335822e-06, "loss": 0.1375, "step": 41370 }, { "epoch": 0.7378981914172582, "grad_norm": 0.36299192905426025, "learning_rate": 9.752321118737149e-06, "loss": 0.154, "step": 41371 }, { "epoch": 0.7379160275389719, "grad_norm": 0.2829195261001587, "learning_rate": 9.751087658243754e-06, "loss": 0.1302, "step": 41372 }, { "epoch": 0.7379338636606856, "grad_norm": 0.37660980224609375, "learning_rate": 9.749854256860399e-06, "loss": 0.1014, "step": 41373 }, { "epoch": 0.7379516997823993, "grad_norm": 0.26894327998161316, "learning_rate": 9.748620914591888e-06, "loss": 0.1102, "step": 41374 }, { "epoch": 0.737969535904113, "grad_norm": 0.28775569796562195, "learning_rate": 9.747387631442991e-06, "loss": 0.1566, "step": 41375 }, { "epoch": 0.7379873720258266, "grad_norm": 0.23961712419986725, "learning_rate": 9.746154407418486e-06, "loss": 0.1321, "step": 41376 }, { "epoch": 0.7380052081475404, "grad_norm": 0.26509279012680054, "learning_rate": 9.744921242523144e-06, "loss": 0.135, "step": 41377 }, { "epoch": 0.7380230442692541, "grad_norm": 0.3258817791938782, "learning_rate": 9.743688136761769e-06, "loss": 0.143, "step": 41378 }, { "epoch": 0.7380408803909678, "grad_norm": 0.40079614520072937, "learning_rate": 9.742455090139128e-06, "loss": 0.1376, "step": 41379 }, { "epoch": 0.7380587165126815, "grad_norm": 0.3613516688346863, "learning_rate": 9.741222102659998e-06, "loss": 0.1219, "step": 41380 }, { "epoch": 0.7380765526343952, "grad_norm": 0.24138766527175903, "learning_rate": 9.739989174329153e-06, "loss": 0.1228, "step": 41381 }, { "epoch": 0.7380943887561089, "grad_norm": 0.24906961619853973, "learning_rate": 9.738756305151392e-06, "loss": 0.0866, "step": 41382 }, { "epoch": 0.7381122248778226, "grad_norm": 0.26027509570121765, "learning_rate": 9.737523495131479e-06, "loss": 0.1036, "step": 41383 }, { "epoch": 0.7381300609995363, "grad_norm": 0.2520490288734436, "learning_rate": 9.736290744274198e-06, "loss": 0.0623, "step": 41384 }, { "epoch": 0.73814789712125, "grad_norm": 0.21440115571022034, "learning_rate": 9.735058052584316e-06, "loss": 0.1032, "step": 41385 }, { "epoch": 0.7381657332429636, "grad_norm": 0.23680466413497925, "learning_rate": 9.733825420066634e-06, "loss": 0.1249, "step": 41386 }, { "epoch": 0.7381835693646773, "grad_norm": 0.13513095676898956, "learning_rate": 9.732592846725905e-06, "loss": 0.0483, "step": 41387 }, { "epoch": 0.738201405486391, "grad_norm": 0.28293490409851074, "learning_rate": 9.73136033256693e-06, "loss": 0.1358, "step": 41388 }, { "epoch": 0.7382192416081047, "grad_norm": 0.24058249592781067, "learning_rate": 9.730127877594474e-06, "loss": 0.0958, "step": 41389 }, { "epoch": 0.7382370777298184, "grad_norm": 0.24740611016750336, "learning_rate": 9.72889548181331e-06, "loss": 0.0722, "step": 41390 }, { "epoch": 0.7382549138515321, "grad_norm": 0.27616000175476074, "learning_rate": 9.727663145228231e-06, "loss": 0.0822, "step": 41391 }, { "epoch": 0.7382727499732458, "grad_norm": 0.2812407612800598, "learning_rate": 9.726430867844002e-06, "loss": 0.1247, "step": 41392 }, { "epoch": 0.7382905860949596, "grad_norm": 0.23269103467464447, "learning_rate": 9.725198649665401e-06, "loss": 0.1277, "step": 41393 }, { "epoch": 0.7383084222166733, "grad_norm": 0.25381720066070557, "learning_rate": 9.7239664906972e-06, "loss": 0.0819, "step": 41394 }, { "epoch": 0.7383262583383869, "grad_norm": 0.318613737821579, "learning_rate": 9.722734390944188e-06, "loss": 0.1469, "step": 41395 }, { "epoch": 0.7383440944601006, "grad_norm": 0.32372286915779114, "learning_rate": 9.721502350411136e-06, "loss": 0.1107, "step": 41396 }, { "epoch": 0.7383619305818143, "grad_norm": 0.19990098476409912, "learning_rate": 9.720270369102818e-06, "loss": 0.0929, "step": 41397 }, { "epoch": 0.738379766703528, "grad_norm": 0.24862739443778992, "learning_rate": 9.719038447023998e-06, "loss": 0.1186, "step": 41398 }, { "epoch": 0.7383976028252417, "grad_norm": 0.3199597895145416, "learning_rate": 9.717806584179472e-06, "loss": 0.1244, "step": 41399 }, { "epoch": 0.7384154389469554, "grad_norm": 0.28124749660491943, "learning_rate": 9.716574780574e-06, "loss": 0.096, "step": 41400 }, { "epoch": 0.7384332750686691, "grad_norm": 0.21386314928531647, "learning_rate": 9.71534303621237e-06, "loss": 0.0881, "step": 41401 }, { "epoch": 0.7384511111903828, "grad_norm": 0.22377094626426697, "learning_rate": 9.71411135109935e-06, "loss": 0.1298, "step": 41402 }, { "epoch": 0.7384689473120964, "grad_norm": 0.1951315551996231, "learning_rate": 9.712879725239703e-06, "loss": 0.0817, "step": 41403 }, { "epoch": 0.7384867834338101, "grad_norm": 0.27208980917930603, "learning_rate": 9.711648158638229e-06, "loss": 0.1007, "step": 41404 }, { "epoch": 0.7385046195555238, "grad_norm": 0.3466946482658386, "learning_rate": 9.710416651299681e-06, "loss": 0.1774, "step": 41405 }, { "epoch": 0.7385224556772375, "grad_norm": 0.2540076971054077, "learning_rate": 9.709185203228841e-06, "loss": 0.2065, "step": 41406 }, { "epoch": 0.7385402917989512, "grad_norm": 0.23163831233978271, "learning_rate": 9.70795381443047e-06, "loss": 0.1031, "step": 41407 }, { "epoch": 0.7385581279206649, "grad_norm": 0.2710544764995575, "learning_rate": 9.706722484909364e-06, "loss": 0.1548, "step": 41408 }, { "epoch": 0.7385759640423786, "grad_norm": 0.5077825784683228, "learning_rate": 9.705491214670281e-06, "loss": 0.1196, "step": 41409 }, { "epoch": 0.7385938001640924, "grad_norm": 0.27410173416137695, "learning_rate": 9.704260003717999e-06, "loss": 0.0725, "step": 41410 }, { "epoch": 0.7386116362858061, "grad_norm": 0.20637863874435425, "learning_rate": 9.703028852057277e-06, "loss": 0.0807, "step": 41411 }, { "epoch": 0.7386294724075197, "grad_norm": 0.3210963308811188, "learning_rate": 9.701797759692908e-06, "loss": 0.1167, "step": 41412 }, { "epoch": 0.7386473085292334, "grad_norm": 0.3024260997772217, "learning_rate": 9.700566726629657e-06, "loss": 0.1476, "step": 41413 }, { "epoch": 0.7386651446509471, "grad_norm": 0.2640106976032257, "learning_rate": 9.69933575287228e-06, "loss": 0.1031, "step": 41414 }, { "epoch": 0.7386829807726608, "grad_norm": 0.307650089263916, "learning_rate": 9.698104838425575e-06, "loss": 0.1178, "step": 41415 }, { "epoch": 0.7387008168943745, "grad_norm": 0.2609955966472626, "learning_rate": 9.696873983294292e-06, "loss": 0.145, "step": 41416 }, { "epoch": 0.7387186530160882, "grad_norm": 0.2551628351211548, "learning_rate": 9.695643187483216e-06, "loss": 0.0918, "step": 41417 }, { "epoch": 0.7387364891378019, "grad_norm": 0.20740343630313873, "learning_rate": 9.694412450997117e-06, "loss": 0.0547, "step": 41418 }, { "epoch": 0.7387543252595156, "grad_norm": 0.2992290258407593, "learning_rate": 9.69318177384076e-06, "loss": 0.1215, "step": 41419 }, { "epoch": 0.7387721613812293, "grad_norm": 0.2691270411014557, "learning_rate": 9.691951156018907e-06, "loss": 0.1097, "step": 41420 }, { "epoch": 0.7387899975029429, "grad_norm": 0.3057255148887634, "learning_rate": 9.690720597536352e-06, "loss": 0.1064, "step": 41421 }, { "epoch": 0.7388078336246566, "grad_norm": 0.31872445344924927, "learning_rate": 9.689490098397846e-06, "loss": 0.092, "step": 41422 }, { "epoch": 0.7388256697463703, "grad_norm": 0.3267669379711151, "learning_rate": 9.688259658608167e-06, "loss": 0.0895, "step": 41423 }, { "epoch": 0.738843505868084, "grad_norm": 0.2612524926662445, "learning_rate": 9.687029278172074e-06, "loss": 0.1347, "step": 41424 }, { "epoch": 0.7388613419897977, "grad_norm": 0.3430282473564148, "learning_rate": 9.685798957094353e-06, "loss": 0.0975, "step": 41425 }, { "epoch": 0.7388791781115114, "grad_norm": 0.28120988607406616, "learning_rate": 9.684568695379765e-06, "loss": 0.0962, "step": 41426 }, { "epoch": 0.7388970142332252, "grad_norm": 0.2897935211658478, "learning_rate": 9.683338493033067e-06, "loss": 0.0899, "step": 41427 }, { "epoch": 0.7389148503549389, "grad_norm": 0.19540317356586456, "learning_rate": 9.682108350059053e-06, "loss": 0.103, "step": 41428 }, { "epoch": 0.7389326864766526, "grad_norm": 0.3540865182876587, "learning_rate": 9.680878266462464e-06, "loss": 0.1765, "step": 41429 }, { "epoch": 0.7389505225983662, "grad_norm": 0.3027437627315521, "learning_rate": 9.679648242248093e-06, "loss": 0.1468, "step": 41430 }, { "epoch": 0.7389683587200799, "grad_norm": 0.31158390641212463, "learning_rate": 9.678418277420695e-06, "loss": 0.1179, "step": 41431 }, { "epoch": 0.7389861948417936, "grad_norm": 0.2667568325996399, "learning_rate": 9.677188371985044e-06, "loss": 0.152, "step": 41432 }, { "epoch": 0.7390040309635073, "grad_norm": 0.372336208820343, "learning_rate": 9.675958525945891e-06, "loss": 0.1307, "step": 41433 }, { "epoch": 0.739021867085221, "grad_norm": 0.24716825783252716, "learning_rate": 9.674728739308023e-06, "loss": 0.1088, "step": 41434 }, { "epoch": 0.7390397032069347, "grad_norm": 0.2810577154159546, "learning_rate": 9.673499012076202e-06, "loss": 0.1214, "step": 41435 }, { "epoch": 0.7390575393286484, "grad_norm": 0.24963566660881042, "learning_rate": 9.672269344255192e-06, "loss": 0.1197, "step": 41436 }, { "epoch": 0.7390753754503621, "grad_norm": 0.33228957653045654, "learning_rate": 9.671039735849752e-06, "loss": 0.1766, "step": 41437 }, { "epoch": 0.7390932115720757, "grad_norm": 0.3481631577014923, "learning_rate": 9.669810186864664e-06, "loss": 0.1124, "step": 41438 }, { "epoch": 0.7391110476937894, "grad_norm": 0.32490450143814087, "learning_rate": 9.668580697304686e-06, "loss": 0.0922, "step": 41439 }, { "epoch": 0.7391288838155031, "grad_norm": 0.25999683141708374, "learning_rate": 9.667351267174584e-06, "loss": 0.2062, "step": 41440 }, { "epoch": 0.7391467199372168, "grad_norm": 0.3724503815174103, "learning_rate": 9.666121896479119e-06, "loss": 0.1238, "step": 41441 }, { "epoch": 0.7391645560589305, "grad_norm": 0.2098606377840042, "learning_rate": 9.664892585223059e-06, "loss": 0.07, "step": 41442 }, { "epoch": 0.7391823921806442, "grad_norm": 0.21738047897815704, "learning_rate": 9.66366333341118e-06, "loss": 0.1037, "step": 41443 }, { "epoch": 0.739200228302358, "grad_norm": 0.2584335505962372, "learning_rate": 9.66243414104824e-06, "loss": 0.1337, "step": 41444 }, { "epoch": 0.7392180644240717, "grad_norm": 0.2703591287136078, "learning_rate": 9.661205008139004e-06, "loss": 0.1079, "step": 41445 }, { "epoch": 0.7392359005457854, "grad_norm": 0.28691044449806213, "learning_rate": 9.659975934688225e-06, "loss": 0.0983, "step": 41446 }, { "epoch": 0.739253736667499, "grad_norm": 0.3057778477668762, "learning_rate": 9.658746920700687e-06, "loss": 0.1004, "step": 41447 }, { "epoch": 0.7392715727892127, "grad_norm": 0.26334884762763977, "learning_rate": 9.657517966181145e-06, "loss": 0.1571, "step": 41448 }, { "epoch": 0.7392894089109264, "grad_norm": 0.3140193521976471, "learning_rate": 9.656289071134361e-06, "loss": 0.1536, "step": 41449 }, { "epoch": 0.7393072450326401, "grad_norm": 0.2501097619533539, "learning_rate": 9.655060235565091e-06, "loss": 0.1178, "step": 41450 }, { "epoch": 0.7393250811543538, "grad_norm": 0.24283044040203094, "learning_rate": 9.653831459478118e-06, "loss": 0.1108, "step": 41451 }, { "epoch": 0.7393429172760675, "grad_norm": 0.20097772777080536, "learning_rate": 9.652602742878195e-06, "loss": 0.1354, "step": 41452 }, { "epoch": 0.7393607533977812, "grad_norm": 0.26745274662971497, "learning_rate": 9.651374085770081e-06, "loss": 0.1005, "step": 41453 }, { "epoch": 0.7393785895194949, "grad_norm": 0.30940866470336914, "learning_rate": 9.650145488158537e-06, "loss": 0.0985, "step": 41454 }, { "epoch": 0.7393964256412086, "grad_norm": 0.2447596937417984, "learning_rate": 9.64891695004834e-06, "loss": 0.1198, "step": 41455 }, { "epoch": 0.7394142617629222, "grad_norm": 0.22476989030838013, "learning_rate": 9.647688471444233e-06, "loss": 0.1109, "step": 41456 }, { "epoch": 0.7394320978846359, "grad_norm": 0.38907331228256226, "learning_rate": 9.646460052350994e-06, "loss": 0.1189, "step": 41457 }, { "epoch": 0.7394499340063496, "grad_norm": 0.22350060939788818, "learning_rate": 9.645231692773382e-06, "loss": 0.0924, "step": 41458 }, { "epoch": 0.7394677701280633, "grad_norm": 0.24202854931354523, "learning_rate": 9.644003392716148e-06, "loss": 0.1164, "step": 41459 }, { "epoch": 0.739485606249777, "grad_norm": 0.24680662155151367, "learning_rate": 9.642775152184067e-06, "loss": 0.1084, "step": 41460 }, { "epoch": 0.7395034423714908, "grad_norm": 0.34029340744018555, "learning_rate": 9.641546971181894e-06, "loss": 0.1432, "step": 41461 }, { "epoch": 0.7395212784932045, "grad_norm": 0.2243356704711914, "learning_rate": 9.640318849714388e-06, "loss": 0.0816, "step": 41462 }, { "epoch": 0.7395391146149182, "grad_norm": 0.265844464302063, "learning_rate": 9.639090787786306e-06, "loss": 0.0997, "step": 41463 }, { "epoch": 0.7395569507366319, "grad_norm": 0.2882643938064575, "learning_rate": 9.63786278540242e-06, "loss": 0.0959, "step": 41464 }, { "epoch": 0.7395747868583455, "grad_norm": 0.35611018538475037, "learning_rate": 9.636634842567486e-06, "loss": 0.1454, "step": 41465 }, { "epoch": 0.7395926229800592, "grad_norm": 0.26879075169563293, "learning_rate": 9.635406959286259e-06, "loss": 0.1022, "step": 41466 }, { "epoch": 0.7396104591017729, "grad_norm": 0.3092162311077118, "learning_rate": 9.6341791355635e-06, "loss": 0.1094, "step": 41467 }, { "epoch": 0.7396282952234866, "grad_norm": 0.29538553953170776, "learning_rate": 9.632951371403964e-06, "loss": 0.1378, "step": 41468 }, { "epoch": 0.7396461313452003, "grad_norm": 0.2055298388004303, "learning_rate": 9.631723666812418e-06, "loss": 0.0861, "step": 41469 }, { "epoch": 0.739663967466914, "grad_norm": 0.3825426995754242, "learning_rate": 9.630496021793622e-06, "loss": 0.1674, "step": 41470 }, { "epoch": 0.7396818035886277, "grad_norm": 0.26604917645454407, "learning_rate": 9.629268436352337e-06, "loss": 0.1288, "step": 41471 }, { "epoch": 0.7396996397103414, "grad_norm": 0.2953208386898041, "learning_rate": 9.628040910493306e-06, "loss": 0.1065, "step": 41472 }, { "epoch": 0.739717475832055, "grad_norm": 0.3289187252521515, "learning_rate": 9.626813444221306e-06, "loss": 0.1117, "step": 41473 }, { "epoch": 0.7397353119537687, "grad_norm": 0.38006505370140076, "learning_rate": 9.625586037541088e-06, "loss": 0.0758, "step": 41474 }, { "epoch": 0.7397531480754824, "grad_norm": 0.18941110372543335, "learning_rate": 9.624358690457408e-06, "loss": 0.1157, "step": 41475 }, { "epoch": 0.7397709841971961, "grad_norm": 0.3121732473373413, "learning_rate": 9.623131402975014e-06, "loss": 0.1247, "step": 41476 }, { "epoch": 0.7397888203189098, "grad_norm": 0.28172236680984497, "learning_rate": 9.621904175098684e-06, "loss": 0.1376, "step": 41477 }, { "epoch": 0.7398066564406236, "grad_norm": 0.31437012553215027, "learning_rate": 9.620677006833165e-06, "loss": 0.0999, "step": 41478 }, { "epoch": 0.7398244925623373, "grad_norm": 0.26400426030158997, "learning_rate": 9.619449898183214e-06, "loss": 0.1017, "step": 41479 }, { "epoch": 0.739842328684051, "grad_norm": 0.297380656003952, "learning_rate": 9.618222849153585e-06, "loss": 0.1542, "step": 41480 }, { "epoch": 0.7398601648057647, "grad_norm": 0.2452584058046341, "learning_rate": 9.616995859749032e-06, "loss": 0.1185, "step": 41481 }, { "epoch": 0.7398780009274784, "grad_norm": 0.25024178624153137, "learning_rate": 9.615768929974323e-06, "loss": 0.1063, "step": 41482 }, { "epoch": 0.739895837049192, "grad_norm": 0.2582530677318573, "learning_rate": 9.614542059834198e-06, "loss": 0.1515, "step": 41483 }, { "epoch": 0.7399136731709057, "grad_norm": 0.24570819735527039, "learning_rate": 9.613315249333432e-06, "loss": 0.131, "step": 41484 }, { "epoch": 0.7399315092926194, "grad_norm": 0.2790364623069763, "learning_rate": 9.61208849847676e-06, "loss": 0.1227, "step": 41485 }, { "epoch": 0.7399493454143331, "grad_norm": 0.2984522879123688, "learning_rate": 9.610861807268956e-06, "loss": 0.1595, "step": 41486 }, { "epoch": 0.7399671815360468, "grad_norm": 0.27627497911453247, "learning_rate": 9.60963517571477e-06, "loss": 0.078, "step": 41487 }, { "epoch": 0.7399850176577605, "grad_norm": 0.172231525182724, "learning_rate": 9.608408603818952e-06, "loss": 0.1011, "step": 41488 }, { "epoch": 0.7400028537794742, "grad_norm": 0.2303430438041687, "learning_rate": 9.60718209158625e-06, "loss": 0.0972, "step": 41489 }, { "epoch": 0.7400206899011879, "grad_norm": 0.3887530565261841, "learning_rate": 9.605955639021433e-06, "loss": 0.1544, "step": 41490 }, { "epoch": 0.7400385260229015, "grad_norm": 0.2966137230396271, "learning_rate": 9.604729246129252e-06, "loss": 0.096, "step": 41491 }, { "epoch": 0.7400563621446152, "grad_norm": 0.2255358248949051, "learning_rate": 9.603502912914456e-06, "loss": 0.1113, "step": 41492 }, { "epoch": 0.7400741982663289, "grad_norm": 0.2501656115055084, "learning_rate": 9.602276639381803e-06, "loss": 0.1431, "step": 41493 }, { "epoch": 0.7400920343880427, "grad_norm": 0.28245556354522705, "learning_rate": 9.601050425536032e-06, "loss": 0.1247, "step": 41494 }, { "epoch": 0.7401098705097564, "grad_norm": 0.33549466729164124, "learning_rate": 9.599824271381919e-06, "loss": 0.1086, "step": 41495 }, { "epoch": 0.7401277066314701, "grad_norm": 0.2642711400985718, "learning_rate": 9.598598176924206e-06, "loss": 0.1752, "step": 41496 }, { "epoch": 0.7401455427531838, "grad_norm": 0.22594918310642242, "learning_rate": 9.597372142167635e-06, "loss": 0.1434, "step": 41497 }, { "epoch": 0.7401633788748975, "grad_norm": 0.2276662290096283, "learning_rate": 9.596146167116973e-06, "loss": 0.1655, "step": 41498 }, { "epoch": 0.7401812149966112, "grad_norm": 0.29121944308280945, "learning_rate": 9.594920251776976e-06, "loss": 0.1546, "step": 41499 }, { "epoch": 0.7401990511183248, "grad_norm": 0.2935793697834015, "learning_rate": 9.593694396152386e-06, "loss": 0.0946, "step": 41500 }, { "epoch": 0.7402168872400385, "grad_norm": 0.23710060119628906, "learning_rate": 9.592468600247961e-06, "loss": 0.136, "step": 41501 }, { "epoch": 0.7402347233617522, "grad_norm": 0.290985643863678, "learning_rate": 9.591242864068439e-06, "loss": 0.1351, "step": 41502 }, { "epoch": 0.7402525594834659, "grad_norm": 0.2703031003475189, "learning_rate": 9.590017187618592e-06, "loss": 0.1278, "step": 41503 }, { "epoch": 0.7402703956051796, "grad_norm": 0.25689277052879333, "learning_rate": 9.58879157090316e-06, "loss": 0.0972, "step": 41504 }, { "epoch": 0.7402882317268933, "grad_norm": 0.24157381057739258, "learning_rate": 9.587566013926897e-06, "loss": 0.1557, "step": 41505 }, { "epoch": 0.740306067848607, "grad_norm": 0.19293861091136932, "learning_rate": 9.586340516694548e-06, "loss": 0.1046, "step": 41506 }, { "epoch": 0.7403239039703207, "grad_norm": 0.2195795327425003, "learning_rate": 9.585115079210857e-06, "loss": 0.1099, "step": 41507 }, { "epoch": 0.7403417400920344, "grad_norm": 0.2531551420688629, "learning_rate": 9.583889701480598e-06, "loss": 0.1054, "step": 41508 }, { "epoch": 0.740359576213748, "grad_norm": 0.29554933309555054, "learning_rate": 9.582664383508503e-06, "loss": 0.1394, "step": 41509 }, { "epoch": 0.7403774123354617, "grad_norm": 0.32142654061317444, "learning_rate": 9.581439125299318e-06, "loss": 0.112, "step": 41510 }, { "epoch": 0.7403952484571755, "grad_norm": 0.2822263836860657, "learning_rate": 9.580213926857809e-06, "loss": 0.1423, "step": 41511 }, { "epoch": 0.7404130845788892, "grad_norm": 0.34038910269737244, "learning_rate": 9.57898878818871e-06, "loss": 0.1379, "step": 41512 }, { "epoch": 0.7404309207006029, "grad_norm": 0.2618231475353241, "learning_rate": 9.577763709296783e-06, "loss": 0.0961, "step": 41513 }, { "epoch": 0.7404487568223166, "grad_norm": 0.21803507208824158, "learning_rate": 9.57653869018677e-06, "loss": 0.1146, "step": 41514 }, { "epoch": 0.7404665929440303, "grad_norm": 0.2623039186000824, "learning_rate": 9.575313730863414e-06, "loss": 0.111, "step": 41515 }, { "epoch": 0.740484429065744, "grad_norm": 0.5876278281211853, "learning_rate": 9.574088831331476e-06, "loss": 0.165, "step": 41516 }, { "epoch": 0.7405022651874577, "grad_norm": 0.25911587476730347, "learning_rate": 9.5728639915957e-06, "loss": 0.0668, "step": 41517 }, { "epoch": 0.7405201013091713, "grad_norm": 0.2928988039493561, "learning_rate": 9.57163921166083e-06, "loss": 0.0939, "step": 41518 }, { "epoch": 0.740537937430885, "grad_norm": 0.27473384141921997, "learning_rate": 9.570414491531612e-06, "loss": 0.1441, "step": 41519 }, { "epoch": 0.7405557735525987, "grad_norm": 0.338054895401001, "learning_rate": 9.569189831212794e-06, "loss": 0.1368, "step": 41520 }, { "epoch": 0.7405736096743124, "grad_norm": 0.2573372423648834, "learning_rate": 9.56796523070913e-06, "loss": 0.0951, "step": 41521 }, { "epoch": 0.7405914457960261, "grad_norm": 0.3364068865776062, "learning_rate": 9.566740690025364e-06, "loss": 0.1312, "step": 41522 }, { "epoch": 0.7406092819177398, "grad_norm": 0.28443315625190735, "learning_rate": 9.565516209166242e-06, "loss": 0.123, "step": 41523 }, { "epoch": 0.7406271180394535, "grad_norm": 0.26175040006637573, "learning_rate": 9.564291788136503e-06, "loss": 0.0803, "step": 41524 }, { "epoch": 0.7406449541611672, "grad_norm": 0.2979171574115753, "learning_rate": 9.563067426940897e-06, "loss": 0.0977, "step": 41525 }, { "epoch": 0.7406627902828808, "grad_norm": 0.3721378445625305, "learning_rate": 9.561843125584186e-06, "loss": 0.1492, "step": 41526 }, { "epoch": 0.7406806264045945, "grad_norm": 0.198248028755188, "learning_rate": 9.560618884071102e-06, "loss": 0.0677, "step": 41527 }, { "epoch": 0.7406984625263083, "grad_norm": 0.25049564242362976, "learning_rate": 9.55939470240638e-06, "loss": 0.1362, "step": 41528 }, { "epoch": 0.740716298648022, "grad_norm": 0.3355697691440582, "learning_rate": 9.558170580594789e-06, "loss": 0.1048, "step": 41529 }, { "epoch": 0.7407341347697357, "grad_norm": 0.23160558938980103, "learning_rate": 9.556946518641061e-06, "loss": 0.0722, "step": 41530 }, { "epoch": 0.7407519708914494, "grad_norm": 0.25317880511283875, "learning_rate": 9.555722516549942e-06, "loss": 0.1171, "step": 41531 }, { "epoch": 0.7407698070131631, "grad_norm": 0.23997081816196442, "learning_rate": 9.554498574326176e-06, "loss": 0.1327, "step": 41532 }, { "epoch": 0.7407876431348768, "grad_norm": 0.47078171372413635, "learning_rate": 9.5532746919745e-06, "loss": 0.1193, "step": 41533 }, { "epoch": 0.7408054792565905, "grad_norm": 0.22875575721263885, "learning_rate": 9.552050869499679e-06, "loss": 0.0959, "step": 41534 }, { "epoch": 0.7408233153783041, "grad_norm": 0.3673996329307556, "learning_rate": 9.55082710690644e-06, "loss": 0.0944, "step": 41535 }, { "epoch": 0.7408411515000178, "grad_norm": 0.24300555884838104, "learning_rate": 9.549603404199534e-06, "loss": 0.0866, "step": 41536 }, { "epoch": 0.7408589876217315, "grad_norm": 0.2510339617729187, "learning_rate": 9.54837976138369e-06, "loss": 0.1618, "step": 41537 }, { "epoch": 0.7408768237434452, "grad_norm": 0.21743349730968475, "learning_rate": 9.547156178463673e-06, "loss": 0.1469, "step": 41538 }, { "epoch": 0.7408946598651589, "grad_norm": 0.186907559633255, "learning_rate": 9.545932655444207e-06, "loss": 0.0838, "step": 41539 }, { "epoch": 0.7409124959868726, "grad_norm": 0.21787531673908234, "learning_rate": 9.544709192330054e-06, "loss": 0.062, "step": 41540 }, { "epoch": 0.7409303321085863, "grad_norm": 0.32206791639328003, "learning_rate": 9.543485789125933e-06, "loss": 0.1646, "step": 41541 }, { "epoch": 0.7409481682303, "grad_norm": 0.27628186345100403, "learning_rate": 9.542262445836615e-06, "loss": 0.1638, "step": 41542 }, { "epoch": 0.7409660043520137, "grad_norm": 0.23920463025569916, "learning_rate": 9.541039162466819e-06, "loss": 0.114, "step": 41543 }, { "epoch": 0.7409838404737273, "grad_norm": 0.25832855701446533, "learning_rate": 9.539815939021302e-06, "loss": 0.1492, "step": 41544 }, { "epoch": 0.7410016765954411, "grad_norm": 0.27545973658561707, "learning_rate": 9.538592775504793e-06, "loss": 0.12, "step": 41545 }, { "epoch": 0.7410195127171548, "grad_norm": 0.3088870048522949, "learning_rate": 9.537369671922031e-06, "loss": 0.08, "step": 41546 }, { "epoch": 0.7410373488388685, "grad_norm": 0.3808946907520294, "learning_rate": 9.536146628277775e-06, "loss": 0.1004, "step": 41547 }, { "epoch": 0.7410551849605822, "grad_norm": 0.30978134274482727, "learning_rate": 9.534923644576754e-06, "loss": 0.1648, "step": 41548 }, { "epoch": 0.7410730210822959, "grad_norm": 0.2890113294124603, "learning_rate": 9.533700720823713e-06, "loss": 0.1383, "step": 41549 }, { "epoch": 0.7410908572040096, "grad_norm": 0.22573745250701904, "learning_rate": 9.532477857023378e-06, "loss": 0.104, "step": 41550 }, { "epoch": 0.7411086933257233, "grad_norm": 0.1530609428882599, "learning_rate": 9.531255053180513e-06, "loss": 0.0693, "step": 41551 }, { "epoch": 0.741126529447437, "grad_norm": 0.2667560875415802, "learning_rate": 9.530032309299835e-06, "loss": 0.0942, "step": 41552 }, { "epoch": 0.7411443655691506, "grad_norm": 0.27196401357650757, "learning_rate": 9.528809625386103e-06, "loss": 0.1479, "step": 41553 }, { "epoch": 0.7411622016908643, "grad_norm": 0.26229971647262573, "learning_rate": 9.527587001444042e-06, "loss": 0.1587, "step": 41554 }, { "epoch": 0.741180037812578, "grad_norm": 0.2506105601787567, "learning_rate": 9.526364437478405e-06, "loss": 0.105, "step": 41555 }, { "epoch": 0.7411978739342917, "grad_norm": 0.39847680926322937, "learning_rate": 9.525141933493925e-06, "loss": 0.0968, "step": 41556 }, { "epoch": 0.7412157100560054, "grad_norm": 0.2831707000732422, "learning_rate": 9.52391948949534e-06, "loss": 0.0951, "step": 41557 }, { "epoch": 0.7412335461777191, "grad_norm": 0.2951960265636444, "learning_rate": 9.522697105487386e-06, "loss": 0.1528, "step": 41558 }, { "epoch": 0.7412513822994328, "grad_norm": 0.2560045123100281, "learning_rate": 9.521474781474795e-06, "loss": 0.1544, "step": 41559 }, { "epoch": 0.7412692184211465, "grad_norm": 0.30274707078933716, "learning_rate": 9.520252517462324e-06, "loss": 0.1416, "step": 41560 }, { "epoch": 0.7412870545428601, "grad_norm": 0.3149712085723877, "learning_rate": 9.519030313454702e-06, "loss": 0.0914, "step": 41561 }, { "epoch": 0.741304890664574, "grad_norm": 0.28240153193473816, "learning_rate": 9.517808169456665e-06, "loss": 0.1271, "step": 41562 }, { "epoch": 0.7413227267862876, "grad_norm": 0.2329358011484146, "learning_rate": 9.516586085472942e-06, "loss": 0.1326, "step": 41563 }, { "epoch": 0.7413405629080013, "grad_norm": 0.21053653955459595, "learning_rate": 9.515364061508285e-06, "loss": 0.1014, "step": 41564 }, { "epoch": 0.741358399029715, "grad_norm": 0.21459347009658813, "learning_rate": 9.51414209756743e-06, "loss": 0.0801, "step": 41565 }, { "epoch": 0.7413762351514287, "grad_norm": 0.26571404933929443, "learning_rate": 9.512920193655098e-06, "loss": 0.1171, "step": 41566 }, { "epoch": 0.7413940712731424, "grad_norm": 0.21552519500255585, "learning_rate": 9.511698349776044e-06, "loss": 0.1118, "step": 41567 }, { "epoch": 0.7414119073948561, "grad_norm": 0.22639957070350647, "learning_rate": 9.51047656593499e-06, "loss": 0.0571, "step": 41568 }, { "epoch": 0.7414297435165698, "grad_norm": 0.2666633725166321, "learning_rate": 9.509254842136683e-06, "loss": 0.0961, "step": 41569 }, { "epoch": 0.7414475796382834, "grad_norm": 0.3685295283794403, "learning_rate": 9.508033178385858e-06, "loss": 0.1326, "step": 41570 }, { "epoch": 0.7414654157599971, "grad_norm": 0.2418726682662964, "learning_rate": 9.506811574687249e-06, "loss": 0.1272, "step": 41571 }, { "epoch": 0.7414832518817108, "grad_norm": 0.24539007246494293, "learning_rate": 9.505590031045577e-06, "loss": 0.1288, "step": 41572 }, { "epoch": 0.7415010880034245, "grad_norm": 0.25588253140449524, "learning_rate": 9.5043685474656e-06, "loss": 0.148, "step": 41573 }, { "epoch": 0.7415189241251382, "grad_norm": 0.2501687705516815, "learning_rate": 9.503147123952044e-06, "loss": 0.1008, "step": 41574 }, { "epoch": 0.7415367602468519, "grad_norm": 0.5458818674087524, "learning_rate": 9.50192576050964e-06, "loss": 0.1111, "step": 41575 }, { "epoch": 0.7415545963685656, "grad_norm": 0.27154460549354553, "learning_rate": 9.500704457143117e-06, "loss": 0.0901, "step": 41576 }, { "epoch": 0.7415724324902793, "grad_norm": 0.22834055125713348, "learning_rate": 9.499483213857225e-06, "loss": 0.129, "step": 41577 }, { "epoch": 0.741590268611993, "grad_norm": 0.2994661331176758, "learning_rate": 9.49826203065669e-06, "loss": 0.1965, "step": 41578 }, { "epoch": 0.7416081047337068, "grad_norm": 0.2642194926738739, "learning_rate": 9.497040907546236e-06, "loss": 0.1374, "step": 41579 }, { "epoch": 0.7416259408554204, "grad_norm": 0.4218829870223999, "learning_rate": 9.495819844530616e-06, "loss": 0.1428, "step": 41580 }, { "epoch": 0.7416437769771341, "grad_norm": 0.27938157320022583, "learning_rate": 9.494598841614544e-06, "loss": 0.1411, "step": 41581 }, { "epoch": 0.7416616130988478, "grad_norm": 0.3028716742992401, "learning_rate": 9.493377898802771e-06, "loss": 0.0888, "step": 41582 }, { "epoch": 0.7416794492205615, "grad_norm": 0.2963705062866211, "learning_rate": 9.49215701610002e-06, "loss": 0.1339, "step": 41583 }, { "epoch": 0.7416972853422752, "grad_norm": 0.33887961506843567, "learning_rate": 9.490936193511027e-06, "loss": 0.1158, "step": 41584 }, { "epoch": 0.7417151214639889, "grad_norm": 0.25907376408576965, "learning_rate": 9.48971543104051e-06, "loss": 0.0586, "step": 41585 }, { "epoch": 0.7417329575857026, "grad_norm": 0.25996875762939453, "learning_rate": 9.488494728693226e-06, "loss": 0.1231, "step": 41586 }, { "epoch": 0.7417507937074163, "grad_norm": 0.33476582169532776, "learning_rate": 9.48727408647389e-06, "loss": 0.129, "step": 41587 }, { "epoch": 0.7417686298291299, "grad_norm": 0.2209702432155609, "learning_rate": 9.486053504387241e-06, "loss": 0.1152, "step": 41588 }, { "epoch": 0.7417864659508436, "grad_norm": 0.29414498805999756, "learning_rate": 9.484832982437996e-06, "loss": 0.0627, "step": 41589 }, { "epoch": 0.7418043020725573, "grad_norm": 0.22861534357070923, "learning_rate": 9.483612520630905e-06, "loss": 0.1152, "step": 41590 }, { "epoch": 0.741822138194271, "grad_norm": 0.2732055187225342, "learning_rate": 9.482392118970693e-06, "loss": 0.1324, "step": 41591 }, { "epoch": 0.7418399743159847, "grad_norm": 0.2440653145313263, "learning_rate": 9.481171777462088e-06, "loss": 0.1056, "step": 41592 }, { "epoch": 0.7418578104376984, "grad_norm": 0.30423206090927124, "learning_rate": 9.479951496109813e-06, "loss": 0.1617, "step": 41593 }, { "epoch": 0.7418756465594121, "grad_norm": 0.22783881425857544, "learning_rate": 9.478731274918615e-06, "loss": 0.0993, "step": 41594 }, { "epoch": 0.7418934826811259, "grad_norm": 0.2019798755645752, "learning_rate": 9.477511113893206e-06, "loss": 0.0796, "step": 41595 }, { "epoch": 0.7419113188028396, "grad_norm": 0.28052619099617004, "learning_rate": 9.476291013038336e-06, "loss": 0.0789, "step": 41596 }, { "epoch": 0.7419291549245532, "grad_norm": 0.2511623203754425, "learning_rate": 9.47507097235872e-06, "loss": 0.0914, "step": 41597 }, { "epoch": 0.7419469910462669, "grad_norm": 0.23255203664302826, "learning_rate": 9.473850991859085e-06, "loss": 0.1087, "step": 41598 }, { "epoch": 0.7419648271679806, "grad_norm": 0.2532743513584137, "learning_rate": 9.472631071544175e-06, "loss": 0.1546, "step": 41599 }, { "epoch": 0.7419826632896943, "grad_norm": 0.2276996374130249, "learning_rate": 9.47141121141871e-06, "loss": 0.1059, "step": 41600 }, { "epoch": 0.742000499411408, "grad_norm": 0.21323199570178986, "learning_rate": 9.470191411487416e-06, "loss": 0.1058, "step": 41601 }, { "epoch": 0.7420183355331217, "grad_norm": 0.23297640681266785, "learning_rate": 9.468971671755018e-06, "loss": 0.0949, "step": 41602 }, { "epoch": 0.7420361716548354, "grad_norm": 0.3119436204433441, "learning_rate": 9.46775199222626e-06, "loss": 0.1469, "step": 41603 }, { "epoch": 0.7420540077765491, "grad_norm": 0.25828230381011963, "learning_rate": 9.466532372905856e-06, "loss": 0.0885, "step": 41604 }, { "epoch": 0.7420718438982628, "grad_norm": 0.36914077401161194, "learning_rate": 9.465312813798537e-06, "loss": 0.1114, "step": 41605 }, { "epoch": 0.7420896800199764, "grad_norm": 0.24076122045516968, "learning_rate": 9.464093314909025e-06, "loss": 0.11, "step": 41606 }, { "epoch": 0.7421075161416901, "grad_norm": 0.22149060666561127, "learning_rate": 9.462873876242062e-06, "loss": 0.0845, "step": 41607 }, { "epoch": 0.7421253522634038, "grad_norm": 0.2874957323074341, "learning_rate": 9.461654497802358e-06, "loss": 0.1575, "step": 41608 }, { "epoch": 0.7421431883851175, "grad_norm": 0.2312544286251068, "learning_rate": 9.460435179594657e-06, "loss": 0.101, "step": 41609 }, { "epoch": 0.7421610245068312, "grad_norm": 0.21125483512878418, "learning_rate": 9.459215921623673e-06, "loss": 0.083, "step": 41610 }, { "epoch": 0.7421788606285449, "grad_norm": 0.2313290387392044, "learning_rate": 9.45799672389413e-06, "loss": 0.1311, "step": 41611 }, { "epoch": 0.7421966967502587, "grad_norm": 0.3128338158130646, "learning_rate": 9.456777586410767e-06, "loss": 0.1097, "step": 41612 }, { "epoch": 0.7422145328719724, "grad_norm": 0.2656687796115875, "learning_rate": 9.455558509178306e-06, "loss": 0.1753, "step": 41613 }, { "epoch": 0.742232368993686, "grad_norm": 0.2783730626106262, "learning_rate": 9.454339492201464e-06, "loss": 0.1045, "step": 41614 }, { "epoch": 0.7422502051153997, "grad_norm": 0.2757885158061981, "learning_rate": 9.453120535484967e-06, "loss": 0.1274, "step": 41615 }, { "epoch": 0.7422680412371134, "grad_norm": 0.35291942954063416, "learning_rate": 9.45190163903355e-06, "loss": 0.1221, "step": 41616 }, { "epoch": 0.7422858773588271, "grad_norm": 0.30008742213249207, "learning_rate": 9.450682802851934e-06, "loss": 0.1236, "step": 41617 }, { "epoch": 0.7423037134805408, "grad_norm": 0.20530201494693756, "learning_rate": 9.449464026944841e-06, "loss": 0.1177, "step": 41618 }, { "epoch": 0.7423215496022545, "grad_norm": 0.23360618948936462, "learning_rate": 9.448245311316989e-06, "loss": 0.0906, "step": 41619 }, { "epoch": 0.7423393857239682, "grad_norm": 0.40567994117736816, "learning_rate": 9.447026655973118e-06, "loss": 0.1394, "step": 41620 }, { "epoch": 0.7423572218456819, "grad_norm": 0.26606419682502747, "learning_rate": 9.445808060917943e-06, "loss": 0.1421, "step": 41621 }, { "epoch": 0.7423750579673956, "grad_norm": 0.39970555901527405, "learning_rate": 9.44458952615618e-06, "loss": 0.1361, "step": 41622 }, { "epoch": 0.7423928940891092, "grad_norm": 0.3549562990665436, "learning_rate": 9.443371051692568e-06, "loss": 0.176, "step": 41623 }, { "epoch": 0.7424107302108229, "grad_norm": 0.22855940461158752, "learning_rate": 9.442152637531816e-06, "loss": 0.0969, "step": 41624 }, { "epoch": 0.7424285663325366, "grad_norm": 0.38711312413215637, "learning_rate": 9.440934283678662e-06, "loss": 0.119, "step": 41625 }, { "epoch": 0.7424464024542503, "grad_norm": 0.31242865324020386, "learning_rate": 9.43971599013782e-06, "loss": 0.1494, "step": 41626 }, { "epoch": 0.742464238575964, "grad_norm": 0.1833241730928421, "learning_rate": 9.438497756914013e-06, "loss": 0.1375, "step": 41627 }, { "epoch": 0.7424820746976777, "grad_norm": 0.3609023094177246, "learning_rate": 9.437279584011957e-06, "loss": 0.1706, "step": 41628 }, { "epoch": 0.7424999108193915, "grad_norm": 0.23171988129615784, "learning_rate": 9.436061471436386e-06, "loss": 0.0634, "step": 41629 }, { "epoch": 0.7425177469411052, "grad_norm": 0.2211289405822754, "learning_rate": 9.434843419192019e-06, "loss": 0.1075, "step": 41630 }, { "epoch": 0.7425355830628189, "grad_norm": 0.3138478696346283, "learning_rate": 9.433625427283574e-06, "loss": 0.1176, "step": 41631 }, { "epoch": 0.7425534191845325, "grad_norm": 0.22442957758903503, "learning_rate": 9.432407495715765e-06, "loss": 0.1011, "step": 41632 }, { "epoch": 0.7425712553062462, "grad_norm": 0.3194059729576111, "learning_rate": 9.431189624493328e-06, "loss": 0.1297, "step": 41633 }, { "epoch": 0.7425890914279599, "grad_norm": 0.2304256409406662, "learning_rate": 9.42997181362098e-06, "loss": 0.078, "step": 41634 }, { "epoch": 0.7426069275496736, "grad_norm": 0.3238835334777832, "learning_rate": 9.428754063103429e-06, "loss": 0.1025, "step": 41635 }, { "epoch": 0.7426247636713873, "grad_norm": 0.25386375188827515, "learning_rate": 9.427536372945414e-06, "loss": 0.1182, "step": 41636 }, { "epoch": 0.742642599793101, "grad_norm": 0.22706788778305054, "learning_rate": 9.426318743151638e-06, "loss": 0.0956, "step": 41637 }, { "epoch": 0.7426604359148147, "grad_norm": 0.4195094704627991, "learning_rate": 9.425101173726839e-06, "loss": 0.1561, "step": 41638 }, { "epoch": 0.7426782720365284, "grad_norm": 0.22369061410427094, "learning_rate": 9.423883664675725e-06, "loss": 0.119, "step": 41639 }, { "epoch": 0.742696108158242, "grad_norm": 0.267567902803421, "learning_rate": 9.422666216003021e-06, "loss": 0.1405, "step": 41640 }, { "epoch": 0.7427139442799557, "grad_norm": 0.25982314348220825, "learning_rate": 9.42144882771343e-06, "loss": 0.1121, "step": 41641 }, { "epoch": 0.7427317804016694, "grad_norm": 0.3214619755744934, "learning_rate": 9.420231499811696e-06, "loss": 0.1267, "step": 41642 }, { "epoch": 0.7427496165233831, "grad_norm": 0.256944864988327, "learning_rate": 9.419014232302525e-06, "loss": 0.0832, "step": 41643 }, { "epoch": 0.7427674526450968, "grad_norm": 0.2596313953399658, "learning_rate": 9.41779702519064e-06, "loss": 0.0879, "step": 41644 }, { "epoch": 0.7427852887668105, "grad_norm": 0.21887367963790894, "learning_rate": 9.416579878480741e-06, "loss": 0.0809, "step": 41645 }, { "epoch": 0.7428031248885243, "grad_norm": 0.23044101893901825, "learning_rate": 9.415362792177573e-06, "loss": 0.1533, "step": 41646 }, { "epoch": 0.742820961010238, "grad_norm": 0.2685229182243347, "learning_rate": 9.41414576628584e-06, "loss": 0.1467, "step": 41647 }, { "epoch": 0.7428387971319517, "grad_norm": 0.32725685834884644, "learning_rate": 9.412928800810262e-06, "loss": 0.1515, "step": 41648 }, { "epoch": 0.7428566332536654, "grad_norm": 0.2877674102783203, "learning_rate": 9.411711895755546e-06, "loss": 0.1085, "step": 41649 }, { "epoch": 0.742874469375379, "grad_norm": 0.27718546986579895, "learning_rate": 9.41049505112642e-06, "loss": 0.1203, "step": 41650 }, { "epoch": 0.7428923054970927, "grad_norm": 0.2505258619785309, "learning_rate": 9.409278266927607e-06, "loss": 0.1146, "step": 41651 }, { "epoch": 0.7429101416188064, "grad_norm": 0.2803599536418915, "learning_rate": 9.408061543163815e-06, "loss": 0.155, "step": 41652 }, { "epoch": 0.7429279777405201, "grad_norm": 0.24942843616008759, "learning_rate": 9.406844879839766e-06, "loss": 0.0881, "step": 41653 }, { "epoch": 0.7429458138622338, "grad_norm": 0.2915973961353302, "learning_rate": 9.40562827696016e-06, "loss": 0.1182, "step": 41654 }, { "epoch": 0.7429636499839475, "grad_norm": 0.27929285168647766, "learning_rate": 9.404411734529734e-06, "loss": 0.1444, "step": 41655 }, { "epoch": 0.7429814861056612, "grad_norm": 0.2811204493045807, "learning_rate": 9.403195252553197e-06, "loss": 0.1637, "step": 41656 }, { "epoch": 0.7429993222273749, "grad_norm": 0.1754535436630249, "learning_rate": 9.401978831035257e-06, "loss": 0.1145, "step": 41657 }, { "epoch": 0.7430171583490885, "grad_norm": 0.17934869229793549, "learning_rate": 9.400762469980631e-06, "loss": 0.0898, "step": 41658 }, { "epoch": 0.7430349944708022, "grad_norm": 0.2595594525337219, "learning_rate": 9.399546169394044e-06, "loss": 0.1311, "step": 41659 }, { "epoch": 0.7430528305925159, "grad_norm": 0.2776778042316437, "learning_rate": 9.398329929280203e-06, "loss": 0.172, "step": 41660 }, { "epoch": 0.7430706667142296, "grad_norm": 0.3605760335922241, "learning_rate": 9.397113749643827e-06, "loss": 0.1938, "step": 41661 }, { "epoch": 0.7430885028359433, "grad_norm": 0.5109666585922241, "learning_rate": 9.395897630489617e-06, "loss": 0.1479, "step": 41662 }, { "epoch": 0.7431063389576571, "grad_norm": 0.2370705008506775, "learning_rate": 9.394681571822303e-06, "loss": 0.071, "step": 41663 }, { "epoch": 0.7431241750793708, "grad_norm": 0.3058117926120758, "learning_rate": 9.393465573646587e-06, "loss": 0.1264, "step": 41664 }, { "epoch": 0.7431420112010845, "grad_norm": 0.3172210156917572, "learning_rate": 9.3922496359672e-06, "loss": 0.115, "step": 41665 }, { "epoch": 0.7431598473227982, "grad_norm": 0.21875812113285065, "learning_rate": 9.391033758788842e-06, "loss": 0.1128, "step": 41666 }, { "epoch": 0.7431776834445118, "grad_norm": 0.3352431654930115, "learning_rate": 9.389817942116218e-06, "loss": 0.0973, "step": 41667 }, { "epoch": 0.7431955195662255, "grad_norm": 0.2931629717350006, "learning_rate": 9.388602185954063e-06, "loss": 0.0671, "step": 41668 }, { "epoch": 0.7432133556879392, "grad_norm": 0.33040523529052734, "learning_rate": 9.387386490307074e-06, "loss": 0.1152, "step": 41669 }, { "epoch": 0.7432311918096529, "grad_norm": 0.2503654658794403, "learning_rate": 9.38617085517997e-06, "loss": 0.1223, "step": 41670 }, { "epoch": 0.7432490279313666, "grad_norm": 0.23381134867668152, "learning_rate": 9.38495528057745e-06, "loss": 0.1096, "step": 41671 }, { "epoch": 0.7432668640530803, "grad_norm": 0.28774163126945496, "learning_rate": 9.383739766504249e-06, "loss": 0.1118, "step": 41672 }, { "epoch": 0.743284700174794, "grad_norm": 0.29325708746910095, "learning_rate": 9.382524312965063e-06, "loss": 0.1174, "step": 41673 }, { "epoch": 0.7433025362965077, "grad_norm": 0.28332558274269104, "learning_rate": 9.38130891996461e-06, "loss": 0.0771, "step": 41674 }, { "epoch": 0.7433203724182214, "grad_norm": 0.34472018480300903, "learning_rate": 9.380093587507596e-06, "loss": 0.0835, "step": 41675 }, { "epoch": 0.743338208539935, "grad_norm": 0.34437990188598633, "learning_rate": 9.378878315598724e-06, "loss": 0.1019, "step": 41676 }, { "epoch": 0.7433560446616487, "grad_norm": 0.4410110116004944, "learning_rate": 9.377663104242726e-06, "loss": 0.1704, "step": 41677 }, { "epoch": 0.7433738807833624, "grad_norm": 0.3495272397994995, "learning_rate": 9.376447953444293e-06, "loss": 0.1693, "step": 41678 }, { "epoch": 0.7433917169050761, "grad_norm": 0.29913029074668884, "learning_rate": 9.37523286320815e-06, "loss": 0.1415, "step": 41679 }, { "epoch": 0.7434095530267899, "grad_norm": 0.34118911623954773, "learning_rate": 9.374017833538994e-06, "loss": 0.1373, "step": 41680 }, { "epoch": 0.7434273891485036, "grad_norm": 0.22000570595264435, "learning_rate": 9.372802864441551e-06, "loss": 0.1066, "step": 41681 }, { "epoch": 0.7434452252702173, "grad_norm": 0.30710750818252563, "learning_rate": 9.371587955920521e-06, "loss": 0.0896, "step": 41682 }, { "epoch": 0.743463061391931, "grad_norm": 0.42219114303588867, "learning_rate": 9.370373107980614e-06, "loss": 0.0753, "step": 41683 }, { "epoch": 0.7434808975136447, "grad_norm": 0.24803954362869263, "learning_rate": 9.369158320626533e-06, "loss": 0.0828, "step": 41684 }, { "epoch": 0.7434987336353583, "grad_norm": 0.25883179903030396, "learning_rate": 9.367943593862998e-06, "loss": 0.123, "step": 41685 }, { "epoch": 0.743516569757072, "grad_norm": 0.2109529674053192, "learning_rate": 9.366728927694715e-06, "loss": 0.1172, "step": 41686 }, { "epoch": 0.7435344058787857, "grad_norm": 0.46198832988739014, "learning_rate": 9.36551432212639e-06, "loss": 0.1567, "step": 41687 }, { "epoch": 0.7435522420004994, "grad_norm": 0.30942273139953613, "learning_rate": 9.364299777162733e-06, "loss": 0.143, "step": 41688 }, { "epoch": 0.7435700781222131, "grad_norm": 0.3172661364078522, "learning_rate": 9.363085292808439e-06, "loss": 0.1346, "step": 41689 }, { "epoch": 0.7435879142439268, "grad_norm": 0.3130984902381897, "learning_rate": 9.361870869068237e-06, "loss": 0.1071, "step": 41690 }, { "epoch": 0.7436057503656405, "grad_norm": 0.29129648208618164, "learning_rate": 9.360656505946816e-06, "loss": 0.116, "step": 41691 }, { "epoch": 0.7436235864873542, "grad_norm": 0.3340189754962921, "learning_rate": 9.359442203448898e-06, "loss": 0.1475, "step": 41692 }, { "epoch": 0.7436414226090678, "grad_norm": 0.2583788335323334, "learning_rate": 9.35822796157918e-06, "loss": 0.1531, "step": 41693 }, { "epoch": 0.7436592587307815, "grad_norm": 0.27318018674850464, "learning_rate": 9.35701378034238e-06, "loss": 0.121, "step": 41694 }, { "epoch": 0.7436770948524952, "grad_norm": 0.28395748138427734, "learning_rate": 9.355799659743195e-06, "loss": 0.1045, "step": 41695 }, { "epoch": 0.743694930974209, "grad_norm": 0.26743993163108826, "learning_rate": 9.354585599786336e-06, "loss": 0.1335, "step": 41696 }, { "epoch": 0.7437127670959227, "grad_norm": 0.26818326115608215, "learning_rate": 9.353371600476498e-06, "loss": 0.0945, "step": 41697 }, { "epoch": 0.7437306032176364, "grad_norm": 0.33066803216934204, "learning_rate": 9.352157661818406e-06, "loss": 0.1129, "step": 41698 }, { "epoch": 0.7437484393393501, "grad_norm": 0.2889024615287781, "learning_rate": 9.350943783816754e-06, "loss": 0.1323, "step": 41699 }, { "epoch": 0.7437662754610638, "grad_norm": 0.27680182456970215, "learning_rate": 9.349729966476249e-06, "loss": 0.1289, "step": 41700 }, { "epoch": 0.7437841115827775, "grad_norm": 0.22897565364837646, "learning_rate": 9.348516209801597e-06, "loss": 0.1019, "step": 41701 }, { "epoch": 0.7438019477044912, "grad_norm": 0.2774960994720459, "learning_rate": 9.347302513797492e-06, "loss": 0.1127, "step": 41702 }, { "epoch": 0.7438197838262048, "grad_norm": 0.2343015968799591, "learning_rate": 9.34608887846866e-06, "loss": 0.1015, "step": 41703 }, { "epoch": 0.7438376199479185, "grad_norm": 0.2966124415397644, "learning_rate": 9.344875303819789e-06, "loss": 0.1197, "step": 41704 }, { "epoch": 0.7438554560696322, "grad_norm": 0.20514456927776337, "learning_rate": 9.343661789855584e-06, "loss": 0.089, "step": 41705 }, { "epoch": 0.7438732921913459, "grad_norm": 0.25766170024871826, "learning_rate": 9.342448336580753e-06, "loss": 0.0961, "step": 41706 }, { "epoch": 0.7438911283130596, "grad_norm": 0.2840248942375183, "learning_rate": 9.341234944000008e-06, "loss": 0.0914, "step": 41707 }, { "epoch": 0.7439089644347733, "grad_norm": 0.4357462227344513, "learning_rate": 9.340021612118044e-06, "loss": 0.1297, "step": 41708 }, { "epoch": 0.743926800556487, "grad_norm": 0.18843814730644226, "learning_rate": 9.338808340939567e-06, "loss": 0.0712, "step": 41709 }, { "epoch": 0.7439446366782007, "grad_norm": 0.3405860662460327, "learning_rate": 9.337595130469267e-06, "loss": 0.0777, "step": 41710 }, { "epoch": 0.7439624727999143, "grad_norm": 0.2407132089138031, "learning_rate": 9.336381980711867e-06, "loss": 0.157, "step": 41711 }, { "epoch": 0.743980308921628, "grad_norm": 0.31268998980522156, "learning_rate": 9.335168891672064e-06, "loss": 0.1321, "step": 41712 }, { "epoch": 0.7439981450433418, "grad_norm": 0.21785883605480194, "learning_rate": 9.333955863354551e-06, "loss": 0.0967, "step": 41713 }, { "epoch": 0.7440159811650555, "grad_norm": 0.28984639048576355, "learning_rate": 9.332742895764041e-06, "loss": 0.0917, "step": 41714 }, { "epoch": 0.7440338172867692, "grad_norm": 0.2926657199859619, "learning_rate": 9.33152998890522e-06, "loss": 0.1134, "step": 41715 }, { "epoch": 0.7440516534084829, "grad_norm": 0.20349322259426117, "learning_rate": 9.33031714278281e-06, "loss": 0.1034, "step": 41716 }, { "epoch": 0.7440694895301966, "grad_norm": 0.37565159797668457, "learning_rate": 9.329104357401503e-06, "loss": 0.1565, "step": 41717 }, { "epoch": 0.7440873256519103, "grad_norm": 0.25387582182884216, "learning_rate": 9.32789163276599e-06, "loss": 0.132, "step": 41718 }, { "epoch": 0.744105161773624, "grad_norm": 0.310578852891922, "learning_rate": 9.326678968880991e-06, "loss": 0.1543, "step": 41719 }, { "epoch": 0.7441229978953376, "grad_norm": 0.28139519691467285, "learning_rate": 9.32546636575119e-06, "loss": 0.1005, "step": 41720 }, { "epoch": 0.7441408340170513, "grad_norm": 0.3438161313533783, "learning_rate": 9.324253823381302e-06, "loss": 0.1168, "step": 41721 }, { "epoch": 0.744158670138765, "grad_norm": 0.2320399135351181, "learning_rate": 9.323041341776023e-06, "loss": 0.0977, "step": 41722 }, { "epoch": 0.7441765062604787, "grad_norm": 0.2858046293258667, "learning_rate": 9.32182892094004e-06, "loss": 0.0854, "step": 41723 }, { "epoch": 0.7441943423821924, "grad_norm": 0.27552470564842224, "learning_rate": 9.320616560878073e-06, "loss": 0.2017, "step": 41724 }, { "epoch": 0.7442121785039061, "grad_norm": 0.22283224761486053, "learning_rate": 9.319404261594811e-06, "loss": 0.1036, "step": 41725 }, { "epoch": 0.7442300146256198, "grad_norm": 0.2163795828819275, "learning_rate": 9.318192023094951e-06, "loss": 0.0761, "step": 41726 }, { "epoch": 0.7442478507473335, "grad_norm": 0.27455413341522217, "learning_rate": 9.3169798453832e-06, "loss": 0.1096, "step": 41727 }, { "epoch": 0.7442656868690471, "grad_norm": 0.22569192945957184, "learning_rate": 9.315767728464241e-06, "loss": 0.0914, "step": 41728 }, { "epoch": 0.7442835229907608, "grad_norm": 0.26902055740356445, "learning_rate": 9.314555672342792e-06, "loss": 0.1045, "step": 41729 }, { "epoch": 0.7443013591124746, "grad_norm": 0.29484423995018005, "learning_rate": 9.313343677023542e-06, "loss": 0.1213, "step": 41730 }, { "epoch": 0.7443191952341883, "grad_norm": 0.26825323700904846, "learning_rate": 9.312131742511192e-06, "loss": 0.0943, "step": 41731 }, { "epoch": 0.744337031355902, "grad_norm": 0.22511784732341766, "learning_rate": 9.310919868810428e-06, "loss": 0.0844, "step": 41732 }, { "epoch": 0.7443548674776157, "grad_norm": 0.3141074478626251, "learning_rate": 9.309708055925959e-06, "loss": 0.1466, "step": 41733 }, { "epoch": 0.7443727035993294, "grad_norm": 0.26542744040489197, "learning_rate": 9.308496303862488e-06, "loss": 0.1193, "step": 41734 }, { "epoch": 0.7443905397210431, "grad_norm": 0.21172067523002625, "learning_rate": 9.307284612624703e-06, "loss": 0.1003, "step": 41735 }, { "epoch": 0.7444083758427568, "grad_norm": 0.29889070987701416, "learning_rate": 9.306072982217299e-06, "loss": 0.1163, "step": 41736 }, { "epoch": 0.7444262119644705, "grad_norm": 0.3729795217514038, "learning_rate": 9.30486141264498e-06, "loss": 0.1624, "step": 41737 }, { "epoch": 0.7444440480861841, "grad_norm": 0.24151965975761414, "learning_rate": 9.303649903912442e-06, "loss": 0.0895, "step": 41738 }, { "epoch": 0.7444618842078978, "grad_norm": 0.35498708486557007, "learning_rate": 9.302438456024378e-06, "loss": 0.113, "step": 41739 }, { "epoch": 0.7444797203296115, "grad_norm": 0.27695244550704956, "learning_rate": 9.301227068985477e-06, "loss": 0.0955, "step": 41740 }, { "epoch": 0.7444975564513252, "grad_norm": 0.18597276508808136, "learning_rate": 9.300015742800449e-06, "loss": 0.0615, "step": 41741 }, { "epoch": 0.7445153925730389, "grad_norm": 0.28616073727607727, "learning_rate": 9.298804477473983e-06, "loss": 0.0898, "step": 41742 }, { "epoch": 0.7445332286947526, "grad_norm": 0.29208970069885254, "learning_rate": 9.297593273010774e-06, "loss": 0.1568, "step": 41743 }, { "epoch": 0.7445510648164663, "grad_norm": 0.2410343438386917, "learning_rate": 9.296382129415515e-06, "loss": 0.1178, "step": 41744 }, { "epoch": 0.74456890093818, "grad_norm": 0.1810934841632843, "learning_rate": 9.295171046692897e-06, "loss": 0.0724, "step": 41745 }, { "epoch": 0.7445867370598936, "grad_norm": 0.26009905338287354, "learning_rate": 9.293960024847629e-06, "loss": 0.0496, "step": 41746 }, { "epoch": 0.7446045731816074, "grad_norm": 0.2826032340526581, "learning_rate": 9.292749063884384e-06, "loss": 0.1332, "step": 41747 }, { "epoch": 0.7446224093033211, "grad_norm": 0.27991506457328796, "learning_rate": 9.291538163807881e-06, "loss": 0.0613, "step": 41748 }, { "epoch": 0.7446402454250348, "grad_norm": 0.38155898451805115, "learning_rate": 9.29032732462279e-06, "loss": 0.0785, "step": 41749 }, { "epoch": 0.7446580815467485, "grad_norm": 0.2500755190849304, "learning_rate": 9.28911654633383e-06, "loss": 0.1255, "step": 41750 }, { "epoch": 0.7446759176684622, "grad_norm": 0.27290117740631104, "learning_rate": 9.287905828945678e-06, "loss": 0.0899, "step": 41751 }, { "epoch": 0.7446937537901759, "grad_norm": 0.2371479868888855, "learning_rate": 9.286695172463028e-06, "loss": 0.0736, "step": 41752 }, { "epoch": 0.7447115899118896, "grad_norm": 0.2186102271080017, "learning_rate": 9.285484576890568e-06, "loss": 0.0803, "step": 41753 }, { "epoch": 0.7447294260336033, "grad_norm": 0.2484026998281479, "learning_rate": 9.284274042233004e-06, "loss": 0.1295, "step": 41754 }, { "epoch": 0.744747262155317, "grad_norm": 0.35418692231178284, "learning_rate": 9.283063568495024e-06, "loss": 0.1442, "step": 41755 }, { "epoch": 0.7447650982770306, "grad_norm": 0.2577676773071289, "learning_rate": 9.281853155681316e-06, "loss": 0.116, "step": 41756 }, { "epoch": 0.7447829343987443, "grad_norm": 0.3181343674659729, "learning_rate": 9.280642803796578e-06, "loss": 0.0875, "step": 41757 }, { "epoch": 0.744800770520458, "grad_norm": 0.34740081429481506, "learning_rate": 9.279432512845485e-06, "loss": 0.1719, "step": 41758 }, { "epoch": 0.7448186066421717, "grad_norm": 0.24784712493419647, "learning_rate": 9.278222282832752e-06, "loss": 0.1246, "step": 41759 }, { "epoch": 0.7448364427638854, "grad_norm": 0.24826543033123016, "learning_rate": 9.277012113763051e-06, "loss": 0.0715, "step": 41760 }, { "epoch": 0.7448542788855991, "grad_norm": 0.3630891442298889, "learning_rate": 9.275802005641091e-06, "loss": 0.1775, "step": 41761 }, { "epoch": 0.7448721150073128, "grad_norm": 0.22842375934123993, "learning_rate": 9.274591958471542e-06, "loss": 0.1188, "step": 41762 }, { "epoch": 0.7448899511290265, "grad_norm": 0.3053423762321472, "learning_rate": 9.273381972259116e-06, "loss": 0.0667, "step": 41763 }, { "epoch": 0.7449077872507402, "grad_norm": 0.30044570565223694, "learning_rate": 9.272172047008493e-06, "loss": 0.1381, "step": 41764 }, { "epoch": 0.7449256233724539, "grad_norm": 0.332535982131958, "learning_rate": 9.270962182724362e-06, "loss": 0.111, "step": 41765 }, { "epoch": 0.7449434594941676, "grad_norm": 0.2808431088924408, "learning_rate": 9.269752379411408e-06, "loss": 0.0989, "step": 41766 }, { "epoch": 0.7449612956158813, "grad_norm": 0.23383773863315582, "learning_rate": 9.268542637074335e-06, "loss": 0.0917, "step": 41767 }, { "epoch": 0.744979131737595, "grad_norm": 0.2602749466896057, "learning_rate": 9.267332955717824e-06, "loss": 0.1386, "step": 41768 }, { "epoch": 0.7449969678593087, "grad_norm": 0.22628013789653778, "learning_rate": 9.266123335346567e-06, "loss": 0.0875, "step": 41769 }, { "epoch": 0.7450148039810224, "grad_norm": 0.24421167373657227, "learning_rate": 9.264913775965245e-06, "loss": 0.1197, "step": 41770 }, { "epoch": 0.7450326401027361, "grad_norm": 0.16801457107067108, "learning_rate": 9.263704277578546e-06, "loss": 0.0603, "step": 41771 }, { "epoch": 0.7450504762244498, "grad_norm": 0.2294878214597702, "learning_rate": 9.262494840191171e-06, "loss": 0.0879, "step": 41772 }, { "epoch": 0.7450683123461634, "grad_norm": 0.2590222954750061, "learning_rate": 9.261285463807806e-06, "loss": 0.0968, "step": 41773 }, { "epoch": 0.7450861484678771, "grad_norm": 0.2385087013244629, "learning_rate": 9.260076148433123e-06, "loss": 0.1103, "step": 41774 }, { "epoch": 0.7451039845895908, "grad_norm": 0.3576755225658417, "learning_rate": 9.25886689407183e-06, "loss": 0.0979, "step": 41775 }, { "epoch": 0.7451218207113045, "grad_norm": 0.2186053842306137, "learning_rate": 9.257657700728597e-06, "loss": 0.082, "step": 41776 }, { "epoch": 0.7451396568330182, "grad_norm": 0.2458042949438095, "learning_rate": 9.25644856840813e-06, "loss": 0.1449, "step": 41777 }, { "epoch": 0.7451574929547319, "grad_norm": 0.24039867520332336, "learning_rate": 9.255239497115103e-06, "loss": 0.099, "step": 41778 }, { "epoch": 0.7451753290764456, "grad_norm": 0.37679219245910645, "learning_rate": 9.254030486854198e-06, "loss": 0.1688, "step": 41779 }, { "epoch": 0.7451931651981593, "grad_norm": 0.32191309332847595, "learning_rate": 9.25282153763012e-06, "loss": 0.1347, "step": 41780 }, { "epoch": 0.7452110013198731, "grad_norm": 0.20005831122398376, "learning_rate": 9.251612649447544e-06, "loss": 0.105, "step": 41781 }, { "epoch": 0.7452288374415867, "grad_norm": 0.23925656080245972, "learning_rate": 9.250403822311158e-06, "loss": 0.1005, "step": 41782 }, { "epoch": 0.7452466735633004, "grad_norm": 0.26097792387008667, "learning_rate": 9.249195056225643e-06, "loss": 0.1252, "step": 41783 }, { "epoch": 0.7452645096850141, "grad_norm": 0.35629400610923767, "learning_rate": 9.247986351195681e-06, "loss": 0.1504, "step": 41784 }, { "epoch": 0.7452823458067278, "grad_norm": 0.49715733528137207, "learning_rate": 9.246777707225973e-06, "loss": 0.1504, "step": 41785 }, { "epoch": 0.7453001819284415, "grad_norm": 0.21874253451824188, "learning_rate": 9.245569124321196e-06, "loss": 0.0918, "step": 41786 }, { "epoch": 0.7453180180501552, "grad_norm": 0.26758113503456116, "learning_rate": 9.244360602486027e-06, "loss": 0.1203, "step": 41787 }, { "epoch": 0.7453358541718689, "grad_norm": 0.1933550238609314, "learning_rate": 9.243152141725165e-06, "loss": 0.0584, "step": 41788 }, { "epoch": 0.7453536902935826, "grad_norm": 0.2705863416194916, "learning_rate": 9.24194374204328e-06, "loss": 0.1201, "step": 41789 }, { "epoch": 0.7453715264152962, "grad_norm": 0.2597334384918213, "learning_rate": 9.24073540344507e-06, "loss": 0.0875, "step": 41790 }, { "epoch": 0.7453893625370099, "grad_norm": 0.2080470621585846, "learning_rate": 9.239527125935216e-06, "loss": 0.1186, "step": 41791 }, { "epoch": 0.7454071986587236, "grad_norm": 0.4132150113582611, "learning_rate": 9.238318909518387e-06, "loss": 0.0939, "step": 41792 }, { "epoch": 0.7454250347804373, "grad_norm": 0.26948103308677673, "learning_rate": 9.237110754199287e-06, "loss": 0.1208, "step": 41793 }, { "epoch": 0.745442870902151, "grad_norm": 0.3922657370567322, "learning_rate": 9.235902659982593e-06, "loss": 0.1485, "step": 41794 }, { "epoch": 0.7454607070238647, "grad_norm": 0.5338376760482788, "learning_rate": 9.23469462687298e-06, "loss": 0.1782, "step": 41795 }, { "epoch": 0.7454785431455784, "grad_norm": 0.3584495782852173, "learning_rate": 9.23348665487514e-06, "loss": 0.123, "step": 41796 }, { "epoch": 0.7454963792672922, "grad_norm": 0.2581416070461273, "learning_rate": 9.232278743993742e-06, "loss": 0.1079, "step": 41797 }, { "epoch": 0.7455142153890059, "grad_norm": 0.24829868972301483, "learning_rate": 9.231070894233484e-06, "loss": 0.0715, "step": 41798 }, { "epoch": 0.7455320515107196, "grad_norm": 0.22758758068084717, "learning_rate": 9.229863105599044e-06, "loss": 0.0634, "step": 41799 }, { "epoch": 0.7455498876324332, "grad_norm": 0.2903639078140259, "learning_rate": 9.2286553780951e-06, "loss": 0.1261, "step": 41800 }, { "epoch": 0.7455677237541469, "grad_norm": 0.2785182297229767, "learning_rate": 9.227447711726325e-06, "loss": 0.1041, "step": 41801 }, { "epoch": 0.7455855598758606, "grad_norm": 0.3490147888660431, "learning_rate": 9.226240106497421e-06, "loss": 0.1769, "step": 41802 }, { "epoch": 0.7456033959975743, "grad_norm": 0.29464948177337646, "learning_rate": 9.22503256241305e-06, "loss": 0.137, "step": 41803 }, { "epoch": 0.745621232119288, "grad_norm": 0.25141823291778564, "learning_rate": 9.223825079477911e-06, "loss": 0.1141, "step": 41804 }, { "epoch": 0.7456390682410017, "grad_norm": 0.24085195362567902, "learning_rate": 9.222617657696664e-06, "loss": 0.1138, "step": 41805 }, { "epoch": 0.7456569043627154, "grad_norm": 0.2202644944190979, "learning_rate": 9.22141029707401e-06, "loss": 0.0877, "step": 41806 }, { "epoch": 0.745674740484429, "grad_norm": 0.27243903279304504, "learning_rate": 9.220202997614618e-06, "loss": 0.1294, "step": 41807 }, { "epoch": 0.7456925766061427, "grad_norm": 0.2497827112674713, "learning_rate": 9.21899575932317e-06, "loss": 0.1185, "step": 41808 }, { "epoch": 0.7457104127278564, "grad_norm": 0.22586867213249207, "learning_rate": 9.217788582204345e-06, "loss": 0.1197, "step": 41809 }, { "epoch": 0.7457282488495701, "grad_norm": 0.2817850410938263, "learning_rate": 9.216581466262817e-06, "loss": 0.1488, "step": 41810 }, { "epoch": 0.7457460849712838, "grad_norm": 0.2709518373012543, "learning_rate": 9.215374411503275e-06, "loss": 0.1215, "step": 41811 }, { "epoch": 0.7457639210929975, "grad_norm": 0.2206665277481079, "learning_rate": 9.214167417930395e-06, "loss": 0.1463, "step": 41812 }, { "epoch": 0.7457817572147112, "grad_norm": 0.30456459522247314, "learning_rate": 9.212960485548856e-06, "loss": 0.1546, "step": 41813 }, { "epoch": 0.745799593336425, "grad_norm": 0.23487740755081177, "learning_rate": 9.211753614363326e-06, "loss": 0.1532, "step": 41814 }, { "epoch": 0.7458174294581387, "grad_norm": 0.260042667388916, "learning_rate": 9.210546804378498e-06, "loss": 0.1279, "step": 41815 }, { "epoch": 0.7458352655798524, "grad_norm": 0.2977808713912964, "learning_rate": 9.209340055599035e-06, "loss": 0.1131, "step": 41816 }, { "epoch": 0.745853101701566, "grad_norm": 0.30064624547958374, "learning_rate": 9.208133368029633e-06, "loss": 0.151, "step": 41817 }, { "epoch": 0.7458709378232797, "grad_norm": 0.27859240770339966, "learning_rate": 9.206926741674957e-06, "loss": 0.1186, "step": 41818 }, { "epoch": 0.7458887739449934, "grad_norm": 0.2517804801464081, "learning_rate": 9.20572017653969e-06, "loss": 0.11, "step": 41819 }, { "epoch": 0.7459066100667071, "grad_norm": 0.3461153209209442, "learning_rate": 9.20451367262851e-06, "loss": 0.1277, "step": 41820 }, { "epoch": 0.7459244461884208, "grad_norm": 0.4292374551296234, "learning_rate": 9.20330722994609e-06, "loss": 0.1332, "step": 41821 }, { "epoch": 0.7459422823101345, "grad_norm": 0.2662316560745239, "learning_rate": 9.202100848497106e-06, "loss": 0.1502, "step": 41822 }, { "epoch": 0.7459601184318482, "grad_norm": 0.3414556086063385, "learning_rate": 9.200894528286227e-06, "loss": 0.1396, "step": 41823 }, { "epoch": 0.7459779545535619, "grad_norm": 0.21548150479793549, "learning_rate": 9.199688269318149e-06, "loss": 0.0896, "step": 41824 }, { "epoch": 0.7459957906752755, "grad_norm": 0.24060171842575073, "learning_rate": 9.198482071597533e-06, "loss": 0.1242, "step": 41825 }, { "epoch": 0.7460136267969892, "grad_norm": 0.26048561930656433, "learning_rate": 9.197275935129062e-06, "loss": 0.132, "step": 41826 }, { "epoch": 0.7460314629187029, "grad_norm": 0.2417096197605133, "learning_rate": 9.196069859917398e-06, "loss": 0.1284, "step": 41827 }, { "epoch": 0.7460492990404166, "grad_norm": 0.2765873074531555, "learning_rate": 9.19486384596723e-06, "loss": 0.1655, "step": 41828 }, { "epoch": 0.7460671351621303, "grad_norm": 0.2587246596813202, "learning_rate": 9.193657893283234e-06, "loss": 0.1372, "step": 41829 }, { "epoch": 0.746084971283844, "grad_norm": 0.21504376828670502, "learning_rate": 9.192452001870067e-06, "loss": 0.0769, "step": 41830 }, { "epoch": 0.7461028074055578, "grad_norm": 0.3404028117656708, "learning_rate": 9.191246171732427e-06, "loss": 0.1434, "step": 41831 }, { "epoch": 0.7461206435272715, "grad_norm": 0.32502004504203796, "learning_rate": 9.190040402874966e-06, "loss": 0.1412, "step": 41832 }, { "epoch": 0.7461384796489852, "grad_norm": 0.35646936297416687, "learning_rate": 9.18883469530238e-06, "loss": 0.1476, "step": 41833 }, { "epoch": 0.7461563157706989, "grad_norm": 0.24465464055538177, "learning_rate": 9.18762904901933e-06, "loss": 0.119, "step": 41834 }, { "epoch": 0.7461741518924125, "grad_norm": 0.27104452252388, "learning_rate": 9.186423464030492e-06, "loss": 0.1813, "step": 41835 }, { "epoch": 0.7461919880141262, "grad_norm": 0.2749364376068115, "learning_rate": 9.18521794034053e-06, "loss": 0.1251, "step": 41836 }, { "epoch": 0.7462098241358399, "grad_norm": 0.21803925931453705, "learning_rate": 9.184012477954134e-06, "loss": 0.1068, "step": 41837 }, { "epoch": 0.7462276602575536, "grad_norm": 0.18814244866371155, "learning_rate": 9.182807076875969e-06, "loss": 0.0738, "step": 41838 }, { "epoch": 0.7462454963792673, "grad_norm": 0.25718310475349426, "learning_rate": 9.181601737110707e-06, "loss": 0.1538, "step": 41839 }, { "epoch": 0.746263332500981, "grad_norm": 0.2642189562320709, "learning_rate": 9.18039645866301e-06, "loss": 0.108, "step": 41840 }, { "epoch": 0.7462811686226947, "grad_norm": 0.37207135558128357, "learning_rate": 9.179191241537568e-06, "loss": 0.1048, "step": 41841 }, { "epoch": 0.7462990047444084, "grad_norm": 0.2946400046348572, "learning_rate": 9.177986085739046e-06, "loss": 0.1288, "step": 41842 }, { "epoch": 0.746316840866122, "grad_norm": 0.28700873255729675, "learning_rate": 9.176780991272108e-06, "loss": 0.1252, "step": 41843 }, { "epoch": 0.7463346769878357, "grad_norm": 0.3128969073295593, "learning_rate": 9.17557595814144e-06, "loss": 0.0736, "step": 41844 }, { "epoch": 0.7463525131095494, "grad_norm": 0.30640971660614014, "learning_rate": 9.174370986351694e-06, "loss": 0.1874, "step": 41845 }, { "epoch": 0.7463703492312631, "grad_norm": 0.30637016892433167, "learning_rate": 9.173166075907563e-06, "loss": 0.1413, "step": 41846 }, { "epoch": 0.7463881853529768, "grad_norm": 0.2354010045528412, "learning_rate": 9.171961226813705e-06, "loss": 0.1318, "step": 41847 }, { "epoch": 0.7464060214746906, "grad_norm": 0.25598016381263733, "learning_rate": 9.170756439074793e-06, "loss": 0.1129, "step": 41848 }, { "epoch": 0.7464238575964043, "grad_norm": 0.26738688349723816, "learning_rate": 9.16955171269549e-06, "loss": 0.1131, "step": 41849 }, { "epoch": 0.746441693718118, "grad_norm": 0.25804081559181213, "learning_rate": 9.168347047680478e-06, "loss": 0.1105, "step": 41850 }, { "epoch": 0.7464595298398317, "grad_norm": 0.23106598854064941, "learning_rate": 9.167142444034421e-06, "loss": 0.0633, "step": 41851 }, { "epoch": 0.7464773659615453, "grad_norm": 0.28757357597351074, "learning_rate": 9.16593790176199e-06, "loss": 0.0743, "step": 41852 }, { "epoch": 0.746495202083259, "grad_norm": 0.25843679904937744, "learning_rate": 9.164733420867843e-06, "loss": 0.0986, "step": 41853 }, { "epoch": 0.7465130382049727, "grad_norm": 0.2869722843170166, "learning_rate": 9.163529001356666e-06, "loss": 0.1075, "step": 41854 }, { "epoch": 0.7465308743266864, "grad_norm": 0.17662650346755981, "learning_rate": 9.162324643233123e-06, "loss": 0.1234, "step": 41855 }, { "epoch": 0.7465487104484001, "grad_norm": 0.22272805869579315, "learning_rate": 9.161120346501877e-06, "loss": 0.147, "step": 41856 }, { "epoch": 0.7465665465701138, "grad_norm": 0.376632422208786, "learning_rate": 9.159916111167593e-06, "loss": 0.1298, "step": 41857 }, { "epoch": 0.7465843826918275, "grad_norm": 0.31782400608062744, "learning_rate": 9.158711937234954e-06, "loss": 0.1104, "step": 41858 }, { "epoch": 0.7466022188135412, "grad_norm": 0.3512881398200989, "learning_rate": 9.157507824708609e-06, "loss": 0.2044, "step": 41859 }, { "epoch": 0.7466200549352549, "grad_norm": 0.2636779546737671, "learning_rate": 9.156303773593241e-06, "loss": 0.0814, "step": 41860 }, { "epoch": 0.7466378910569685, "grad_norm": 0.31799277663230896, "learning_rate": 9.155099783893518e-06, "loss": 0.135, "step": 41861 }, { "epoch": 0.7466557271786822, "grad_norm": 0.2215995341539383, "learning_rate": 9.15389585561409e-06, "loss": 0.122, "step": 41862 }, { "epoch": 0.7466735633003959, "grad_norm": 0.25963956117630005, "learning_rate": 9.15269198875964e-06, "loss": 0.1149, "step": 41863 }, { "epoch": 0.7466913994221096, "grad_norm": 0.1972527652978897, "learning_rate": 9.151488183334833e-06, "loss": 0.1285, "step": 41864 }, { "epoch": 0.7467092355438234, "grad_norm": 0.2374015599489212, "learning_rate": 9.15028443934433e-06, "loss": 0.1091, "step": 41865 }, { "epoch": 0.7467270716655371, "grad_norm": 0.3324569761753082, "learning_rate": 9.149080756792792e-06, "loss": 0.1721, "step": 41866 }, { "epoch": 0.7467449077872508, "grad_norm": 0.1996554434299469, "learning_rate": 9.147877135684898e-06, "loss": 0.0925, "step": 41867 }, { "epoch": 0.7467627439089645, "grad_norm": 0.3236035704612732, "learning_rate": 9.146673576025308e-06, "loss": 0.1407, "step": 41868 }, { "epoch": 0.7467805800306782, "grad_norm": 0.30322229862213135, "learning_rate": 9.145470077818688e-06, "loss": 0.2051, "step": 41869 }, { "epoch": 0.7467984161523918, "grad_norm": 0.3037695288658142, "learning_rate": 9.144266641069693e-06, "loss": 0.1273, "step": 41870 }, { "epoch": 0.7468162522741055, "grad_norm": 0.16107887029647827, "learning_rate": 9.143063265783006e-06, "loss": 0.1022, "step": 41871 }, { "epoch": 0.7468340883958192, "grad_norm": 0.32069131731987, "learning_rate": 9.141859951963273e-06, "loss": 0.136, "step": 41872 }, { "epoch": 0.7468519245175329, "grad_norm": 0.3409341871738434, "learning_rate": 9.140656699615174e-06, "loss": 0.2077, "step": 41873 }, { "epoch": 0.7468697606392466, "grad_norm": 0.2663356065750122, "learning_rate": 9.13945350874337e-06, "loss": 0.1309, "step": 41874 }, { "epoch": 0.7468875967609603, "grad_norm": 0.2814778983592987, "learning_rate": 9.138250379352515e-06, "loss": 0.0596, "step": 41875 }, { "epoch": 0.746905432882674, "grad_norm": 0.33803728222846985, "learning_rate": 9.137047311447289e-06, "loss": 0.1574, "step": 41876 }, { "epoch": 0.7469232690043877, "grad_norm": 0.2891872525215149, "learning_rate": 9.135844305032343e-06, "loss": 0.1087, "step": 41877 }, { "epoch": 0.7469411051261013, "grad_norm": 0.3055242598056793, "learning_rate": 9.134641360112345e-06, "loss": 0.1313, "step": 41878 }, { "epoch": 0.746958941247815, "grad_norm": 0.2337963730096817, "learning_rate": 9.133438476691949e-06, "loss": 0.0867, "step": 41879 }, { "epoch": 0.7469767773695287, "grad_norm": 0.22477470338344574, "learning_rate": 9.132235654775834e-06, "loss": 0.1195, "step": 41880 }, { "epoch": 0.7469946134912424, "grad_norm": 0.3041396737098694, "learning_rate": 9.131032894368655e-06, "loss": 0.1671, "step": 41881 }, { "epoch": 0.7470124496129562, "grad_norm": 0.21302111446857452, "learning_rate": 9.129830195475073e-06, "loss": 0.1187, "step": 41882 }, { "epoch": 0.7470302857346699, "grad_norm": 0.3032519221305847, "learning_rate": 9.128627558099743e-06, "loss": 0.1515, "step": 41883 }, { "epoch": 0.7470481218563836, "grad_norm": 0.22368109226226807, "learning_rate": 9.12742498224734e-06, "loss": 0.1127, "step": 41884 }, { "epoch": 0.7470659579780973, "grad_norm": 0.276267409324646, "learning_rate": 9.12622246792252e-06, "loss": 0.1083, "step": 41885 }, { "epoch": 0.747083794099811, "grad_norm": 0.2245662808418274, "learning_rate": 9.12502001512994e-06, "loss": 0.1281, "step": 41886 }, { "epoch": 0.7471016302215246, "grad_norm": 0.410659521818161, "learning_rate": 9.123817623874271e-06, "loss": 0.1719, "step": 41887 }, { "epoch": 0.7471194663432383, "grad_norm": 0.23370492458343506, "learning_rate": 9.122615294160159e-06, "loss": 0.0795, "step": 41888 }, { "epoch": 0.747137302464952, "grad_norm": 0.2496359646320343, "learning_rate": 9.121413025992284e-06, "loss": 0.1233, "step": 41889 }, { "epoch": 0.7471551385866657, "grad_norm": 0.28118282556533813, "learning_rate": 9.120210819375297e-06, "loss": 0.1572, "step": 41890 }, { "epoch": 0.7471729747083794, "grad_norm": 0.3433353006839752, "learning_rate": 9.119008674313856e-06, "loss": 0.0971, "step": 41891 }, { "epoch": 0.7471908108300931, "grad_norm": 0.29779717326164246, "learning_rate": 9.117806590812614e-06, "loss": 0.0865, "step": 41892 }, { "epoch": 0.7472086469518068, "grad_norm": 0.17946451902389526, "learning_rate": 9.116604568876248e-06, "loss": 0.0541, "step": 41893 }, { "epoch": 0.7472264830735205, "grad_norm": 0.2596747875213623, "learning_rate": 9.11540260850941e-06, "loss": 0.1045, "step": 41894 }, { "epoch": 0.7472443191952342, "grad_norm": 0.2925165593624115, "learning_rate": 9.114200709716755e-06, "loss": 0.0576, "step": 41895 }, { "epoch": 0.7472621553169478, "grad_norm": 0.2701359987258911, "learning_rate": 9.11299887250294e-06, "loss": 0.1403, "step": 41896 }, { "epoch": 0.7472799914386615, "grad_norm": 0.22653523087501526, "learning_rate": 9.111797096872634e-06, "loss": 0.1479, "step": 41897 }, { "epoch": 0.7472978275603753, "grad_norm": 0.31115207076072693, "learning_rate": 9.110595382830491e-06, "loss": 0.0992, "step": 41898 }, { "epoch": 0.747315663682089, "grad_norm": 0.2689175605773926, "learning_rate": 9.109393730381161e-06, "loss": 0.0828, "step": 41899 }, { "epoch": 0.7473334998038027, "grad_norm": 0.25736773014068604, "learning_rate": 9.108192139529317e-06, "loss": 0.1645, "step": 41900 }, { "epoch": 0.7473513359255164, "grad_norm": 0.18629489839076996, "learning_rate": 9.1069906102796e-06, "loss": 0.0726, "step": 41901 }, { "epoch": 0.7473691720472301, "grad_norm": 0.2451358139514923, "learning_rate": 9.105789142636686e-06, "loss": 0.1332, "step": 41902 }, { "epoch": 0.7473870081689438, "grad_norm": 0.29196950793266296, "learning_rate": 9.104587736605222e-06, "loss": 0.1207, "step": 41903 }, { "epoch": 0.7474048442906575, "grad_norm": 0.2902083396911621, "learning_rate": 9.103386392189866e-06, "loss": 0.1817, "step": 41904 }, { "epoch": 0.7474226804123711, "grad_norm": 0.3537599444389343, "learning_rate": 9.102185109395267e-06, "loss": 0.1035, "step": 41905 }, { "epoch": 0.7474405165340848, "grad_norm": 0.24687115848064423, "learning_rate": 9.100983888226097e-06, "loss": 0.1422, "step": 41906 }, { "epoch": 0.7474583526557985, "grad_norm": 0.36129337549209595, "learning_rate": 9.099782728687006e-06, "loss": 0.1328, "step": 41907 }, { "epoch": 0.7474761887775122, "grad_norm": 0.3037579655647278, "learning_rate": 9.098581630782648e-06, "loss": 0.1267, "step": 41908 }, { "epoch": 0.7474940248992259, "grad_norm": 0.2956200838088989, "learning_rate": 9.097380594517674e-06, "loss": 0.1144, "step": 41909 }, { "epoch": 0.7475118610209396, "grad_norm": 0.2353220134973526, "learning_rate": 9.096179619896753e-06, "loss": 0.0871, "step": 41910 }, { "epoch": 0.7475296971426533, "grad_norm": 0.31837642192840576, "learning_rate": 9.094978706924531e-06, "loss": 0.1366, "step": 41911 }, { "epoch": 0.747547533264367, "grad_norm": 0.2602744996547699, "learning_rate": 9.093777855605664e-06, "loss": 0.0991, "step": 41912 }, { "epoch": 0.7475653693860806, "grad_norm": 0.23165516555309296, "learning_rate": 9.092577065944802e-06, "loss": 0.0838, "step": 41913 }, { "epoch": 0.7475832055077943, "grad_norm": 0.2634504437446594, "learning_rate": 9.091376337946605e-06, "loss": 0.1239, "step": 41914 }, { "epoch": 0.7476010416295081, "grad_norm": 0.28072696924209595, "learning_rate": 9.090175671615736e-06, "loss": 0.1239, "step": 41915 }, { "epoch": 0.7476188777512218, "grad_norm": 0.2594597041606903, "learning_rate": 9.08897506695684e-06, "loss": 0.124, "step": 41916 }, { "epoch": 0.7476367138729355, "grad_norm": 0.25251322984695435, "learning_rate": 9.087774523974575e-06, "loss": 0.1194, "step": 41917 }, { "epoch": 0.7476545499946492, "grad_norm": 0.2923622727394104, "learning_rate": 9.086574042673578e-06, "loss": 0.1283, "step": 41918 }, { "epoch": 0.7476723861163629, "grad_norm": 0.26804476976394653, "learning_rate": 9.085373623058529e-06, "loss": 0.0917, "step": 41919 }, { "epoch": 0.7476902222380766, "grad_norm": 0.27839505672454834, "learning_rate": 9.084173265134068e-06, "loss": 0.161, "step": 41920 }, { "epoch": 0.7477080583597903, "grad_norm": 0.38782840967178345, "learning_rate": 9.082972968904852e-06, "loss": 0.0625, "step": 41921 }, { "epoch": 0.747725894481504, "grad_norm": 0.26456138491630554, "learning_rate": 9.081772734375519e-06, "loss": 0.0916, "step": 41922 }, { "epoch": 0.7477437306032176, "grad_norm": 0.1794004589319229, "learning_rate": 9.080572561550737e-06, "loss": 0.0508, "step": 41923 }, { "epoch": 0.7477615667249313, "grad_norm": 0.369101881980896, "learning_rate": 9.079372450435159e-06, "loss": 0.1171, "step": 41924 }, { "epoch": 0.747779402846645, "grad_norm": 0.2246226668357849, "learning_rate": 9.078172401033433e-06, "loss": 0.1299, "step": 41925 }, { "epoch": 0.7477972389683587, "grad_norm": 0.23060117661952972, "learning_rate": 9.0769724133502e-06, "loss": 0.1031, "step": 41926 }, { "epoch": 0.7478150750900724, "grad_norm": 0.24144943058490753, "learning_rate": 9.075772487390128e-06, "loss": 0.0823, "step": 41927 }, { "epoch": 0.7478329112117861, "grad_norm": 0.30769750475883484, "learning_rate": 9.074572623157856e-06, "loss": 0.089, "step": 41928 }, { "epoch": 0.7478507473334998, "grad_norm": 0.2637888193130493, "learning_rate": 9.073372820658046e-06, "loss": 0.1166, "step": 41929 }, { "epoch": 0.7478685834552135, "grad_norm": 0.20476548373699188, "learning_rate": 9.07217307989535e-06, "loss": 0.078, "step": 41930 }, { "epoch": 0.7478864195769271, "grad_norm": 0.24336165189743042, "learning_rate": 9.0709734008744e-06, "loss": 0.1012, "step": 41931 }, { "epoch": 0.7479042556986409, "grad_norm": 0.343633770942688, "learning_rate": 9.069773783599866e-06, "loss": 0.1274, "step": 41932 }, { "epoch": 0.7479220918203546, "grad_norm": 0.2840675711631775, "learning_rate": 9.068574228076393e-06, "loss": 0.1223, "step": 41933 }, { "epoch": 0.7479399279420683, "grad_norm": 0.34654518961906433, "learning_rate": 9.067374734308628e-06, "loss": 0.1488, "step": 41934 }, { "epoch": 0.747957764063782, "grad_norm": 0.275153785943985, "learning_rate": 9.066175302301213e-06, "loss": 0.1141, "step": 41935 }, { "epoch": 0.7479756001854957, "grad_norm": 0.21194010972976685, "learning_rate": 9.064975932058815e-06, "loss": 0.0894, "step": 41936 }, { "epoch": 0.7479934363072094, "grad_norm": 0.23151636123657227, "learning_rate": 9.063776623586073e-06, "loss": 0.1118, "step": 41937 }, { "epoch": 0.7480112724289231, "grad_norm": 0.26441770792007446, "learning_rate": 9.062577376887638e-06, "loss": 0.1334, "step": 41938 }, { "epoch": 0.7480291085506368, "grad_norm": 0.24598845839500427, "learning_rate": 9.061378191968158e-06, "loss": 0.1389, "step": 41939 }, { "epoch": 0.7480469446723504, "grad_norm": 0.3421214520931244, "learning_rate": 9.060179068832272e-06, "loss": 0.1623, "step": 41940 }, { "epoch": 0.7480647807940641, "grad_norm": 0.2248920500278473, "learning_rate": 9.058980007484638e-06, "loss": 0.0963, "step": 41941 }, { "epoch": 0.7480826169157778, "grad_norm": 0.33854901790618896, "learning_rate": 9.05778100792991e-06, "loss": 0.1476, "step": 41942 }, { "epoch": 0.7481004530374915, "grad_norm": 0.24359679222106934, "learning_rate": 9.05658207017273e-06, "loss": 0.1639, "step": 41943 }, { "epoch": 0.7481182891592052, "grad_norm": 0.32375413179397583, "learning_rate": 9.05538319421774e-06, "loss": 0.1094, "step": 41944 }, { "epoch": 0.7481361252809189, "grad_norm": 0.2316959798336029, "learning_rate": 9.054184380069597e-06, "loss": 0.102, "step": 41945 }, { "epoch": 0.7481539614026326, "grad_norm": 0.2827676236629486, "learning_rate": 9.052985627732941e-06, "loss": 0.0951, "step": 41946 }, { "epoch": 0.7481717975243463, "grad_norm": 0.33458948135375977, "learning_rate": 9.051786937212422e-06, "loss": 0.1243, "step": 41947 }, { "epoch": 0.74818963364606, "grad_norm": 0.35230180621147156, "learning_rate": 9.050588308512677e-06, "loss": 0.1489, "step": 41948 }, { "epoch": 0.7482074697677737, "grad_norm": 0.3614899814128876, "learning_rate": 9.04938974163837e-06, "loss": 0.0851, "step": 41949 }, { "epoch": 0.7482253058894874, "grad_norm": 0.33453720808029175, "learning_rate": 9.048191236594137e-06, "loss": 0.1398, "step": 41950 }, { "epoch": 0.7482431420112011, "grad_norm": 0.22242973744869232, "learning_rate": 9.046992793384623e-06, "loss": 0.0831, "step": 41951 }, { "epoch": 0.7482609781329148, "grad_norm": 0.27579745650291443, "learning_rate": 9.045794412014477e-06, "loss": 0.1073, "step": 41952 }, { "epoch": 0.7482788142546285, "grad_norm": 0.20397932827472687, "learning_rate": 9.044596092488331e-06, "loss": 0.1068, "step": 41953 }, { "epoch": 0.7482966503763422, "grad_norm": 0.298859566450119, "learning_rate": 9.043397834810852e-06, "loss": 0.0699, "step": 41954 }, { "epoch": 0.7483144864980559, "grad_norm": 0.30979788303375244, "learning_rate": 9.042199638986665e-06, "loss": 0.0881, "step": 41955 }, { "epoch": 0.7483323226197696, "grad_norm": 0.3030558228492737, "learning_rate": 9.041001505020433e-06, "loss": 0.1029, "step": 41956 }, { "epoch": 0.7483501587414833, "grad_norm": 0.271344393491745, "learning_rate": 9.039803432916782e-06, "loss": 0.1038, "step": 41957 }, { "epoch": 0.7483679948631969, "grad_norm": 0.27243441343307495, "learning_rate": 9.038605422680375e-06, "loss": 0.1329, "step": 41958 }, { "epoch": 0.7483858309849106, "grad_norm": 0.2747722566127777, "learning_rate": 9.037407474315845e-06, "loss": 0.0858, "step": 41959 }, { "epoch": 0.7484036671066243, "grad_norm": 0.27515843510627747, "learning_rate": 9.036209587827838e-06, "loss": 0.1037, "step": 41960 }, { "epoch": 0.748421503228338, "grad_norm": 0.2827395498752594, "learning_rate": 9.035011763220984e-06, "loss": 0.1217, "step": 41961 }, { "epoch": 0.7484393393500517, "grad_norm": 0.3788807690143585, "learning_rate": 9.03381400049995e-06, "loss": 0.1032, "step": 41962 }, { "epoch": 0.7484571754717654, "grad_norm": 0.3126762807369232, "learning_rate": 9.03261629966937e-06, "loss": 0.104, "step": 41963 }, { "epoch": 0.7484750115934791, "grad_norm": 0.2632387578487396, "learning_rate": 9.031418660733882e-06, "loss": 0.1496, "step": 41964 }, { "epoch": 0.7484928477151928, "grad_norm": 0.2825818657875061, "learning_rate": 9.030221083698129e-06, "loss": 0.1696, "step": 41965 }, { "epoch": 0.7485106838369066, "grad_norm": 0.31543752551078796, "learning_rate": 9.02902356856675e-06, "loss": 0.1301, "step": 41966 }, { "epoch": 0.7485285199586202, "grad_norm": 0.2320011854171753, "learning_rate": 9.027826115344396e-06, "loss": 0.0929, "step": 41967 }, { "epoch": 0.7485463560803339, "grad_norm": 0.23497483134269714, "learning_rate": 9.026628724035699e-06, "loss": 0.1323, "step": 41968 }, { "epoch": 0.7485641922020476, "grad_norm": 0.331898957490921, "learning_rate": 9.025431394645315e-06, "loss": 0.0704, "step": 41969 }, { "epoch": 0.7485820283237613, "grad_norm": 0.24425993859767914, "learning_rate": 9.024234127177866e-06, "loss": 0.1031, "step": 41970 }, { "epoch": 0.748599864445475, "grad_norm": 0.30189934372901917, "learning_rate": 9.023036921638012e-06, "loss": 0.102, "step": 41971 }, { "epoch": 0.7486177005671887, "grad_norm": 0.29834675788879395, "learning_rate": 9.021839778030386e-06, "loss": 0.1233, "step": 41972 }, { "epoch": 0.7486355366889024, "grad_norm": 0.3311809301376343, "learning_rate": 9.020642696359628e-06, "loss": 0.0887, "step": 41973 }, { "epoch": 0.7486533728106161, "grad_norm": 0.34275487065315247, "learning_rate": 9.019445676630368e-06, "loss": 0.1317, "step": 41974 }, { "epoch": 0.7486712089323297, "grad_norm": 0.2713060677051544, "learning_rate": 9.018248718847266e-06, "loss": 0.1187, "step": 41975 }, { "epoch": 0.7486890450540434, "grad_norm": 0.38590869307518005, "learning_rate": 9.017051823014952e-06, "loss": 0.1114, "step": 41976 }, { "epoch": 0.7487068811757571, "grad_norm": 0.266640841960907, "learning_rate": 9.015854989138064e-06, "loss": 0.1332, "step": 41977 }, { "epoch": 0.7487247172974708, "grad_norm": 0.21670185029506683, "learning_rate": 9.014658217221244e-06, "loss": 0.0928, "step": 41978 }, { "epoch": 0.7487425534191845, "grad_norm": 0.3271774351596832, "learning_rate": 9.013461507269122e-06, "loss": 0.0934, "step": 41979 }, { "epoch": 0.7487603895408982, "grad_norm": 0.25897735357284546, "learning_rate": 9.012264859286351e-06, "loss": 0.0799, "step": 41980 }, { "epoch": 0.7487782256626119, "grad_norm": 0.35139474272727966, "learning_rate": 9.011068273277566e-06, "loss": 0.1707, "step": 41981 }, { "epoch": 0.7487960617843256, "grad_norm": 0.3317379355430603, "learning_rate": 9.009871749247392e-06, "loss": 0.1065, "step": 41982 }, { "epoch": 0.7488138979060394, "grad_norm": 0.42130887508392334, "learning_rate": 9.008675287200489e-06, "loss": 0.1363, "step": 41983 }, { "epoch": 0.748831734027753, "grad_norm": 0.2340133637189865, "learning_rate": 9.007478887141471e-06, "loss": 0.1094, "step": 41984 }, { "epoch": 0.7488495701494667, "grad_norm": 0.24788811802864075, "learning_rate": 9.006282549075001e-06, "loss": 0.1536, "step": 41985 }, { "epoch": 0.7488674062711804, "grad_norm": 0.2535175085067749, "learning_rate": 9.005086273005703e-06, "loss": 0.1235, "step": 41986 }, { "epoch": 0.7488852423928941, "grad_norm": 0.2354431003332138, "learning_rate": 9.003890058938205e-06, "loss": 0.0584, "step": 41987 }, { "epoch": 0.7489030785146078, "grad_norm": 0.3969897925853729, "learning_rate": 9.002693906877164e-06, "loss": 0.1905, "step": 41988 }, { "epoch": 0.7489209146363215, "grad_norm": 0.18769477307796478, "learning_rate": 9.001497816827205e-06, "loss": 0.108, "step": 41989 }, { "epoch": 0.7489387507580352, "grad_norm": 0.3645225465297699, "learning_rate": 9.00030178879297e-06, "loss": 0.183, "step": 41990 }, { "epoch": 0.7489565868797489, "grad_norm": 0.2234112024307251, "learning_rate": 8.999105822779089e-06, "loss": 0.0927, "step": 41991 }, { "epoch": 0.7489744230014626, "grad_norm": 0.3051113784313202, "learning_rate": 8.99790991879019e-06, "loss": 0.1254, "step": 41992 }, { "epoch": 0.7489922591231762, "grad_norm": 0.3288295269012451, "learning_rate": 8.996714076830931e-06, "loss": 0.1049, "step": 41993 }, { "epoch": 0.7490100952448899, "grad_norm": 0.26711562275886536, "learning_rate": 8.995518296905934e-06, "loss": 0.0957, "step": 41994 }, { "epoch": 0.7490279313666036, "grad_norm": 0.267429918050766, "learning_rate": 8.994322579019827e-06, "loss": 0.1372, "step": 41995 }, { "epoch": 0.7490457674883173, "grad_norm": 0.27344921231269836, "learning_rate": 8.993126923177262e-06, "loss": 0.1134, "step": 41996 }, { "epoch": 0.749063603610031, "grad_norm": 0.2872573733329773, "learning_rate": 8.991931329382857e-06, "loss": 0.1371, "step": 41997 }, { "epoch": 0.7490814397317447, "grad_norm": 0.19753947854042053, "learning_rate": 8.990735797641268e-06, "loss": 0.1119, "step": 41998 }, { "epoch": 0.7490992758534585, "grad_norm": 0.4069095551967621, "learning_rate": 8.98954032795711e-06, "loss": 0.1658, "step": 41999 }, { "epoch": 0.7491171119751722, "grad_norm": 0.28794756531715393, "learning_rate": 8.988344920335018e-06, "loss": 0.0889, "step": 42000 }, { "epoch": 0.7491171119751722, "eval_loss": 0.11315001547336578, "eval_runtime": 107.1699, "eval_samples_per_second": 9.555, "eval_steps_per_second": 1.596, "step": 42000 }, { "epoch": 0.7491349480968859, "grad_norm": 0.33742472529411316, "learning_rate": 8.98714957477964e-06, "loss": 0.1144, "step": 42001 }, { "epoch": 0.7491527842185995, "grad_norm": 0.28506872057914734, "learning_rate": 8.9859542912956e-06, "loss": 0.1627, "step": 42002 }, { "epoch": 0.7491706203403132, "grad_norm": 0.28201717138290405, "learning_rate": 8.984759069887535e-06, "loss": 0.1457, "step": 42003 }, { "epoch": 0.7491884564620269, "grad_norm": 0.2749691307544708, "learning_rate": 8.983563910560073e-06, "loss": 0.0858, "step": 42004 }, { "epoch": 0.7492062925837406, "grad_norm": 0.47563955187797546, "learning_rate": 8.98236881331784e-06, "loss": 0.1744, "step": 42005 }, { "epoch": 0.7492241287054543, "grad_norm": 0.2707219421863556, "learning_rate": 8.981173778165488e-06, "loss": 0.1143, "step": 42006 }, { "epoch": 0.749241964827168, "grad_norm": 0.2272682785987854, "learning_rate": 8.979978805107639e-06, "loss": 0.1497, "step": 42007 }, { "epoch": 0.7492598009488817, "grad_norm": 0.20125547051429749, "learning_rate": 8.978783894148926e-06, "loss": 0.1102, "step": 42008 }, { "epoch": 0.7492776370705954, "grad_norm": 0.3066682517528534, "learning_rate": 8.977589045293969e-06, "loss": 0.117, "step": 42009 }, { "epoch": 0.749295473192309, "grad_norm": 0.28412675857543945, "learning_rate": 8.976394258547422e-06, "loss": 0.1223, "step": 42010 }, { "epoch": 0.7493133093140227, "grad_norm": 0.18476998805999756, "learning_rate": 8.975199533913895e-06, "loss": 0.096, "step": 42011 }, { "epoch": 0.7493311454357364, "grad_norm": 0.3824542462825775, "learning_rate": 8.974004871398036e-06, "loss": 0.1145, "step": 42012 }, { "epoch": 0.7493489815574501, "grad_norm": 0.3623155355453491, "learning_rate": 8.972810271004463e-06, "loss": 0.0959, "step": 42013 }, { "epoch": 0.7493668176791638, "grad_norm": 0.2711127698421478, "learning_rate": 8.971615732737823e-06, "loss": 0.1612, "step": 42014 }, { "epoch": 0.7493846538008775, "grad_norm": 0.23485594987869263, "learning_rate": 8.970421256602735e-06, "loss": 0.0911, "step": 42015 }, { "epoch": 0.7494024899225913, "grad_norm": 0.24790982902050018, "learning_rate": 8.96922684260383e-06, "loss": 0.1246, "step": 42016 }, { "epoch": 0.749420326044305, "grad_norm": 0.31698155403137207, "learning_rate": 8.968032490745742e-06, "loss": 0.1467, "step": 42017 }, { "epoch": 0.7494381621660187, "grad_norm": 0.2183833122253418, "learning_rate": 8.966838201033085e-06, "loss": 0.142, "step": 42018 }, { "epoch": 0.7494559982877324, "grad_norm": 0.2552938163280487, "learning_rate": 8.965643973470511e-06, "loss": 0.1235, "step": 42019 }, { "epoch": 0.749473834409446, "grad_norm": 0.2738887667655945, "learning_rate": 8.96444980806264e-06, "loss": 0.1025, "step": 42020 }, { "epoch": 0.7494916705311597, "grad_norm": 0.23545123636722565, "learning_rate": 8.963255704814097e-06, "loss": 0.1202, "step": 42021 }, { "epoch": 0.7495095066528734, "grad_norm": 0.29631537199020386, "learning_rate": 8.962061663729507e-06, "loss": 0.1315, "step": 42022 }, { "epoch": 0.7495273427745871, "grad_norm": 0.21105071902275085, "learning_rate": 8.960867684813514e-06, "loss": 0.073, "step": 42023 }, { "epoch": 0.7495451788963008, "grad_norm": 0.2751389443874359, "learning_rate": 8.959673768070728e-06, "loss": 0.1646, "step": 42024 }, { "epoch": 0.7495630150180145, "grad_norm": 0.27297112345695496, "learning_rate": 8.958479913505796e-06, "loss": 0.1509, "step": 42025 }, { "epoch": 0.7495808511397282, "grad_norm": 0.2689000070095062, "learning_rate": 8.95728612112333e-06, "loss": 0.0719, "step": 42026 }, { "epoch": 0.7495986872614419, "grad_norm": 0.192246213555336, "learning_rate": 8.95609239092797e-06, "loss": 0.1264, "step": 42027 }, { "epoch": 0.7496165233831555, "grad_norm": 0.3367277681827545, "learning_rate": 8.954898722924337e-06, "loss": 0.0737, "step": 42028 }, { "epoch": 0.7496343595048692, "grad_norm": 0.4084070026874542, "learning_rate": 8.95370511711706e-06, "loss": 0.1654, "step": 42029 }, { "epoch": 0.7496521956265829, "grad_norm": 0.23909322917461395, "learning_rate": 8.952511573510763e-06, "loss": 0.17, "step": 42030 }, { "epoch": 0.7496700317482966, "grad_norm": 0.26647239923477173, "learning_rate": 8.951318092110064e-06, "loss": 0.1123, "step": 42031 }, { "epoch": 0.7496878678700103, "grad_norm": 0.215842142701149, "learning_rate": 8.95012467291961e-06, "loss": 0.0725, "step": 42032 }, { "epoch": 0.7497057039917241, "grad_norm": 0.2804771959781647, "learning_rate": 8.948931315944014e-06, "loss": 0.143, "step": 42033 }, { "epoch": 0.7497235401134378, "grad_norm": 0.2761063873767853, "learning_rate": 8.947738021187907e-06, "loss": 0.117, "step": 42034 }, { "epoch": 0.7497413762351515, "grad_norm": 0.22390833497047424, "learning_rate": 8.946544788655901e-06, "loss": 0.1102, "step": 42035 }, { "epoch": 0.7497592123568652, "grad_norm": 0.29880204796791077, "learning_rate": 8.94535161835264e-06, "loss": 0.0978, "step": 42036 }, { "epoch": 0.7497770484785788, "grad_norm": 0.3341601490974426, "learning_rate": 8.944158510282744e-06, "loss": 0.1571, "step": 42037 }, { "epoch": 0.7497948846002925, "grad_norm": 0.3323186933994293, "learning_rate": 8.942965464450825e-06, "loss": 0.1236, "step": 42038 }, { "epoch": 0.7498127207220062, "grad_norm": 0.2128514051437378, "learning_rate": 8.941772480861527e-06, "loss": 0.1001, "step": 42039 }, { "epoch": 0.7498305568437199, "grad_norm": 0.2780879735946655, "learning_rate": 8.940579559519454e-06, "loss": 0.1586, "step": 42040 }, { "epoch": 0.7498483929654336, "grad_norm": 0.22825388610363007, "learning_rate": 8.939386700429253e-06, "loss": 0.0853, "step": 42041 }, { "epoch": 0.7498662290871473, "grad_norm": 0.29734793305397034, "learning_rate": 8.938193903595535e-06, "loss": 0.076, "step": 42042 }, { "epoch": 0.749884065208861, "grad_norm": 0.3439893126487732, "learning_rate": 8.937001169022925e-06, "loss": 0.106, "step": 42043 }, { "epoch": 0.7499019013305747, "grad_norm": 0.22359788417816162, "learning_rate": 8.935808496716038e-06, "loss": 0.0617, "step": 42044 }, { "epoch": 0.7499197374522883, "grad_norm": 0.25676581263542175, "learning_rate": 8.934615886679515e-06, "loss": 0.0967, "step": 42045 }, { "epoch": 0.749937573574002, "grad_norm": 0.3949834704399109, "learning_rate": 8.933423338917968e-06, "loss": 0.1543, "step": 42046 }, { "epoch": 0.7499554096957157, "grad_norm": 0.28987938165664673, "learning_rate": 8.932230853436021e-06, "loss": 0.1097, "step": 42047 }, { "epoch": 0.7499732458174294, "grad_norm": 0.30169570446014404, "learning_rate": 8.931038430238292e-06, "loss": 0.1293, "step": 42048 }, { "epoch": 0.7499910819391431, "grad_norm": 0.2319132685661316, "learning_rate": 8.929846069329411e-06, "loss": 0.1445, "step": 42049 }, { "epoch": 0.7500089180608569, "grad_norm": 0.2882557213306427, "learning_rate": 8.928653770714001e-06, "loss": 0.1171, "step": 42050 }, { "epoch": 0.7500267541825706, "grad_norm": 0.2730274796485901, "learning_rate": 8.927461534396672e-06, "loss": 0.1053, "step": 42051 }, { "epoch": 0.7500445903042843, "grad_norm": 0.2675424814224243, "learning_rate": 8.926269360382061e-06, "loss": 0.178, "step": 42052 }, { "epoch": 0.750062426425998, "grad_norm": 0.3200012445449829, "learning_rate": 8.925077248674771e-06, "loss": 0.1008, "step": 42053 }, { "epoch": 0.7500802625477117, "grad_norm": 0.268690288066864, "learning_rate": 8.923885199279444e-06, "loss": 0.0805, "step": 42054 }, { "epoch": 0.7500980986694253, "grad_norm": 0.2666533887386322, "learning_rate": 8.922693212200692e-06, "loss": 0.1516, "step": 42055 }, { "epoch": 0.750115934791139, "grad_norm": 0.2992364168167114, "learning_rate": 8.921501287443131e-06, "loss": 0.1185, "step": 42056 }, { "epoch": 0.7501337709128527, "grad_norm": 0.24742060899734497, "learning_rate": 8.920309425011378e-06, "loss": 0.0851, "step": 42057 }, { "epoch": 0.7501516070345664, "grad_norm": 0.4028169512748718, "learning_rate": 8.919117624910067e-06, "loss": 0.1113, "step": 42058 }, { "epoch": 0.7501694431562801, "grad_norm": 0.2818405330181122, "learning_rate": 8.917925887143811e-06, "loss": 0.1269, "step": 42059 }, { "epoch": 0.7501872792779938, "grad_norm": 0.21878543496131897, "learning_rate": 8.916734211717225e-06, "loss": 0.1149, "step": 42060 }, { "epoch": 0.7502051153997075, "grad_norm": 0.3269925117492676, "learning_rate": 8.91554259863493e-06, "loss": 0.1095, "step": 42061 }, { "epoch": 0.7502229515214212, "grad_norm": 0.22564412653446198, "learning_rate": 8.914351047901551e-06, "loss": 0.074, "step": 42062 }, { "epoch": 0.7502407876431348, "grad_norm": 0.25140830874443054, "learning_rate": 8.913159559521705e-06, "loss": 0.1103, "step": 42063 }, { "epoch": 0.7502586237648485, "grad_norm": 0.28874045610427856, "learning_rate": 8.91196813350001e-06, "loss": 0.1067, "step": 42064 }, { "epoch": 0.7502764598865622, "grad_norm": 0.3521924912929535, "learning_rate": 8.910776769841073e-06, "loss": 0.1517, "step": 42065 }, { "epoch": 0.7502942960082759, "grad_norm": 0.25434020161628723, "learning_rate": 8.90958546854953e-06, "loss": 0.0957, "step": 42066 }, { "epoch": 0.7503121321299897, "grad_norm": 0.33381780982017517, "learning_rate": 8.908394229629983e-06, "loss": 0.1397, "step": 42067 }, { "epoch": 0.7503299682517034, "grad_norm": 0.1818884164094925, "learning_rate": 8.907203053087068e-06, "loss": 0.0949, "step": 42068 }, { "epoch": 0.7503478043734171, "grad_norm": 0.25334861874580383, "learning_rate": 8.906011938925391e-06, "loss": 0.1604, "step": 42069 }, { "epoch": 0.7503656404951308, "grad_norm": 0.29042163491249084, "learning_rate": 8.904820887149562e-06, "loss": 0.1299, "step": 42070 }, { "epoch": 0.7503834766168445, "grad_norm": 0.25951075553894043, "learning_rate": 8.903629897764215e-06, "loss": 0.1354, "step": 42071 }, { "epoch": 0.7504013127385581, "grad_norm": 0.30543404817581177, "learning_rate": 8.902438970773958e-06, "loss": 0.136, "step": 42072 }, { "epoch": 0.7504191488602718, "grad_norm": 0.26674771308898926, "learning_rate": 8.901248106183408e-06, "loss": 0.118, "step": 42073 }, { "epoch": 0.7504369849819855, "grad_norm": 0.2996059060096741, "learning_rate": 8.900057303997169e-06, "loss": 0.1178, "step": 42074 }, { "epoch": 0.7504548211036992, "grad_norm": 0.21766085922718048, "learning_rate": 8.898866564219882e-06, "loss": 0.0646, "step": 42075 }, { "epoch": 0.7504726572254129, "grad_norm": 0.2722904086112976, "learning_rate": 8.897675886856147e-06, "loss": 0.1126, "step": 42076 }, { "epoch": 0.7504904933471266, "grad_norm": 0.26624128222465515, "learning_rate": 8.896485271910582e-06, "loss": 0.1117, "step": 42077 }, { "epoch": 0.7505083294688403, "grad_norm": 0.3210342228412628, "learning_rate": 8.895294719387792e-06, "loss": 0.0903, "step": 42078 }, { "epoch": 0.750526165590554, "grad_norm": 0.3359255790710449, "learning_rate": 8.894104229292413e-06, "loss": 0.1103, "step": 42079 }, { "epoch": 0.7505440017122676, "grad_norm": 0.33223870396614075, "learning_rate": 8.892913801629038e-06, "loss": 0.1095, "step": 42080 }, { "epoch": 0.7505618378339813, "grad_norm": 0.30413511395454407, "learning_rate": 8.891723436402302e-06, "loss": 0.0894, "step": 42081 }, { "epoch": 0.750579673955695, "grad_norm": 0.31320711970329285, "learning_rate": 8.89053313361681e-06, "loss": 0.1239, "step": 42082 }, { "epoch": 0.7505975100774087, "grad_norm": 0.252905011177063, "learning_rate": 8.889342893277166e-06, "loss": 0.0763, "step": 42083 }, { "epoch": 0.7506153461991225, "grad_norm": 0.24671049416065216, "learning_rate": 8.888152715388004e-06, "loss": 0.1599, "step": 42084 }, { "epoch": 0.7506331823208362, "grad_norm": 0.3065972328186035, "learning_rate": 8.886962599953927e-06, "loss": 0.1246, "step": 42085 }, { "epoch": 0.7506510184425499, "grad_norm": 0.32748398184776306, "learning_rate": 8.885772546979546e-06, "loss": 0.1076, "step": 42086 }, { "epoch": 0.7506688545642636, "grad_norm": 0.26008275151252747, "learning_rate": 8.884582556469467e-06, "loss": 0.0823, "step": 42087 }, { "epoch": 0.7506866906859773, "grad_norm": 0.2635764479637146, "learning_rate": 8.883392628428325e-06, "loss": 0.129, "step": 42088 }, { "epoch": 0.750704526807691, "grad_norm": 0.33368223905563354, "learning_rate": 8.882202762860717e-06, "loss": 0.1734, "step": 42089 }, { "epoch": 0.7507223629294046, "grad_norm": 0.23162329196929932, "learning_rate": 8.881012959771257e-06, "loss": 0.0922, "step": 42090 }, { "epoch": 0.7507401990511183, "grad_norm": 0.34869635105133057, "learning_rate": 8.879823219164551e-06, "loss": 0.1277, "step": 42091 }, { "epoch": 0.750758035172832, "grad_norm": 0.3413008749485016, "learning_rate": 8.878633541045226e-06, "loss": 0.1309, "step": 42092 }, { "epoch": 0.7507758712945457, "grad_norm": 0.20450599491596222, "learning_rate": 8.877443925417886e-06, "loss": 0.1234, "step": 42093 }, { "epoch": 0.7507937074162594, "grad_norm": 0.22577449679374695, "learning_rate": 8.876254372287132e-06, "loss": 0.0909, "step": 42094 }, { "epoch": 0.7508115435379731, "grad_norm": 0.33587026596069336, "learning_rate": 8.875064881657593e-06, "loss": 0.1207, "step": 42095 }, { "epoch": 0.7508293796596868, "grad_norm": 0.2645242512226105, "learning_rate": 8.873875453533868e-06, "loss": 0.1638, "step": 42096 }, { "epoch": 0.7508472157814005, "grad_norm": 0.21473512053489685, "learning_rate": 8.872686087920574e-06, "loss": 0.1181, "step": 42097 }, { "epoch": 0.7508650519031141, "grad_norm": 0.24860872328281403, "learning_rate": 8.871496784822323e-06, "loss": 0.1196, "step": 42098 }, { "epoch": 0.7508828880248278, "grad_norm": 0.29707661271095276, "learning_rate": 8.87030754424372e-06, "loss": 0.1088, "step": 42099 }, { "epoch": 0.7509007241465415, "grad_norm": 0.2295612394809723, "learning_rate": 8.869118366189365e-06, "loss": 0.1054, "step": 42100 }, { "epoch": 0.7509185602682553, "grad_norm": 0.23430395126342773, "learning_rate": 8.86792925066389e-06, "loss": 0.141, "step": 42101 }, { "epoch": 0.750936396389969, "grad_norm": 0.22908875346183777, "learning_rate": 8.866740197671895e-06, "loss": 0.1395, "step": 42102 }, { "epoch": 0.7509542325116827, "grad_norm": 0.26498180627822876, "learning_rate": 8.86555120721798e-06, "loss": 0.1307, "step": 42103 }, { "epoch": 0.7509720686333964, "grad_norm": 0.2420375645160675, "learning_rate": 8.86436227930676e-06, "loss": 0.1279, "step": 42104 }, { "epoch": 0.7509899047551101, "grad_norm": 0.26881036162376404, "learning_rate": 8.863173413942851e-06, "loss": 0.1142, "step": 42105 }, { "epoch": 0.7510077408768238, "grad_norm": 0.26392707228660583, "learning_rate": 8.861984611130855e-06, "loss": 0.1785, "step": 42106 }, { "epoch": 0.7510255769985374, "grad_norm": 0.2737892270088196, "learning_rate": 8.86079587087537e-06, "loss": 0.1179, "step": 42107 }, { "epoch": 0.7510434131202511, "grad_norm": 0.30394309759140015, "learning_rate": 8.859607193181027e-06, "loss": 0.1558, "step": 42108 }, { "epoch": 0.7510612492419648, "grad_norm": 0.26812347769737244, "learning_rate": 8.858418578052411e-06, "loss": 0.0888, "step": 42109 }, { "epoch": 0.7510790853636785, "grad_norm": 0.2923065423965454, "learning_rate": 8.857230025494148e-06, "loss": 0.1145, "step": 42110 }, { "epoch": 0.7510969214853922, "grad_norm": 0.22205254435539246, "learning_rate": 8.856041535510836e-06, "loss": 0.0985, "step": 42111 }, { "epoch": 0.7511147576071059, "grad_norm": 0.27145302295684814, "learning_rate": 8.854853108107086e-06, "loss": 0.1168, "step": 42112 }, { "epoch": 0.7511325937288196, "grad_norm": 0.253492146730423, "learning_rate": 8.85366474328749e-06, "loss": 0.0825, "step": 42113 }, { "epoch": 0.7511504298505333, "grad_norm": 0.4730740487575531, "learning_rate": 8.852476441056676e-06, "loss": 0.1307, "step": 42114 }, { "epoch": 0.751168265972247, "grad_norm": 0.2881898283958435, "learning_rate": 8.85128820141924e-06, "loss": 0.0702, "step": 42115 }, { "epoch": 0.7511861020939606, "grad_norm": 0.23260222375392914, "learning_rate": 8.85010002437979e-06, "loss": 0.1007, "step": 42116 }, { "epoch": 0.7512039382156744, "grad_norm": 0.270072340965271, "learning_rate": 8.84891190994292e-06, "loss": 0.1116, "step": 42117 }, { "epoch": 0.7512217743373881, "grad_norm": 0.1961875855922699, "learning_rate": 8.847723858113254e-06, "loss": 0.0898, "step": 42118 }, { "epoch": 0.7512396104591018, "grad_norm": 0.16710162162780762, "learning_rate": 8.846535868895393e-06, "loss": 0.0852, "step": 42119 }, { "epoch": 0.7512574465808155, "grad_norm": 0.2354332059621811, "learning_rate": 8.845347942293933e-06, "loss": 0.1355, "step": 42120 }, { "epoch": 0.7512752827025292, "grad_norm": 0.4211379885673523, "learning_rate": 8.84416007831348e-06, "loss": 0.1945, "step": 42121 }, { "epoch": 0.7512931188242429, "grad_norm": 0.32310420274734497, "learning_rate": 8.842972276958639e-06, "loss": 0.1287, "step": 42122 }, { "epoch": 0.7513109549459566, "grad_norm": 0.19457566738128662, "learning_rate": 8.841784538234027e-06, "loss": 0.0848, "step": 42123 }, { "epoch": 0.7513287910676703, "grad_norm": 0.2943336069583893, "learning_rate": 8.84059686214424e-06, "loss": 0.1724, "step": 42124 }, { "epoch": 0.7513466271893839, "grad_norm": 0.33193403482437134, "learning_rate": 8.839409248693881e-06, "loss": 0.1676, "step": 42125 }, { "epoch": 0.7513644633110976, "grad_norm": 0.2646985352039337, "learning_rate": 8.838221697887544e-06, "loss": 0.1879, "step": 42126 }, { "epoch": 0.7513822994328113, "grad_norm": 0.26498857140541077, "learning_rate": 8.837034209729852e-06, "loss": 0.1052, "step": 42127 }, { "epoch": 0.751400135554525, "grad_norm": 0.2948433756828308, "learning_rate": 8.835846784225398e-06, "loss": 0.1756, "step": 42128 }, { "epoch": 0.7514179716762387, "grad_norm": 0.23665866255760193, "learning_rate": 8.834659421378783e-06, "loss": 0.0923, "step": 42129 }, { "epoch": 0.7514358077979524, "grad_norm": 0.24343526363372803, "learning_rate": 8.833472121194602e-06, "loss": 0.0862, "step": 42130 }, { "epoch": 0.7514536439196661, "grad_norm": 0.23014704883098602, "learning_rate": 8.832284883677478e-06, "loss": 0.0784, "step": 42131 }, { "epoch": 0.7514714800413798, "grad_norm": 0.27050164341926575, "learning_rate": 8.831097708832001e-06, "loss": 0.1361, "step": 42132 }, { "epoch": 0.7514893161630934, "grad_norm": 0.25127875804901123, "learning_rate": 8.829910596662773e-06, "loss": 0.1104, "step": 42133 }, { "epoch": 0.7515071522848072, "grad_norm": 0.3064620792865753, "learning_rate": 8.828723547174389e-06, "loss": 0.1409, "step": 42134 }, { "epoch": 0.7515249884065209, "grad_norm": 0.26769953966140747, "learning_rate": 8.827536560371467e-06, "loss": 0.0876, "step": 42135 }, { "epoch": 0.7515428245282346, "grad_norm": 0.20214343070983887, "learning_rate": 8.826349636258591e-06, "loss": 0.1015, "step": 42136 }, { "epoch": 0.7515606606499483, "grad_norm": 0.2769550681114197, "learning_rate": 8.825162774840376e-06, "loss": 0.119, "step": 42137 }, { "epoch": 0.751578496771662, "grad_norm": 0.3347737193107605, "learning_rate": 8.823975976121418e-06, "loss": 0.143, "step": 42138 }, { "epoch": 0.7515963328933757, "grad_norm": 0.2858056426048279, "learning_rate": 8.822789240106308e-06, "loss": 0.163, "step": 42139 }, { "epoch": 0.7516141690150894, "grad_norm": 0.2721172273159027, "learning_rate": 8.821602566799662e-06, "loss": 0.1657, "step": 42140 }, { "epoch": 0.7516320051368031, "grad_norm": 0.20643888413906097, "learning_rate": 8.82041595620607e-06, "loss": 0.1161, "step": 42141 }, { "epoch": 0.7516498412585167, "grad_norm": 0.282868355512619, "learning_rate": 8.819229408330138e-06, "loss": 0.131, "step": 42142 }, { "epoch": 0.7516676773802304, "grad_norm": 0.6819544434547424, "learning_rate": 8.818042923176453e-06, "loss": 0.1338, "step": 42143 }, { "epoch": 0.7516855135019441, "grad_norm": 0.25793537497520447, "learning_rate": 8.816856500749629e-06, "loss": 0.1096, "step": 42144 }, { "epoch": 0.7517033496236578, "grad_norm": 0.3843628466129303, "learning_rate": 8.815670141054261e-06, "loss": 0.1021, "step": 42145 }, { "epoch": 0.7517211857453715, "grad_norm": 0.3574337661266327, "learning_rate": 8.814483844094943e-06, "loss": 0.1316, "step": 42146 }, { "epoch": 0.7517390218670852, "grad_norm": 0.22077126801013947, "learning_rate": 8.813297609876268e-06, "loss": 0.1019, "step": 42147 }, { "epoch": 0.7517568579887989, "grad_norm": 0.2697865664958954, "learning_rate": 8.81211143840285e-06, "loss": 0.097, "step": 42148 }, { "epoch": 0.7517746941105126, "grad_norm": 0.2687152922153473, "learning_rate": 8.810925329679273e-06, "loss": 0.1311, "step": 42149 }, { "epoch": 0.7517925302322263, "grad_norm": 0.36930057406425476, "learning_rate": 8.809739283710146e-06, "loss": 0.1158, "step": 42150 }, { "epoch": 0.75181036635394, "grad_norm": 0.32879677414894104, "learning_rate": 8.808553300500066e-06, "loss": 0.1233, "step": 42151 }, { "epoch": 0.7518282024756537, "grad_norm": 0.29432424902915955, "learning_rate": 8.807367380053613e-06, "loss": 0.1199, "step": 42152 }, { "epoch": 0.7518460385973674, "grad_norm": 0.43963679671287537, "learning_rate": 8.806181522375409e-06, "loss": 0.1315, "step": 42153 }, { "epoch": 0.7518638747190811, "grad_norm": 0.25721198320388794, "learning_rate": 8.804995727470036e-06, "loss": 0.0829, "step": 42154 }, { "epoch": 0.7518817108407948, "grad_norm": 0.24039192497730255, "learning_rate": 8.803809995342094e-06, "loss": 0.1069, "step": 42155 }, { "epoch": 0.7518995469625085, "grad_norm": 0.24511951208114624, "learning_rate": 8.80262432599617e-06, "loss": 0.1143, "step": 42156 }, { "epoch": 0.7519173830842222, "grad_norm": 0.2990073561668396, "learning_rate": 8.801438719436877e-06, "loss": 0.1181, "step": 42157 }, { "epoch": 0.7519352192059359, "grad_norm": 0.26523932814598083, "learning_rate": 8.800253175668801e-06, "loss": 0.1305, "step": 42158 }, { "epoch": 0.7519530553276496, "grad_norm": 0.25667643547058105, "learning_rate": 8.799067694696542e-06, "loss": 0.1472, "step": 42159 }, { "epoch": 0.7519708914493632, "grad_norm": 0.2096024602651596, "learning_rate": 8.79788227652468e-06, "loss": 0.1276, "step": 42160 }, { "epoch": 0.7519887275710769, "grad_norm": 0.30646803975105286, "learning_rate": 8.796696921157833e-06, "loss": 0.1345, "step": 42161 }, { "epoch": 0.7520065636927906, "grad_norm": 0.2282247394323349, "learning_rate": 8.795511628600583e-06, "loss": 0.0936, "step": 42162 }, { "epoch": 0.7520243998145043, "grad_norm": 0.344303160905838, "learning_rate": 8.794326398857523e-06, "loss": 0.1441, "step": 42163 }, { "epoch": 0.752042235936218, "grad_norm": 0.19948624074459076, "learning_rate": 8.793141231933255e-06, "loss": 0.076, "step": 42164 }, { "epoch": 0.7520600720579317, "grad_norm": 0.2920467257499695, "learning_rate": 8.791956127832362e-06, "loss": 0.1394, "step": 42165 }, { "epoch": 0.7520779081796454, "grad_norm": 0.339828759431839, "learning_rate": 8.790771086559455e-06, "loss": 0.1638, "step": 42166 }, { "epoch": 0.7520957443013591, "grad_norm": 0.22915126383304596, "learning_rate": 8.789586108119115e-06, "loss": 0.0778, "step": 42167 }, { "epoch": 0.7521135804230729, "grad_norm": 0.2713801860809326, "learning_rate": 8.78840119251594e-06, "loss": 0.1187, "step": 42168 }, { "epoch": 0.7521314165447865, "grad_norm": 0.2872539460659027, "learning_rate": 8.787216339754514e-06, "loss": 0.1087, "step": 42169 }, { "epoch": 0.7521492526665002, "grad_norm": 0.2936089038848877, "learning_rate": 8.786031549839447e-06, "loss": 0.155, "step": 42170 }, { "epoch": 0.7521670887882139, "grad_norm": 0.33259886503219604, "learning_rate": 8.784846822775316e-06, "loss": 0.1183, "step": 42171 }, { "epoch": 0.7521849249099276, "grad_norm": 0.23290354013442993, "learning_rate": 8.783662158566724e-06, "loss": 0.1004, "step": 42172 }, { "epoch": 0.7522027610316413, "grad_norm": 0.20739133656024933, "learning_rate": 8.782477557218249e-06, "loss": 0.0837, "step": 42173 }, { "epoch": 0.752220597153355, "grad_norm": 0.3643436133861542, "learning_rate": 8.7812930187345e-06, "loss": 0.1055, "step": 42174 }, { "epoch": 0.7522384332750687, "grad_norm": 0.27334144711494446, "learning_rate": 8.780108543120061e-06, "loss": 0.0968, "step": 42175 }, { "epoch": 0.7522562693967824, "grad_norm": 0.24182043969631195, "learning_rate": 8.778924130379523e-06, "loss": 0.0909, "step": 42176 }, { "epoch": 0.752274105518496, "grad_norm": 0.29700538516044617, "learning_rate": 8.777739780517472e-06, "loss": 0.1117, "step": 42177 }, { "epoch": 0.7522919416402097, "grad_norm": 0.26775923371315, "learning_rate": 8.776555493538502e-06, "loss": 0.1028, "step": 42178 }, { "epoch": 0.7523097777619234, "grad_norm": 0.2090633064508438, "learning_rate": 8.775371269447213e-06, "loss": 0.1015, "step": 42179 }, { "epoch": 0.7523276138836371, "grad_norm": 0.2052198201417923, "learning_rate": 8.77418710824819e-06, "loss": 0.1269, "step": 42180 }, { "epoch": 0.7523454500053508, "grad_norm": 0.2673207223415375, "learning_rate": 8.773003009946026e-06, "loss": 0.1354, "step": 42181 }, { "epoch": 0.7523632861270645, "grad_norm": 0.25561168789863586, "learning_rate": 8.771818974545296e-06, "loss": 0.1073, "step": 42182 }, { "epoch": 0.7523811222487782, "grad_norm": 0.3096112310886383, "learning_rate": 8.770635002050609e-06, "loss": 0.1189, "step": 42183 }, { "epoch": 0.7523989583704919, "grad_norm": 0.27430427074432373, "learning_rate": 8.769451092466544e-06, "loss": 0.1025, "step": 42184 }, { "epoch": 0.7524167944922057, "grad_norm": 0.3142137825489044, "learning_rate": 8.768267245797696e-06, "loss": 0.1188, "step": 42185 }, { "epoch": 0.7524346306139194, "grad_norm": 0.28176233172416687, "learning_rate": 8.767083462048639e-06, "loss": 0.1374, "step": 42186 }, { "epoch": 0.752452466735633, "grad_norm": 0.39823800325393677, "learning_rate": 8.765899741223983e-06, "loss": 0.1141, "step": 42187 }, { "epoch": 0.7524703028573467, "grad_norm": 0.2888416647911072, "learning_rate": 8.764716083328306e-06, "loss": 0.1112, "step": 42188 }, { "epoch": 0.7524881389790604, "grad_norm": 0.25727975368499756, "learning_rate": 8.763532488366196e-06, "loss": 0.1166, "step": 42189 }, { "epoch": 0.7525059751007741, "grad_norm": 0.2711246609687805, "learning_rate": 8.762348956342236e-06, "loss": 0.1561, "step": 42190 }, { "epoch": 0.7525238112224878, "grad_norm": 0.2138831615447998, "learning_rate": 8.761165487261028e-06, "loss": 0.0666, "step": 42191 }, { "epoch": 0.7525416473442015, "grad_norm": 0.2878745496273041, "learning_rate": 8.759982081127142e-06, "loss": 0.1554, "step": 42192 }, { "epoch": 0.7525594834659152, "grad_norm": 0.30273404717445374, "learning_rate": 8.758798737945184e-06, "loss": 0.102, "step": 42193 }, { "epoch": 0.7525773195876289, "grad_norm": 0.4103369116783142, "learning_rate": 8.757615457719732e-06, "loss": 0.1088, "step": 42194 }, { "epoch": 0.7525951557093425, "grad_norm": 0.2743113040924072, "learning_rate": 8.756432240455361e-06, "loss": 0.0959, "step": 42195 }, { "epoch": 0.7526129918310562, "grad_norm": 0.2367207407951355, "learning_rate": 8.755249086156677e-06, "loss": 0.0994, "step": 42196 }, { "epoch": 0.7526308279527699, "grad_norm": 0.23079419136047363, "learning_rate": 8.754065994828261e-06, "loss": 0.1263, "step": 42197 }, { "epoch": 0.7526486640744836, "grad_norm": 0.30911049246788025, "learning_rate": 8.752882966474696e-06, "loss": 0.1336, "step": 42198 }, { "epoch": 0.7526665001961973, "grad_norm": 0.22299621999263763, "learning_rate": 8.75170000110056e-06, "loss": 0.1152, "step": 42199 }, { "epoch": 0.752684336317911, "grad_norm": 0.29472866654396057, "learning_rate": 8.750517098710457e-06, "loss": 0.1062, "step": 42200 }, { "epoch": 0.7527021724396247, "grad_norm": 0.3956160843372345, "learning_rate": 8.749334259308958e-06, "loss": 0.1543, "step": 42201 }, { "epoch": 0.7527200085613385, "grad_norm": 0.2704830467700958, "learning_rate": 8.748151482900654e-06, "loss": 0.135, "step": 42202 }, { "epoch": 0.7527378446830522, "grad_norm": 0.4271145761013031, "learning_rate": 8.746968769490127e-06, "loss": 0.0818, "step": 42203 }, { "epoch": 0.7527556808047658, "grad_norm": 0.2855283319950104, "learning_rate": 8.745786119081955e-06, "loss": 0.2005, "step": 42204 }, { "epoch": 0.7527735169264795, "grad_norm": 0.26800844073295593, "learning_rate": 8.744603531680732e-06, "loss": 0.1568, "step": 42205 }, { "epoch": 0.7527913530481932, "grad_norm": 0.28565552830696106, "learning_rate": 8.743421007291047e-06, "loss": 0.1197, "step": 42206 }, { "epoch": 0.7528091891699069, "grad_norm": 0.2677529454231262, "learning_rate": 8.742238545917478e-06, "loss": 0.177, "step": 42207 }, { "epoch": 0.7528270252916206, "grad_norm": 0.28748244047164917, "learning_rate": 8.741056147564596e-06, "loss": 0.1066, "step": 42208 }, { "epoch": 0.7528448614133343, "grad_norm": 0.2905385494232178, "learning_rate": 8.739873812237007e-06, "loss": 0.106, "step": 42209 }, { "epoch": 0.752862697535048, "grad_norm": 0.2593698799610138, "learning_rate": 8.738691539939284e-06, "loss": 0.1219, "step": 42210 }, { "epoch": 0.7528805336567617, "grad_norm": 0.26137372851371765, "learning_rate": 8.737509330676008e-06, "loss": 0.0955, "step": 42211 }, { "epoch": 0.7528983697784754, "grad_norm": 0.26841309666633606, "learning_rate": 8.736327184451753e-06, "loss": 0.0838, "step": 42212 }, { "epoch": 0.752916205900189, "grad_norm": 0.23604892194271088, "learning_rate": 8.735145101271122e-06, "loss": 0.0847, "step": 42213 }, { "epoch": 0.7529340420219027, "grad_norm": 0.2799234092235565, "learning_rate": 8.733963081138686e-06, "loss": 0.1298, "step": 42214 }, { "epoch": 0.7529518781436164, "grad_norm": 0.18916910886764526, "learning_rate": 8.732781124059026e-06, "loss": 0.0911, "step": 42215 }, { "epoch": 0.7529697142653301, "grad_norm": 0.29734471440315247, "learning_rate": 8.731599230036725e-06, "loss": 0.1386, "step": 42216 }, { "epoch": 0.7529875503870438, "grad_norm": 0.4562632739543915, "learning_rate": 8.730417399076355e-06, "loss": 0.0927, "step": 42217 }, { "epoch": 0.7530053865087576, "grad_norm": 0.221034973859787, "learning_rate": 8.729235631182517e-06, "loss": 0.1019, "step": 42218 }, { "epoch": 0.7530232226304713, "grad_norm": 0.4050613343715668, "learning_rate": 8.72805392635977e-06, "loss": 0.1114, "step": 42219 }, { "epoch": 0.753041058752185, "grad_norm": 0.25245755910873413, "learning_rate": 8.726872284612716e-06, "loss": 0.1214, "step": 42220 }, { "epoch": 0.7530588948738987, "grad_norm": 0.28109344840049744, "learning_rate": 8.725690705945918e-06, "loss": 0.1019, "step": 42221 }, { "epoch": 0.7530767309956123, "grad_norm": 0.2757278084754944, "learning_rate": 8.724509190363972e-06, "loss": 0.1264, "step": 42222 }, { "epoch": 0.753094567117326, "grad_norm": 0.30457839369773865, "learning_rate": 8.723327737871451e-06, "loss": 0.1388, "step": 42223 }, { "epoch": 0.7531124032390397, "grad_norm": 0.33599331974983215, "learning_rate": 8.722146348472932e-06, "loss": 0.1299, "step": 42224 }, { "epoch": 0.7531302393607534, "grad_norm": 0.4407554268836975, "learning_rate": 8.720965022172986e-06, "loss": 0.1173, "step": 42225 }, { "epoch": 0.7531480754824671, "grad_norm": 0.23061370849609375, "learning_rate": 8.719783758976213e-06, "loss": 0.0757, "step": 42226 }, { "epoch": 0.7531659116041808, "grad_norm": 0.2715109586715698, "learning_rate": 8.718602558887182e-06, "loss": 0.1052, "step": 42227 }, { "epoch": 0.7531837477258945, "grad_norm": 0.22626429796218872, "learning_rate": 8.717421421910468e-06, "loss": 0.1304, "step": 42228 }, { "epoch": 0.7532015838476082, "grad_norm": 0.2100069224834442, "learning_rate": 8.716240348050653e-06, "loss": 0.1041, "step": 42229 }, { "epoch": 0.7532194199693218, "grad_norm": 0.3300541341304779, "learning_rate": 8.715059337312306e-06, "loss": 0.1228, "step": 42230 }, { "epoch": 0.7532372560910355, "grad_norm": 0.2597181797027588, "learning_rate": 8.713878389700026e-06, "loss": 0.1548, "step": 42231 }, { "epoch": 0.7532550922127492, "grad_norm": 0.24442999064922333, "learning_rate": 8.712697505218365e-06, "loss": 0.0681, "step": 42232 }, { "epoch": 0.7532729283344629, "grad_norm": 0.25236642360687256, "learning_rate": 8.711516683871923e-06, "loss": 0.1278, "step": 42233 }, { "epoch": 0.7532907644561766, "grad_norm": 0.25417017936706543, "learning_rate": 8.710335925665261e-06, "loss": 0.1076, "step": 42234 }, { "epoch": 0.7533086005778904, "grad_norm": 0.22272507846355438, "learning_rate": 8.709155230602971e-06, "loss": 0.1212, "step": 42235 }, { "epoch": 0.7533264366996041, "grad_norm": 0.3872177004814148, "learning_rate": 8.707974598689625e-06, "loss": 0.1627, "step": 42236 }, { "epoch": 0.7533442728213178, "grad_norm": 0.23775237798690796, "learning_rate": 8.706794029929794e-06, "loss": 0.1405, "step": 42237 }, { "epoch": 0.7533621089430315, "grad_norm": 0.28898173570632935, "learning_rate": 8.705613524328049e-06, "loss": 0.1716, "step": 42238 }, { "epoch": 0.7533799450647451, "grad_norm": 0.35293468832969666, "learning_rate": 8.704433081888983e-06, "loss": 0.101, "step": 42239 }, { "epoch": 0.7533977811864588, "grad_norm": 0.2647443413734436, "learning_rate": 8.703252702617159e-06, "loss": 0.13, "step": 42240 }, { "epoch": 0.7534156173081725, "grad_norm": 0.24611803889274597, "learning_rate": 8.70207238651716e-06, "loss": 0.0993, "step": 42241 }, { "epoch": 0.7534334534298862, "grad_norm": 0.35614416003227234, "learning_rate": 8.700892133593555e-06, "loss": 0.0823, "step": 42242 }, { "epoch": 0.7534512895515999, "grad_norm": 0.23924636840820312, "learning_rate": 8.699711943850916e-06, "loss": 0.1449, "step": 42243 }, { "epoch": 0.7534691256733136, "grad_norm": 0.32560649514198303, "learning_rate": 8.698531817293832e-06, "loss": 0.1317, "step": 42244 }, { "epoch": 0.7534869617950273, "grad_norm": 0.263988733291626, "learning_rate": 8.697351753926866e-06, "loss": 0.1172, "step": 42245 }, { "epoch": 0.753504797916741, "grad_norm": 0.21988803148269653, "learning_rate": 8.696171753754586e-06, "loss": 0.0874, "step": 42246 }, { "epoch": 0.7535226340384547, "grad_norm": 0.3509487807750702, "learning_rate": 8.694991816781588e-06, "loss": 0.148, "step": 42247 }, { "epoch": 0.7535404701601683, "grad_norm": 0.33444318175315857, "learning_rate": 8.693811943012422e-06, "loss": 0.1505, "step": 42248 }, { "epoch": 0.753558306281882, "grad_norm": 0.3227597773075104, "learning_rate": 8.692632132451683e-06, "loss": 0.1091, "step": 42249 }, { "epoch": 0.7535761424035957, "grad_norm": 0.28614404797554016, "learning_rate": 8.691452385103934e-06, "loss": 0.1143, "step": 42250 }, { "epoch": 0.7535939785253094, "grad_norm": 0.23036304116249084, "learning_rate": 8.690272700973739e-06, "loss": 0.0809, "step": 42251 }, { "epoch": 0.7536118146470232, "grad_norm": 0.31473544239997864, "learning_rate": 8.689093080065691e-06, "loss": 0.1025, "step": 42252 }, { "epoch": 0.7536296507687369, "grad_norm": 0.24815233051776886, "learning_rate": 8.68791352238435e-06, "loss": 0.1349, "step": 42253 }, { "epoch": 0.7536474868904506, "grad_norm": 0.30203378200531006, "learning_rate": 8.68673402793429e-06, "loss": 0.1098, "step": 42254 }, { "epoch": 0.7536653230121643, "grad_norm": 0.1972241997718811, "learning_rate": 8.685554596720083e-06, "loss": 0.1134, "step": 42255 }, { "epoch": 0.753683159133878, "grad_norm": 0.3330599367618561, "learning_rate": 8.684375228746295e-06, "loss": 0.0788, "step": 42256 }, { "epoch": 0.7537009952555916, "grad_norm": 0.3485974073410034, "learning_rate": 8.68319592401751e-06, "loss": 0.1232, "step": 42257 }, { "epoch": 0.7537188313773053, "grad_norm": 0.33124592900276184, "learning_rate": 8.682016682538293e-06, "loss": 0.1677, "step": 42258 }, { "epoch": 0.753736667499019, "grad_norm": 0.2172287553548813, "learning_rate": 8.680837504313208e-06, "loss": 0.0707, "step": 42259 }, { "epoch": 0.7537545036207327, "grad_norm": 0.32111528515815735, "learning_rate": 8.679658389346842e-06, "loss": 0.1809, "step": 42260 }, { "epoch": 0.7537723397424464, "grad_norm": 0.2719787657260895, "learning_rate": 8.678479337643747e-06, "loss": 0.0882, "step": 42261 }, { "epoch": 0.7537901758641601, "grad_norm": 0.2695040702819824, "learning_rate": 8.677300349208513e-06, "loss": 0.1182, "step": 42262 }, { "epoch": 0.7538080119858738, "grad_norm": 0.33145859837532043, "learning_rate": 8.676121424045702e-06, "loss": 0.131, "step": 42263 }, { "epoch": 0.7538258481075875, "grad_norm": 0.29759299755096436, "learning_rate": 8.67494256215987e-06, "loss": 0.0912, "step": 42264 }, { "epoch": 0.7538436842293011, "grad_norm": 0.297317236661911, "learning_rate": 8.673763763555611e-06, "loss": 0.1264, "step": 42265 }, { "epoch": 0.7538615203510148, "grad_norm": 0.30711886286735535, "learning_rate": 8.672585028237481e-06, "loss": 0.1359, "step": 42266 }, { "epoch": 0.7538793564727285, "grad_norm": 0.28112030029296875, "learning_rate": 8.671406356210051e-06, "loss": 0.1334, "step": 42267 }, { "epoch": 0.7538971925944422, "grad_norm": 0.2673642933368683, "learning_rate": 8.670227747477891e-06, "loss": 0.1057, "step": 42268 }, { "epoch": 0.753915028716156, "grad_norm": 0.27309855818748474, "learning_rate": 8.66904920204556e-06, "loss": 0.12, "step": 42269 }, { "epoch": 0.7539328648378697, "grad_norm": 0.17130039632320404, "learning_rate": 8.667870719917642e-06, "loss": 0.0615, "step": 42270 }, { "epoch": 0.7539507009595834, "grad_norm": 0.3982267677783966, "learning_rate": 8.666692301098697e-06, "loss": 0.1334, "step": 42271 }, { "epoch": 0.7539685370812971, "grad_norm": 0.27218371629714966, "learning_rate": 8.665513945593295e-06, "loss": 0.1065, "step": 42272 }, { "epoch": 0.7539863732030108, "grad_norm": 0.27529701590538025, "learning_rate": 8.664335653405995e-06, "loss": 0.1415, "step": 42273 }, { "epoch": 0.7540042093247245, "grad_norm": 0.21380817890167236, "learning_rate": 8.66315742454138e-06, "loss": 0.0602, "step": 42274 }, { "epoch": 0.7540220454464381, "grad_norm": 0.2720341086387634, "learning_rate": 8.661979259004002e-06, "loss": 0.1049, "step": 42275 }, { "epoch": 0.7540398815681518, "grad_norm": 0.34371060132980347, "learning_rate": 8.660801156798443e-06, "loss": 0.1264, "step": 42276 }, { "epoch": 0.7540577176898655, "grad_norm": 0.3421943783760071, "learning_rate": 8.659623117929252e-06, "loss": 0.0994, "step": 42277 }, { "epoch": 0.7540755538115792, "grad_norm": 0.2658706605434418, "learning_rate": 8.658445142401017e-06, "loss": 0.1535, "step": 42278 }, { "epoch": 0.7540933899332929, "grad_norm": 0.32612144947052, "learning_rate": 8.65726723021829e-06, "loss": 0.1846, "step": 42279 }, { "epoch": 0.7541112260550066, "grad_norm": 0.2186860740184784, "learning_rate": 8.656089381385641e-06, "loss": 0.0946, "step": 42280 }, { "epoch": 0.7541290621767203, "grad_norm": 0.2428620457649231, "learning_rate": 8.654911595907635e-06, "loss": 0.1171, "step": 42281 }, { "epoch": 0.754146898298434, "grad_norm": 0.2928742468357086, "learning_rate": 8.653733873788828e-06, "loss": 0.1583, "step": 42282 }, { "epoch": 0.7541647344201476, "grad_norm": 0.27857810258865356, "learning_rate": 8.652556215033802e-06, "loss": 0.1285, "step": 42283 }, { "epoch": 0.7541825705418613, "grad_norm": 0.35628536343574524, "learning_rate": 8.651378619647117e-06, "loss": 0.1517, "step": 42284 }, { "epoch": 0.754200406663575, "grad_norm": 0.2081788182258606, "learning_rate": 8.650201087633334e-06, "loss": 0.0751, "step": 42285 }, { "epoch": 0.7542182427852888, "grad_norm": 0.21348069608211517, "learning_rate": 8.64902361899701e-06, "loss": 0.091, "step": 42286 }, { "epoch": 0.7542360789070025, "grad_norm": 0.2190733253955841, "learning_rate": 8.647846213742724e-06, "loss": 0.1003, "step": 42287 }, { "epoch": 0.7542539150287162, "grad_norm": 0.20201753079891205, "learning_rate": 8.646668871875027e-06, "loss": 0.1445, "step": 42288 }, { "epoch": 0.7542717511504299, "grad_norm": 0.4274791181087494, "learning_rate": 8.6454915933985e-06, "loss": 0.1549, "step": 42289 }, { "epoch": 0.7542895872721436, "grad_norm": 0.2638417184352875, "learning_rate": 8.644314378317685e-06, "loss": 0.0886, "step": 42290 }, { "epoch": 0.7543074233938573, "grad_norm": 0.28225457668304443, "learning_rate": 8.643137226637168e-06, "loss": 0.0802, "step": 42291 }, { "epoch": 0.754325259515571, "grad_norm": 0.33142709732055664, "learning_rate": 8.641960138361499e-06, "loss": 0.1372, "step": 42292 }, { "epoch": 0.7543430956372846, "grad_norm": 0.22591717541217804, "learning_rate": 8.640783113495243e-06, "loss": 0.0581, "step": 42293 }, { "epoch": 0.7543609317589983, "grad_norm": 0.23914481699466705, "learning_rate": 8.639606152042962e-06, "loss": 0.0822, "step": 42294 }, { "epoch": 0.754378767880712, "grad_norm": 0.21988533437252045, "learning_rate": 8.638429254009209e-06, "loss": 0.1294, "step": 42295 }, { "epoch": 0.7543966040024257, "grad_norm": 0.2980956733226776, "learning_rate": 8.637252419398562e-06, "loss": 0.0892, "step": 42296 }, { "epoch": 0.7544144401241394, "grad_norm": 0.3343035876750946, "learning_rate": 8.636075648215577e-06, "loss": 0.1404, "step": 42297 }, { "epoch": 0.7544322762458531, "grad_norm": 0.300118625164032, "learning_rate": 8.634898940464817e-06, "loss": 0.1312, "step": 42298 }, { "epoch": 0.7544501123675668, "grad_norm": 0.29913660883903503, "learning_rate": 8.633722296150832e-06, "loss": 0.1603, "step": 42299 }, { "epoch": 0.7544679484892804, "grad_norm": 0.31691840291023254, "learning_rate": 8.632545715278201e-06, "loss": 0.0997, "step": 42300 }, { "epoch": 0.7544857846109941, "grad_norm": 0.33182668685913086, "learning_rate": 8.631369197851474e-06, "loss": 0.1535, "step": 42301 }, { "epoch": 0.7545036207327078, "grad_norm": 0.23822741210460663, "learning_rate": 8.630192743875207e-06, "loss": 0.1517, "step": 42302 }, { "epoch": 0.7545214568544216, "grad_norm": 0.23876644670963287, "learning_rate": 8.629016353353963e-06, "loss": 0.1021, "step": 42303 }, { "epoch": 0.7545392929761353, "grad_norm": 0.38707587122917175, "learning_rate": 8.62784002629232e-06, "loss": 0.1986, "step": 42304 }, { "epoch": 0.754557129097849, "grad_norm": 0.2862582802772522, "learning_rate": 8.62666376269482e-06, "loss": 0.1255, "step": 42305 }, { "epoch": 0.7545749652195627, "grad_norm": 0.30288851261138916, "learning_rate": 8.625487562566026e-06, "loss": 0.155, "step": 42306 }, { "epoch": 0.7545928013412764, "grad_norm": 0.2549150288105011, "learning_rate": 8.6243114259105e-06, "loss": 0.0928, "step": 42307 }, { "epoch": 0.7546106374629901, "grad_norm": 0.23368658125400543, "learning_rate": 8.62313535273279e-06, "loss": 0.1274, "step": 42308 }, { "epoch": 0.7546284735847038, "grad_norm": 0.23577933013439178, "learning_rate": 8.621959343037472e-06, "loss": 0.1007, "step": 42309 }, { "epoch": 0.7546463097064174, "grad_norm": 0.23457945883274078, "learning_rate": 8.620783396829097e-06, "loss": 0.0829, "step": 42310 }, { "epoch": 0.7546641458281311, "grad_norm": 0.24033674597740173, "learning_rate": 8.619607514112221e-06, "loss": 0.0851, "step": 42311 }, { "epoch": 0.7546819819498448, "grad_norm": 0.2517170011997223, "learning_rate": 8.618431694891397e-06, "loss": 0.1629, "step": 42312 }, { "epoch": 0.7546998180715585, "grad_norm": 0.26215705275535583, "learning_rate": 8.617255939171199e-06, "loss": 0.0941, "step": 42313 }, { "epoch": 0.7547176541932722, "grad_norm": 0.31515082716941833, "learning_rate": 8.616080246956173e-06, "loss": 0.1458, "step": 42314 }, { "epoch": 0.7547354903149859, "grad_norm": 0.32869940996170044, "learning_rate": 8.614904618250872e-06, "loss": 0.1122, "step": 42315 }, { "epoch": 0.7547533264366996, "grad_norm": 0.3740622103214264, "learning_rate": 8.613729053059866e-06, "loss": 0.1294, "step": 42316 }, { "epoch": 0.7547711625584133, "grad_norm": 0.2890958786010742, "learning_rate": 8.6125535513877e-06, "loss": 0.1185, "step": 42317 }, { "epoch": 0.7547889986801269, "grad_norm": 0.2520935833454132, "learning_rate": 8.611378113238944e-06, "loss": 0.0921, "step": 42318 }, { "epoch": 0.7548068348018407, "grad_norm": 0.3312392830848694, "learning_rate": 8.610202738618147e-06, "loss": 0.0869, "step": 42319 }, { "epoch": 0.7548246709235544, "grad_norm": 0.361851304769516, "learning_rate": 8.609027427529864e-06, "loss": 0.1554, "step": 42320 }, { "epoch": 0.7548425070452681, "grad_norm": 0.31280526518821716, "learning_rate": 8.607852179978646e-06, "loss": 0.1122, "step": 42321 }, { "epoch": 0.7548603431669818, "grad_norm": 0.30120766162872314, "learning_rate": 8.606676995969059e-06, "loss": 0.1617, "step": 42322 }, { "epoch": 0.7548781792886955, "grad_norm": 0.29599201679229736, "learning_rate": 8.605501875505658e-06, "loss": 0.1442, "step": 42323 }, { "epoch": 0.7548960154104092, "grad_norm": 0.31141197681427, "learning_rate": 8.604326818592992e-06, "loss": 0.1091, "step": 42324 }, { "epoch": 0.7549138515321229, "grad_norm": 0.17772111296653748, "learning_rate": 8.603151825235612e-06, "loss": 0.1337, "step": 42325 }, { "epoch": 0.7549316876538366, "grad_norm": 0.27165862917900085, "learning_rate": 8.601976895438086e-06, "loss": 0.0647, "step": 42326 }, { "epoch": 0.7549495237755502, "grad_norm": 0.2627803683280945, "learning_rate": 8.60080202920496e-06, "loss": 0.1277, "step": 42327 }, { "epoch": 0.7549673598972639, "grad_norm": 0.2923004925251007, "learning_rate": 8.59962722654079e-06, "loss": 0.1025, "step": 42328 }, { "epoch": 0.7549851960189776, "grad_norm": 0.25430062413215637, "learning_rate": 8.598452487450124e-06, "loss": 0.1583, "step": 42329 }, { "epoch": 0.7550030321406913, "grad_norm": 0.2305556684732437, "learning_rate": 8.597277811937526e-06, "loss": 0.1213, "step": 42330 }, { "epoch": 0.755020868262405, "grad_norm": 0.24316012859344482, "learning_rate": 8.596103200007536e-06, "loss": 0.1173, "step": 42331 }, { "epoch": 0.7550387043841187, "grad_norm": 0.2630822956562042, "learning_rate": 8.594928651664725e-06, "loss": 0.1754, "step": 42332 }, { "epoch": 0.7550565405058324, "grad_norm": 0.2831932604312897, "learning_rate": 8.593754166913637e-06, "loss": 0.1046, "step": 42333 }, { "epoch": 0.7550743766275461, "grad_norm": 0.2862245440483093, "learning_rate": 8.592579745758817e-06, "loss": 0.168, "step": 42334 }, { "epoch": 0.7550922127492598, "grad_norm": 0.23946191370487213, "learning_rate": 8.591405388204831e-06, "loss": 0.1035, "step": 42335 }, { "epoch": 0.7551100488709735, "grad_norm": 0.2561069130897522, "learning_rate": 8.590231094256226e-06, "loss": 0.1139, "step": 42336 }, { "epoch": 0.7551278849926872, "grad_norm": 0.3123854100704193, "learning_rate": 8.589056863917553e-06, "loss": 0.1445, "step": 42337 }, { "epoch": 0.7551457211144009, "grad_norm": 0.26023247838020325, "learning_rate": 8.587882697193356e-06, "loss": 0.0894, "step": 42338 }, { "epoch": 0.7551635572361146, "grad_norm": 0.2850258946418762, "learning_rate": 8.586708594088203e-06, "loss": 0.1053, "step": 42339 }, { "epoch": 0.7551813933578283, "grad_norm": 0.25813084840774536, "learning_rate": 8.585534554606634e-06, "loss": 0.0917, "step": 42340 }, { "epoch": 0.755199229479542, "grad_norm": 0.3162669539451599, "learning_rate": 8.584360578753204e-06, "loss": 0.1162, "step": 42341 }, { "epoch": 0.7552170656012557, "grad_norm": 0.3151412010192871, "learning_rate": 8.583186666532455e-06, "loss": 0.1237, "step": 42342 }, { "epoch": 0.7552349017229694, "grad_norm": 0.24522614479064941, "learning_rate": 8.582012817948951e-06, "loss": 0.0701, "step": 42343 }, { "epoch": 0.755252737844683, "grad_norm": 0.27964454889297485, "learning_rate": 8.58083903300723e-06, "loss": 0.1228, "step": 42344 }, { "epoch": 0.7552705739663967, "grad_norm": 0.31902268528938293, "learning_rate": 8.579665311711854e-06, "loss": 0.1672, "step": 42345 }, { "epoch": 0.7552884100881104, "grad_norm": 0.23952889442443848, "learning_rate": 8.578491654067366e-06, "loss": 0.1348, "step": 42346 }, { "epoch": 0.7553062462098241, "grad_norm": 0.2591972053050995, "learning_rate": 8.57731806007831e-06, "loss": 0.1314, "step": 42347 }, { "epoch": 0.7553240823315378, "grad_norm": 0.243315190076828, "learning_rate": 8.57614452974925e-06, "loss": 0.0994, "step": 42348 }, { "epoch": 0.7553419184532515, "grad_norm": 0.2919265925884247, "learning_rate": 8.574971063084724e-06, "loss": 0.1813, "step": 42349 }, { "epoch": 0.7553597545749652, "grad_norm": 0.46110835671424866, "learning_rate": 8.573797660089284e-06, "loss": 0.1332, "step": 42350 }, { "epoch": 0.7553775906966789, "grad_norm": 0.27178412675857544, "learning_rate": 8.57262432076747e-06, "loss": 0.115, "step": 42351 }, { "epoch": 0.7553954268183926, "grad_norm": 0.24335041642189026, "learning_rate": 8.571451045123846e-06, "loss": 0.136, "step": 42352 }, { "epoch": 0.7554132629401064, "grad_norm": 0.2885285019874573, "learning_rate": 8.57027783316295e-06, "loss": 0.111, "step": 42353 }, { "epoch": 0.75543109906182, "grad_norm": 0.2938196063041687, "learning_rate": 8.569104684889337e-06, "loss": 0.1284, "step": 42354 }, { "epoch": 0.7554489351835337, "grad_norm": 0.2460946887731552, "learning_rate": 8.567931600307538e-06, "loss": 0.083, "step": 42355 }, { "epoch": 0.7554667713052474, "grad_norm": 0.2997353971004486, "learning_rate": 8.566758579422118e-06, "loss": 0.1258, "step": 42356 }, { "epoch": 0.7554846074269611, "grad_norm": 0.31593987345695496, "learning_rate": 8.56558562223762e-06, "loss": 0.1225, "step": 42357 }, { "epoch": 0.7555024435486748, "grad_norm": 0.2387072741985321, "learning_rate": 8.56441272875858e-06, "loss": 0.1044, "step": 42358 }, { "epoch": 0.7555202796703885, "grad_norm": 0.26952114701271057, "learning_rate": 8.563239898989562e-06, "loss": 0.095, "step": 42359 }, { "epoch": 0.7555381157921022, "grad_norm": 0.28935471177101135, "learning_rate": 8.562067132935093e-06, "loss": 0.1622, "step": 42360 }, { "epoch": 0.7555559519138159, "grad_norm": 0.3453834652900696, "learning_rate": 8.560894430599736e-06, "loss": 0.1073, "step": 42361 }, { "epoch": 0.7555737880355295, "grad_norm": 0.32495880126953125, "learning_rate": 8.559721791988035e-06, "loss": 0.1515, "step": 42362 }, { "epoch": 0.7555916241572432, "grad_norm": 0.2073800414800644, "learning_rate": 8.558549217104524e-06, "loss": 0.1019, "step": 42363 }, { "epoch": 0.7556094602789569, "grad_norm": 0.24889720976352692, "learning_rate": 8.557376705953752e-06, "loss": 0.0559, "step": 42364 }, { "epoch": 0.7556272964006706, "grad_norm": 0.2713497579097748, "learning_rate": 8.556204258540274e-06, "loss": 0.0937, "step": 42365 }, { "epoch": 0.7556451325223843, "grad_norm": 0.27397334575653076, "learning_rate": 8.555031874868627e-06, "loss": 0.1564, "step": 42366 }, { "epoch": 0.755662968644098, "grad_norm": 0.22274106740951538, "learning_rate": 8.553859554943358e-06, "loss": 0.1032, "step": 42367 }, { "epoch": 0.7556808047658117, "grad_norm": 0.2755008339881897, "learning_rate": 8.552687298769e-06, "loss": 0.1162, "step": 42368 }, { "epoch": 0.7556986408875254, "grad_norm": 0.4210760295391083, "learning_rate": 8.551515106350117e-06, "loss": 0.1295, "step": 42369 }, { "epoch": 0.7557164770092392, "grad_norm": 0.29186972975730896, "learning_rate": 8.550342977691239e-06, "loss": 0.1159, "step": 42370 }, { "epoch": 0.7557343131309529, "grad_norm": 0.24795956909656525, "learning_rate": 8.549170912796908e-06, "loss": 0.093, "step": 42371 }, { "epoch": 0.7557521492526665, "grad_norm": 0.23988518118858337, "learning_rate": 8.547998911671678e-06, "loss": 0.1144, "step": 42372 }, { "epoch": 0.7557699853743802, "grad_norm": 0.2773982882499695, "learning_rate": 8.54682697432008e-06, "loss": 0.1324, "step": 42373 }, { "epoch": 0.7557878214960939, "grad_norm": 0.2738858461380005, "learning_rate": 8.545655100746672e-06, "loss": 0.1391, "step": 42374 }, { "epoch": 0.7558056576178076, "grad_norm": 0.18455122411251068, "learning_rate": 8.544483290955988e-06, "loss": 0.0845, "step": 42375 }, { "epoch": 0.7558234937395213, "grad_norm": 0.27649593353271484, "learning_rate": 8.54331154495257e-06, "loss": 0.1454, "step": 42376 }, { "epoch": 0.755841329861235, "grad_norm": 0.19789008796215057, "learning_rate": 8.54213986274095e-06, "loss": 0.0926, "step": 42377 }, { "epoch": 0.7558591659829487, "grad_norm": 0.2960929274559021, "learning_rate": 8.540968244325692e-06, "loss": 0.1074, "step": 42378 }, { "epoch": 0.7558770021046624, "grad_norm": 0.16888195276260376, "learning_rate": 8.539796689711324e-06, "loss": 0.0796, "step": 42379 }, { "epoch": 0.755894838226376, "grad_norm": 0.2973552942276001, "learning_rate": 8.538625198902389e-06, "loss": 0.1171, "step": 42380 }, { "epoch": 0.7559126743480897, "grad_norm": 0.23543117940425873, "learning_rate": 8.53745377190342e-06, "loss": 0.0878, "step": 42381 }, { "epoch": 0.7559305104698034, "grad_norm": 0.25800907611846924, "learning_rate": 8.536282408718976e-06, "loss": 0.0762, "step": 42382 }, { "epoch": 0.7559483465915171, "grad_norm": 0.3049534559249878, "learning_rate": 8.535111109353586e-06, "loss": 0.1633, "step": 42383 }, { "epoch": 0.7559661827132308, "grad_norm": 0.3172808885574341, "learning_rate": 8.533939873811794e-06, "loss": 0.1086, "step": 42384 }, { "epoch": 0.7559840188349445, "grad_norm": 0.28367358446121216, "learning_rate": 8.532768702098129e-06, "loss": 0.178, "step": 42385 }, { "epoch": 0.7560018549566582, "grad_norm": 0.25366777181625366, "learning_rate": 8.53159759421714e-06, "loss": 0.1389, "step": 42386 }, { "epoch": 0.756019691078372, "grad_norm": 0.20593418180942535, "learning_rate": 8.530426550173373e-06, "loss": 0.1257, "step": 42387 }, { "epoch": 0.7560375272000857, "grad_norm": 0.23428650200366974, "learning_rate": 8.529255569971364e-06, "loss": 0.1387, "step": 42388 }, { "epoch": 0.7560553633217993, "grad_norm": 0.22403877973556519, "learning_rate": 8.528084653615647e-06, "loss": 0.1006, "step": 42389 }, { "epoch": 0.756073199443513, "grad_norm": 0.282473087310791, "learning_rate": 8.526913801110758e-06, "loss": 0.1233, "step": 42390 }, { "epoch": 0.7560910355652267, "grad_norm": 0.29483911395072937, "learning_rate": 8.525743012461245e-06, "loss": 0.1792, "step": 42391 }, { "epoch": 0.7561088716869404, "grad_norm": 0.259728342294693, "learning_rate": 8.524572287671645e-06, "loss": 0.112, "step": 42392 }, { "epoch": 0.7561267078086541, "grad_norm": 0.31821489334106445, "learning_rate": 8.523401626746495e-06, "loss": 0.1115, "step": 42393 }, { "epoch": 0.7561445439303678, "grad_norm": 0.29007548093795776, "learning_rate": 8.52223102969032e-06, "loss": 0.1334, "step": 42394 }, { "epoch": 0.7561623800520815, "grad_norm": 0.25430139899253845, "learning_rate": 8.521060496507677e-06, "loss": 0.0783, "step": 42395 }, { "epoch": 0.7561802161737952, "grad_norm": 0.27675554156303406, "learning_rate": 8.519890027203096e-06, "loss": 0.1297, "step": 42396 }, { "epoch": 0.7561980522955088, "grad_norm": 0.2595537304878235, "learning_rate": 8.518719621781112e-06, "loss": 0.137, "step": 42397 }, { "epoch": 0.7562158884172225, "grad_norm": 0.24917346239089966, "learning_rate": 8.517549280246256e-06, "loss": 0.0736, "step": 42398 }, { "epoch": 0.7562337245389362, "grad_norm": 0.24004189670085907, "learning_rate": 8.516379002603077e-06, "loss": 0.0758, "step": 42399 }, { "epoch": 0.7562515606606499, "grad_norm": 0.3062804639339447, "learning_rate": 8.515208788856102e-06, "loss": 0.1752, "step": 42400 }, { "epoch": 0.7562693967823636, "grad_norm": 0.21575665473937988, "learning_rate": 8.514038639009881e-06, "loss": 0.0821, "step": 42401 }, { "epoch": 0.7562872329040773, "grad_norm": 0.25711703300476074, "learning_rate": 8.512868553068937e-06, "loss": 0.1061, "step": 42402 }, { "epoch": 0.756305069025791, "grad_norm": 0.32097741961479187, "learning_rate": 8.5116985310378e-06, "loss": 0.0659, "step": 42403 }, { "epoch": 0.7563229051475048, "grad_norm": 0.24532966315746307, "learning_rate": 8.510528572921025e-06, "loss": 0.09, "step": 42404 }, { "epoch": 0.7563407412692185, "grad_norm": 0.19473090767860413, "learning_rate": 8.509358678723137e-06, "loss": 0.0841, "step": 42405 }, { "epoch": 0.7563585773909322, "grad_norm": 0.2386460155248642, "learning_rate": 8.508188848448668e-06, "loss": 0.0687, "step": 42406 }, { "epoch": 0.7563764135126458, "grad_norm": 0.2659556567668915, "learning_rate": 8.507019082102147e-06, "loss": 0.1138, "step": 42407 }, { "epoch": 0.7563942496343595, "grad_norm": 0.3707256019115448, "learning_rate": 8.505849379688127e-06, "loss": 0.0975, "step": 42408 }, { "epoch": 0.7564120857560732, "grad_norm": 0.26180100440979004, "learning_rate": 8.50467974121113e-06, "loss": 0.1048, "step": 42409 }, { "epoch": 0.7564299218777869, "grad_norm": 0.2367788702249527, "learning_rate": 8.50351016667569e-06, "loss": 0.1125, "step": 42410 }, { "epoch": 0.7564477579995006, "grad_norm": 0.24839997291564941, "learning_rate": 8.502340656086347e-06, "loss": 0.1172, "step": 42411 }, { "epoch": 0.7564655941212143, "grad_norm": 0.33145636320114136, "learning_rate": 8.501171209447617e-06, "loss": 0.1381, "step": 42412 }, { "epoch": 0.756483430242928, "grad_norm": 0.35183119773864746, "learning_rate": 8.500001826764048e-06, "loss": 0.119, "step": 42413 }, { "epoch": 0.7565012663646417, "grad_norm": 0.2977330982685089, "learning_rate": 8.49883250804018e-06, "loss": 0.1501, "step": 42414 }, { "epoch": 0.7565191024863553, "grad_norm": 0.2004634290933609, "learning_rate": 8.497663253280536e-06, "loss": 0.0769, "step": 42415 }, { "epoch": 0.756536938608069, "grad_norm": 0.2261001020669937, "learning_rate": 8.496494062489638e-06, "loss": 0.1087, "step": 42416 }, { "epoch": 0.7565547747297827, "grad_norm": 0.21379733085632324, "learning_rate": 8.495324935672039e-06, "loss": 0.0955, "step": 42417 }, { "epoch": 0.7565726108514964, "grad_norm": 0.3068874776363373, "learning_rate": 8.494155872832262e-06, "loss": 0.0923, "step": 42418 }, { "epoch": 0.7565904469732101, "grad_norm": 0.3010251820087433, "learning_rate": 8.492986873974837e-06, "loss": 0.1671, "step": 42419 }, { "epoch": 0.7566082830949239, "grad_norm": 0.2966093420982361, "learning_rate": 8.491817939104287e-06, "loss": 0.1015, "step": 42420 }, { "epoch": 0.7566261192166376, "grad_norm": 0.2878933846950531, "learning_rate": 8.490649068225165e-06, "loss": 0.1143, "step": 42421 }, { "epoch": 0.7566439553383513, "grad_norm": 0.25792810320854187, "learning_rate": 8.489480261341986e-06, "loss": 0.1213, "step": 42422 }, { "epoch": 0.756661791460065, "grad_norm": 0.3670046031475067, "learning_rate": 8.488311518459283e-06, "loss": 0.0999, "step": 42423 }, { "epoch": 0.7566796275817786, "grad_norm": 0.31089597940444946, "learning_rate": 8.48714283958159e-06, "loss": 0.1553, "step": 42424 }, { "epoch": 0.7566974637034923, "grad_norm": 0.2926601767539978, "learning_rate": 8.485974224713425e-06, "loss": 0.127, "step": 42425 }, { "epoch": 0.756715299825206, "grad_norm": 0.22717778384685516, "learning_rate": 8.484805673859335e-06, "loss": 0.0889, "step": 42426 }, { "epoch": 0.7567331359469197, "grad_norm": 0.20641830563545227, "learning_rate": 8.483637187023835e-06, "loss": 0.0857, "step": 42427 }, { "epoch": 0.7567509720686334, "grad_norm": 0.23882833123207092, "learning_rate": 8.482468764211471e-06, "loss": 0.0983, "step": 42428 }, { "epoch": 0.7567688081903471, "grad_norm": 0.2915860414505005, "learning_rate": 8.481300405426756e-06, "loss": 0.1343, "step": 42429 }, { "epoch": 0.7567866443120608, "grad_norm": 0.2899327576160431, "learning_rate": 8.480132110674232e-06, "loss": 0.1138, "step": 42430 }, { "epoch": 0.7568044804337745, "grad_norm": 0.2871955931186676, "learning_rate": 8.47896387995842e-06, "loss": 0.0968, "step": 42431 }, { "epoch": 0.7568223165554882, "grad_norm": 0.2323184609413147, "learning_rate": 8.477795713283853e-06, "loss": 0.1158, "step": 42432 }, { "epoch": 0.7568401526772018, "grad_norm": 0.25878503918647766, "learning_rate": 8.476627610655049e-06, "loss": 0.0896, "step": 42433 }, { "epoch": 0.7568579887989155, "grad_norm": 0.23402409255504608, "learning_rate": 8.475459572076549e-06, "loss": 0.1007, "step": 42434 }, { "epoch": 0.7568758249206292, "grad_norm": 0.31241506338119507, "learning_rate": 8.474291597552877e-06, "loss": 0.1643, "step": 42435 }, { "epoch": 0.7568936610423429, "grad_norm": 0.3281039297580719, "learning_rate": 8.473123687088558e-06, "loss": 0.1313, "step": 42436 }, { "epoch": 0.7569114971640567, "grad_norm": 0.21797817945480347, "learning_rate": 8.47195584068812e-06, "loss": 0.0823, "step": 42437 }, { "epoch": 0.7569293332857704, "grad_norm": 0.2519802749156952, "learning_rate": 8.47078805835608e-06, "loss": 0.1198, "step": 42438 }, { "epoch": 0.7569471694074841, "grad_norm": 0.2547273337841034, "learning_rate": 8.469620340096983e-06, "loss": 0.0805, "step": 42439 }, { "epoch": 0.7569650055291978, "grad_norm": 0.24511700868606567, "learning_rate": 8.46845268591534e-06, "loss": 0.1246, "step": 42440 }, { "epoch": 0.7569828416509115, "grad_norm": 0.3022809624671936, "learning_rate": 8.467285095815694e-06, "loss": 0.138, "step": 42441 }, { "epoch": 0.7570006777726251, "grad_norm": 0.2824322581291199, "learning_rate": 8.46611756980255e-06, "loss": 0.1396, "step": 42442 }, { "epoch": 0.7570185138943388, "grad_norm": 0.36678019165992737, "learning_rate": 8.464950107880453e-06, "loss": 0.096, "step": 42443 }, { "epoch": 0.7570363500160525, "grad_norm": 0.2552364766597748, "learning_rate": 8.463782710053922e-06, "loss": 0.1373, "step": 42444 }, { "epoch": 0.7570541861377662, "grad_norm": 0.31479692459106445, "learning_rate": 8.462615376327482e-06, "loss": 0.0658, "step": 42445 }, { "epoch": 0.7570720222594799, "grad_norm": 0.2944337725639343, "learning_rate": 8.461448106705644e-06, "loss": 0.1626, "step": 42446 }, { "epoch": 0.7570898583811936, "grad_norm": 0.2544756233692169, "learning_rate": 8.460280901192957e-06, "loss": 0.1014, "step": 42447 }, { "epoch": 0.7571076945029073, "grad_norm": 0.26997318863868713, "learning_rate": 8.459113759793933e-06, "loss": 0.1308, "step": 42448 }, { "epoch": 0.757125530624621, "grad_norm": 0.29476282000541687, "learning_rate": 8.4579466825131e-06, "loss": 0.13, "step": 42449 }, { "epoch": 0.7571433667463346, "grad_norm": 0.33941540122032166, "learning_rate": 8.456779669354975e-06, "loss": 0.1184, "step": 42450 }, { "epoch": 0.7571612028680483, "grad_norm": 0.38993942737579346, "learning_rate": 8.455612720324078e-06, "loss": 0.1438, "step": 42451 }, { "epoch": 0.757179038989762, "grad_norm": 0.30195915699005127, "learning_rate": 8.454445835424948e-06, "loss": 0.1412, "step": 42452 }, { "epoch": 0.7571968751114757, "grad_norm": 0.31078389286994934, "learning_rate": 8.453279014662101e-06, "loss": 0.1843, "step": 42453 }, { "epoch": 0.7572147112331895, "grad_norm": 0.22774893045425415, "learning_rate": 8.452112258040054e-06, "loss": 0.0847, "step": 42454 }, { "epoch": 0.7572325473549032, "grad_norm": 0.3099285066127777, "learning_rate": 8.450945565563342e-06, "loss": 0.1199, "step": 42455 }, { "epoch": 0.7572503834766169, "grad_norm": 0.26423385739326477, "learning_rate": 8.449778937236472e-06, "loss": 0.0943, "step": 42456 }, { "epoch": 0.7572682195983306, "grad_norm": 0.23151858150959015, "learning_rate": 8.448612373063985e-06, "loss": 0.1277, "step": 42457 }, { "epoch": 0.7572860557200443, "grad_norm": 0.26518574357032776, "learning_rate": 8.447445873050392e-06, "loss": 0.1428, "step": 42458 }, { "epoch": 0.757303891841758, "grad_norm": 0.22830712795257568, "learning_rate": 8.446279437200208e-06, "loss": 0.0928, "step": 42459 }, { "epoch": 0.7573217279634716, "grad_norm": 0.26572519540786743, "learning_rate": 8.44511306551797e-06, "loss": 0.0984, "step": 42460 }, { "epoch": 0.7573395640851853, "grad_norm": 0.2624780237674713, "learning_rate": 8.443946758008192e-06, "loss": 0.1225, "step": 42461 }, { "epoch": 0.757357400206899, "grad_norm": 0.2891901731491089, "learning_rate": 8.442780514675395e-06, "loss": 0.0605, "step": 42462 }, { "epoch": 0.7573752363286127, "grad_norm": 0.21807990968227386, "learning_rate": 8.441614335524098e-06, "loss": 0.0899, "step": 42463 }, { "epoch": 0.7573930724503264, "grad_norm": 0.3276066780090332, "learning_rate": 8.440448220558817e-06, "loss": 0.1323, "step": 42464 }, { "epoch": 0.7574109085720401, "grad_norm": 0.22362357378005981, "learning_rate": 8.439282169784083e-06, "loss": 0.1019, "step": 42465 }, { "epoch": 0.7574287446937538, "grad_norm": 0.2000609189271927, "learning_rate": 8.438116183204414e-06, "loss": 0.0836, "step": 42466 }, { "epoch": 0.7574465808154675, "grad_norm": 0.19653943181037903, "learning_rate": 8.436950260824317e-06, "loss": 0.0752, "step": 42467 }, { "epoch": 0.7574644169371811, "grad_norm": 0.2805749773979187, "learning_rate": 8.43578440264833e-06, "loss": 0.1054, "step": 42468 }, { "epoch": 0.7574822530588948, "grad_norm": 0.32110410928726196, "learning_rate": 8.434618608680959e-06, "loss": 0.1299, "step": 42469 }, { "epoch": 0.7575000891806085, "grad_norm": 0.24727442860603333, "learning_rate": 8.433452878926731e-06, "loss": 0.1158, "step": 42470 }, { "epoch": 0.7575179253023223, "grad_norm": 0.30695629119873047, "learning_rate": 8.432287213390164e-06, "loss": 0.1622, "step": 42471 }, { "epoch": 0.757535761424036, "grad_norm": 0.21809907257556915, "learning_rate": 8.431121612075766e-06, "loss": 0.091, "step": 42472 }, { "epoch": 0.7575535975457497, "grad_norm": 0.32574430108070374, "learning_rate": 8.429956074988071e-06, "loss": 0.1056, "step": 42473 }, { "epoch": 0.7575714336674634, "grad_norm": 0.24971674382686615, "learning_rate": 8.42879060213159e-06, "loss": 0.105, "step": 42474 }, { "epoch": 0.7575892697891771, "grad_norm": 0.26743370294570923, "learning_rate": 8.427625193510839e-06, "loss": 0.1003, "step": 42475 }, { "epoch": 0.7576071059108908, "grad_norm": 0.17743538320064545, "learning_rate": 8.426459849130339e-06, "loss": 0.0611, "step": 42476 }, { "epoch": 0.7576249420326044, "grad_norm": 0.30535629391670227, "learning_rate": 8.425294568994593e-06, "loss": 0.1781, "step": 42477 }, { "epoch": 0.7576427781543181, "grad_norm": 0.46546825766563416, "learning_rate": 8.42412935310814e-06, "loss": 0.1494, "step": 42478 }, { "epoch": 0.7576606142760318, "grad_norm": 0.31428396701812744, "learning_rate": 8.42296420147549e-06, "loss": 0.1257, "step": 42479 }, { "epoch": 0.7576784503977455, "grad_norm": 0.3928177058696747, "learning_rate": 8.42179911410115e-06, "loss": 0.1045, "step": 42480 }, { "epoch": 0.7576962865194592, "grad_norm": 0.22976143658161163, "learning_rate": 8.420634090989638e-06, "loss": 0.0847, "step": 42481 }, { "epoch": 0.7577141226411729, "grad_norm": 0.30515724420547485, "learning_rate": 8.419469132145483e-06, "loss": 0.1395, "step": 42482 }, { "epoch": 0.7577319587628866, "grad_norm": 0.2620439827442169, "learning_rate": 8.418304237573182e-06, "loss": 0.135, "step": 42483 }, { "epoch": 0.7577497948846003, "grad_norm": 0.3544202446937561, "learning_rate": 8.417139407277273e-06, "loss": 0.0561, "step": 42484 }, { "epoch": 0.757767631006314, "grad_norm": 0.1993686854839325, "learning_rate": 8.415974641262248e-06, "loss": 0.0667, "step": 42485 }, { "epoch": 0.7577854671280276, "grad_norm": 0.18143802881240845, "learning_rate": 8.414809939532642e-06, "loss": 0.0507, "step": 42486 }, { "epoch": 0.7578033032497413, "grad_norm": 0.2108558714389801, "learning_rate": 8.413645302092962e-06, "loss": 0.1245, "step": 42487 }, { "epoch": 0.7578211393714551, "grad_norm": 0.2468854784965515, "learning_rate": 8.41248072894772e-06, "loss": 0.1249, "step": 42488 }, { "epoch": 0.7578389754931688, "grad_norm": 0.2811237871646881, "learning_rate": 8.41131622010143e-06, "loss": 0.1205, "step": 42489 }, { "epoch": 0.7578568116148825, "grad_norm": 0.2078595757484436, "learning_rate": 8.410151775558603e-06, "loss": 0.0594, "step": 42490 }, { "epoch": 0.7578746477365962, "grad_norm": 0.2517724931240082, "learning_rate": 8.408987395323766e-06, "loss": 0.0812, "step": 42491 }, { "epoch": 0.7578924838583099, "grad_norm": 0.22889970242977142, "learning_rate": 8.407823079401423e-06, "loss": 0.0764, "step": 42492 }, { "epoch": 0.7579103199800236, "grad_norm": 0.2879985272884369, "learning_rate": 8.406658827796091e-06, "loss": 0.1259, "step": 42493 }, { "epoch": 0.7579281561017372, "grad_norm": 0.20380660891532898, "learning_rate": 8.405494640512269e-06, "loss": 0.0844, "step": 42494 }, { "epoch": 0.7579459922234509, "grad_norm": 0.26806068420410156, "learning_rate": 8.40433051755449e-06, "loss": 0.1145, "step": 42495 }, { "epoch": 0.7579638283451646, "grad_norm": 0.3038773536682129, "learning_rate": 8.40316645892725e-06, "loss": 0.1166, "step": 42496 }, { "epoch": 0.7579816644668783, "grad_norm": 0.2840266823768616, "learning_rate": 8.402002464635079e-06, "loss": 0.1255, "step": 42497 }, { "epoch": 0.757999500588592, "grad_norm": 0.24185702204704285, "learning_rate": 8.400838534682468e-06, "loss": 0.1047, "step": 42498 }, { "epoch": 0.7580173367103057, "grad_norm": 0.202910378575325, "learning_rate": 8.399674669073952e-06, "loss": 0.1034, "step": 42499 }, { "epoch": 0.7580351728320194, "grad_norm": 0.28981637954711914, "learning_rate": 8.398510867814026e-06, "loss": 0.1045, "step": 42500 }, { "epoch": 0.7580530089537331, "grad_norm": 0.270341157913208, "learning_rate": 8.39734713090721e-06, "loss": 0.1438, "step": 42501 }, { "epoch": 0.7580708450754468, "grad_norm": 0.295109361410141, "learning_rate": 8.39618345835801e-06, "loss": 0.1141, "step": 42502 }, { "epoch": 0.7580886811971604, "grad_norm": 0.3369133174419403, "learning_rate": 8.395019850170927e-06, "loss": 0.1157, "step": 42503 }, { "epoch": 0.7581065173188741, "grad_norm": 0.24019691348075867, "learning_rate": 8.393856306350493e-06, "loss": 0.1172, "step": 42504 }, { "epoch": 0.7581243534405879, "grad_norm": 0.22990193963050842, "learning_rate": 8.392692826901205e-06, "loss": 0.1353, "step": 42505 }, { "epoch": 0.7581421895623016, "grad_norm": 0.3955802619457245, "learning_rate": 8.391529411827578e-06, "loss": 0.1299, "step": 42506 }, { "epoch": 0.7581600256840153, "grad_norm": 0.24784225225448608, "learning_rate": 8.39036606113411e-06, "loss": 0.1076, "step": 42507 }, { "epoch": 0.758177861805729, "grad_norm": 0.2705511748790741, "learning_rate": 8.389202774825328e-06, "loss": 0.1149, "step": 42508 }, { "epoch": 0.7581956979274427, "grad_norm": 0.3075313866138458, "learning_rate": 8.388039552905735e-06, "loss": 0.1325, "step": 42509 }, { "epoch": 0.7582135340491564, "grad_norm": 0.32823804020881653, "learning_rate": 8.386876395379828e-06, "loss": 0.1776, "step": 42510 }, { "epoch": 0.7582313701708701, "grad_norm": 0.3254458010196686, "learning_rate": 8.385713302252136e-06, "loss": 0.1512, "step": 42511 }, { "epoch": 0.7582492062925837, "grad_norm": 0.20118288695812225, "learning_rate": 8.384550273527148e-06, "loss": 0.07, "step": 42512 }, { "epoch": 0.7582670424142974, "grad_norm": 0.29054129123687744, "learning_rate": 8.383387309209389e-06, "loss": 0.1583, "step": 42513 }, { "epoch": 0.7582848785360111, "grad_norm": 0.22485169768333435, "learning_rate": 8.382224409303364e-06, "loss": 0.111, "step": 42514 }, { "epoch": 0.7583027146577248, "grad_norm": 0.3353945314884186, "learning_rate": 8.381061573813573e-06, "loss": 0.1327, "step": 42515 }, { "epoch": 0.7583205507794385, "grad_norm": 0.26426833868026733, "learning_rate": 8.37989880274452e-06, "loss": 0.1025, "step": 42516 }, { "epoch": 0.7583383869011522, "grad_norm": 0.2605058550834656, "learning_rate": 8.378736096100728e-06, "loss": 0.1123, "step": 42517 }, { "epoch": 0.7583562230228659, "grad_norm": 0.32913678884506226, "learning_rate": 8.377573453886698e-06, "loss": 0.1327, "step": 42518 }, { "epoch": 0.7583740591445796, "grad_norm": 0.21796973049640656, "learning_rate": 8.376410876106932e-06, "loss": 0.1402, "step": 42519 }, { "epoch": 0.7583918952662932, "grad_norm": 0.3871913552284241, "learning_rate": 8.375248362765931e-06, "loss": 0.1332, "step": 42520 }, { "epoch": 0.758409731388007, "grad_norm": 0.26409193873405457, "learning_rate": 8.374085913868218e-06, "loss": 0.1256, "step": 42521 }, { "epoch": 0.7584275675097207, "grad_norm": 0.2507871985435486, "learning_rate": 8.372923529418291e-06, "loss": 0.1171, "step": 42522 }, { "epoch": 0.7584454036314344, "grad_norm": 0.29590460658073425, "learning_rate": 8.371761209420648e-06, "loss": 0.1174, "step": 42523 }, { "epoch": 0.7584632397531481, "grad_norm": 0.27333101630210876, "learning_rate": 8.370598953879808e-06, "loss": 0.1153, "step": 42524 }, { "epoch": 0.7584810758748618, "grad_norm": 0.24664399027824402, "learning_rate": 8.369436762800262e-06, "loss": 0.1387, "step": 42525 }, { "epoch": 0.7584989119965755, "grad_norm": 0.22566759586334229, "learning_rate": 8.36827463618653e-06, "loss": 0.1208, "step": 42526 }, { "epoch": 0.7585167481182892, "grad_norm": 0.2601856589317322, "learning_rate": 8.367112574043111e-06, "loss": 0.1345, "step": 42527 }, { "epoch": 0.7585345842400029, "grad_norm": 0.26377347111701965, "learning_rate": 8.36595057637451e-06, "loss": 0.0978, "step": 42528 }, { "epoch": 0.7585524203617166, "grad_norm": 0.2287953644990921, "learning_rate": 8.36478864318522e-06, "loss": 0.0904, "step": 42529 }, { "epoch": 0.7585702564834302, "grad_norm": 0.4153529107570648, "learning_rate": 8.363626774479763e-06, "loss": 0.1178, "step": 42530 }, { "epoch": 0.7585880926051439, "grad_norm": 0.2789282500743866, "learning_rate": 8.362464970262638e-06, "loss": 0.1389, "step": 42531 }, { "epoch": 0.7586059287268576, "grad_norm": 0.3188031315803528, "learning_rate": 8.361303230538342e-06, "loss": 0.1667, "step": 42532 }, { "epoch": 0.7586237648485713, "grad_norm": 0.2907651960849762, "learning_rate": 8.360141555311373e-06, "loss": 0.0757, "step": 42533 }, { "epoch": 0.758641600970285, "grad_norm": 0.23574650287628174, "learning_rate": 8.35897994458625e-06, "loss": 0.0459, "step": 42534 }, { "epoch": 0.7586594370919987, "grad_norm": 0.2789841890335083, "learning_rate": 8.35781839836747e-06, "loss": 0.1178, "step": 42535 }, { "epoch": 0.7586772732137124, "grad_norm": 0.22705766558647156, "learning_rate": 8.356656916659534e-06, "loss": 0.1056, "step": 42536 }, { "epoch": 0.758695109335426, "grad_norm": 0.2200649380683899, "learning_rate": 8.355495499466936e-06, "loss": 0.1045, "step": 42537 }, { "epoch": 0.7587129454571399, "grad_norm": 0.3629782497882843, "learning_rate": 8.354334146794193e-06, "loss": 0.1112, "step": 42538 }, { "epoch": 0.7587307815788535, "grad_norm": 0.3256164789199829, "learning_rate": 8.353172858645794e-06, "loss": 0.1198, "step": 42539 }, { "epoch": 0.7587486177005672, "grad_norm": 0.2853577435016632, "learning_rate": 8.352011635026254e-06, "loss": 0.1426, "step": 42540 }, { "epoch": 0.7587664538222809, "grad_norm": 0.25813165307044983, "learning_rate": 8.350850475940068e-06, "loss": 0.1304, "step": 42541 }, { "epoch": 0.7587842899439946, "grad_norm": 0.26125818490982056, "learning_rate": 8.349689381391727e-06, "loss": 0.1038, "step": 42542 }, { "epoch": 0.7588021260657083, "grad_norm": 0.2559528648853302, "learning_rate": 8.348528351385748e-06, "loss": 0.0791, "step": 42543 }, { "epoch": 0.758819962187422, "grad_norm": 0.22897832095623016, "learning_rate": 8.347367385926624e-06, "loss": 0.1054, "step": 42544 }, { "epoch": 0.7588377983091357, "grad_norm": 0.2834038734436035, "learning_rate": 8.346206485018856e-06, "loss": 0.089, "step": 42545 }, { "epoch": 0.7588556344308494, "grad_norm": 0.3164770305156708, "learning_rate": 8.345045648666939e-06, "loss": 0.1039, "step": 42546 }, { "epoch": 0.758873470552563, "grad_norm": 0.2670660614967346, "learning_rate": 8.34388487687538e-06, "loss": 0.2029, "step": 42547 }, { "epoch": 0.7588913066742767, "grad_norm": 0.23607979714870453, "learning_rate": 8.34272416964868e-06, "loss": 0.0802, "step": 42548 }, { "epoch": 0.7589091427959904, "grad_norm": 0.23584768176078796, "learning_rate": 8.341563526991333e-06, "loss": 0.1708, "step": 42549 }, { "epoch": 0.7589269789177041, "grad_norm": 0.2171461433172226, "learning_rate": 8.340402948907832e-06, "loss": 0.0772, "step": 42550 }, { "epoch": 0.7589448150394178, "grad_norm": 0.33473798632621765, "learning_rate": 8.339242435402692e-06, "loss": 0.1016, "step": 42551 }, { "epoch": 0.7589626511611315, "grad_norm": 0.21547746658325195, "learning_rate": 8.338081986480392e-06, "loss": 0.0735, "step": 42552 }, { "epoch": 0.7589804872828452, "grad_norm": 0.249026820063591, "learning_rate": 8.33692160214545e-06, "loss": 0.0614, "step": 42553 }, { "epoch": 0.7589983234045589, "grad_norm": 0.3808281123638153, "learning_rate": 8.335761282402349e-06, "loss": 0.1411, "step": 42554 }, { "epoch": 0.7590161595262727, "grad_norm": 0.28560081124305725, "learning_rate": 8.334601027255598e-06, "loss": 0.132, "step": 42555 }, { "epoch": 0.7590339956479863, "grad_norm": 0.3320583701133728, "learning_rate": 8.33344083670969e-06, "loss": 0.1517, "step": 42556 }, { "epoch": 0.7590518317697, "grad_norm": 0.2528710961341858, "learning_rate": 8.332280710769124e-06, "loss": 0.146, "step": 42557 }, { "epoch": 0.7590696678914137, "grad_norm": 0.20687957108020782, "learning_rate": 8.331120649438396e-06, "loss": 0.0975, "step": 42558 }, { "epoch": 0.7590875040131274, "grad_norm": 0.23713906109333038, "learning_rate": 8.32996065272199e-06, "loss": 0.0865, "step": 42559 }, { "epoch": 0.7591053401348411, "grad_norm": 0.24285969138145447, "learning_rate": 8.328800720624424e-06, "loss": 0.1184, "step": 42560 }, { "epoch": 0.7591231762565548, "grad_norm": 0.24162907898426056, "learning_rate": 8.327640853150184e-06, "loss": 0.1142, "step": 42561 }, { "epoch": 0.7591410123782685, "grad_norm": 0.2637457847595215, "learning_rate": 8.326481050303766e-06, "loss": 0.1234, "step": 42562 }, { "epoch": 0.7591588484999822, "grad_norm": 0.27374517917633057, "learning_rate": 8.325321312089658e-06, "loss": 0.1305, "step": 42563 }, { "epoch": 0.7591766846216959, "grad_norm": 0.21077707409858704, "learning_rate": 8.324161638512373e-06, "loss": 0.1107, "step": 42564 }, { "epoch": 0.7591945207434095, "grad_norm": 0.25188934803009033, "learning_rate": 8.323002029576397e-06, "loss": 0.1056, "step": 42565 }, { "epoch": 0.7592123568651232, "grad_norm": 0.24743735790252686, "learning_rate": 8.321842485286216e-06, "loss": 0.1006, "step": 42566 }, { "epoch": 0.7592301929868369, "grad_norm": 0.41030362248420715, "learning_rate": 8.320683005646335e-06, "loss": 0.1114, "step": 42567 }, { "epoch": 0.7592480291085506, "grad_norm": 0.2505373954772949, "learning_rate": 8.319523590661255e-06, "loss": 0.1549, "step": 42568 }, { "epoch": 0.7592658652302643, "grad_norm": 0.2733801305294037, "learning_rate": 8.318364240335463e-06, "loss": 0.0787, "step": 42569 }, { "epoch": 0.759283701351978, "grad_norm": 0.29073014855384827, "learning_rate": 8.317204954673455e-06, "loss": 0.1175, "step": 42570 }, { "epoch": 0.7593015374736917, "grad_norm": 0.2475467324256897, "learning_rate": 8.31604573367972e-06, "loss": 0.1188, "step": 42571 }, { "epoch": 0.7593193735954055, "grad_norm": 0.23538252711296082, "learning_rate": 8.314886577358747e-06, "loss": 0.1141, "step": 42572 }, { "epoch": 0.7593372097171192, "grad_norm": 0.2203197479248047, "learning_rate": 8.313727485715042e-06, "loss": 0.106, "step": 42573 }, { "epoch": 0.7593550458388328, "grad_norm": 0.33786508440971375, "learning_rate": 8.312568458753095e-06, "loss": 0.1069, "step": 42574 }, { "epoch": 0.7593728819605465, "grad_norm": 0.29235002398490906, "learning_rate": 8.311409496477399e-06, "loss": 0.0876, "step": 42575 }, { "epoch": 0.7593907180822602, "grad_norm": 0.4059620201587677, "learning_rate": 8.31025059889243e-06, "loss": 0.0718, "step": 42576 }, { "epoch": 0.7594085542039739, "grad_norm": 0.32068195939064026, "learning_rate": 8.309091766002708e-06, "loss": 0.1316, "step": 42577 }, { "epoch": 0.7594263903256876, "grad_norm": 0.3006241023540497, "learning_rate": 8.307932997812706e-06, "loss": 0.1222, "step": 42578 }, { "epoch": 0.7594442264474013, "grad_norm": 0.2777824103832245, "learning_rate": 8.306774294326916e-06, "loss": 0.1245, "step": 42579 }, { "epoch": 0.759462062569115, "grad_norm": 0.2003381997346878, "learning_rate": 8.305615655549842e-06, "loss": 0.1527, "step": 42580 }, { "epoch": 0.7594798986908287, "grad_norm": 0.39176928997039795, "learning_rate": 8.30445708148596e-06, "loss": 0.1671, "step": 42581 }, { "epoch": 0.7594977348125423, "grad_norm": 0.24290509521961212, "learning_rate": 8.303298572139775e-06, "loss": 0.1043, "step": 42582 }, { "epoch": 0.759515570934256, "grad_norm": 0.3411197364330292, "learning_rate": 8.302140127515773e-06, "loss": 0.1449, "step": 42583 }, { "epoch": 0.7595334070559697, "grad_norm": 0.2943204343318939, "learning_rate": 8.300981747618444e-06, "loss": 0.1528, "step": 42584 }, { "epoch": 0.7595512431776834, "grad_norm": 0.25660815834999084, "learning_rate": 8.299823432452267e-06, "loss": 0.0929, "step": 42585 }, { "epoch": 0.7595690792993971, "grad_norm": 0.26953575015068054, "learning_rate": 8.298665182021753e-06, "loss": 0.0697, "step": 42586 }, { "epoch": 0.7595869154211108, "grad_norm": 0.23611371219158173, "learning_rate": 8.29750699633138e-06, "loss": 0.1118, "step": 42587 }, { "epoch": 0.7596047515428245, "grad_norm": 0.32746422290802, "learning_rate": 8.29634887538564e-06, "loss": 0.1158, "step": 42588 }, { "epoch": 0.7596225876645383, "grad_norm": 0.33476951718330383, "learning_rate": 8.295190819189012e-06, "loss": 0.1235, "step": 42589 }, { "epoch": 0.759640423786252, "grad_norm": 0.3064015805721283, "learning_rate": 8.294032827746002e-06, "loss": 0.1271, "step": 42590 }, { "epoch": 0.7596582599079656, "grad_norm": 0.26001209020614624, "learning_rate": 8.29287490106109e-06, "loss": 0.0895, "step": 42591 }, { "epoch": 0.7596760960296793, "grad_norm": 0.26250532269477844, "learning_rate": 8.291717039138765e-06, "loss": 0.0881, "step": 42592 }, { "epoch": 0.759693932151393, "grad_norm": 0.26296573877334595, "learning_rate": 8.29055924198351e-06, "loss": 0.1197, "step": 42593 }, { "epoch": 0.7597117682731067, "grad_norm": 0.2375052124261856, "learning_rate": 8.289401509599817e-06, "loss": 0.1153, "step": 42594 }, { "epoch": 0.7597296043948204, "grad_norm": 0.24330399930477142, "learning_rate": 8.288243841992182e-06, "loss": 0.0803, "step": 42595 }, { "epoch": 0.7597474405165341, "grad_norm": 0.3472011387348175, "learning_rate": 8.287086239165088e-06, "loss": 0.1405, "step": 42596 }, { "epoch": 0.7597652766382478, "grad_norm": 0.2950378358364105, "learning_rate": 8.28592870112302e-06, "loss": 0.1107, "step": 42597 }, { "epoch": 0.7597831127599615, "grad_norm": 0.3199588656425476, "learning_rate": 8.284771227870458e-06, "loss": 0.1313, "step": 42598 }, { "epoch": 0.7598009488816752, "grad_norm": 0.31105688214302063, "learning_rate": 8.283613819411903e-06, "loss": 0.1575, "step": 42599 }, { "epoch": 0.7598187850033888, "grad_norm": 0.30236220359802246, "learning_rate": 8.282456475751835e-06, "loss": 0.1463, "step": 42600 }, { "epoch": 0.7598366211251025, "grad_norm": 0.30073082447052, "learning_rate": 8.281299196894737e-06, "loss": 0.1457, "step": 42601 }, { "epoch": 0.7598544572468162, "grad_norm": 0.3153354525566101, "learning_rate": 8.280141982845093e-06, "loss": 0.1342, "step": 42602 }, { "epoch": 0.7598722933685299, "grad_norm": 0.2673802375793457, "learning_rate": 8.2789848336074e-06, "loss": 0.1323, "step": 42603 }, { "epoch": 0.7598901294902436, "grad_norm": 0.26933273673057556, "learning_rate": 8.277827749186137e-06, "loss": 0.1065, "step": 42604 }, { "epoch": 0.7599079656119573, "grad_norm": 0.20391269028186798, "learning_rate": 8.276670729585793e-06, "loss": 0.0766, "step": 42605 }, { "epoch": 0.7599258017336711, "grad_norm": 0.44093039631843567, "learning_rate": 8.275513774810837e-06, "loss": 0.0868, "step": 42606 }, { "epoch": 0.7599436378553848, "grad_norm": 0.23069758713245392, "learning_rate": 8.274356884865775e-06, "loss": 0.0801, "step": 42607 }, { "epoch": 0.7599614739770985, "grad_norm": 0.270152747631073, "learning_rate": 8.273200059755073e-06, "loss": 0.1506, "step": 42608 }, { "epoch": 0.7599793100988121, "grad_norm": 0.24902886152267456, "learning_rate": 8.272043299483234e-06, "loss": 0.1351, "step": 42609 }, { "epoch": 0.7599971462205258, "grad_norm": 0.28365853428840637, "learning_rate": 8.270886604054734e-06, "loss": 0.1666, "step": 42610 }, { "epoch": 0.7600149823422395, "grad_norm": 0.3187510669231415, "learning_rate": 8.269729973474047e-06, "loss": 0.1324, "step": 42611 }, { "epoch": 0.7600328184639532, "grad_norm": 0.2684469223022461, "learning_rate": 8.268573407745676e-06, "loss": 0.1394, "step": 42612 }, { "epoch": 0.7600506545856669, "grad_norm": 0.24168717861175537, "learning_rate": 8.26741690687409e-06, "loss": 0.1191, "step": 42613 }, { "epoch": 0.7600684907073806, "grad_norm": 0.2750702500343323, "learning_rate": 8.266260470863774e-06, "loss": 0.1099, "step": 42614 }, { "epoch": 0.7600863268290943, "grad_norm": 0.38823944330215454, "learning_rate": 8.265104099719206e-06, "loss": 0.0807, "step": 42615 }, { "epoch": 0.760104162950808, "grad_norm": 0.28940507769584656, "learning_rate": 8.263947793444882e-06, "loss": 0.0924, "step": 42616 }, { "epoch": 0.7601219990725216, "grad_norm": 0.3433786928653717, "learning_rate": 8.262791552045277e-06, "loss": 0.1597, "step": 42617 }, { "epoch": 0.7601398351942353, "grad_norm": 0.2491087168455124, "learning_rate": 8.261635375524874e-06, "loss": 0.0832, "step": 42618 }, { "epoch": 0.760157671315949, "grad_norm": 0.39373230934143066, "learning_rate": 8.260479263888143e-06, "loss": 0.1013, "step": 42619 }, { "epoch": 0.7601755074376627, "grad_norm": 0.3260776102542877, "learning_rate": 8.259323217139586e-06, "loss": 0.1393, "step": 42620 }, { "epoch": 0.7601933435593764, "grad_norm": 0.2427917867898941, "learning_rate": 8.258167235283665e-06, "loss": 0.0787, "step": 42621 }, { "epoch": 0.7602111796810902, "grad_norm": 0.22220590710639954, "learning_rate": 8.25701131832488e-06, "loss": 0.1285, "step": 42622 }, { "epoch": 0.7602290158028039, "grad_norm": 0.23178735375404358, "learning_rate": 8.2558554662677e-06, "loss": 0.0982, "step": 42623 }, { "epoch": 0.7602468519245176, "grad_norm": 0.17036950588226318, "learning_rate": 8.2546996791166e-06, "loss": 0.0751, "step": 42624 }, { "epoch": 0.7602646880462313, "grad_norm": 0.2511630654335022, "learning_rate": 8.253543956876077e-06, "loss": 0.1211, "step": 42625 }, { "epoch": 0.760282524167945, "grad_norm": 0.24705013632774353, "learning_rate": 8.252388299550601e-06, "loss": 0.1367, "step": 42626 }, { "epoch": 0.7603003602896586, "grad_norm": 0.21071964502334595, "learning_rate": 8.251232707144652e-06, "loss": 0.1128, "step": 42627 }, { "epoch": 0.7603181964113723, "grad_norm": 0.2599203884601593, "learning_rate": 8.250077179662701e-06, "loss": 0.1135, "step": 42628 }, { "epoch": 0.760336032533086, "grad_norm": 0.3434358537197113, "learning_rate": 8.248921717109246e-06, "loss": 0.1294, "step": 42629 }, { "epoch": 0.7603538686547997, "grad_norm": 0.34271255135536194, "learning_rate": 8.247766319488755e-06, "loss": 0.1077, "step": 42630 }, { "epoch": 0.7603717047765134, "grad_norm": 0.31806135177612305, "learning_rate": 8.246610986805709e-06, "loss": 0.1504, "step": 42631 }, { "epoch": 0.7603895408982271, "grad_norm": 0.20858317613601685, "learning_rate": 8.245455719064576e-06, "loss": 0.1361, "step": 42632 }, { "epoch": 0.7604073770199408, "grad_norm": 0.22832196950912476, "learning_rate": 8.244300516269851e-06, "loss": 0.1089, "step": 42633 }, { "epoch": 0.7604252131416545, "grad_norm": 0.261154443025589, "learning_rate": 8.243145378426003e-06, "loss": 0.1381, "step": 42634 }, { "epoch": 0.7604430492633681, "grad_norm": 0.3797610104084015, "learning_rate": 8.241990305537506e-06, "loss": 0.1869, "step": 42635 }, { "epoch": 0.7604608853850818, "grad_norm": 0.21285179257392883, "learning_rate": 8.240835297608851e-06, "loss": 0.0838, "step": 42636 }, { "epoch": 0.7604787215067955, "grad_norm": 0.2694287896156311, "learning_rate": 8.239680354644496e-06, "loss": 0.1309, "step": 42637 }, { "epoch": 0.7604965576285092, "grad_norm": 0.2810622453689575, "learning_rate": 8.238525476648939e-06, "loss": 0.1103, "step": 42638 }, { "epoch": 0.760514393750223, "grad_norm": 0.21039675176143646, "learning_rate": 8.237370663626648e-06, "loss": 0.0979, "step": 42639 }, { "epoch": 0.7605322298719367, "grad_norm": 0.2081712931394577, "learning_rate": 8.236215915582096e-06, "loss": 0.0664, "step": 42640 }, { "epoch": 0.7605500659936504, "grad_norm": 0.25597190856933594, "learning_rate": 8.235061232519753e-06, "loss": 0.1026, "step": 42641 }, { "epoch": 0.7605679021153641, "grad_norm": 0.29841238260269165, "learning_rate": 8.23390661444411e-06, "loss": 0.1438, "step": 42642 }, { "epoch": 0.7605857382370778, "grad_norm": 0.2845439910888672, "learning_rate": 8.232752061359636e-06, "loss": 0.0884, "step": 42643 }, { "epoch": 0.7606035743587914, "grad_norm": 0.3196823298931122, "learning_rate": 8.231597573270807e-06, "loss": 0.1429, "step": 42644 }, { "epoch": 0.7606214104805051, "grad_norm": 0.24257633090019226, "learning_rate": 8.23044315018209e-06, "loss": 0.1076, "step": 42645 }, { "epoch": 0.7606392466022188, "grad_norm": 0.2125617414712906, "learning_rate": 8.229288792097975e-06, "loss": 0.0812, "step": 42646 }, { "epoch": 0.7606570827239325, "grad_norm": 0.21107307076454163, "learning_rate": 8.22813449902293e-06, "loss": 0.13, "step": 42647 }, { "epoch": 0.7606749188456462, "grad_norm": 0.25327208638191223, "learning_rate": 8.226980270961418e-06, "loss": 0.0698, "step": 42648 }, { "epoch": 0.7606927549673599, "grad_norm": 0.29937681555747986, "learning_rate": 8.225826107917933e-06, "loss": 0.0705, "step": 42649 }, { "epoch": 0.7607105910890736, "grad_norm": 0.21487462520599365, "learning_rate": 8.224672009896934e-06, "loss": 0.0646, "step": 42650 }, { "epoch": 0.7607284272107873, "grad_norm": 0.3470233380794525, "learning_rate": 8.223517976902903e-06, "loss": 0.1011, "step": 42651 }, { "epoch": 0.760746263332501, "grad_norm": 0.21214036643505096, "learning_rate": 8.222364008940313e-06, "loss": 0.0788, "step": 42652 }, { "epoch": 0.7607640994542146, "grad_norm": 0.2770938277244568, "learning_rate": 8.221210106013638e-06, "loss": 0.1051, "step": 42653 }, { "epoch": 0.7607819355759283, "grad_norm": 0.38843944668769836, "learning_rate": 8.220056268127338e-06, "loss": 0.0986, "step": 42654 }, { "epoch": 0.760799771697642, "grad_norm": 0.3177003264427185, "learning_rate": 8.218902495285901e-06, "loss": 0.1292, "step": 42655 }, { "epoch": 0.7608176078193558, "grad_norm": 0.25186216831207275, "learning_rate": 8.217748787493798e-06, "loss": 0.1658, "step": 42656 }, { "epoch": 0.7608354439410695, "grad_norm": 0.2543962001800537, "learning_rate": 8.216595144755493e-06, "loss": 0.144, "step": 42657 }, { "epoch": 0.7608532800627832, "grad_norm": 0.27730339765548706, "learning_rate": 8.215441567075458e-06, "loss": 0.0867, "step": 42658 }, { "epoch": 0.7608711161844969, "grad_norm": 0.4992286264896393, "learning_rate": 8.214288054458175e-06, "loss": 0.1924, "step": 42659 }, { "epoch": 0.7608889523062106, "grad_norm": 0.2354087382555008, "learning_rate": 8.213134606908108e-06, "loss": 0.1111, "step": 42660 }, { "epoch": 0.7609067884279243, "grad_norm": 0.2836313843727112, "learning_rate": 8.21198122442973e-06, "loss": 0.1041, "step": 42661 }, { "epoch": 0.7609246245496379, "grad_norm": 0.3752792775630951, "learning_rate": 8.210827907027501e-06, "loss": 0.1949, "step": 42662 }, { "epoch": 0.7609424606713516, "grad_norm": 0.2538500428199768, "learning_rate": 8.209674654705915e-06, "loss": 0.1046, "step": 42663 }, { "epoch": 0.7609602967930653, "grad_norm": 0.24966923892498016, "learning_rate": 8.208521467469418e-06, "loss": 0.1145, "step": 42664 }, { "epoch": 0.760978132914779, "grad_norm": 0.21328851580619812, "learning_rate": 8.2073683453225e-06, "loss": 0.0999, "step": 42665 }, { "epoch": 0.7609959690364927, "grad_norm": 0.27110686898231506, "learning_rate": 8.206215288269622e-06, "loss": 0.0703, "step": 42666 }, { "epoch": 0.7610138051582064, "grad_norm": 0.25685498118400574, "learning_rate": 8.205062296315244e-06, "loss": 0.1302, "step": 42667 }, { "epoch": 0.7610316412799201, "grad_norm": 0.23116253316402435, "learning_rate": 8.203909369463855e-06, "loss": 0.1198, "step": 42668 }, { "epoch": 0.7610494774016338, "grad_norm": 0.25200340151786804, "learning_rate": 8.202756507719916e-06, "loss": 0.0899, "step": 42669 }, { "epoch": 0.7610673135233474, "grad_norm": 0.27751943469047546, "learning_rate": 8.201603711087894e-06, "loss": 0.1033, "step": 42670 }, { "epoch": 0.7610851496450611, "grad_norm": 0.16676461696624756, "learning_rate": 8.200450979572247e-06, "loss": 0.0919, "step": 42671 }, { "epoch": 0.7611029857667748, "grad_norm": 0.30492642521858215, "learning_rate": 8.199298313177465e-06, "loss": 0.1279, "step": 42672 }, { "epoch": 0.7611208218884886, "grad_norm": 0.2170400470495224, "learning_rate": 8.198145711908006e-06, "loss": 0.1106, "step": 42673 }, { "epoch": 0.7611386580102023, "grad_norm": 0.24292133748531342, "learning_rate": 8.196993175768336e-06, "loss": 0.1041, "step": 42674 }, { "epoch": 0.761156494131916, "grad_norm": 0.4326706826686859, "learning_rate": 8.195840704762925e-06, "loss": 0.1476, "step": 42675 }, { "epoch": 0.7611743302536297, "grad_norm": 0.21665386855602264, "learning_rate": 8.19468829889623e-06, "loss": 0.1008, "step": 42676 }, { "epoch": 0.7611921663753434, "grad_norm": 0.23884227871894836, "learning_rate": 8.19353595817273e-06, "loss": 0.1012, "step": 42677 }, { "epoch": 0.7612100024970571, "grad_norm": 0.306518018245697, "learning_rate": 8.192383682596894e-06, "loss": 0.1448, "step": 42678 }, { "epoch": 0.7612278386187707, "grad_norm": 0.22884008288383484, "learning_rate": 8.191231472173185e-06, "loss": 0.0715, "step": 42679 }, { "epoch": 0.7612456747404844, "grad_norm": 0.2654304504394531, "learning_rate": 8.190079326906061e-06, "loss": 0.1754, "step": 42680 }, { "epoch": 0.7612635108621981, "grad_norm": 0.3729139268398285, "learning_rate": 8.188927246800004e-06, "loss": 0.1211, "step": 42681 }, { "epoch": 0.7612813469839118, "grad_norm": 0.25649964809417725, "learning_rate": 8.18777523185947e-06, "loss": 0.1104, "step": 42682 }, { "epoch": 0.7612991831056255, "grad_norm": 0.25164562463760376, "learning_rate": 8.18662328208893e-06, "loss": 0.1542, "step": 42683 }, { "epoch": 0.7613170192273392, "grad_norm": 0.22828686237335205, "learning_rate": 8.185471397492834e-06, "loss": 0.1112, "step": 42684 }, { "epoch": 0.7613348553490529, "grad_norm": 0.2508590519428253, "learning_rate": 8.184319578075664e-06, "loss": 0.0943, "step": 42685 }, { "epoch": 0.7613526914707666, "grad_norm": 0.2704280912876129, "learning_rate": 8.183167823841883e-06, "loss": 0.1374, "step": 42686 }, { "epoch": 0.7613705275924803, "grad_norm": 0.3528907001018524, "learning_rate": 8.18201613479595e-06, "loss": 0.1435, "step": 42687 }, { "epoch": 0.7613883637141939, "grad_norm": 0.20950253307819366, "learning_rate": 8.180864510942332e-06, "loss": 0.0956, "step": 42688 }, { "epoch": 0.7614061998359076, "grad_norm": 0.24641311168670654, "learning_rate": 8.179712952285485e-06, "loss": 0.111, "step": 42689 }, { "epoch": 0.7614240359576214, "grad_norm": 0.22809140384197235, "learning_rate": 8.178561458829887e-06, "loss": 0.1026, "step": 42690 }, { "epoch": 0.7614418720793351, "grad_norm": 0.3638050854206085, "learning_rate": 8.177410030579988e-06, "loss": 0.1389, "step": 42691 }, { "epoch": 0.7614597082010488, "grad_norm": 0.34730955958366394, "learning_rate": 8.176258667540266e-06, "loss": 0.1063, "step": 42692 }, { "epoch": 0.7614775443227625, "grad_norm": 0.21242783963680267, "learning_rate": 8.175107369715165e-06, "loss": 0.1473, "step": 42693 }, { "epoch": 0.7614953804444762, "grad_norm": 0.1886938512325287, "learning_rate": 8.17395613710917e-06, "loss": 0.1076, "step": 42694 }, { "epoch": 0.7615132165661899, "grad_norm": 0.25451093912124634, "learning_rate": 8.172804969726734e-06, "loss": 0.1049, "step": 42695 }, { "epoch": 0.7615310526879036, "grad_norm": 0.33973121643066406, "learning_rate": 8.171653867572312e-06, "loss": 0.0898, "step": 42696 }, { "epoch": 0.7615488888096172, "grad_norm": 0.2229091078042984, "learning_rate": 8.17050283065037e-06, "loss": 0.1153, "step": 42697 }, { "epoch": 0.7615667249313309, "grad_norm": 0.3007701337337494, "learning_rate": 8.169351858965376e-06, "loss": 0.1322, "step": 42698 }, { "epoch": 0.7615845610530446, "grad_norm": 0.2611568570137024, "learning_rate": 8.16820095252179e-06, "loss": 0.0989, "step": 42699 }, { "epoch": 0.7616023971747583, "grad_norm": 0.3249962627887726, "learning_rate": 8.167050111324068e-06, "loss": 0.0918, "step": 42700 }, { "epoch": 0.761620233296472, "grad_norm": 0.2891022264957428, "learning_rate": 8.165899335376676e-06, "loss": 0.1012, "step": 42701 }, { "epoch": 0.7616380694181857, "grad_norm": 0.3628145456314087, "learning_rate": 8.164748624684062e-06, "loss": 0.1013, "step": 42702 }, { "epoch": 0.7616559055398994, "grad_norm": 0.24708378314971924, "learning_rate": 8.163597979250707e-06, "loss": 0.1523, "step": 42703 }, { "epoch": 0.7616737416616131, "grad_norm": 0.20327432453632355, "learning_rate": 8.162447399081052e-06, "loss": 0.1012, "step": 42704 }, { "epoch": 0.7616915777833267, "grad_norm": 0.41453808546066284, "learning_rate": 8.161296884179575e-06, "loss": 0.1305, "step": 42705 }, { "epoch": 0.7617094139050404, "grad_norm": 0.3436262011528015, "learning_rate": 8.16014643455072e-06, "loss": 0.1722, "step": 42706 }, { "epoch": 0.7617272500267542, "grad_norm": 0.2809482514858246, "learning_rate": 8.15899605019896e-06, "loss": 0.1181, "step": 42707 }, { "epoch": 0.7617450861484679, "grad_norm": 0.19482646882534027, "learning_rate": 8.15784573112875e-06, "loss": 0.0882, "step": 42708 }, { "epoch": 0.7617629222701816, "grad_norm": 0.3058270215988159, "learning_rate": 8.156695477344547e-06, "loss": 0.1295, "step": 42709 }, { "epoch": 0.7617807583918953, "grad_norm": 0.24580976366996765, "learning_rate": 8.1555452888508e-06, "loss": 0.1282, "step": 42710 }, { "epoch": 0.761798594513609, "grad_norm": 0.297408789396286, "learning_rate": 8.154395165651988e-06, "loss": 0.1302, "step": 42711 }, { "epoch": 0.7618164306353227, "grad_norm": 0.24513469636440277, "learning_rate": 8.153245107752555e-06, "loss": 0.1251, "step": 42712 }, { "epoch": 0.7618342667570364, "grad_norm": 0.2538241446018219, "learning_rate": 8.152095115156966e-06, "loss": 0.1191, "step": 42713 }, { "epoch": 0.76185210287875, "grad_norm": 0.23211342096328735, "learning_rate": 8.150945187869675e-06, "loss": 0.1265, "step": 42714 }, { "epoch": 0.7618699390004637, "grad_norm": 0.25968465209007263, "learning_rate": 8.14979532589513e-06, "loss": 0.0955, "step": 42715 }, { "epoch": 0.7618877751221774, "grad_norm": 0.31376928091049194, "learning_rate": 8.148645529237805e-06, "loss": 0.0928, "step": 42716 }, { "epoch": 0.7619056112438911, "grad_norm": 0.21094666421413422, "learning_rate": 8.147495797902156e-06, "loss": 0.1247, "step": 42717 }, { "epoch": 0.7619234473656048, "grad_norm": 0.3071964383125305, "learning_rate": 8.146346131892621e-06, "loss": 0.0891, "step": 42718 }, { "epoch": 0.7619412834873185, "grad_norm": 0.3281419575214386, "learning_rate": 8.145196531213677e-06, "loss": 0.1041, "step": 42719 }, { "epoch": 0.7619591196090322, "grad_norm": 0.19709442555904388, "learning_rate": 8.144046995869766e-06, "loss": 0.1135, "step": 42720 }, { "epoch": 0.7619769557307459, "grad_norm": 0.2718742787837982, "learning_rate": 8.142897525865362e-06, "loss": 0.1163, "step": 42721 }, { "epoch": 0.7619947918524596, "grad_norm": 0.23442450165748596, "learning_rate": 8.141748121204906e-06, "loss": 0.0625, "step": 42722 }, { "epoch": 0.7620126279741734, "grad_norm": 0.27153611183166504, "learning_rate": 8.14059878189285e-06, "loss": 0.0939, "step": 42723 }, { "epoch": 0.762030464095887, "grad_norm": 0.27912840247154236, "learning_rate": 8.139449507933664e-06, "loss": 0.1233, "step": 42724 }, { "epoch": 0.7620483002176007, "grad_norm": 0.34461748600006104, "learning_rate": 8.138300299331794e-06, "loss": 0.1356, "step": 42725 }, { "epoch": 0.7620661363393144, "grad_norm": 0.24578818678855896, "learning_rate": 8.137151156091696e-06, "loss": 0.087, "step": 42726 }, { "epoch": 0.7620839724610281, "grad_norm": 0.2059352695941925, "learning_rate": 8.136002078217825e-06, "loss": 0.1016, "step": 42727 }, { "epoch": 0.7621018085827418, "grad_norm": 0.2727588713169098, "learning_rate": 8.134853065714626e-06, "loss": 0.1129, "step": 42728 }, { "epoch": 0.7621196447044555, "grad_norm": 0.30707958340644836, "learning_rate": 8.133704118586572e-06, "loss": 0.16, "step": 42729 }, { "epoch": 0.7621374808261692, "grad_norm": 0.23666512966156006, "learning_rate": 8.132555236838104e-06, "loss": 0.1283, "step": 42730 }, { "epoch": 0.7621553169478829, "grad_norm": 0.2750038206577301, "learning_rate": 8.13140642047367e-06, "loss": 0.1299, "step": 42731 }, { "epoch": 0.7621731530695965, "grad_norm": 0.2746490240097046, "learning_rate": 8.13025766949774e-06, "loss": 0.1268, "step": 42732 }, { "epoch": 0.7621909891913102, "grad_norm": 0.2559909522533417, "learning_rate": 8.129108983914749e-06, "loss": 0.0939, "step": 42733 }, { "epoch": 0.7622088253130239, "grad_norm": 0.3153408467769623, "learning_rate": 8.127960363729164e-06, "loss": 0.0861, "step": 42734 }, { "epoch": 0.7622266614347376, "grad_norm": 0.2540927827358246, "learning_rate": 8.126811808945436e-06, "loss": 0.1087, "step": 42735 }, { "epoch": 0.7622444975564513, "grad_norm": 0.2728038728237152, "learning_rate": 8.125663319568002e-06, "loss": 0.0803, "step": 42736 }, { "epoch": 0.762262333678165, "grad_norm": 0.36184045672416687, "learning_rate": 8.124514895601337e-06, "loss": 0.1315, "step": 42737 }, { "epoch": 0.7622801697998787, "grad_norm": 0.2298123687505722, "learning_rate": 8.12336653704988e-06, "loss": 0.0661, "step": 42738 }, { "epoch": 0.7622980059215924, "grad_norm": 0.40313494205474854, "learning_rate": 8.122218243918081e-06, "loss": 0.1701, "step": 42739 }, { "epoch": 0.7623158420433062, "grad_norm": 0.25307127833366394, "learning_rate": 8.121070016210393e-06, "loss": 0.1273, "step": 42740 }, { "epoch": 0.7623336781650198, "grad_norm": 0.21520350873470306, "learning_rate": 8.119921853931264e-06, "loss": 0.1159, "step": 42741 }, { "epoch": 0.7623515142867335, "grad_norm": 0.279513418674469, "learning_rate": 8.118773757085152e-06, "loss": 0.1358, "step": 42742 }, { "epoch": 0.7623693504084472, "grad_norm": 0.3304174244403839, "learning_rate": 8.117625725676506e-06, "loss": 0.0917, "step": 42743 }, { "epoch": 0.7623871865301609, "grad_norm": 0.2964650094509125, "learning_rate": 8.116477759709773e-06, "loss": 0.1097, "step": 42744 }, { "epoch": 0.7624050226518746, "grad_norm": 0.27967938780784607, "learning_rate": 8.115329859189394e-06, "loss": 0.1357, "step": 42745 }, { "epoch": 0.7624228587735883, "grad_norm": 0.2825847268104553, "learning_rate": 8.114182024119838e-06, "loss": 0.1152, "step": 42746 }, { "epoch": 0.762440694895302, "grad_norm": 0.3004486560821533, "learning_rate": 8.113034254505536e-06, "loss": 0.1234, "step": 42747 }, { "epoch": 0.7624585310170157, "grad_norm": 0.2610960900783539, "learning_rate": 8.111886550350953e-06, "loss": 0.0939, "step": 42748 }, { "epoch": 0.7624763671387293, "grad_norm": 0.2529049813747406, "learning_rate": 8.110738911660523e-06, "loss": 0.138, "step": 42749 }, { "epoch": 0.762494203260443, "grad_norm": 0.25084343552589417, "learning_rate": 8.10959133843871e-06, "loss": 0.1115, "step": 42750 }, { "epoch": 0.7625120393821567, "grad_norm": 0.5162140727043152, "learning_rate": 8.108443830689958e-06, "loss": 0.1066, "step": 42751 }, { "epoch": 0.7625298755038704, "grad_norm": 0.22267527878284454, "learning_rate": 8.107296388418708e-06, "loss": 0.0996, "step": 42752 }, { "epoch": 0.7625477116255841, "grad_norm": 0.3033842444419861, "learning_rate": 8.106149011629413e-06, "loss": 0.1101, "step": 42753 }, { "epoch": 0.7625655477472978, "grad_norm": 0.3041439652442932, "learning_rate": 8.105001700326511e-06, "loss": 0.1117, "step": 42754 }, { "epoch": 0.7625833838690115, "grad_norm": 0.2612026333808899, "learning_rate": 8.103854454514467e-06, "loss": 0.1062, "step": 42755 }, { "epoch": 0.7626012199907252, "grad_norm": 0.27075478434562683, "learning_rate": 8.102707274197718e-06, "loss": 0.1433, "step": 42756 }, { "epoch": 0.762619056112439, "grad_norm": 0.3153226375579834, "learning_rate": 8.10156015938071e-06, "loss": 0.1141, "step": 42757 }, { "epoch": 0.7626368922341527, "grad_norm": 0.21550028026103973, "learning_rate": 8.100413110067886e-06, "loss": 0.1039, "step": 42758 }, { "epoch": 0.7626547283558663, "grad_norm": 0.29533568024635315, "learning_rate": 8.099266126263704e-06, "loss": 0.1636, "step": 42759 }, { "epoch": 0.76267256447758, "grad_norm": 0.2324717938899994, "learning_rate": 8.098119207972599e-06, "loss": 0.1362, "step": 42760 }, { "epoch": 0.7626904005992937, "grad_norm": 0.19602574408054352, "learning_rate": 8.096972355199029e-06, "loss": 0.0664, "step": 42761 }, { "epoch": 0.7627082367210074, "grad_norm": 0.19509239494800568, "learning_rate": 8.09582556794742e-06, "loss": 0.1006, "step": 42762 }, { "epoch": 0.7627260728427211, "grad_norm": 0.2777608036994934, "learning_rate": 8.094678846222242e-06, "loss": 0.1872, "step": 42763 }, { "epoch": 0.7627439089644348, "grad_norm": 0.26951149106025696, "learning_rate": 8.093532190027926e-06, "loss": 0.143, "step": 42764 }, { "epoch": 0.7627617450861485, "grad_norm": 0.2789444923400879, "learning_rate": 8.092385599368917e-06, "loss": 0.1058, "step": 42765 }, { "epoch": 0.7627795812078622, "grad_norm": 0.25518038868904114, "learning_rate": 8.091239074249663e-06, "loss": 0.1114, "step": 42766 }, { "epoch": 0.7627974173295758, "grad_norm": 0.2865860164165497, "learning_rate": 8.090092614674599e-06, "loss": 0.1407, "step": 42767 }, { "epoch": 0.7628152534512895, "grad_norm": 0.30358266830444336, "learning_rate": 8.088946220648184e-06, "loss": 0.099, "step": 42768 }, { "epoch": 0.7628330895730032, "grad_norm": 0.24380910396575928, "learning_rate": 8.087799892174853e-06, "loss": 0.1302, "step": 42769 }, { "epoch": 0.7628509256947169, "grad_norm": 0.3392241597175598, "learning_rate": 8.086653629259053e-06, "loss": 0.1196, "step": 42770 }, { "epoch": 0.7628687618164306, "grad_norm": 0.19452714920043945, "learning_rate": 8.085507431905215e-06, "loss": 0.0769, "step": 42771 }, { "epoch": 0.7628865979381443, "grad_norm": 0.32577091455459595, "learning_rate": 8.084361300117802e-06, "loss": 0.1599, "step": 42772 }, { "epoch": 0.762904434059858, "grad_norm": 0.2734892666339874, "learning_rate": 8.083215233901245e-06, "loss": 0.1136, "step": 42773 }, { "epoch": 0.7629222701815718, "grad_norm": 0.3848903179168701, "learning_rate": 8.082069233259984e-06, "loss": 0.1064, "step": 42774 }, { "epoch": 0.7629401063032855, "grad_norm": 0.2228815108537674, "learning_rate": 8.080923298198461e-06, "loss": 0.1149, "step": 42775 }, { "epoch": 0.7629579424249991, "grad_norm": 0.40106096863746643, "learning_rate": 8.079777428721136e-06, "loss": 0.1379, "step": 42776 }, { "epoch": 0.7629757785467128, "grad_norm": 0.25054147839546204, "learning_rate": 8.078631624832436e-06, "loss": 0.1448, "step": 42777 }, { "epoch": 0.7629936146684265, "grad_norm": 0.22279147803783417, "learning_rate": 8.077485886536803e-06, "loss": 0.0845, "step": 42778 }, { "epoch": 0.7630114507901402, "grad_norm": 0.25205790996551514, "learning_rate": 8.076340213838682e-06, "loss": 0.1062, "step": 42779 }, { "epoch": 0.7630292869118539, "grad_norm": 0.33514559268951416, "learning_rate": 8.075194606742501e-06, "loss": 0.1679, "step": 42780 }, { "epoch": 0.7630471230335676, "grad_norm": 0.24219800531864166, "learning_rate": 8.07404906525272e-06, "loss": 0.1025, "step": 42781 }, { "epoch": 0.7630649591552813, "grad_norm": 0.25891268253326416, "learning_rate": 8.072903589373773e-06, "loss": 0.1458, "step": 42782 }, { "epoch": 0.763082795276995, "grad_norm": 0.3182671368122101, "learning_rate": 8.071758179110095e-06, "loss": 0.1268, "step": 42783 }, { "epoch": 0.7631006313987087, "grad_norm": 0.328510046005249, "learning_rate": 8.070612834466124e-06, "loss": 0.1326, "step": 42784 }, { "epoch": 0.7631184675204223, "grad_norm": 0.301238477230072, "learning_rate": 8.069467555446312e-06, "loss": 0.1477, "step": 42785 }, { "epoch": 0.763136303642136, "grad_norm": 0.24899804592132568, "learning_rate": 8.06832234205509e-06, "loss": 0.1145, "step": 42786 }, { "epoch": 0.7631541397638497, "grad_norm": 0.27358248829841614, "learning_rate": 8.06717719429689e-06, "loss": 0.1235, "step": 42787 }, { "epoch": 0.7631719758855634, "grad_norm": 0.33678731322288513, "learning_rate": 8.06603211217617e-06, "loss": 0.1246, "step": 42788 }, { "epoch": 0.7631898120072771, "grad_norm": 0.2629486620426178, "learning_rate": 8.06488709569735e-06, "loss": 0.1393, "step": 42789 }, { "epoch": 0.7632076481289908, "grad_norm": 0.2906598448753357, "learning_rate": 8.063742144864882e-06, "loss": 0.1418, "step": 42790 }, { "epoch": 0.7632254842507046, "grad_norm": 0.2102222889661789, "learning_rate": 8.062597259683202e-06, "loss": 0.0897, "step": 42791 }, { "epoch": 0.7632433203724183, "grad_norm": 0.2572406530380249, "learning_rate": 8.061452440156744e-06, "loss": 0.1587, "step": 42792 }, { "epoch": 0.763261156494132, "grad_norm": 0.3155263662338257, "learning_rate": 8.060307686289938e-06, "loss": 0.1557, "step": 42793 }, { "epoch": 0.7632789926158456, "grad_norm": 0.27527886629104614, "learning_rate": 8.059162998087242e-06, "loss": 0.1237, "step": 42794 }, { "epoch": 0.7632968287375593, "grad_norm": 0.30883437395095825, "learning_rate": 8.058018375553078e-06, "loss": 0.1621, "step": 42795 }, { "epoch": 0.763314664859273, "grad_norm": 0.2987239956855774, "learning_rate": 8.056873818691884e-06, "loss": 0.1348, "step": 42796 }, { "epoch": 0.7633325009809867, "grad_norm": 0.27694231271743774, "learning_rate": 8.055729327508097e-06, "loss": 0.1293, "step": 42797 }, { "epoch": 0.7633503371027004, "grad_norm": 0.26061302423477173, "learning_rate": 8.05458490200616e-06, "loss": 0.1288, "step": 42798 }, { "epoch": 0.7633681732244141, "grad_norm": 0.3165815472602844, "learning_rate": 8.053440542190505e-06, "loss": 0.1044, "step": 42799 }, { "epoch": 0.7633860093461278, "grad_norm": 0.3112483620643616, "learning_rate": 8.052296248065565e-06, "loss": 0.1471, "step": 42800 }, { "epoch": 0.7634038454678415, "grad_norm": 0.2951695919036865, "learning_rate": 8.051152019635774e-06, "loss": 0.14, "step": 42801 }, { "epoch": 0.7634216815895551, "grad_norm": 0.2529488503932953, "learning_rate": 8.050007856905572e-06, "loss": 0.1328, "step": 42802 }, { "epoch": 0.7634395177112688, "grad_norm": 0.22331856191158295, "learning_rate": 8.048863759879399e-06, "loss": 0.1156, "step": 42803 }, { "epoch": 0.7634573538329825, "grad_norm": 0.3526498079299927, "learning_rate": 8.047719728561687e-06, "loss": 0.0861, "step": 42804 }, { "epoch": 0.7634751899546962, "grad_norm": 0.21404924988746643, "learning_rate": 8.046575762956867e-06, "loss": 0.0821, "step": 42805 }, { "epoch": 0.7634930260764099, "grad_norm": 0.3276389241218567, "learning_rate": 8.045431863069369e-06, "loss": 0.1535, "step": 42806 }, { "epoch": 0.7635108621981236, "grad_norm": 0.25100985169410706, "learning_rate": 8.04428802890364e-06, "loss": 0.1148, "step": 42807 }, { "epoch": 0.7635286983198374, "grad_norm": 0.26612594723701477, "learning_rate": 8.043144260464105e-06, "loss": 0.1275, "step": 42808 }, { "epoch": 0.7635465344415511, "grad_norm": 0.24870416522026062, "learning_rate": 8.042000557755202e-06, "loss": 0.1355, "step": 42809 }, { "epoch": 0.7635643705632648, "grad_norm": 0.24198852479457855, "learning_rate": 8.040856920781354e-06, "loss": 0.1234, "step": 42810 }, { "epoch": 0.7635822066849784, "grad_norm": 0.2864730954170227, "learning_rate": 8.03971334954701e-06, "loss": 0.1402, "step": 42811 }, { "epoch": 0.7636000428066921, "grad_norm": 0.2514137923717499, "learning_rate": 8.038569844056592e-06, "loss": 0.1485, "step": 42812 }, { "epoch": 0.7636178789284058, "grad_norm": 0.222085103392601, "learning_rate": 8.037426404314538e-06, "loss": 0.0942, "step": 42813 }, { "epoch": 0.7636357150501195, "grad_norm": 0.3161686956882477, "learning_rate": 8.03628303032527e-06, "loss": 0.0805, "step": 42814 }, { "epoch": 0.7636535511718332, "grad_norm": 0.2392972707748413, "learning_rate": 8.035139722093235e-06, "loss": 0.145, "step": 42815 }, { "epoch": 0.7636713872935469, "grad_norm": 0.4221689701080322, "learning_rate": 8.033996479622852e-06, "loss": 0.1521, "step": 42816 }, { "epoch": 0.7636892234152606, "grad_norm": 0.2915332615375519, "learning_rate": 8.032853302918564e-06, "loss": 0.0973, "step": 42817 }, { "epoch": 0.7637070595369743, "grad_norm": 0.27898216247558594, "learning_rate": 8.031710191984799e-06, "loss": 0.1012, "step": 42818 }, { "epoch": 0.763724895658688, "grad_norm": 0.2774139642715454, "learning_rate": 8.030567146825976e-06, "loss": 0.0539, "step": 42819 }, { "epoch": 0.7637427317804016, "grad_norm": 0.20346693694591522, "learning_rate": 8.029424167446545e-06, "loss": 0.1282, "step": 42820 }, { "epoch": 0.7637605679021153, "grad_norm": 0.27988263964653015, "learning_rate": 8.028281253850928e-06, "loss": 0.0787, "step": 42821 }, { "epoch": 0.763778404023829, "grad_norm": 0.27412253618240356, "learning_rate": 8.027138406043555e-06, "loss": 0.1542, "step": 42822 }, { "epoch": 0.7637962401455427, "grad_norm": 0.264974445104599, "learning_rate": 8.025995624028846e-06, "loss": 0.0777, "step": 42823 }, { "epoch": 0.7638140762672565, "grad_norm": 0.27311447262763977, "learning_rate": 8.024852907811248e-06, "loss": 0.1348, "step": 42824 }, { "epoch": 0.7638319123889702, "grad_norm": 0.386681467294693, "learning_rate": 8.023710257395184e-06, "loss": 0.1507, "step": 42825 }, { "epoch": 0.7638497485106839, "grad_norm": 0.26128536462783813, "learning_rate": 8.022567672785083e-06, "loss": 0.1006, "step": 42826 }, { "epoch": 0.7638675846323976, "grad_norm": 0.3179316818714142, "learning_rate": 8.021425153985364e-06, "loss": 0.0829, "step": 42827 }, { "epoch": 0.7638854207541113, "grad_norm": 0.2926112115383148, "learning_rate": 8.020282701000479e-06, "loss": 0.1371, "step": 42828 }, { "epoch": 0.7639032568758249, "grad_norm": 0.2479543387889862, "learning_rate": 8.019140313834837e-06, "loss": 0.1421, "step": 42829 }, { "epoch": 0.7639210929975386, "grad_norm": 0.2865014374256134, "learning_rate": 8.017997992492865e-06, "loss": 0.0943, "step": 42830 }, { "epoch": 0.7639389291192523, "grad_norm": 0.3328753411769867, "learning_rate": 8.016855736979006e-06, "loss": 0.146, "step": 42831 }, { "epoch": 0.763956765240966, "grad_norm": 0.2740744650363922, "learning_rate": 8.015713547297673e-06, "loss": 0.0859, "step": 42832 }, { "epoch": 0.7639746013626797, "grad_norm": 0.340349018573761, "learning_rate": 8.01457142345331e-06, "loss": 0.1233, "step": 42833 }, { "epoch": 0.7639924374843934, "grad_norm": 0.27598676085472107, "learning_rate": 8.013429365450332e-06, "loss": 0.0671, "step": 42834 }, { "epoch": 0.7640102736061071, "grad_norm": 0.2351517230272293, "learning_rate": 8.01228737329317e-06, "loss": 0.0945, "step": 42835 }, { "epoch": 0.7640281097278208, "grad_norm": 0.29766207933425903, "learning_rate": 8.01114544698624e-06, "loss": 0.0734, "step": 42836 }, { "epoch": 0.7640459458495344, "grad_norm": 0.26338836550712585, "learning_rate": 8.010003586533988e-06, "loss": 0.124, "step": 42837 }, { "epoch": 0.7640637819712481, "grad_norm": 0.3375798463821411, "learning_rate": 8.00886179194083e-06, "loss": 0.1097, "step": 42838 }, { "epoch": 0.7640816180929618, "grad_norm": 0.25835952162742615, "learning_rate": 8.007720063211192e-06, "loss": 0.1192, "step": 42839 }, { "epoch": 0.7640994542146755, "grad_norm": 0.3264369070529938, "learning_rate": 8.006578400349491e-06, "loss": 0.1294, "step": 42840 }, { "epoch": 0.7641172903363893, "grad_norm": 0.25550565123558044, "learning_rate": 8.005436803360169e-06, "loss": 0.1573, "step": 42841 }, { "epoch": 0.764135126458103, "grad_norm": 0.4731195569038391, "learning_rate": 8.004295272247647e-06, "loss": 0.1315, "step": 42842 }, { "epoch": 0.7641529625798167, "grad_norm": 0.2981793284416199, "learning_rate": 8.003153807016337e-06, "loss": 0.1418, "step": 42843 }, { "epoch": 0.7641707987015304, "grad_norm": 0.2584768533706665, "learning_rate": 8.002012407670681e-06, "loss": 0.1093, "step": 42844 }, { "epoch": 0.7641886348232441, "grad_norm": 0.29052045941352844, "learning_rate": 8.000871074215086e-06, "loss": 0.1246, "step": 42845 }, { "epoch": 0.7642064709449577, "grad_norm": 0.2955264151096344, "learning_rate": 7.999729806653997e-06, "loss": 0.1378, "step": 42846 }, { "epoch": 0.7642243070666714, "grad_norm": 0.307647705078125, "learning_rate": 7.998588604991824e-06, "loss": 0.1411, "step": 42847 }, { "epoch": 0.7642421431883851, "grad_norm": 0.27249661087989807, "learning_rate": 7.997447469232996e-06, "loss": 0.0884, "step": 42848 }, { "epoch": 0.7642599793100988, "grad_norm": 0.33691415190696716, "learning_rate": 7.996306399381925e-06, "loss": 0.1648, "step": 42849 }, { "epoch": 0.7642778154318125, "grad_norm": 0.3107374906539917, "learning_rate": 7.995165395443053e-06, "loss": 0.1277, "step": 42850 }, { "epoch": 0.7642956515535262, "grad_norm": 0.2589839696884155, "learning_rate": 7.994024457420793e-06, "loss": 0.0912, "step": 42851 }, { "epoch": 0.7643134876752399, "grad_norm": 0.21271340548992157, "learning_rate": 7.992883585319566e-06, "loss": 0.1139, "step": 42852 }, { "epoch": 0.7643313237969536, "grad_norm": 0.3283775746822357, "learning_rate": 7.99174277914379e-06, "loss": 0.1249, "step": 42853 }, { "epoch": 0.7643491599186673, "grad_norm": 0.2842460572719574, "learning_rate": 7.9906020388979e-06, "loss": 0.1258, "step": 42854 }, { "epoch": 0.7643669960403809, "grad_norm": 0.30483558773994446, "learning_rate": 7.989461364586311e-06, "loss": 0.2016, "step": 42855 }, { "epoch": 0.7643848321620946, "grad_norm": 0.22662563621997833, "learning_rate": 7.988320756213447e-06, "loss": 0.1093, "step": 42856 }, { "epoch": 0.7644026682838083, "grad_norm": 0.31030842661857605, "learning_rate": 7.987180213783719e-06, "loss": 0.1261, "step": 42857 }, { "epoch": 0.7644205044055221, "grad_norm": 0.30658525228500366, "learning_rate": 7.986039737301555e-06, "loss": 0.1312, "step": 42858 }, { "epoch": 0.7644383405272358, "grad_norm": 0.25996431708335876, "learning_rate": 7.984899326771387e-06, "loss": 0.0842, "step": 42859 }, { "epoch": 0.7644561766489495, "grad_norm": 0.2168186753988266, "learning_rate": 7.983758982197625e-06, "loss": 0.1198, "step": 42860 }, { "epoch": 0.7644740127706632, "grad_norm": 0.2856423258781433, "learning_rate": 7.98261870358469e-06, "loss": 0.0944, "step": 42861 }, { "epoch": 0.7644918488923769, "grad_norm": 0.35936540365219116, "learning_rate": 7.981478490936997e-06, "loss": 0.1403, "step": 42862 }, { "epoch": 0.7645096850140906, "grad_norm": 0.22587734460830688, "learning_rate": 7.980338344258978e-06, "loss": 0.1109, "step": 42863 }, { "epoch": 0.7645275211358042, "grad_norm": 0.2973916828632355, "learning_rate": 7.979198263555043e-06, "loss": 0.1785, "step": 42864 }, { "epoch": 0.7645453572575179, "grad_norm": 0.2513025104999542, "learning_rate": 7.978058248829617e-06, "loss": 0.1101, "step": 42865 }, { "epoch": 0.7645631933792316, "grad_norm": 0.38973304629325867, "learning_rate": 7.976918300087111e-06, "loss": 0.117, "step": 42866 }, { "epoch": 0.7645810295009453, "grad_norm": 0.3005039393901825, "learning_rate": 7.975778417331953e-06, "loss": 0.096, "step": 42867 }, { "epoch": 0.764598865622659, "grad_norm": 0.27728769183158875, "learning_rate": 7.974638600568559e-06, "loss": 0.1327, "step": 42868 }, { "epoch": 0.7646167017443727, "grad_norm": 0.28186824917793274, "learning_rate": 7.973498849801348e-06, "loss": 0.0921, "step": 42869 }, { "epoch": 0.7646345378660864, "grad_norm": 0.3413606584072113, "learning_rate": 7.972359165034727e-06, "loss": 0.1237, "step": 42870 }, { "epoch": 0.7646523739878001, "grad_norm": 0.3357499837875366, "learning_rate": 7.971219546273131e-06, "loss": 0.1279, "step": 42871 }, { "epoch": 0.7646702101095137, "grad_norm": 0.2963908612728119, "learning_rate": 7.970079993520959e-06, "loss": 0.1147, "step": 42872 }, { "epoch": 0.7646880462312274, "grad_norm": 0.2905164659023285, "learning_rate": 7.968940506782652e-06, "loss": 0.1164, "step": 42873 }, { "epoch": 0.7647058823529411, "grad_norm": 0.2257183939218521, "learning_rate": 7.96780108606261e-06, "loss": 0.0851, "step": 42874 }, { "epoch": 0.7647237184746549, "grad_norm": 0.2909891605377197, "learning_rate": 7.966661731365246e-06, "loss": 0.0933, "step": 42875 }, { "epoch": 0.7647415545963686, "grad_norm": 0.26723378896713257, "learning_rate": 7.965522442694994e-06, "loss": 0.1451, "step": 42876 }, { "epoch": 0.7647593907180823, "grad_norm": 0.27947598695755005, "learning_rate": 7.964383220056259e-06, "loss": 0.0804, "step": 42877 }, { "epoch": 0.764777226839796, "grad_norm": 0.2634432911872864, "learning_rate": 7.963244063453463e-06, "loss": 0.1584, "step": 42878 }, { "epoch": 0.7647950629615097, "grad_norm": 0.23158571124076843, "learning_rate": 7.962104972891005e-06, "loss": 0.1446, "step": 42879 }, { "epoch": 0.7648128990832234, "grad_norm": 0.267520934343338, "learning_rate": 7.960965948373319e-06, "loss": 0.1311, "step": 42880 }, { "epoch": 0.764830735204937, "grad_norm": 0.3257252275943756, "learning_rate": 7.959826989904818e-06, "loss": 0.1716, "step": 42881 }, { "epoch": 0.7648485713266507, "grad_norm": 0.29919692873954773, "learning_rate": 7.95868809748991e-06, "loss": 0.1017, "step": 42882 }, { "epoch": 0.7648664074483644, "grad_norm": 0.2358035147190094, "learning_rate": 7.957549271133016e-06, "loss": 0.1046, "step": 42883 }, { "epoch": 0.7648842435700781, "grad_norm": 0.4467250108718872, "learning_rate": 7.956410510838538e-06, "loss": 0.164, "step": 42884 }, { "epoch": 0.7649020796917918, "grad_norm": 0.2204703539609909, "learning_rate": 7.955271816610899e-06, "loss": 0.0903, "step": 42885 }, { "epoch": 0.7649199158135055, "grad_norm": 0.3152219355106354, "learning_rate": 7.954133188454522e-06, "loss": 0.1704, "step": 42886 }, { "epoch": 0.7649377519352192, "grad_norm": 0.3217666447162628, "learning_rate": 7.95299462637381e-06, "loss": 0.1276, "step": 42887 }, { "epoch": 0.7649555880569329, "grad_norm": 0.32320696115493774, "learning_rate": 7.951856130373176e-06, "loss": 0.1104, "step": 42888 }, { "epoch": 0.7649734241786466, "grad_norm": 0.2849743962287903, "learning_rate": 7.950717700457038e-06, "loss": 0.1441, "step": 42889 }, { "epoch": 0.7649912603003602, "grad_norm": 0.2630385458469391, "learning_rate": 7.94957933662981e-06, "loss": 0.1331, "step": 42890 }, { "epoch": 0.7650090964220739, "grad_norm": 0.6085127592086792, "learning_rate": 7.9484410388959e-06, "loss": 0.1371, "step": 42891 }, { "epoch": 0.7650269325437877, "grad_norm": 0.30573567748069763, "learning_rate": 7.947302807259715e-06, "loss": 0.1232, "step": 42892 }, { "epoch": 0.7650447686655014, "grad_norm": 0.18907184898853302, "learning_rate": 7.946164641725685e-06, "loss": 0.117, "step": 42893 }, { "epoch": 0.7650626047872151, "grad_norm": 0.19128276407718658, "learning_rate": 7.945026542298207e-06, "loss": 0.0777, "step": 42894 }, { "epoch": 0.7650804409089288, "grad_norm": 0.26594278216362, "learning_rate": 7.943888508981697e-06, "loss": 0.1218, "step": 42895 }, { "epoch": 0.7650982770306425, "grad_norm": 0.1851966232061386, "learning_rate": 7.942750541780567e-06, "loss": 0.0716, "step": 42896 }, { "epoch": 0.7651161131523562, "grad_norm": 0.27270033955574036, "learning_rate": 7.94161264069922e-06, "loss": 0.0909, "step": 42897 }, { "epoch": 0.7651339492740699, "grad_norm": 0.287460058927536, "learning_rate": 7.940474805742078e-06, "loss": 0.1108, "step": 42898 }, { "epoch": 0.7651517853957835, "grad_norm": 0.27612176537513733, "learning_rate": 7.939337036913543e-06, "loss": 0.0936, "step": 42899 }, { "epoch": 0.7651696215174972, "grad_norm": 0.3355642259120941, "learning_rate": 7.93819933421804e-06, "loss": 0.1571, "step": 42900 }, { "epoch": 0.7651874576392109, "grad_norm": 0.23417073488235474, "learning_rate": 7.937061697659962e-06, "loss": 0.1231, "step": 42901 }, { "epoch": 0.7652052937609246, "grad_norm": 0.3518558740615845, "learning_rate": 7.935924127243729e-06, "loss": 0.09, "step": 42902 }, { "epoch": 0.7652231298826383, "grad_norm": 0.25965461134910583, "learning_rate": 7.934786622973753e-06, "loss": 0.1058, "step": 42903 }, { "epoch": 0.765240966004352, "grad_norm": 0.31411510705947876, "learning_rate": 7.933649184854436e-06, "loss": 0.113, "step": 42904 }, { "epoch": 0.7652588021260657, "grad_norm": 0.28541257977485657, "learning_rate": 7.932511812890182e-06, "loss": 0.1066, "step": 42905 }, { "epoch": 0.7652766382477794, "grad_norm": 0.2829614281654358, "learning_rate": 7.931374507085412e-06, "loss": 0.1214, "step": 42906 }, { "epoch": 0.765294474369493, "grad_norm": 0.32959020137786865, "learning_rate": 7.930237267444533e-06, "loss": 0.1318, "step": 42907 }, { "epoch": 0.7653123104912067, "grad_norm": 0.25511986017227173, "learning_rate": 7.92910009397195e-06, "loss": 0.0834, "step": 42908 }, { "epoch": 0.7653301466129205, "grad_norm": 0.24911274015903473, "learning_rate": 7.927962986672068e-06, "loss": 0.1357, "step": 42909 }, { "epoch": 0.7653479827346342, "grad_norm": 0.22685551643371582, "learning_rate": 7.926825945549293e-06, "loss": 0.0877, "step": 42910 }, { "epoch": 0.7653658188563479, "grad_norm": 0.2752068340778351, "learning_rate": 7.925688970608044e-06, "loss": 0.1236, "step": 42911 }, { "epoch": 0.7653836549780616, "grad_norm": 0.38870489597320557, "learning_rate": 7.924552061852717e-06, "loss": 0.0835, "step": 42912 }, { "epoch": 0.7654014910997753, "grad_norm": 0.31101763248443604, "learning_rate": 7.923415219287728e-06, "loss": 0.125, "step": 42913 }, { "epoch": 0.765419327221489, "grad_norm": 0.308556467294693, "learning_rate": 7.922278442917474e-06, "loss": 0.1541, "step": 42914 }, { "epoch": 0.7654371633432027, "grad_norm": 0.3094073235988617, "learning_rate": 7.921141732746373e-06, "loss": 0.0929, "step": 42915 }, { "epoch": 0.7654549994649164, "grad_norm": 0.2967217266559601, "learning_rate": 7.920005088778828e-06, "loss": 0.1124, "step": 42916 }, { "epoch": 0.76547283558663, "grad_norm": 0.2879098057746887, "learning_rate": 7.91886851101924e-06, "loss": 0.0902, "step": 42917 }, { "epoch": 0.7654906717083437, "grad_norm": 0.35333603620529175, "learning_rate": 7.917731999472012e-06, "loss": 0.1326, "step": 42918 }, { "epoch": 0.7655085078300574, "grad_norm": 0.2966606318950653, "learning_rate": 7.916595554141562e-06, "loss": 0.091, "step": 42919 }, { "epoch": 0.7655263439517711, "grad_norm": 0.2804579436779022, "learning_rate": 7.915459175032286e-06, "loss": 0.1368, "step": 42920 }, { "epoch": 0.7655441800734848, "grad_norm": 0.35079076886177063, "learning_rate": 7.914322862148594e-06, "loss": 0.1218, "step": 42921 }, { "epoch": 0.7655620161951985, "grad_norm": 0.37944865226745605, "learning_rate": 7.913186615494886e-06, "loss": 0.1731, "step": 42922 }, { "epoch": 0.7655798523169122, "grad_norm": 0.3109481930732727, "learning_rate": 7.91205043507556e-06, "loss": 0.1578, "step": 42923 }, { "epoch": 0.7655976884386259, "grad_norm": 0.3452800512313843, "learning_rate": 7.910914320895038e-06, "loss": 0.1382, "step": 42924 }, { "epoch": 0.7656155245603395, "grad_norm": 0.2029651254415512, "learning_rate": 7.909778272957712e-06, "loss": 0.1134, "step": 42925 }, { "epoch": 0.7656333606820533, "grad_norm": 0.2473873496055603, "learning_rate": 7.908642291267982e-06, "loss": 0.1144, "step": 42926 }, { "epoch": 0.765651196803767, "grad_norm": 0.24521149694919586, "learning_rate": 7.907506375830265e-06, "loss": 0.1166, "step": 42927 }, { "epoch": 0.7656690329254807, "grad_norm": 0.313917875289917, "learning_rate": 7.906370526648951e-06, "loss": 0.1574, "step": 42928 }, { "epoch": 0.7656868690471944, "grad_norm": 0.2503361403942108, "learning_rate": 7.905234743728455e-06, "loss": 0.1482, "step": 42929 }, { "epoch": 0.7657047051689081, "grad_norm": 0.2644880414009094, "learning_rate": 7.904099027073172e-06, "loss": 0.1387, "step": 42930 }, { "epoch": 0.7657225412906218, "grad_norm": 0.31529226899147034, "learning_rate": 7.9029633766875e-06, "loss": 0.1557, "step": 42931 }, { "epoch": 0.7657403774123355, "grad_norm": 0.5534952282905579, "learning_rate": 7.901827792575856e-06, "loss": 0.1785, "step": 42932 }, { "epoch": 0.7657582135340492, "grad_norm": 0.2698407769203186, "learning_rate": 7.90069227474263e-06, "loss": 0.0972, "step": 42933 }, { "epoch": 0.7657760496557628, "grad_norm": 0.33253052830696106, "learning_rate": 7.899556823192229e-06, "loss": 0.1594, "step": 42934 }, { "epoch": 0.7657938857774765, "grad_norm": 0.2952536940574646, "learning_rate": 7.898421437929052e-06, "loss": 0.1483, "step": 42935 }, { "epoch": 0.7658117218991902, "grad_norm": 0.2531408667564392, "learning_rate": 7.89728611895749e-06, "loss": 0.063, "step": 42936 }, { "epoch": 0.7658295580209039, "grad_norm": 0.2943894863128662, "learning_rate": 7.896150866281963e-06, "loss": 0.1316, "step": 42937 }, { "epoch": 0.7658473941426176, "grad_norm": 0.31141456961631775, "learning_rate": 7.895015679906864e-06, "loss": 0.1197, "step": 42938 }, { "epoch": 0.7658652302643313, "grad_norm": 0.2847091853618622, "learning_rate": 7.893880559836583e-06, "loss": 0.1223, "step": 42939 }, { "epoch": 0.765883066386045, "grad_norm": 0.26799747347831726, "learning_rate": 7.892745506075538e-06, "loss": 0.1525, "step": 42940 }, { "epoch": 0.7659009025077587, "grad_norm": 0.20217901468276978, "learning_rate": 7.891610518628112e-06, "loss": 0.1184, "step": 42941 }, { "epoch": 0.7659187386294725, "grad_norm": 0.27190306782722473, "learning_rate": 7.89047559749872e-06, "loss": 0.1105, "step": 42942 }, { "epoch": 0.7659365747511861, "grad_norm": 0.2974618375301361, "learning_rate": 7.889340742691753e-06, "loss": 0.1277, "step": 42943 }, { "epoch": 0.7659544108728998, "grad_norm": 0.30681005120277405, "learning_rate": 7.888205954211606e-06, "loss": 0.13, "step": 42944 }, { "epoch": 0.7659722469946135, "grad_norm": 0.31746363639831543, "learning_rate": 7.887071232062687e-06, "loss": 0.1161, "step": 42945 }, { "epoch": 0.7659900831163272, "grad_norm": 0.19313858449459076, "learning_rate": 7.885936576249394e-06, "loss": 0.0736, "step": 42946 }, { "epoch": 0.7660079192380409, "grad_norm": 0.31254518032073975, "learning_rate": 7.884801986776122e-06, "loss": 0.0964, "step": 42947 }, { "epoch": 0.7660257553597546, "grad_norm": 0.310528963804245, "learning_rate": 7.88366746364726e-06, "loss": 0.1586, "step": 42948 }, { "epoch": 0.7660435914814683, "grad_norm": 0.20265977084636688, "learning_rate": 7.882533006867221e-06, "loss": 0.1296, "step": 42949 }, { "epoch": 0.766061427603182, "grad_norm": 0.27496954798698425, "learning_rate": 7.881398616440398e-06, "loss": 0.1252, "step": 42950 }, { "epoch": 0.7660792637248957, "grad_norm": 0.25272825360298157, "learning_rate": 7.880264292371187e-06, "loss": 0.1651, "step": 42951 }, { "epoch": 0.7660970998466093, "grad_norm": 0.20538458228111267, "learning_rate": 7.87913003466398e-06, "loss": 0.1254, "step": 42952 }, { "epoch": 0.766114935968323, "grad_norm": 0.28113630414009094, "learning_rate": 7.877995843323178e-06, "loss": 0.1175, "step": 42953 }, { "epoch": 0.7661327720900367, "grad_norm": 0.187697172164917, "learning_rate": 7.876861718353178e-06, "loss": 0.1085, "step": 42954 }, { "epoch": 0.7661506082117504, "grad_norm": 0.277017205953598, "learning_rate": 7.875727659758374e-06, "loss": 0.1154, "step": 42955 }, { "epoch": 0.7661684443334641, "grad_norm": 0.2884194552898407, "learning_rate": 7.874593667543171e-06, "loss": 0.1038, "step": 42956 }, { "epoch": 0.7661862804551778, "grad_norm": 0.24556809663772583, "learning_rate": 7.873459741711947e-06, "loss": 0.143, "step": 42957 }, { "epoch": 0.7662041165768915, "grad_norm": 0.24674363434314728, "learning_rate": 7.872325882269119e-06, "loss": 0.1284, "step": 42958 }, { "epoch": 0.7662219526986053, "grad_norm": 0.2722882926464081, "learning_rate": 7.871192089219071e-06, "loss": 0.1035, "step": 42959 }, { "epoch": 0.766239788820319, "grad_norm": 0.30494844913482666, "learning_rate": 7.870058362566198e-06, "loss": 0.1019, "step": 42960 }, { "epoch": 0.7662576249420326, "grad_norm": 0.31936338543891907, "learning_rate": 7.868924702314889e-06, "loss": 0.1845, "step": 42961 }, { "epoch": 0.7662754610637463, "grad_norm": 0.29673469066619873, "learning_rate": 7.867791108469552e-06, "loss": 0.1377, "step": 42962 }, { "epoch": 0.76629329718546, "grad_norm": 0.36143171787261963, "learning_rate": 7.866657581034572e-06, "loss": 0.1136, "step": 42963 }, { "epoch": 0.7663111333071737, "grad_norm": 0.3071307837963104, "learning_rate": 7.865524120014347e-06, "loss": 0.1163, "step": 42964 }, { "epoch": 0.7663289694288874, "grad_norm": 0.26397082209587097, "learning_rate": 7.864390725413268e-06, "loss": 0.0563, "step": 42965 }, { "epoch": 0.7663468055506011, "grad_norm": 0.33258673548698425, "learning_rate": 7.863257397235722e-06, "loss": 0.1341, "step": 42966 }, { "epoch": 0.7663646416723148, "grad_norm": 0.24217641353607178, "learning_rate": 7.862124135486116e-06, "loss": 0.1018, "step": 42967 }, { "epoch": 0.7663824777940285, "grad_norm": 0.22889651358127594, "learning_rate": 7.860990940168827e-06, "loss": 0.108, "step": 42968 }, { "epoch": 0.7664003139157421, "grad_norm": 0.23804038763046265, "learning_rate": 7.859857811288265e-06, "loss": 0.0909, "step": 42969 }, { "epoch": 0.7664181500374558, "grad_norm": 0.2688756585121155, "learning_rate": 7.858724748848809e-06, "loss": 0.0831, "step": 42970 }, { "epoch": 0.7664359861591695, "grad_norm": 0.22026517987251282, "learning_rate": 7.857591752854862e-06, "loss": 0.1221, "step": 42971 }, { "epoch": 0.7664538222808832, "grad_norm": 0.20782138407230377, "learning_rate": 7.856458823310809e-06, "loss": 0.0754, "step": 42972 }, { "epoch": 0.7664716584025969, "grad_norm": 0.8296259045600891, "learning_rate": 7.855325960221044e-06, "loss": 0.1608, "step": 42973 }, { "epoch": 0.7664894945243106, "grad_norm": 0.24983401596546173, "learning_rate": 7.85419316358995e-06, "loss": 0.1135, "step": 42974 }, { "epoch": 0.7665073306460243, "grad_norm": 0.21038931608200073, "learning_rate": 7.853060433421933e-06, "loss": 0.0632, "step": 42975 }, { "epoch": 0.7665251667677381, "grad_norm": 0.2374330759048462, "learning_rate": 7.851927769721376e-06, "loss": 0.1256, "step": 42976 }, { "epoch": 0.7665430028894518, "grad_norm": 0.31622299551963806, "learning_rate": 7.85079517249267e-06, "loss": 0.0641, "step": 42977 }, { "epoch": 0.7665608390111655, "grad_norm": 0.3443428575992584, "learning_rate": 7.849662641740204e-06, "loss": 0.1255, "step": 42978 }, { "epoch": 0.7665786751328791, "grad_norm": 0.21523240208625793, "learning_rate": 7.848530177468361e-06, "loss": 0.1117, "step": 42979 }, { "epoch": 0.7665965112545928, "grad_norm": 0.3534828722476959, "learning_rate": 7.847397779681548e-06, "loss": 0.1646, "step": 42980 }, { "epoch": 0.7666143473763065, "grad_norm": 0.28434497117996216, "learning_rate": 7.846265448384147e-06, "loss": 0.1286, "step": 42981 }, { "epoch": 0.7666321834980202, "grad_norm": 0.24215789139270782, "learning_rate": 7.845133183580536e-06, "loss": 0.0744, "step": 42982 }, { "epoch": 0.7666500196197339, "grad_norm": 0.24433310329914093, "learning_rate": 7.844000985275112e-06, "loss": 0.1075, "step": 42983 }, { "epoch": 0.7666678557414476, "grad_norm": 0.24054983258247375, "learning_rate": 7.842868853472276e-06, "loss": 0.079, "step": 42984 }, { "epoch": 0.7666856918631613, "grad_norm": 0.23580560088157654, "learning_rate": 7.841736788176407e-06, "loss": 0.0532, "step": 42985 }, { "epoch": 0.766703527984875, "grad_norm": 0.2895699441432953, "learning_rate": 7.84060478939189e-06, "loss": 0.0886, "step": 42986 }, { "epoch": 0.7667213641065886, "grad_norm": 0.24624425172805786, "learning_rate": 7.839472857123109e-06, "loss": 0.1534, "step": 42987 }, { "epoch": 0.7667392002283023, "grad_norm": 0.2970411479473114, "learning_rate": 7.838340991374465e-06, "loss": 0.1066, "step": 42988 }, { "epoch": 0.766757036350016, "grad_norm": 0.2494984120130539, "learning_rate": 7.83720919215034e-06, "loss": 0.0989, "step": 42989 }, { "epoch": 0.7667748724717297, "grad_norm": 0.25810766220092773, "learning_rate": 7.836077459455121e-06, "loss": 0.1079, "step": 42990 }, { "epoch": 0.7667927085934434, "grad_norm": 0.23424342274665833, "learning_rate": 7.834945793293191e-06, "loss": 0.108, "step": 42991 }, { "epoch": 0.7668105447151571, "grad_norm": 0.38717761635780334, "learning_rate": 7.833814193668935e-06, "loss": 0.1331, "step": 42992 }, { "epoch": 0.7668283808368709, "grad_norm": 0.2999991178512573, "learning_rate": 7.832682660586751e-06, "loss": 0.0709, "step": 42993 }, { "epoch": 0.7668462169585846, "grad_norm": 0.25582024455070496, "learning_rate": 7.831551194051017e-06, "loss": 0.1361, "step": 42994 }, { "epoch": 0.7668640530802983, "grad_norm": 0.26886799931526184, "learning_rate": 7.830419794066116e-06, "loss": 0.0976, "step": 42995 }, { "epoch": 0.766881889202012, "grad_norm": 0.31064337491989136, "learning_rate": 7.829288460636441e-06, "loss": 0.0961, "step": 42996 }, { "epoch": 0.7668997253237256, "grad_norm": 0.3979969322681427, "learning_rate": 7.828157193766369e-06, "loss": 0.1025, "step": 42997 }, { "epoch": 0.7669175614454393, "grad_norm": 0.27119138836860657, "learning_rate": 7.827025993460298e-06, "loss": 0.093, "step": 42998 }, { "epoch": 0.766935397567153, "grad_norm": 0.2645191252231598, "learning_rate": 7.825894859722608e-06, "loss": 0.1231, "step": 42999 }, { "epoch": 0.7669532336888667, "grad_norm": 0.2291717380285263, "learning_rate": 7.82476379255767e-06, "loss": 0.0944, "step": 43000 }, { "epoch": 0.7669532336888667, "eval_loss": 0.11168795824050903, "eval_runtime": 107.0319, "eval_samples_per_second": 9.567, "eval_steps_per_second": 1.598, "step": 43000 }, { "epoch": 0.7669710698105804, "grad_norm": 0.17766976356506348, "learning_rate": 7.82363279196989e-06, "loss": 0.0738, "step": 43001 }, { "epoch": 0.7669889059322941, "grad_norm": 0.4053889214992523, "learning_rate": 7.822501857963643e-06, "loss": 0.1159, "step": 43002 }, { "epoch": 0.7670067420540078, "grad_norm": 0.21890953183174133, "learning_rate": 7.82137099054331e-06, "loss": 0.1149, "step": 43003 }, { "epoch": 0.7670245781757214, "grad_norm": 0.22025559842586517, "learning_rate": 7.820240189713274e-06, "loss": 0.1038, "step": 43004 }, { "epoch": 0.7670424142974351, "grad_norm": 0.2734902799129486, "learning_rate": 7.819109455477916e-06, "loss": 0.1022, "step": 43005 }, { "epoch": 0.7670602504191488, "grad_norm": 0.2672120928764343, "learning_rate": 7.817978787841631e-06, "loss": 0.1263, "step": 43006 }, { "epoch": 0.7670780865408625, "grad_norm": 0.20441804826259613, "learning_rate": 7.816848186808796e-06, "loss": 0.0593, "step": 43007 }, { "epoch": 0.7670959226625762, "grad_norm": 0.2629643380641937, "learning_rate": 7.815717652383789e-06, "loss": 0.1417, "step": 43008 }, { "epoch": 0.7671137587842899, "grad_norm": 0.2767849266529083, "learning_rate": 7.81458718457099e-06, "loss": 0.0879, "step": 43009 }, { "epoch": 0.7671315949060037, "grad_norm": 0.38085970282554626, "learning_rate": 7.813456783374793e-06, "loss": 0.177, "step": 43010 }, { "epoch": 0.7671494310277174, "grad_norm": 0.2635781466960907, "learning_rate": 7.812326448799568e-06, "loss": 0.1124, "step": 43011 }, { "epoch": 0.7671672671494311, "grad_norm": 0.2636691629886627, "learning_rate": 7.811196180849708e-06, "loss": 0.1527, "step": 43012 }, { "epoch": 0.7671851032711448, "grad_norm": 0.2609785497188568, "learning_rate": 7.810065979529579e-06, "loss": 0.1075, "step": 43013 }, { "epoch": 0.7672029393928584, "grad_norm": 0.260689914226532, "learning_rate": 7.808935844843584e-06, "loss": 0.1184, "step": 43014 }, { "epoch": 0.7672207755145721, "grad_norm": 0.34491807222366333, "learning_rate": 7.807805776796088e-06, "loss": 0.1012, "step": 43015 }, { "epoch": 0.7672386116362858, "grad_norm": 0.25932416319847107, "learning_rate": 7.806675775391476e-06, "loss": 0.093, "step": 43016 }, { "epoch": 0.7672564477579995, "grad_norm": 0.317667156457901, "learning_rate": 7.805545840634126e-06, "loss": 0.0992, "step": 43017 }, { "epoch": 0.7672742838797132, "grad_norm": 0.24547673761844635, "learning_rate": 7.804415972528412e-06, "loss": 0.0814, "step": 43018 }, { "epoch": 0.7672921200014269, "grad_norm": 0.21152032911777496, "learning_rate": 7.803286171078731e-06, "loss": 0.0708, "step": 43019 }, { "epoch": 0.7673099561231406, "grad_norm": 0.2532030940055847, "learning_rate": 7.802156436289448e-06, "loss": 0.149, "step": 43020 }, { "epoch": 0.7673277922448543, "grad_norm": 0.34872791171073914, "learning_rate": 7.801026768164949e-06, "loss": 0.1289, "step": 43021 }, { "epoch": 0.767345628366568, "grad_norm": 0.36275026202201843, "learning_rate": 7.799897166709602e-06, "loss": 0.1036, "step": 43022 }, { "epoch": 0.7673634644882816, "grad_norm": 0.27639642357826233, "learning_rate": 7.798767631927802e-06, "loss": 0.0999, "step": 43023 }, { "epoch": 0.7673813006099953, "grad_norm": 0.3113633692264557, "learning_rate": 7.797638163823914e-06, "loss": 0.1375, "step": 43024 }, { "epoch": 0.767399136731709, "grad_norm": 0.2779654264450073, "learning_rate": 7.796508762402327e-06, "loss": 0.116, "step": 43025 }, { "epoch": 0.7674169728534227, "grad_norm": 0.22081835567951202, "learning_rate": 7.79537942766741e-06, "loss": 0.1628, "step": 43026 }, { "epoch": 0.7674348089751365, "grad_norm": 0.2872173488140106, "learning_rate": 7.794250159623548e-06, "loss": 0.1211, "step": 43027 }, { "epoch": 0.7674526450968502, "grad_norm": 0.23371343314647675, "learning_rate": 7.793120958275119e-06, "loss": 0.1141, "step": 43028 }, { "epoch": 0.7674704812185639, "grad_norm": 0.8620897531509399, "learning_rate": 7.791991823626492e-06, "loss": 0.1722, "step": 43029 }, { "epoch": 0.7674883173402776, "grad_norm": 0.32448840141296387, "learning_rate": 7.790862755682051e-06, "loss": 0.1452, "step": 43030 }, { "epoch": 0.7675061534619912, "grad_norm": 0.25936704874038696, "learning_rate": 7.78973375444616e-06, "loss": 0.0922, "step": 43031 }, { "epoch": 0.7675239895837049, "grad_norm": 0.19083185493946075, "learning_rate": 7.788604819923215e-06, "loss": 0.1107, "step": 43032 }, { "epoch": 0.7675418257054186, "grad_norm": 0.5480894446372986, "learning_rate": 7.787475952117582e-06, "loss": 0.1661, "step": 43033 }, { "epoch": 0.7675596618271323, "grad_norm": 0.2621341943740845, "learning_rate": 7.786347151033637e-06, "loss": 0.1356, "step": 43034 }, { "epoch": 0.767577497948846, "grad_norm": 0.24636195600032806, "learning_rate": 7.785218416675746e-06, "loss": 0.0985, "step": 43035 }, { "epoch": 0.7675953340705597, "grad_norm": 0.27014729380607605, "learning_rate": 7.784089749048307e-06, "loss": 0.0849, "step": 43036 }, { "epoch": 0.7676131701922734, "grad_norm": 0.3166449964046478, "learning_rate": 7.78296114815568e-06, "loss": 0.1402, "step": 43037 }, { "epoch": 0.7676310063139871, "grad_norm": 0.1861816644668579, "learning_rate": 7.781832614002232e-06, "loss": 0.104, "step": 43038 }, { "epoch": 0.7676488424357008, "grad_norm": 0.2678993344306946, "learning_rate": 7.780704146592349e-06, "loss": 0.1356, "step": 43039 }, { "epoch": 0.7676666785574144, "grad_norm": 0.26752379536628723, "learning_rate": 7.779575745930413e-06, "loss": 0.1611, "step": 43040 }, { "epoch": 0.7676845146791281, "grad_norm": 0.2167665660381317, "learning_rate": 7.778447412020787e-06, "loss": 0.1232, "step": 43041 }, { "epoch": 0.7677023508008418, "grad_norm": 0.25560852885246277, "learning_rate": 7.777319144867848e-06, "loss": 0.0906, "step": 43042 }, { "epoch": 0.7677201869225556, "grad_norm": 0.2128244936466217, "learning_rate": 7.77619094447597e-06, "loss": 0.1116, "step": 43043 }, { "epoch": 0.7677380230442693, "grad_norm": 0.218234121799469, "learning_rate": 7.775062810849515e-06, "loss": 0.1165, "step": 43044 }, { "epoch": 0.767755859165983, "grad_norm": 0.24815750122070312, "learning_rate": 7.773934743992875e-06, "loss": 0.143, "step": 43045 }, { "epoch": 0.7677736952876967, "grad_norm": 0.29426339268684387, "learning_rate": 7.772806743910412e-06, "loss": 0.151, "step": 43046 }, { "epoch": 0.7677915314094104, "grad_norm": 0.24082951247692108, "learning_rate": 7.771678810606502e-06, "loss": 0.1131, "step": 43047 }, { "epoch": 0.767809367531124, "grad_norm": 0.25458139181137085, "learning_rate": 7.770550944085508e-06, "loss": 0.0835, "step": 43048 }, { "epoch": 0.7678272036528377, "grad_norm": 0.22703048586845398, "learning_rate": 7.769423144351814e-06, "loss": 0.0894, "step": 43049 }, { "epoch": 0.7678450397745514, "grad_norm": 0.45730623602867126, "learning_rate": 7.76829541140979e-06, "loss": 0.1164, "step": 43050 }, { "epoch": 0.7678628758962651, "grad_norm": 0.2879440486431122, "learning_rate": 7.767167745263796e-06, "loss": 0.1348, "step": 43051 }, { "epoch": 0.7678807120179788, "grad_norm": 0.2726638615131378, "learning_rate": 7.766040145918221e-06, "loss": 0.0969, "step": 43052 }, { "epoch": 0.7678985481396925, "grad_norm": 0.24980737268924713, "learning_rate": 7.764912613377418e-06, "loss": 0.14, "step": 43053 }, { "epoch": 0.7679163842614062, "grad_norm": 0.34120723605155945, "learning_rate": 7.763785147645772e-06, "loss": 0.1267, "step": 43054 }, { "epoch": 0.7679342203831199, "grad_norm": 0.31987863779067993, "learning_rate": 7.76265774872765e-06, "loss": 0.0967, "step": 43055 }, { "epoch": 0.7679520565048336, "grad_norm": 0.3227657973766327, "learning_rate": 7.761530416627421e-06, "loss": 0.112, "step": 43056 }, { "epoch": 0.7679698926265472, "grad_norm": 0.25025060772895813, "learning_rate": 7.760403151349446e-06, "loss": 0.1579, "step": 43057 }, { "epoch": 0.7679877287482609, "grad_norm": 0.3062143325805664, "learning_rate": 7.75927595289811e-06, "loss": 0.1051, "step": 43058 }, { "epoch": 0.7680055648699746, "grad_norm": 0.22879207134246826, "learning_rate": 7.758148821277775e-06, "loss": 0.1279, "step": 43059 }, { "epoch": 0.7680234009916884, "grad_norm": 0.28289875388145447, "learning_rate": 7.75702175649281e-06, "loss": 0.0872, "step": 43060 }, { "epoch": 0.7680412371134021, "grad_norm": 0.2567320466041565, "learning_rate": 7.755894758547578e-06, "loss": 0.0963, "step": 43061 }, { "epoch": 0.7680590732351158, "grad_norm": 0.3041916787624359, "learning_rate": 7.754767827446461e-06, "loss": 0.1513, "step": 43062 }, { "epoch": 0.7680769093568295, "grad_norm": 0.2254086136817932, "learning_rate": 7.753640963193817e-06, "loss": 0.112, "step": 43063 }, { "epoch": 0.7680947454785432, "grad_norm": 0.31808820366859436, "learning_rate": 7.752514165794022e-06, "loss": 0.1766, "step": 43064 }, { "epoch": 0.7681125816002569, "grad_norm": 0.26063990592956543, "learning_rate": 7.751387435251428e-06, "loss": 0.1178, "step": 43065 }, { "epoch": 0.7681304177219705, "grad_norm": 0.32291775941848755, "learning_rate": 7.750260771570416e-06, "loss": 0.208, "step": 43066 }, { "epoch": 0.7681482538436842, "grad_norm": 0.1942048966884613, "learning_rate": 7.749134174755357e-06, "loss": 0.0798, "step": 43067 }, { "epoch": 0.7681660899653979, "grad_norm": 0.31568217277526855, "learning_rate": 7.748007644810614e-06, "loss": 0.1406, "step": 43068 }, { "epoch": 0.7681839260871116, "grad_norm": 0.229177787899971, "learning_rate": 7.746881181740551e-06, "loss": 0.089, "step": 43069 }, { "epoch": 0.7682017622088253, "grad_norm": 0.34718164801597595, "learning_rate": 7.745754785549528e-06, "loss": 0.0819, "step": 43070 }, { "epoch": 0.768219598330539, "grad_norm": 0.24727711081504822, "learning_rate": 7.744628456241929e-06, "loss": 0.106, "step": 43071 }, { "epoch": 0.7682374344522527, "grad_norm": 0.24125796556472778, "learning_rate": 7.743502193822106e-06, "loss": 0.1592, "step": 43072 }, { "epoch": 0.7682552705739664, "grad_norm": 0.2829814851284027, "learning_rate": 7.742375998294431e-06, "loss": 0.1101, "step": 43073 }, { "epoch": 0.76827310669568, "grad_norm": 0.278268963098526, "learning_rate": 7.741249869663259e-06, "loss": 0.083, "step": 43074 }, { "epoch": 0.7682909428173937, "grad_norm": 0.1862305998802185, "learning_rate": 7.74012380793297e-06, "loss": 0.0663, "step": 43075 }, { "epoch": 0.7683087789391074, "grad_norm": 0.2176610827445984, "learning_rate": 7.738997813107923e-06, "loss": 0.0921, "step": 43076 }, { "epoch": 0.7683266150608212, "grad_norm": 0.23432780802249908, "learning_rate": 7.737871885192484e-06, "loss": 0.1287, "step": 43077 }, { "epoch": 0.7683444511825349, "grad_norm": 0.2174353152513504, "learning_rate": 7.736746024191008e-06, "loss": 0.0748, "step": 43078 }, { "epoch": 0.7683622873042486, "grad_norm": 0.3002229928970337, "learning_rate": 7.735620230107873e-06, "loss": 0.125, "step": 43079 }, { "epoch": 0.7683801234259623, "grad_norm": 0.2447798103094101, "learning_rate": 7.73449450294743e-06, "loss": 0.0967, "step": 43080 }, { "epoch": 0.768397959547676, "grad_norm": 0.21270261704921722, "learning_rate": 7.733368842714055e-06, "loss": 0.1388, "step": 43081 }, { "epoch": 0.7684157956693897, "grad_norm": 0.27022525668144226, "learning_rate": 7.73224324941211e-06, "loss": 0.1109, "step": 43082 }, { "epoch": 0.7684336317911034, "grad_norm": 0.20504756271839142, "learning_rate": 7.731117723045944e-06, "loss": 0.0809, "step": 43083 }, { "epoch": 0.768451467912817, "grad_norm": 0.2651057243347168, "learning_rate": 7.72999226361994e-06, "loss": 0.1094, "step": 43084 }, { "epoch": 0.7684693040345307, "grad_norm": 0.18808798491954803, "learning_rate": 7.728866871138448e-06, "loss": 0.084, "step": 43085 }, { "epoch": 0.7684871401562444, "grad_norm": 0.21226200461387634, "learning_rate": 7.727741545605835e-06, "loss": 0.1093, "step": 43086 }, { "epoch": 0.7685049762779581, "grad_norm": 0.27938905358314514, "learning_rate": 7.726616287026454e-06, "loss": 0.1039, "step": 43087 }, { "epoch": 0.7685228123996718, "grad_norm": 0.2202521711587906, "learning_rate": 7.72549109540468e-06, "loss": 0.0688, "step": 43088 }, { "epoch": 0.7685406485213855, "grad_norm": 0.27696171402931213, "learning_rate": 7.72436597074487e-06, "loss": 0.1348, "step": 43089 }, { "epoch": 0.7685584846430992, "grad_norm": 0.282012939453125, "learning_rate": 7.723240913051385e-06, "loss": 0.1008, "step": 43090 }, { "epoch": 0.7685763207648129, "grad_norm": 0.31433621048927307, "learning_rate": 7.722115922328577e-06, "loss": 0.1134, "step": 43091 }, { "epoch": 0.7685941568865265, "grad_norm": 0.352639764547348, "learning_rate": 7.720990998580823e-06, "loss": 0.1545, "step": 43092 }, { "epoch": 0.7686119930082402, "grad_norm": 0.29540979862213135, "learning_rate": 7.719866141812468e-06, "loss": 0.1308, "step": 43093 }, { "epoch": 0.768629829129954, "grad_norm": 0.40335536003112793, "learning_rate": 7.718741352027889e-06, "loss": 0.1397, "step": 43094 }, { "epoch": 0.7686476652516677, "grad_norm": 0.26056864857673645, "learning_rate": 7.717616629231436e-06, "loss": 0.1102, "step": 43095 }, { "epoch": 0.7686655013733814, "grad_norm": 0.25503790378570557, "learning_rate": 7.716491973427465e-06, "loss": 0.1223, "step": 43096 }, { "epoch": 0.7686833374950951, "grad_norm": 0.2516988515853882, "learning_rate": 7.715367384620345e-06, "loss": 0.1234, "step": 43097 }, { "epoch": 0.7687011736168088, "grad_norm": 0.17808571457862854, "learning_rate": 7.714242862814433e-06, "loss": 0.0727, "step": 43098 }, { "epoch": 0.7687190097385225, "grad_norm": 0.2271338403224945, "learning_rate": 7.713118408014087e-06, "loss": 0.0984, "step": 43099 }, { "epoch": 0.7687368458602362, "grad_norm": 0.20613867044448853, "learning_rate": 7.711994020223654e-06, "loss": 0.1261, "step": 43100 }, { "epoch": 0.7687546819819499, "grad_norm": 0.24867239594459534, "learning_rate": 7.710869699447512e-06, "loss": 0.1343, "step": 43101 }, { "epoch": 0.7687725181036635, "grad_norm": 0.29739993810653687, "learning_rate": 7.709745445690012e-06, "loss": 0.1776, "step": 43102 }, { "epoch": 0.7687903542253772, "grad_norm": 0.35179978609085083, "learning_rate": 7.708621258955509e-06, "loss": 0.1211, "step": 43103 }, { "epoch": 0.7688081903470909, "grad_norm": 0.25421440601348877, "learning_rate": 7.707497139248355e-06, "loss": 0.0683, "step": 43104 }, { "epoch": 0.7688260264688046, "grad_norm": 0.37196245789527893, "learning_rate": 7.70637308657292e-06, "loss": 0.1131, "step": 43105 }, { "epoch": 0.7688438625905183, "grad_norm": 0.2655698359012604, "learning_rate": 7.70524910093356e-06, "loss": 0.1016, "step": 43106 }, { "epoch": 0.768861698712232, "grad_norm": 0.3636311888694763, "learning_rate": 7.704125182334618e-06, "loss": 0.1231, "step": 43107 }, { "epoch": 0.7688795348339457, "grad_norm": 0.2517312467098236, "learning_rate": 7.703001330780469e-06, "loss": 0.1123, "step": 43108 }, { "epoch": 0.7688973709556594, "grad_norm": 0.297485888004303, "learning_rate": 7.701877546275451e-06, "loss": 0.0843, "step": 43109 }, { "epoch": 0.768915207077373, "grad_norm": 0.25403669476509094, "learning_rate": 7.700753828823942e-06, "loss": 0.162, "step": 43110 }, { "epoch": 0.7689330431990868, "grad_norm": 0.274909645318985, "learning_rate": 7.699630178430284e-06, "loss": 0.1111, "step": 43111 }, { "epoch": 0.7689508793208005, "grad_norm": 0.24657250940799713, "learning_rate": 7.698506595098834e-06, "loss": 0.0863, "step": 43112 }, { "epoch": 0.7689687154425142, "grad_norm": 0.3488124907016754, "learning_rate": 7.697383078833941e-06, "loss": 0.1929, "step": 43113 }, { "epoch": 0.7689865515642279, "grad_norm": 0.33446502685546875, "learning_rate": 7.696259629639977e-06, "loss": 0.1368, "step": 43114 }, { "epoch": 0.7690043876859416, "grad_norm": 0.21536527574062347, "learning_rate": 7.695136247521285e-06, "loss": 0.0859, "step": 43115 }, { "epoch": 0.7690222238076553, "grad_norm": 0.3449539542198181, "learning_rate": 7.694012932482223e-06, "loss": 0.114, "step": 43116 }, { "epoch": 0.769040059929369, "grad_norm": 0.30397507548332214, "learning_rate": 7.692889684527136e-06, "loss": 0.1737, "step": 43117 }, { "epoch": 0.7690578960510827, "grad_norm": 0.30209821462631226, "learning_rate": 7.691766503660393e-06, "loss": 0.1453, "step": 43118 }, { "epoch": 0.7690757321727963, "grad_norm": 0.28950777649879456, "learning_rate": 7.690643389886343e-06, "loss": 0.0836, "step": 43119 }, { "epoch": 0.76909356829451, "grad_norm": 0.3058173954486847, "learning_rate": 7.689520343209327e-06, "loss": 0.1149, "step": 43120 }, { "epoch": 0.7691114044162237, "grad_norm": 0.31864744424819946, "learning_rate": 7.68839736363372e-06, "loss": 0.1192, "step": 43121 }, { "epoch": 0.7691292405379374, "grad_norm": 0.4027446210384369, "learning_rate": 7.687274451163853e-06, "loss": 0.1276, "step": 43122 }, { "epoch": 0.7691470766596511, "grad_norm": 0.26255398988723755, "learning_rate": 7.686151605804102e-06, "loss": 0.1718, "step": 43123 }, { "epoch": 0.7691649127813648, "grad_norm": 0.2799380123615265, "learning_rate": 7.685028827558804e-06, "loss": 0.0861, "step": 43124 }, { "epoch": 0.7691827489030785, "grad_norm": 0.2313477098941803, "learning_rate": 7.683906116432316e-06, "loss": 0.1247, "step": 43125 }, { "epoch": 0.7692005850247922, "grad_norm": 0.2404094636440277, "learning_rate": 7.682783472428981e-06, "loss": 0.1359, "step": 43126 }, { "epoch": 0.7692184211465058, "grad_norm": 0.18121837079524994, "learning_rate": 7.681660895553163e-06, "loss": 0.0565, "step": 43127 }, { "epoch": 0.7692362572682196, "grad_norm": 0.283416748046875, "learning_rate": 7.680538385809214e-06, "loss": 0.114, "step": 43128 }, { "epoch": 0.7692540933899333, "grad_norm": 0.2934994399547577, "learning_rate": 7.679415943201476e-06, "loss": 0.1048, "step": 43129 }, { "epoch": 0.769271929511647, "grad_norm": 0.2772046625614166, "learning_rate": 7.678293567734299e-06, "loss": 0.0759, "step": 43130 }, { "epoch": 0.7692897656333607, "grad_norm": 0.2413949966430664, "learning_rate": 7.677171259412047e-06, "loss": 0.0857, "step": 43131 }, { "epoch": 0.7693076017550744, "grad_norm": 0.3355247676372528, "learning_rate": 7.676049018239059e-06, "loss": 0.1251, "step": 43132 }, { "epoch": 0.7693254378767881, "grad_norm": 0.2650832235813141, "learning_rate": 7.674926844219693e-06, "loss": 0.1391, "step": 43133 }, { "epoch": 0.7693432739985018, "grad_norm": 0.25431960821151733, "learning_rate": 7.673804737358285e-06, "loss": 0.0952, "step": 43134 }, { "epoch": 0.7693611101202155, "grad_norm": 0.26480409502983093, "learning_rate": 7.6726826976592e-06, "loss": 0.1505, "step": 43135 }, { "epoch": 0.7693789462419292, "grad_norm": 0.36791080236434937, "learning_rate": 7.671560725126775e-06, "loss": 0.1466, "step": 43136 }, { "epoch": 0.7693967823636428, "grad_norm": 0.2084689885377884, "learning_rate": 7.670438819765372e-06, "loss": 0.0921, "step": 43137 }, { "epoch": 0.7694146184853565, "grad_norm": 0.20457488298416138, "learning_rate": 7.669316981579335e-06, "loss": 0.0914, "step": 43138 }, { "epoch": 0.7694324546070702, "grad_norm": 0.29849106073379517, "learning_rate": 7.668195210573004e-06, "loss": 0.1015, "step": 43139 }, { "epoch": 0.7694502907287839, "grad_norm": 0.2425861954689026, "learning_rate": 7.667073506750741e-06, "loss": 0.1028, "step": 43140 }, { "epoch": 0.7694681268504976, "grad_norm": 0.24990884959697723, "learning_rate": 7.665951870116889e-06, "loss": 0.1002, "step": 43141 }, { "epoch": 0.7694859629722113, "grad_norm": 0.2745872437953949, "learning_rate": 7.664830300675793e-06, "loss": 0.1426, "step": 43142 }, { "epoch": 0.769503799093925, "grad_norm": 0.23437552154064178, "learning_rate": 7.663708798431795e-06, "loss": 0.0955, "step": 43143 }, { "epoch": 0.7695216352156388, "grad_norm": 0.2776063084602356, "learning_rate": 7.662587363389258e-06, "loss": 0.0853, "step": 43144 }, { "epoch": 0.7695394713373525, "grad_norm": 0.19547830522060394, "learning_rate": 7.661465995552523e-06, "loss": 0.0723, "step": 43145 }, { "epoch": 0.7695573074590661, "grad_norm": 0.39287278056144714, "learning_rate": 7.66034469492593e-06, "loss": 0.1068, "step": 43146 }, { "epoch": 0.7695751435807798, "grad_norm": 0.2864287197589874, "learning_rate": 7.659223461513823e-06, "loss": 0.1222, "step": 43147 }, { "epoch": 0.7695929797024935, "grad_norm": 0.34062066674232483, "learning_rate": 7.658102295320562e-06, "loss": 0.1781, "step": 43148 }, { "epoch": 0.7696108158242072, "grad_norm": 0.2591913342475891, "learning_rate": 7.656981196350482e-06, "loss": 0.1317, "step": 43149 }, { "epoch": 0.7696286519459209, "grad_norm": 0.2820344567298889, "learning_rate": 7.655860164607936e-06, "loss": 0.1086, "step": 43150 }, { "epoch": 0.7696464880676346, "grad_norm": 0.2313556671142578, "learning_rate": 7.654739200097271e-06, "loss": 0.1358, "step": 43151 }, { "epoch": 0.7696643241893483, "grad_norm": 0.21038925647735596, "learning_rate": 7.653618302822818e-06, "loss": 0.0951, "step": 43152 }, { "epoch": 0.769682160311062, "grad_norm": 0.26829609274864197, "learning_rate": 7.65249747278894e-06, "loss": 0.0967, "step": 43153 }, { "epoch": 0.7696999964327756, "grad_norm": 0.2607644498348236, "learning_rate": 7.651376709999971e-06, "loss": 0.1132, "step": 43154 }, { "epoch": 0.7697178325544893, "grad_norm": 0.24704980850219727, "learning_rate": 7.65025601446026e-06, "loss": 0.1189, "step": 43155 }, { "epoch": 0.769735668676203, "grad_norm": 0.25660374760627747, "learning_rate": 7.649135386174142e-06, "loss": 0.1115, "step": 43156 }, { "epoch": 0.7697535047979167, "grad_norm": 0.23838426172733307, "learning_rate": 7.648014825145974e-06, "loss": 0.1433, "step": 43157 }, { "epoch": 0.7697713409196304, "grad_norm": 0.3446713387966156, "learning_rate": 7.646894331380097e-06, "loss": 0.1324, "step": 43158 }, { "epoch": 0.7697891770413441, "grad_norm": 0.3234025835990906, "learning_rate": 7.645773904880849e-06, "loss": 0.1433, "step": 43159 }, { "epoch": 0.7698070131630578, "grad_norm": 0.21624432504177094, "learning_rate": 7.644653545652575e-06, "loss": 0.0794, "step": 43160 }, { "epoch": 0.7698248492847716, "grad_norm": 0.25194352865219116, "learning_rate": 7.643533253699611e-06, "loss": 0.1056, "step": 43161 }, { "epoch": 0.7698426854064853, "grad_norm": 0.3037458658218384, "learning_rate": 7.642413029026314e-06, "loss": 0.1188, "step": 43162 }, { "epoch": 0.769860521528199, "grad_norm": 0.30823564529418945, "learning_rate": 7.64129287163701e-06, "loss": 0.1296, "step": 43163 }, { "epoch": 0.7698783576499126, "grad_norm": 0.2188183218240738, "learning_rate": 7.640172781536062e-06, "loss": 0.1008, "step": 43164 }, { "epoch": 0.7698961937716263, "grad_norm": 0.3287068009376526, "learning_rate": 7.639052758727789e-06, "loss": 0.1724, "step": 43165 }, { "epoch": 0.76991402989334, "grad_norm": 0.24873261153697968, "learning_rate": 7.637932803216552e-06, "loss": 0.1291, "step": 43166 }, { "epoch": 0.7699318660150537, "grad_norm": 0.3201192319393158, "learning_rate": 7.636812915006686e-06, "loss": 0.1184, "step": 43167 }, { "epoch": 0.7699497021367674, "grad_norm": 0.25319814682006836, "learning_rate": 7.635693094102528e-06, "loss": 0.1361, "step": 43168 }, { "epoch": 0.7699675382584811, "grad_norm": 0.28412410616874695, "learning_rate": 7.634573340508413e-06, "loss": 0.109, "step": 43169 }, { "epoch": 0.7699853743801948, "grad_norm": 0.23510943353176117, "learning_rate": 7.633453654228701e-06, "loss": 0.1, "step": 43170 }, { "epoch": 0.7700032105019085, "grad_norm": 0.3590015769004822, "learning_rate": 7.632334035267719e-06, "loss": 0.1351, "step": 43171 }, { "epoch": 0.7700210466236221, "grad_norm": 0.2735082805156708, "learning_rate": 7.631214483629806e-06, "loss": 0.1187, "step": 43172 }, { "epoch": 0.7700388827453358, "grad_norm": 0.36123690009117126, "learning_rate": 7.630094999319309e-06, "loss": 0.0946, "step": 43173 }, { "epoch": 0.7700567188670495, "grad_norm": 0.3469754755496979, "learning_rate": 7.628975582340553e-06, "loss": 0.1557, "step": 43174 }, { "epoch": 0.7700745549887632, "grad_norm": 0.2728753387928009, "learning_rate": 7.627856232697894e-06, "loss": 0.1065, "step": 43175 }, { "epoch": 0.7700923911104769, "grad_norm": 0.4476653039455414, "learning_rate": 7.626736950395661e-06, "loss": 0.121, "step": 43176 }, { "epoch": 0.7701102272321906, "grad_norm": 0.19168226420879364, "learning_rate": 7.625617735438198e-06, "loss": 0.1099, "step": 43177 }, { "epoch": 0.7701280633539044, "grad_norm": 0.2618299424648285, "learning_rate": 7.624498587829837e-06, "loss": 0.1458, "step": 43178 }, { "epoch": 0.7701458994756181, "grad_norm": 0.3376937210559845, "learning_rate": 7.6233795075749296e-06, "loss": 0.1014, "step": 43179 }, { "epoch": 0.7701637355973318, "grad_norm": 0.3775985538959503, "learning_rate": 7.622260494677805e-06, "loss": 0.1242, "step": 43180 }, { "epoch": 0.7701815717190454, "grad_norm": 0.22356335818767548, "learning_rate": 7.621141549142799e-06, "loss": 0.1148, "step": 43181 }, { "epoch": 0.7701994078407591, "grad_norm": 0.24491247534751892, "learning_rate": 7.620022670974241e-06, "loss": 0.0939, "step": 43182 }, { "epoch": 0.7702172439624728, "grad_norm": 0.2010246217250824, "learning_rate": 7.61890386017649e-06, "loss": 0.0719, "step": 43183 }, { "epoch": 0.7702350800841865, "grad_norm": 0.30959445238113403, "learning_rate": 7.61778511675387e-06, "loss": 0.1612, "step": 43184 }, { "epoch": 0.7702529162059002, "grad_norm": 0.2769760191440582, "learning_rate": 7.616666440710718e-06, "loss": 0.1484, "step": 43185 }, { "epoch": 0.7702707523276139, "grad_norm": 0.25192350149154663, "learning_rate": 7.61554783205137e-06, "loss": 0.1144, "step": 43186 }, { "epoch": 0.7702885884493276, "grad_norm": 0.22268660366535187, "learning_rate": 7.614429290780156e-06, "loss": 0.1207, "step": 43187 }, { "epoch": 0.7703064245710413, "grad_norm": 0.24352645874023438, "learning_rate": 7.613310816901426e-06, "loss": 0.0927, "step": 43188 }, { "epoch": 0.770324260692755, "grad_norm": 0.2507781684398651, "learning_rate": 7.612192410419508e-06, "loss": 0.117, "step": 43189 }, { "epoch": 0.7703420968144686, "grad_norm": 0.21125119924545288, "learning_rate": 7.6110740713387305e-06, "loss": 0.1131, "step": 43190 }, { "epoch": 0.7703599329361823, "grad_norm": 0.24588696658611298, "learning_rate": 7.609955799663446e-06, "loss": 0.1305, "step": 43191 }, { "epoch": 0.770377769057896, "grad_norm": 0.14450868964195251, "learning_rate": 7.608837595397969e-06, "loss": 0.0344, "step": 43192 }, { "epoch": 0.7703956051796097, "grad_norm": 0.3037028908729553, "learning_rate": 7.607719458546652e-06, "loss": 0.1346, "step": 43193 }, { "epoch": 0.7704134413013234, "grad_norm": 0.2455395758152008, "learning_rate": 7.606601389113821e-06, "loss": 0.1241, "step": 43194 }, { "epoch": 0.7704312774230372, "grad_norm": 0.30561378598213196, "learning_rate": 7.605483387103804e-06, "loss": 0.066, "step": 43195 }, { "epoch": 0.7704491135447509, "grad_norm": 0.27732619643211365, "learning_rate": 7.604365452520948e-06, "loss": 0.1044, "step": 43196 }, { "epoch": 0.7704669496664646, "grad_norm": 0.35555416345596313, "learning_rate": 7.60324758536958e-06, "loss": 0.126, "step": 43197 }, { "epoch": 0.7704847857881783, "grad_norm": 0.30437901616096497, "learning_rate": 7.602129785654036e-06, "loss": 0.1546, "step": 43198 }, { "epoch": 0.7705026219098919, "grad_norm": 0.27484020590782166, "learning_rate": 7.6010120533786425e-06, "loss": 0.1836, "step": 43199 }, { "epoch": 0.7705204580316056, "grad_norm": 0.21924909949302673, "learning_rate": 7.599894388547729e-06, "loss": 0.1163, "step": 43200 }, { "epoch": 0.7705382941533193, "grad_norm": 0.25036191940307617, "learning_rate": 7.598776791165641e-06, "loss": 0.0674, "step": 43201 }, { "epoch": 0.770556130275033, "grad_norm": 0.2570417523384094, "learning_rate": 7.5976592612367055e-06, "loss": 0.1593, "step": 43202 }, { "epoch": 0.7705739663967467, "grad_norm": 0.2015930712223053, "learning_rate": 7.596541798765247e-06, "loss": 0.122, "step": 43203 }, { "epoch": 0.7705918025184604, "grad_norm": 0.3157704770565033, "learning_rate": 7.5954244037556074e-06, "loss": 0.1177, "step": 43204 }, { "epoch": 0.7706096386401741, "grad_norm": 0.28714025020599365, "learning_rate": 7.594307076212109e-06, "loss": 0.1428, "step": 43205 }, { "epoch": 0.7706274747618878, "grad_norm": 0.2482876032590866, "learning_rate": 7.593189816139095e-06, "loss": 0.0852, "step": 43206 }, { "epoch": 0.7706453108836014, "grad_norm": 0.23907610774040222, "learning_rate": 7.5920726235408915e-06, "loss": 0.1202, "step": 43207 }, { "epoch": 0.7706631470053151, "grad_norm": 0.24861156940460205, "learning_rate": 7.590955498421817e-06, "loss": 0.1075, "step": 43208 }, { "epoch": 0.7706809831270288, "grad_norm": 0.3839176595211029, "learning_rate": 7.5898384407862205e-06, "loss": 0.1088, "step": 43209 }, { "epoch": 0.7706988192487425, "grad_norm": 0.5693812966346741, "learning_rate": 7.588721450638425e-06, "loss": 0.1514, "step": 43210 }, { "epoch": 0.7707166553704562, "grad_norm": 0.2105524241924286, "learning_rate": 7.587604527982756e-06, "loss": 0.1074, "step": 43211 }, { "epoch": 0.77073449149217, "grad_norm": 0.24127443134784698, "learning_rate": 7.5864876728235476e-06, "loss": 0.141, "step": 43212 }, { "epoch": 0.7707523276138837, "grad_norm": 0.4091591238975525, "learning_rate": 7.585370885165119e-06, "loss": 0.1191, "step": 43213 }, { "epoch": 0.7707701637355974, "grad_norm": 0.27039700746536255, "learning_rate": 7.584254165011817e-06, "loss": 0.1024, "step": 43214 }, { "epoch": 0.7707879998573111, "grad_norm": 0.2048932909965515, "learning_rate": 7.583137512367958e-06, "loss": 0.1047, "step": 43215 }, { "epoch": 0.7708058359790247, "grad_norm": 0.2856503129005432, "learning_rate": 7.582020927237876e-06, "loss": 0.1115, "step": 43216 }, { "epoch": 0.7708236721007384, "grad_norm": 0.20236904919147491, "learning_rate": 7.580904409625889e-06, "loss": 0.1033, "step": 43217 }, { "epoch": 0.7708415082224521, "grad_norm": 0.2524934411048889, "learning_rate": 7.579787959536339e-06, "loss": 0.1336, "step": 43218 }, { "epoch": 0.7708593443441658, "grad_norm": 0.22171279788017273, "learning_rate": 7.578671576973539e-06, "loss": 0.0897, "step": 43219 }, { "epoch": 0.7708771804658795, "grad_norm": 0.35861000418663025, "learning_rate": 7.5775552619418374e-06, "loss": 0.147, "step": 43220 }, { "epoch": 0.7708950165875932, "grad_norm": 0.2904147207736969, "learning_rate": 7.576439014445538e-06, "loss": 0.0885, "step": 43221 }, { "epoch": 0.7709128527093069, "grad_norm": 0.2880682647228241, "learning_rate": 7.575322834488988e-06, "loss": 0.1041, "step": 43222 }, { "epoch": 0.7709306888310206, "grad_norm": 0.24455687403678894, "learning_rate": 7.574206722076505e-06, "loss": 0.0799, "step": 43223 }, { "epoch": 0.7709485249527342, "grad_norm": 0.24320228397846222, "learning_rate": 7.573090677212413e-06, "loss": 0.1201, "step": 43224 }, { "epoch": 0.7709663610744479, "grad_norm": 0.23077481985092163, "learning_rate": 7.5719746999010435e-06, "loss": 0.0725, "step": 43225 }, { "epoch": 0.7709841971961616, "grad_norm": 0.24306534230709076, "learning_rate": 7.570858790146709e-06, "loss": 0.1028, "step": 43226 }, { "epoch": 0.7710020333178753, "grad_norm": 0.24609504640102386, "learning_rate": 7.569742947953756e-06, "loss": 0.1243, "step": 43227 }, { "epoch": 0.771019869439589, "grad_norm": 0.3128361701965332, "learning_rate": 7.568627173326498e-06, "loss": 0.1152, "step": 43228 }, { "epoch": 0.7710377055613028, "grad_norm": 0.27013352513313293, "learning_rate": 7.5675114662692615e-06, "loss": 0.1194, "step": 43229 }, { "epoch": 0.7710555416830165, "grad_norm": 0.2624155879020691, "learning_rate": 7.566395826786366e-06, "loss": 0.14, "step": 43230 }, { "epoch": 0.7710733778047302, "grad_norm": 0.2880672216415405, "learning_rate": 7.565280254882148e-06, "loss": 0.0942, "step": 43231 }, { "epoch": 0.7710912139264439, "grad_norm": 0.23503975570201874, "learning_rate": 7.564164750560918e-06, "loss": 0.1155, "step": 43232 }, { "epoch": 0.7711090500481576, "grad_norm": 0.22663715481758118, "learning_rate": 7.563049313827014e-06, "loss": 0.1041, "step": 43233 }, { "epoch": 0.7711268861698712, "grad_norm": 0.27420687675476074, "learning_rate": 7.5619339446847455e-06, "loss": 0.0901, "step": 43234 }, { "epoch": 0.7711447222915849, "grad_norm": 0.26121965050697327, "learning_rate": 7.560818643138454e-06, "loss": 0.0618, "step": 43235 }, { "epoch": 0.7711625584132986, "grad_norm": 0.20403441786766052, "learning_rate": 7.559703409192451e-06, "loss": 0.0931, "step": 43236 }, { "epoch": 0.7711803945350123, "grad_norm": 0.3251589238643646, "learning_rate": 7.558588242851061e-06, "loss": 0.1346, "step": 43237 }, { "epoch": 0.771198230656726, "grad_norm": 0.2036709189414978, "learning_rate": 7.55747314411861e-06, "loss": 0.1236, "step": 43238 }, { "epoch": 0.7712160667784397, "grad_norm": 0.32432326674461365, "learning_rate": 7.556358112999406e-06, "loss": 0.1552, "step": 43239 }, { "epoch": 0.7712339029001534, "grad_norm": 0.21016792953014374, "learning_rate": 7.555243149497793e-06, "loss": 0.0719, "step": 43240 }, { "epoch": 0.771251739021867, "grad_norm": 0.22607794404029846, "learning_rate": 7.554128253618081e-06, "loss": 0.1221, "step": 43241 }, { "epoch": 0.7712695751435807, "grad_norm": 0.3194332718849182, "learning_rate": 7.553013425364594e-06, "loss": 0.1217, "step": 43242 }, { "epoch": 0.7712874112652944, "grad_norm": 0.2569514811038971, "learning_rate": 7.551898664741647e-06, "loss": 0.1416, "step": 43243 }, { "epoch": 0.7713052473870081, "grad_norm": 0.20603618025779724, "learning_rate": 7.550783971753572e-06, "loss": 0.1075, "step": 43244 }, { "epoch": 0.7713230835087219, "grad_norm": 0.2324785590171814, "learning_rate": 7.549669346404689e-06, "loss": 0.1219, "step": 43245 }, { "epoch": 0.7713409196304356, "grad_norm": 0.3243198096752167, "learning_rate": 7.548554788699303e-06, "loss": 0.1504, "step": 43246 }, { "epoch": 0.7713587557521493, "grad_norm": 0.2696075141429901, "learning_rate": 7.547440298641747e-06, "loss": 0.1098, "step": 43247 }, { "epoch": 0.771376591873863, "grad_norm": 0.28341400623321533, "learning_rate": 7.546325876236351e-06, "loss": 0.1112, "step": 43248 }, { "epoch": 0.7713944279955767, "grad_norm": 0.30703896284103394, "learning_rate": 7.545211521487422e-06, "loss": 0.1266, "step": 43249 }, { "epoch": 0.7714122641172904, "grad_norm": 0.2016613781452179, "learning_rate": 7.5440972343992806e-06, "loss": 0.08, "step": 43250 }, { "epoch": 0.771430100239004, "grad_norm": 0.22971948981285095, "learning_rate": 7.542983014976249e-06, "loss": 0.1181, "step": 43251 }, { "epoch": 0.7714479363607177, "grad_norm": 0.2425071746110916, "learning_rate": 7.541868863222637e-06, "loss": 0.1297, "step": 43252 }, { "epoch": 0.7714657724824314, "grad_norm": 0.29377493262290955, "learning_rate": 7.540754779142778e-06, "loss": 0.1328, "step": 43253 }, { "epoch": 0.7714836086041451, "grad_norm": 0.3360605537891388, "learning_rate": 7.5396407627409845e-06, "loss": 0.1367, "step": 43254 }, { "epoch": 0.7715014447258588, "grad_norm": 0.25270095467567444, "learning_rate": 7.538526814021574e-06, "loss": 0.1148, "step": 43255 }, { "epoch": 0.7715192808475725, "grad_norm": 0.26664939522743225, "learning_rate": 7.5374129329888575e-06, "loss": 0.1144, "step": 43256 }, { "epoch": 0.7715371169692862, "grad_norm": 0.27126747369766235, "learning_rate": 7.536299119647166e-06, "loss": 0.107, "step": 43257 }, { "epoch": 0.7715549530909999, "grad_norm": 0.2711648643016815, "learning_rate": 7.535185374000811e-06, "loss": 0.1216, "step": 43258 }, { "epoch": 0.7715727892127136, "grad_norm": 0.2582990527153015, "learning_rate": 7.5340716960541045e-06, "loss": 0.1104, "step": 43259 }, { "epoch": 0.7715906253344272, "grad_norm": 0.20992511510849, "learning_rate": 7.532958085811373e-06, "loss": 0.1147, "step": 43260 }, { "epoch": 0.7716084614561409, "grad_norm": 0.24704952538013458, "learning_rate": 7.531844543276922e-06, "loss": 0.0846, "step": 43261 }, { "epoch": 0.7716262975778547, "grad_norm": 0.2913043200969696, "learning_rate": 7.530731068455086e-06, "loss": 0.1163, "step": 43262 }, { "epoch": 0.7716441336995684, "grad_norm": 0.26963940262794495, "learning_rate": 7.529617661350166e-06, "loss": 0.1304, "step": 43263 }, { "epoch": 0.7716619698212821, "grad_norm": 0.24435272812843323, "learning_rate": 7.5285043219664814e-06, "loss": 0.1599, "step": 43264 }, { "epoch": 0.7716798059429958, "grad_norm": 0.2214638590812683, "learning_rate": 7.527391050308344e-06, "loss": 0.0947, "step": 43265 }, { "epoch": 0.7716976420647095, "grad_norm": 0.3008866608142853, "learning_rate": 7.526277846380081e-06, "loss": 0.1034, "step": 43266 }, { "epoch": 0.7717154781864232, "grad_norm": 0.32285189628601074, "learning_rate": 7.525164710185997e-06, "loss": 0.1422, "step": 43267 }, { "epoch": 0.7717333143081369, "grad_norm": 0.225494846701622, "learning_rate": 7.524051641730415e-06, "loss": 0.111, "step": 43268 }, { "epoch": 0.7717511504298505, "grad_norm": 0.23391669988632202, "learning_rate": 7.522938641017632e-06, "loss": 0.0798, "step": 43269 }, { "epoch": 0.7717689865515642, "grad_norm": 0.251701295375824, "learning_rate": 7.5218257080519886e-06, "loss": 0.1292, "step": 43270 }, { "epoch": 0.7717868226732779, "grad_norm": 0.260171115398407, "learning_rate": 7.520712842837782e-06, "loss": 0.1339, "step": 43271 }, { "epoch": 0.7718046587949916, "grad_norm": 0.25056910514831543, "learning_rate": 7.519600045379329e-06, "loss": 0.0967, "step": 43272 }, { "epoch": 0.7718224949167053, "grad_norm": 0.3433469235897064, "learning_rate": 7.518487315680936e-06, "loss": 0.167, "step": 43273 }, { "epoch": 0.771840331038419, "grad_norm": 0.41224896907806396, "learning_rate": 7.517374653746925e-06, "loss": 0.2071, "step": 43274 }, { "epoch": 0.7718581671601327, "grad_norm": 0.2244434356689453, "learning_rate": 7.516262059581616e-06, "loss": 0.109, "step": 43275 }, { "epoch": 0.7718760032818464, "grad_norm": 0.24160553514957428, "learning_rate": 7.515149533189317e-06, "loss": 0.114, "step": 43276 }, { "epoch": 0.77189383940356, "grad_norm": 0.2449491024017334, "learning_rate": 7.514037074574334e-06, "loss": 0.0924, "step": 43277 }, { "epoch": 0.7719116755252737, "grad_norm": 0.3116455078125, "learning_rate": 7.512924683740974e-06, "loss": 0.1561, "step": 43278 }, { "epoch": 0.7719295116469875, "grad_norm": 0.4128992557525635, "learning_rate": 7.511812360693568e-06, "loss": 0.1172, "step": 43279 }, { "epoch": 0.7719473477687012, "grad_norm": 0.2890704572200775, "learning_rate": 7.5107001054364166e-06, "loss": 0.1643, "step": 43280 }, { "epoch": 0.7719651838904149, "grad_norm": 0.3013269305229187, "learning_rate": 7.509587917973831e-06, "loss": 0.0879, "step": 43281 }, { "epoch": 0.7719830200121286, "grad_norm": 0.21916384994983673, "learning_rate": 7.508475798310119e-06, "loss": 0.0973, "step": 43282 }, { "epoch": 0.7720008561338423, "grad_norm": 0.20679941773414612, "learning_rate": 7.5073637464496e-06, "loss": 0.1153, "step": 43283 }, { "epoch": 0.772018692255556, "grad_norm": 0.23694542050361633, "learning_rate": 7.506251762396585e-06, "loss": 0.0857, "step": 43284 }, { "epoch": 0.7720365283772697, "grad_norm": 0.23543240129947662, "learning_rate": 7.505139846155377e-06, "loss": 0.0868, "step": 43285 }, { "epoch": 0.7720543644989833, "grad_norm": 0.23788268864154816, "learning_rate": 7.504027997730284e-06, "loss": 0.0977, "step": 43286 }, { "epoch": 0.772072200620697, "grad_norm": 0.29897359013557434, "learning_rate": 7.502916217125627e-06, "loss": 0.0838, "step": 43287 }, { "epoch": 0.7720900367424107, "grad_norm": 0.22732172906398773, "learning_rate": 7.501804504345702e-06, "loss": 0.1252, "step": 43288 }, { "epoch": 0.7721078728641244, "grad_norm": 0.24480777978897095, "learning_rate": 7.5006928593948364e-06, "loss": 0.0812, "step": 43289 }, { "epoch": 0.7721257089858381, "grad_norm": 0.2581625282764435, "learning_rate": 7.499581282277329e-06, "loss": 0.1317, "step": 43290 }, { "epoch": 0.7721435451075518, "grad_norm": 0.29986411333084106, "learning_rate": 7.49846977299748e-06, "loss": 0.124, "step": 43291 }, { "epoch": 0.7721613812292655, "grad_norm": 0.27072200179100037, "learning_rate": 7.4973583315596145e-06, "loss": 0.1195, "step": 43292 }, { "epoch": 0.7721792173509792, "grad_norm": 0.266581654548645, "learning_rate": 7.496246957968037e-06, "loss": 0.118, "step": 43293 }, { "epoch": 0.7721970534726929, "grad_norm": 0.23910745978355408, "learning_rate": 7.495135652227048e-06, "loss": 0.1144, "step": 43294 }, { "epoch": 0.7722148895944065, "grad_norm": 0.31104573607444763, "learning_rate": 7.494024414340952e-06, "loss": 0.1925, "step": 43295 }, { "epoch": 0.7722327257161203, "grad_norm": 0.23937399685382843, "learning_rate": 7.492913244314073e-06, "loss": 0.1161, "step": 43296 }, { "epoch": 0.772250561837834, "grad_norm": 0.334472119808197, "learning_rate": 7.491802142150709e-06, "loss": 0.1006, "step": 43297 }, { "epoch": 0.7722683979595477, "grad_norm": 0.25423452258110046, "learning_rate": 7.490691107855166e-06, "loss": 0.1052, "step": 43298 }, { "epoch": 0.7722862340812614, "grad_norm": 0.20954209566116333, "learning_rate": 7.489580141431743e-06, "loss": 0.1136, "step": 43299 }, { "epoch": 0.7723040702029751, "grad_norm": 0.2716667950153351, "learning_rate": 7.488469242884766e-06, "loss": 0.1223, "step": 43300 }, { "epoch": 0.7723219063246888, "grad_norm": 0.2741115987300873, "learning_rate": 7.48735841221852e-06, "loss": 0.1086, "step": 43301 }, { "epoch": 0.7723397424464025, "grad_norm": 0.36933889985084534, "learning_rate": 7.486247649437331e-06, "loss": 0.1531, "step": 43302 }, { "epoch": 0.7723575785681162, "grad_norm": 0.2792014181613922, "learning_rate": 7.485136954545493e-06, "loss": 0.0814, "step": 43303 }, { "epoch": 0.7723754146898298, "grad_norm": 0.23467305302619934, "learning_rate": 7.484026327547308e-06, "loss": 0.0986, "step": 43304 }, { "epoch": 0.7723932508115435, "grad_norm": 0.3154553771018982, "learning_rate": 7.482915768447093e-06, "loss": 0.1199, "step": 43305 }, { "epoch": 0.7724110869332572, "grad_norm": 0.2857285141944885, "learning_rate": 7.4818052772491485e-06, "loss": 0.1053, "step": 43306 }, { "epoch": 0.7724289230549709, "grad_norm": 0.24907024204730988, "learning_rate": 7.480694853957776e-06, "loss": 0.112, "step": 43307 }, { "epoch": 0.7724467591766846, "grad_norm": 0.2875986397266388, "learning_rate": 7.479584498577271e-06, "loss": 0.1438, "step": 43308 }, { "epoch": 0.7724645952983983, "grad_norm": 0.3144265115261078, "learning_rate": 7.47847421111196e-06, "loss": 0.1411, "step": 43309 }, { "epoch": 0.772482431420112, "grad_norm": 0.3387351334095001, "learning_rate": 7.477363991566133e-06, "loss": 0.1124, "step": 43310 }, { "epoch": 0.7725002675418257, "grad_norm": 0.33802229166030884, "learning_rate": 7.476253839944094e-06, "loss": 0.1452, "step": 43311 }, { "epoch": 0.7725181036635393, "grad_norm": 0.40700623393058777, "learning_rate": 7.475143756250141e-06, "loss": 0.1813, "step": 43312 }, { "epoch": 0.7725359397852531, "grad_norm": 0.2382963001728058, "learning_rate": 7.474033740488592e-06, "loss": 0.1078, "step": 43313 }, { "epoch": 0.7725537759069668, "grad_norm": 0.2745089530944824, "learning_rate": 7.472923792663741e-06, "loss": 0.1365, "step": 43314 }, { "epoch": 0.7725716120286805, "grad_norm": 0.21949723362922668, "learning_rate": 7.471813912779885e-06, "loss": 0.1021, "step": 43315 }, { "epoch": 0.7725894481503942, "grad_norm": 0.22324372828006744, "learning_rate": 7.470704100841339e-06, "loss": 0.0851, "step": 43316 }, { "epoch": 0.7726072842721079, "grad_norm": 0.2681769132614136, "learning_rate": 7.469594356852389e-06, "loss": 0.1117, "step": 43317 }, { "epoch": 0.7726251203938216, "grad_norm": 0.3031485974788666, "learning_rate": 7.4684846808173555e-06, "loss": 0.1148, "step": 43318 }, { "epoch": 0.7726429565155353, "grad_norm": 0.36637088656425476, "learning_rate": 7.467375072740529e-06, "loss": 0.1237, "step": 43319 }, { "epoch": 0.772660792637249, "grad_norm": 0.30085834860801697, "learning_rate": 7.466265532626216e-06, "loss": 0.142, "step": 43320 }, { "epoch": 0.7726786287589626, "grad_norm": 0.3427233397960663, "learning_rate": 7.465156060478701e-06, "loss": 0.1475, "step": 43321 }, { "epoch": 0.7726964648806763, "grad_norm": 0.3068648874759674, "learning_rate": 7.464046656302307e-06, "loss": 0.1164, "step": 43322 }, { "epoch": 0.77271430100239, "grad_norm": 0.30954113602638245, "learning_rate": 7.462937320101326e-06, "loss": 0.0899, "step": 43323 }, { "epoch": 0.7727321371241037, "grad_norm": 0.2714322805404663, "learning_rate": 7.461828051880057e-06, "loss": 0.1324, "step": 43324 }, { "epoch": 0.7727499732458174, "grad_norm": 0.3573809862136841, "learning_rate": 7.460718851642792e-06, "loss": 0.1385, "step": 43325 }, { "epoch": 0.7727678093675311, "grad_norm": 0.2611391544342041, "learning_rate": 7.459609719393845e-06, "loss": 0.0889, "step": 43326 }, { "epoch": 0.7727856454892448, "grad_norm": 0.3394410312175751, "learning_rate": 7.45850065513751e-06, "loss": 0.1398, "step": 43327 }, { "epoch": 0.7728034816109585, "grad_norm": 0.22016894817352295, "learning_rate": 7.457391658878077e-06, "loss": 0.0814, "step": 43328 }, { "epoch": 0.7728213177326722, "grad_norm": 0.2938532531261444, "learning_rate": 7.456282730619862e-06, "loss": 0.1208, "step": 43329 }, { "epoch": 0.772839153854386, "grad_norm": 0.2880353331565857, "learning_rate": 7.4551738703671435e-06, "loss": 0.1507, "step": 43330 }, { "epoch": 0.7728569899760996, "grad_norm": 0.2728100121021271, "learning_rate": 7.454065078124242e-06, "loss": 0.0925, "step": 43331 }, { "epoch": 0.7728748260978133, "grad_norm": 0.2868330478668213, "learning_rate": 7.4529563538954434e-06, "loss": 0.1747, "step": 43332 }, { "epoch": 0.772892662219527, "grad_norm": 0.2272026687860489, "learning_rate": 7.4518476976850464e-06, "loss": 0.0701, "step": 43333 }, { "epoch": 0.7729104983412407, "grad_norm": 0.25956249237060547, "learning_rate": 7.45073910949734e-06, "loss": 0.2031, "step": 43334 }, { "epoch": 0.7729283344629544, "grad_norm": 0.20929618179798126, "learning_rate": 7.449630589336639e-06, "loss": 0.072, "step": 43335 }, { "epoch": 0.7729461705846681, "grad_norm": 0.25738954544067383, "learning_rate": 7.448522137207234e-06, "loss": 0.0897, "step": 43336 }, { "epoch": 0.7729640067063818, "grad_norm": 0.2278204709291458, "learning_rate": 7.447413753113416e-06, "loss": 0.1022, "step": 43337 }, { "epoch": 0.7729818428280955, "grad_norm": 0.24706627428531647, "learning_rate": 7.446305437059478e-06, "loss": 0.1517, "step": 43338 }, { "epoch": 0.7729996789498091, "grad_norm": 0.23114341497421265, "learning_rate": 7.4451971890497295e-06, "loss": 0.0803, "step": 43339 }, { "epoch": 0.7730175150715228, "grad_norm": 0.23727920651435852, "learning_rate": 7.4440890090884614e-06, "loss": 0.1021, "step": 43340 }, { "epoch": 0.7730353511932365, "grad_norm": 0.31540319323539734, "learning_rate": 7.442980897179966e-06, "loss": 0.1309, "step": 43341 }, { "epoch": 0.7730531873149502, "grad_norm": 0.22448860108852386, "learning_rate": 7.441872853328536e-06, "loss": 0.11, "step": 43342 }, { "epoch": 0.7730710234366639, "grad_norm": 0.26633214950561523, "learning_rate": 7.440764877538475e-06, "loss": 0.0981, "step": 43343 }, { "epoch": 0.7730888595583776, "grad_norm": 0.279723197221756, "learning_rate": 7.439656969814068e-06, "loss": 0.165, "step": 43344 }, { "epoch": 0.7731066956800913, "grad_norm": 0.24724264442920685, "learning_rate": 7.438549130159625e-06, "loss": 0.092, "step": 43345 }, { "epoch": 0.7731245318018051, "grad_norm": 0.27358344197273254, "learning_rate": 7.43744135857943e-06, "loss": 0.0858, "step": 43346 }, { "epoch": 0.7731423679235188, "grad_norm": 0.2298402488231659, "learning_rate": 7.43633365507777e-06, "loss": 0.1045, "step": 43347 }, { "epoch": 0.7731602040452324, "grad_norm": 0.30903834104537964, "learning_rate": 7.435226019658956e-06, "loss": 0.1148, "step": 43348 }, { "epoch": 0.7731780401669461, "grad_norm": 0.2901347875595093, "learning_rate": 7.434118452327274e-06, "loss": 0.133, "step": 43349 }, { "epoch": 0.7731958762886598, "grad_norm": 0.2721090614795685, "learning_rate": 7.433010953087013e-06, "loss": 0.1042, "step": 43350 }, { "epoch": 0.7732137124103735, "grad_norm": 0.31019127368927, "learning_rate": 7.4319035219424625e-06, "loss": 0.1364, "step": 43351 }, { "epoch": 0.7732315485320872, "grad_norm": 0.19999803602695465, "learning_rate": 7.430796158897929e-06, "loss": 0.0897, "step": 43352 }, { "epoch": 0.7732493846538009, "grad_norm": 0.20183596014976501, "learning_rate": 7.429688863957698e-06, "loss": 0.0978, "step": 43353 }, { "epoch": 0.7732672207755146, "grad_norm": 0.2506796717643738, "learning_rate": 7.428581637126061e-06, "loss": 0.126, "step": 43354 }, { "epoch": 0.7732850568972283, "grad_norm": 0.3142896294593811, "learning_rate": 7.427474478407304e-06, "loss": 0.1584, "step": 43355 }, { "epoch": 0.773302893018942, "grad_norm": 0.28961101174354553, "learning_rate": 7.426367387805733e-06, "loss": 0.1144, "step": 43356 }, { "epoch": 0.7733207291406556, "grad_norm": 0.5187827348709106, "learning_rate": 7.425260365325623e-06, "loss": 0.1032, "step": 43357 }, { "epoch": 0.7733385652623693, "grad_norm": 0.43283024430274963, "learning_rate": 7.424153410971283e-06, "loss": 0.1058, "step": 43358 }, { "epoch": 0.773356401384083, "grad_norm": 0.27138620615005493, "learning_rate": 7.423046524746993e-06, "loss": 0.094, "step": 43359 }, { "epoch": 0.7733742375057967, "grad_norm": 0.2884177267551422, "learning_rate": 7.421939706657038e-06, "loss": 0.1431, "step": 43360 }, { "epoch": 0.7733920736275104, "grad_norm": 0.2897762060165405, "learning_rate": 7.420832956705726e-06, "loss": 0.1115, "step": 43361 }, { "epoch": 0.7734099097492241, "grad_norm": 0.2874641418457031, "learning_rate": 7.4197262748973345e-06, "loss": 0.133, "step": 43362 }, { "epoch": 0.7734277458709379, "grad_norm": 0.2513301968574524, "learning_rate": 7.418619661236157e-06, "loss": 0.1323, "step": 43363 }, { "epoch": 0.7734455819926516, "grad_norm": 0.2575463354587555, "learning_rate": 7.417513115726474e-06, "loss": 0.1026, "step": 43364 }, { "epoch": 0.7734634181143653, "grad_norm": 0.2520389258861542, "learning_rate": 7.4164066383725916e-06, "loss": 0.1079, "step": 43365 }, { "epoch": 0.7734812542360789, "grad_norm": 0.23377898335456848, "learning_rate": 7.41530022917879e-06, "loss": 0.0817, "step": 43366 }, { "epoch": 0.7734990903577926, "grad_norm": 0.29210180044174194, "learning_rate": 7.414193888149356e-06, "loss": 0.0974, "step": 43367 }, { "epoch": 0.7735169264795063, "grad_norm": 0.2617834806442261, "learning_rate": 7.413087615288577e-06, "loss": 0.0791, "step": 43368 }, { "epoch": 0.77353476260122, "grad_norm": 0.22317801415920258, "learning_rate": 7.411981410600749e-06, "loss": 0.0834, "step": 43369 }, { "epoch": 0.7735525987229337, "grad_norm": 0.2863292992115021, "learning_rate": 7.410875274090157e-06, "loss": 0.116, "step": 43370 }, { "epoch": 0.7735704348446474, "grad_norm": 0.29589495062828064, "learning_rate": 7.40976920576108e-06, "loss": 0.1292, "step": 43371 }, { "epoch": 0.7735882709663611, "grad_norm": 0.29509004950523376, "learning_rate": 7.408663205617822e-06, "loss": 0.2014, "step": 43372 }, { "epoch": 0.7736061070880748, "grad_norm": 0.30128195881843567, "learning_rate": 7.40755727366465e-06, "loss": 0.1153, "step": 43373 }, { "epoch": 0.7736239432097884, "grad_norm": 0.2588391900062561, "learning_rate": 7.406451409905873e-06, "loss": 0.1334, "step": 43374 }, { "epoch": 0.7736417793315021, "grad_norm": 0.2026851326227188, "learning_rate": 7.405345614345765e-06, "loss": 0.0932, "step": 43375 }, { "epoch": 0.7736596154532158, "grad_norm": 0.28502556681632996, "learning_rate": 7.404239886988615e-06, "loss": 0.1151, "step": 43376 }, { "epoch": 0.7736774515749295, "grad_norm": 0.35120463371276855, "learning_rate": 7.403134227838701e-06, "loss": 0.1357, "step": 43377 }, { "epoch": 0.7736952876966432, "grad_norm": 0.23151548206806183, "learning_rate": 7.402028636900326e-06, "loss": 0.1331, "step": 43378 }, { "epoch": 0.7737131238183569, "grad_norm": 0.31900206208229065, "learning_rate": 7.4009231141777655e-06, "loss": 0.1607, "step": 43379 }, { "epoch": 0.7737309599400707, "grad_norm": 0.26978549361228943, "learning_rate": 7.3998176596753034e-06, "loss": 0.1199, "step": 43380 }, { "epoch": 0.7737487960617844, "grad_norm": 0.21533839404582977, "learning_rate": 7.398712273397221e-06, "loss": 0.0686, "step": 43381 }, { "epoch": 0.7737666321834981, "grad_norm": 0.28145670890808105, "learning_rate": 7.397606955347816e-06, "loss": 0.1397, "step": 43382 }, { "epoch": 0.7737844683052117, "grad_norm": 0.19150963425636292, "learning_rate": 7.396501705531367e-06, "loss": 0.0761, "step": 43383 }, { "epoch": 0.7738023044269254, "grad_norm": 0.3306606113910675, "learning_rate": 7.395396523952147e-06, "loss": 0.1876, "step": 43384 }, { "epoch": 0.7738201405486391, "grad_norm": 0.2997142970561981, "learning_rate": 7.394291410614462e-06, "loss": 0.1064, "step": 43385 }, { "epoch": 0.7738379766703528, "grad_norm": 0.24205929040908813, "learning_rate": 7.3931863655225755e-06, "loss": 0.1176, "step": 43386 }, { "epoch": 0.7738558127920665, "grad_norm": 0.26777875423431396, "learning_rate": 7.3920813886807855e-06, "loss": 0.1611, "step": 43387 }, { "epoch": 0.7738736489137802, "grad_norm": 0.24796457588672638, "learning_rate": 7.390976480093373e-06, "loss": 0.1377, "step": 43388 }, { "epoch": 0.7738914850354939, "grad_norm": 0.2466205507516861, "learning_rate": 7.389871639764614e-06, "loss": 0.1152, "step": 43389 }, { "epoch": 0.7739093211572076, "grad_norm": 0.42343828082084656, "learning_rate": 7.38876686769879e-06, "loss": 0.1375, "step": 43390 }, { "epoch": 0.7739271572789213, "grad_norm": 0.3893948793411255, "learning_rate": 7.387662163900194e-06, "loss": 0.1214, "step": 43391 }, { "epoch": 0.7739449934006349, "grad_norm": 0.26313719153404236, "learning_rate": 7.3865575283731034e-06, "loss": 0.1185, "step": 43392 }, { "epoch": 0.7739628295223486, "grad_norm": 0.4351363778114319, "learning_rate": 7.3854529611218e-06, "loss": 0.1413, "step": 43393 }, { "epoch": 0.7739806656440623, "grad_norm": 0.2902747690677643, "learning_rate": 7.384348462150556e-06, "loss": 0.0958, "step": 43394 }, { "epoch": 0.773998501765776, "grad_norm": 0.2046617716550827, "learning_rate": 7.383244031463671e-06, "loss": 0.0929, "step": 43395 }, { "epoch": 0.7740163378874897, "grad_norm": 0.2876710891723633, "learning_rate": 7.382139669065416e-06, "loss": 0.1074, "step": 43396 }, { "epoch": 0.7740341740092035, "grad_norm": 0.3248555064201355, "learning_rate": 7.381035374960071e-06, "loss": 0.0676, "step": 43397 }, { "epoch": 0.7740520101309172, "grad_norm": 0.34729835391044617, "learning_rate": 7.379931149151912e-06, "loss": 0.093, "step": 43398 }, { "epoch": 0.7740698462526309, "grad_norm": 0.2575656771659851, "learning_rate": 7.378826991645232e-06, "loss": 0.1671, "step": 43399 }, { "epoch": 0.7740876823743446, "grad_norm": 0.2115790694952011, "learning_rate": 7.377722902444301e-06, "loss": 0.08, "step": 43400 }, { "epoch": 0.7741055184960582, "grad_norm": 0.31736472249031067, "learning_rate": 7.376618881553407e-06, "loss": 0.0987, "step": 43401 }, { "epoch": 0.7741233546177719, "grad_norm": 0.29819318652153015, "learning_rate": 7.375514928976826e-06, "loss": 0.1358, "step": 43402 }, { "epoch": 0.7741411907394856, "grad_norm": 0.2615434229373932, "learning_rate": 7.374411044718827e-06, "loss": 0.1298, "step": 43403 }, { "epoch": 0.7741590268611993, "grad_norm": 0.22527238726615906, "learning_rate": 7.373307228783708e-06, "loss": 0.0868, "step": 43404 }, { "epoch": 0.774176862982913, "grad_norm": 0.22288206219673157, "learning_rate": 7.372203481175738e-06, "loss": 0.0744, "step": 43405 }, { "epoch": 0.7741946991046267, "grad_norm": 0.31209704279899597, "learning_rate": 7.371099801899195e-06, "loss": 0.1052, "step": 43406 }, { "epoch": 0.7742125352263404, "grad_norm": 0.2829226851463318, "learning_rate": 7.3699961909583505e-06, "loss": 0.1287, "step": 43407 }, { "epoch": 0.7742303713480541, "grad_norm": 0.2901262938976288, "learning_rate": 7.368892648357497e-06, "loss": 0.151, "step": 43408 }, { "epoch": 0.7742482074697677, "grad_norm": 0.225845605134964, "learning_rate": 7.367789174100909e-06, "loss": 0.1271, "step": 43409 }, { "epoch": 0.7742660435914814, "grad_norm": 0.24322721362113953, "learning_rate": 7.366685768192854e-06, "loss": 0.1313, "step": 43410 }, { "epoch": 0.7742838797131951, "grad_norm": 0.39144471287727356, "learning_rate": 7.365582430637613e-06, "loss": 0.1357, "step": 43411 }, { "epoch": 0.7743017158349088, "grad_norm": 0.24938805401325226, "learning_rate": 7.364479161439469e-06, "loss": 0.1277, "step": 43412 }, { "epoch": 0.7743195519566225, "grad_norm": 0.3017863631248474, "learning_rate": 7.363375960602689e-06, "loss": 0.1338, "step": 43413 }, { "epoch": 0.7743373880783363, "grad_norm": 0.26391059160232544, "learning_rate": 7.362272828131564e-06, "loss": 0.1307, "step": 43414 }, { "epoch": 0.77435522420005, "grad_norm": 0.2297331690788269, "learning_rate": 7.361169764030363e-06, "loss": 0.1112, "step": 43415 }, { "epoch": 0.7743730603217637, "grad_norm": 0.2829309403896332, "learning_rate": 7.360066768303348e-06, "loss": 0.0924, "step": 43416 }, { "epoch": 0.7743908964434774, "grad_norm": 0.3273833394050598, "learning_rate": 7.3589638409548165e-06, "loss": 0.1661, "step": 43417 }, { "epoch": 0.774408732565191, "grad_norm": 0.2531827390193939, "learning_rate": 7.357860981989034e-06, "loss": 0.1243, "step": 43418 }, { "epoch": 0.7744265686869047, "grad_norm": 0.16432201862335205, "learning_rate": 7.356758191410277e-06, "loss": 0.0374, "step": 43419 }, { "epoch": 0.7744444048086184, "grad_norm": 0.20097115635871887, "learning_rate": 7.355655469222808e-06, "loss": 0.099, "step": 43420 }, { "epoch": 0.7744622409303321, "grad_norm": 0.17405837774276733, "learning_rate": 7.354552815430923e-06, "loss": 0.0623, "step": 43421 }, { "epoch": 0.7744800770520458, "grad_norm": 0.2694275975227356, "learning_rate": 7.353450230038886e-06, "loss": 0.11, "step": 43422 }, { "epoch": 0.7744979131737595, "grad_norm": 0.2961132526397705, "learning_rate": 7.352347713050969e-06, "loss": 0.144, "step": 43423 }, { "epoch": 0.7745157492954732, "grad_norm": 0.2935517430305481, "learning_rate": 7.351245264471452e-06, "loss": 0.1252, "step": 43424 }, { "epoch": 0.7745335854171869, "grad_norm": 0.26306623220443726, "learning_rate": 7.3501428843045925e-06, "loss": 0.1086, "step": 43425 }, { "epoch": 0.7745514215389006, "grad_norm": 0.3005884885787964, "learning_rate": 7.3490405725546826e-06, "loss": 0.139, "step": 43426 }, { "epoch": 0.7745692576606142, "grad_norm": 0.26893794536590576, "learning_rate": 7.347938329225982e-06, "loss": 0.1695, "step": 43427 }, { "epoch": 0.7745870937823279, "grad_norm": 0.22363987565040588, "learning_rate": 7.346836154322776e-06, "loss": 0.0891, "step": 43428 }, { "epoch": 0.7746049299040416, "grad_norm": 0.25274941325187683, "learning_rate": 7.34573404784932e-06, "loss": 0.1053, "step": 43429 }, { "epoch": 0.7746227660257553, "grad_norm": 0.2577058970928192, "learning_rate": 7.344632009809907e-06, "loss": 0.126, "step": 43430 }, { "epoch": 0.7746406021474691, "grad_norm": 0.24778054654598236, "learning_rate": 7.3435300402087985e-06, "loss": 0.1266, "step": 43431 }, { "epoch": 0.7746584382691828, "grad_norm": 0.3263584077358246, "learning_rate": 7.342428139050267e-06, "loss": 0.1346, "step": 43432 }, { "epoch": 0.7746762743908965, "grad_norm": 0.2668343186378479, "learning_rate": 7.3413263063385725e-06, "loss": 0.1002, "step": 43433 }, { "epoch": 0.7746941105126102, "grad_norm": 0.23315773904323578, "learning_rate": 7.340224542078006e-06, "loss": 0.092, "step": 43434 }, { "epoch": 0.7747119466343239, "grad_norm": 0.2598983943462372, "learning_rate": 7.339122846272828e-06, "loss": 0.1412, "step": 43435 }, { "epoch": 0.7747297827560375, "grad_norm": 0.22261232137680054, "learning_rate": 7.3380212189273075e-06, "loss": 0.1064, "step": 43436 }, { "epoch": 0.7747476188777512, "grad_norm": 0.27725648880004883, "learning_rate": 7.336919660045718e-06, "loss": 0.1347, "step": 43437 }, { "epoch": 0.7747654549994649, "grad_norm": 0.27133169770240784, "learning_rate": 7.335818169632322e-06, "loss": 0.0989, "step": 43438 }, { "epoch": 0.7747832911211786, "grad_norm": 0.41076937317848206, "learning_rate": 7.334716747691406e-06, "loss": 0.1866, "step": 43439 }, { "epoch": 0.7748011272428923, "grad_norm": 0.27317196130752563, "learning_rate": 7.333615394227217e-06, "loss": 0.1129, "step": 43440 }, { "epoch": 0.774818963364606, "grad_norm": 0.2356339544057846, "learning_rate": 7.332514109244046e-06, "loss": 0.0872, "step": 43441 }, { "epoch": 0.7748367994863197, "grad_norm": 0.27787521481513977, "learning_rate": 7.331412892746145e-06, "loss": 0.123, "step": 43442 }, { "epoch": 0.7748546356080334, "grad_norm": 0.5402619242668152, "learning_rate": 7.330311744737797e-06, "loss": 0.1533, "step": 43443 }, { "epoch": 0.774872471729747, "grad_norm": 0.23586773872375488, "learning_rate": 7.329210665223265e-06, "loss": 0.0996, "step": 43444 }, { "epoch": 0.7748903078514607, "grad_norm": 0.47996529936790466, "learning_rate": 7.3281096542068165e-06, "loss": 0.1813, "step": 43445 }, { "epoch": 0.7749081439731744, "grad_norm": 0.24311622977256775, "learning_rate": 7.32700871169271e-06, "loss": 0.1133, "step": 43446 }, { "epoch": 0.7749259800948882, "grad_norm": 0.25032761693000793, "learning_rate": 7.3259078376852285e-06, "loss": 0.1792, "step": 43447 }, { "epoch": 0.7749438162166019, "grad_norm": 0.3706215023994446, "learning_rate": 7.324807032188632e-06, "loss": 0.1017, "step": 43448 }, { "epoch": 0.7749616523383156, "grad_norm": 0.23765237629413605, "learning_rate": 7.323706295207189e-06, "loss": 0.063, "step": 43449 }, { "epoch": 0.7749794884600293, "grad_norm": 0.22569239139556885, "learning_rate": 7.322605626745166e-06, "loss": 0.1674, "step": 43450 }, { "epoch": 0.774997324581743, "grad_norm": 0.22864854335784912, "learning_rate": 7.321505026806821e-06, "loss": 0.0812, "step": 43451 }, { "epoch": 0.7750151607034567, "grad_norm": 0.25411897897720337, "learning_rate": 7.320404495396438e-06, "loss": 0.1205, "step": 43452 }, { "epoch": 0.7750329968251704, "grad_norm": 0.2267070859670639, "learning_rate": 7.31930403251827e-06, "loss": 0.111, "step": 43453 }, { "epoch": 0.775050832946884, "grad_norm": 0.34046271443367004, "learning_rate": 7.318203638176582e-06, "loss": 0.0912, "step": 43454 }, { "epoch": 0.7750686690685977, "grad_norm": 0.2848954200744629, "learning_rate": 7.317103312375642e-06, "loss": 0.0814, "step": 43455 }, { "epoch": 0.7750865051903114, "grad_norm": 0.23784367740154266, "learning_rate": 7.316003055119724e-06, "loss": 0.1097, "step": 43456 }, { "epoch": 0.7751043413120251, "grad_norm": 0.2915710508823395, "learning_rate": 7.3149028664130896e-06, "loss": 0.1267, "step": 43457 }, { "epoch": 0.7751221774337388, "grad_norm": 0.21106117963790894, "learning_rate": 7.313802746259996e-06, "loss": 0.1077, "step": 43458 }, { "epoch": 0.7751400135554525, "grad_norm": 0.26949021220207214, "learning_rate": 7.312702694664705e-06, "loss": 0.1171, "step": 43459 }, { "epoch": 0.7751578496771662, "grad_norm": 0.3910709023475647, "learning_rate": 7.311602711631496e-06, "loss": 0.1245, "step": 43460 }, { "epoch": 0.7751756857988799, "grad_norm": 0.294808030128479, "learning_rate": 7.3105027971646255e-06, "loss": 0.0408, "step": 43461 }, { "epoch": 0.7751935219205935, "grad_norm": 0.25726941227912903, "learning_rate": 7.3094029512683545e-06, "loss": 0.1387, "step": 43462 }, { "epoch": 0.7752113580423072, "grad_norm": 0.31136077642440796, "learning_rate": 7.308303173946945e-06, "loss": 0.0917, "step": 43463 }, { "epoch": 0.775229194164021, "grad_norm": 0.2807251214981079, "learning_rate": 7.3072034652046595e-06, "loss": 0.1309, "step": 43464 }, { "epoch": 0.7752470302857347, "grad_norm": 0.20629748702049255, "learning_rate": 7.306103825045771e-06, "loss": 0.1002, "step": 43465 }, { "epoch": 0.7752648664074484, "grad_norm": 0.33572325110435486, "learning_rate": 7.3050042534745346e-06, "loss": 0.1302, "step": 43466 }, { "epoch": 0.7752827025291621, "grad_norm": 0.2864384949207306, "learning_rate": 7.303904750495205e-06, "loss": 0.1419, "step": 43467 }, { "epoch": 0.7753005386508758, "grad_norm": 0.2772945761680603, "learning_rate": 7.30280531611206e-06, "loss": 0.1091, "step": 43468 }, { "epoch": 0.7753183747725895, "grad_norm": 0.2192145586013794, "learning_rate": 7.301705950329346e-06, "loss": 0.0755, "step": 43469 }, { "epoch": 0.7753362108943032, "grad_norm": 0.3089390993118286, "learning_rate": 7.300606653151343e-06, "loss": 0.162, "step": 43470 }, { "epoch": 0.7753540470160168, "grad_norm": 0.24713397026062012, "learning_rate": 7.2995074245822996e-06, "loss": 0.1024, "step": 43471 }, { "epoch": 0.7753718831377305, "grad_norm": 0.21032772958278656, "learning_rate": 7.298408264626472e-06, "loss": 0.0981, "step": 43472 }, { "epoch": 0.7753897192594442, "grad_norm": 0.2417958527803421, "learning_rate": 7.297309173288136e-06, "loss": 0.1226, "step": 43473 }, { "epoch": 0.7754075553811579, "grad_norm": 0.295732319355011, "learning_rate": 7.296210150571542e-06, "loss": 0.1179, "step": 43474 }, { "epoch": 0.7754253915028716, "grad_norm": 0.42218971252441406, "learning_rate": 7.295111196480956e-06, "loss": 0.1888, "step": 43475 }, { "epoch": 0.7754432276245853, "grad_norm": 0.2556533217430115, "learning_rate": 7.2940123110206314e-06, "loss": 0.1337, "step": 43476 }, { "epoch": 0.775461063746299, "grad_norm": 0.31848812103271484, "learning_rate": 7.292913494194823e-06, "loss": 0.1362, "step": 43477 }, { "epoch": 0.7754788998680127, "grad_norm": 0.29907524585723877, "learning_rate": 7.2918147460078055e-06, "loss": 0.1243, "step": 43478 }, { "epoch": 0.7754967359897263, "grad_norm": 0.28146547079086304, "learning_rate": 7.290716066463829e-06, "loss": 0.1411, "step": 43479 }, { "epoch": 0.77551457211144, "grad_norm": 0.2943083941936493, "learning_rate": 7.2896174555671546e-06, "loss": 0.1412, "step": 43480 }, { "epoch": 0.7755324082331538, "grad_norm": 0.23451046645641327, "learning_rate": 7.288518913322034e-06, "loss": 0.0787, "step": 43481 }, { "epoch": 0.7755502443548675, "grad_norm": 0.18192817270755768, "learning_rate": 7.287420439732728e-06, "loss": 0.0767, "step": 43482 }, { "epoch": 0.7755680804765812, "grad_norm": 0.23433317244052887, "learning_rate": 7.286322034803508e-06, "loss": 0.1052, "step": 43483 }, { "epoch": 0.7755859165982949, "grad_norm": 0.21425002813339233, "learning_rate": 7.2852236985386235e-06, "loss": 0.1225, "step": 43484 }, { "epoch": 0.7756037527200086, "grad_norm": 0.43848171830177307, "learning_rate": 7.28412543094232e-06, "loss": 0.1409, "step": 43485 }, { "epoch": 0.7756215888417223, "grad_norm": 0.32085123658180237, "learning_rate": 7.2830272320188745e-06, "loss": 0.138, "step": 43486 }, { "epoch": 0.775639424963436, "grad_norm": 0.2725811302661896, "learning_rate": 7.281929101772533e-06, "loss": 0.1707, "step": 43487 }, { "epoch": 0.7756572610851497, "grad_norm": 0.24344605207443237, "learning_rate": 7.280831040207556e-06, "loss": 0.1082, "step": 43488 }, { "epoch": 0.7756750972068633, "grad_norm": 0.2600553035736084, "learning_rate": 7.2797330473281965e-06, "loss": 0.1186, "step": 43489 }, { "epoch": 0.775692933328577, "grad_norm": 0.22441856563091278, "learning_rate": 7.278635123138705e-06, "loss": 0.1291, "step": 43490 }, { "epoch": 0.7757107694502907, "grad_norm": 0.5371310710906982, "learning_rate": 7.27753726764335e-06, "loss": 0.1228, "step": 43491 }, { "epoch": 0.7757286055720044, "grad_norm": 0.2149852216243744, "learning_rate": 7.276439480846384e-06, "loss": 0.0813, "step": 43492 }, { "epoch": 0.7757464416937181, "grad_norm": 0.23638857901096344, "learning_rate": 7.275341762752061e-06, "loss": 0.1088, "step": 43493 }, { "epoch": 0.7757642778154318, "grad_norm": 0.27985575795173645, "learning_rate": 7.274244113364626e-06, "loss": 0.0832, "step": 43494 }, { "epoch": 0.7757821139371455, "grad_norm": 0.21184313297271729, "learning_rate": 7.273146532688352e-06, "loss": 0.0667, "step": 43495 }, { "epoch": 0.7757999500588592, "grad_norm": 0.20760251581668854, "learning_rate": 7.2720490207274755e-06, "loss": 0.0763, "step": 43496 }, { "epoch": 0.7758177861805728, "grad_norm": 0.2926557958126068, "learning_rate": 7.2709515774862676e-06, "loss": 0.1126, "step": 43497 }, { "epoch": 0.7758356223022866, "grad_norm": 0.2546495795249939, "learning_rate": 7.269854202968968e-06, "loss": 0.1173, "step": 43498 }, { "epoch": 0.7758534584240003, "grad_norm": 0.3374875485897064, "learning_rate": 7.268756897179846e-06, "loss": 0.1126, "step": 43499 }, { "epoch": 0.775871294545714, "grad_norm": 0.2784263491630554, "learning_rate": 7.267659660123144e-06, "loss": 0.1551, "step": 43500 }, { "epoch": 0.7758891306674277, "grad_norm": 0.2909505069255829, "learning_rate": 7.26656249180312e-06, "loss": 0.1648, "step": 43501 }, { "epoch": 0.7759069667891414, "grad_norm": 0.21909500658512115, "learning_rate": 7.2654653922240215e-06, "loss": 0.1143, "step": 43502 }, { "epoch": 0.7759248029108551, "grad_norm": 0.21381916105747223, "learning_rate": 7.2643683613901e-06, "loss": 0.1083, "step": 43503 }, { "epoch": 0.7759426390325688, "grad_norm": 0.20347769558429718, "learning_rate": 7.2632713993056185e-06, "loss": 0.1243, "step": 43504 }, { "epoch": 0.7759604751542825, "grad_norm": 0.2153291255235672, "learning_rate": 7.2621745059748255e-06, "loss": 0.1024, "step": 43505 }, { "epoch": 0.7759783112759961, "grad_norm": 0.3248733878135681, "learning_rate": 7.261077681401968e-06, "loss": 0.0924, "step": 43506 }, { "epoch": 0.7759961473977098, "grad_norm": 0.32055947184562683, "learning_rate": 7.259980925591292e-06, "loss": 0.0903, "step": 43507 }, { "epoch": 0.7760139835194235, "grad_norm": 0.27737247943878174, "learning_rate": 7.258884238547067e-06, "loss": 0.1405, "step": 43508 }, { "epoch": 0.7760318196411372, "grad_norm": 0.27074140310287476, "learning_rate": 7.257787620273532e-06, "loss": 0.0833, "step": 43509 }, { "epoch": 0.7760496557628509, "grad_norm": 0.2653268873691559, "learning_rate": 7.256691070774935e-06, "loss": 0.1186, "step": 43510 }, { "epoch": 0.7760674918845646, "grad_norm": 0.2698708772659302, "learning_rate": 7.255594590055531e-06, "loss": 0.1356, "step": 43511 }, { "epoch": 0.7760853280062783, "grad_norm": 0.3289799690246582, "learning_rate": 7.254498178119579e-06, "loss": 0.1007, "step": 43512 }, { "epoch": 0.776103164127992, "grad_norm": 0.23925469815731049, "learning_rate": 7.2534018349713185e-06, "loss": 0.116, "step": 43513 }, { "epoch": 0.7761210002497057, "grad_norm": 0.1912168264389038, "learning_rate": 7.252305560615005e-06, "loss": 0.0858, "step": 43514 }, { "epoch": 0.7761388363714194, "grad_norm": 0.22202201187610626, "learning_rate": 7.251209355054883e-06, "loss": 0.0965, "step": 43515 }, { "epoch": 0.7761566724931331, "grad_norm": 0.23961244523525238, "learning_rate": 7.250113218295198e-06, "loss": 0.1022, "step": 43516 }, { "epoch": 0.7761745086148468, "grad_norm": 0.28463515639305115, "learning_rate": 7.249017150340212e-06, "loss": 0.1027, "step": 43517 }, { "epoch": 0.7761923447365605, "grad_norm": 0.2957436740398407, "learning_rate": 7.247921151194164e-06, "loss": 0.1765, "step": 43518 }, { "epoch": 0.7762101808582742, "grad_norm": 0.32369574904441833, "learning_rate": 7.246825220861308e-06, "loss": 0.1213, "step": 43519 }, { "epoch": 0.7762280169799879, "grad_norm": 0.30015018582344055, "learning_rate": 7.245729359345882e-06, "loss": 0.1496, "step": 43520 }, { "epoch": 0.7762458531017016, "grad_norm": 0.200738787651062, "learning_rate": 7.244633566652145e-06, "loss": 0.0911, "step": 43521 }, { "epoch": 0.7762636892234153, "grad_norm": 0.27351483702659607, "learning_rate": 7.243537842784345e-06, "loss": 0.0957, "step": 43522 }, { "epoch": 0.776281525345129, "grad_norm": 0.24771127104759216, "learning_rate": 7.242442187746715e-06, "loss": 0.102, "step": 43523 }, { "epoch": 0.7762993614668426, "grad_norm": 0.24154995381832123, "learning_rate": 7.241346601543519e-06, "loss": 0.0901, "step": 43524 }, { "epoch": 0.7763171975885563, "grad_norm": 0.345503568649292, "learning_rate": 7.240251084178992e-06, "loss": 0.1372, "step": 43525 }, { "epoch": 0.77633503371027, "grad_norm": 0.2833728790283203, "learning_rate": 7.2391556356573935e-06, "loss": 0.0944, "step": 43526 }, { "epoch": 0.7763528698319837, "grad_norm": 0.23817670345306396, "learning_rate": 7.238060255982962e-06, "loss": 0.0596, "step": 43527 }, { "epoch": 0.7763707059536974, "grad_norm": 0.35691890120506287, "learning_rate": 7.236964945159943e-06, "loss": 0.1073, "step": 43528 }, { "epoch": 0.7763885420754111, "grad_norm": 0.2345840036869049, "learning_rate": 7.235869703192574e-06, "loss": 0.1353, "step": 43529 }, { "epoch": 0.7764063781971248, "grad_norm": 0.2550492286682129, "learning_rate": 7.23477453008512e-06, "loss": 0.1097, "step": 43530 }, { "epoch": 0.7764242143188385, "grad_norm": 0.3019869923591614, "learning_rate": 7.233679425841814e-06, "loss": 0.0804, "step": 43531 }, { "epoch": 0.7764420504405523, "grad_norm": 0.284801721572876, "learning_rate": 7.2325843904669035e-06, "loss": 0.0745, "step": 43532 }, { "epoch": 0.7764598865622659, "grad_norm": 0.26073747873306274, "learning_rate": 7.231489423964624e-06, "loss": 0.1362, "step": 43533 }, { "epoch": 0.7764777226839796, "grad_norm": 0.25383538007736206, "learning_rate": 7.230394526339238e-06, "loss": 0.0673, "step": 43534 }, { "epoch": 0.7764955588056933, "grad_norm": 0.27917978167533875, "learning_rate": 7.229299697594979e-06, "loss": 0.0887, "step": 43535 }, { "epoch": 0.776513394927407, "grad_norm": 0.29004523158073425, "learning_rate": 7.228204937736094e-06, "loss": 0.1236, "step": 43536 }, { "epoch": 0.7765312310491207, "grad_norm": 0.30774280428886414, "learning_rate": 7.227110246766814e-06, "loss": 0.1324, "step": 43537 }, { "epoch": 0.7765490671708344, "grad_norm": 0.39555907249450684, "learning_rate": 7.226015624691396e-06, "loss": 0.1055, "step": 43538 }, { "epoch": 0.7765669032925481, "grad_norm": 0.313202440738678, "learning_rate": 7.2249210715140846e-06, "loss": 0.0769, "step": 43539 }, { "epoch": 0.7765847394142618, "grad_norm": 0.2521459758281708, "learning_rate": 7.223826587239122e-06, "loss": 0.0785, "step": 43540 }, { "epoch": 0.7766025755359754, "grad_norm": 0.318236380815506, "learning_rate": 7.222732171870747e-06, "loss": 0.1222, "step": 43541 }, { "epoch": 0.7766204116576891, "grad_norm": 0.32848671078681946, "learning_rate": 7.221637825413191e-06, "loss": 0.1277, "step": 43542 }, { "epoch": 0.7766382477794028, "grad_norm": 0.3019261360168457, "learning_rate": 7.220543547870717e-06, "loss": 0.1862, "step": 43543 }, { "epoch": 0.7766560839011165, "grad_norm": 0.24744179844856262, "learning_rate": 7.219449339247558e-06, "loss": 0.0613, "step": 43544 }, { "epoch": 0.7766739200228302, "grad_norm": 0.35991716384887695, "learning_rate": 7.218355199547955e-06, "loss": 0.1728, "step": 43545 }, { "epoch": 0.7766917561445439, "grad_norm": 0.23743069171905518, "learning_rate": 7.21726112877614e-06, "loss": 0.0585, "step": 43546 }, { "epoch": 0.7767095922662576, "grad_norm": 0.24608206748962402, "learning_rate": 7.216167126936371e-06, "loss": 0.0763, "step": 43547 }, { "epoch": 0.7767274283879714, "grad_norm": 0.5099205374717712, "learning_rate": 7.215073194032879e-06, "loss": 0.1104, "step": 43548 }, { "epoch": 0.7767452645096851, "grad_norm": 0.18419383466243744, "learning_rate": 7.213979330069909e-06, "loss": 0.0689, "step": 43549 }, { "epoch": 0.7767631006313988, "grad_norm": 0.2664598226547241, "learning_rate": 7.212885535051689e-06, "loss": 0.1694, "step": 43550 }, { "epoch": 0.7767809367531124, "grad_norm": 0.2201850712299347, "learning_rate": 7.211791808982474e-06, "loss": 0.0898, "step": 43551 }, { "epoch": 0.7767987728748261, "grad_norm": 0.2813500761985779, "learning_rate": 7.210698151866491e-06, "loss": 0.121, "step": 43552 }, { "epoch": 0.7768166089965398, "grad_norm": 0.30298855900764465, "learning_rate": 7.209604563707994e-06, "loss": 0.1482, "step": 43553 }, { "epoch": 0.7768344451182535, "grad_norm": 0.42618370056152344, "learning_rate": 7.208511044511215e-06, "loss": 0.1476, "step": 43554 }, { "epoch": 0.7768522812399672, "grad_norm": 0.3566909432411194, "learning_rate": 7.207417594280386e-06, "loss": 0.1175, "step": 43555 }, { "epoch": 0.7768701173616809, "grad_norm": 0.2191656231880188, "learning_rate": 7.2063242130197575e-06, "loss": 0.1219, "step": 43556 }, { "epoch": 0.7768879534833946, "grad_norm": 0.33290231227874756, "learning_rate": 7.205230900733562e-06, "loss": 0.1655, "step": 43557 }, { "epoch": 0.7769057896051083, "grad_norm": 0.24335452914237976, "learning_rate": 7.204137657426038e-06, "loss": 0.1282, "step": 43558 }, { "epoch": 0.7769236257268219, "grad_norm": 0.27061861753463745, "learning_rate": 7.203044483101415e-06, "loss": 0.0953, "step": 43559 }, { "epoch": 0.7769414618485356, "grad_norm": 0.2712450325489044, "learning_rate": 7.201951377763947e-06, "loss": 0.1299, "step": 43560 }, { "epoch": 0.7769592979702493, "grad_norm": 0.23656640946865082, "learning_rate": 7.200858341417863e-06, "loss": 0.1164, "step": 43561 }, { "epoch": 0.776977134091963, "grad_norm": 0.2635999321937561, "learning_rate": 7.199765374067397e-06, "loss": 0.1286, "step": 43562 }, { "epoch": 0.7769949702136767, "grad_norm": 0.22753091156482697, "learning_rate": 7.198672475716783e-06, "loss": 0.1139, "step": 43563 }, { "epoch": 0.7770128063353904, "grad_norm": 0.18766862154006958, "learning_rate": 7.19757964637027e-06, "loss": 0.0963, "step": 43564 }, { "epoch": 0.7770306424571042, "grad_norm": 0.500789999961853, "learning_rate": 7.19648688603208e-06, "loss": 0.1844, "step": 43565 }, { "epoch": 0.7770484785788179, "grad_norm": 0.23887291550636292, "learning_rate": 7.1953941947064616e-06, "loss": 0.1144, "step": 43566 }, { "epoch": 0.7770663147005316, "grad_norm": 0.4070923626422882, "learning_rate": 7.1943015723976465e-06, "loss": 0.1489, "step": 43567 }, { "epoch": 0.7770841508222452, "grad_norm": 0.30288925766944885, "learning_rate": 7.193209019109862e-06, "loss": 0.1275, "step": 43568 }, { "epoch": 0.7771019869439589, "grad_norm": 0.2119552493095398, "learning_rate": 7.192116534847354e-06, "loss": 0.0704, "step": 43569 }, { "epoch": 0.7771198230656726, "grad_norm": 0.253764808177948, "learning_rate": 7.191024119614357e-06, "loss": 0.1026, "step": 43570 }, { "epoch": 0.7771376591873863, "grad_norm": 0.3002331554889679, "learning_rate": 7.189931773415098e-06, "loss": 0.1695, "step": 43571 }, { "epoch": 0.7771554953091, "grad_norm": 0.3269325792789459, "learning_rate": 7.188839496253807e-06, "loss": 0.1196, "step": 43572 }, { "epoch": 0.7771733314308137, "grad_norm": 0.3425743579864502, "learning_rate": 7.187747288134736e-06, "loss": 0.1387, "step": 43573 }, { "epoch": 0.7771911675525274, "grad_norm": 0.24573343992233276, "learning_rate": 7.186655149062108e-06, "loss": 0.1496, "step": 43574 }, { "epoch": 0.7772090036742411, "grad_norm": 0.25576549768447876, "learning_rate": 7.1855630790401585e-06, "loss": 0.1437, "step": 43575 }, { "epoch": 0.7772268397959547, "grad_norm": 0.3472616970539093, "learning_rate": 7.184471078073107e-06, "loss": 0.1523, "step": 43576 }, { "epoch": 0.7772446759176684, "grad_norm": 0.3288723826408386, "learning_rate": 7.183379146165211e-06, "loss": 0.1474, "step": 43577 }, { "epoch": 0.7772625120393821, "grad_norm": 0.19933801889419556, "learning_rate": 7.1822872833206905e-06, "loss": 0.0717, "step": 43578 }, { "epoch": 0.7772803481610958, "grad_norm": 0.24148492515087128, "learning_rate": 7.181195489543768e-06, "loss": 0.0922, "step": 43579 }, { "epoch": 0.7772981842828095, "grad_norm": 0.3136390745639801, "learning_rate": 7.180103764838697e-06, "loss": 0.1601, "step": 43580 }, { "epoch": 0.7773160204045232, "grad_norm": 0.27717170119285583, "learning_rate": 7.17901210920969e-06, "loss": 0.0722, "step": 43581 }, { "epoch": 0.777333856526237, "grad_norm": 0.21808890998363495, "learning_rate": 7.177920522660994e-06, "loss": 0.0931, "step": 43582 }, { "epoch": 0.7773516926479507, "grad_norm": 0.3988843262195587, "learning_rate": 7.176829005196837e-06, "loss": 0.1051, "step": 43583 }, { "epoch": 0.7773695287696644, "grad_norm": 0.2618977129459381, "learning_rate": 7.175737556821443e-06, "loss": 0.1265, "step": 43584 }, { "epoch": 0.777387364891378, "grad_norm": 0.23547561466693878, "learning_rate": 7.174646177539041e-06, "loss": 0.108, "step": 43585 }, { "epoch": 0.7774052010130917, "grad_norm": 0.25493043661117554, "learning_rate": 7.1735548673538735e-06, "loss": 0.1058, "step": 43586 }, { "epoch": 0.7774230371348054, "grad_norm": 0.3202263414859772, "learning_rate": 7.172463626270165e-06, "loss": 0.128, "step": 43587 }, { "epoch": 0.7774408732565191, "grad_norm": 0.3321714997291565, "learning_rate": 7.171372454292144e-06, "loss": 0.1146, "step": 43588 }, { "epoch": 0.7774587093782328, "grad_norm": 0.19986885786056519, "learning_rate": 7.170281351424033e-06, "loss": 0.1292, "step": 43589 }, { "epoch": 0.7774765454999465, "grad_norm": 0.3647879660129547, "learning_rate": 7.1691903176700795e-06, "loss": 0.1721, "step": 43590 }, { "epoch": 0.7774943816216602, "grad_norm": 0.22513028979301453, "learning_rate": 7.168099353034502e-06, "loss": 0.1125, "step": 43591 }, { "epoch": 0.7775122177433739, "grad_norm": 0.23531781136989594, "learning_rate": 7.1670084575215205e-06, "loss": 0.0932, "step": 43592 }, { "epoch": 0.7775300538650876, "grad_norm": 0.2683334946632385, "learning_rate": 7.165917631135385e-06, "loss": 0.1084, "step": 43593 }, { "epoch": 0.7775478899868012, "grad_norm": 0.21299968659877777, "learning_rate": 7.164826873880301e-06, "loss": 0.1477, "step": 43594 }, { "epoch": 0.7775657261085149, "grad_norm": 0.20810571312904358, "learning_rate": 7.1637361857605195e-06, "loss": 0.1326, "step": 43595 }, { "epoch": 0.7775835622302286, "grad_norm": 0.19711872935295105, "learning_rate": 7.162645566780255e-06, "loss": 0.0863, "step": 43596 }, { "epoch": 0.7776013983519423, "grad_norm": 0.24326525628566742, "learning_rate": 7.161555016943738e-06, "loss": 0.1018, "step": 43597 }, { "epoch": 0.777619234473656, "grad_norm": 0.2896716594696045, "learning_rate": 7.160464536255185e-06, "loss": 0.1501, "step": 43598 }, { "epoch": 0.7776370705953698, "grad_norm": 0.2527450919151306, "learning_rate": 7.159374124718843e-06, "loss": 0.0946, "step": 43599 }, { "epoch": 0.7776549067170835, "grad_norm": 0.28956490755081177, "learning_rate": 7.158283782338929e-06, "loss": 0.0904, "step": 43600 }, { "epoch": 0.7776727428387972, "grad_norm": 0.18599063158035278, "learning_rate": 7.157193509119667e-06, "loss": 0.0917, "step": 43601 }, { "epoch": 0.7776905789605109, "grad_norm": 0.19335591793060303, "learning_rate": 7.156103305065281e-06, "loss": 0.0776, "step": 43602 }, { "epoch": 0.7777084150822245, "grad_norm": 0.2681027054786682, "learning_rate": 7.155013170180008e-06, "loss": 0.155, "step": 43603 }, { "epoch": 0.7777262512039382, "grad_norm": 0.30874451994895935, "learning_rate": 7.153923104468066e-06, "loss": 0.1095, "step": 43604 }, { "epoch": 0.7777440873256519, "grad_norm": 0.45701950788497925, "learning_rate": 7.152833107933685e-06, "loss": 0.125, "step": 43605 }, { "epoch": 0.7777619234473656, "grad_norm": 0.24174362421035767, "learning_rate": 7.151743180581077e-06, "loss": 0.1212, "step": 43606 }, { "epoch": 0.7777797595690793, "grad_norm": 0.26570090651512146, "learning_rate": 7.150653322414485e-06, "loss": 0.1414, "step": 43607 }, { "epoch": 0.777797595690793, "grad_norm": 0.33156126737594604, "learning_rate": 7.149563533438117e-06, "loss": 0.1548, "step": 43608 }, { "epoch": 0.7778154318125067, "grad_norm": 0.2927214205265045, "learning_rate": 7.148473813656218e-06, "loss": 0.1135, "step": 43609 }, { "epoch": 0.7778332679342204, "grad_norm": 0.27944502234458923, "learning_rate": 7.147384163072995e-06, "loss": 0.0862, "step": 43610 }, { "epoch": 0.777851104055934, "grad_norm": 0.2414645105600357, "learning_rate": 7.1462945816926724e-06, "loss": 0.1081, "step": 43611 }, { "epoch": 0.7778689401776477, "grad_norm": 0.32957446575164795, "learning_rate": 7.145205069519484e-06, "loss": 0.131, "step": 43612 }, { "epoch": 0.7778867762993614, "grad_norm": 0.22420360147953033, "learning_rate": 7.144115626557651e-06, "loss": 0.1191, "step": 43613 }, { "epoch": 0.7779046124210751, "grad_norm": 0.1976509541273117, "learning_rate": 7.1430262528113915e-06, "loss": 0.0555, "step": 43614 }, { "epoch": 0.7779224485427888, "grad_norm": 0.3038027882575989, "learning_rate": 7.1419369482849215e-06, "loss": 0.1085, "step": 43615 }, { "epoch": 0.7779402846645026, "grad_norm": 0.22419126331806183, "learning_rate": 7.140847712982479e-06, "loss": 0.1631, "step": 43616 }, { "epoch": 0.7779581207862163, "grad_norm": 0.42043906450271606, "learning_rate": 7.139758546908279e-06, "loss": 0.0922, "step": 43617 }, { "epoch": 0.77797595690793, "grad_norm": 0.27288374304771423, "learning_rate": 7.138669450066546e-06, "loss": 0.137, "step": 43618 }, { "epoch": 0.7779937930296437, "grad_norm": 0.3137914538383484, "learning_rate": 7.13758042246149e-06, "loss": 0.085, "step": 43619 }, { "epoch": 0.7780116291513574, "grad_norm": 0.2974531948566437, "learning_rate": 7.13649146409735e-06, "loss": 0.0788, "step": 43620 }, { "epoch": 0.778029465273071, "grad_norm": 0.22917471826076508, "learning_rate": 7.135402574978331e-06, "loss": 0.0927, "step": 43621 }, { "epoch": 0.7780473013947847, "grad_norm": 0.22131459414958954, "learning_rate": 7.134313755108668e-06, "loss": 0.0916, "step": 43622 }, { "epoch": 0.7780651375164984, "grad_norm": 0.28505557775497437, "learning_rate": 7.133225004492578e-06, "loss": 0.1338, "step": 43623 }, { "epoch": 0.7780829736382121, "grad_norm": 0.36708155274391174, "learning_rate": 7.1321363231342696e-06, "loss": 0.1873, "step": 43624 }, { "epoch": 0.7781008097599258, "grad_norm": 0.19532674551010132, "learning_rate": 7.13104771103798e-06, "loss": 0.109, "step": 43625 }, { "epoch": 0.7781186458816395, "grad_norm": 0.30337557196617126, "learning_rate": 7.1299591682079225e-06, "loss": 0.1589, "step": 43626 }, { "epoch": 0.7781364820033532, "grad_norm": 0.2736426293849945, "learning_rate": 7.1288706946483125e-06, "loss": 0.098, "step": 43627 }, { "epoch": 0.7781543181250669, "grad_norm": 0.19808503985404968, "learning_rate": 7.127782290363369e-06, "loss": 0.0867, "step": 43628 }, { "epoch": 0.7781721542467805, "grad_norm": 0.24737536907196045, "learning_rate": 7.126693955357319e-06, "loss": 0.064, "step": 43629 }, { "epoch": 0.7781899903684942, "grad_norm": 0.2788330614566803, "learning_rate": 7.125605689634377e-06, "loss": 0.1296, "step": 43630 }, { "epoch": 0.7782078264902079, "grad_norm": 0.20348486304283142, "learning_rate": 7.12451749319876e-06, "loss": 0.0614, "step": 43631 }, { "epoch": 0.7782256626119216, "grad_norm": 0.35838407278060913, "learning_rate": 7.123429366054687e-06, "loss": 0.1242, "step": 43632 }, { "epoch": 0.7782434987336354, "grad_norm": 0.3719123303890228, "learning_rate": 7.12234130820637e-06, "loss": 0.1025, "step": 43633 }, { "epoch": 0.7782613348553491, "grad_norm": 0.3032964766025543, "learning_rate": 7.121253319658039e-06, "loss": 0.123, "step": 43634 }, { "epoch": 0.7782791709770628, "grad_norm": 0.30011990666389465, "learning_rate": 7.1201654004139e-06, "loss": 0.1127, "step": 43635 }, { "epoch": 0.7782970070987765, "grad_norm": 0.2569604814052582, "learning_rate": 7.119077550478182e-06, "loss": 0.1195, "step": 43636 }, { "epoch": 0.7783148432204902, "grad_norm": 0.22060345113277435, "learning_rate": 7.117989769855085e-06, "loss": 0.1273, "step": 43637 }, { "epoch": 0.7783326793422038, "grad_norm": 0.4035508334636688, "learning_rate": 7.116902058548847e-06, "loss": 0.1217, "step": 43638 }, { "epoch": 0.7783505154639175, "grad_norm": 0.3727690279483795, "learning_rate": 7.115814416563671e-06, "loss": 0.1288, "step": 43639 }, { "epoch": 0.7783683515856312, "grad_norm": 0.2476806938648224, "learning_rate": 7.1147268439037764e-06, "loss": 0.1158, "step": 43640 }, { "epoch": 0.7783861877073449, "grad_norm": 0.5200356245040894, "learning_rate": 7.113639340573372e-06, "loss": 0.1146, "step": 43641 }, { "epoch": 0.7784040238290586, "grad_norm": 0.2770136296749115, "learning_rate": 7.112551906576684e-06, "loss": 0.1533, "step": 43642 }, { "epoch": 0.7784218599507723, "grad_norm": 0.22945545613765717, "learning_rate": 7.111464541917926e-06, "loss": 0.1086, "step": 43643 }, { "epoch": 0.778439696072486, "grad_norm": 0.33841952681541443, "learning_rate": 7.110377246601307e-06, "loss": 0.1326, "step": 43644 }, { "epoch": 0.7784575321941997, "grad_norm": 0.30094030499458313, "learning_rate": 7.109290020631046e-06, "loss": 0.1588, "step": 43645 }, { "epoch": 0.7784753683159134, "grad_norm": 0.3093232214450836, "learning_rate": 7.108202864011349e-06, "loss": 0.1426, "step": 43646 }, { "epoch": 0.778493204437627, "grad_norm": 0.2519281208515167, "learning_rate": 7.1071157767464424e-06, "loss": 0.1142, "step": 43647 }, { "epoch": 0.7785110405593407, "grad_norm": 0.25267964601516724, "learning_rate": 7.10602875884053e-06, "loss": 0.1733, "step": 43648 }, { "epoch": 0.7785288766810545, "grad_norm": 0.2881571650505066, "learning_rate": 7.104941810297836e-06, "loss": 0.1081, "step": 43649 }, { "epoch": 0.7785467128027682, "grad_norm": 0.18375305831432343, "learning_rate": 7.103854931122561e-06, "loss": 0.0927, "step": 43650 }, { "epoch": 0.7785645489244819, "grad_norm": 0.23631274700164795, "learning_rate": 7.102768121318934e-06, "loss": 0.1031, "step": 43651 }, { "epoch": 0.7785823850461956, "grad_norm": 0.2417704463005066, "learning_rate": 7.101681380891159e-06, "loss": 0.0633, "step": 43652 }, { "epoch": 0.7786002211679093, "grad_norm": 0.30564403533935547, "learning_rate": 7.10059470984345e-06, "loss": 0.132, "step": 43653 }, { "epoch": 0.778618057289623, "grad_norm": 0.2736362814903259, "learning_rate": 7.099508108180011e-06, "loss": 0.1251, "step": 43654 }, { "epoch": 0.7786358934113367, "grad_norm": 0.2428872436285019, "learning_rate": 7.098421575905068e-06, "loss": 0.0933, "step": 43655 }, { "epoch": 0.7786537295330503, "grad_norm": 0.4220563471317291, "learning_rate": 7.097335113022824e-06, "loss": 0.1694, "step": 43656 }, { "epoch": 0.778671565654764, "grad_norm": 0.28112512826919556, "learning_rate": 7.096248719537493e-06, "loss": 0.1252, "step": 43657 }, { "epoch": 0.7786894017764777, "grad_norm": 0.35753437876701355, "learning_rate": 7.095162395453289e-06, "loss": 0.1271, "step": 43658 }, { "epoch": 0.7787072378981914, "grad_norm": 0.24700522422790527, "learning_rate": 7.094076140774408e-06, "loss": 0.0739, "step": 43659 }, { "epoch": 0.7787250740199051, "grad_norm": 0.22753089666366577, "learning_rate": 7.092989955505083e-06, "loss": 0.1022, "step": 43660 }, { "epoch": 0.7787429101416188, "grad_norm": 0.32332295179367065, "learning_rate": 7.091903839649511e-06, "loss": 0.1391, "step": 43661 }, { "epoch": 0.7787607462633325, "grad_norm": 0.21662050485610962, "learning_rate": 7.0908177932118984e-06, "loss": 0.0832, "step": 43662 }, { "epoch": 0.7787785823850462, "grad_norm": 0.33779171109199524, "learning_rate": 7.089731816196471e-06, "loss": 0.1601, "step": 43663 }, { "epoch": 0.7787964185067598, "grad_norm": 0.3255383372306824, "learning_rate": 7.0886459086074195e-06, "loss": 0.1368, "step": 43664 }, { "epoch": 0.7788142546284735, "grad_norm": 0.254547655582428, "learning_rate": 7.087560070448973e-06, "loss": 0.094, "step": 43665 }, { "epoch": 0.7788320907501873, "grad_norm": 0.3348188102245331, "learning_rate": 7.086474301725327e-06, "loss": 0.1339, "step": 43666 }, { "epoch": 0.778849926871901, "grad_norm": 0.32626059651374817, "learning_rate": 7.0853886024406874e-06, "loss": 0.1081, "step": 43667 }, { "epoch": 0.7788677629936147, "grad_norm": 0.2311558872461319, "learning_rate": 7.084302972599277e-06, "loss": 0.0757, "step": 43668 }, { "epoch": 0.7788855991153284, "grad_norm": 0.25786083936691284, "learning_rate": 7.083217412205298e-06, "loss": 0.1198, "step": 43669 }, { "epoch": 0.7789034352370421, "grad_norm": 0.25509217381477356, "learning_rate": 7.082131921262955e-06, "loss": 0.1272, "step": 43670 }, { "epoch": 0.7789212713587558, "grad_norm": 0.2245519757270813, "learning_rate": 7.0810464997764555e-06, "loss": 0.0757, "step": 43671 }, { "epoch": 0.7789391074804695, "grad_norm": 0.2528594434261322, "learning_rate": 7.0799611477500025e-06, "loss": 0.1104, "step": 43672 }, { "epoch": 0.7789569436021831, "grad_norm": 0.31715109944343567, "learning_rate": 7.078875865187817e-06, "loss": 0.1248, "step": 43673 }, { "epoch": 0.7789747797238968, "grad_norm": 0.3272842466831207, "learning_rate": 7.077790652094099e-06, "loss": 0.1446, "step": 43674 }, { "epoch": 0.7789926158456105, "grad_norm": 0.4566992223262787, "learning_rate": 7.076705508473047e-06, "loss": 0.1536, "step": 43675 }, { "epoch": 0.7790104519673242, "grad_norm": 0.23735497891902924, "learning_rate": 7.075620434328883e-06, "loss": 0.0402, "step": 43676 }, { "epoch": 0.7790282880890379, "grad_norm": 0.31610390543937683, "learning_rate": 7.074535429665796e-06, "loss": 0.088, "step": 43677 }, { "epoch": 0.7790461242107516, "grad_norm": 0.2789989709854126, "learning_rate": 7.07345049448801e-06, "loss": 0.1305, "step": 43678 }, { "epoch": 0.7790639603324653, "grad_norm": 0.35149288177490234, "learning_rate": 7.072365628799721e-06, "loss": 0.1631, "step": 43679 }, { "epoch": 0.779081796454179, "grad_norm": 0.22422043979167938, "learning_rate": 7.071280832605126e-06, "loss": 0.0876, "step": 43680 }, { "epoch": 0.7790996325758927, "grad_norm": 0.2994772493839264, "learning_rate": 7.070196105908447e-06, "loss": 0.0965, "step": 43681 }, { "epoch": 0.7791174686976063, "grad_norm": 0.3156898021697998, "learning_rate": 7.069111448713883e-06, "loss": 0.1254, "step": 43682 }, { "epoch": 0.7791353048193201, "grad_norm": 0.3250686228275299, "learning_rate": 7.068026861025634e-06, "loss": 0.1218, "step": 43683 }, { "epoch": 0.7791531409410338, "grad_norm": 0.18285994231700897, "learning_rate": 7.066942342847907e-06, "loss": 0.0749, "step": 43684 }, { "epoch": 0.7791709770627475, "grad_norm": 0.3119698166847229, "learning_rate": 7.065857894184897e-06, "loss": 0.1657, "step": 43685 }, { "epoch": 0.7791888131844612, "grad_norm": 0.23059257864952087, "learning_rate": 7.064773515040823e-06, "loss": 0.092, "step": 43686 }, { "epoch": 0.7792066493061749, "grad_norm": 0.30813658237457275, "learning_rate": 7.063689205419882e-06, "loss": 0.1569, "step": 43687 }, { "epoch": 0.7792244854278886, "grad_norm": 0.22009910643100739, "learning_rate": 7.062604965326278e-06, "loss": 0.0809, "step": 43688 }, { "epoch": 0.7792423215496023, "grad_norm": 0.25231876969337463, "learning_rate": 7.061520794764204e-06, "loss": 0.1231, "step": 43689 }, { "epoch": 0.779260157671316, "grad_norm": 0.25878289341926575, "learning_rate": 7.060436693737879e-06, "loss": 0.1068, "step": 43690 }, { "epoch": 0.7792779937930296, "grad_norm": 0.27812430262565613, "learning_rate": 7.059352662251489e-06, "loss": 0.1582, "step": 43691 }, { "epoch": 0.7792958299147433, "grad_norm": 0.2660776674747467, "learning_rate": 7.058268700309254e-06, "loss": 0.1277, "step": 43692 }, { "epoch": 0.779313666036457, "grad_norm": 0.2194720357656479, "learning_rate": 7.057184807915357e-06, "loss": 0.0938, "step": 43693 }, { "epoch": 0.7793315021581707, "grad_norm": 0.26511961221694946, "learning_rate": 7.0561009850740186e-06, "loss": 0.0996, "step": 43694 }, { "epoch": 0.7793493382798844, "grad_norm": 0.3341408669948578, "learning_rate": 7.055017231789429e-06, "loss": 0.1418, "step": 43695 }, { "epoch": 0.7793671744015981, "grad_norm": 0.17767034471035004, "learning_rate": 7.053933548065791e-06, "loss": 0.1014, "step": 43696 }, { "epoch": 0.7793850105233118, "grad_norm": 0.1988210678100586, "learning_rate": 7.052849933907305e-06, "loss": 0.1244, "step": 43697 }, { "epoch": 0.7794028466450255, "grad_norm": 0.35376453399658203, "learning_rate": 7.0517663893181624e-06, "loss": 0.1153, "step": 43698 }, { "epoch": 0.7794206827667391, "grad_norm": 0.18547537922859192, "learning_rate": 7.050682914302581e-06, "loss": 0.0629, "step": 43699 }, { "epoch": 0.779438518888453, "grad_norm": 0.2151731550693512, "learning_rate": 7.049599508864752e-06, "loss": 0.1081, "step": 43700 }, { "epoch": 0.7794563550101666, "grad_norm": 0.26846128702163696, "learning_rate": 7.0485161730088765e-06, "loss": 0.0834, "step": 43701 }, { "epoch": 0.7794741911318803, "grad_norm": 0.28111574053764343, "learning_rate": 7.047432906739143e-06, "loss": 0.1437, "step": 43702 }, { "epoch": 0.779492027253594, "grad_norm": 0.2761976420879364, "learning_rate": 7.046349710059768e-06, "loss": 0.1322, "step": 43703 }, { "epoch": 0.7795098633753077, "grad_norm": 0.4626353681087494, "learning_rate": 7.045266582974935e-06, "loss": 0.1933, "step": 43704 }, { "epoch": 0.7795276994970214, "grad_norm": 0.2717216908931732, "learning_rate": 7.04418352548886e-06, "loss": 0.0738, "step": 43705 }, { "epoch": 0.7795455356187351, "grad_norm": 0.2565809488296509, "learning_rate": 7.043100537605721e-06, "loss": 0.1005, "step": 43706 }, { "epoch": 0.7795633717404488, "grad_norm": 0.2800256609916687, "learning_rate": 7.042017619329736e-06, "loss": 0.069, "step": 43707 }, { "epoch": 0.7795812078621625, "grad_norm": 0.2342686951160431, "learning_rate": 7.0409347706650915e-06, "loss": 0.1805, "step": 43708 }, { "epoch": 0.7795990439838761, "grad_norm": 0.2554384171962738, "learning_rate": 7.039851991615986e-06, "loss": 0.1476, "step": 43709 }, { "epoch": 0.7796168801055898, "grad_norm": 0.2566075026988983, "learning_rate": 7.038769282186622e-06, "loss": 0.1302, "step": 43710 }, { "epoch": 0.7796347162273035, "grad_norm": 0.25781774520874023, "learning_rate": 7.03768664238118e-06, "loss": 0.1329, "step": 43711 }, { "epoch": 0.7796525523490172, "grad_norm": 0.22653940320014954, "learning_rate": 7.036604072203876e-06, "loss": 0.0906, "step": 43712 }, { "epoch": 0.7796703884707309, "grad_norm": 0.32700657844543457, "learning_rate": 7.035521571658901e-06, "loss": 0.0717, "step": 43713 }, { "epoch": 0.7796882245924446, "grad_norm": 0.3249012529850006, "learning_rate": 7.034439140750448e-06, "loss": 0.129, "step": 43714 }, { "epoch": 0.7797060607141583, "grad_norm": 0.2203959822654724, "learning_rate": 7.033356779482708e-06, "loss": 0.0785, "step": 43715 }, { "epoch": 0.779723896835872, "grad_norm": 0.216635599732399, "learning_rate": 7.032274487859888e-06, "loss": 0.1112, "step": 43716 }, { "epoch": 0.7797417329575858, "grad_norm": 0.34894847869873047, "learning_rate": 7.03119226588618e-06, "loss": 0.0887, "step": 43717 }, { "epoch": 0.7797595690792994, "grad_norm": 0.27039068937301636, "learning_rate": 7.030110113565766e-06, "loss": 0.0596, "step": 43718 }, { "epoch": 0.7797774052010131, "grad_norm": 0.28972846269607544, "learning_rate": 7.0290280309028535e-06, "loss": 0.1283, "step": 43719 }, { "epoch": 0.7797952413227268, "grad_norm": 0.29053258895874023, "learning_rate": 7.027946017901646e-06, "loss": 0.1091, "step": 43720 }, { "epoch": 0.7798130774444405, "grad_norm": 0.35831987857818604, "learning_rate": 7.026864074566325e-06, "loss": 0.1839, "step": 43721 }, { "epoch": 0.7798309135661542, "grad_norm": 0.21541453897953033, "learning_rate": 7.025782200901085e-06, "loss": 0.0969, "step": 43722 }, { "epoch": 0.7798487496878679, "grad_norm": 0.2554680109024048, "learning_rate": 7.0247003969101225e-06, "loss": 0.1407, "step": 43723 }, { "epoch": 0.7798665858095816, "grad_norm": 0.2727867066860199, "learning_rate": 7.0236186625976225e-06, "loss": 0.1465, "step": 43724 }, { "epoch": 0.7798844219312953, "grad_norm": 0.2268063724040985, "learning_rate": 7.022536997967793e-06, "loss": 0.0789, "step": 43725 }, { "epoch": 0.779902258053009, "grad_norm": 0.3424607217311859, "learning_rate": 7.021455403024818e-06, "loss": 0.1049, "step": 43726 }, { "epoch": 0.7799200941747226, "grad_norm": 0.3250359296798706, "learning_rate": 7.020373877772893e-06, "loss": 0.1836, "step": 43727 }, { "epoch": 0.7799379302964363, "grad_norm": 0.32640814781188965, "learning_rate": 7.019292422216201e-06, "loss": 0.1267, "step": 43728 }, { "epoch": 0.77995576641815, "grad_norm": 0.3251577913761139, "learning_rate": 7.018211036358949e-06, "loss": 0.1214, "step": 43729 }, { "epoch": 0.7799736025398637, "grad_norm": 0.3441602289676666, "learning_rate": 7.01712972020532e-06, "loss": 0.1227, "step": 43730 }, { "epoch": 0.7799914386615774, "grad_norm": 0.328098326921463, "learning_rate": 7.016048473759501e-06, "loss": 0.1382, "step": 43731 }, { "epoch": 0.7800092747832911, "grad_norm": 0.1681549847126007, "learning_rate": 7.014967297025698e-06, "loss": 0.0841, "step": 43732 }, { "epoch": 0.7800271109050048, "grad_norm": 0.26506534218788147, "learning_rate": 7.0138861900080874e-06, "loss": 0.1392, "step": 43733 }, { "epoch": 0.7800449470267186, "grad_norm": 0.2814160883426666, "learning_rate": 7.012805152710872e-06, "loss": 0.0957, "step": 43734 }, { "epoch": 0.7800627831484322, "grad_norm": 0.2107677459716797, "learning_rate": 7.011724185138235e-06, "loss": 0.1419, "step": 43735 }, { "epoch": 0.7800806192701459, "grad_norm": 0.23588047921657562, "learning_rate": 7.010643287294369e-06, "loss": 0.0974, "step": 43736 }, { "epoch": 0.7800984553918596, "grad_norm": 0.1811896413564682, "learning_rate": 7.0095624591834555e-06, "loss": 0.0858, "step": 43737 }, { "epoch": 0.7801162915135733, "grad_norm": 0.3950812816619873, "learning_rate": 7.008481700809699e-06, "loss": 0.1537, "step": 43738 }, { "epoch": 0.780134127635287, "grad_norm": 0.2639102041721344, "learning_rate": 7.007401012177284e-06, "loss": 0.1167, "step": 43739 }, { "epoch": 0.7801519637570007, "grad_norm": 0.3729657828807831, "learning_rate": 7.006320393290394e-06, "loss": 0.1011, "step": 43740 }, { "epoch": 0.7801697998787144, "grad_norm": 0.9485948085784912, "learning_rate": 7.005239844153213e-06, "loss": 0.1553, "step": 43741 }, { "epoch": 0.7801876360004281, "grad_norm": 0.5171279907226562, "learning_rate": 7.00415936476995e-06, "loss": 0.1321, "step": 43742 }, { "epoch": 0.7802054721221418, "grad_norm": 0.2862878143787384, "learning_rate": 7.0030789551447765e-06, "loss": 0.0837, "step": 43743 }, { "epoch": 0.7802233082438554, "grad_norm": 0.280666321516037, "learning_rate": 7.001998615281885e-06, "loss": 0.1167, "step": 43744 }, { "epoch": 0.7802411443655691, "grad_norm": 0.33776262402534485, "learning_rate": 7.000918345185456e-06, "loss": 0.137, "step": 43745 }, { "epoch": 0.7802589804872828, "grad_norm": 0.2990826666355133, "learning_rate": 6.999838144859686e-06, "loss": 0.1449, "step": 43746 }, { "epoch": 0.7802768166089965, "grad_norm": 0.21509036421775818, "learning_rate": 6.998758014308765e-06, "loss": 0.1123, "step": 43747 }, { "epoch": 0.7802946527307102, "grad_norm": 0.26408934593200684, "learning_rate": 6.997677953536877e-06, "loss": 0.1579, "step": 43748 }, { "epoch": 0.7803124888524239, "grad_norm": 0.22391390800476074, "learning_rate": 6.996597962548207e-06, "loss": 0.0671, "step": 43749 }, { "epoch": 0.7803303249741376, "grad_norm": 0.24073286354541779, "learning_rate": 6.995518041346935e-06, "loss": 0.1353, "step": 43750 }, { "epoch": 0.7803481610958514, "grad_norm": 0.3524903655052185, "learning_rate": 6.99443818993726e-06, "loss": 0.1179, "step": 43751 }, { "epoch": 0.780365997217565, "grad_norm": 0.24867582321166992, "learning_rate": 6.993358408323361e-06, "loss": 0.1302, "step": 43752 }, { "epoch": 0.7803838333392787, "grad_norm": 0.29625800251960754, "learning_rate": 6.992278696509425e-06, "loss": 0.1588, "step": 43753 }, { "epoch": 0.7804016694609924, "grad_norm": 0.2628306746482849, "learning_rate": 6.991199054499628e-06, "loss": 0.121, "step": 43754 }, { "epoch": 0.7804195055827061, "grad_norm": 0.243768110871315, "learning_rate": 6.990119482298171e-06, "loss": 0.0854, "step": 43755 }, { "epoch": 0.7804373417044198, "grad_norm": 0.20008443295955658, "learning_rate": 6.989039979909235e-06, "loss": 0.0759, "step": 43756 }, { "epoch": 0.7804551778261335, "grad_norm": 0.2615085542201996, "learning_rate": 6.987960547336997e-06, "loss": 0.1138, "step": 43757 }, { "epoch": 0.7804730139478472, "grad_norm": 0.24916520714759827, "learning_rate": 6.986881184585636e-06, "loss": 0.1392, "step": 43758 }, { "epoch": 0.7804908500695609, "grad_norm": 0.24652761220932007, "learning_rate": 6.985801891659358e-06, "loss": 0.102, "step": 43759 }, { "epoch": 0.7805086861912746, "grad_norm": 0.2989273965358734, "learning_rate": 6.984722668562321e-06, "loss": 0.1385, "step": 43760 }, { "epoch": 0.7805265223129882, "grad_norm": 0.3251599967479706, "learning_rate": 6.983643515298732e-06, "loss": 0.1139, "step": 43761 }, { "epoch": 0.7805443584347019, "grad_norm": 0.23709046840667725, "learning_rate": 6.982564431872754e-06, "loss": 0.0833, "step": 43762 }, { "epoch": 0.7805621945564156, "grad_norm": 0.33574166893959045, "learning_rate": 6.981485418288589e-06, "loss": 0.1587, "step": 43763 }, { "epoch": 0.7805800306781293, "grad_norm": 0.26342105865478516, "learning_rate": 6.980406474550408e-06, "loss": 0.1179, "step": 43764 }, { "epoch": 0.780597866799843, "grad_norm": 0.3333938419818878, "learning_rate": 6.979327600662395e-06, "loss": 0.1126, "step": 43765 }, { "epoch": 0.7806157029215567, "grad_norm": 0.24686551094055176, "learning_rate": 6.978248796628734e-06, "loss": 0.1456, "step": 43766 }, { "epoch": 0.7806335390432705, "grad_norm": 0.2772115170955658, "learning_rate": 6.9771700624535945e-06, "loss": 0.1519, "step": 43767 }, { "epoch": 0.7806513751649842, "grad_norm": 0.25339657068252563, "learning_rate": 6.9760913981411796e-06, "loss": 0.1283, "step": 43768 }, { "epoch": 0.7806692112866979, "grad_norm": 0.3239727318286896, "learning_rate": 6.975012803695657e-06, "loss": 0.1337, "step": 43769 }, { "epoch": 0.7806870474084115, "grad_norm": 0.24212871491909027, "learning_rate": 6.973934279121214e-06, "loss": 0.1111, "step": 43770 }, { "epoch": 0.7807048835301252, "grad_norm": 0.24759207665920258, "learning_rate": 6.972855824422017e-06, "loss": 0.0994, "step": 43771 }, { "epoch": 0.7807227196518389, "grad_norm": 0.34922152757644653, "learning_rate": 6.9717774396022674e-06, "loss": 0.1085, "step": 43772 }, { "epoch": 0.7807405557735526, "grad_norm": 0.26788589358329773, "learning_rate": 6.970699124666124e-06, "loss": 0.101, "step": 43773 }, { "epoch": 0.7807583918952663, "grad_norm": 0.27522867918014526, "learning_rate": 6.969620879617789e-06, "loss": 0.1304, "step": 43774 }, { "epoch": 0.78077622801698, "grad_norm": 0.2831469476222992, "learning_rate": 6.968542704461423e-06, "loss": 0.1364, "step": 43775 }, { "epoch": 0.7807940641386937, "grad_norm": 0.32025599479675293, "learning_rate": 6.9674645992012176e-06, "loss": 0.1117, "step": 43776 }, { "epoch": 0.7808119002604074, "grad_norm": 0.28036609292030334, "learning_rate": 6.9663865638413515e-06, "loss": 0.1368, "step": 43777 }, { "epoch": 0.780829736382121, "grad_norm": 0.3188968300819397, "learning_rate": 6.965308598385997e-06, "loss": 0.155, "step": 43778 }, { "epoch": 0.7808475725038347, "grad_norm": 0.24273408949375153, "learning_rate": 6.964230702839339e-06, "loss": 0.122, "step": 43779 }, { "epoch": 0.7808654086255484, "grad_norm": 0.2649591267108917, "learning_rate": 6.9631528772055424e-06, "loss": 0.1227, "step": 43780 }, { "epoch": 0.7808832447472621, "grad_norm": 0.25641506910324097, "learning_rate": 6.962075121488801e-06, "loss": 0.0858, "step": 43781 }, { "epoch": 0.7809010808689758, "grad_norm": 0.24746544659137726, "learning_rate": 6.960997435693286e-06, "loss": 0.1175, "step": 43782 }, { "epoch": 0.7809189169906895, "grad_norm": 0.35266461968421936, "learning_rate": 6.95991981982318e-06, "loss": 0.1546, "step": 43783 }, { "epoch": 0.7809367531124033, "grad_norm": 0.23821569979190826, "learning_rate": 6.958842273882643e-06, "loss": 0.1033, "step": 43784 }, { "epoch": 0.780954589234117, "grad_norm": 0.2249196618795395, "learning_rate": 6.957764797875877e-06, "loss": 0.1176, "step": 43785 }, { "epoch": 0.7809724253558307, "grad_norm": 0.22049926221370697, "learning_rate": 6.956687391807043e-06, "loss": 0.0708, "step": 43786 }, { "epoch": 0.7809902614775444, "grad_norm": 0.2715582549571991, "learning_rate": 6.955610055680312e-06, "loss": 0.0913, "step": 43787 }, { "epoch": 0.781008097599258, "grad_norm": 0.23448802530765533, "learning_rate": 6.954532789499879e-06, "loss": 0.0853, "step": 43788 }, { "epoch": 0.7810259337209717, "grad_norm": 0.2523050904273987, "learning_rate": 6.9534555932699e-06, "loss": 0.1449, "step": 43789 }, { "epoch": 0.7810437698426854, "grad_norm": 0.23628056049346924, "learning_rate": 6.952378466994569e-06, "loss": 0.127, "step": 43790 }, { "epoch": 0.7810616059643991, "grad_norm": 0.28274673223495483, "learning_rate": 6.951301410678055e-06, "loss": 0.161, "step": 43791 }, { "epoch": 0.7810794420861128, "grad_norm": 0.21495795249938965, "learning_rate": 6.950224424324525e-06, "loss": 0.1287, "step": 43792 }, { "epoch": 0.7810972782078265, "grad_norm": 0.2318679541349411, "learning_rate": 6.949147507938156e-06, "loss": 0.0907, "step": 43793 }, { "epoch": 0.7811151143295402, "grad_norm": 0.2539827525615692, "learning_rate": 6.9480706615231316e-06, "loss": 0.1227, "step": 43794 }, { "epoch": 0.7811329504512539, "grad_norm": 0.235306516289711, "learning_rate": 6.946993885083622e-06, "loss": 0.084, "step": 43795 }, { "epoch": 0.7811507865729675, "grad_norm": 0.3072325885295868, "learning_rate": 6.945917178623798e-06, "loss": 0.1177, "step": 43796 }, { "epoch": 0.7811686226946812, "grad_norm": 0.2889297604560852, "learning_rate": 6.944840542147827e-06, "loss": 0.0908, "step": 43797 }, { "epoch": 0.7811864588163949, "grad_norm": 0.23642177879810333, "learning_rate": 6.943763975659898e-06, "loss": 0.1313, "step": 43798 }, { "epoch": 0.7812042949381086, "grad_norm": 0.28191569447517395, "learning_rate": 6.942687479164176e-06, "loss": 0.108, "step": 43799 }, { "epoch": 0.7812221310598223, "grad_norm": 0.25036439299583435, "learning_rate": 6.941611052664826e-06, "loss": 0.1163, "step": 43800 }, { "epoch": 0.7812399671815361, "grad_norm": 0.3078230023384094, "learning_rate": 6.9405346961660375e-06, "loss": 0.1198, "step": 43801 }, { "epoch": 0.7812578033032498, "grad_norm": 0.244432270526886, "learning_rate": 6.939458409671964e-06, "loss": 0.0806, "step": 43802 }, { "epoch": 0.7812756394249635, "grad_norm": 0.39712393283843994, "learning_rate": 6.938382193186798e-06, "loss": 0.1227, "step": 43803 }, { "epoch": 0.7812934755466772, "grad_norm": 0.3207155466079712, "learning_rate": 6.937306046714701e-06, "loss": 0.0907, "step": 43804 }, { "epoch": 0.7813113116683909, "grad_norm": 0.2498425990343094, "learning_rate": 6.936229970259844e-06, "loss": 0.1045, "step": 43805 }, { "epoch": 0.7813291477901045, "grad_norm": 0.2713736593723297, "learning_rate": 6.935153963826391e-06, "loss": 0.1068, "step": 43806 }, { "epoch": 0.7813469839118182, "grad_norm": 0.22285813093185425, "learning_rate": 6.934078027418525e-06, "loss": 0.077, "step": 43807 }, { "epoch": 0.7813648200335319, "grad_norm": 0.30948761105537415, "learning_rate": 6.933002161040417e-06, "loss": 0.0683, "step": 43808 }, { "epoch": 0.7813826561552456, "grad_norm": 0.27965933084487915, "learning_rate": 6.9319263646962314e-06, "loss": 0.1248, "step": 43809 }, { "epoch": 0.7814004922769593, "grad_norm": 0.35496416687965393, "learning_rate": 6.93085063839013e-06, "loss": 0.1466, "step": 43810 }, { "epoch": 0.781418328398673, "grad_norm": 0.2777808904647827, "learning_rate": 6.929774982126302e-06, "loss": 0.1037, "step": 43811 }, { "epoch": 0.7814361645203867, "grad_norm": 0.2724512219429016, "learning_rate": 6.928699395908908e-06, "loss": 0.1415, "step": 43812 }, { "epoch": 0.7814540006421004, "grad_norm": 0.27665358781814575, "learning_rate": 6.927623879742115e-06, "loss": 0.0978, "step": 43813 }, { "epoch": 0.781471836763814, "grad_norm": 0.2774742543697357, "learning_rate": 6.926548433630087e-06, "loss": 0.1109, "step": 43814 }, { "epoch": 0.7814896728855277, "grad_norm": 0.3812437951564789, "learning_rate": 6.925473057577009e-06, "loss": 0.1205, "step": 43815 }, { "epoch": 0.7815075090072414, "grad_norm": 0.2852420210838318, "learning_rate": 6.92439775158703e-06, "loss": 0.0946, "step": 43816 }, { "epoch": 0.7815253451289551, "grad_norm": 0.2125966101884842, "learning_rate": 6.923322515664335e-06, "loss": 0.1242, "step": 43817 }, { "epoch": 0.7815431812506689, "grad_norm": 0.2485707402229309, "learning_rate": 6.922247349813088e-06, "loss": 0.1039, "step": 43818 }, { "epoch": 0.7815610173723826, "grad_norm": 0.4582907557487488, "learning_rate": 6.921172254037448e-06, "loss": 0.1862, "step": 43819 }, { "epoch": 0.7815788534940963, "grad_norm": 0.3127819001674652, "learning_rate": 6.920097228341593e-06, "loss": 0.1136, "step": 43820 }, { "epoch": 0.78159668961581, "grad_norm": 0.2834848463535309, "learning_rate": 6.919022272729686e-06, "loss": 0.1324, "step": 43821 }, { "epoch": 0.7816145257375237, "grad_norm": 0.2748400568962097, "learning_rate": 6.9179473872058946e-06, "loss": 0.1222, "step": 43822 }, { "epoch": 0.7816323618592373, "grad_norm": 0.3027372658252716, "learning_rate": 6.916872571774377e-06, "loss": 0.1306, "step": 43823 }, { "epoch": 0.781650197980951, "grad_norm": 0.3411955237388611, "learning_rate": 6.915797826439313e-06, "loss": 0.1106, "step": 43824 }, { "epoch": 0.7816680341026647, "grad_norm": 0.32070213556289673, "learning_rate": 6.914723151204866e-06, "loss": 0.1481, "step": 43825 }, { "epoch": 0.7816858702243784, "grad_norm": 0.31144171953201294, "learning_rate": 6.913648546075197e-06, "loss": 0.1643, "step": 43826 }, { "epoch": 0.7817037063460921, "grad_norm": 0.2526528537273407, "learning_rate": 6.912574011054462e-06, "loss": 0.0235, "step": 43827 }, { "epoch": 0.7817215424678058, "grad_norm": 0.2426234781742096, "learning_rate": 6.911499546146849e-06, "loss": 0.1071, "step": 43828 }, { "epoch": 0.7817393785895195, "grad_norm": 0.2782787084579468, "learning_rate": 6.910425151356503e-06, "loss": 0.0915, "step": 43829 }, { "epoch": 0.7817572147112332, "grad_norm": 0.27718019485473633, "learning_rate": 6.909350826687605e-06, "loss": 0.1442, "step": 43830 }, { "epoch": 0.7817750508329468, "grad_norm": 0.2478848546743393, "learning_rate": 6.9082765721443096e-06, "loss": 0.1174, "step": 43831 }, { "epoch": 0.7817928869546605, "grad_norm": 0.3368293046951294, "learning_rate": 6.907202387730777e-06, "loss": 0.1697, "step": 43832 }, { "epoch": 0.7818107230763742, "grad_norm": 0.30247530341148376, "learning_rate": 6.906128273451184e-06, "loss": 0.1477, "step": 43833 }, { "epoch": 0.7818285591980879, "grad_norm": 0.2598891854286194, "learning_rate": 6.905054229309688e-06, "loss": 0.1296, "step": 43834 }, { "epoch": 0.7818463953198017, "grad_norm": 0.26453354954719543, "learning_rate": 6.903980255310449e-06, "loss": 0.0963, "step": 43835 }, { "epoch": 0.7818642314415154, "grad_norm": 0.2543056309223175, "learning_rate": 6.902906351457628e-06, "loss": 0.1073, "step": 43836 }, { "epoch": 0.7818820675632291, "grad_norm": 0.32321298122406006, "learning_rate": 6.901832517755399e-06, "loss": 0.0419, "step": 43837 }, { "epoch": 0.7818999036849428, "grad_norm": 0.32585689425468445, "learning_rate": 6.900758754207917e-06, "loss": 0.1218, "step": 43838 }, { "epoch": 0.7819177398066565, "grad_norm": 0.24500715732574463, "learning_rate": 6.899685060819347e-06, "loss": 0.1076, "step": 43839 }, { "epoch": 0.7819355759283702, "grad_norm": 0.2550657093524933, "learning_rate": 6.898611437593841e-06, "loss": 0.1249, "step": 43840 }, { "epoch": 0.7819534120500838, "grad_norm": 0.22559019923210144, "learning_rate": 6.897537884535579e-06, "loss": 0.1215, "step": 43841 }, { "epoch": 0.7819712481717975, "grad_norm": 0.2994731068611145, "learning_rate": 6.8964644016487085e-06, "loss": 0.163, "step": 43842 }, { "epoch": 0.7819890842935112, "grad_norm": 0.267466276884079, "learning_rate": 6.895390988937389e-06, "loss": 0.0943, "step": 43843 }, { "epoch": 0.7820069204152249, "grad_norm": 0.24043338000774384, "learning_rate": 6.894317646405796e-06, "loss": 0.1274, "step": 43844 }, { "epoch": 0.7820247565369386, "grad_norm": 0.2896980941295624, "learning_rate": 6.893244374058072e-06, "loss": 0.1213, "step": 43845 }, { "epoch": 0.7820425926586523, "grad_norm": 0.23170100152492523, "learning_rate": 6.892171171898396e-06, "loss": 0.1357, "step": 43846 }, { "epoch": 0.782060428780366, "grad_norm": 0.23925979435443878, "learning_rate": 6.891098039930918e-06, "loss": 0.0695, "step": 43847 }, { "epoch": 0.7820782649020797, "grad_norm": 0.4208838939666748, "learning_rate": 6.890024978159798e-06, "loss": 0.1702, "step": 43848 }, { "epoch": 0.7820961010237933, "grad_norm": 0.32660186290740967, "learning_rate": 6.888951986589187e-06, "loss": 0.202, "step": 43849 }, { "epoch": 0.782113937145507, "grad_norm": 0.350597620010376, "learning_rate": 6.8878790652232655e-06, "loss": 0.1263, "step": 43850 }, { "epoch": 0.7821317732672207, "grad_norm": 0.28322306275367737, "learning_rate": 6.886806214066177e-06, "loss": 0.1368, "step": 43851 }, { "epoch": 0.7821496093889345, "grad_norm": 0.3919888138771057, "learning_rate": 6.885733433122085e-06, "loss": 0.1047, "step": 43852 }, { "epoch": 0.7821674455106482, "grad_norm": 0.2230241596698761, "learning_rate": 6.884660722395137e-06, "loss": 0.0808, "step": 43853 }, { "epoch": 0.7821852816323619, "grad_norm": 0.25251758098602295, "learning_rate": 6.883588081889511e-06, "loss": 0.1184, "step": 43854 }, { "epoch": 0.7822031177540756, "grad_norm": 0.23842795193195343, "learning_rate": 6.882515511609353e-06, "loss": 0.0665, "step": 43855 }, { "epoch": 0.7822209538757893, "grad_norm": 0.2097027748823166, "learning_rate": 6.881443011558817e-06, "loss": 0.117, "step": 43856 }, { "epoch": 0.782238789997503, "grad_norm": 0.2694172263145447, "learning_rate": 6.880370581742071e-06, "loss": 0.1148, "step": 43857 }, { "epoch": 0.7822566261192166, "grad_norm": 0.22149275243282318, "learning_rate": 6.8792982221632606e-06, "loss": 0.1118, "step": 43858 }, { "epoch": 0.7822744622409303, "grad_norm": 0.2684837281703949, "learning_rate": 6.878225932826557e-06, "loss": 0.1523, "step": 43859 }, { "epoch": 0.782292298362644, "grad_norm": 0.26369479298591614, "learning_rate": 6.877153713736109e-06, "loss": 0.1392, "step": 43860 }, { "epoch": 0.7823101344843577, "grad_norm": 0.24295039474964142, "learning_rate": 6.8760815648960725e-06, "loss": 0.0775, "step": 43861 }, { "epoch": 0.7823279706060714, "grad_norm": 0.33857327699661255, "learning_rate": 6.8750094863105936e-06, "loss": 0.0966, "step": 43862 }, { "epoch": 0.7823458067277851, "grad_norm": 0.2506985664367676, "learning_rate": 6.87393747798385e-06, "loss": 0.1054, "step": 43863 }, { "epoch": 0.7823636428494988, "grad_norm": 0.3456091582775116, "learning_rate": 6.8728655399199815e-06, "loss": 0.1244, "step": 43864 }, { "epoch": 0.7823814789712125, "grad_norm": 0.25720977783203125, "learning_rate": 6.8717936721231475e-06, "loss": 0.1144, "step": 43865 }, { "epoch": 0.7823993150929262, "grad_norm": 0.3041876256465912, "learning_rate": 6.870721874597497e-06, "loss": 0.147, "step": 43866 }, { "epoch": 0.7824171512146398, "grad_norm": 0.2692136764526367, "learning_rate": 6.869650147347196e-06, "loss": 0.1315, "step": 43867 }, { "epoch": 0.7824349873363536, "grad_norm": 0.22703337669372559, "learning_rate": 6.868578490376393e-06, "loss": 0.1152, "step": 43868 }, { "epoch": 0.7824528234580673, "grad_norm": 0.318968802690506, "learning_rate": 6.867506903689244e-06, "loss": 0.0974, "step": 43869 }, { "epoch": 0.782470659579781, "grad_norm": 0.251120388507843, "learning_rate": 6.866435387289893e-06, "loss": 0.1455, "step": 43870 }, { "epoch": 0.7824884957014947, "grad_norm": 0.435168594121933, "learning_rate": 6.865363941182507e-06, "loss": 0.1353, "step": 43871 }, { "epoch": 0.7825063318232084, "grad_norm": 0.28813475370407104, "learning_rate": 6.864292565371228e-06, "loss": 0.1158, "step": 43872 }, { "epoch": 0.7825241679449221, "grad_norm": 0.3141791820526123, "learning_rate": 6.863221259860223e-06, "loss": 0.1158, "step": 43873 }, { "epoch": 0.7825420040666358, "grad_norm": 0.2665664255619049, "learning_rate": 6.862150024653635e-06, "loss": 0.081, "step": 43874 }, { "epoch": 0.7825598401883495, "grad_norm": 0.2959728538990021, "learning_rate": 6.861078859755613e-06, "loss": 0.1067, "step": 43875 }, { "epoch": 0.7825776763100631, "grad_norm": 0.4040279984474182, "learning_rate": 6.860007765170318e-06, "loss": 0.0859, "step": 43876 }, { "epoch": 0.7825955124317768, "grad_norm": 0.1971277892589569, "learning_rate": 6.8589367409019e-06, "loss": 0.0733, "step": 43877 }, { "epoch": 0.7826133485534905, "grad_norm": 0.28245672583580017, "learning_rate": 6.857865786954509e-06, "loss": 0.1077, "step": 43878 }, { "epoch": 0.7826311846752042, "grad_norm": 0.2788733243942261, "learning_rate": 6.856794903332292e-06, "loss": 0.081, "step": 43879 }, { "epoch": 0.7826490207969179, "grad_norm": 0.2858203053474426, "learning_rate": 6.855724090039408e-06, "loss": 0.1732, "step": 43880 }, { "epoch": 0.7826668569186316, "grad_norm": 0.30907875299453735, "learning_rate": 6.8546533470800066e-06, "loss": 0.1307, "step": 43881 }, { "epoch": 0.7826846930403453, "grad_norm": 0.2967752516269684, "learning_rate": 6.853582674458234e-06, "loss": 0.1237, "step": 43882 }, { "epoch": 0.782702529162059, "grad_norm": 0.2370687872171402, "learning_rate": 6.852512072178236e-06, "loss": 0.1414, "step": 43883 }, { "epoch": 0.7827203652837726, "grad_norm": 0.23565343022346497, "learning_rate": 6.851441540244175e-06, "loss": 0.0707, "step": 43884 }, { "epoch": 0.7827382014054864, "grad_norm": 0.31024396419525146, "learning_rate": 6.850371078660189e-06, "loss": 0.1524, "step": 43885 }, { "epoch": 0.7827560375272001, "grad_norm": 0.31341469287872314, "learning_rate": 6.84930068743044e-06, "loss": 0.0962, "step": 43886 }, { "epoch": 0.7827738736489138, "grad_norm": 0.30206796526908875, "learning_rate": 6.8482303665590716e-06, "loss": 0.1373, "step": 43887 }, { "epoch": 0.7827917097706275, "grad_norm": 0.3254294693470001, "learning_rate": 6.847160116050225e-06, "loss": 0.1002, "step": 43888 }, { "epoch": 0.7828095458923412, "grad_norm": 0.235929474234581, "learning_rate": 6.8460899359080625e-06, "loss": 0.0718, "step": 43889 }, { "epoch": 0.7828273820140549, "grad_norm": 0.41313448548316956, "learning_rate": 6.845019826136726e-06, "loss": 0.1527, "step": 43890 }, { "epoch": 0.7828452181357686, "grad_norm": 0.28257110714912415, "learning_rate": 6.843949786740362e-06, "loss": 0.0807, "step": 43891 }, { "epoch": 0.7828630542574823, "grad_norm": 0.30631351470947266, "learning_rate": 6.8428798177231104e-06, "loss": 0.1376, "step": 43892 }, { "epoch": 0.782880890379196, "grad_norm": 0.2863081395626068, "learning_rate": 6.841809919089137e-06, "loss": 0.0975, "step": 43893 }, { "epoch": 0.7828987265009096, "grad_norm": 0.257034033536911, "learning_rate": 6.840740090842582e-06, "loss": 0.0913, "step": 43894 }, { "epoch": 0.7829165626226233, "grad_norm": 0.230775848031044, "learning_rate": 6.839670332987588e-06, "loss": 0.1023, "step": 43895 }, { "epoch": 0.782934398744337, "grad_norm": 0.3579098582267761, "learning_rate": 6.8386006455283035e-06, "loss": 0.1564, "step": 43896 }, { "epoch": 0.7829522348660507, "grad_norm": 0.24949531257152557, "learning_rate": 6.837531028468871e-06, "loss": 0.1204, "step": 43897 }, { "epoch": 0.7829700709877644, "grad_norm": 0.24813711643218994, "learning_rate": 6.836461481813447e-06, "loss": 0.1109, "step": 43898 }, { "epoch": 0.7829879071094781, "grad_norm": 0.21334555745124817, "learning_rate": 6.835392005566166e-06, "loss": 0.1135, "step": 43899 }, { "epoch": 0.7830057432311918, "grad_norm": 0.43501394987106323, "learning_rate": 6.834322599731185e-06, "loss": 0.2158, "step": 43900 }, { "epoch": 0.7830235793529055, "grad_norm": 0.2504563331604004, "learning_rate": 6.833253264312637e-06, "loss": 0.1289, "step": 43901 }, { "epoch": 0.7830414154746193, "grad_norm": 0.23282556235790253, "learning_rate": 6.832183999314682e-06, "loss": 0.1426, "step": 43902 }, { "epoch": 0.7830592515963329, "grad_norm": 0.24387699365615845, "learning_rate": 6.831114804741453e-06, "loss": 0.098, "step": 43903 }, { "epoch": 0.7830770877180466, "grad_norm": 0.35548272728919983, "learning_rate": 6.830045680597103e-06, "loss": 0.1442, "step": 43904 }, { "epoch": 0.7830949238397603, "grad_norm": 0.31087082624435425, "learning_rate": 6.828976626885763e-06, "loss": 0.0827, "step": 43905 }, { "epoch": 0.783112759961474, "grad_norm": 0.24772372841835022, "learning_rate": 6.827907643611592e-06, "loss": 0.1621, "step": 43906 }, { "epoch": 0.7831305960831877, "grad_norm": 0.2577557861804962, "learning_rate": 6.826838730778728e-06, "loss": 0.095, "step": 43907 }, { "epoch": 0.7831484322049014, "grad_norm": 0.250766783952713, "learning_rate": 6.825769888391315e-06, "loss": 0.1094, "step": 43908 }, { "epoch": 0.7831662683266151, "grad_norm": 0.28177720308303833, "learning_rate": 6.824701116453494e-06, "loss": 0.063, "step": 43909 }, { "epoch": 0.7831841044483288, "grad_norm": 0.2567859888076782, "learning_rate": 6.823632414969402e-06, "loss": 0.0855, "step": 43910 }, { "epoch": 0.7832019405700424, "grad_norm": 0.30823659896850586, "learning_rate": 6.822563783943195e-06, "loss": 0.1175, "step": 43911 }, { "epoch": 0.7832197766917561, "grad_norm": 0.28615644574165344, "learning_rate": 6.8214952233790015e-06, "loss": 0.1461, "step": 43912 }, { "epoch": 0.7832376128134698, "grad_norm": 0.2536158859729767, "learning_rate": 6.82042673328098e-06, "loss": 0.0681, "step": 43913 }, { "epoch": 0.7832554489351835, "grad_norm": 0.3210618495941162, "learning_rate": 6.8193583136532565e-06, "loss": 0.1579, "step": 43914 }, { "epoch": 0.7832732850568972, "grad_norm": 0.33921271562576294, "learning_rate": 6.818289964499985e-06, "loss": 0.1581, "step": 43915 }, { "epoch": 0.7832911211786109, "grad_norm": 0.20877552032470703, "learning_rate": 6.8172216858253035e-06, "loss": 0.0845, "step": 43916 }, { "epoch": 0.7833089573003246, "grad_norm": 0.2081398218870163, "learning_rate": 6.81615347763335e-06, "loss": 0.0666, "step": 43917 }, { "epoch": 0.7833267934220383, "grad_norm": 0.3053925633430481, "learning_rate": 6.81508533992826e-06, "loss": 0.114, "step": 43918 }, { "epoch": 0.7833446295437521, "grad_norm": 0.2868819534778595, "learning_rate": 6.814017272714185e-06, "loss": 0.1223, "step": 43919 }, { "epoch": 0.7833624656654657, "grad_norm": 0.27690690755844116, "learning_rate": 6.812949275995262e-06, "loss": 0.1664, "step": 43920 }, { "epoch": 0.7833803017871794, "grad_norm": 0.3040023446083069, "learning_rate": 6.811881349775628e-06, "loss": 0.0935, "step": 43921 }, { "epoch": 0.7833981379088931, "grad_norm": 0.29304078221321106, "learning_rate": 6.810813494059423e-06, "loss": 0.1231, "step": 43922 }, { "epoch": 0.7834159740306068, "grad_norm": 0.21517883241176605, "learning_rate": 6.8097457088507815e-06, "loss": 0.1323, "step": 43923 }, { "epoch": 0.7834338101523205, "grad_norm": 0.2635672688484192, "learning_rate": 6.808677994153856e-06, "loss": 0.1296, "step": 43924 }, { "epoch": 0.7834516462740342, "grad_norm": 0.36073535680770874, "learning_rate": 6.807610349972776e-06, "loss": 0.0946, "step": 43925 }, { "epoch": 0.7834694823957479, "grad_norm": 0.2448207139968872, "learning_rate": 6.806542776311678e-06, "loss": 0.0882, "step": 43926 }, { "epoch": 0.7834873185174616, "grad_norm": 0.2364901900291443, "learning_rate": 6.8054752731746996e-06, "loss": 0.0632, "step": 43927 }, { "epoch": 0.7835051546391752, "grad_norm": 0.33054494857788086, "learning_rate": 6.8044078405659934e-06, "loss": 0.1269, "step": 43928 }, { "epoch": 0.7835229907608889, "grad_norm": 0.308354914188385, "learning_rate": 6.803340478489686e-06, "loss": 0.1209, "step": 43929 }, { "epoch": 0.7835408268826026, "grad_norm": 0.28551816940307617, "learning_rate": 6.802273186949914e-06, "loss": 0.0817, "step": 43930 }, { "epoch": 0.7835586630043163, "grad_norm": 0.20581507682800293, "learning_rate": 6.801205965950813e-06, "loss": 0.0769, "step": 43931 }, { "epoch": 0.78357649912603, "grad_norm": 0.32127198576927185, "learning_rate": 6.800138815496529e-06, "loss": 0.1092, "step": 43932 }, { "epoch": 0.7835943352477437, "grad_norm": 0.2638777196407318, "learning_rate": 6.799071735591192e-06, "loss": 0.1252, "step": 43933 }, { "epoch": 0.7836121713694574, "grad_norm": 0.29972749948501587, "learning_rate": 6.798004726238941e-06, "loss": 0.1693, "step": 43934 }, { "epoch": 0.7836300074911711, "grad_norm": 0.20652666687965393, "learning_rate": 6.796937787443908e-06, "loss": 0.0846, "step": 43935 }, { "epoch": 0.7836478436128849, "grad_norm": 0.42658916115760803, "learning_rate": 6.795870919210226e-06, "loss": 0.1372, "step": 43936 }, { "epoch": 0.7836656797345986, "grad_norm": 0.43499237298965454, "learning_rate": 6.794804121542042e-06, "loss": 0.0947, "step": 43937 }, { "epoch": 0.7836835158563122, "grad_norm": 0.2992672026157379, "learning_rate": 6.793737394443486e-06, "loss": 0.0922, "step": 43938 }, { "epoch": 0.7837013519780259, "grad_norm": 0.17826256155967712, "learning_rate": 6.792670737918685e-06, "loss": 0.0666, "step": 43939 }, { "epoch": 0.7837191880997396, "grad_norm": 0.2414674162864685, "learning_rate": 6.791604151971786e-06, "loss": 0.1049, "step": 43940 }, { "epoch": 0.7837370242214533, "grad_norm": 0.24372652173042297, "learning_rate": 6.790537636606914e-06, "loss": 0.0896, "step": 43941 }, { "epoch": 0.783754860343167, "grad_norm": 0.37273260951042175, "learning_rate": 6.789471191828215e-06, "loss": 0.1177, "step": 43942 }, { "epoch": 0.7837726964648807, "grad_norm": 0.2619059681892395, "learning_rate": 6.788404817639812e-06, "loss": 0.1578, "step": 43943 }, { "epoch": 0.7837905325865944, "grad_norm": 0.2871662974357605, "learning_rate": 6.7873385140458355e-06, "loss": 0.1044, "step": 43944 }, { "epoch": 0.7838083687083081, "grad_norm": 0.2847343981266022, "learning_rate": 6.786272281050435e-06, "loss": 0.1309, "step": 43945 }, { "epoch": 0.7838262048300217, "grad_norm": 0.23448815941810608, "learning_rate": 6.785206118657733e-06, "loss": 0.1646, "step": 43946 }, { "epoch": 0.7838440409517354, "grad_norm": 0.26971331238746643, "learning_rate": 6.784140026871863e-06, "loss": 0.1352, "step": 43947 }, { "epoch": 0.7838618770734491, "grad_norm": 0.3233478367328644, "learning_rate": 6.783074005696957e-06, "loss": 0.1504, "step": 43948 }, { "epoch": 0.7838797131951628, "grad_norm": 0.24761062860488892, "learning_rate": 6.78200805513714e-06, "loss": 0.099, "step": 43949 }, { "epoch": 0.7838975493168765, "grad_norm": 0.29679733514785767, "learning_rate": 6.7809421751965616e-06, "loss": 0.141, "step": 43950 }, { "epoch": 0.7839153854385902, "grad_norm": 0.4595220983028412, "learning_rate": 6.779876365879342e-06, "loss": 0.1813, "step": 43951 }, { "epoch": 0.7839332215603039, "grad_norm": 0.47264882922172546, "learning_rate": 6.778810627189616e-06, "loss": 0.1052, "step": 43952 }, { "epoch": 0.7839510576820177, "grad_norm": 0.19188813865184784, "learning_rate": 6.7777449591315074e-06, "loss": 0.0832, "step": 43953 }, { "epoch": 0.7839688938037314, "grad_norm": 0.26854994893074036, "learning_rate": 6.776679361709151e-06, "loss": 0.1306, "step": 43954 }, { "epoch": 0.783986729925445, "grad_norm": 0.23165906965732574, "learning_rate": 6.775613834926686e-06, "loss": 0.1024, "step": 43955 }, { "epoch": 0.7840045660471587, "grad_norm": 0.24162918329238892, "learning_rate": 6.77454837878824e-06, "loss": 0.1252, "step": 43956 }, { "epoch": 0.7840224021688724, "grad_norm": 0.18442334234714508, "learning_rate": 6.773482993297928e-06, "loss": 0.0696, "step": 43957 }, { "epoch": 0.7840402382905861, "grad_norm": 0.3295544981956482, "learning_rate": 6.772417678459902e-06, "loss": 0.1308, "step": 43958 }, { "epoch": 0.7840580744122998, "grad_norm": 0.32126617431640625, "learning_rate": 6.7713524342782776e-06, "loss": 0.1189, "step": 43959 }, { "epoch": 0.7840759105340135, "grad_norm": 0.2620071470737457, "learning_rate": 6.770287260757188e-06, "loss": 0.0904, "step": 43960 }, { "epoch": 0.7840937466557272, "grad_norm": 0.21310366690158844, "learning_rate": 6.769222157900762e-06, "loss": 0.0881, "step": 43961 }, { "epoch": 0.7841115827774409, "grad_norm": 0.3438452482223511, "learning_rate": 6.7681571257131205e-06, "loss": 0.1422, "step": 43962 }, { "epoch": 0.7841294188991546, "grad_norm": 0.24785585701465607, "learning_rate": 6.767092164198407e-06, "loss": 0.113, "step": 43963 }, { "epoch": 0.7841472550208682, "grad_norm": 0.3359091281890869, "learning_rate": 6.76602727336074e-06, "loss": 0.1273, "step": 43964 }, { "epoch": 0.7841650911425819, "grad_norm": 0.30333253741264343, "learning_rate": 6.764962453204249e-06, "loss": 0.1639, "step": 43965 }, { "epoch": 0.7841829272642956, "grad_norm": 0.31708574295043945, "learning_rate": 6.763897703733055e-06, "loss": 0.1451, "step": 43966 }, { "epoch": 0.7842007633860093, "grad_norm": 0.3266368508338928, "learning_rate": 6.762833024951301e-06, "loss": 0.1222, "step": 43967 }, { "epoch": 0.784218599507723, "grad_norm": 0.25479280948638916, "learning_rate": 6.761768416863096e-06, "loss": 0.1451, "step": 43968 }, { "epoch": 0.7842364356294368, "grad_norm": 0.2788829803466797, "learning_rate": 6.760703879472582e-06, "loss": 0.1289, "step": 43969 }, { "epoch": 0.7842542717511505, "grad_norm": 0.2246149480342865, "learning_rate": 6.759639412783875e-06, "loss": 0.131, "step": 43970 }, { "epoch": 0.7842721078728642, "grad_norm": 0.2637142539024353, "learning_rate": 6.758575016801111e-06, "loss": 0.1328, "step": 43971 }, { "epoch": 0.7842899439945779, "grad_norm": 0.27741870284080505, "learning_rate": 6.757510691528412e-06, "loss": 0.1065, "step": 43972 }, { "epoch": 0.7843077801162915, "grad_norm": 0.2824453115463257, "learning_rate": 6.7564464369699015e-06, "loss": 0.1518, "step": 43973 }, { "epoch": 0.7843256162380052, "grad_norm": 0.23903805017471313, "learning_rate": 6.755382253129705e-06, "loss": 0.1283, "step": 43974 }, { "epoch": 0.7843434523597189, "grad_norm": 0.33735576272010803, "learning_rate": 6.754318140011942e-06, "loss": 0.0976, "step": 43975 }, { "epoch": 0.7843612884814326, "grad_norm": 0.20487698912620544, "learning_rate": 6.753254097620751e-06, "loss": 0.0272, "step": 43976 }, { "epoch": 0.7843791246031463, "grad_norm": 0.31918108463287354, "learning_rate": 6.752190125960251e-06, "loss": 0.1709, "step": 43977 }, { "epoch": 0.78439696072486, "grad_norm": 0.31330522894859314, "learning_rate": 6.7511262250345625e-06, "loss": 0.1598, "step": 43978 }, { "epoch": 0.7844147968465737, "grad_norm": 0.4117167592048645, "learning_rate": 6.7500623948478035e-06, "loss": 0.1416, "step": 43979 }, { "epoch": 0.7844326329682874, "grad_norm": 0.2615671455860138, "learning_rate": 6.748998635404114e-06, "loss": 0.1172, "step": 43980 }, { "epoch": 0.784450469090001, "grad_norm": 0.2784390449523926, "learning_rate": 6.747934946707604e-06, "loss": 0.1363, "step": 43981 }, { "epoch": 0.7844683052117147, "grad_norm": 0.2899610102176666, "learning_rate": 6.746871328762408e-06, "loss": 0.0806, "step": 43982 }, { "epoch": 0.7844861413334284, "grad_norm": 0.23531195521354675, "learning_rate": 6.745807781572636e-06, "loss": 0.1121, "step": 43983 }, { "epoch": 0.7845039774551421, "grad_norm": 0.27561086416244507, "learning_rate": 6.744744305142425e-06, "loss": 0.1054, "step": 43984 }, { "epoch": 0.7845218135768558, "grad_norm": 0.3087584376335144, "learning_rate": 6.743680899475891e-06, "loss": 0.1488, "step": 43985 }, { "epoch": 0.7845396496985696, "grad_norm": 0.23982226848602295, "learning_rate": 6.742617564577152e-06, "loss": 0.0784, "step": 43986 }, { "epoch": 0.7845574858202833, "grad_norm": 0.26415035128593445, "learning_rate": 6.741554300450339e-06, "loss": 0.1149, "step": 43987 }, { "epoch": 0.784575321941997, "grad_norm": 0.2885240614414215, "learning_rate": 6.740491107099556e-06, "loss": 0.1686, "step": 43988 }, { "epoch": 0.7845931580637107, "grad_norm": 0.2962561249732971, "learning_rate": 6.739427984528945e-06, "loss": 0.1542, "step": 43989 }, { "epoch": 0.7846109941854243, "grad_norm": 0.5219005942344666, "learning_rate": 6.738364932742619e-06, "loss": 0.2019, "step": 43990 }, { "epoch": 0.784628830307138, "grad_norm": 0.30370572209358215, "learning_rate": 6.737301951744696e-06, "loss": 0.1721, "step": 43991 }, { "epoch": 0.7846466664288517, "grad_norm": 0.2811250388622284, "learning_rate": 6.736239041539294e-06, "loss": 0.1278, "step": 43992 }, { "epoch": 0.7846645025505654, "grad_norm": 0.2856088876724243, "learning_rate": 6.7351762021305415e-06, "loss": 0.0667, "step": 43993 }, { "epoch": 0.7846823386722791, "grad_norm": 0.26672953367233276, "learning_rate": 6.734113433522557e-06, "loss": 0.1022, "step": 43994 }, { "epoch": 0.7847001747939928, "grad_norm": 0.2474367469549179, "learning_rate": 6.733050735719448e-06, "loss": 0.1071, "step": 43995 }, { "epoch": 0.7847180109157065, "grad_norm": 0.3611794710159302, "learning_rate": 6.731988108725354e-06, "loss": 0.0933, "step": 43996 }, { "epoch": 0.7847358470374202, "grad_norm": 0.24332818388938904, "learning_rate": 6.730925552544373e-06, "loss": 0.1158, "step": 43997 }, { "epoch": 0.7847536831591339, "grad_norm": 0.27692005038261414, "learning_rate": 6.729863067180645e-06, "loss": 0.1256, "step": 43998 }, { "epoch": 0.7847715192808475, "grad_norm": 0.40714526176452637, "learning_rate": 6.728800652638276e-06, "loss": 0.1782, "step": 43999 }, { "epoch": 0.7847893554025612, "grad_norm": 0.3263236880302429, "learning_rate": 6.727738308921386e-06, "loss": 0.0926, "step": 44000 }, { "epoch": 0.7847893554025612, "eval_loss": 0.11120539903640747, "eval_runtime": 108.1296, "eval_samples_per_second": 9.47, "eval_steps_per_second": 1.581, "step": 44000 }, { "epoch": 0.7848071915242749, "grad_norm": 0.18879170715808868, "learning_rate": 6.726676036034086e-06, "loss": 0.072, "step": 44001 }, { "epoch": 0.7848250276459886, "grad_norm": 0.25284790992736816, "learning_rate": 6.725613833980509e-06, "loss": 0.1251, "step": 44002 }, { "epoch": 0.7848428637677024, "grad_norm": 0.22584082186222076, "learning_rate": 6.724551702764764e-06, "loss": 0.0748, "step": 44003 }, { "epoch": 0.7848606998894161, "grad_norm": 0.2852792739868164, "learning_rate": 6.7234896423909705e-06, "loss": 0.1283, "step": 44004 }, { "epoch": 0.7848785360111298, "grad_norm": 0.2150062769651413, "learning_rate": 6.722427652863236e-06, "loss": 0.0903, "step": 44005 }, { "epoch": 0.7848963721328435, "grad_norm": 0.26435598731040955, "learning_rate": 6.721365734185692e-06, "loss": 0.0964, "step": 44006 }, { "epoch": 0.7849142082545572, "grad_norm": 0.3176110088825226, "learning_rate": 6.720303886362444e-06, "loss": 0.142, "step": 44007 }, { "epoch": 0.7849320443762708, "grad_norm": 0.28956207633018494, "learning_rate": 6.719242109397617e-06, "loss": 0.0835, "step": 44008 }, { "epoch": 0.7849498804979845, "grad_norm": 0.2351217269897461, "learning_rate": 6.71818040329531e-06, "loss": 0.0728, "step": 44009 }, { "epoch": 0.7849677166196982, "grad_norm": 0.22506709396839142, "learning_rate": 6.717118768059652e-06, "loss": 0.1348, "step": 44010 }, { "epoch": 0.7849855527414119, "grad_norm": 0.3307000696659088, "learning_rate": 6.716057203694764e-06, "loss": 0.1138, "step": 44011 }, { "epoch": 0.7850033888631256, "grad_norm": 0.25659558176994324, "learning_rate": 6.714995710204755e-06, "loss": 0.092, "step": 44012 }, { "epoch": 0.7850212249848393, "grad_norm": 0.2781889736652374, "learning_rate": 6.713934287593734e-06, "loss": 0.0997, "step": 44013 }, { "epoch": 0.785039061106553, "grad_norm": 0.22953982651233673, "learning_rate": 6.712872935865816e-06, "loss": 0.12, "step": 44014 }, { "epoch": 0.7850568972282667, "grad_norm": 0.22984986007213593, "learning_rate": 6.711811655025124e-06, "loss": 0.1173, "step": 44015 }, { "epoch": 0.7850747333499803, "grad_norm": 0.19141502678394318, "learning_rate": 6.710750445075767e-06, "loss": 0.0783, "step": 44016 }, { "epoch": 0.785092569471694, "grad_norm": 0.24057647585868835, "learning_rate": 6.709689306021857e-06, "loss": 0.0946, "step": 44017 }, { "epoch": 0.7851104055934077, "grad_norm": 0.25510695576667786, "learning_rate": 6.7086282378675035e-06, "loss": 0.13, "step": 44018 }, { "epoch": 0.7851282417151214, "grad_norm": 0.1844690442085266, "learning_rate": 6.70756724061683e-06, "loss": 0.0952, "step": 44019 }, { "epoch": 0.7851460778368352, "grad_norm": 0.2828642725944519, "learning_rate": 6.706506314273944e-06, "loss": 0.1141, "step": 44020 }, { "epoch": 0.7851639139585489, "grad_norm": 0.3131805658340454, "learning_rate": 6.70544545884296e-06, "loss": 0.1497, "step": 44021 }, { "epoch": 0.7851817500802626, "grad_norm": 0.2940351068973541, "learning_rate": 6.7043846743279794e-06, "loss": 0.1044, "step": 44022 }, { "epoch": 0.7851995862019763, "grad_norm": 0.23246759176254272, "learning_rate": 6.703323960733129e-06, "loss": 0.0636, "step": 44023 }, { "epoch": 0.78521742232369, "grad_norm": 0.2708224952220917, "learning_rate": 6.70226331806251e-06, "loss": 0.1226, "step": 44024 }, { "epoch": 0.7852352584454036, "grad_norm": 0.30450719594955444, "learning_rate": 6.701202746320243e-06, "loss": 0.1517, "step": 44025 }, { "epoch": 0.7852530945671173, "grad_norm": 0.3627087473869324, "learning_rate": 6.700142245510435e-06, "loss": 0.1171, "step": 44026 }, { "epoch": 0.785270930688831, "grad_norm": 0.526991069316864, "learning_rate": 6.699081815637187e-06, "loss": 0.1053, "step": 44027 }, { "epoch": 0.7852887668105447, "grad_norm": 0.33565235137939453, "learning_rate": 6.698021456704628e-06, "loss": 0.1473, "step": 44028 }, { "epoch": 0.7853066029322584, "grad_norm": 0.4455755650997162, "learning_rate": 6.696961168716862e-06, "loss": 0.177, "step": 44029 }, { "epoch": 0.7853244390539721, "grad_norm": 0.3626958131790161, "learning_rate": 6.695900951677992e-06, "loss": 0.1453, "step": 44030 }, { "epoch": 0.7853422751756858, "grad_norm": 0.29024064540863037, "learning_rate": 6.6948408055921255e-06, "loss": 0.1466, "step": 44031 }, { "epoch": 0.7853601112973995, "grad_norm": 0.23067434132099152, "learning_rate": 6.693780730463387e-06, "loss": 0.094, "step": 44032 }, { "epoch": 0.7853779474191132, "grad_norm": 0.2677970826625824, "learning_rate": 6.692720726295876e-06, "loss": 0.1596, "step": 44033 }, { "epoch": 0.7853957835408268, "grad_norm": 0.2243032306432724, "learning_rate": 6.691660793093704e-06, "loss": 0.129, "step": 44034 }, { "epoch": 0.7854136196625405, "grad_norm": 0.22577647864818573, "learning_rate": 6.690600930860969e-06, "loss": 0.1229, "step": 44035 }, { "epoch": 0.7854314557842542, "grad_norm": 0.24724464118480682, "learning_rate": 6.689541139601799e-06, "loss": 0.156, "step": 44036 }, { "epoch": 0.785449291905968, "grad_norm": 0.2320672869682312, "learning_rate": 6.68848141932028e-06, "loss": 0.0798, "step": 44037 }, { "epoch": 0.7854671280276817, "grad_norm": 0.3871418237686157, "learning_rate": 6.687421770020541e-06, "loss": 0.0982, "step": 44038 }, { "epoch": 0.7854849641493954, "grad_norm": 0.24611538648605347, "learning_rate": 6.686362191706683e-06, "loss": 0.0809, "step": 44039 }, { "epoch": 0.7855028002711091, "grad_norm": 0.1619541347026825, "learning_rate": 6.685302684382799e-06, "loss": 0.0761, "step": 44040 }, { "epoch": 0.7855206363928228, "grad_norm": 0.2616454064846039, "learning_rate": 6.684243248053018e-06, "loss": 0.1387, "step": 44041 }, { "epoch": 0.7855384725145365, "grad_norm": 0.2392408400774002, "learning_rate": 6.683183882721434e-06, "loss": 0.1228, "step": 44042 }, { "epoch": 0.7855563086362501, "grad_norm": 0.3478841781616211, "learning_rate": 6.682124588392155e-06, "loss": 0.1072, "step": 44043 }, { "epoch": 0.7855741447579638, "grad_norm": 0.29405397176742554, "learning_rate": 6.681065365069283e-06, "loss": 0.0961, "step": 44044 }, { "epoch": 0.7855919808796775, "grad_norm": 0.2923218011856079, "learning_rate": 6.680006212756934e-06, "loss": 0.1052, "step": 44045 }, { "epoch": 0.7856098170013912, "grad_norm": 0.3023574948310852, "learning_rate": 6.678947131459213e-06, "loss": 0.092, "step": 44046 }, { "epoch": 0.7856276531231049, "grad_norm": 0.23581300675868988, "learning_rate": 6.677888121180214e-06, "loss": 0.1415, "step": 44047 }, { "epoch": 0.7856454892448186, "grad_norm": 0.2426312118768692, "learning_rate": 6.676829181924046e-06, "loss": 0.097, "step": 44048 }, { "epoch": 0.7856633253665323, "grad_norm": 0.26105406880378723, "learning_rate": 6.6757703136948235e-06, "loss": 0.1386, "step": 44049 }, { "epoch": 0.785681161488246, "grad_norm": 0.28056710958480835, "learning_rate": 6.674711516496643e-06, "loss": 0.1255, "step": 44050 }, { "epoch": 0.7856989976099596, "grad_norm": 0.33771297335624695, "learning_rate": 6.673652790333604e-06, "loss": 0.1507, "step": 44051 }, { "epoch": 0.7857168337316733, "grad_norm": 0.29584062099456787, "learning_rate": 6.672594135209823e-06, "loss": 0.1335, "step": 44052 }, { "epoch": 0.785734669853387, "grad_norm": 0.26861950755119324, "learning_rate": 6.6715355511293905e-06, "loss": 0.1004, "step": 44053 }, { "epoch": 0.7857525059751008, "grad_norm": 0.47461599111557007, "learning_rate": 6.6704770380964235e-06, "loss": 0.1927, "step": 44054 }, { "epoch": 0.7857703420968145, "grad_norm": 0.22906163334846497, "learning_rate": 6.669418596115018e-06, "loss": 0.1195, "step": 44055 }, { "epoch": 0.7857881782185282, "grad_norm": 0.3668989837169647, "learning_rate": 6.668360225189277e-06, "loss": 0.099, "step": 44056 }, { "epoch": 0.7858060143402419, "grad_norm": 0.2638819217681885, "learning_rate": 6.667301925323296e-06, "loss": 0.1338, "step": 44057 }, { "epoch": 0.7858238504619556, "grad_norm": 0.22786612808704376, "learning_rate": 6.666243696521194e-06, "loss": 0.1199, "step": 44058 }, { "epoch": 0.7858416865836693, "grad_norm": 0.303611159324646, "learning_rate": 6.665185538787061e-06, "loss": 0.1326, "step": 44059 }, { "epoch": 0.785859522705383, "grad_norm": 0.293671578168869, "learning_rate": 6.664127452125005e-06, "loss": 0.1643, "step": 44060 }, { "epoch": 0.7858773588270966, "grad_norm": 0.2638128995895386, "learning_rate": 6.663069436539113e-06, "loss": 0.1016, "step": 44061 }, { "epoch": 0.7858951949488103, "grad_norm": 0.3524613082408905, "learning_rate": 6.662011492033507e-06, "loss": 0.1341, "step": 44062 }, { "epoch": 0.785913031070524, "grad_norm": 0.16846193373203278, "learning_rate": 6.660953618612276e-06, "loss": 0.0622, "step": 44063 }, { "epoch": 0.7859308671922377, "grad_norm": 0.2662205696105957, "learning_rate": 6.659895816279518e-06, "loss": 0.0639, "step": 44064 }, { "epoch": 0.7859487033139514, "grad_norm": 0.2711627185344696, "learning_rate": 6.658838085039343e-06, "loss": 0.1012, "step": 44065 }, { "epoch": 0.7859665394356651, "grad_norm": 0.30686184763908386, "learning_rate": 6.65778042489584e-06, "loss": 0.0938, "step": 44066 }, { "epoch": 0.7859843755573788, "grad_norm": 0.22379377484321594, "learning_rate": 6.656722835853124e-06, "loss": 0.0838, "step": 44067 }, { "epoch": 0.7860022116790925, "grad_norm": 0.28610849380493164, "learning_rate": 6.655665317915286e-06, "loss": 0.0515, "step": 44068 }, { "epoch": 0.7860200478008061, "grad_norm": 0.2630413770675659, "learning_rate": 6.654607871086424e-06, "loss": 0.1497, "step": 44069 }, { "epoch": 0.7860378839225199, "grad_norm": 0.22608932852745056, "learning_rate": 6.653550495370631e-06, "loss": 0.1221, "step": 44070 }, { "epoch": 0.7860557200442336, "grad_norm": 0.29861024022102356, "learning_rate": 6.652493190772019e-06, "loss": 0.1484, "step": 44071 }, { "epoch": 0.7860735561659473, "grad_norm": 0.2642778158187866, "learning_rate": 6.651435957294683e-06, "loss": 0.1031, "step": 44072 }, { "epoch": 0.786091392287661, "grad_norm": 0.24304579198360443, "learning_rate": 6.650378794942716e-06, "loss": 0.1514, "step": 44073 }, { "epoch": 0.7861092284093747, "grad_norm": 0.20899224281311035, "learning_rate": 6.649321703720212e-06, "loss": 0.0754, "step": 44074 }, { "epoch": 0.7861270645310884, "grad_norm": 0.443224161863327, "learning_rate": 6.648264683631284e-06, "loss": 0.1127, "step": 44075 }, { "epoch": 0.7861449006528021, "grad_norm": 0.2710306644439697, "learning_rate": 6.647207734680019e-06, "loss": 0.1265, "step": 44076 }, { "epoch": 0.7861627367745158, "grad_norm": 0.2565361559391022, "learning_rate": 6.646150856870517e-06, "loss": 0.1612, "step": 44077 }, { "epoch": 0.7861805728962294, "grad_norm": 0.2519514858722687, "learning_rate": 6.645094050206865e-06, "loss": 0.114, "step": 44078 }, { "epoch": 0.7861984090179431, "grad_norm": 0.2312752604484558, "learning_rate": 6.644037314693174e-06, "loss": 0.0598, "step": 44079 }, { "epoch": 0.7862162451396568, "grad_norm": 0.3019639253616333, "learning_rate": 6.64298065033353e-06, "loss": 0.1264, "step": 44080 }, { "epoch": 0.7862340812613705, "grad_norm": 0.18915559351444244, "learning_rate": 6.64192405713204e-06, "loss": 0.0664, "step": 44081 }, { "epoch": 0.7862519173830842, "grad_norm": 0.5290731191635132, "learning_rate": 6.640867535092793e-06, "loss": 0.1023, "step": 44082 }, { "epoch": 0.7862697535047979, "grad_norm": 0.2455829232931137, "learning_rate": 6.6398110842198765e-06, "loss": 0.17, "step": 44083 }, { "epoch": 0.7862875896265116, "grad_norm": 0.2832101881504059, "learning_rate": 6.6387547045174e-06, "loss": 0.0911, "step": 44084 }, { "epoch": 0.7863054257482253, "grad_norm": 0.23851561546325684, "learning_rate": 6.637698395989453e-06, "loss": 0.0893, "step": 44085 }, { "epoch": 0.786323261869939, "grad_norm": 0.2527659833431244, "learning_rate": 6.63664215864013e-06, "loss": 0.0606, "step": 44086 }, { "epoch": 0.7863410979916527, "grad_norm": 0.2977469861507416, "learning_rate": 6.635585992473517e-06, "loss": 0.1266, "step": 44087 }, { "epoch": 0.7863589341133664, "grad_norm": 0.26551225781440735, "learning_rate": 6.634529897493721e-06, "loss": 0.0934, "step": 44088 }, { "epoch": 0.7863767702350801, "grad_norm": 0.3453245460987091, "learning_rate": 6.633473873704832e-06, "loss": 0.1202, "step": 44089 }, { "epoch": 0.7863946063567938, "grad_norm": 0.24251382052898407, "learning_rate": 6.632417921110942e-06, "loss": 0.0762, "step": 44090 }, { "epoch": 0.7864124424785075, "grad_norm": 0.2546616494655609, "learning_rate": 6.631362039716136e-06, "loss": 0.0995, "step": 44091 }, { "epoch": 0.7864302786002212, "grad_norm": 0.26139745116233826, "learning_rate": 6.630306229524521e-06, "loss": 0.1043, "step": 44092 }, { "epoch": 0.7864481147219349, "grad_norm": 0.29433515667915344, "learning_rate": 6.629250490540178e-06, "loss": 0.0932, "step": 44093 }, { "epoch": 0.7864659508436486, "grad_norm": 0.21292799711227417, "learning_rate": 6.628194822767214e-06, "loss": 0.1255, "step": 44094 }, { "epoch": 0.7864837869653623, "grad_norm": 0.29793331027030945, "learning_rate": 6.627139226209711e-06, "loss": 0.1582, "step": 44095 }, { "epoch": 0.7865016230870759, "grad_norm": 0.26597556471824646, "learning_rate": 6.626083700871755e-06, "loss": 0.1383, "step": 44096 }, { "epoch": 0.7865194592087896, "grad_norm": 0.22141043841838837, "learning_rate": 6.62502824675745e-06, "loss": 0.1089, "step": 44097 }, { "epoch": 0.7865372953305033, "grad_norm": 0.2063092589378357, "learning_rate": 6.623972863870884e-06, "loss": 0.092, "step": 44098 }, { "epoch": 0.786555131452217, "grad_norm": 0.24598990380764008, "learning_rate": 6.622917552216146e-06, "loss": 0.1184, "step": 44099 }, { "epoch": 0.7865729675739307, "grad_norm": 0.26491016149520874, "learning_rate": 6.621862311797319e-06, "loss": 0.1118, "step": 44100 }, { "epoch": 0.7865908036956444, "grad_norm": 0.26054850220680237, "learning_rate": 6.6208071426185095e-06, "loss": 0.1312, "step": 44101 }, { "epoch": 0.7866086398173581, "grad_norm": 0.22832149267196655, "learning_rate": 6.619752044683799e-06, "loss": 0.1279, "step": 44102 }, { "epoch": 0.7866264759390718, "grad_norm": 0.2645712196826935, "learning_rate": 6.6186970179972775e-06, "loss": 0.162, "step": 44103 }, { "epoch": 0.7866443120607856, "grad_norm": 0.32220274209976196, "learning_rate": 6.617642062563034e-06, "loss": 0.1027, "step": 44104 }, { "epoch": 0.7866621481824992, "grad_norm": 0.2885728180408478, "learning_rate": 6.616587178385153e-06, "loss": 0.093, "step": 44105 }, { "epoch": 0.7866799843042129, "grad_norm": 0.2289877086877823, "learning_rate": 6.615532365467738e-06, "loss": 0.0963, "step": 44106 }, { "epoch": 0.7866978204259266, "grad_norm": 0.314732164144516, "learning_rate": 6.614477623814861e-06, "loss": 0.1155, "step": 44107 }, { "epoch": 0.7867156565476403, "grad_norm": 0.29838791489601135, "learning_rate": 6.613422953430625e-06, "loss": 0.2084, "step": 44108 }, { "epoch": 0.786733492669354, "grad_norm": 0.26508355140686035, "learning_rate": 6.612368354319107e-06, "loss": 0.1286, "step": 44109 }, { "epoch": 0.7867513287910677, "grad_norm": 0.2413455992937088, "learning_rate": 6.611313826484405e-06, "loss": 0.1452, "step": 44110 }, { "epoch": 0.7867691649127814, "grad_norm": 0.29571884870529175, "learning_rate": 6.610259369930605e-06, "loss": 0.1165, "step": 44111 }, { "epoch": 0.7867870010344951, "grad_norm": 0.3609950840473175, "learning_rate": 6.609204984661787e-06, "loss": 0.1727, "step": 44112 }, { "epoch": 0.7868048371562087, "grad_norm": 0.3429317772388458, "learning_rate": 6.60815067068204e-06, "loss": 0.0949, "step": 44113 }, { "epoch": 0.7868226732779224, "grad_norm": 0.16995365917682648, "learning_rate": 6.607096427995457e-06, "loss": 0.1056, "step": 44114 }, { "epoch": 0.7868405093996361, "grad_norm": 0.2643730938434601, "learning_rate": 6.606042256606121e-06, "loss": 0.1085, "step": 44115 }, { "epoch": 0.7868583455213498, "grad_norm": 0.3374778628349304, "learning_rate": 6.604988156518122e-06, "loss": 0.1198, "step": 44116 }, { "epoch": 0.7868761816430635, "grad_norm": 0.24649089574813843, "learning_rate": 6.603934127735539e-06, "loss": 0.1162, "step": 44117 }, { "epoch": 0.7868940177647772, "grad_norm": 0.2864466905593872, "learning_rate": 6.602880170262455e-06, "loss": 0.1448, "step": 44118 }, { "epoch": 0.7869118538864909, "grad_norm": 0.2783842980861664, "learning_rate": 6.601826284102966e-06, "loss": 0.0748, "step": 44119 }, { "epoch": 0.7869296900082046, "grad_norm": 0.28984659910202026, "learning_rate": 6.600772469261149e-06, "loss": 0.1792, "step": 44120 }, { "epoch": 0.7869475261299184, "grad_norm": 0.26915243268013, "learning_rate": 6.599718725741099e-06, "loss": 0.1742, "step": 44121 }, { "epoch": 0.786965362251632, "grad_norm": 0.2268848568201065, "learning_rate": 6.598665053546888e-06, "loss": 0.1124, "step": 44122 }, { "epoch": 0.7869831983733457, "grad_norm": 0.25053054094314575, "learning_rate": 6.597611452682614e-06, "loss": 0.1393, "step": 44123 }, { "epoch": 0.7870010344950594, "grad_norm": 0.25598159432411194, "learning_rate": 6.596557923152352e-06, "loss": 0.0828, "step": 44124 }, { "epoch": 0.7870188706167731, "grad_norm": 0.2450539916753769, "learning_rate": 6.59550446496019e-06, "loss": 0.1358, "step": 44125 }, { "epoch": 0.7870367067384868, "grad_norm": 0.27337446808815, "learning_rate": 6.594451078110201e-06, "loss": 0.0911, "step": 44126 }, { "epoch": 0.7870545428602005, "grad_norm": 0.40898844599723816, "learning_rate": 6.5933977626064845e-06, "loss": 0.2003, "step": 44127 }, { "epoch": 0.7870723789819142, "grad_norm": 0.3594771921634674, "learning_rate": 6.592344518453117e-06, "loss": 0.0781, "step": 44128 }, { "epoch": 0.7870902151036279, "grad_norm": 0.20938251912593842, "learning_rate": 6.591291345654177e-06, "loss": 0.1095, "step": 44129 }, { "epoch": 0.7871080512253416, "grad_norm": 0.4531751871109009, "learning_rate": 6.590238244213753e-06, "loss": 0.1569, "step": 44130 }, { "epoch": 0.7871258873470552, "grad_norm": 0.2849150002002716, "learning_rate": 6.589185214135915e-06, "loss": 0.1223, "step": 44131 }, { "epoch": 0.7871437234687689, "grad_norm": 0.4003971815109253, "learning_rate": 6.588132255424762e-06, "loss": 0.0958, "step": 44132 }, { "epoch": 0.7871615595904826, "grad_norm": 0.2347901463508606, "learning_rate": 6.5870793680843676e-06, "loss": 0.0801, "step": 44133 }, { "epoch": 0.7871793957121963, "grad_norm": 0.30664703249931335, "learning_rate": 6.586026552118804e-06, "loss": 0.1286, "step": 44134 }, { "epoch": 0.78719723183391, "grad_norm": 0.326532244682312, "learning_rate": 6.584973807532163e-06, "loss": 0.1538, "step": 44135 }, { "epoch": 0.7872150679556237, "grad_norm": 0.3017213046550751, "learning_rate": 6.58392113432853e-06, "loss": 0.1564, "step": 44136 }, { "epoch": 0.7872329040773374, "grad_norm": 0.2550777196884155, "learning_rate": 6.582868532511982e-06, "loss": 0.0493, "step": 44137 }, { "epoch": 0.7872507401990512, "grad_norm": 0.25276365876197815, "learning_rate": 6.5818160020865935e-06, "loss": 0.1335, "step": 44138 }, { "epoch": 0.7872685763207649, "grad_norm": 0.2225196659564972, "learning_rate": 6.580763543056442e-06, "loss": 0.1137, "step": 44139 }, { "epoch": 0.7872864124424785, "grad_norm": 0.24990713596343994, "learning_rate": 6.579711155425622e-06, "loss": 0.0981, "step": 44140 }, { "epoch": 0.7873042485641922, "grad_norm": 0.23552751541137695, "learning_rate": 6.578658839198201e-06, "loss": 0.127, "step": 44141 }, { "epoch": 0.7873220846859059, "grad_norm": 0.27550774812698364, "learning_rate": 6.577606594378261e-06, "loss": 0.1125, "step": 44142 }, { "epoch": 0.7873399208076196, "grad_norm": 0.3579949140548706, "learning_rate": 6.5765544209698825e-06, "loss": 0.1141, "step": 44143 }, { "epoch": 0.7873577569293333, "grad_norm": 0.25921282172203064, "learning_rate": 6.575502318977134e-06, "loss": 0.0946, "step": 44144 }, { "epoch": 0.787375593051047, "grad_norm": 0.3165900409221649, "learning_rate": 6.5744502884041105e-06, "loss": 0.1219, "step": 44145 }, { "epoch": 0.7873934291727607, "grad_norm": 0.20930802822113037, "learning_rate": 6.573398329254879e-06, "loss": 0.0919, "step": 44146 }, { "epoch": 0.7874112652944744, "grad_norm": 0.2014750987291336, "learning_rate": 6.572346441533514e-06, "loss": 0.066, "step": 44147 }, { "epoch": 0.787429101416188, "grad_norm": 0.2759567201137543, "learning_rate": 6.571294625244107e-06, "loss": 0.1018, "step": 44148 }, { "epoch": 0.7874469375379017, "grad_norm": 0.22385810315608978, "learning_rate": 6.5702428803907176e-06, "loss": 0.1042, "step": 44149 }, { "epoch": 0.7874647736596154, "grad_norm": 0.20451262593269348, "learning_rate": 6.569191206977443e-06, "loss": 0.0971, "step": 44150 }, { "epoch": 0.7874826097813291, "grad_norm": 0.30356380343437195, "learning_rate": 6.568139605008347e-06, "loss": 0.0889, "step": 44151 }, { "epoch": 0.7875004459030428, "grad_norm": 0.21762573719024658, "learning_rate": 6.567088074487499e-06, "loss": 0.0701, "step": 44152 }, { "epoch": 0.7875182820247565, "grad_norm": 0.32316359877586365, "learning_rate": 6.566036615418994e-06, "loss": 0.1323, "step": 44153 }, { "epoch": 0.7875361181464702, "grad_norm": 0.20996029675006866, "learning_rate": 6.564985227806897e-06, "loss": 0.0836, "step": 44154 }, { "epoch": 0.787553954268184, "grad_norm": 0.28935426473617554, "learning_rate": 6.563933911655285e-06, "loss": 0.089, "step": 44155 }, { "epoch": 0.7875717903898977, "grad_norm": 0.26105326414108276, "learning_rate": 6.562882666968234e-06, "loss": 0.0968, "step": 44156 }, { "epoch": 0.7875896265116114, "grad_norm": 0.2796895503997803, "learning_rate": 6.561831493749809e-06, "loss": 0.0987, "step": 44157 }, { "epoch": 0.787607462633325, "grad_norm": 0.36162832379341125, "learning_rate": 6.560780392004101e-06, "loss": 0.0869, "step": 44158 }, { "epoch": 0.7876252987550387, "grad_norm": 0.32262855768203735, "learning_rate": 6.559729361735176e-06, "loss": 0.1833, "step": 44159 }, { "epoch": 0.7876431348767524, "grad_norm": 0.25811102986335754, "learning_rate": 6.5586784029471105e-06, "loss": 0.1136, "step": 44160 }, { "epoch": 0.7876609709984661, "grad_norm": 0.2625448703765869, "learning_rate": 6.557627515643968e-06, "loss": 0.0869, "step": 44161 }, { "epoch": 0.7876788071201798, "grad_norm": 0.2576397657394409, "learning_rate": 6.5565766998298385e-06, "loss": 0.0654, "step": 44162 }, { "epoch": 0.7876966432418935, "grad_norm": 0.19497458636760712, "learning_rate": 6.555525955508782e-06, "loss": 0.1238, "step": 44163 }, { "epoch": 0.7877144793636072, "grad_norm": 0.24587573111057281, "learning_rate": 6.5544752826848825e-06, "loss": 0.1382, "step": 44164 }, { "epoch": 0.7877323154853209, "grad_norm": 0.44457289576530457, "learning_rate": 6.553424681362202e-06, "loss": 0.1091, "step": 44165 }, { "epoch": 0.7877501516070345, "grad_norm": 0.25439682602882385, "learning_rate": 6.552374151544827e-06, "loss": 0.108, "step": 44166 }, { "epoch": 0.7877679877287482, "grad_norm": 0.3162674307823181, "learning_rate": 6.551323693236822e-06, "loss": 0.1064, "step": 44167 }, { "epoch": 0.7877858238504619, "grad_norm": 0.31190550327301025, "learning_rate": 6.550273306442256e-06, "loss": 0.1059, "step": 44168 }, { "epoch": 0.7878036599721756, "grad_norm": 0.3222202658653259, "learning_rate": 6.549222991165197e-06, "loss": 0.1667, "step": 44169 }, { "epoch": 0.7878214960938893, "grad_norm": 0.2259027659893036, "learning_rate": 6.548172747409728e-06, "loss": 0.0863, "step": 44170 }, { "epoch": 0.7878393322156031, "grad_norm": 0.22337354719638824, "learning_rate": 6.547122575179915e-06, "loss": 0.1119, "step": 44171 }, { "epoch": 0.7878571683373168, "grad_norm": 0.21057939529418945, "learning_rate": 6.546072474479828e-06, "loss": 0.09, "step": 44172 }, { "epoch": 0.7878750044590305, "grad_norm": 0.1994534283876419, "learning_rate": 6.5450224453135394e-06, "loss": 0.0961, "step": 44173 }, { "epoch": 0.7878928405807442, "grad_norm": 0.2664986848831177, "learning_rate": 6.54397248768511e-06, "loss": 0.0644, "step": 44174 }, { "epoch": 0.7879106767024578, "grad_norm": 0.2602781355381012, "learning_rate": 6.542922601598625e-06, "loss": 0.1645, "step": 44175 }, { "epoch": 0.7879285128241715, "grad_norm": 0.21840496361255646, "learning_rate": 6.54187278705814e-06, "loss": 0.0982, "step": 44176 }, { "epoch": 0.7879463489458852, "grad_norm": 0.22354631125926971, "learning_rate": 6.540823044067737e-06, "loss": 0.0952, "step": 44177 }, { "epoch": 0.7879641850675989, "grad_norm": 0.255677729845047, "learning_rate": 6.5397733726314716e-06, "loss": 0.1202, "step": 44178 }, { "epoch": 0.7879820211893126, "grad_norm": 0.24608290195465088, "learning_rate": 6.53872377275343e-06, "loss": 0.0968, "step": 44179 }, { "epoch": 0.7879998573110263, "grad_norm": 0.34032246470451355, "learning_rate": 6.537674244437672e-06, "loss": 0.1532, "step": 44180 }, { "epoch": 0.78801769343274, "grad_norm": 0.2574082911014557, "learning_rate": 6.536624787688264e-06, "loss": 0.1178, "step": 44181 }, { "epoch": 0.7880355295544537, "grad_norm": 0.28018561005592346, "learning_rate": 6.535575402509269e-06, "loss": 0.1359, "step": 44182 }, { "epoch": 0.7880533656761673, "grad_norm": 0.24346445500850677, "learning_rate": 6.534526088904769e-06, "loss": 0.0768, "step": 44183 }, { "epoch": 0.788071201797881, "grad_norm": 0.24253524839878082, "learning_rate": 6.533476846878822e-06, "loss": 0.1442, "step": 44184 }, { "epoch": 0.7880890379195947, "grad_norm": 0.2577386498451233, "learning_rate": 6.5324276764354956e-06, "loss": 0.1385, "step": 44185 }, { "epoch": 0.7881068740413084, "grad_norm": 0.29873839020729065, "learning_rate": 6.531378577578862e-06, "loss": 0.0844, "step": 44186 }, { "epoch": 0.7881247101630221, "grad_norm": 0.2075805813074112, "learning_rate": 6.530329550312972e-06, "loss": 0.106, "step": 44187 }, { "epoch": 0.7881425462847359, "grad_norm": 0.29521387815475464, "learning_rate": 6.529280594641915e-06, "loss": 0.0875, "step": 44188 }, { "epoch": 0.7881603824064496, "grad_norm": 0.2747701108455658, "learning_rate": 6.528231710569744e-06, "loss": 0.1558, "step": 44189 }, { "epoch": 0.7881782185281633, "grad_norm": 0.19759778678417206, "learning_rate": 6.52718289810052e-06, "loss": 0.1004, "step": 44190 }, { "epoch": 0.788196054649877, "grad_norm": 0.3707346022129059, "learning_rate": 6.526134157238315e-06, "loss": 0.1194, "step": 44191 }, { "epoch": 0.7882138907715907, "grad_norm": 0.31901875138282776, "learning_rate": 6.525085487987203e-06, "loss": 0.0578, "step": 44192 }, { "epoch": 0.7882317268933043, "grad_norm": 0.285643994808197, "learning_rate": 6.52403689035124e-06, "loss": 0.1198, "step": 44193 }, { "epoch": 0.788249563015018, "grad_norm": 0.34529829025268555, "learning_rate": 6.52298836433449e-06, "loss": 0.1275, "step": 44194 }, { "epoch": 0.7882673991367317, "grad_norm": 0.23882043361663818, "learning_rate": 6.521939909941013e-06, "loss": 0.1015, "step": 44195 }, { "epoch": 0.7882852352584454, "grad_norm": 0.28525108098983765, "learning_rate": 6.5208915271748886e-06, "loss": 0.1171, "step": 44196 }, { "epoch": 0.7883030713801591, "grad_norm": 0.28349438309669495, "learning_rate": 6.519843216040167e-06, "loss": 0.1545, "step": 44197 }, { "epoch": 0.7883209075018728, "grad_norm": 0.22000150382518768, "learning_rate": 6.5187949765409175e-06, "loss": 0.1256, "step": 44198 }, { "epoch": 0.7883387436235865, "grad_norm": 0.20423874258995056, "learning_rate": 6.517746808681202e-06, "loss": 0.0935, "step": 44199 }, { "epoch": 0.7883565797453002, "grad_norm": 0.35680824518203735, "learning_rate": 6.516698712465077e-06, "loss": 0.1289, "step": 44200 }, { "epoch": 0.7883744158670138, "grad_norm": 0.343405544757843, "learning_rate": 6.515650687896619e-06, "loss": 0.1619, "step": 44201 }, { "epoch": 0.7883922519887275, "grad_norm": 0.30054977536201477, "learning_rate": 6.514602734979883e-06, "loss": 0.1145, "step": 44202 }, { "epoch": 0.7884100881104412, "grad_norm": 0.27992501854896545, "learning_rate": 6.513554853718923e-06, "loss": 0.0883, "step": 44203 }, { "epoch": 0.7884279242321549, "grad_norm": 0.2517690658569336, "learning_rate": 6.512507044117816e-06, "loss": 0.1296, "step": 44204 }, { "epoch": 0.7884457603538687, "grad_norm": 0.391408234834671, "learning_rate": 6.511459306180612e-06, "loss": 0.1029, "step": 44205 }, { "epoch": 0.7884635964755824, "grad_norm": 0.3042883276939392, "learning_rate": 6.5104116399113845e-06, "loss": 0.0887, "step": 44206 }, { "epoch": 0.7884814325972961, "grad_norm": 0.24673470854759216, "learning_rate": 6.509364045314187e-06, "loss": 0.0883, "step": 44207 }, { "epoch": 0.7884992687190098, "grad_norm": 0.26099276542663574, "learning_rate": 6.508316522393074e-06, "loss": 0.1171, "step": 44208 }, { "epoch": 0.7885171048407235, "grad_norm": 0.26348787546157837, "learning_rate": 6.507269071152119e-06, "loss": 0.0777, "step": 44209 }, { "epoch": 0.7885349409624371, "grad_norm": 0.3050234615802765, "learning_rate": 6.5062216915953766e-06, "loss": 0.091, "step": 44210 }, { "epoch": 0.7885527770841508, "grad_norm": 0.32991647720336914, "learning_rate": 6.505174383726908e-06, "loss": 0.1302, "step": 44211 }, { "epoch": 0.7885706132058645, "grad_norm": 0.3159943222999573, "learning_rate": 6.50412714755077e-06, "loss": 0.1138, "step": 44212 }, { "epoch": 0.7885884493275782, "grad_norm": 0.31988975405693054, "learning_rate": 6.503079983071017e-06, "loss": 0.152, "step": 44213 }, { "epoch": 0.7886062854492919, "grad_norm": 0.21109743416309357, "learning_rate": 6.50203289029172e-06, "loss": 0.109, "step": 44214 }, { "epoch": 0.7886241215710056, "grad_norm": 0.3054670989513397, "learning_rate": 6.500985869216936e-06, "loss": 0.0839, "step": 44215 }, { "epoch": 0.7886419576927193, "grad_norm": 0.21854069828987122, "learning_rate": 6.499938919850718e-06, "loss": 0.0688, "step": 44216 }, { "epoch": 0.788659793814433, "grad_norm": 0.3289863169193268, "learning_rate": 6.498892042197119e-06, "loss": 0.1317, "step": 44217 }, { "epoch": 0.7886776299361467, "grad_norm": 0.30067890882492065, "learning_rate": 6.497845236260205e-06, "loss": 0.1565, "step": 44218 }, { "epoch": 0.7886954660578603, "grad_norm": 0.2696108818054199, "learning_rate": 6.496798502044041e-06, "loss": 0.145, "step": 44219 }, { "epoch": 0.788713302179574, "grad_norm": 0.25394347310066223, "learning_rate": 6.495751839552675e-06, "loss": 0.0787, "step": 44220 }, { "epoch": 0.7887311383012877, "grad_norm": 0.2914705276489258, "learning_rate": 6.494705248790162e-06, "loss": 0.1398, "step": 44221 }, { "epoch": 0.7887489744230015, "grad_norm": 0.22038881480693817, "learning_rate": 6.493658729760565e-06, "loss": 0.0369, "step": 44222 }, { "epoch": 0.7887668105447152, "grad_norm": 0.19869518280029297, "learning_rate": 6.492612282467944e-06, "loss": 0.0431, "step": 44223 }, { "epoch": 0.7887846466664289, "grad_norm": 0.39566680788993835, "learning_rate": 6.491565906916347e-06, "loss": 0.0935, "step": 44224 }, { "epoch": 0.7888024827881426, "grad_norm": 0.2697812616825104, "learning_rate": 6.490519603109835e-06, "loss": 0.0966, "step": 44225 }, { "epoch": 0.7888203189098563, "grad_norm": 0.29709675908088684, "learning_rate": 6.489473371052454e-06, "loss": 0.0789, "step": 44226 }, { "epoch": 0.78883815503157, "grad_norm": 0.2259664386510849, "learning_rate": 6.488427210748274e-06, "loss": 0.1021, "step": 44227 }, { "epoch": 0.7888559911532836, "grad_norm": 0.22971664369106293, "learning_rate": 6.487381122201344e-06, "loss": 0.1566, "step": 44228 }, { "epoch": 0.7888738272749973, "grad_norm": 0.25513756275177, "learning_rate": 6.486335105415719e-06, "loss": 0.0943, "step": 44229 }, { "epoch": 0.788891663396711, "grad_norm": 0.3283088207244873, "learning_rate": 6.485289160395447e-06, "loss": 0.1692, "step": 44230 }, { "epoch": 0.7889094995184247, "grad_norm": 0.2767787575721741, "learning_rate": 6.484243287144595e-06, "loss": 0.096, "step": 44231 }, { "epoch": 0.7889273356401384, "grad_norm": 0.21235200762748718, "learning_rate": 6.4831974856672054e-06, "loss": 0.0677, "step": 44232 }, { "epoch": 0.7889451717618521, "grad_norm": 0.30044808983802795, "learning_rate": 6.482151755967345e-06, "loss": 0.1193, "step": 44233 }, { "epoch": 0.7889630078835658, "grad_norm": 0.2610991597175598, "learning_rate": 6.481106098049053e-06, "loss": 0.1082, "step": 44234 }, { "epoch": 0.7889808440052795, "grad_norm": 0.2509284019470215, "learning_rate": 6.480060511916394e-06, "loss": 0.1052, "step": 44235 }, { "epoch": 0.7889986801269931, "grad_norm": 0.2177760750055313, "learning_rate": 6.479014997573421e-06, "loss": 0.0933, "step": 44236 }, { "epoch": 0.7890165162487068, "grad_norm": 0.24624665081501007, "learning_rate": 6.477969555024182e-06, "loss": 0.0783, "step": 44237 }, { "epoch": 0.7890343523704205, "grad_norm": 0.2457483559846878, "learning_rate": 6.476924184272729e-06, "loss": 0.1013, "step": 44238 }, { "epoch": 0.7890521884921343, "grad_norm": 0.28774693608283997, "learning_rate": 6.475878885323108e-06, "loss": 0.1572, "step": 44239 }, { "epoch": 0.789070024613848, "grad_norm": 0.32484716176986694, "learning_rate": 6.474833658179388e-06, "loss": 0.0958, "step": 44240 }, { "epoch": 0.7890878607355617, "grad_norm": 0.2363191843032837, "learning_rate": 6.47378850284561e-06, "loss": 0.091, "step": 44241 }, { "epoch": 0.7891056968572754, "grad_norm": 0.31062182784080505, "learning_rate": 6.472743419325827e-06, "loss": 0.1044, "step": 44242 }, { "epoch": 0.7891235329789891, "grad_norm": 0.3855638802051544, "learning_rate": 6.471698407624083e-06, "loss": 0.1304, "step": 44243 }, { "epoch": 0.7891413691007028, "grad_norm": 0.2847430109977722, "learning_rate": 6.470653467744439e-06, "loss": 0.0995, "step": 44244 }, { "epoch": 0.7891592052224164, "grad_norm": 0.1684645116329193, "learning_rate": 6.469608599690938e-06, "loss": 0.0404, "step": 44245 }, { "epoch": 0.7891770413441301, "grad_norm": 0.3510969579219818, "learning_rate": 6.4685638034676396e-06, "loss": 0.0967, "step": 44246 }, { "epoch": 0.7891948774658438, "grad_norm": 0.2813323140144348, "learning_rate": 6.467519079078582e-06, "loss": 0.1072, "step": 44247 }, { "epoch": 0.7892127135875575, "grad_norm": 0.29802244901657104, "learning_rate": 6.4664744265278295e-06, "loss": 0.0921, "step": 44248 }, { "epoch": 0.7892305497092712, "grad_norm": 0.23579160869121552, "learning_rate": 6.465429845819424e-06, "loss": 0.0725, "step": 44249 }, { "epoch": 0.7892483858309849, "grad_norm": 0.3534276783466339, "learning_rate": 6.464385336957413e-06, "loss": 0.1132, "step": 44250 }, { "epoch": 0.7892662219526986, "grad_norm": 0.2933705151081085, "learning_rate": 6.463340899945847e-06, "loss": 0.1093, "step": 44251 }, { "epoch": 0.7892840580744123, "grad_norm": 0.34300175309181213, "learning_rate": 6.462296534788764e-06, "loss": 0.122, "step": 44252 }, { "epoch": 0.789301894196126, "grad_norm": 0.19918321073055267, "learning_rate": 6.4612522414902315e-06, "loss": 0.0873, "step": 44253 }, { "epoch": 0.7893197303178396, "grad_norm": 0.3685144782066345, "learning_rate": 6.46020802005429e-06, "loss": 0.1464, "step": 44254 }, { "epoch": 0.7893375664395533, "grad_norm": 0.2875499725341797, "learning_rate": 6.459163870484983e-06, "loss": 0.0845, "step": 44255 }, { "epoch": 0.7893554025612671, "grad_norm": 0.24322588741779327, "learning_rate": 6.458119792786355e-06, "loss": 0.1259, "step": 44256 }, { "epoch": 0.7893732386829808, "grad_norm": 0.364108145236969, "learning_rate": 6.457075786962466e-06, "loss": 0.1165, "step": 44257 }, { "epoch": 0.7893910748046945, "grad_norm": 0.22436676919460297, "learning_rate": 6.456031853017356e-06, "loss": 0.0877, "step": 44258 }, { "epoch": 0.7894089109264082, "grad_norm": 0.3096891939640045, "learning_rate": 6.454987990955063e-06, "loss": 0.12, "step": 44259 }, { "epoch": 0.7894267470481219, "grad_norm": 0.23672813177108765, "learning_rate": 6.453944200779649e-06, "loss": 0.0963, "step": 44260 }, { "epoch": 0.7894445831698356, "grad_norm": 0.24908851087093353, "learning_rate": 6.452900482495147e-06, "loss": 0.1125, "step": 44261 }, { "epoch": 0.7894624192915493, "grad_norm": 0.2525533437728882, "learning_rate": 6.451856836105616e-06, "loss": 0.1196, "step": 44262 }, { "epoch": 0.7894802554132629, "grad_norm": 0.22468255460262299, "learning_rate": 6.450813261615093e-06, "loss": 0.0908, "step": 44263 }, { "epoch": 0.7894980915349766, "grad_norm": 0.23806005716323853, "learning_rate": 6.449769759027624e-06, "loss": 0.107, "step": 44264 }, { "epoch": 0.7895159276566903, "grad_norm": 0.29737845063209534, "learning_rate": 6.448726328347249e-06, "loss": 0.1501, "step": 44265 }, { "epoch": 0.789533763778404, "grad_norm": 0.2882936894893646, "learning_rate": 6.447682969578026e-06, "loss": 0.0901, "step": 44266 }, { "epoch": 0.7895515999001177, "grad_norm": 0.2789209187030792, "learning_rate": 6.44663968272399e-06, "loss": 0.0918, "step": 44267 }, { "epoch": 0.7895694360218314, "grad_norm": 0.21692532300949097, "learning_rate": 6.445596467789186e-06, "loss": 0.092, "step": 44268 }, { "epoch": 0.7895872721435451, "grad_norm": 0.23506997525691986, "learning_rate": 6.444553324777652e-06, "loss": 0.0972, "step": 44269 }, { "epoch": 0.7896051082652588, "grad_norm": 0.28100505471229553, "learning_rate": 6.4435102536934464e-06, "loss": 0.1062, "step": 44270 }, { "epoch": 0.7896229443869724, "grad_norm": 0.24430091679096222, "learning_rate": 6.4424672545406016e-06, "loss": 0.1265, "step": 44271 }, { "epoch": 0.7896407805086862, "grad_norm": 0.24555829167366028, "learning_rate": 6.441424327323159e-06, "loss": 0.1244, "step": 44272 }, { "epoch": 0.7896586166303999, "grad_norm": 0.1920609325170517, "learning_rate": 6.4403814720451705e-06, "loss": 0.1347, "step": 44273 }, { "epoch": 0.7896764527521136, "grad_norm": 0.25160709023475647, "learning_rate": 6.439338688710669e-06, "loss": 0.0834, "step": 44274 }, { "epoch": 0.7896942888738273, "grad_norm": 0.2756267488002777, "learning_rate": 6.438295977323708e-06, "loss": 0.1066, "step": 44275 }, { "epoch": 0.789712124995541, "grad_norm": 0.3179618716239929, "learning_rate": 6.437253337888321e-06, "loss": 0.1626, "step": 44276 }, { "epoch": 0.7897299611172547, "grad_norm": 0.2695470154285431, "learning_rate": 6.436210770408551e-06, "loss": 0.113, "step": 44277 }, { "epoch": 0.7897477972389684, "grad_norm": 0.27090758085250854, "learning_rate": 6.4351682748884365e-06, "loss": 0.0555, "step": 44278 }, { "epoch": 0.7897656333606821, "grad_norm": 0.2736127972602844, "learning_rate": 6.4341258513320266e-06, "loss": 0.1131, "step": 44279 }, { "epoch": 0.7897834694823958, "grad_norm": 0.2599885165691376, "learning_rate": 6.433083499743356e-06, "loss": 0.0776, "step": 44280 }, { "epoch": 0.7898013056041094, "grad_norm": 0.329611599445343, "learning_rate": 6.432041220126469e-06, "loss": 0.1161, "step": 44281 }, { "epoch": 0.7898191417258231, "grad_norm": 0.34924349188804626, "learning_rate": 6.430999012485395e-06, "loss": 0.1431, "step": 44282 }, { "epoch": 0.7898369778475368, "grad_norm": 0.21205946803092957, "learning_rate": 6.4299568768241894e-06, "loss": 0.0844, "step": 44283 }, { "epoch": 0.7898548139692505, "grad_norm": 0.3495330810546875, "learning_rate": 6.4289148131468855e-06, "loss": 0.1286, "step": 44284 }, { "epoch": 0.7898726500909642, "grad_norm": 0.25523877143859863, "learning_rate": 6.427872821457523e-06, "loss": 0.1101, "step": 44285 }, { "epoch": 0.7898904862126779, "grad_norm": 0.22484418749809265, "learning_rate": 6.426830901760131e-06, "loss": 0.1269, "step": 44286 }, { "epoch": 0.7899083223343916, "grad_norm": 0.21227017045021057, "learning_rate": 6.4257890540587645e-06, "loss": 0.1236, "step": 44287 }, { "epoch": 0.7899261584561053, "grad_norm": 0.22469249367713928, "learning_rate": 6.424747278357449e-06, "loss": 0.1086, "step": 44288 }, { "epoch": 0.789943994577819, "grad_norm": 0.23707841336727142, "learning_rate": 6.423705574660235e-06, "loss": 0.0741, "step": 44289 }, { "epoch": 0.7899618306995327, "grad_norm": 0.258785605430603, "learning_rate": 6.422663942971155e-06, "loss": 0.0958, "step": 44290 }, { "epoch": 0.7899796668212464, "grad_norm": 0.26216599345207214, "learning_rate": 6.421622383294237e-06, "loss": 0.1234, "step": 44291 }, { "epoch": 0.7899975029429601, "grad_norm": 0.2765684723854065, "learning_rate": 6.420580895633538e-06, "loss": 0.1436, "step": 44292 }, { "epoch": 0.7900153390646738, "grad_norm": 0.2912364602088928, "learning_rate": 6.419539479993081e-06, "loss": 0.1785, "step": 44293 }, { "epoch": 0.7900331751863875, "grad_norm": 0.2888382077217102, "learning_rate": 6.418498136376907e-06, "loss": 0.1175, "step": 44294 }, { "epoch": 0.7900510113081012, "grad_norm": 0.24683113396167755, "learning_rate": 6.417456864789048e-06, "loss": 0.118, "step": 44295 }, { "epoch": 0.7900688474298149, "grad_norm": 0.28047481179237366, "learning_rate": 6.4164156652335495e-06, "loss": 0.1135, "step": 44296 }, { "epoch": 0.7900866835515286, "grad_norm": 0.23094502091407776, "learning_rate": 6.415374537714442e-06, "loss": 0.0918, "step": 44297 }, { "epoch": 0.7901045196732422, "grad_norm": 0.25547873973846436, "learning_rate": 6.414333482235763e-06, "loss": 0.144, "step": 44298 }, { "epoch": 0.7901223557949559, "grad_norm": 0.2930883765220642, "learning_rate": 6.413292498801538e-06, "loss": 0.0691, "step": 44299 }, { "epoch": 0.7901401919166696, "grad_norm": 0.3080432116985321, "learning_rate": 6.412251587415818e-06, "loss": 0.1842, "step": 44300 }, { "epoch": 0.7901580280383833, "grad_norm": 0.3612186014652252, "learning_rate": 6.411210748082625e-06, "loss": 0.1709, "step": 44301 }, { "epoch": 0.790175864160097, "grad_norm": 0.22774101793766022, "learning_rate": 6.410169980806005e-06, "loss": 0.1101, "step": 44302 }, { "epoch": 0.7901937002818107, "grad_norm": 0.2938668727874756, "learning_rate": 6.409129285589988e-06, "loss": 0.1111, "step": 44303 }, { "epoch": 0.7902115364035244, "grad_norm": 0.34203335642814636, "learning_rate": 6.408088662438599e-06, "loss": 0.1319, "step": 44304 }, { "epoch": 0.7902293725252381, "grad_norm": 0.26302456855773926, "learning_rate": 6.40704811135589e-06, "loss": 0.091, "step": 44305 }, { "epoch": 0.7902472086469519, "grad_norm": 0.2113720327615738, "learning_rate": 6.4060076323458816e-06, "loss": 0.0854, "step": 44306 }, { "epoch": 0.7902650447686655, "grad_norm": 0.25970640778541565, "learning_rate": 6.404967225412609e-06, "loss": 0.0988, "step": 44307 }, { "epoch": 0.7902828808903792, "grad_norm": 0.21835792064666748, "learning_rate": 6.403926890560099e-06, "loss": 0.1336, "step": 44308 }, { "epoch": 0.7903007170120929, "grad_norm": 0.22718465328216553, "learning_rate": 6.402886627792401e-06, "loss": 0.1361, "step": 44309 }, { "epoch": 0.7903185531338066, "grad_norm": 0.23819546401500702, "learning_rate": 6.401846437113537e-06, "loss": 0.1397, "step": 44310 }, { "epoch": 0.7903363892555203, "grad_norm": 0.26632481813430786, "learning_rate": 6.400806318527539e-06, "loss": 0.0824, "step": 44311 }, { "epoch": 0.790354225377234, "grad_norm": 0.2611066997051239, "learning_rate": 6.399766272038432e-06, "loss": 0.1306, "step": 44312 }, { "epoch": 0.7903720614989477, "grad_norm": 0.3123415410518646, "learning_rate": 6.398726297650262e-06, "loss": 0.1418, "step": 44313 }, { "epoch": 0.7903898976206614, "grad_norm": 0.2951338291168213, "learning_rate": 6.3976863953670554e-06, "loss": 0.0809, "step": 44314 }, { "epoch": 0.790407733742375, "grad_norm": 0.2597586214542389, "learning_rate": 6.396646565192832e-06, "loss": 0.1155, "step": 44315 }, { "epoch": 0.7904255698640887, "grad_norm": 0.27297112345695496, "learning_rate": 6.395606807131641e-06, "loss": 0.0877, "step": 44316 }, { "epoch": 0.7904434059858024, "grad_norm": 0.2519475519657135, "learning_rate": 6.394567121187494e-06, "loss": 0.1167, "step": 44317 }, { "epoch": 0.7904612421075161, "grad_norm": 0.7212458848953247, "learning_rate": 6.393527507364442e-06, "loss": 0.1943, "step": 44318 }, { "epoch": 0.7904790782292298, "grad_norm": 0.23843009769916534, "learning_rate": 6.392487965666502e-06, "loss": 0.0988, "step": 44319 }, { "epoch": 0.7904969143509435, "grad_norm": 0.6382744312286377, "learning_rate": 6.3914484960977045e-06, "loss": 0.1134, "step": 44320 }, { "epoch": 0.7905147504726572, "grad_norm": 0.2201121598482132, "learning_rate": 6.390409098662073e-06, "loss": 0.1179, "step": 44321 }, { "epoch": 0.7905325865943709, "grad_norm": 0.2574596703052521, "learning_rate": 6.389369773363651e-06, "loss": 0.1397, "step": 44322 }, { "epoch": 0.7905504227160847, "grad_norm": 0.2841186225414276, "learning_rate": 6.388330520206459e-06, "loss": 0.1324, "step": 44323 }, { "epoch": 0.7905682588377984, "grad_norm": 0.3004613518714905, "learning_rate": 6.387291339194529e-06, "loss": 0.1052, "step": 44324 }, { "epoch": 0.790586094959512, "grad_norm": 0.23583769798278809, "learning_rate": 6.386252230331877e-06, "loss": 0.0936, "step": 44325 }, { "epoch": 0.7906039310812257, "grad_norm": 0.20927172899246216, "learning_rate": 6.3852131936225465e-06, "loss": 0.0771, "step": 44326 }, { "epoch": 0.7906217672029394, "grad_norm": 0.2318449765443802, "learning_rate": 6.384174229070561e-06, "loss": 0.0976, "step": 44327 }, { "epoch": 0.7906396033246531, "grad_norm": 0.2622761130332947, "learning_rate": 6.383135336679935e-06, "loss": 0.1164, "step": 44328 }, { "epoch": 0.7906574394463668, "grad_norm": 0.24776549637317657, "learning_rate": 6.382096516454716e-06, "loss": 0.1304, "step": 44329 }, { "epoch": 0.7906752755680805, "grad_norm": 0.26426374912261963, "learning_rate": 6.381057768398915e-06, "loss": 0.1223, "step": 44330 }, { "epoch": 0.7906931116897942, "grad_norm": 0.29112571477890015, "learning_rate": 6.380019092516573e-06, "loss": 0.1097, "step": 44331 }, { "epoch": 0.7907109478115079, "grad_norm": 0.2609473466873169, "learning_rate": 6.378980488811706e-06, "loss": 0.1146, "step": 44332 }, { "epoch": 0.7907287839332215, "grad_norm": 0.19225920736789703, "learning_rate": 6.377941957288344e-06, "loss": 0.099, "step": 44333 }, { "epoch": 0.7907466200549352, "grad_norm": 0.23920294642448425, "learning_rate": 6.376903497950501e-06, "loss": 0.102, "step": 44334 }, { "epoch": 0.7907644561766489, "grad_norm": 0.36490070819854736, "learning_rate": 6.375865110802221e-06, "loss": 0.1569, "step": 44335 }, { "epoch": 0.7907822922983626, "grad_norm": 0.26597926020622253, "learning_rate": 6.374826795847522e-06, "loss": 0.128, "step": 44336 }, { "epoch": 0.7908001284200763, "grad_norm": 0.24884183704853058, "learning_rate": 6.3737885530904235e-06, "loss": 0.1356, "step": 44337 }, { "epoch": 0.79081796454179, "grad_norm": 0.2525271773338318, "learning_rate": 6.3727503825349504e-06, "loss": 0.1488, "step": 44338 }, { "epoch": 0.7908358006635037, "grad_norm": 0.2771569490432739, "learning_rate": 6.3717122841851356e-06, "loss": 0.1417, "step": 44339 }, { "epoch": 0.7908536367852175, "grad_norm": 0.310963898897171, "learning_rate": 6.370674258044998e-06, "loss": 0.1113, "step": 44340 }, { "epoch": 0.7908714729069312, "grad_norm": 0.23170971870422363, "learning_rate": 6.369636304118562e-06, "loss": 0.131, "step": 44341 }, { "epoch": 0.7908893090286448, "grad_norm": 0.28286445140838623, "learning_rate": 6.3685984224098436e-06, "loss": 0.1089, "step": 44342 }, { "epoch": 0.7909071451503585, "grad_norm": 0.22349539399147034, "learning_rate": 6.367560612922879e-06, "loss": 0.152, "step": 44343 }, { "epoch": 0.7909249812720722, "grad_norm": 0.3122520446777344, "learning_rate": 6.366522875661676e-06, "loss": 0.1842, "step": 44344 }, { "epoch": 0.7909428173937859, "grad_norm": 0.2264707386493683, "learning_rate": 6.365485210630276e-06, "loss": 0.1202, "step": 44345 }, { "epoch": 0.7909606535154996, "grad_norm": 0.20424042642116547, "learning_rate": 6.364447617832692e-06, "loss": 0.1114, "step": 44346 }, { "epoch": 0.7909784896372133, "grad_norm": 0.28276780247688293, "learning_rate": 6.363410097272937e-06, "loss": 0.1058, "step": 44347 }, { "epoch": 0.790996325758927, "grad_norm": 0.27995067834854126, "learning_rate": 6.362372648955048e-06, "loss": 0.0951, "step": 44348 }, { "epoch": 0.7910141618806407, "grad_norm": 0.23828350007534027, "learning_rate": 6.36133527288304e-06, "loss": 0.0799, "step": 44349 }, { "epoch": 0.7910319980023544, "grad_norm": 0.23907338082790375, "learning_rate": 6.360297969060935e-06, "loss": 0.0971, "step": 44350 }, { "epoch": 0.791049834124068, "grad_norm": 0.33674052357673645, "learning_rate": 6.359260737492745e-06, "loss": 0.1667, "step": 44351 }, { "epoch": 0.7910676702457817, "grad_norm": 0.2130574882030487, "learning_rate": 6.358223578182507e-06, "loss": 0.0965, "step": 44352 }, { "epoch": 0.7910855063674954, "grad_norm": 0.3549567461013794, "learning_rate": 6.357186491134232e-06, "loss": 0.1677, "step": 44353 }, { "epoch": 0.7911033424892091, "grad_norm": 0.23653914034366608, "learning_rate": 6.356149476351942e-06, "loss": 0.0855, "step": 44354 }, { "epoch": 0.7911211786109228, "grad_norm": 0.2747228145599365, "learning_rate": 6.355112533839649e-06, "loss": 0.1328, "step": 44355 }, { "epoch": 0.7911390147326365, "grad_norm": 0.23279783129692078, "learning_rate": 6.354075663601386e-06, "loss": 0.0876, "step": 44356 }, { "epoch": 0.7911568508543503, "grad_norm": 0.28982770442962646, "learning_rate": 6.353038865641159e-06, "loss": 0.1482, "step": 44357 }, { "epoch": 0.791174686976064, "grad_norm": 0.3094983696937561, "learning_rate": 6.352002139963001e-06, "loss": 0.0791, "step": 44358 }, { "epoch": 0.7911925230977777, "grad_norm": 0.2396293431520462, "learning_rate": 6.3509654865709224e-06, "loss": 0.0938, "step": 44359 }, { "epoch": 0.7912103592194913, "grad_norm": 0.22900012135505676, "learning_rate": 6.349928905468935e-06, "loss": 0.1143, "step": 44360 }, { "epoch": 0.791228195341205, "grad_norm": 0.27662521600723267, "learning_rate": 6.348892396661074e-06, "loss": 0.1062, "step": 44361 }, { "epoch": 0.7912460314629187, "grad_norm": 0.30716392397880554, "learning_rate": 6.347855960151347e-06, "loss": 0.1341, "step": 44362 }, { "epoch": 0.7912638675846324, "grad_norm": 0.2991967797279358, "learning_rate": 6.346819595943773e-06, "loss": 0.1172, "step": 44363 }, { "epoch": 0.7912817037063461, "grad_norm": 0.21833184361457825, "learning_rate": 6.345783304042363e-06, "loss": 0.1, "step": 44364 }, { "epoch": 0.7912995398280598, "grad_norm": 0.3661629259586334, "learning_rate": 6.344747084451144e-06, "loss": 0.1691, "step": 44365 }, { "epoch": 0.7913173759497735, "grad_norm": 0.2607227563858032, "learning_rate": 6.343710937174132e-06, "loss": 0.1024, "step": 44366 }, { "epoch": 0.7913352120714872, "grad_norm": 0.31266945600509644, "learning_rate": 6.342674862215337e-06, "loss": 0.0569, "step": 44367 }, { "epoch": 0.7913530481932008, "grad_norm": 0.3785927891731262, "learning_rate": 6.34163885957878e-06, "loss": 0.0808, "step": 44368 }, { "epoch": 0.7913708843149145, "grad_norm": 0.23941653966903687, "learning_rate": 6.340602929268469e-06, "loss": 0.17, "step": 44369 }, { "epoch": 0.7913887204366282, "grad_norm": 0.332263320684433, "learning_rate": 6.33956707128843e-06, "loss": 0.1079, "step": 44370 }, { "epoch": 0.7914065565583419, "grad_norm": 0.3225208818912506, "learning_rate": 6.338531285642668e-06, "loss": 0.1639, "step": 44371 }, { "epoch": 0.7914243926800556, "grad_norm": 0.27201715111732483, "learning_rate": 6.337495572335211e-06, "loss": 0.1179, "step": 44372 }, { "epoch": 0.7914422288017694, "grad_norm": 0.2180429846048355, "learning_rate": 6.336459931370062e-06, "loss": 0.1165, "step": 44373 }, { "epoch": 0.7914600649234831, "grad_norm": 0.21508382260799408, "learning_rate": 6.335424362751247e-06, "loss": 0.0668, "step": 44374 }, { "epoch": 0.7914779010451968, "grad_norm": 0.3044489324092865, "learning_rate": 6.3343888664827714e-06, "loss": 0.1478, "step": 44375 }, { "epoch": 0.7914957371669105, "grad_norm": 0.25947749614715576, "learning_rate": 6.333353442568654e-06, "loss": 0.1202, "step": 44376 }, { "epoch": 0.7915135732886242, "grad_norm": 0.31481418013572693, "learning_rate": 6.332318091012898e-06, "loss": 0.1986, "step": 44377 }, { "epoch": 0.7915314094103378, "grad_norm": 0.22756798565387726, "learning_rate": 6.331282811819531e-06, "loss": 0.1422, "step": 44378 }, { "epoch": 0.7915492455320515, "grad_norm": 0.3005802631378174, "learning_rate": 6.330247604992562e-06, "loss": 0.0889, "step": 44379 }, { "epoch": 0.7915670816537652, "grad_norm": 0.2736281752586365, "learning_rate": 6.329212470535997e-06, "loss": 0.1018, "step": 44380 }, { "epoch": 0.7915849177754789, "grad_norm": 0.2804303765296936, "learning_rate": 6.328177408453859e-06, "loss": 0.0881, "step": 44381 }, { "epoch": 0.7916027538971926, "grad_norm": 0.2587086260318756, "learning_rate": 6.327142418750143e-06, "loss": 0.1246, "step": 44382 }, { "epoch": 0.7916205900189063, "grad_norm": 0.24446746706962585, "learning_rate": 6.326107501428883e-06, "loss": 0.1079, "step": 44383 }, { "epoch": 0.79163842614062, "grad_norm": 0.2886585295200348, "learning_rate": 6.325072656494069e-06, "loss": 0.1096, "step": 44384 }, { "epoch": 0.7916562622623337, "grad_norm": 0.39698928594589233, "learning_rate": 6.3240378839497325e-06, "loss": 0.1073, "step": 44385 }, { "epoch": 0.7916740983840473, "grad_norm": 0.38284069299697876, "learning_rate": 6.323003183799869e-06, "loss": 0.0584, "step": 44386 }, { "epoch": 0.791691934505761, "grad_norm": 0.36693501472473145, "learning_rate": 6.3219685560485024e-06, "loss": 0.0932, "step": 44387 }, { "epoch": 0.7917097706274747, "grad_norm": 0.19956958293914795, "learning_rate": 6.320934000699638e-06, "loss": 0.1126, "step": 44388 }, { "epoch": 0.7917276067491884, "grad_norm": 0.22734947502613068, "learning_rate": 6.319899517757283e-06, "loss": 0.1307, "step": 44389 }, { "epoch": 0.7917454428709022, "grad_norm": 0.20080597698688507, "learning_rate": 6.318865107225444e-06, "loss": 0.0711, "step": 44390 }, { "epoch": 0.7917632789926159, "grad_norm": 0.42116034030914307, "learning_rate": 6.317830769108141e-06, "loss": 0.1971, "step": 44391 }, { "epoch": 0.7917811151143296, "grad_norm": 0.2806299924850464, "learning_rate": 6.31679650340938e-06, "loss": 0.1295, "step": 44392 }, { "epoch": 0.7917989512360433, "grad_norm": 0.2743414342403412, "learning_rate": 6.315762310133169e-06, "loss": 0.1546, "step": 44393 }, { "epoch": 0.791816787357757, "grad_norm": 0.3417699635028839, "learning_rate": 6.314728189283517e-06, "loss": 0.1423, "step": 44394 }, { "epoch": 0.7918346234794706, "grad_norm": 0.24632671475410461, "learning_rate": 6.313694140864423e-06, "loss": 0.1335, "step": 44395 }, { "epoch": 0.7918524596011843, "grad_norm": 0.2817908823490143, "learning_rate": 6.312660164879916e-06, "loss": 0.1166, "step": 44396 }, { "epoch": 0.791870295722898, "grad_norm": 0.18462194502353668, "learning_rate": 6.311626261333989e-06, "loss": 0.0677, "step": 44397 }, { "epoch": 0.7918881318446117, "grad_norm": 0.23208342492580414, "learning_rate": 6.310592430230647e-06, "loss": 0.1124, "step": 44398 }, { "epoch": 0.7919059679663254, "grad_norm": 0.20546483993530273, "learning_rate": 6.309558671573903e-06, "loss": 0.0856, "step": 44399 }, { "epoch": 0.7919238040880391, "grad_norm": 0.27161574363708496, "learning_rate": 6.3085249853677725e-06, "loss": 0.1051, "step": 44400 }, { "epoch": 0.7919416402097528, "grad_norm": 0.23061218857765198, "learning_rate": 6.307491371616256e-06, "loss": 0.0978, "step": 44401 }, { "epoch": 0.7919594763314665, "grad_norm": 0.24145588278770447, "learning_rate": 6.306457830323359e-06, "loss": 0.1359, "step": 44402 }, { "epoch": 0.7919773124531801, "grad_norm": 0.25866636633872986, "learning_rate": 6.305424361493081e-06, "loss": 0.1215, "step": 44403 }, { "epoch": 0.7919951485748938, "grad_norm": 0.33015015721321106, "learning_rate": 6.304390965129442e-06, "loss": 0.1328, "step": 44404 }, { "epoch": 0.7920129846966075, "grad_norm": 0.23680657148361206, "learning_rate": 6.30335764123644e-06, "loss": 0.1173, "step": 44405 }, { "epoch": 0.7920308208183212, "grad_norm": 0.23965995013713837, "learning_rate": 6.302324389818081e-06, "loss": 0.1155, "step": 44406 }, { "epoch": 0.792048656940035, "grad_norm": 0.28786271810531616, "learning_rate": 6.301291210878374e-06, "loss": 0.122, "step": 44407 }, { "epoch": 0.7920664930617487, "grad_norm": 0.3374701142311096, "learning_rate": 6.300258104421311e-06, "loss": 0.1199, "step": 44408 }, { "epoch": 0.7920843291834624, "grad_norm": 0.233884796500206, "learning_rate": 6.299225070450912e-06, "loss": 0.1134, "step": 44409 }, { "epoch": 0.7921021653051761, "grad_norm": 0.36059561371803284, "learning_rate": 6.2981921089711765e-06, "loss": 0.1654, "step": 44410 }, { "epoch": 0.7921200014268898, "grad_norm": 0.4107843339443207, "learning_rate": 6.297159219986101e-06, "loss": 0.1174, "step": 44411 }, { "epoch": 0.7921378375486035, "grad_norm": 0.3168709874153137, "learning_rate": 6.296126403499705e-06, "loss": 0.1351, "step": 44412 }, { "epoch": 0.7921556736703171, "grad_norm": 0.2797567546367645, "learning_rate": 6.295093659515974e-06, "loss": 0.1258, "step": 44413 }, { "epoch": 0.7921735097920308, "grad_norm": 0.39774811267852783, "learning_rate": 6.2940609880389304e-06, "loss": 0.1765, "step": 44414 }, { "epoch": 0.7921913459137445, "grad_norm": 0.24939168989658356, "learning_rate": 6.293028389072564e-06, "loss": 0.1237, "step": 44415 }, { "epoch": 0.7922091820354582, "grad_norm": 0.29042425751686096, "learning_rate": 6.291995862620876e-06, "loss": 0.1718, "step": 44416 }, { "epoch": 0.7922270181571719, "grad_norm": 0.27344655990600586, "learning_rate": 6.290963408687878e-06, "loss": 0.1619, "step": 44417 }, { "epoch": 0.7922448542788856, "grad_norm": 0.27999576926231384, "learning_rate": 6.289931027277568e-06, "loss": 0.0995, "step": 44418 }, { "epoch": 0.7922626904005993, "grad_norm": 0.25451192259788513, "learning_rate": 6.288898718393948e-06, "loss": 0.122, "step": 44419 }, { "epoch": 0.792280526522313, "grad_norm": 0.33843305706977844, "learning_rate": 6.2878664820410215e-06, "loss": 0.1036, "step": 44420 }, { "epoch": 0.7922983626440266, "grad_norm": 0.3082786798477173, "learning_rate": 6.286834318222779e-06, "loss": 0.0973, "step": 44421 }, { "epoch": 0.7923161987657403, "grad_norm": 0.3015062212944031, "learning_rate": 6.285802226943235e-06, "loss": 0.1296, "step": 44422 }, { "epoch": 0.792334034887454, "grad_norm": 0.26641401648521423, "learning_rate": 6.284770208206389e-06, "loss": 0.1037, "step": 44423 }, { "epoch": 0.7923518710091678, "grad_norm": 0.2790592610836029, "learning_rate": 6.283738262016234e-06, "loss": 0.1166, "step": 44424 }, { "epoch": 0.7923697071308815, "grad_norm": 0.3925189971923828, "learning_rate": 6.282706388376769e-06, "loss": 0.1631, "step": 44425 }, { "epoch": 0.7923875432525952, "grad_norm": 0.30460017919540405, "learning_rate": 6.281674587292e-06, "loss": 0.1136, "step": 44426 }, { "epoch": 0.7924053793743089, "grad_norm": 0.26576054096221924, "learning_rate": 6.280642858765929e-06, "loss": 0.1147, "step": 44427 }, { "epoch": 0.7924232154960226, "grad_norm": 0.31471821665763855, "learning_rate": 6.279611202802554e-06, "loss": 0.1233, "step": 44428 }, { "epoch": 0.7924410516177363, "grad_norm": 0.22131535410881042, "learning_rate": 6.278579619405864e-06, "loss": 0.1139, "step": 44429 }, { "epoch": 0.79245888773945, "grad_norm": 0.25198453664779663, "learning_rate": 6.277548108579873e-06, "loss": 0.1384, "step": 44430 }, { "epoch": 0.7924767238611636, "grad_norm": 0.3263704776763916, "learning_rate": 6.276516670328572e-06, "loss": 0.1599, "step": 44431 }, { "epoch": 0.7924945599828773, "grad_norm": 0.3255254030227661, "learning_rate": 6.275485304655959e-06, "loss": 0.1315, "step": 44432 }, { "epoch": 0.792512396104591, "grad_norm": 0.22428010404109955, "learning_rate": 6.274454011566033e-06, "loss": 0.0739, "step": 44433 }, { "epoch": 0.7925302322263047, "grad_norm": 0.29232409596443176, "learning_rate": 6.273422791062783e-06, "loss": 0.1154, "step": 44434 }, { "epoch": 0.7925480683480184, "grad_norm": 0.3024180233478546, "learning_rate": 6.272391643150222e-06, "loss": 0.1802, "step": 44435 }, { "epoch": 0.7925659044697321, "grad_norm": 0.24665972590446472, "learning_rate": 6.271360567832338e-06, "loss": 0.0968, "step": 44436 }, { "epoch": 0.7925837405914458, "grad_norm": 0.25616490840911865, "learning_rate": 6.270329565113131e-06, "loss": 0.1047, "step": 44437 }, { "epoch": 0.7926015767131595, "grad_norm": 0.2507772147655487, "learning_rate": 6.269298634996587e-06, "loss": 0.1619, "step": 44438 }, { "epoch": 0.7926194128348731, "grad_norm": 0.2172888219356537, "learning_rate": 6.26826777748672e-06, "loss": 0.1379, "step": 44439 }, { "epoch": 0.7926372489565868, "grad_norm": 0.2531301975250244, "learning_rate": 6.267236992587505e-06, "loss": 0.1099, "step": 44440 }, { "epoch": 0.7926550850783006, "grad_norm": 0.22529707849025726, "learning_rate": 6.266206280302961e-06, "loss": 0.092, "step": 44441 }, { "epoch": 0.7926729212000143, "grad_norm": 0.3378083109855652, "learning_rate": 6.265175640637064e-06, "loss": 0.1823, "step": 44442 }, { "epoch": 0.792690757321728, "grad_norm": 0.3259870707988739, "learning_rate": 6.264145073593822e-06, "loss": 0.156, "step": 44443 }, { "epoch": 0.7927085934434417, "grad_norm": 0.3068363666534424, "learning_rate": 6.263114579177229e-06, "loss": 0.0946, "step": 44444 }, { "epoch": 0.7927264295651554, "grad_norm": 0.2478625476360321, "learning_rate": 6.2620841573912705e-06, "loss": 0.1094, "step": 44445 }, { "epoch": 0.7927442656868691, "grad_norm": 0.2599877715110779, "learning_rate": 6.26105380823995e-06, "loss": 0.1056, "step": 44446 }, { "epoch": 0.7927621018085828, "grad_norm": 0.23376449942588806, "learning_rate": 6.2600235317272465e-06, "loss": 0.0785, "step": 44447 }, { "epoch": 0.7927799379302964, "grad_norm": 0.35235193371772766, "learning_rate": 6.258993327857174e-06, "loss": 0.1466, "step": 44448 }, { "epoch": 0.7927977740520101, "grad_norm": 0.25425320863723755, "learning_rate": 6.257963196633715e-06, "loss": 0.0804, "step": 44449 }, { "epoch": 0.7928156101737238, "grad_norm": 0.21206626296043396, "learning_rate": 6.2569331380608635e-06, "loss": 0.141, "step": 44450 }, { "epoch": 0.7928334462954375, "grad_norm": 0.32689717411994934, "learning_rate": 6.255903152142609e-06, "loss": 0.1756, "step": 44451 }, { "epoch": 0.7928512824171512, "grad_norm": 0.339819997549057, "learning_rate": 6.254873238882952e-06, "loss": 0.0814, "step": 44452 }, { "epoch": 0.7928691185388649, "grad_norm": 0.2103695273399353, "learning_rate": 6.253843398285875e-06, "loss": 0.1139, "step": 44453 }, { "epoch": 0.7928869546605786, "grad_norm": 0.22478380799293518, "learning_rate": 6.2528136303553805e-06, "loss": 0.0908, "step": 44454 }, { "epoch": 0.7929047907822923, "grad_norm": 0.2647428512573242, "learning_rate": 6.251783935095449e-06, "loss": 0.1358, "step": 44455 }, { "epoch": 0.792922626904006, "grad_norm": 0.3228669762611389, "learning_rate": 6.250754312510087e-06, "loss": 0.144, "step": 44456 }, { "epoch": 0.7929404630257196, "grad_norm": 0.284750759601593, "learning_rate": 6.249724762603276e-06, "loss": 0.1203, "step": 44457 }, { "epoch": 0.7929582991474334, "grad_norm": 0.31602364778518677, "learning_rate": 6.248695285379008e-06, "loss": 0.1117, "step": 44458 }, { "epoch": 0.7929761352691471, "grad_norm": 0.3020690381526947, "learning_rate": 6.247665880841275e-06, "loss": 0.172, "step": 44459 }, { "epoch": 0.7929939713908608, "grad_norm": 0.25382643938064575, "learning_rate": 6.246636548994056e-06, "loss": 0.132, "step": 44460 }, { "epoch": 0.7930118075125745, "grad_norm": 0.281044065952301, "learning_rate": 6.24560728984136e-06, "loss": 0.0716, "step": 44461 }, { "epoch": 0.7930296436342882, "grad_norm": 0.2512074112892151, "learning_rate": 6.244578103387169e-06, "loss": 0.1412, "step": 44462 }, { "epoch": 0.7930474797560019, "grad_norm": 0.24529364705085754, "learning_rate": 6.24354898963547e-06, "loss": 0.1639, "step": 44463 }, { "epoch": 0.7930653158777156, "grad_norm": 0.26589420437812805, "learning_rate": 6.242519948590245e-06, "loss": 0.0737, "step": 44464 }, { "epoch": 0.7930831519994292, "grad_norm": 0.26851651072502136, "learning_rate": 6.241490980255499e-06, "loss": 0.1284, "step": 44465 }, { "epoch": 0.7931009881211429, "grad_norm": 0.3239595890045166, "learning_rate": 6.240462084635213e-06, "loss": 0.1128, "step": 44466 }, { "epoch": 0.7931188242428566, "grad_norm": 0.23602654039859772, "learning_rate": 6.239433261733368e-06, "loss": 0.1119, "step": 44467 }, { "epoch": 0.7931366603645703, "grad_norm": 0.277048259973526, "learning_rate": 6.2384045115539666e-06, "loss": 0.0961, "step": 44468 }, { "epoch": 0.793154496486284, "grad_norm": 0.3234660029411316, "learning_rate": 6.237375834100983e-06, "loss": 0.1143, "step": 44469 }, { "epoch": 0.7931723326079977, "grad_norm": 0.22434797883033752, "learning_rate": 6.236347229378417e-06, "loss": 0.1028, "step": 44470 }, { "epoch": 0.7931901687297114, "grad_norm": 0.27867910265922546, "learning_rate": 6.2353186973902524e-06, "loss": 0.0652, "step": 44471 }, { "epoch": 0.7932080048514251, "grad_norm": 0.2899375259876251, "learning_rate": 6.234290238140472e-06, "loss": 0.0899, "step": 44472 }, { "epoch": 0.7932258409731388, "grad_norm": 0.20784126222133636, "learning_rate": 6.2332618516330545e-06, "loss": 0.14, "step": 44473 }, { "epoch": 0.7932436770948526, "grad_norm": 0.26721426844596863, "learning_rate": 6.232233537872006e-06, "loss": 0.0977, "step": 44474 }, { "epoch": 0.7932615132165662, "grad_norm": 0.27338501811027527, "learning_rate": 6.231205296861303e-06, "loss": 0.0918, "step": 44475 }, { "epoch": 0.7932793493382799, "grad_norm": 0.45138785243034363, "learning_rate": 6.230177128604927e-06, "loss": 0.1527, "step": 44476 }, { "epoch": 0.7932971854599936, "grad_norm": 0.4324415922164917, "learning_rate": 6.229149033106866e-06, "loss": 0.1662, "step": 44477 }, { "epoch": 0.7933150215817073, "grad_norm": 0.28207719326019287, "learning_rate": 6.22812101037111e-06, "loss": 0.1932, "step": 44478 }, { "epoch": 0.793332857703421, "grad_norm": 0.2585281431674957, "learning_rate": 6.22709306040164e-06, "loss": 0.1057, "step": 44479 }, { "epoch": 0.7933506938251347, "grad_norm": 0.34037742018699646, "learning_rate": 6.226065183202437e-06, "loss": 0.0905, "step": 44480 }, { "epoch": 0.7933685299468484, "grad_norm": 0.25283282995224, "learning_rate": 6.225037378777493e-06, "loss": 0.1054, "step": 44481 }, { "epoch": 0.793386366068562, "grad_norm": 0.18677212297916412, "learning_rate": 6.224009647130785e-06, "loss": 0.0813, "step": 44482 }, { "epoch": 0.7934042021902757, "grad_norm": 0.2476346343755722, "learning_rate": 6.2229819882663045e-06, "loss": 0.1496, "step": 44483 }, { "epoch": 0.7934220383119894, "grad_norm": 0.2059735655784607, "learning_rate": 6.221954402188035e-06, "loss": 0.1264, "step": 44484 }, { "epoch": 0.7934398744337031, "grad_norm": 0.429033100605011, "learning_rate": 6.220926888899953e-06, "loss": 0.1534, "step": 44485 }, { "epoch": 0.7934577105554168, "grad_norm": 0.2717283368110657, "learning_rate": 6.219899448406039e-06, "loss": 0.1492, "step": 44486 }, { "epoch": 0.7934755466771305, "grad_norm": 0.26215073466300964, "learning_rate": 6.2188720807102865e-06, "loss": 0.1139, "step": 44487 }, { "epoch": 0.7934933827988442, "grad_norm": 0.26236769556999207, "learning_rate": 6.217844785816676e-06, "loss": 0.1237, "step": 44488 }, { "epoch": 0.7935112189205579, "grad_norm": 0.2535540461540222, "learning_rate": 6.2168175637291825e-06, "loss": 0.1169, "step": 44489 }, { "epoch": 0.7935290550422716, "grad_norm": 0.29390227794647217, "learning_rate": 6.215790414451786e-06, "loss": 0.1076, "step": 44490 }, { "epoch": 0.7935468911639854, "grad_norm": 0.2571776807308197, "learning_rate": 6.214763337988483e-06, "loss": 0.0798, "step": 44491 }, { "epoch": 0.793564727285699, "grad_norm": 0.25504249334335327, "learning_rate": 6.2137363343432425e-06, "loss": 0.1405, "step": 44492 }, { "epoch": 0.7935825634074127, "grad_norm": 0.27247005701065063, "learning_rate": 6.212709403520048e-06, "loss": 0.127, "step": 44493 }, { "epoch": 0.7936003995291264, "grad_norm": 0.23732003569602966, "learning_rate": 6.211682545522876e-06, "loss": 0.1384, "step": 44494 }, { "epoch": 0.7936182356508401, "grad_norm": 0.23826231062412262, "learning_rate": 6.210655760355718e-06, "loss": 0.0838, "step": 44495 }, { "epoch": 0.7936360717725538, "grad_norm": 0.2973790466785431, "learning_rate": 6.209629048022541e-06, "loss": 0.0851, "step": 44496 }, { "epoch": 0.7936539078942675, "grad_norm": 0.23042847216129303, "learning_rate": 6.208602408527339e-06, "loss": 0.091, "step": 44497 }, { "epoch": 0.7936717440159812, "grad_norm": 0.2569870054721832, "learning_rate": 6.207575841874083e-06, "loss": 0.1015, "step": 44498 }, { "epoch": 0.7936895801376949, "grad_norm": 0.27724137902259827, "learning_rate": 6.206549348066748e-06, "loss": 0.1023, "step": 44499 }, { "epoch": 0.7937074162594085, "grad_norm": 0.31265929341316223, "learning_rate": 6.205522927109325e-06, "loss": 0.141, "step": 44500 }, { "epoch": 0.7937252523811222, "grad_norm": 0.3048022985458374, "learning_rate": 6.204496579005789e-06, "loss": 0.1378, "step": 44501 }, { "epoch": 0.7937430885028359, "grad_norm": 0.25097495317459106, "learning_rate": 6.203470303760114e-06, "loss": 0.0996, "step": 44502 }, { "epoch": 0.7937609246245496, "grad_norm": 0.2565118670463562, "learning_rate": 6.202444101376273e-06, "loss": 0.1173, "step": 44503 }, { "epoch": 0.7937787607462633, "grad_norm": 0.25220057368278503, "learning_rate": 6.201417971858259e-06, "loss": 0.1132, "step": 44504 }, { "epoch": 0.793796596867977, "grad_norm": 0.22475945949554443, "learning_rate": 6.200391915210041e-06, "loss": 0.0707, "step": 44505 }, { "epoch": 0.7938144329896907, "grad_norm": 0.24830904603004456, "learning_rate": 6.1993659314355965e-06, "loss": 0.0731, "step": 44506 }, { "epoch": 0.7938322691114044, "grad_norm": 0.23916874825954437, "learning_rate": 6.198340020538898e-06, "loss": 0.0759, "step": 44507 }, { "epoch": 0.7938501052331182, "grad_norm": 0.27155864238739014, "learning_rate": 6.197314182523933e-06, "loss": 0.0894, "step": 44508 }, { "epoch": 0.7938679413548319, "grad_norm": 0.2335289865732193, "learning_rate": 6.196288417394666e-06, "loss": 0.0996, "step": 44509 }, { "epoch": 0.7938857774765455, "grad_norm": 0.2389247715473175, "learning_rate": 6.195262725155085e-06, "loss": 0.0763, "step": 44510 }, { "epoch": 0.7939036135982592, "grad_norm": 0.22371631860733032, "learning_rate": 6.194237105809164e-06, "loss": 0.0809, "step": 44511 }, { "epoch": 0.7939214497199729, "grad_norm": 0.2821425497531891, "learning_rate": 6.193211559360864e-06, "loss": 0.1418, "step": 44512 }, { "epoch": 0.7939392858416866, "grad_norm": 0.291654109954834, "learning_rate": 6.192186085814183e-06, "loss": 0.1344, "step": 44513 }, { "epoch": 0.7939571219634003, "grad_norm": 0.21793454885482788, "learning_rate": 6.191160685173083e-06, "loss": 0.1059, "step": 44514 }, { "epoch": 0.793974958085114, "grad_norm": 0.38878750801086426, "learning_rate": 6.19013535744154e-06, "loss": 0.106, "step": 44515 }, { "epoch": 0.7939927942068277, "grad_norm": 0.2460489124059677, "learning_rate": 6.1891101026235215e-06, "loss": 0.0705, "step": 44516 }, { "epoch": 0.7940106303285414, "grad_norm": 0.27745917439460754, "learning_rate": 6.188084920723019e-06, "loss": 0.1146, "step": 44517 }, { "epoch": 0.794028466450255, "grad_norm": 0.2203586846590042, "learning_rate": 6.187059811743995e-06, "loss": 0.0859, "step": 44518 }, { "epoch": 0.7940463025719687, "grad_norm": 0.23983919620513916, "learning_rate": 6.186034775690424e-06, "loss": 0.0793, "step": 44519 }, { "epoch": 0.7940641386936824, "grad_norm": 0.3579029142856598, "learning_rate": 6.185009812566275e-06, "loss": 0.1266, "step": 44520 }, { "epoch": 0.7940819748153961, "grad_norm": 0.2203003615140915, "learning_rate": 6.183984922375533e-06, "loss": 0.1256, "step": 44521 }, { "epoch": 0.7940998109371098, "grad_norm": 0.2137032002210617, "learning_rate": 6.182960105122165e-06, "loss": 0.1141, "step": 44522 }, { "epoch": 0.7941176470588235, "grad_norm": 0.2582463026046753, "learning_rate": 6.181935360810134e-06, "loss": 0.1038, "step": 44523 }, { "epoch": 0.7941354831805372, "grad_norm": 0.21422302722930908, "learning_rate": 6.180910689443428e-06, "loss": 0.082, "step": 44524 }, { "epoch": 0.794153319302251, "grad_norm": 0.3135656416416168, "learning_rate": 6.179886091026008e-06, "loss": 0.1697, "step": 44525 }, { "epoch": 0.7941711554239647, "grad_norm": 0.20665223896503448, "learning_rate": 6.178861565561855e-06, "loss": 0.1129, "step": 44526 }, { "epoch": 0.7941889915456783, "grad_norm": 0.25325390696525574, "learning_rate": 6.177837113054935e-06, "loss": 0.0978, "step": 44527 }, { "epoch": 0.794206827667392, "grad_norm": 0.25414136052131653, "learning_rate": 6.176812733509219e-06, "loss": 0.1279, "step": 44528 }, { "epoch": 0.7942246637891057, "grad_norm": 0.2939840853214264, "learning_rate": 6.175788426928672e-06, "loss": 0.101, "step": 44529 }, { "epoch": 0.7942424999108194, "grad_norm": 0.23991821706295013, "learning_rate": 6.1747641933172754e-06, "loss": 0.1139, "step": 44530 }, { "epoch": 0.7942603360325331, "grad_norm": 0.370981901884079, "learning_rate": 6.173740032678999e-06, "loss": 0.1572, "step": 44531 }, { "epoch": 0.7942781721542468, "grad_norm": 0.28368422389030457, "learning_rate": 6.172715945017807e-06, "loss": 0.1206, "step": 44532 }, { "epoch": 0.7942960082759605, "grad_norm": 0.3193915784358978, "learning_rate": 6.171691930337661e-06, "loss": 0.0806, "step": 44533 }, { "epoch": 0.7943138443976742, "grad_norm": 0.2615368068218231, "learning_rate": 6.17066798864255e-06, "loss": 0.0811, "step": 44534 }, { "epoch": 0.7943316805193879, "grad_norm": 0.22125034034252167, "learning_rate": 6.169644119936432e-06, "loss": 0.0808, "step": 44535 }, { "epoch": 0.7943495166411015, "grad_norm": 0.3314410448074341, "learning_rate": 6.16862032422327e-06, "loss": 0.1334, "step": 44536 }, { "epoch": 0.7943673527628152, "grad_norm": 0.2585088908672333, "learning_rate": 6.167596601507047e-06, "loss": 0.085, "step": 44537 }, { "epoch": 0.7943851888845289, "grad_norm": 0.2760941982269287, "learning_rate": 6.1665729517917185e-06, "loss": 0.1452, "step": 44538 }, { "epoch": 0.7944030250062426, "grad_norm": 0.3180933892726898, "learning_rate": 6.165549375081264e-06, "loss": 0.0648, "step": 44539 }, { "epoch": 0.7944208611279563, "grad_norm": 0.22410927712917328, "learning_rate": 6.164525871379648e-06, "loss": 0.0976, "step": 44540 }, { "epoch": 0.79443869724967, "grad_norm": 0.3005995750427246, "learning_rate": 6.163502440690832e-06, "loss": 0.0832, "step": 44541 }, { "epoch": 0.7944565333713838, "grad_norm": 0.30551469326019287, "learning_rate": 6.16247908301878e-06, "loss": 0.0642, "step": 44542 }, { "epoch": 0.7944743694930975, "grad_norm": 0.2784944474697113, "learning_rate": 6.161455798367472e-06, "loss": 0.1351, "step": 44543 }, { "epoch": 0.7944922056148112, "grad_norm": 0.2333540916442871, "learning_rate": 6.160432586740869e-06, "loss": 0.1492, "step": 44544 }, { "epoch": 0.7945100417365248, "grad_norm": 0.3198825716972351, "learning_rate": 6.159409448142936e-06, "loss": 0.1047, "step": 44545 }, { "epoch": 0.7945278778582385, "grad_norm": 0.25376787781715393, "learning_rate": 6.1583863825776326e-06, "loss": 0.1181, "step": 44546 }, { "epoch": 0.7945457139799522, "grad_norm": 0.29596784710884094, "learning_rate": 6.1573633900489366e-06, "loss": 0.1085, "step": 44547 }, { "epoch": 0.7945635501016659, "grad_norm": 0.28149908781051636, "learning_rate": 6.156340470560809e-06, "loss": 0.2084, "step": 44548 }, { "epoch": 0.7945813862233796, "grad_norm": 0.3240255117416382, "learning_rate": 6.1553176241172155e-06, "loss": 0.1484, "step": 44549 }, { "epoch": 0.7945992223450933, "grad_norm": 0.2148492932319641, "learning_rate": 6.154294850722112e-06, "loss": 0.1056, "step": 44550 }, { "epoch": 0.794617058466807, "grad_norm": 0.2300100475549698, "learning_rate": 6.1532721503794775e-06, "loss": 0.0888, "step": 44551 }, { "epoch": 0.7946348945885207, "grad_norm": 0.3131921589374542, "learning_rate": 6.152249523093262e-06, "loss": 0.1333, "step": 44552 }, { "epoch": 0.7946527307102343, "grad_norm": 0.3293219208717346, "learning_rate": 6.151226968867443e-06, "loss": 0.1047, "step": 44553 }, { "epoch": 0.794670566831948, "grad_norm": 0.19201670587062836, "learning_rate": 6.150204487705982e-06, "loss": 0.0926, "step": 44554 }, { "epoch": 0.7946884029536617, "grad_norm": 0.29677143692970276, "learning_rate": 6.149182079612828e-06, "loss": 0.0903, "step": 44555 }, { "epoch": 0.7947062390753754, "grad_norm": 0.2133985310792923, "learning_rate": 6.148159744591966e-06, "loss": 0.081, "step": 44556 }, { "epoch": 0.7947240751970891, "grad_norm": 0.25235363841056824, "learning_rate": 6.147137482647344e-06, "loss": 0.1127, "step": 44557 }, { "epoch": 0.7947419113188028, "grad_norm": 0.31030669808387756, "learning_rate": 6.1461152937829335e-06, "loss": 0.1009, "step": 44558 }, { "epoch": 0.7947597474405166, "grad_norm": 0.42810261249542236, "learning_rate": 6.145093178002681e-06, "loss": 0.1793, "step": 44559 }, { "epoch": 0.7947775835622303, "grad_norm": 0.32217907905578613, "learning_rate": 6.1440711353105665e-06, "loss": 0.1543, "step": 44560 }, { "epoch": 0.794795419683944, "grad_norm": 0.28193140029907227, "learning_rate": 6.143049165710549e-06, "loss": 0.1, "step": 44561 }, { "epoch": 0.7948132558056576, "grad_norm": 0.29300662875175476, "learning_rate": 6.142027269206582e-06, "loss": 0.1146, "step": 44562 }, { "epoch": 0.7948310919273713, "grad_norm": 0.24098087847232819, "learning_rate": 6.141005445802625e-06, "loss": 0.1265, "step": 44563 }, { "epoch": 0.794848928049085, "grad_norm": 0.2366962730884552, "learning_rate": 6.139983695502652e-06, "loss": 0.1291, "step": 44564 }, { "epoch": 0.7948667641707987, "grad_norm": 0.2564215064048767, "learning_rate": 6.13896201831061e-06, "loss": 0.1208, "step": 44565 }, { "epoch": 0.7948846002925124, "grad_norm": 0.41292035579681396, "learning_rate": 6.13794041423047e-06, "loss": 0.15, "step": 44566 }, { "epoch": 0.7949024364142261, "grad_norm": 0.29780304431915283, "learning_rate": 6.136918883266188e-06, "loss": 0.1376, "step": 44567 }, { "epoch": 0.7949202725359398, "grad_norm": 0.24170997738838196, "learning_rate": 6.1358974254217185e-06, "loss": 0.0818, "step": 44568 }, { "epoch": 0.7949381086576535, "grad_norm": 0.23589842021465302, "learning_rate": 6.134876040701032e-06, "loss": 0.0819, "step": 44569 }, { "epoch": 0.7949559447793672, "grad_norm": 0.25793835520744324, "learning_rate": 6.1338547291080825e-06, "loss": 0.1173, "step": 44570 }, { "epoch": 0.7949737809010808, "grad_norm": 0.3261379599571228, "learning_rate": 6.1328334906468275e-06, "loss": 0.1239, "step": 44571 }, { "epoch": 0.7949916170227945, "grad_norm": 0.3002544939517975, "learning_rate": 6.131812325321218e-06, "loss": 0.0956, "step": 44572 }, { "epoch": 0.7950094531445082, "grad_norm": 0.30837634205818176, "learning_rate": 6.13079123313523e-06, "loss": 0.093, "step": 44573 }, { "epoch": 0.7950272892662219, "grad_norm": 0.24205940961837769, "learning_rate": 6.129770214092812e-06, "loss": 0.1495, "step": 44574 }, { "epoch": 0.7950451253879357, "grad_norm": 0.3052802085876465, "learning_rate": 6.128749268197923e-06, "loss": 0.1284, "step": 44575 }, { "epoch": 0.7950629615096494, "grad_norm": 0.22689349949359894, "learning_rate": 6.12772839545451e-06, "loss": 0.1016, "step": 44576 }, { "epoch": 0.7950807976313631, "grad_norm": 0.20815925300121307, "learning_rate": 6.1267075958665484e-06, "loss": 0.0928, "step": 44577 }, { "epoch": 0.7950986337530768, "grad_norm": 0.23941612243652344, "learning_rate": 6.125686869437988e-06, "loss": 0.0704, "step": 44578 }, { "epoch": 0.7951164698747905, "grad_norm": 0.2548205852508545, "learning_rate": 6.1246662161727755e-06, "loss": 0.1272, "step": 44579 }, { "epoch": 0.7951343059965041, "grad_norm": 0.2453823983669281, "learning_rate": 6.1236456360748846e-06, "loss": 0.0806, "step": 44580 }, { "epoch": 0.7951521421182178, "grad_norm": 0.3220204710960388, "learning_rate": 6.122625129148254e-06, "loss": 0.1581, "step": 44581 }, { "epoch": 0.7951699782399315, "grad_norm": 0.3273429274559021, "learning_rate": 6.121604695396857e-06, "loss": 0.0941, "step": 44582 }, { "epoch": 0.7951878143616452, "grad_norm": 0.248832568526268, "learning_rate": 6.120584334824642e-06, "loss": 0.0775, "step": 44583 }, { "epoch": 0.7952056504833589, "grad_norm": 0.21122051775455475, "learning_rate": 6.119564047435561e-06, "loss": 0.0798, "step": 44584 }, { "epoch": 0.7952234866050726, "grad_norm": 0.28824931383132935, "learning_rate": 6.118543833233562e-06, "loss": 0.1304, "step": 44585 }, { "epoch": 0.7952413227267863, "grad_norm": 0.22789673507213593, "learning_rate": 6.117523692222618e-06, "loss": 0.1034, "step": 44586 }, { "epoch": 0.7952591588485, "grad_norm": 0.3981494605541229, "learning_rate": 6.116503624406675e-06, "loss": 0.1692, "step": 44587 }, { "epoch": 0.7952769949702136, "grad_norm": 0.3661998212337494, "learning_rate": 6.115483629789684e-06, "loss": 0.1225, "step": 44588 }, { "epoch": 0.7952948310919273, "grad_norm": 0.26195618510246277, "learning_rate": 6.114463708375595e-06, "loss": 0.0743, "step": 44589 }, { "epoch": 0.795312667213641, "grad_norm": 0.2997496724128723, "learning_rate": 6.1134438601683716e-06, "loss": 0.1042, "step": 44590 }, { "epoch": 0.7953305033353547, "grad_norm": 0.3216645419597626, "learning_rate": 6.1124240851719665e-06, "loss": 0.1218, "step": 44591 }, { "epoch": 0.7953483394570685, "grad_norm": 0.35883399844169617, "learning_rate": 6.11140438339032e-06, "loss": 0.1306, "step": 44592 }, { "epoch": 0.7953661755787822, "grad_norm": 0.29565638303756714, "learning_rate": 6.110384754827403e-06, "loss": 0.0907, "step": 44593 }, { "epoch": 0.7953840117004959, "grad_norm": 0.2854943573474884, "learning_rate": 6.109365199487152e-06, "loss": 0.1296, "step": 44594 }, { "epoch": 0.7954018478222096, "grad_norm": 0.3640003204345703, "learning_rate": 6.1083457173735315e-06, "loss": 0.0883, "step": 44595 }, { "epoch": 0.7954196839439233, "grad_norm": 0.2673422396183014, "learning_rate": 6.107326308490488e-06, "loss": 0.1095, "step": 44596 }, { "epoch": 0.795437520065637, "grad_norm": 0.27546003460884094, "learning_rate": 6.106306972841974e-06, "loss": 0.1191, "step": 44597 }, { "epoch": 0.7954553561873506, "grad_norm": 0.24141313135623932, "learning_rate": 6.105287710431934e-06, "loss": 0.0908, "step": 44598 }, { "epoch": 0.7954731923090643, "grad_norm": 0.29652106761932373, "learning_rate": 6.104268521264331e-06, "loss": 0.1171, "step": 44599 }, { "epoch": 0.795491028430778, "grad_norm": 0.27894893288612366, "learning_rate": 6.103249405343109e-06, "loss": 0.0891, "step": 44600 }, { "epoch": 0.7955088645524917, "grad_norm": 0.24989303946495056, "learning_rate": 6.102230362672218e-06, "loss": 0.1147, "step": 44601 }, { "epoch": 0.7955267006742054, "grad_norm": 0.32122403383255005, "learning_rate": 6.101211393255602e-06, "loss": 0.0934, "step": 44602 }, { "epoch": 0.7955445367959191, "grad_norm": 0.25291064381599426, "learning_rate": 6.1001924970972265e-06, "loss": 0.0841, "step": 44603 }, { "epoch": 0.7955623729176328, "grad_norm": 0.2650677561759949, "learning_rate": 6.099173674201034e-06, "loss": 0.1391, "step": 44604 }, { "epoch": 0.7955802090393465, "grad_norm": 0.25109490752220154, "learning_rate": 6.09815492457097e-06, "loss": 0.0925, "step": 44605 }, { "epoch": 0.7955980451610601, "grad_norm": 0.3072631359100342, "learning_rate": 6.0971362482109805e-06, "loss": 0.1582, "step": 44606 }, { "epoch": 0.7956158812827738, "grad_norm": 0.24367989599704742, "learning_rate": 6.096117645125018e-06, "loss": 0.0806, "step": 44607 }, { "epoch": 0.7956337174044875, "grad_norm": 0.28080081939697266, "learning_rate": 6.095099115317041e-06, "loss": 0.1297, "step": 44608 }, { "epoch": 0.7956515535262013, "grad_norm": 0.2517644762992859, "learning_rate": 6.0940806587909904e-06, "loss": 0.1446, "step": 44609 }, { "epoch": 0.795669389647915, "grad_norm": 0.3113599717617035, "learning_rate": 6.093062275550812e-06, "loss": 0.138, "step": 44610 }, { "epoch": 0.7956872257696287, "grad_norm": 0.20456230640411377, "learning_rate": 6.092043965600447e-06, "loss": 0.0884, "step": 44611 }, { "epoch": 0.7957050618913424, "grad_norm": 0.4639633595943451, "learning_rate": 6.091025728943858e-06, "loss": 0.1647, "step": 44612 }, { "epoch": 0.7957228980130561, "grad_norm": 0.21989652514457703, "learning_rate": 6.090007565584982e-06, "loss": 0.1022, "step": 44613 }, { "epoch": 0.7957407341347698, "grad_norm": 0.35461410880088806, "learning_rate": 6.088989475527771e-06, "loss": 0.1562, "step": 44614 }, { "epoch": 0.7957585702564834, "grad_norm": 0.28047627210617065, "learning_rate": 6.08797145877616e-06, "loss": 0.1617, "step": 44615 }, { "epoch": 0.7957764063781971, "grad_norm": 0.24063174426555634, "learning_rate": 6.086953515334109e-06, "loss": 0.1073, "step": 44616 }, { "epoch": 0.7957942424999108, "grad_norm": 0.38832253217697144, "learning_rate": 6.085935645205562e-06, "loss": 0.0877, "step": 44617 }, { "epoch": 0.7958120786216245, "grad_norm": 0.30571722984313965, "learning_rate": 6.0849178483944585e-06, "loss": 0.0976, "step": 44618 }, { "epoch": 0.7958299147433382, "grad_norm": 0.27339184284210205, "learning_rate": 6.0839001249047425e-06, "loss": 0.1694, "step": 44619 }, { "epoch": 0.7958477508650519, "grad_norm": 0.2583093047142029, "learning_rate": 6.082882474740367e-06, "loss": 0.0939, "step": 44620 }, { "epoch": 0.7958655869867656, "grad_norm": 0.2858339846134186, "learning_rate": 6.0818648979052675e-06, "loss": 0.1473, "step": 44621 }, { "epoch": 0.7958834231084793, "grad_norm": 0.309569776058197, "learning_rate": 6.0808473944033995e-06, "loss": 0.1099, "step": 44622 }, { "epoch": 0.795901259230193, "grad_norm": 0.28179314732551575, "learning_rate": 6.079829964238704e-06, "loss": 0.096, "step": 44623 }, { "epoch": 0.7959190953519066, "grad_norm": 0.2213532030582428, "learning_rate": 6.07881260741511e-06, "loss": 0.1289, "step": 44624 }, { "epoch": 0.7959369314736203, "grad_norm": 0.2249257117509842, "learning_rate": 6.077795323936586e-06, "loss": 0.0726, "step": 44625 }, { "epoch": 0.7959547675953341, "grad_norm": 0.37136173248291016, "learning_rate": 6.07677811380706e-06, "loss": 0.0576, "step": 44626 }, { "epoch": 0.7959726037170478, "grad_norm": 0.21767722070217133, "learning_rate": 6.0757609770304785e-06, "loss": 0.0667, "step": 44627 }, { "epoch": 0.7959904398387615, "grad_norm": 0.3218723237514496, "learning_rate": 6.0747439136107785e-06, "loss": 0.1565, "step": 44628 }, { "epoch": 0.7960082759604752, "grad_norm": 0.23924113810062408, "learning_rate": 6.073726923551912e-06, "loss": 0.1174, "step": 44629 }, { "epoch": 0.7960261120821889, "grad_norm": 0.3085210919380188, "learning_rate": 6.072710006857818e-06, "loss": 0.0882, "step": 44630 }, { "epoch": 0.7960439482039026, "grad_norm": 0.3239879906177521, "learning_rate": 6.071693163532438e-06, "loss": 0.1402, "step": 44631 }, { "epoch": 0.7960617843256163, "grad_norm": 0.35857465863227844, "learning_rate": 6.070676393579711e-06, "loss": 0.19, "step": 44632 }, { "epoch": 0.7960796204473299, "grad_norm": 0.2874899208545685, "learning_rate": 6.069659697003574e-06, "loss": 0.1096, "step": 44633 }, { "epoch": 0.7960974565690436, "grad_norm": 0.32893192768096924, "learning_rate": 6.068643073807975e-06, "loss": 0.1301, "step": 44634 }, { "epoch": 0.7961152926907573, "grad_norm": 0.25399476289749146, "learning_rate": 6.067626523996861e-06, "loss": 0.1149, "step": 44635 }, { "epoch": 0.796133128812471, "grad_norm": 0.25364476442337036, "learning_rate": 6.066610047574164e-06, "loss": 0.0856, "step": 44636 }, { "epoch": 0.7961509649341847, "grad_norm": 0.2583354711532593, "learning_rate": 6.065593644543821e-06, "loss": 0.0986, "step": 44637 }, { "epoch": 0.7961688010558984, "grad_norm": 0.30223697423934937, "learning_rate": 6.064577314909784e-06, "loss": 0.1833, "step": 44638 }, { "epoch": 0.7961866371776121, "grad_norm": 0.39697685837745667, "learning_rate": 6.063561058675985e-06, "loss": 0.1055, "step": 44639 }, { "epoch": 0.7962044732993258, "grad_norm": 0.24909250438213348, "learning_rate": 6.062544875846362e-06, "loss": 0.1085, "step": 44640 }, { "epoch": 0.7962223094210394, "grad_norm": 0.2788204848766327, "learning_rate": 6.061528766424851e-06, "loss": 0.0938, "step": 44641 }, { "epoch": 0.7962401455427531, "grad_norm": 0.23912757635116577, "learning_rate": 6.060512730415402e-06, "loss": 0.0762, "step": 44642 }, { "epoch": 0.7962579816644669, "grad_norm": 0.2298707365989685, "learning_rate": 6.0594967678219455e-06, "loss": 0.1152, "step": 44643 }, { "epoch": 0.7962758177861806, "grad_norm": 0.2764599323272705, "learning_rate": 6.058480878648423e-06, "loss": 0.1827, "step": 44644 }, { "epoch": 0.7962936539078943, "grad_norm": 0.22942933440208435, "learning_rate": 6.05746506289877e-06, "loss": 0.0749, "step": 44645 }, { "epoch": 0.796311490029608, "grad_norm": 0.28335922956466675, "learning_rate": 6.05644932057692e-06, "loss": 0.128, "step": 44646 }, { "epoch": 0.7963293261513217, "grad_norm": 0.3464876115322113, "learning_rate": 6.055433651686821e-06, "loss": 0.1522, "step": 44647 }, { "epoch": 0.7963471622730354, "grad_norm": 0.24680723249912262, "learning_rate": 6.054418056232397e-06, "loss": 0.1112, "step": 44648 }, { "epoch": 0.7963649983947491, "grad_norm": 0.31065842509269714, "learning_rate": 6.053402534217601e-06, "loss": 0.1648, "step": 44649 }, { "epoch": 0.7963828345164627, "grad_norm": 0.3023083806037903, "learning_rate": 6.052387085646349e-06, "loss": 0.1195, "step": 44650 }, { "epoch": 0.7964006706381764, "grad_norm": 0.2382739782333374, "learning_rate": 6.051371710522599e-06, "loss": 0.1388, "step": 44651 }, { "epoch": 0.7964185067598901, "grad_norm": 0.2436676025390625, "learning_rate": 6.050356408850277e-06, "loss": 0.1158, "step": 44652 }, { "epoch": 0.7964363428816038, "grad_norm": 0.30252712965011597, "learning_rate": 6.0493411806333195e-06, "loss": 0.178, "step": 44653 }, { "epoch": 0.7964541790033175, "grad_norm": 0.27006739377975464, "learning_rate": 6.048326025875653e-06, "loss": 0.1361, "step": 44654 }, { "epoch": 0.7964720151250312, "grad_norm": 0.2818959057331085, "learning_rate": 6.047310944581228e-06, "loss": 0.1102, "step": 44655 }, { "epoch": 0.7964898512467449, "grad_norm": 0.2770169973373413, "learning_rate": 6.046295936753971e-06, "loss": 0.1406, "step": 44656 }, { "epoch": 0.7965076873684586, "grad_norm": 0.422354519367218, "learning_rate": 6.045281002397818e-06, "loss": 0.1102, "step": 44657 }, { "epoch": 0.7965255234901722, "grad_norm": 0.30090510845184326, "learning_rate": 6.0442661415167026e-06, "loss": 0.1127, "step": 44658 }, { "epoch": 0.7965433596118859, "grad_norm": 0.20314498245716095, "learning_rate": 6.043251354114552e-06, "loss": 0.1054, "step": 44659 }, { "epoch": 0.7965611957335997, "grad_norm": 0.32014185190200806, "learning_rate": 6.042236640195312e-06, "loss": 0.1379, "step": 44660 }, { "epoch": 0.7965790318553134, "grad_norm": 0.3027096092700958, "learning_rate": 6.041221999762911e-06, "loss": 0.1125, "step": 44661 }, { "epoch": 0.7965968679770271, "grad_norm": 0.2541898787021637, "learning_rate": 6.0402074328212764e-06, "loss": 0.0664, "step": 44662 }, { "epoch": 0.7966147040987408, "grad_norm": 0.2724062204360962, "learning_rate": 6.039192939374344e-06, "loss": 0.0824, "step": 44663 }, { "epoch": 0.7966325402204545, "grad_norm": 0.2679769992828369, "learning_rate": 6.038178519426057e-06, "loss": 0.0988, "step": 44664 }, { "epoch": 0.7966503763421682, "grad_norm": 0.23171375691890717, "learning_rate": 6.0371641729803395e-06, "loss": 0.1044, "step": 44665 }, { "epoch": 0.7966682124638819, "grad_norm": 0.2951165735721588, "learning_rate": 6.036149900041124e-06, "loss": 0.0964, "step": 44666 }, { "epoch": 0.7966860485855956, "grad_norm": 0.28471213579177856, "learning_rate": 6.035135700612332e-06, "loss": 0.1364, "step": 44667 }, { "epoch": 0.7967038847073092, "grad_norm": 0.34432467818260193, "learning_rate": 6.034121574697912e-06, "loss": 0.1306, "step": 44668 }, { "epoch": 0.7967217208290229, "grad_norm": 0.24009296298027039, "learning_rate": 6.03310752230179e-06, "loss": 0.1078, "step": 44669 }, { "epoch": 0.7967395569507366, "grad_norm": 0.2597629129886627, "learning_rate": 6.032093543427892e-06, "loss": 0.0942, "step": 44670 }, { "epoch": 0.7967573930724503, "grad_norm": 0.23199214041233063, "learning_rate": 6.03107963808015e-06, "loss": 0.1127, "step": 44671 }, { "epoch": 0.796775229194164, "grad_norm": 0.2756112515926361, "learning_rate": 6.030065806262489e-06, "loss": 0.1017, "step": 44672 }, { "epoch": 0.7967930653158777, "grad_norm": 0.26526373624801636, "learning_rate": 6.0290520479788506e-06, "loss": 0.1064, "step": 44673 }, { "epoch": 0.7968109014375914, "grad_norm": 0.29982566833496094, "learning_rate": 6.02803836323316e-06, "loss": 0.1343, "step": 44674 }, { "epoch": 0.7968287375593051, "grad_norm": 0.2448246330022812, "learning_rate": 6.02702475202934e-06, "loss": 0.0778, "step": 44675 }, { "epoch": 0.7968465736810187, "grad_norm": 0.2666827440261841, "learning_rate": 6.02601121437133e-06, "loss": 0.0803, "step": 44676 }, { "epoch": 0.7968644098027325, "grad_norm": 0.2296871840953827, "learning_rate": 6.024997750263045e-06, "loss": 0.1407, "step": 44677 }, { "epoch": 0.7968822459244462, "grad_norm": 0.2853383719921112, "learning_rate": 6.023984359708432e-06, "loss": 0.1397, "step": 44678 }, { "epoch": 0.7969000820461599, "grad_norm": 0.25889334082603455, "learning_rate": 6.0229710427114115e-06, "loss": 0.127, "step": 44679 }, { "epoch": 0.7969179181678736, "grad_norm": 0.22680220007896423, "learning_rate": 6.021957799275898e-06, "loss": 0.1438, "step": 44680 }, { "epoch": 0.7969357542895873, "grad_norm": 0.31417372822761536, "learning_rate": 6.020944629405842e-06, "loss": 0.1408, "step": 44681 }, { "epoch": 0.796953590411301, "grad_norm": 0.25277039408683777, "learning_rate": 6.019931533105158e-06, "loss": 0.0941, "step": 44682 }, { "epoch": 0.7969714265330147, "grad_norm": 0.35460928082466125, "learning_rate": 6.018918510377777e-06, "loss": 0.1429, "step": 44683 }, { "epoch": 0.7969892626547284, "grad_norm": 0.28174686431884766, "learning_rate": 6.017905561227621e-06, "loss": 0.1385, "step": 44684 }, { "epoch": 0.797007098776442, "grad_norm": 0.24566136300563812, "learning_rate": 6.0168926856586126e-06, "loss": 0.0646, "step": 44685 }, { "epoch": 0.7970249348981557, "grad_norm": 0.27640655636787415, "learning_rate": 6.015879883674694e-06, "loss": 0.1158, "step": 44686 }, { "epoch": 0.7970427710198694, "grad_norm": 0.4684193730354309, "learning_rate": 6.014867155279779e-06, "loss": 0.1207, "step": 44687 }, { "epoch": 0.7970606071415831, "grad_norm": 0.3309164047241211, "learning_rate": 6.0138545004777976e-06, "loss": 0.1212, "step": 44688 }, { "epoch": 0.7970784432632968, "grad_norm": 0.21871930360794067, "learning_rate": 6.012841919272666e-06, "loss": 0.123, "step": 44689 }, { "epoch": 0.7970962793850105, "grad_norm": 0.37281590700149536, "learning_rate": 6.011829411668318e-06, "loss": 0.1214, "step": 44690 }, { "epoch": 0.7971141155067242, "grad_norm": 0.4736943542957306, "learning_rate": 6.010816977668684e-06, "loss": 0.1526, "step": 44691 }, { "epoch": 0.7971319516284379, "grad_norm": 0.22863249480724335, "learning_rate": 6.009804617277681e-06, "loss": 0.0725, "step": 44692 }, { "epoch": 0.7971497877501517, "grad_norm": 0.19314421713352203, "learning_rate": 6.00879233049923e-06, "loss": 0.072, "step": 44693 }, { "epoch": 0.7971676238718653, "grad_norm": 0.2543048858642578, "learning_rate": 6.007780117337264e-06, "loss": 0.1051, "step": 44694 }, { "epoch": 0.797185459993579, "grad_norm": 0.29834023118019104, "learning_rate": 6.0067679777957045e-06, "loss": 0.1117, "step": 44695 }, { "epoch": 0.7972032961152927, "grad_norm": 0.3886660039424896, "learning_rate": 6.005755911878469e-06, "loss": 0.1145, "step": 44696 }, { "epoch": 0.7972211322370064, "grad_norm": 0.31640034914016724, "learning_rate": 6.004743919589487e-06, "loss": 0.191, "step": 44697 }, { "epoch": 0.7972389683587201, "grad_norm": 0.3111501634120941, "learning_rate": 6.003732000932669e-06, "loss": 0.1042, "step": 44698 }, { "epoch": 0.7972568044804338, "grad_norm": 0.2425641417503357, "learning_rate": 6.002720155911956e-06, "loss": 0.1194, "step": 44699 }, { "epoch": 0.7972746406021475, "grad_norm": 0.26602521538734436, "learning_rate": 6.00170838453126e-06, "loss": 0.1091, "step": 44700 }, { "epoch": 0.7972924767238612, "grad_norm": 0.28066277503967285, "learning_rate": 6.000696686794502e-06, "loss": 0.1315, "step": 44701 }, { "epoch": 0.7973103128455749, "grad_norm": 0.2511526942253113, "learning_rate": 5.999685062705601e-06, "loss": 0.0851, "step": 44702 }, { "epoch": 0.7973281489672885, "grad_norm": 0.18181085586547852, "learning_rate": 5.99867351226849e-06, "loss": 0.098, "step": 44703 }, { "epoch": 0.7973459850890022, "grad_norm": 0.24556975066661835, "learning_rate": 5.997662035487075e-06, "loss": 0.1115, "step": 44704 }, { "epoch": 0.7973638212107159, "grad_norm": 0.3535599708557129, "learning_rate": 5.996650632365294e-06, "loss": 0.1115, "step": 44705 }, { "epoch": 0.7973816573324296, "grad_norm": 0.22648532688617706, "learning_rate": 5.995639302907052e-06, "loss": 0.1191, "step": 44706 }, { "epoch": 0.7973994934541433, "grad_norm": 0.27012690901756287, "learning_rate": 5.994628047116282e-06, "loss": 0.0924, "step": 44707 }, { "epoch": 0.797417329575857, "grad_norm": 0.3251741826534271, "learning_rate": 5.993616864996898e-06, "loss": 0.1351, "step": 44708 }, { "epoch": 0.7974351656975707, "grad_norm": 0.33112552762031555, "learning_rate": 5.992605756552819e-06, "loss": 0.1522, "step": 44709 }, { "epoch": 0.7974530018192845, "grad_norm": 0.2304413616657257, "learning_rate": 5.991594721787966e-06, "loss": 0.1318, "step": 44710 }, { "epoch": 0.7974708379409982, "grad_norm": 0.2530154883861542, "learning_rate": 5.99058376070625e-06, "loss": 0.0619, "step": 44711 }, { "epoch": 0.7974886740627118, "grad_norm": 0.290822297334671, "learning_rate": 5.989572873311605e-06, "loss": 0.093, "step": 44712 }, { "epoch": 0.7975065101844255, "grad_norm": 0.27113252878189087, "learning_rate": 5.988562059607939e-06, "loss": 0.1283, "step": 44713 }, { "epoch": 0.7975243463061392, "grad_norm": 0.18801848590373993, "learning_rate": 5.987551319599174e-06, "loss": 0.0841, "step": 44714 }, { "epoch": 0.7975421824278529, "grad_norm": 0.3197059631347656, "learning_rate": 5.986540653289221e-06, "loss": 0.0869, "step": 44715 }, { "epoch": 0.7975600185495666, "grad_norm": 0.2285647839307785, "learning_rate": 5.98553006068201e-06, "loss": 0.0883, "step": 44716 }, { "epoch": 0.7975778546712803, "grad_norm": 0.26284387707710266, "learning_rate": 5.984519541781444e-06, "loss": 0.0914, "step": 44717 }, { "epoch": 0.797595690792994, "grad_norm": 0.2824154496192932, "learning_rate": 5.9835090965914555e-06, "loss": 0.1089, "step": 44718 }, { "epoch": 0.7976135269147077, "grad_norm": 0.2424059361219406, "learning_rate": 5.9824987251159455e-06, "loss": 0.1448, "step": 44719 }, { "epoch": 0.7976313630364213, "grad_norm": 0.26008403301239014, "learning_rate": 5.981488427358847e-06, "loss": 0.1072, "step": 44720 }, { "epoch": 0.797649199158135, "grad_norm": 0.32084017992019653, "learning_rate": 5.980478203324069e-06, "loss": 0.1134, "step": 44721 }, { "epoch": 0.7976670352798487, "grad_norm": 0.40345853567123413, "learning_rate": 5.9794680530155246e-06, "loss": 0.1691, "step": 44722 }, { "epoch": 0.7976848714015624, "grad_norm": 0.34984374046325684, "learning_rate": 5.978457976437132e-06, "loss": 0.1388, "step": 44723 }, { "epoch": 0.7977027075232761, "grad_norm": 0.2532453238964081, "learning_rate": 5.977447973592801e-06, "loss": 0.1023, "step": 44724 }, { "epoch": 0.7977205436449898, "grad_norm": 0.2549111545085907, "learning_rate": 5.976438044486457e-06, "loss": 0.1178, "step": 44725 }, { "epoch": 0.7977383797667035, "grad_norm": 0.29619550704956055, "learning_rate": 5.975428189122009e-06, "loss": 0.1309, "step": 44726 }, { "epoch": 0.7977562158884173, "grad_norm": 0.29465508460998535, "learning_rate": 5.974418407503374e-06, "loss": 0.1491, "step": 44727 }, { "epoch": 0.797774052010131, "grad_norm": 0.36146080493927, "learning_rate": 5.973408699634459e-06, "loss": 0.1108, "step": 44728 }, { "epoch": 0.7977918881318447, "grad_norm": 0.30643779039382935, "learning_rate": 5.972399065519188e-06, "loss": 0.195, "step": 44729 }, { "epoch": 0.7978097242535583, "grad_norm": 0.34295615553855896, "learning_rate": 5.97138950516147e-06, "loss": 0.1128, "step": 44730 }, { "epoch": 0.797827560375272, "grad_norm": 0.2404564768075943, "learning_rate": 5.970380018565211e-06, "loss": 0.1023, "step": 44731 }, { "epoch": 0.7978453964969857, "grad_norm": 0.31376343965530396, "learning_rate": 5.969370605734339e-06, "loss": 0.0984, "step": 44732 }, { "epoch": 0.7978632326186994, "grad_norm": 0.2687996029853821, "learning_rate": 5.968361266672754e-06, "loss": 0.0991, "step": 44733 }, { "epoch": 0.7978810687404131, "grad_norm": 0.20550496876239777, "learning_rate": 5.96735200138438e-06, "loss": 0.1258, "step": 44734 }, { "epoch": 0.7978989048621268, "grad_norm": 0.27825260162353516, "learning_rate": 5.966342809873124e-06, "loss": 0.1251, "step": 44735 }, { "epoch": 0.7979167409838405, "grad_norm": 0.22712254524230957, "learning_rate": 5.965333692142896e-06, "loss": 0.1051, "step": 44736 }, { "epoch": 0.7979345771055542, "grad_norm": 0.30901479721069336, "learning_rate": 5.964324648197603e-06, "loss": 0.1157, "step": 44737 }, { "epoch": 0.7979524132272678, "grad_norm": 0.26727601885795593, "learning_rate": 5.9633156780411705e-06, "loss": 0.0934, "step": 44738 }, { "epoch": 0.7979702493489815, "grad_norm": 0.22644126415252686, "learning_rate": 5.962306781677499e-06, "loss": 0.1207, "step": 44739 }, { "epoch": 0.7979880854706952, "grad_norm": 0.27483099699020386, "learning_rate": 5.961297959110504e-06, "loss": 0.0998, "step": 44740 }, { "epoch": 0.7980059215924089, "grad_norm": 0.33201122283935547, "learning_rate": 5.960289210344086e-06, "loss": 0.1532, "step": 44741 }, { "epoch": 0.7980237577141226, "grad_norm": 0.289569616317749, "learning_rate": 5.9592805353821695e-06, "loss": 0.1328, "step": 44742 }, { "epoch": 0.7980415938358363, "grad_norm": 0.30282092094421387, "learning_rate": 5.958271934228659e-06, "loss": 0.1476, "step": 44743 }, { "epoch": 0.7980594299575501, "grad_norm": 0.2952158749103546, "learning_rate": 5.957263406887456e-06, "loss": 0.1093, "step": 44744 }, { "epoch": 0.7980772660792638, "grad_norm": 0.2503235340118408, "learning_rate": 5.956254953362483e-06, "loss": 0.1085, "step": 44745 }, { "epoch": 0.7980951022009775, "grad_norm": 0.35521817207336426, "learning_rate": 5.9552465736576385e-06, "loss": 0.1364, "step": 44746 }, { "epoch": 0.7981129383226911, "grad_norm": 0.2889169454574585, "learning_rate": 5.954238267776841e-06, "loss": 0.1091, "step": 44747 }, { "epoch": 0.7981307744444048, "grad_norm": 0.2498825639486313, "learning_rate": 5.9532300357239944e-06, "loss": 0.099, "step": 44748 }, { "epoch": 0.7981486105661185, "grad_norm": 0.25176000595092773, "learning_rate": 5.952221877503009e-06, "loss": 0.0684, "step": 44749 }, { "epoch": 0.7981664466878322, "grad_norm": 0.2770783305168152, "learning_rate": 5.951213793117783e-06, "loss": 0.097, "step": 44750 }, { "epoch": 0.7981842828095459, "grad_norm": 0.2908453345298767, "learning_rate": 5.950205782572235e-06, "loss": 0.114, "step": 44751 }, { "epoch": 0.7982021189312596, "grad_norm": 0.35370782017707825, "learning_rate": 5.9491978458702716e-06, "loss": 0.1459, "step": 44752 }, { "epoch": 0.7982199550529733, "grad_norm": 0.2853609025478363, "learning_rate": 5.948189983015798e-06, "loss": 0.1001, "step": 44753 }, { "epoch": 0.798237791174687, "grad_norm": 0.2976193428039551, "learning_rate": 5.947182194012712e-06, "loss": 0.1096, "step": 44754 }, { "epoch": 0.7982556272964006, "grad_norm": 0.3756830096244812, "learning_rate": 5.946174478864938e-06, "loss": 0.1281, "step": 44755 }, { "epoch": 0.7982734634181143, "grad_norm": 0.2182513028383255, "learning_rate": 5.945166837576369e-06, "loss": 0.1608, "step": 44756 }, { "epoch": 0.798291299539828, "grad_norm": 0.20052571594715118, "learning_rate": 5.944159270150917e-06, "loss": 0.0709, "step": 44757 }, { "epoch": 0.7983091356615417, "grad_norm": 0.21268951892852783, "learning_rate": 5.943151776592479e-06, "loss": 0.1195, "step": 44758 }, { "epoch": 0.7983269717832554, "grad_norm": 0.18963152170181274, "learning_rate": 5.9421443569049715e-06, "loss": 0.0838, "step": 44759 }, { "epoch": 0.7983448079049691, "grad_norm": 0.21648621559143066, "learning_rate": 5.941137011092288e-06, "loss": 0.0591, "step": 44760 }, { "epoch": 0.7983626440266829, "grad_norm": 0.29309189319610596, "learning_rate": 5.940129739158348e-06, "loss": 0.139, "step": 44761 }, { "epoch": 0.7983804801483966, "grad_norm": 0.26586008071899414, "learning_rate": 5.939122541107048e-06, "loss": 0.0551, "step": 44762 }, { "epoch": 0.7983983162701103, "grad_norm": 0.2932027280330658, "learning_rate": 5.938115416942286e-06, "loss": 0.1666, "step": 44763 }, { "epoch": 0.798416152391824, "grad_norm": 0.28911998867988586, "learning_rate": 5.937108366667979e-06, "loss": 0.0879, "step": 44764 }, { "epoch": 0.7984339885135376, "grad_norm": 0.2317652851343155, "learning_rate": 5.936101390288023e-06, "loss": 0.0831, "step": 44765 }, { "epoch": 0.7984518246352513, "grad_norm": 0.22962601482868195, "learning_rate": 5.9350944878063256e-06, "loss": 0.1318, "step": 44766 }, { "epoch": 0.798469660756965, "grad_norm": 0.3798615038394928, "learning_rate": 5.934087659226775e-06, "loss": 0.1247, "step": 44767 }, { "epoch": 0.7984874968786787, "grad_norm": 0.2868122458457947, "learning_rate": 5.933080904553298e-06, "loss": 0.083, "step": 44768 }, { "epoch": 0.7985053330003924, "grad_norm": 0.374931663274765, "learning_rate": 5.9320742237897825e-06, "loss": 0.0878, "step": 44769 }, { "epoch": 0.7985231691221061, "grad_norm": 0.3661167621612549, "learning_rate": 5.9310676169401304e-06, "loss": 0.1071, "step": 44770 }, { "epoch": 0.7985410052438198, "grad_norm": 0.29262712597846985, "learning_rate": 5.9300610840082425e-06, "loss": 0.1313, "step": 44771 }, { "epoch": 0.7985588413655335, "grad_norm": 0.22950060665607452, "learning_rate": 5.929054624998032e-06, "loss": 0.1068, "step": 44772 }, { "epoch": 0.7985766774872471, "grad_norm": 0.24029165506362915, "learning_rate": 5.928048239913384e-06, "loss": 0.1175, "step": 44773 }, { "epoch": 0.7985945136089608, "grad_norm": 0.23541101813316345, "learning_rate": 5.927041928758217e-06, "loss": 0.0694, "step": 44774 }, { "epoch": 0.7986123497306745, "grad_norm": 0.28525310754776, "learning_rate": 5.926035691536421e-06, "loss": 0.1085, "step": 44775 }, { "epoch": 0.7986301858523882, "grad_norm": 0.29268792271614075, "learning_rate": 5.925029528251894e-06, "loss": 0.1015, "step": 44776 }, { "epoch": 0.7986480219741019, "grad_norm": 0.23851439356803894, "learning_rate": 5.924023438908546e-06, "loss": 0.0766, "step": 44777 }, { "epoch": 0.7986658580958157, "grad_norm": 0.21398480236530304, "learning_rate": 5.923017423510271e-06, "loss": 0.0756, "step": 44778 }, { "epoch": 0.7986836942175294, "grad_norm": 0.31902143359184265, "learning_rate": 5.922011482060971e-06, "loss": 0.1305, "step": 44779 }, { "epoch": 0.7987015303392431, "grad_norm": 0.3252716064453125, "learning_rate": 5.921005614564537e-06, "loss": 0.14, "step": 44780 }, { "epoch": 0.7987193664609568, "grad_norm": 0.26099663972854614, "learning_rate": 5.919999821024883e-06, "loss": 0.1109, "step": 44781 }, { "epoch": 0.7987372025826704, "grad_norm": 0.3772627115249634, "learning_rate": 5.918994101445896e-06, "loss": 0.1663, "step": 44782 }, { "epoch": 0.7987550387043841, "grad_norm": 0.2689274251461029, "learning_rate": 5.91798845583148e-06, "loss": 0.1152, "step": 44783 }, { "epoch": 0.7987728748260978, "grad_norm": 0.3023391366004944, "learning_rate": 5.916982884185526e-06, "loss": 0.1308, "step": 44784 }, { "epoch": 0.7987907109478115, "grad_norm": 0.37498417496681213, "learning_rate": 5.915977386511942e-06, "loss": 0.1142, "step": 44785 }, { "epoch": 0.7988085470695252, "grad_norm": 0.3197740316390991, "learning_rate": 5.914971962814622e-06, "loss": 0.1826, "step": 44786 }, { "epoch": 0.7988263831912389, "grad_norm": 0.2431715875864029, "learning_rate": 5.9139666130974574e-06, "loss": 0.1326, "step": 44787 }, { "epoch": 0.7988442193129526, "grad_norm": 0.4366306662559509, "learning_rate": 5.9129613373643536e-06, "loss": 0.0868, "step": 44788 }, { "epoch": 0.7988620554346663, "grad_norm": 0.42088744044303894, "learning_rate": 5.911956135619201e-06, "loss": 0.1481, "step": 44789 }, { "epoch": 0.79887989155638, "grad_norm": 0.32417958974838257, "learning_rate": 5.910951007865903e-06, "loss": 0.1423, "step": 44790 }, { "epoch": 0.7988977276780936, "grad_norm": 0.3563603162765503, "learning_rate": 5.909945954108354e-06, "loss": 0.0791, "step": 44791 }, { "epoch": 0.7989155637998073, "grad_norm": 0.20565392076969147, "learning_rate": 5.908940974350446e-06, "loss": 0.1102, "step": 44792 }, { "epoch": 0.798933399921521, "grad_norm": 0.25399577617645264, "learning_rate": 5.90793606859607e-06, "loss": 0.1182, "step": 44793 }, { "epoch": 0.7989512360432348, "grad_norm": 0.3509679436683655, "learning_rate": 5.9069312368491365e-06, "loss": 0.116, "step": 44794 }, { "epoch": 0.7989690721649485, "grad_norm": 0.2200997918844223, "learning_rate": 5.9059264791135326e-06, "loss": 0.0994, "step": 44795 }, { "epoch": 0.7989869082866622, "grad_norm": 0.37163692712783813, "learning_rate": 5.904921795393151e-06, "loss": 0.1444, "step": 44796 }, { "epoch": 0.7990047444083759, "grad_norm": 0.4139951467514038, "learning_rate": 5.903917185691881e-06, "loss": 0.1314, "step": 44797 }, { "epoch": 0.7990225805300896, "grad_norm": 0.5621646046638489, "learning_rate": 5.902912650013631e-06, "loss": 0.1038, "step": 44798 }, { "epoch": 0.7990404166518033, "grad_norm": 0.28355610370635986, "learning_rate": 5.901908188362288e-06, "loss": 0.1275, "step": 44799 }, { "epoch": 0.7990582527735169, "grad_norm": 0.26689136028289795, "learning_rate": 5.9009038007417365e-06, "loss": 0.0829, "step": 44800 }, { "epoch": 0.7990760888952306, "grad_norm": 0.17263464629650116, "learning_rate": 5.899899487155886e-06, "loss": 0.0763, "step": 44801 }, { "epoch": 0.7990939250169443, "grad_norm": 0.2657405436038971, "learning_rate": 5.898895247608616e-06, "loss": 0.081, "step": 44802 }, { "epoch": 0.799111761138658, "grad_norm": 0.27339357137680054, "learning_rate": 5.897891082103832e-06, "loss": 0.1332, "step": 44803 }, { "epoch": 0.7991295972603717, "grad_norm": 0.17493490874767303, "learning_rate": 5.89688699064542e-06, "loss": 0.0746, "step": 44804 }, { "epoch": 0.7991474333820854, "grad_norm": 0.2501816749572754, "learning_rate": 5.895882973237271e-06, "loss": 0.1197, "step": 44805 }, { "epoch": 0.7991652695037991, "grad_norm": 0.24011845886707306, "learning_rate": 5.894879029883271e-06, "loss": 0.1051, "step": 44806 }, { "epoch": 0.7991831056255128, "grad_norm": 0.17578662931919098, "learning_rate": 5.893875160587325e-06, "loss": 0.0816, "step": 44807 }, { "epoch": 0.7992009417472264, "grad_norm": 0.2163763791322708, "learning_rate": 5.89287136535332e-06, "loss": 0.0955, "step": 44808 }, { "epoch": 0.7992187778689401, "grad_norm": 0.22871968150138855, "learning_rate": 5.891867644185145e-06, "loss": 0.0851, "step": 44809 }, { "epoch": 0.7992366139906538, "grad_norm": 0.298725426197052, "learning_rate": 5.8908639970866815e-06, "loss": 0.1068, "step": 44810 }, { "epoch": 0.7992544501123676, "grad_norm": 0.2751402258872986, "learning_rate": 5.889860424061838e-06, "loss": 0.1447, "step": 44811 }, { "epoch": 0.7992722862340813, "grad_norm": 0.2923526167869568, "learning_rate": 5.888856925114494e-06, "loss": 0.0838, "step": 44812 }, { "epoch": 0.799290122355795, "grad_norm": 0.2836334705352783, "learning_rate": 5.887853500248544e-06, "loss": 0.179, "step": 44813 }, { "epoch": 0.7993079584775087, "grad_norm": 0.21170085668563843, "learning_rate": 5.886850149467868e-06, "loss": 0.104, "step": 44814 }, { "epoch": 0.7993257945992224, "grad_norm": 0.3128630220890045, "learning_rate": 5.88584687277636e-06, "loss": 0.0634, "step": 44815 }, { "epoch": 0.7993436307209361, "grad_norm": 0.284605473279953, "learning_rate": 5.8848436701779195e-06, "loss": 0.0745, "step": 44816 }, { "epoch": 0.7993614668426497, "grad_norm": 0.3086356818675995, "learning_rate": 5.883840541676428e-06, "loss": 0.1369, "step": 44817 }, { "epoch": 0.7993793029643634, "grad_norm": 0.2522403299808502, "learning_rate": 5.882837487275769e-06, "loss": 0.1047, "step": 44818 }, { "epoch": 0.7993971390860771, "grad_norm": 0.2797737717628479, "learning_rate": 5.881834506979833e-06, "loss": 0.0854, "step": 44819 }, { "epoch": 0.7994149752077908, "grad_norm": 0.3291873335838318, "learning_rate": 5.880831600792514e-06, "loss": 0.1122, "step": 44820 }, { "epoch": 0.7994328113295045, "grad_norm": 0.2671118676662445, "learning_rate": 5.879828768717696e-06, "loss": 0.1256, "step": 44821 }, { "epoch": 0.7994506474512182, "grad_norm": 0.30141565203666687, "learning_rate": 5.878826010759264e-06, "loss": 0.1044, "step": 44822 }, { "epoch": 0.7994684835729319, "grad_norm": 0.4954824149608612, "learning_rate": 5.8778233269211e-06, "loss": 0.1582, "step": 44823 }, { "epoch": 0.7994863196946456, "grad_norm": 0.24249166250228882, "learning_rate": 5.876820717207107e-06, "loss": 0.1092, "step": 44824 }, { "epoch": 0.7995041558163593, "grad_norm": 0.2866115868091583, "learning_rate": 5.875818181621159e-06, "loss": 0.11, "step": 44825 }, { "epoch": 0.7995219919380729, "grad_norm": 0.28984498977661133, "learning_rate": 5.874815720167145e-06, "loss": 0.1341, "step": 44826 }, { "epoch": 0.7995398280597866, "grad_norm": 0.220419779419899, "learning_rate": 5.873813332848946e-06, "loss": 0.0618, "step": 44827 }, { "epoch": 0.7995576641815004, "grad_norm": 0.2809552550315857, "learning_rate": 5.87281101967046e-06, "loss": 0.0615, "step": 44828 }, { "epoch": 0.7995755003032141, "grad_norm": 0.2695557177066803, "learning_rate": 5.871808780635555e-06, "loss": 0.1046, "step": 44829 }, { "epoch": 0.7995933364249278, "grad_norm": 0.23549309372901917, "learning_rate": 5.870806615748134e-06, "loss": 0.1201, "step": 44830 }, { "epoch": 0.7996111725466415, "grad_norm": 0.33861011266708374, "learning_rate": 5.869804525012074e-06, "loss": 0.1121, "step": 44831 }, { "epoch": 0.7996290086683552, "grad_norm": 0.2549445629119873, "learning_rate": 5.8688025084312505e-06, "loss": 0.1328, "step": 44832 }, { "epoch": 0.7996468447900689, "grad_norm": 0.3028838336467743, "learning_rate": 5.867800566009565e-06, "loss": 0.1569, "step": 44833 }, { "epoch": 0.7996646809117826, "grad_norm": 0.252324640750885, "learning_rate": 5.8667986977508915e-06, "loss": 0.0899, "step": 44834 }, { "epoch": 0.7996825170334962, "grad_norm": 0.2836589813232422, "learning_rate": 5.865796903659115e-06, "loss": 0.1394, "step": 44835 }, { "epoch": 0.7997003531552099, "grad_norm": 0.22945933043956757, "learning_rate": 5.8647951837381114e-06, "loss": 0.0957, "step": 44836 }, { "epoch": 0.7997181892769236, "grad_norm": 0.36276066303253174, "learning_rate": 5.863793537991779e-06, "loss": 0.071, "step": 44837 }, { "epoch": 0.7997360253986373, "grad_norm": 0.2796826660633087, "learning_rate": 5.8627919664239915e-06, "loss": 0.0829, "step": 44838 }, { "epoch": 0.799753861520351, "grad_norm": 0.2649815082550049, "learning_rate": 5.86179046903863e-06, "loss": 0.1025, "step": 44839 }, { "epoch": 0.7997716976420647, "grad_norm": 0.26657721400260925, "learning_rate": 5.860789045839582e-06, "loss": 0.0982, "step": 44840 }, { "epoch": 0.7997895337637784, "grad_norm": 0.2324734777212143, "learning_rate": 5.859787696830718e-06, "loss": 0.1298, "step": 44841 }, { "epoch": 0.7998073698854921, "grad_norm": 0.21991927921772003, "learning_rate": 5.8587864220159325e-06, "loss": 0.1046, "step": 44842 }, { "epoch": 0.7998252060072057, "grad_norm": 0.20212183892726898, "learning_rate": 5.857785221399098e-06, "loss": 0.1175, "step": 44843 }, { "epoch": 0.7998430421289194, "grad_norm": 0.365593820810318, "learning_rate": 5.856784094984105e-06, "loss": 0.1121, "step": 44844 }, { "epoch": 0.7998608782506332, "grad_norm": 0.24445931613445282, "learning_rate": 5.855783042774823e-06, "loss": 0.0966, "step": 44845 }, { "epoch": 0.7998787143723469, "grad_norm": 0.3151172399520874, "learning_rate": 5.854782064775144e-06, "loss": 0.1302, "step": 44846 }, { "epoch": 0.7998965504940606, "grad_norm": 0.22199754416942596, "learning_rate": 5.8537811609889415e-06, "loss": 0.07, "step": 44847 }, { "epoch": 0.7999143866157743, "grad_norm": 0.32313480973243713, "learning_rate": 5.852780331420096e-06, "loss": 0.1293, "step": 44848 }, { "epoch": 0.799932222737488, "grad_norm": 0.20466674864292145, "learning_rate": 5.851779576072483e-06, "loss": 0.1114, "step": 44849 }, { "epoch": 0.7999500588592017, "grad_norm": 0.2798991799354553, "learning_rate": 5.8507788949499905e-06, "loss": 0.1249, "step": 44850 }, { "epoch": 0.7999678949809154, "grad_norm": 0.36040040850639343, "learning_rate": 5.849778288056492e-06, "loss": 0.1169, "step": 44851 }, { "epoch": 0.799985731102629, "grad_norm": 0.3420540988445282, "learning_rate": 5.848777755395871e-06, "loss": 0.0663, "step": 44852 }, { "epoch": 0.8000035672243427, "grad_norm": 0.27414995431900024, "learning_rate": 5.847777296971998e-06, "loss": 0.1217, "step": 44853 }, { "epoch": 0.8000214033460564, "grad_norm": 0.20114260911941528, "learning_rate": 5.846776912788751e-06, "loss": 0.0662, "step": 44854 }, { "epoch": 0.8000392394677701, "grad_norm": 0.2768368422985077, "learning_rate": 5.8457766028500175e-06, "loss": 0.1094, "step": 44855 }, { "epoch": 0.8000570755894838, "grad_norm": 0.2530750036239624, "learning_rate": 5.844776367159663e-06, "loss": 0.1176, "step": 44856 }, { "epoch": 0.8000749117111975, "grad_norm": 0.23680151998996735, "learning_rate": 5.8437762057215796e-06, "loss": 0.0809, "step": 44857 }, { "epoch": 0.8000927478329112, "grad_norm": 0.24132221937179565, "learning_rate": 5.842776118539628e-06, "loss": 0.1462, "step": 44858 }, { "epoch": 0.8001105839546249, "grad_norm": 0.24527911841869354, "learning_rate": 5.841776105617699e-06, "loss": 0.087, "step": 44859 }, { "epoch": 0.8001284200763386, "grad_norm": 0.2281140834093094, "learning_rate": 5.840776166959663e-06, "loss": 0.059, "step": 44860 }, { "epoch": 0.8001462561980522, "grad_norm": 0.39640456438064575, "learning_rate": 5.839776302569397e-06, "loss": 0.0838, "step": 44861 }, { "epoch": 0.800164092319766, "grad_norm": 0.2991827428340912, "learning_rate": 5.838776512450767e-06, "loss": 0.1645, "step": 44862 }, { "epoch": 0.8001819284414797, "grad_norm": 0.25122809410095215, "learning_rate": 5.837776796607666e-06, "loss": 0.1443, "step": 44863 }, { "epoch": 0.8001997645631934, "grad_norm": 0.3652280867099762, "learning_rate": 5.83677715504396e-06, "loss": 0.1713, "step": 44864 }, { "epoch": 0.8002176006849071, "grad_norm": 0.283097505569458, "learning_rate": 5.835777587763524e-06, "loss": 0.1034, "step": 44865 }, { "epoch": 0.8002354368066208, "grad_norm": 0.35019248723983765, "learning_rate": 5.8347780947702324e-06, "loss": 0.0533, "step": 44866 }, { "epoch": 0.8002532729283345, "grad_norm": 0.2504485547542572, "learning_rate": 5.833778676067955e-06, "loss": 0.0958, "step": 44867 }, { "epoch": 0.8002711090500482, "grad_norm": 0.2404722273349762, "learning_rate": 5.832779331660579e-06, "loss": 0.1273, "step": 44868 }, { "epoch": 0.8002889451717619, "grad_norm": 0.2237074375152588, "learning_rate": 5.83178006155197e-06, "loss": 0.0864, "step": 44869 }, { "epoch": 0.8003067812934755, "grad_norm": 0.2370162308216095, "learning_rate": 5.830780865745994e-06, "loss": 0.1284, "step": 44870 }, { "epoch": 0.8003246174151892, "grad_norm": 0.25863733887672424, "learning_rate": 5.829781744246532e-06, "loss": 0.134, "step": 44871 }, { "epoch": 0.8003424535369029, "grad_norm": 0.2798623740673065, "learning_rate": 5.828782697057464e-06, "loss": 0.1293, "step": 44872 }, { "epoch": 0.8003602896586166, "grad_norm": 0.27565184235572815, "learning_rate": 5.827783724182656e-06, "loss": 0.143, "step": 44873 }, { "epoch": 0.8003781257803303, "grad_norm": 0.31971368193626404, "learning_rate": 5.82678482562598e-06, "loss": 0.1029, "step": 44874 }, { "epoch": 0.800395961902044, "grad_norm": 0.25721362233161926, "learning_rate": 5.8257860013913035e-06, "loss": 0.1667, "step": 44875 }, { "epoch": 0.8004137980237577, "grad_norm": 0.2920261323451996, "learning_rate": 5.824787251482508e-06, "loss": 0.0892, "step": 44876 }, { "epoch": 0.8004316341454714, "grad_norm": 0.24468368291854858, "learning_rate": 5.82378857590346e-06, "loss": 0.1073, "step": 44877 }, { "epoch": 0.800449470267185, "grad_norm": 0.29385194182395935, "learning_rate": 5.82278997465803e-06, "loss": 0.0745, "step": 44878 }, { "epoch": 0.8004673063888988, "grad_norm": 0.32971900701522827, "learning_rate": 5.821791447750092e-06, "loss": 0.1335, "step": 44879 }, { "epoch": 0.8004851425106125, "grad_norm": 0.377947598695755, "learning_rate": 5.820792995183505e-06, "loss": 0.1464, "step": 44880 }, { "epoch": 0.8005029786323262, "grad_norm": 0.22087959945201874, "learning_rate": 5.819794616962157e-06, "loss": 0.093, "step": 44881 }, { "epoch": 0.8005208147540399, "grad_norm": 0.24202612042427063, "learning_rate": 5.818796313089911e-06, "loss": 0.0985, "step": 44882 }, { "epoch": 0.8005386508757536, "grad_norm": 0.2946437895298004, "learning_rate": 5.8177980835706244e-06, "loss": 0.1381, "step": 44883 }, { "epoch": 0.8005564869974673, "grad_norm": 0.21484124660491943, "learning_rate": 5.81679992840819e-06, "loss": 0.0929, "step": 44884 }, { "epoch": 0.800574323119181, "grad_norm": 0.36438870429992676, "learning_rate": 5.815801847606453e-06, "loss": 0.1202, "step": 44885 }, { "epoch": 0.8005921592408947, "grad_norm": 0.3173169493675232, "learning_rate": 5.814803841169303e-06, "loss": 0.1715, "step": 44886 }, { "epoch": 0.8006099953626084, "grad_norm": 0.27175572514533997, "learning_rate": 5.813805909100601e-06, "loss": 0.1258, "step": 44887 }, { "epoch": 0.800627831484322, "grad_norm": 0.325126975774765, "learning_rate": 5.8128080514042034e-06, "loss": 0.1532, "step": 44888 }, { "epoch": 0.8006456676060357, "grad_norm": 0.27757707238197327, "learning_rate": 5.811810268084001e-06, "loss": 0.1217, "step": 44889 }, { "epoch": 0.8006635037277494, "grad_norm": 0.2835046947002411, "learning_rate": 5.810812559143844e-06, "loss": 0.2006, "step": 44890 }, { "epoch": 0.8006813398494631, "grad_norm": 0.25689783692359924, "learning_rate": 5.809814924587609e-06, "loss": 0.1409, "step": 44891 }, { "epoch": 0.8006991759711768, "grad_norm": 0.3308696150779724, "learning_rate": 5.8088173644191586e-06, "loss": 0.0936, "step": 44892 }, { "epoch": 0.8007170120928905, "grad_norm": 0.3758254647254944, "learning_rate": 5.807819878642351e-06, "loss": 0.1516, "step": 44893 }, { "epoch": 0.8007348482146042, "grad_norm": 0.22307826578617096, "learning_rate": 5.806822467261072e-06, "loss": 0.1058, "step": 44894 }, { "epoch": 0.800752684336318, "grad_norm": 0.303335040807724, "learning_rate": 5.805825130279177e-06, "loss": 0.088, "step": 44895 }, { "epoch": 0.8007705204580317, "grad_norm": 0.21667756140232086, "learning_rate": 5.8048278677005355e-06, "loss": 0.1272, "step": 44896 }, { "epoch": 0.8007883565797453, "grad_norm": 0.23222732543945312, "learning_rate": 5.803830679529001e-06, "loss": 0.0619, "step": 44897 }, { "epoch": 0.800806192701459, "grad_norm": 0.23286797106266022, "learning_rate": 5.8028335657684506e-06, "loss": 0.1616, "step": 44898 }, { "epoch": 0.8008240288231727, "grad_norm": 0.2144184410572052, "learning_rate": 5.801836526422755e-06, "loss": 0.1033, "step": 44899 }, { "epoch": 0.8008418649448864, "grad_norm": 0.31754070520401, "learning_rate": 5.800839561495772e-06, "loss": 0.1438, "step": 44900 }, { "epoch": 0.8008597010666001, "grad_norm": 0.24232779443264008, "learning_rate": 5.799842670991356e-06, "loss": 0.1111, "step": 44901 }, { "epoch": 0.8008775371883138, "grad_norm": 0.2632555365562439, "learning_rate": 5.79884585491339e-06, "loss": 0.0846, "step": 44902 }, { "epoch": 0.8008953733100275, "grad_norm": 0.1810254454612732, "learning_rate": 5.797849113265732e-06, "loss": 0.0778, "step": 44903 }, { "epoch": 0.8009132094317412, "grad_norm": 0.2572481334209442, "learning_rate": 5.796852446052239e-06, "loss": 0.0706, "step": 44904 }, { "epoch": 0.8009310455534548, "grad_norm": 0.3234332203865051, "learning_rate": 5.795855853276783e-06, "loss": 0.1554, "step": 44905 }, { "epoch": 0.8009488816751685, "grad_norm": 0.27203384041786194, "learning_rate": 5.794859334943212e-06, "loss": 0.1256, "step": 44906 }, { "epoch": 0.8009667177968822, "grad_norm": 0.2624721825122833, "learning_rate": 5.793862891055407e-06, "loss": 0.1572, "step": 44907 }, { "epoch": 0.8009845539185959, "grad_norm": 0.33851128816604614, "learning_rate": 5.792866521617224e-06, "loss": 0.1316, "step": 44908 }, { "epoch": 0.8010023900403096, "grad_norm": 0.2457079291343689, "learning_rate": 5.791870226632523e-06, "loss": 0.072, "step": 44909 }, { "epoch": 0.8010202261620233, "grad_norm": 0.23305246233940125, "learning_rate": 5.790874006105162e-06, "loss": 0.1482, "step": 44910 }, { "epoch": 0.801038062283737, "grad_norm": 0.3221266567707062, "learning_rate": 5.789877860039014e-06, "loss": 0.138, "step": 44911 }, { "epoch": 0.8010558984054508, "grad_norm": 0.2858541011810303, "learning_rate": 5.788881788437927e-06, "loss": 0.1273, "step": 44912 }, { "epoch": 0.8010737345271645, "grad_norm": 0.22206105291843414, "learning_rate": 5.787885791305778e-06, "loss": 0.1104, "step": 44913 }, { "epoch": 0.8010915706488781, "grad_norm": 0.27187642455101013, "learning_rate": 5.786889868646411e-06, "loss": 0.1274, "step": 44914 }, { "epoch": 0.8011094067705918, "grad_norm": 0.27411359548568726, "learning_rate": 5.785894020463703e-06, "loss": 0.1212, "step": 44915 }, { "epoch": 0.8011272428923055, "grad_norm": 0.29509982466697693, "learning_rate": 5.7848982467615056e-06, "loss": 0.1132, "step": 44916 }, { "epoch": 0.8011450790140192, "grad_norm": 0.30816808342933655, "learning_rate": 5.783902547543676e-06, "loss": 0.1258, "step": 44917 }, { "epoch": 0.8011629151357329, "grad_norm": 0.28559520840644836, "learning_rate": 5.782906922814082e-06, "loss": 0.1386, "step": 44918 }, { "epoch": 0.8011807512574466, "grad_norm": 0.27879440784454346, "learning_rate": 5.7819113725765675e-06, "loss": 0.1241, "step": 44919 }, { "epoch": 0.8011985873791603, "grad_norm": 0.2690989077091217, "learning_rate": 5.780915896835013e-06, "loss": 0.0802, "step": 44920 }, { "epoch": 0.801216423500874, "grad_norm": 0.2710437476634979, "learning_rate": 5.779920495593263e-06, "loss": 0.1222, "step": 44921 }, { "epoch": 0.8012342596225877, "grad_norm": 0.2932475507259369, "learning_rate": 5.778925168855182e-06, "loss": 0.1234, "step": 44922 }, { "epoch": 0.8012520957443013, "grad_norm": 0.3456650674343109, "learning_rate": 5.777929916624619e-06, "loss": 0.0959, "step": 44923 }, { "epoch": 0.801269931866015, "grad_norm": 0.3106158673763275, "learning_rate": 5.776934738905443e-06, "loss": 0.1218, "step": 44924 }, { "epoch": 0.8012877679877287, "grad_norm": 0.28460800647735596, "learning_rate": 5.7759396357015025e-06, "loss": 0.124, "step": 44925 }, { "epoch": 0.8013056041094424, "grad_norm": 0.29398831725120544, "learning_rate": 5.774944607016666e-06, "loss": 0.1621, "step": 44926 }, { "epoch": 0.8013234402311561, "grad_norm": 0.31669294834136963, "learning_rate": 5.773949652854779e-06, "loss": 0.1293, "step": 44927 }, { "epoch": 0.8013412763528698, "grad_norm": 0.2677053213119507, "learning_rate": 5.772954773219707e-06, "loss": 0.1502, "step": 44928 }, { "epoch": 0.8013591124745836, "grad_norm": 0.3948040008544922, "learning_rate": 5.771959968115306e-06, "loss": 0.1507, "step": 44929 }, { "epoch": 0.8013769485962973, "grad_norm": 0.38298657536506653, "learning_rate": 5.7709652375454285e-06, "loss": 0.1694, "step": 44930 }, { "epoch": 0.801394784718011, "grad_norm": 0.18793480098247528, "learning_rate": 5.769970581513931e-06, "loss": 0.0756, "step": 44931 }, { "epoch": 0.8014126208397246, "grad_norm": 0.2480316311120987, "learning_rate": 5.768976000024664e-06, "loss": 0.138, "step": 44932 }, { "epoch": 0.8014304569614383, "grad_norm": 0.32688817381858826, "learning_rate": 5.767981493081492e-06, "loss": 0.1159, "step": 44933 }, { "epoch": 0.801448293083152, "grad_norm": 0.20329535007476807, "learning_rate": 5.766987060688269e-06, "loss": 0.0846, "step": 44934 }, { "epoch": 0.8014661292048657, "grad_norm": 0.2714150547981262, "learning_rate": 5.765992702848847e-06, "loss": 0.1133, "step": 44935 }, { "epoch": 0.8014839653265794, "grad_norm": 0.3031516373157501, "learning_rate": 5.764998419567072e-06, "loss": 0.1632, "step": 44936 }, { "epoch": 0.8015018014482931, "grad_norm": 0.43094682693481445, "learning_rate": 5.764004210846815e-06, "loss": 0.0945, "step": 44937 }, { "epoch": 0.8015196375700068, "grad_norm": 0.289556622505188, "learning_rate": 5.763010076691921e-06, "loss": 0.1406, "step": 44938 }, { "epoch": 0.8015374736917205, "grad_norm": 0.33710530400276184, "learning_rate": 5.762016017106237e-06, "loss": 0.1114, "step": 44939 }, { "epoch": 0.8015553098134341, "grad_norm": 0.22856399416923523, "learning_rate": 5.76102203209363e-06, "loss": 0.083, "step": 44940 }, { "epoch": 0.8015731459351478, "grad_norm": 0.21168820559978485, "learning_rate": 5.76002812165794e-06, "loss": 0.1254, "step": 44941 }, { "epoch": 0.8015909820568615, "grad_norm": 0.2864570617675781, "learning_rate": 5.759034285803033e-06, "loss": 0.1056, "step": 44942 }, { "epoch": 0.8016088181785752, "grad_norm": 0.20837455987930298, "learning_rate": 5.758040524532756e-06, "loss": 0.152, "step": 44943 }, { "epoch": 0.8016266543002889, "grad_norm": 0.274724543094635, "learning_rate": 5.75704683785096e-06, "loss": 0.1126, "step": 44944 }, { "epoch": 0.8016444904220026, "grad_norm": 0.5435351729393005, "learning_rate": 5.756053225761488e-06, "loss": 0.0977, "step": 44945 }, { "epoch": 0.8016623265437164, "grad_norm": 0.1976233720779419, "learning_rate": 5.7550596882682075e-06, "loss": 0.0733, "step": 44946 }, { "epoch": 0.8016801626654301, "grad_norm": 0.2799014151096344, "learning_rate": 5.754066225374966e-06, "loss": 0.0841, "step": 44947 }, { "epoch": 0.8016979987871438, "grad_norm": 0.25921720266342163, "learning_rate": 5.7530728370856076e-06, "loss": 0.1272, "step": 44948 }, { "epoch": 0.8017158349088574, "grad_norm": 0.24318300187587738, "learning_rate": 5.75207952340398e-06, "loss": 0.1599, "step": 44949 }, { "epoch": 0.8017336710305711, "grad_norm": 0.241105854511261, "learning_rate": 5.751086284333948e-06, "loss": 0.0895, "step": 44950 }, { "epoch": 0.8017515071522848, "grad_norm": 0.5471779704093933, "learning_rate": 5.750093119879352e-06, "loss": 0.1493, "step": 44951 }, { "epoch": 0.8017693432739985, "grad_norm": 0.3248126804828644, "learning_rate": 5.749100030044038e-06, "loss": 0.1263, "step": 44952 }, { "epoch": 0.8017871793957122, "grad_norm": 0.22908511757850647, "learning_rate": 5.748107014831869e-06, "loss": 0.1114, "step": 44953 }, { "epoch": 0.8018050155174259, "grad_norm": 0.23781849443912506, "learning_rate": 5.747114074246679e-06, "loss": 0.1068, "step": 44954 }, { "epoch": 0.8018228516391396, "grad_norm": 0.25892335176467896, "learning_rate": 5.746121208292332e-06, "loss": 0.1089, "step": 44955 }, { "epoch": 0.8018406877608533, "grad_norm": 0.5099577307701111, "learning_rate": 5.74512841697267e-06, "loss": 0.1345, "step": 44956 }, { "epoch": 0.801858523882567, "grad_norm": 0.2208169847726822, "learning_rate": 5.7441357002915395e-06, "loss": 0.0771, "step": 44957 }, { "epoch": 0.8018763600042806, "grad_norm": 0.2946223318576813, "learning_rate": 5.743143058252784e-06, "loss": 0.1411, "step": 44958 }, { "epoch": 0.8018941961259943, "grad_norm": 0.30401602387428284, "learning_rate": 5.742150490860262e-06, "loss": 0.1807, "step": 44959 }, { "epoch": 0.801912032247708, "grad_norm": 0.2889178395271301, "learning_rate": 5.741157998117816e-06, "loss": 0.1386, "step": 44960 }, { "epoch": 0.8019298683694217, "grad_norm": 0.35939544439315796, "learning_rate": 5.740165580029297e-06, "loss": 0.121, "step": 44961 }, { "epoch": 0.8019477044911354, "grad_norm": 0.41577592492103577, "learning_rate": 5.739173236598539e-06, "loss": 0.2049, "step": 44962 }, { "epoch": 0.8019655406128492, "grad_norm": 0.2711866497993469, "learning_rate": 5.738180967829404e-06, "loss": 0.1185, "step": 44963 }, { "epoch": 0.8019833767345629, "grad_norm": 0.24924075603485107, "learning_rate": 5.7371887737257325e-06, "loss": 0.1036, "step": 44964 }, { "epoch": 0.8020012128562766, "grad_norm": 0.2239152491092682, "learning_rate": 5.736196654291373e-06, "loss": 0.1349, "step": 44965 }, { "epoch": 0.8020190489779903, "grad_norm": 0.2896084487438202, "learning_rate": 5.7352046095301595e-06, "loss": 0.1499, "step": 44966 }, { "epoch": 0.802036885099704, "grad_norm": 0.24542298913002014, "learning_rate": 5.734212639445954e-06, "loss": 0.0808, "step": 44967 }, { "epoch": 0.8020547212214176, "grad_norm": 0.27352175116539, "learning_rate": 5.733220744042589e-06, "loss": 0.1103, "step": 44968 }, { "epoch": 0.8020725573431313, "grad_norm": 0.3354392349720001, "learning_rate": 5.7322289233239206e-06, "loss": 0.1046, "step": 44969 }, { "epoch": 0.802090393464845, "grad_norm": 0.25808966159820557, "learning_rate": 5.73123717729378e-06, "loss": 0.0842, "step": 44970 }, { "epoch": 0.8021082295865587, "grad_norm": 0.26042869687080383, "learning_rate": 5.7302455059560275e-06, "loss": 0.1277, "step": 44971 }, { "epoch": 0.8021260657082724, "grad_norm": 0.42380964756011963, "learning_rate": 5.7292539093145e-06, "loss": 0.1286, "step": 44972 }, { "epoch": 0.8021439018299861, "grad_norm": 0.33321458101272583, "learning_rate": 5.7282623873730374e-06, "loss": 0.1733, "step": 44973 }, { "epoch": 0.8021617379516998, "grad_norm": 0.28361377120018005, "learning_rate": 5.727270940135487e-06, "loss": 0.1312, "step": 44974 }, { "epoch": 0.8021795740734134, "grad_norm": 0.3127893805503845, "learning_rate": 5.726279567605686e-06, "loss": 0.1212, "step": 44975 }, { "epoch": 0.8021974101951271, "grad_norm": 0.22174373269081116, "learning_rate": 5.725288269787488e-06, "loss": 0.1147, "step": 44976 }, { "epoch": 0.8022152463168408, "grad_norm": 0.2840960621833801, "learning_rate": 5.724297046684729e-06, "loss": 0.0992, "step": 44977 }, { "epoch": 0.8022330824385545, "grad_norm": 0.26874086260795593, "learning_rate": 5.723305898301254e-06, "loss": 0.1451, "step": 44978 }, { "epoch": 0.8022509185602682, "grad_norm": 0.28927677869796753, "learning_rate": 5.7223148246408954e-06, "loss": 0.1316, "step": 44979 }, { "epoch": 0.802268754681982, "grad_norm": 0.23745930194854736, "learning_rate": 5.7213238257075115e-06, "loss": 0.1121, "step": 44980 }, { "epoch": 0.8022865908036957, "grad_norm": 0.28322634100914, "learning_rate": 5.720332901504926e-06, "loss": 0.1221, "step": 44981 }, { "epoch": 0.8023044269254094, "grad_norm": 0.2268662005662918, "learning_rate": 5.719342052036997e-06, "loss": 0.0858, "step": 44982 }, { "epoch": 0.8023222630471231, "grad_norm": 0.2538077235221863, "learning_rate": 5.718351277307549e-06, "loss": 0.1306, "step": 44983 }, { "epoch": 0.8023400991688368, "grad_norm": 0.2929036617279053, "learning_rate": 5.717360577320441e-06, "loss": 0.1177, "step": 44984 }, { "epoch": 0.8023579352905504, "grad_norm": 0.2052081972360611, "learning_rate": 5.7163699520795e-06, "loss": 0.0672, "step": 44985 }, { "epoch": 0.8023757714122641, "grad_norm": 0.26290860772132874, "learning_rate": 5.715379401588572e-06, "loss": 0.1204, "step": 44986 }, { "epoch": 0.8023936075339778, "grad_norm": 0.24862372875213623, "learning_rate": 5.714388925851494e-06, "loss": 0.1206, "step": 44987 }, { "epoch": 0.8024114436556915, "grad_norm": 0.3110763728618622, "learning_rate": 5.713398524872096e-06, "loss": 0.1015, "step": 44988 }, { "epoch": 0.8024292797774052, "grad_norm": 0.32171696424484253, "learning_rate": 5.712408198654237e-06, "loss": 0.1295, "step": 44989 }, { "epoch": 0.8024471158991189, "grad_norm": 0.36702901124954224, "learning_rate": 5.711417947201745e-06, "loss": 0.1469, "step": 44990 }, { "epoch": 0.8024649520208326, "grad_norm": 0.26032930612564087, "learning_rate": 5.710427770518456e-06, "loss": 0.1298, "step": 44991 }, { "epoch": 0.8024827881425463, "grad_norm": 0.2838994562625885, "learning_rate": 5.709437668608208e-06, "loss": 0.1518, "step": 44992 }, { "epoch": 0.8025006242642599, "grad_norm": 0.2227109670639038, "learning_rate": 5.708447641474848e-06, "loss": 0.0845, "step": 44993 }, { "epoch": 0.8025184603859736, "grad_norm": 0.3563428223133087, "learning_rate": 5.707457689122208e-06, "loss": 0.1895, "step": 44994 }, { "epoch": 0.8025362965076873, "grad_norm": 0.20921310782432556, "learning_rate": 5.706467811554117e-06, "loss": 0.1238, "step": 44995 }, { "epoch": 0.8025541326294011, "grad_norm": 0.26908808946609497, "learning_rate": 5.705478008774428e-06, "loss": 0.1051, "step": 44996 }, { "epoch": 0.8025719687511148, "grad_norm": 0.3211827576160431, "learning_rate": 5.704488280786966e-06, "loss": 0.1125, "step": 44997 }, { "epoch": 0.8025898048728285, "grad_norm": 0.22332863509655, "learning_rate": 5.703498627595577e-06, "loss": 0.0809, "step": 44998 }, { "epoch": 0.8026076409945422, "grad_norm": 0.2842158377170563, "learning_rate": 5.702509049204094e-06, "loss": 0.1111, "step": 44999 }, { "epoch": 0.8026254771162559, "grad_norm": 0.3417893350124359, "learning_rate": 5.701519545616349e-06, "loss": 0.1101, "step": 45000 }, { "epoch": 0.8026254771162559, "eval_loss": 0.11037413775920868, "eval_runtime": 108.5419, "eval_samples_per_second": 9.434, "eval_steps_per_second": 1.575, "step": 45000 }, { "epoch": 0.8026433132379696, "grad_norm": 0.2643965482711792, "learning_rate": 5.700530116836173e-06, "loss": 0.118, "step": 45001 }, { "epoch": 0.8026611493596832, "grad_norm": 0.3477681875228882, "learning_rate": 5.699540762867414e-06, "loss": 0.0635, "step": 45002 }, { "epoch": 0.8026789854813969, "grad_norm": 0.3364926874637604, "learning_rate": 5.698551483713902e-06, "loss": 0.1621, "step": 45003 }, { "epoch": 0.8026968216031106, "grad_norm": 0.2213757187128067, "learning_rate": 5.697562279379473e-06, "loss": 0.1022, "step": 45004 }, { "epoch": 0.8027146577248243, "grad_norm": 0.2655234932899475, "learning_rate": 5.6965731498679484e-06, "loss": 0.1055, "step": 45005 }, { "epoch": 0.802732493846538, "grad_norm": 0.25524941086769104, "learning_rate": 5.695584095183184e-06, "loss": 0.1076, "step": 45006 }, { "epoch": 0.8027503299682517, "grad_norm": 0.24004822969436646, "learning_rate": 5.694595115329002e-06, "loss": 0.0847, "step": 45007 }, { "epoch": 0.8027681660899654, "grad_norm": 0.3697361946105957, "learning_rate": 5.69360621030923e-06, "loss": 0.127, "step": 45008 }, { "epoch": 0.8027860022116791, "grad_norm": 0.25982481241226196, "learning_rate": 5.692617380127716e-06, "loss": 0.1009, "step": 45009 }, { "epoch": 0.8028038383333927, "grad_norm": 0.25787749886512756, "learning_rate": 5.691628624788278e-06, "loss": 0.1046, "step": 45010 }, { "epoch": 0.8028216744551064, "grad_norm": 0.2753685712814331, "learning_rate": 5.690639944294765e-06, "loss": 0.0778, "step": 45011 }, { "epoch": 0.8028395105768201, "grad_norm": 0.2629126310348511, "learning_rate": 5.6896513386509976e-06, "loss": 0.13, "step": 45012 }, { "epoch": 0.8028573466985339, "grad_norm": 0.2808989882469177, "learning_rate": 5.688662807860815e-06, "loss": 0.1068, "step": 45013 }, { "epoch": 0.8028751828202476, "grad_norm": 0.2294870913028717, "learning_rate": 5.687674351928035e-06, "loss": 0.0419, "step": 45014 }, { "epoch": 0.8028930189419613, "grad_norm": 0.23658153414726257, "learning_rate": 5.686685970856509e-06, "loss": 0.0937, "step": 45015 }, { "epoch": 0.802910855063675, "grad_norm": 0.2728012502193451, "learning_rate": 5.685697664650055e-06, "loss": 0.0936, "step": 45016 }, { "epoch": 0.8029286911853887, "grad_norm": 0.2915666997432709, "learning_rate": 5.684709433312513e-06, "loss": 0.1582, "step": 45017 }, { "epoch": 0.8029465273071024, "grad_norm": 0.33082664012908936, "learning_rate": 5.683721276847698e-06, "loss": 0.1472, "step": 45018 }, { "epoch": 0.802964363428816, "grad_norm": 0.2335931956768036, "learning_rate": 5.68273319525946e-06, "loss": 0.1156, "step": 45019 }, { "epoch": 0.8029821995505297, "grad_norm": 0.2518908977508545, "learning_rate": 5.6817451885516185e-06, "loss": 0.1128, "step": 45020 }, { "epoch": 0.8030000356722434, "grad_norm": 0.2515953779220581, "learning_rate": 5.6807572567280035e-06, "loss": 0.1001, "step": 45021 }, { "epoch": 0.8030178717939571, "grad_norm": 0.4360179603099823, "learning_rate": 5.679769399792442e-06, "loss": 0.0831, "step": 45022 }, { "epoch": 0.8030357079156708, "grad_norm": 0.30175352096557617, "learning_rate": 5.678781617748774e-06, "loss": 0.1145, "step": 45023 }, { "epoch": 0.8030535440373845, "grad_norm": 0.3068403899669647, "learning_rate": 5.677793910600812e-06, "loss": 0.1065, "step": 45024 }, { "epoch": 0.8030713801590982, "grad_norm": 0.3034062087535858, "learning_rate": 5.6768062783524035e-06, "loss": 0.0966, "step": 45025 }, { "epoch": 0.8030892162808119, "grad_norm": 0.30345284938812256, "learning_rate": 5.67581872100737e-06, "loss": 0.1235, "step": 45026 }, { "epoch": 0.8031070524025256, "grad_norm": 0.22523249685764313, "learning_rate": 5.674831238569528e-06, "loss": 0.1522, "step": 45027 }, { "epoch": 0.8031248885242392, "grad_norm": 0.2335573434829712, "learning_rate": 5.673843831042722e-06, "loss": 0.1215, "step": 45028 }, { "epoch": 0.8031427246459529, "grad_norm": 0.3100409507751465, "learning_rate": 5.672856498430773e-06, "loss": 0.067, "step": 45029 }, { "epoch": 0.8031605607676667, "grad_norm": 0.27990153431892395, "learning_rate": 5.671869240737507e-06, "loss": 0.1515, "step": 45030 }, { "epoch": 0.8031783968893804, "grad_norm": 0.2517462968826294, "learning_rate": 5.670882057966745e-06, "loss": 0.109, "step": 45031 }, { "epoch": 0.8031962330110941, "grad_norm": 0.29710128903388977, "learning_rate": 5.669894950122326e-06, "loss": 0.0914, "step": 45032 }, { "epoch": 0.8032140691328078, "grad_norm": 0.23807238042354584, "learning_rate": 5.668907917208072e-06, "loss": 0.1419, "step": 45033 }, { "epoch": 0.8032319052545215, "grad_norm": 0.23975948989391327, "learning_rate": 5.667920959227807e-06, "loss": 0.095, "step": 45034 }, { "epoch": 0.8032497413762352, "grad_norm": 0.31046345829963684, "learning_rate": 5.666934076185351e-06, "loss": 0.1099, "step": 45035 }, { "epoch": 0.8032675774979489, "grad_norm": 0.2461196482181549, "learning_rate": 5.6659472680845425e-06, "loss": 0.0827, "step": 45036 }, { "epoch": 0.8032854136196625, "grad_norm": 0.21382002532482147, "learning_rate": 5.664960534929192e-06, "loss": 0.1192, "step": 45037 }, { "epoch": 0.8033032497413762, "grad_norm": 0.22482174634933472, "learning_rate": 5.663973876723142e-06, "loss": 0.0935, "step": 45038 }, { "epoch": 0.8033210858630899, "grad_norm": 0.2876605689525604, "learning_rate": 5.662987293470207e-06, "loss": 0.0794, "step": 45039 }, { "epoch": 0.8033389219848036, "grad_norm": 0.23094086349010468, "learning_rate": 5.662000785174204e-06, "loss": 0.0753, "step": 45040 }, { "epoch": 0.8033567581065173, "grad_norm": 0.2665034830570221, "learning_rate": 5.661014351838972e-06, "loss": 0.1044, "step": 45041 }, { "epoch": 0.803374594228231, "grad_norm": 0.2721155881881714, "learning_rate": 5.660027993468328e-06, "loss": 0.1105, "step": 45042 }, { "epoch": 0.8033924303499447, "grad_norm": 0.3114945590496063, "learning_rate": 5.659041710066096e-06, "loss": 0.1109, "step": 45043 }, { "epoch": 0.8034102664716584, "grad_norm": 0.23734666407108307, "learning_rate": 5.658055501636092e-06, "loss": 0.0938, "step": 45044 }, { "epoch": 0.803428102593372, "grad_norm": 0.2699252665042877, "learning_rate": 5.65706936818215e-06, "loss": 0.1524, "step": 45045 }, { "epoch": 0.8034459387150857, "grad_norm": 0.36435607075691223, "learning_rate": 5.656083309708091e-06, "loss": 0.1789, "step": 45046 }, { "epoch": 0.8034637748367995, "grad_norm": 0.38901329040527344, "learning_rate": 5.655097326217732e-06, "loss": 0.1393, "step": 45047 }, { "epoch": 0.8034816109585132, "grad_norm": 0.2220451831817627, "learning_rate": 5.654111417714892e-06, "loss": 0.0944, "step": 45048 }, { "epoch": 0.8034994470802269, "grad_norm": 0.2760920226573944, "learning_rate": 5.653125584203403e-06, "loss": 0.1498, "step": 45049 }, { "epoch": 0.8035172832019406, "grad_norm": 0.2670243978500366, "learning_rate": 5.652139825687083e-06, "loss": 0.1334, "step": 45050 }, { "epoch": 0.8035351193236543, "grad_norm": 0.26425203680992126, "learning_rate": 5.651154142169743e-06, "loss": 0.0639, "step": 45051 }, { "epoch": 0.803552955445368, "grad_norm": 0.21602346003055573, "learning_rate": 5.6501685336552204e-06, "loss": 0.0859, "step": 45052 }, { "epoch": 0.8035707915670817, "grad_norm": 0.24008624255657196, "learning_rate": 5.649183000147323e-06, "loss": 0.1167, "step": 45053 }, { "epoch": 0.8035886276887954, "grad_norm": 0.2429155856370926, "learning_rate": 5.64819754164988e-06, "loss": 0.0958, "step": 45054 }, { "epoch": 0.803606463810509, "grad_norm": 0.2998778223991394, "learning_rate": 5.647212158166706e-06, "loss": 0.0978, "step": 45055 }, { "epoch": 0.8036242999322227, "grad_norm": 0.24849781394004822, "learning_rate": 5.646226849701625e-06, "loss": 0.0737, "step": 45056 }, { "epoch": 0.8036421360539364, "grad_norm": 0.2383917272090912, "learning_rate": 5.645241616258446e-06, "loss": 0.0945, "step": 45057 }, { "epoch": 0.8036599721756501, "grad_norm": 0.2024536281824112, "learning_rate": 5.6442564578410015e-06, "loss": 0.127, "step": 45058 }, { "epoch": 0.8036778082973638, "grad_norm": 0.3398240804672241, "learning_rate": 5.6432713744531046e-06, "loss": 0.1413, "step": 45059 }, { "epoch": 0.8036956444190775, "grad_norm": 0.2868044674396515, "learning_rate": 5.642286366098573e-06, "loss": 0.1235, "step": 45060 }, { "epoch": 0.8037134805407912, "grad_norm": 0.24181649088859558, "learning_rate": 5.641301432781218e-06, "loss": 0.0952, "step": 45061 }, { "epoch": 0.8037313166625049, "grad_norm": 0.28084084391593933, "learning_rate": 5.6403165745048725e-06, "loss": 0.1019, "step": 45062 }, { "epoch": 0.8037491527842185, "grad_norm": 0.25780850648880005, "learning_rate": 5.639331791273347e-06, "loss": 0.1362, "step": 45063 }, { "epoch": 0.8037669889059323, "grad_norm": 0.28611013293266296, "learning_rate": 5.638347083090451e-06, "loss": 0.1187, "step": 45064 }, { "epoch": 0.803784825027646, "grad_norm": 0.2852856516838074, "learning_rate": 5.637362449960015e-06, "loss": 0.1133, "step": 45065 }, { "epoch": 0.8038026611493597, "grad_norm": 0.25416988134384155, "learning_rate": 5.636377891885844e-06, "loss": 0.167, "step": 45066 }, { "epoch": 0.8038204972710734, "grad_norm": 0.2378191500902176, "learning_rate": 5.635393408871767e-06, "loss": 0.1333, "step": 45067 }, { "epoch": 0.8038383333927871, "grad_norm": 0.31271690130233765, "learning_rate": 5.634409000921595e-06, "loss": 0.1027, "step": 45068 }, { "epoch": 0.8038561695145008, "grad_norm": 0.3686150908470154, "learning_rate": 5.633424668039142e-06, "loss": 0.152, "step": 45069 }, { "epoch": 0.8038740056362145, "grad_norm": 0.31732410192489624, "learning_rate": 5.6324404102282156e-06, "loss": 0.0975, "step": 45070 }, { "epoch": 0.8038918417579282, "grad_norm": 0.2282390594482422, "learning_rate": 5.631456227492646e-06, "loss": 0.0809, "step": 45071 }, { "epoch": 0.8039096778796418, "grad_norm": 0.23849719762802124, "learning_rate": 5.630472119836242e-06, "loss": 0.0827, "step": 45072 }, { "epoch": 0.8039275140013555, "grad_norm": 0.3120070993900299, "learning_rate": 5.629488087262819e-06, "loss": 0.1176, "step": 45073 }, { "epoch": 0.8039453501230692, "grad_norm": 0.3191993534564972, "learning_rate": 5.6285041297761825e-06, "loss": 0.1107, "step": 45074 }, { "epoch": 0.8039631862447829, "grad_norm": 0.24546119570732117, "learning_rate": 5.627520247380164e-06, "loss": 0.0797, "step": 45075 }, { "epoch": 0.8039810223664966, "grad_norm": 0.2516654431819916, "learning_rate": 5.626536440078564e-06, "loss": 0.1097, "step": 45076 }, { "epoch": 0.8039988584882103, "grad_norm": 0.2875223159790039, "learning_rate": 5.625552707875203e-06, "loss": 0.1137, "step": 45077 }, { "epoch": 0.804016694609924, "grad_norm": 0.3131698966026306, "learning_rate": 5.624569050773884e-06, "loss": 0.0742, "step": 45078 }, { "epoch": 0.8040345307316377, "grad_norm": 0.17964769899845123, "learning_rate": 5.6235854687784276e-06, "loss": 0.0731, "step": 45079 }, { "epoch": 0.8040523668533514, "grad_norm": 0.22102344036102295, "learning_rate": 5.622601961892654e-06, "loss": 0.0792, "step": 45080 }, { "epoch": 0.8040702029750652, "grad_norm": 0.24348902702331543, "learning_rate": 5.621618530120367e-06, "loss": 0.0926, "step": 45081 }, { "epoch": 0.8040880390967788, "grad_norm": 0.3022458851337433, "learning_rate": 5.62063517346538e-06, "loss": 0.0714, "step": 45082 }, { "epoch": 0.8041058752184925, "grad_norm": 0.3298971652984619, "learning_rate": 5.619651891931496e-06, "loss": 0.1466, "step": 45083 }, { "epoch": 0.8041237113402062, "grad_norm": 0.2805505096912384, "learning_rate": 5.618668685522544e-06, "loss": 0.1626, "step": 45084 }, { "epoch": 0.8041415474619199, "grad_norm": 0.3055839538574219, "learning_rate": 5.617685554242325e-06, "loss": 0.1402, "step": 45085 }, { "epoch": 0.8041593835836336, "grad_norm": 0.2969001233577728, "learning_rate": 5.616702498094653e-06, "loss": 0.1344, "step": 45086 }, { "epoch": 0.8041772197053473, "grad_norm": 0.23529979586601257, "learning_rate": 5.615719517083328e-06, "loss": 0.0963, "step": 45087 }, { "epoch": 0.804195055827061, "grad_norm": 0.20105794072151184, "learning_rate": 5.614736611212176e-06, "loss": 0.0634, "step": 45088 }, { "epoch": 0.8042128919487747, "grad_norm": 0.333313912153244, "learning_rate": 5.613753780485001e-06, "loss": 0.1504, "step": 45089 }, { "epoch": 0.8042307280704883, "grad_norm": 0.2451031357049942, "learning_rate": 5.6127710249056135e-06, "loss": 0.1081, "step": 45090 }, { "epoch": 0.804248564192202, "grad_norm": 0.2931724786758423, "learning_rate": 5.611788344477814e-06, "loss": 0.1219, "step": 45091 }, { "epoch": 0.8042664003139157, "grad_norm": 0.216327965259552, "learning_rate": 5.6108057392054244e-06, "loss": 0.0641, "step": 45092 }, { "epoch": 0.8042842364356294, "grad_norm": 0.27867671847343445, "learning_rate": 5.6098232090922434e-06, "loss": 0.0718, "step": 45093 }, { "epoch": 0.8043020725573431, "grad_norm": 0.4103490114212036, "learning_rate": 5.608840754142092e-06, "loss": 0.1015, "step": 45094 }, { "epoch": 0.8043199086790568, "grad_norm": 0.24888038635253906, "learning_rate": 5.607858374358771e-06, "loss": 0.0836, "step": 45095 }, { "epoch": 0.8043377448007705, "grad_norm": 0.22790265083312988, "learning_rate": 5.606876069746081e-06, "loss": 0.098, "step": 45096 }, { "epoch": 0.8043555809224843, "grad_norm": 0.20715153217315674, "learning_rate": 5.605893840307846e-06, "loss": 0.0817, "step": 45097 }, { "epoch": 0.804373417044198, "grad_norm": 0.26539748907089233, "learning_rate": 5.604911686047865e-06, "loss": 0.0888, "step": 45098 }, { "epoch": 0.8043912531659116, "grad_norm": 0.2161572426557541, "learning_rate": 5.603929606969943e-06, "loss": 0.1207, "step": 45099 }, { "epoch": 0.8044090892876253, "grad_norm": 0.3037853538990021, "learning_rate": 5.602947603077882e-06, "loss": 0.0725, "step": 45100 }, { "epoch": 0.804426925409339, "grad_norm": 0.20741458237171173, "learning_rate": 5.601965674375503e-06, "loss": 0.1112, "step": 45101 }, { "epoch": 0.8044447615310527, "grad_norm": 0.4309438169002533, "learning_rate": 5.600983820866607e-06, "loss": 0.1519, "step": 45102 }, { "epoch": 0.8044625976527664, "grad_norm": 0.2536090910434723, "learning_rate": 5.600002042554997e-06, "loss": 0.1144, "step": 45103 }, { "epoch": 0.8044804337744801, "grad_norm": 0.19193938374519348, "learning_rate": 5.59902033944448e-06, "loss": 0.0928, "step": 45104 }, { "epoch": 0.8044982698961938, "grad_norm": 0.23917387425899506, "learning_rate": 5.598038711538853e-06, "loss": 0.0635, "step": 45105 }, { "epoch": 0.8045161060179075, "grad_norm": 0.3996824622154236, "learning_rate": 5.597057158841929e-06, "loss": 0.1706, "step": 45106 }, { "epoch": 0.8045339421396211, "grad_norm": 0.32945168018341064, "learning_rate": 5.596075681357521e-06, "loss": 0.1093, "step": 45107 }, { "epoch": 0.8045517782613348, "grad_norm": 0.2145969420671463, "learning_rate": 5.595094279089425e-06, "loss": 0.0858, "step": 45108 }, { "epoch": 0.8045696143830485, "grad_norm": 0.262411504983902, "learning_rate": 5.5941129520414385e-06, "loss": 0.1032, "step": 45109 }, { "epoch": 0.8045874505047622, "grad_norm": 0.32691875100135803, "learning_rate": 5.59313170021738e-06, "loss": 0.0888, "step": 45110 }, { "epoch": 0.8046052866264759, "grad_norm": 0.1854233592748642, "learning_rate": 5.592150523621045e-06, "loss": 0.0895, "step": 45111 }, { "epoch": 0.8046231227481896, "grad_norm": 0.2635454833507538, "learning_rate": 5.59116942225624e-06, "loss": 0.1418, "step": 45112 }, { "epoch": 0.8046409588699033, "grad_norm": 0.29499322175979614, "learning_rate": 5.590188396126758e-06, "loss": 0.0955, "step": 45113 }, { "epoch": 0.8046587949916171, "grad_norm": 0.5843046307563782, "learning_rate": 5.589207445236417e-06, "loss": 0.1424, "step": 45114 }, { "epoch": 0.8046766311133308, "grad_norm": 0.33900749683380127, "learning_rate": 5.588226569589011e-06, "loss": 0.1168, "step": 45115 }, { "epoch": 0.8046944672350445, "grad_norm": 0.2698301374912262, "learning_rate": 5.587245769188345e-06, "loss": 0.1295, "step": 45116 }, { "epoch": 0.8047123033567581, "grad_norm": 0.29449552297592163, "learning_rate": 5.586265044038219e-06, "loss": 0.1378, "step": 45117 }, { "epoch": 0.8047301394784718, "grad_norm": 0.24851654469966888, "learning_rate": 5.585284394142426e-06, "loss": 0.0987, "step": 45118 }, { "epoch": 0.8047479756001855, "grad_norm": 0.2852621376514435, "learning_rate": 5.5843038195047855e-06, "loss": 0.0903, "step": 45119 }, { "epoch": 0.8047658117218992, "grad_norm": 0.2575474977493286, "learning_rate": 5.583323320129083e-06, "loss": 0.1368, "step": 45120 }, { "epoch": 0.8047836478436129, "grad_norm": 0.31236347556114197, "learning_rate": 5.58234289601913e-06, "loss": 0.0863, "step": 45121 }, { "epoch": 0.8048014839653266, "grad_norm": 0.25784802436828613, "learning_rate": 5.581362547178717e-06, "loss": 0.1434, "step": 45122 }, { "epoch": 0.8048193200870403, "grad_norm": 0.17321154475212097, "learning_rate": 5.580382273611656e-06, "loss": 0.0842, "step": 45123 }, { "epoch": 0.804837156208754, "grad_norm": 0.251720666885376, "learning_rate": 5.579402075321743e-06, "loss": 0.0548, "step": 45124 }, { "epoch": 0.8048549923304676, "grad_norm": 0.2474765032529831, "learning_rate": 5.5784219523127735e-06, "loss": 0.0995, "step": 45125 }, { "epoch": 0.8048728284521813, "grad_norm": 0.29034796357154846, "learning_rate": 5.57744190458854e-06, "loss": 0.1727, "step": 45126 }, { "epoch": 0.804890664573895, "grad_norm": 0.170152947306633, "learning_rate": 5.57646193215286e-06, "loss": 0.0699, "step": 45127 }, { "epoch": 0.8049085006956087, "grad_norm": 0.22174742817878723, "learning_rate": 5.5754820350095185e-06, "loss": 0.0966, "step": 45128 }, { "epoch": 0.8049263368173224, "grad_norm": 0.3623199164867401, "learning_rate": 5.574502213162322e-06, "loss": 0.1249, "step": 45129 }, { "epoch": 0.8049441729390361, "grad_norm": 0.23711462318897247, "learning_rate": 5.573522466615061e-06, "loss": 0.1066, "step": 45130 }, { "epoch": 0.8049620090607499, "grad_norm": 0.3087281882762909, "learning_rate": 5.572542795371532e-06, "loss": 0.0881, "step": 45131 }, { "epoch": 0.8049798451824636, "grad_norm": 0.28367748856544495, "learning_rate": 5.571563199435542e-06, "loss": 0.0928, "step": 45132 }, { "epoch": 0.8049976813041773, "grad_norm": 0.2632720172405243, "learning_rate": 5.570583678810878e-06, "loss": 0.1397, "step": 45133 }, { "epoch": 0.805015517425891, "grad_norm": 0.19186727702617645, "learning_rate": 5.569604233501349e-06, "loss": 0.0988, "step": 45134 }, { "epoch": 0.8050333535476046, "grad_norm": 0.23219752311706543, "learning_rate": 5.568624863510738e-06, "loss": 0.1139, "step": 45135 }, { "epoch": 0.8050511896693183, "grad_norm": 0.25439921021461487, "learning_rate": 5.567645568842855e-06, "loss": 0.1123, "step": 45136 }, { "epoch": 0.805069025791032, "grad_norm": 0.25233781337738037, "learning_rate": 5.5666663495014895e-06, "loss": 0.134, "step": 45137 }, { "epoch": 0.8050868619127457, "grad_norm": 0.35540053248405457, "learning_rate": 5.5656872054904355e-06, "loss": 0.0841, "step": 45138 }, { "epoch": 0.8051046980344594, "grad_norm": 0.33396226167678833, "learning_rate": 5.564708136813487e-06, "loss": 0.1098, "step": 45139 }, { "epoch": 0.8051225341561731, "grad_norm": 0.23016005754470825, "learning_rate": 5.563729143474447e-06, "loss": 0.0999, "step": 45140 }, { "epoch": 0.8051403702778868, "grad_norm": 0.224091574549675, "learning_rate": 5.562750225477106e-06, "loss": 0.1301, "step": 45141 }, { "epoch": 0.8051582063996005, "grad_norm": 0.27392706274986267, "learning_rate": 5.5617713828252595e-06, "loss": 0.1573, "step": 45142 }, { "epoch": 0.8051760425213141, "grad_norm": 0.24790118634700775, "learning_rate": 5.5607926155227e-06, "loss": 0.134, "step": 45143 }, { "epoch": 0.8051938786430278, "grad_norm": 0.297635018825531, "learning_rate": 5.559813923573218e-06, "loss": 0.0862, "step": 45144 }, { "epoch": 0.8052117147647415, "grad_norm": 0.2815331816673279, "learning_rate": 5.558835306980614e-06, "loss": 0.1119, "step": 45145 }, { "epoch": 0.8052295508864552, "grad_norm": 0.2936416566371918, "learning_rate": 5.557856765748684e-06, "loss": 0.0927, "step": 45146 }, { "epoch": 0.8052473870081689, "grad_norm": 0.2213561236858368, "learning_rate": 5.556878299881208e-06, "loss": 0.0969, "step": 45147 }, { "epoch": 0.8052652231298827, "grad_norm": 0.25681453943252563, "learning_rate": 5.555899909381992e-06, "loss": 0.1215, "step": 45148 }, { "epoch": 0.8052830592515964, "grad_norm": 0.275356262922287, "learning_rate": 5.5549215942548165e-06, "loss": 0.1122, "step": 45149 }, { "epoch": 0.8053008953733101, "grad_norm": 0.24249376356601715, "learning_rate": 5.553943354503491e-06, "loss": 0.0878, "step": 45150 }, { "epoch": 0.8053187314950238, "grad_norm": 0.2888849675655365, "learning_rate": 5.552965190131795e-06, "loss": 0.1032, "step": 45151 }, { "epoch": 0.8053365676167374, "grad_norm": 0.2656308114528656, "learning_rate": 5.551987101143516e-06, "loss": 0.1198, "step": 45152 }, { "epoch": 0.8053544037384511, "grad_norm": 0.2765926420688629, "learning_rate": 5.5510090875424606e-06, "loss": 0.1681, "step": 45153 }, { "epoch": 0.8053722398601648, "grad_norm": 0.2646331191062927, "learning_rate": 5.55003114933241e-06, "loss": 0.1447, "step": 45154 }, { "epoch": 0.8053900759818785, "grad_norm": 0.3718184232711792, "learning_rate": 5.549053286517159e-06, "loss": 0.0848, "step": 45155 }, { "epoch": 0.8054079121035922, "grad_norm": 0.17985741794109344, "learning_rate": 5.548075499100492e-06, "loss": 0.0805, "step": 45156 }, { "epoch": 0.8054257482253059, "grad_norm": 0.28909024596214294, "learning_rate": 5.547097787086197e-06, "loss": 0.1047, "step": 45157 }, { "epoch": 0.8054435843470196, "grad_norm": 0.18139904737472534, "learning_rate": 5.546120150478076e-06, "loss": 0.0923, "step": 45158 }, { "epoch": 0.8054614204687333, "grad_norm": 0.22457453608512878, "learning_rate": 5.545142589279914e-06, "loss": 0.0563, "step": 45159 }, { "epoch": 0.805479256590447, "grad_norm": 0.27773961424827576, "learning_rate": 5.544165103495494e-06, "loss": 0.1149, "step": 45160 }, { "epoch": 0.8054970927121606, "grad_norm": 0.300950288772583, "learning_rate": 5.5431876931286125e-06, "loss": 0.1301, "step": 45161 }, { "epoch": 0.8055149288338743, "grad_norm": 0.32615914940834045, "learning_rate": 5.54221035818305e-06, "loss": 0.1465, "step": 45162 }, { "epoch": 0.805532764955588, "grad_norm": 0.21602334082126617, "learning_rate": 5.54123309866261e-06, "loss": 0.0476, "step": 45163 }, { "epoch": 0.8055506010773017, "grad_norm": 0.2839105725288391, "learning_rate": 5.540255914571069e-06, "loss": 0.089, "step": 45164 }, { "epoch": 0.8055684371990155, "grad_norm": 0.3230873942375183, "learning_rate": 5.539278805912209e-06, "loss": 0.099, "step": 45165 }, { "epoch": 0.8055862733207292, "grad_norm": 0.3063286542892456, "learning_rate": 5.5383017726898355e-06, "loss": 0.1526, "step": 45166 }, { "epoch": 0.8056041094424429, "grad_norm": 0.2745344638824463, "learning_rate": 5.537324814907727e-06, "loss": 0.1161, "step": 45167 }, { "epoch": 0.8056219455641566, "grad_norm": 0.22862201929092407, "learning_rate": 5.536347932569669e-06, "loss": 0.1224, "step": 45168 }, { "epoch": 0.8056397816858702, "grad_norm": 0.21719032526016235, "learning_rate": 5.535371125679448e-06, "loss": 0.0551, "step": 45169 }, { "epoch": 0.8056576178075839, "grad_norm": 0.23315177857875824, "learning_rate": 5.534394394240844e-06, "loss": 0.123, "step": 45170 }, { "epoch": 0.8056754539292976, "grad_norm": 0.31189781427383423, "learning_rate": 5.533417738257657e-06, "loss": 0.1532, "step": 45171 }, { "epoch": 0.8056932900510113, "grad_norm": 0.2474830448627472, "learning_rate": 5.532441157733667e-06, "loss": 0.1223, "step": 45172 }, { "epoch": 0.805711126172725, "grad_norm": 0.46391186118125916, "learning_rate": 5.531464652672661e-06, "loss": 0.1326, "step": 45173 }, { "epoch": 0.8057289622944387, "grad_norm": 0.28167542815208435, "learning_rate": 5.5304882230784115e-06, "loss": 0.1295, "step": 45174 }, { "epoch": 0.8057467984161524, "grad_norm": 0.3139234483242035, "learning_rate": 5.5295118689547235e-06, "loss": 0.121, "step": 45175 }, { "epoch": 0.8057646345378661, "grad_norm": 0.2539048194885254, "learning_rate": 5.528535590305364e-06, "loss": 0.0924, "step": 45176 }, { "epoch": 0.8057824706595798, "grad_norm": 0.2819989025592804, "learning_rate": 5.527559387134135e-06, "loss": 0.094, "step": 45177 }, { "epoch": 0.8058003067812934, "grad_norm": 0.3445577323436737, "learning_rate": 5.526583259444803e-06, "loss": 0.082, "step": 45178 }, { "epoch": 0.8058181429030071, "grad_norm": 0.2964233160018921, "learning_rate": 5.525607207241168e-06, "loss": 0.1327, "step": 45179 }, { "epoch": 0.8058359790247208, "grad_norm": 0.20467011630535126, "learning_rate": 5.524631230527006e-06, "loss": 0.0885, "step": 45180 }, { "epoch": 0.8058538151464345, "grad_norm": 0.34546440839767456, "learning_rate": 5.5236553293060984e-06, "loss": 0.1146, "step": 45181 }, { "epoch": 0.8058716512681483, "grad_norm": 0.22661687433719635, "learning_rate": 5.522679503582231e-06, "loss": 0.1388, "step": 45182 }, { "epoch": 0.805889487389862, "grad_norm": 0.255628764629364, "learning_rate": 5.521703753359178e-06, "loss": 0.1476, "step": 45183 }, { "epoch": 0.8059073235115757, "grad_norm": 0.4289093017578125, "learning_rate": 5.520728078640733e-06, "loss": 0.1218, "step": 45184 }, { "epoch": 0.8059251596332894, "grad_norm": 0.2473369538784027, "learning_rate": 5.519752479430676e-06, "loss": 0.1099, "step": 45185 }, { "epoch": 0.805942995755003, "grad_norm": 0.23515115678310394, "learning_rate": 5.518776955732788e-06, "loss": 0.0996, "step": 45186 }, { "epoch": 0.8059608318767167, "grad_norm": 0.2763703167438507, "learning_rate": 5.51780150755084e-06, "loss": 0.1374, "step": 45187 }, { "epoch": 0.8059786679984304, "grad_norm": 0.33515024185180664, "learning_rate": 5.516826134888631e-06, "loss": 0.1016, "step": 45188 }, { "epoch": 0.8059965041201441, "grad_norm": 0.36895760893821716, "learning_rate": 5.515850837749925e-06, "loss": 0.1364, "step": 45189 }, { "epoch": 0.8060143402418578, "grad_norm": 0.2607574164867401, "learning_rate": 5.514875616138518e-06, "loss": 0.1351, "step": 45190 }, { "epoch": 0.8060321763635715, "grad_norm": 0.332884281873703, "learning_rate": 5.5139004700581785e-06, "loss": 0.1566, "step": 45191 }, { "epoch": 0.8060500124852852, "grad_norm": 0.30799686908721924, "learning_rate": 5.512925399512694e-06, "loss": 0.1178, "step": 45192 }, { "epoch": 0.8060678486069989, "grad_norm": 0.25493577122688293, "learning_rate": 5.511950404505845e-06, "loss": 0.1161, "step": 45193 }, { "epoch": 0.8060856847287126, "grad_norm": 0.31678780913352966, "learning_rate": 5.510975485041403e-06, "loss": 0.1276, "step": 45194 }, { "epoch": 0.8061035208504262, "grad_norm": 0.2563844621181488, "learning_rate": 5.510000641123153e-06, "loss": 0.0574, "step": 45195 }, { "epoch": 0.8061213569721399, "grad_norm": 0.31718015670776367, "learning_rate": 5.509025872754866e-06, "loss": 0.116, "step": 45196 }, { "epoch": 0.8061391930938536, "grad_norm": 0.2768203914165497, "learning_rate": 5.508051179940335e-06, "loss": 0.0763, "step": 45197 }, { "epoch": 0.8061570292155674, "grad_norm": 0.36266446113586426, "learning_rate": 5.507076562683327e-06, "loss": 0.083, "step": 45198 }, { "epoch": 0.8061748653372811, "grad_norm": 0.29332104325294495, "learning_rate": 5.506102020987625e-06, "loss": 0.0915, "step": 45199 }, { "epoch": 0.8061927014589948, "grad_norm": 0.2650769054889679, "learning_rate": 5.5051275548569956e-06, "loss": 0.1006, "step": 45200 }, { "epoch": 0.8062105375807085, "grad_norm": 0.3163204789161682, "learning_rate": 5.504153164295234e-06, "loss": 0.1396, "step": 45201 }, { "epoch": 0.8062283737024222, "grad_norm": 0.2902640700340271, "learning_rate": 5.503178849306107e-06, "loss": 0.1449, "step": 45202 }, { "epoch": 0.8062462098241359, "grad_norm": 0.20983174443244934, "learning_rate": 5.502204609893388e-06, "loss": 0.0708, "step": 45203 }, { "epoch": 0.8062640459458496, "grad_norm": 0.48085594177246094, "learning_rate": 5.501230446060862e-06, "loss": 0.0937, "step": 45204 }, { "epoch": 0.8062818820675632, "grad_norm": 0.35073500871658325, "learning_rate": 5.5002563578122975e-06, "loss": 0.1547, "step": 45205 }, { "epoch": 0.8062997181892769, "grad_norm": 0.32338613271713257, "learning_rate": 5.49928234515148e-06, "loss": 0.1266, "step": 45206 }, { "epoch": 0.8063175543109906, "grad_norm": 0.27109187841415405, "learning_rate": 5.498308408082179e-06, "loss": 0.1025, "step": 45207 }, { "epoch": 0.8063353904327043, "grad_norm": 0.3133688271045685, "learning_rate": 5.497334546608171e-06, "loss": 0.1282, "step": 45208 }, { "epoch": 0.806353226554418, "grad_norm": 0.3348323106765747, "learning_rate": 5.496360760733221e-06, "loss": 0.0999, "step": 45209 }, { "epoch": 0.8063710626761317, "grad_norm": 0.2402358502149582, "learning_rate": 5.495387050461123e-06, "loss": 0.1361, "step": 45210 }, { "epoch": 0.8063888987978454, "grad_norm": 0.29074907302856445, "learning_rate": 5.49441341579564e-06, "loss": 0.1058, "step": 45211 }, { "epoch": 0.806406734919559, "grad_norm": 0.269368052482605, "learning_rate": 5.4934398567405486e-06, "loss": 0.1044, "step": 45212 }, { "epoch": 0.8064245710412727, "grad_norm": 0.22036467492580414, "learning_rate": 5.492466373299615e-06, "loss": 0.0945, "step": 45213 }, { "epoch": 0.8064424071629864, "grad_norm": 0.28387391567230225, "learning_rate": 5.491492965476624e-06, "loss": 0.1768, "step": 45214 }, { "epoch": 0.8064602432847002, "grad_norm": 0.2515878975391388, "learning_rate": 5.490519633275348e-06, "loss": 0.1027, "step": 45215 }, { "epoch": 0.8064780794064139, "grad_norm": 0.274103581905365, "learning_rate": 5.489546376699548e-06, "loss": 0.1066, "step": 45216 }, { "epoch": 0.8064959155281276, "grad_norm": 0.29180335998535156, "learning_rate": 5.48857319575301e-06, "loss": 0.0624, "step": 45217 }, { "epoch": 0.8065137516498413, "grad_norm": 0.3230469822883606, "learning_rate": 5.487600090439496e-06, "loss": 0.1331, "step": 45218 }, { "epoch": 0.806531587771555, "grad_norm": 0.1601676493883133, "learning_rate": 5.486627060762789e-06, "loss": 0.0728, "step": 45219 }, { "epoch": 0.8065494238932687, "grad_norm": 0.31972986459732056, "learning_rate": 5.485654106726657e-06, "loss": 0.09, "step": 45220 }, { "epoch": 0.8065672600149824, "grad_norm": 0.3033163547515869, "learning_rate": 5.484681228334867e-06, "loss": 0.0946, "step": 45221 }, { "epoch": 0.806585096136696, "grad_norm": 0.3145233988761902, "learning_rate": 5.483708425591188e-06, "loss": 0.1253, "step": 45222 }, { "epoch": 0.8066029322584097, "grad_norm": 0.327591210603714, "learning_rate": 5.482735698499403e-06, "loss": 0.1036, "step": 45223 }, { "epoch": 0.8066207683801234, "grad_norm": 0.28029635548591614, "learning_rate": 5.4817630470632734e-06, "loss": 0.1382, "step": 45224 }, { "epoch": 0.8066386045018371, "grad_norm": 0.39999592304229736, "learning_rate": 5.480790471286573e-06, "loss": 0.141, "step": 45225 }, { "epoch": 0.8066564406235508, "grad_norm": 0.2242717742919922, "learning_rate": 5.4798179711730655e-06, "loss": 0.0792, "step": 45226 }, { "epoch": 0.8066742767452645, "grad_norm": 0.3036152720451355, "learning_rate": 5.47884554672653e-06, "loss": 0.1648, "step": 45227 }, { "epoch": 0.8066921128669782, "grad_norm": 0.216957688331604, "learning_rate": 5.477873197950733e-06, "loss": 0.0882, "step": 45228 }, { "epoch": 0.8067099489886919, "grad_norm": 0.2038707137107849, "learning_rate": 5.47690092484944e-06, "loss": 0.0897, "step": 45229 }, { "epoch": 0.8067277851104055, "grad_norm": 0.23714205622673035, "learning_rate": 5.4759287274264185e-06, "loss": 0.1102, "step": 45230 }, { "epoch": 0.8067456212321192, "grad_norm": 0.27829429507255554, "learning_rate": 5.474956605685444e-06, "loss": 0.0913, "step": 45231 }, { "epoch": 0.806763457353833, "grad_norm": 0.24379919469356537, "learning_rate": 5.473984559630277e-06, "loss": 0.083, "step": 45232 }, { "epoch": 0.8067812934755467, "grad_norm": 0.23219719529151917, "learning_rate": 5.4730125892646976e-06, "loss": 0.0956, "step": 45233 }, { "epoch": 0.8067991295972604, "grad_norm": 0.2861798405647278, "learning_rate": 5.472040694592465e-06, "loss": 0.1157, "step": 45234 }, { "epoch": 0.8068169657189741, "grad_norm": 0.2962949872016907, "learning_rate": 5.471068875617339e-06, "loss": 0.1185, "step": 45235 }, { "epoch": 0.8068348018406878, "grad_norm": 0.2821102738380432, "learning_rate": 5.470097132343102e-06, "loss": 0.1205, "step": 45236 }, { "epoch": 0.8068526379624015, "grad_norm": 0.2512767016887665, "learning_rate": 5.469125464773514e-06, "loss": 0.0966, "step": 45237 }, { "epoch": 0.8068704740841152, "grad_norm": 0.23213151097297668, "learning_rate": 5.468153872912343e-06, "loss": 0.0818, "step": 45238 }, { "epoch": 0.8068883102058289, "grad_norm": 0.40025150775909424, "learning_rate": 5.467182356763345e-06, "loss": 0.14, "step": 45239 }, { "epoch": 0.8069061463275425, "grad_norm": 0.28509029746055603, "learning_rate": 5.466210916330303e-06, "loss": 0.0973, "step": 45240 }, { "epoch": 0.8069239824492562, "grad_norm": 0.2456107884645462, "learning_rate": 5.465239551616974e-06, "loss": 0.1296, "step": 45241 }, { "epoch": 0.8069418185709699, "grad_norm": 0.23805761337280273, "learning_rate": 5.464268262627123e-06, "loss": 0.1172, "step": 45242 }, { "epoch": 0.8069596546926836, "grad_norm": 0.26924633979797363, "learning_rate": 5.46329704936451e-06, "loss": 0.0835, "step": 45243 }, { "epoch": 0.8069774908143973, "grad_norm": 0.23496891558170319, "learning_rate": 5.462325911832911e-06, "loss": 0.1314, "step": 45244 }, { "epoch": 0.806995326936111, "grad_norm": 0.27983197569847107, "learning_rate": 5.461354850036077e-06, "loss": 0.1264, "step": 45245 }, { "epoch": 0.8070131630578247, "grad_norm": 0.2608453929424286, "learning_rate": 5.4603838639777884e-06, "loss": 0.0947, "step": 45246 }, { "epoch": 0.8070309991795384, "grad_norm": 0.29466572403907776, "learning_rate": 5.459412953661803e-06, "loss": 0.0936, "step": 45247 }, { "epoch": 0.807048835301252, "grad_norm": 0.28669387102127075, "learning_rate": 5.458442119091872e-06, "loss": 0.1141, "step": 45248 }, { "epoch": 0.8070666714229658, "grad_norm": 0.2387753576040268, "learning_rate": 5.457471360271777e-06, "loss": 0.0931, "step": 45249 }, { "epoch": 0.8070845075446795, "grad_norm": 0.35160550475120544, "learning_rate": 5.45650067720527e-06, "loss": 0.1409, "step": 45250 }, { "epoch": 0.8071023436663932, "grad_norm": 0.28481483459472656, "learning_rate": 5.45553006989612e-06, "loss": 0.0896, "step": 45251 }, { "epoch": 0.8071201797881069, "grad_norm": 0.33091050386428833, "learning_rate": 5.454559538348078e-06, "loss": 0.1419, "step": 45252 }, { "epoch": 0.8071380159098206, "grad_norm": 0.27689868211746216, "learning_rate": 5.453589082564919e-06, "loss": 0.0966, "step": 45253 }, { "epoch": 0.8071558520315343, "grad_norm": 0.2798336148262024, "learning_rate": 5.452618702550402e-06, "loss": 0.0675, "step": 45254 }, { "epoch": 0.807173688153248, "grad_norm": 0.3700137138366699, "learning_rate": 5.4516483983082845e-06, "loss": 0.1241, "step": 45255 }, { "epoch": 0.8071915242749617, "grad_norm": 0.38202112913131714, "learning_rate": 5.450678169842324e-06, "loss": 0.1495, "step": 45256 }, { "epoch": 0.8072093603966753, "grad_norm": 0.36776942014694214, "learning_rate": 5.449708017156294e-06, "loss": 0.1109, "step": 45257 }, { "epoch": 0.807227196518389, "grad_norm": 0.23688314855098724, "learning_rate": 5.448737940253948e-06, "loss": 0.0926, "step": 45258 }, { "epoch": 0.8072450326401027, "grad_norm": 0.2886868417263031, "learning_rate": 5.447767939139039e-06, "loss": 0.1216, "step": 45259 }, { "epoch": 0.8072628687618164, "grad_norm": 0.2926797866821289, "learning_rate": 5.446798013815343e-06, "loss": 0.1306, "step": 45260 }, { "epoch": 0.8072807048835301, "grad_norm": 0.27875691652297974, "learning_rate": 5.4458281642866035e-06, "loss": 0.1107, "step": 45261 }, { "epoch": 0.8072985410052438, "grad_norm": 0.2355824112892151, "learning_rate": 5.444858390556596e-06, "loss": 0.1308, "step": 45262 }, { "epoch": 0.8073163771269575, "grad_norm": 0.29015809297561646, "learning_rate": 5.443888692629071e-06, "loss": 0.1068, "step": 45263 }, { "epoch": 0.8073342132486712, "grad_norm": 0.3303722143173218, "learning_rate": 5.442919070507788e-06, "loss": 0.1743, "step": 45264 }, { "epoch": 0.8073520493703848, "grad_norm": 0.22878235578536987, "learning_rate": 5.441949524196496e-06, "loss": 0.1249, "step": 45265 }, { "epoch": 0.8073698854920986, "grad_norm": 0.20490944385528564, "learning_rate": 5.440980053698971e-06, "loss": 0.1136, "step": 45266 }, { "epoch": 0.8073877216138123, "grad_norm": 0.44089174270629883, "learning_rate": 5.440010659018965e-06, "loss": 0.0872, "step": 45267 }, { "epoch": 0.807405557735526, "grad_norm": 0.3440295457839966, "learning_rate": 5.439041340160231e-06, "loss": 0.1273, "step": 45268 }, { "epoch": 0.8074233938572397, "grad_norm": 0.3534582853317261, "learning_rate": 5.438072097126523e-06, "loss": 0.1736, "step": 45269 }, { "epoch": 0.8074412299789534, "grad_norm": 0.229813814163208, "learning_rate": 5.437102929921612e-06, "loss": 0.0702, "step": 45270 }, { "epoch": 0.8074590661006671, "grad_norm": 0.2391553819179535, "learning_rate": 5.436133838549243e-06, "loss": 0.0805, "step": 45271 }, { "epoch": 0.8074769022223808, "grad_norm": 0.2872280478477478, "learning_rate": 5.435164823013172e-06, "loss": 0.1754, "step": 45272 }, { "epoch": 0.8074947383440945, "grad_norm": 0.25781434774398804, "learning_rate": 5.434195883317167e-06, "loss": 0.1228, "step": 45273 }, { "epoch": 0.8075125744658082, "grad_norm": 0.23661333322525024, "learning_rate": 5.43322701946497e-06, "loss": 0.1015, "step": 45274 }, { "epoch": 0.8075304105875218, "grad_norm": 0.3195432722568512, "learning_rate": 5.432258231460349e-06, "loss": 0.127, "step": 45275 }, { "epoch": 0.8075482467092355, "grad_norm": 0.33412986993789673, "learning_rate": 5.4312895193070534e-06, "loss": 0.1047, "step": 45276 }, { "epoch": 0.8075660828309492, "grad_norm": 0.2995836138725281, "learning_rate": 5.430320883008841e-06, "loss": 0.1401, "step": 45277 }, { "epoch": 0.8075839189526629, "grad_norm": 0.3090651035308838, "learning_rate": 5.429352322569453e-06, "loss": 0.0981, "step": 45278 }, { "epoch": 0.8076017550743766, "grad_norm": 0.1910848319530487, "learning_rate": 5.428383837992665e-06, "loss": 0.0838, "step": 45279 }, { "epoch": 0.8076195911960903, "grad_norm": 0.29134660959243774, "learning_rate": 5.42741542928222e-06, "loss": 0.1018, "step": 45280 }, { "epoch": 0.807637427317804, "grad_norm": 0.23909783363342285, "learning_rate": 5.4264470964418725e-06, "loss": 0.1075, "step": 45281 }, { "epoch": 0.8076552634395177, "grad_norm": 0.3901885747909546, "learning_rate": 5.42547883947537e-06, "loss": 0.1206, "step": 45282 }, { "epoch": 0.8076730995612315, "grad_norm": 0.23201575875282288, "learning_rate": 5.424510658386478e-06, "loss": 0.0905, "step": 45283 }, { "epoch": 0.8076909356829451, "grad_norm": 0.3962501883506775, "learning_rate": 5.423542553178945e-06, "loss": 0.1488, "step": 45284 }, { "epoch": 0.8077087718046588, "grad_norm": 0.27960798144340515, "learning_rate": 5.422574523856524e-06, "loss": 0.1044, "step": 45285 }, { "epoch": 0.8077266079263725, "grad_norm": 0.26997601985931396, "learning_rate": 5.421606570422957e-06, "loss": 0.0945, "step": 45286 }, { "epoch": 0.8077444440480862, "grad_norm": 0.31664034724235535, "learning_rate": 5.420638692882007e-06, "loss": 0.1457, "step": 45287 }, { "epoch": 0.8077622801697999, "grad_norm": 0.18644289672374725, "learning_rate": 5.41967089123743e-06, "loss": 0.0652, "step": 45288 }, { "epoch": 0.8077801162915136, "grad_norm": 0.2650397717952728, "learning_rate": 5.41870316549297e-06, "loss": 0.0951, "step": 45289 }, { "epoch": 0.8077979524132273, "grad_norm": 0.33292874693870544, "learning_rate": 5.4177355156523805e-06, "loss": 0.1262, "step": 45290 }, { "epoch": 0.807815788534941, "grad_norm": 0.2799188196659088, "learning_rate": 5.4167679417194054e-06, "loss": 0.1109, "step": 45291 }, { "epoch": 0.8078336246566546, "grad_norm": 0.19513505697250366, "learning_rate": 5.415800443697808e-06, "loss": 0.0601, "step": 45292 }, { "epoch": 0.8078514607783683, "grad_norm": 0.33457663655281067, "learning_rate": 5.414833021591334e-06, "loss": 0.0982, "step": 45293 }, { "epoch": 0.807869296900082, "grad_norm": 0.28720855712890625, "learning_rate": 5.413865675403729e-06, "loss": 0.1105, "step": 45294 }, { "epoch": 0.8078871330217957, "grad_norm": 0.2925865650177002, "learning_rate": 5.412898405138738e-06, "loss": 0.1344, "step": 45295 }, { "epoch": 0.8079049691435094, "grad_norm": 0.43825265765190125, "learning_rate": 5.411931210800128e-06, "loss": 0.1363, "step": 45296 }, { "epoch": 0.8079228052652231, "grad_norm": 0.2749407887458801, "learning_rate": 5.410964092391638e-06, "loss": 0.1115, "step": 45297 }, { "epoch": 0.8079406413869368, "grad_norm": 0.28501713275909424, "learning_rate": 5.409997049917015e-06, "loss": 0.1518, "step": 45298 }, { "epoch": 0.8079584775086506, "grad_norm": 0.24590031802654266, "learning_rate": 5.409030083380001e-06, "loss": 0.1574, "step": 45299 }, { "epoch": 0.8079763136303643, "grad_norm": 0.2159082591533661, "learning_rate": 5.408063192784363e-06, "loss": 0.1042, "step": 45300 }, { "epoch": 0.807994149752078, "grad_norm": 0.22653235495090485, "learning_rate": 5.4070963781338325e-06, "loss": 0.1135, "step": 45301 }, { "epoch": 0.8080119858737916, "grad_norm": 0.25408491492271423, "learning_rate": 5.40612963943217e-06, "loss": 0.1055, "step": 45302 }, { "epoch": 0.8080298219955053, "grad_norm": 0.23485468327999115, "learning_rate": 5.405162976683115e-06, "loss": 0.0987, "step": 45303 }, { "epoch": 0.808047658117219, "grad_norm": 0.2339015007019043, "learning_rate": 5.404196389890409e-06, "loss": 0.0752, "step": 45304 }, { "epoch": 0.8080654942389327, "grad_norm": 0.23099899291992188, "learning_rate": 5.403229879057814e-06, "loss": 0.1144, "step": 45305 }, { "epoch": 0.8080833303606464, "grad_norm": 0.25047290325164795, "learning_rate": 5.402263444189068e-06, "loss": 0.103, "step": 45306 }, { "epoch": 0.8081011664823601, "grad_norm": 0.20365071296691895, "learning_rate": 5.401297085287918e-06, "loss": 0.0788, "step": 45307 }, { "epoch": 0.8081190026040738, "grad_norm": 0.28805556893348694, "learning_rate": 5.400330802358103e-06, "loss": 0.1513, "step": 45308 }, { "epoch": 0.8081368387257875, "grad_norm": 0.2784179151058197, "learning_rate": 5.399364595403381e-06, "loss": 0.0606, "step": 45309 }, { "epoch": 0.8081546748475011, "grad_norm": 0.23879724740982056, "learning_rate": 5.398398464427493e-06, "loss": 0.0809, "step": 45310 }, { "epoch": 0.8081725109692148, "grad_norm": 0.21566037833690643, "learning_rate": 5.3974324094341835e-06, "loss": 0.1426, "step": 45311 }, { "epoch": 0.8081903470909285, "grad_norm": 0.27589720487594604, "learning_rate": 5.396466430427194e-06, "loss": 0.0978, "step": 45312 }, { "epoch": 0.8082081832126422, "grad_norm": 0.3204553723335266, "learning_rate": 5.395500527410266e-06, "loss": 0.1047, "step": 45313 }, { "epoch": 0.8082260193343559, "grad_norm": 0.19571274518966675, "learning_rate": 5.394534700387149e-06, "loss": 0.0942, "step": 45314 }, { "epoch": 0.8082438554560696, "grad_norm": 0.2984962463378906, "learning_rate": 5.3935689493615935e-06, "loss": 0.1027, "step": 45315 }, { "epoch": 0.8082616915777834, "grad_norm": 0.3122525215148926, "learning_rate": 5.392603274337338e-06, "loss": 0.1513, "step": 45316 }, { "epoch": 0.8082795276994971, "grad_norm": 0.28807875514030457, "learning_rate": 5.391637675318117e-06, "loss": 0.0864, "step": 45317 }, { "epoch": 0.8082973638212108, "grad_norm": 0.27992531657218933, "learning_rate": 5.390672152307688e-06, "loss": 0.129, "step": 45318 }, { "epoch": 0.8083151999429244, "grad_norm": 0.252861887216568, "learning_rate": 5.389706705309786e-06, "loss": 0.1353, "step": 45319 }, { "epoch": 0.8083330360646381, "grad_norm": 0.25114619731903076, "learning_rate": 5.388741334328154e-06, "loss": 0.1115, "step": 45320 }, { "epoch": 0.8083508721863518, "grad_norm": 0.25021761655807495, "learning_rate": 5.387776039366527e-06, "loss": 0.1112, "step": 45321 }, { "epoch": 0.8083687083080655, "grad_norm": 0.30372557044029236, "learning_rate": 5.3868108204286585e-06, "loss": 0.0697, "step": 45322 }, { "epoch": 0.8083865444297792, "grad_norm": 0.22994790971279144, "learning_rate": 5.385845677518289e-06, "loss": 0.1065, "step": 45323 }, { "epoch": 0.8084043805514929, "grad_norm": 0.25752994418144226, "learning_rate": 5.384880610639156e-06, "loss": 0.096, "step": 45324 }, { "epoch": 0.8084222166732066, "grad_norm": 0.31726139783859253, "learning_rate": 5.383915619794999e-06, "loss": 0.0661, "step": 45325 }, { "epoch": 0.8084400527949203, "grad_norm": 0.33966130018234253, "learning_rate": 5.382950704989554e-06, "loss": 0.1742, "step": 45326 }, { "epoch": 0.808457888916634, "grad_norm": 0.33929044008255005, "learning_rate": 5.381985866226574e-06, "loss": 0.1721, "step": 45327 }, { "epoch": 0.8084757250383476, "grad_norm": 0.276747465133667, "learning_rate": 5.381021103509787e-06, "loss": 0.1305, "step": 45328 }, { "epoch": 0.8084935611600613, "grad_norm": 0.33432528376579285, "learning_rate": 5.380056416842943e-06, "loss": 0.1515, "step": 45329 }, { "epoch": 0.808511397281775, "grad_norm": 0.26534348726272583, "learning_rate": 5.379091806229772e-06, "loss": 0.1102, "step": 45330 }, { "epoch": 0.8085292334034887, "grad_norm": 0.3019811809062958, "learning_rate": 5.3781272716740245e-06, "loss": 0.0961, "step": 45331 }, { "epoch": 0.8085470695252024, "grad_norm": 0.25023484230041504, "learning_rate": 5.377162813179434e-06, "loss": 0.0622, "step": 45332 }, { "epoch": 0.8085649056469162, "grad_norm": 0.2626052796840668, "learning_rate": 5.376198430749735e-06, "loss": 0.0995, "step": 45333 }, { "epoch": 0.8085827417686299, "grad_norm": 0.27859359979629517, "learning_rate": 5.375234124388665e-06, "loss": 0.0833, "step": 45334 }, { "epoch": 0.8086005778903436, "grad_norm": 0.35295358300209045, "learning_rate": 5.37426989409997e-06, "loss": 0.1261, "step": 45335 }, { "epoch": 0.8086184140120573, "grad_norm": 0.2497841715812683, "learning_rate": 5.373305739887385e-06, "loss": 0.1603, "step": 45336 }, { "epoch": 0.8086362501337709, "grad_norm": 0.33090224862098694, "learning_rate": 5.372341661754646e-06, "loss": 0.0806, "step": 45337 }, { "epoch": 0.8086540862554846, "grad_norm": 0.29360634088516235, "learning_rate": 5.371377659705487e-06, "loss": 0.1249, "step": 45338 }, { "epoch": 0.8086719223771983, "grad_norm": 0.2407098114490509, "learning_rate": 5.3704137337436424e-06, "loss": 0.1363, "step": 45339 }, { "epoch": 0.808689758498912, "grad_norm": 0.23742075264453888, "learning_rate": 5.369449883872863e-06, "loss": 0.068, "step": 45340 }, { "epoch": 0.8087075946206257, "grad_norm": 0.3116680383682251, "learning_rate": 5.368486110096874e-06, "loss": 0.15, "step": 45341 }, { "epoch": 0.8087254307423394, "grad_norm": 0.22685708105564117, "learning_rate": 5.367522412419407e-06, "loss": 0.0923, "step": 45342 }, { "epoch": 0.8087432668640531, "grad_norm": 0.35855111479759216, "learning_rate": 5.366558790844203e-06, "loss": 0.1252, "step": 45343 }, { "epoch": 0.8087611029857668, "grad_norm": 0.2319633960723877, "learning_rate": 5.365595245375007e-06, "loss": 0.102, "step": 45344 }, { "epoch": 0.8087789391074804, "grad_norm": 0.24426911771297455, "learning_rate": 5.364631776015544e-06, "loss": 0.1571, "step": 45345 }, { "epoch": 0.8087967752291941, "grad_norm": 0.23831485211849213, "learning_rate": 5.363668382769551e-06, "loss": 0.0956, "step": 45346 }, { "epoch": 0.8088146113509078, "grad_norm": 0.23909638822078705, "learning_rate": 5.362705065640755e-06, "loss": 0.1011, "step": 45347 }, { "epoch": 0.8088324474726215, "grad_norm": 0.2797805070877075, "learning_rate": 5.361741824632901e-06, "loss": 0.151, "step": 45348 }, { "epoch": 0.8088502835943352, "grad_norm": 0.27477607131004333, "learning_rate": 5.360778659749721e-06, "loss": 0.0876, "step": 45349 }, { "epoch": 0.808868119716049, "grad_norm": 0.3827594518661499, "learning_rate": 5.359815570994945e-06, "loss": 0.1061, "step": 45350 }, { "epoch": 0.8088859558377627, "grad_norm": 0.3047603964805603, "learning_rate": 5.358852558372307e-06, "loss": 0.108, "step": 45351 }, { "epoch": 0.8089037919594764, "grad_norm": 0.270510733127594, "learning_rate": 5.357889621885534e-06, "loss": 0.0744, "step": 45352 }, { "epoch": 0.8089216280811901, "grad_norm": 0.3078206479549408, "learning_rate": 5.356926761538372e-06, "loss": 0.1057, "step": 45353 }, { "epoch": 0.8089394642029037, "grad_norm": 0.20678578317165375, "learning_rate": 5.355963977334547e-06, "loss": 0.1026, "step": 45354 }, { "epoch": 0.8089573003246174, "grad_norm": 0.31052365899086, "learning_rate": 5.355001269277784e-06, "loss": 0.1168, "step": 45355 }, { "epoch": 0.8089751364463311, "grad_norm": 0.27801328897476196, "learning_rate": 5.354038637371827e-06, "loss": 0.1204, "step": 45356 }, { "epoch": 0.8089929725680448, "grad_norm": 0.3298667073249817, "learning_rate": 5.353076081620395e-06, "loss": 0.1019, "step": 45357 }, { "epoch": 0.8090108086897585, "grad_norm": 0.28805720806121826, "learning_rate": 5.352113602027231e-06, "loss": 0.0737, "step": 45358 }, { "epoch": 0.8090286448114722, "grad_norm": 0.26523420214653015, "learning_rate": 5.351151198596063e-06, "loss": 0.1193, "step": 45359 }, { "epoch": 0.8090464809331859, "grad_norm": 0.27644097805023193, "learning_rate": 5.350188871330614e-06, "loss": 0.1108, "step": 45360 }, { "epoch": 0.8090643170548996, "grad_norm": 0.24303224682807922, "learning_rate": 5.349226620234624e-06, "loss": 0.0607, "step": 45361 }, { "epoch": 0.8090821531766133, "grad_norm": 0.407665878534317, "learning_rate": 5.348264445311818e-06, "loss": 0.1127, "step": 45362 }, { "epoch": 0.8090999892983269, "grad_norm": 0.2172544300556183, "learning_rate": 5.34730234656593e-06, "loss": 0.1218, "step": 45363 }, { "epoch": 0.8091178254200406, "grad_norm": 0.2502395808696747, "learning_rate": 5.346340324000681e-06, "loss": 0.1047, "step": 45364 }, { "epoch": 0.8091356615417543, "grad_norm": 0.2945944368839264, "learning_rate": 5.345378377619803e-06, "loss": 0.1256, "step": 45365 }, { "epoch": 0.809153497663468, "grad_norm": 0.3464438021183014, "learning_rate": 5.344416507427033e-06, "loss": 0.1232, "step": 45366 }, { "epoch": 0.8091713337851818, "grad_norm": 0.2606756091117859, "learning_rate": 5.343454713426091e-06, "loss": 0.1027, "step": 45367 }, { "epoch": 0.8091891699068955, "grad_norm": 0.3260132074356079, "learning_rate": 5.34249299562071e-06, "loss": 0.0947, "step": 45368 }, { "epoch": 0.8092070060286092, "grad_norm": 0.23424182832241058, "learning_rate": 5.341531354014606e-06, "loss": 0.112, "step": 45369 }, { "epoch": 0.8092248421503229, "grad_norm": 0.2808634042739868, "learning_rate": 5.340569788611518e-06, "loss": 0.1647, "step": 45370 }, { "epoch": 0.8092426782720366, "grad_norm": 0.3497547507286072, "learning_rate": 5.33960829941518e-06, "loss": 0.1384, "step": 45371 }, { "epoch": 0.8092605143937502, "grad_norm": 0.3503676652908325, "learning_rate": 5.338646886429308e-06, "loss": 0.1763, "step": 45372 }, { "epoch": 0.8092783505154639, "grad_norm": 0.39602136611938477, "learning_rate": 5.337685549657626e-06, "loss": 0.1178, "step": 45373 }, { "epoch": 0.8092961866371776, "grad_norm": 0.23481430113315582, "learning_rate": 5.336724289103873e-06, "loss": 0.1499, "step": 45374 }, { "epoch": 0.8093140227588913, "grad_norm": 0.265619158744812, "learning_rate": 5.335763104771768e-06, "loss": 0.0962, "step": 45375 }, { "epoch": 0.809331858880605, "grad_norm": 0.3164747655391693, "learning_rate": 5.334801996665037e-06, "loss": 0.1536, "step": 45376 }, { "epoch": 0.8093496950023187, "grad_norm": 0.3203544020652771, "learning_rate": 5.333840964787398e-06, "loss": 0.1213, "step": 45377 }, { "epoch": 0.8093675311240324, "grad_norm": 0.2461533397436142, "learning_rate": 5.332880009142594e-06, "loss": 0.0993, "step": 45378 }, { "epoch": 0.8093853672457461, "grad_norm": 0.2611466944217682, "learning_rate": 5.331919129734336e-06, "loss": 0.0835, "step": 45379 }, { "epoch": 0.8094032033674597, "grad_norm": 0.24052175879478455, "learning_rate": 5.330958326566354e-06, "loss": 0.1321, "step": 45380 }, { "epoch": 0.8094210394891734, "grad_norm": 0.35289251804351807, "learning_rate": 5.3299975996423695e-06, "loss": 0.0838, "step": 45381 }, { "epoch": 0.8094388756108871, "grad_norm": 0.343667209148407, "learning_rate": 5.329036948966104e-06, "loss": 0.1083, "step": 45382 }, { "epoch": 0.8094567117326008, "grad_norm": 0.264646977186203, "learning_rate": 5.328076374541291e-06, "loss": 0.1264, "step": 45383 }, { "epoch": 0.8094745478543146, "grad_norm": 0.29366007447242737, "learning_rate": 5.327115876371641e-06, "loss": 0.0981, "step": 45384 }, { "epoch": 0.8094923839760283, "grad_norm": 0.2466009557247162, "learning_rate": 5.3261554544608904e-06, "loss": 0.1015, "step": 45385 }, { "epoch": 0.809510220097742, "grad_norm": 0.4913146495819092, "learning_rate": 5.3251951088127526e-06, "loss": 0.1348, "step": 45386 }, { "epoch": 0.8095280562194557, "grad_norm": 0.27386918663978577, "learning_rate": 5.324234839430958e-06, "loss": 0.1125, "step": 45387 }, { "epoch": 0.8095458923411694, "grad_norm": 0.2521750330924988, "learning_rate": 5.3232746463192265e-06, "loss": 0.0946, "step": 45388 }, { "epoch": 0.809563728462883, "grad_norm": 0.21190997958183289, "learning_rate": 5.3223145294812785e-06, "loss": 0.0708, "step": 45389 }, { "epoch": 0.8095815645845967, "grad_norm": 0.24246685206890106, "learning_rate": 5.3213544889208295e-06, "loss": 0.118, "step": 45390 }, { "epoch": 0.8095994007063104, "grad_norm": 0.30345553159713745, "learning_rate": 5.320394524641614e-06, "loss": 0.0848, "step": 45391 }, { "epoch": 0.8096172368280241, "grad_norm": 0.27112045884132385, "learning_rate": 5.319434636647347e-06, "loss": 0.1116, "step": 45392 }, { "epoch": 0.8096350729497378, "grad_norm": 0.2395063042640686, "learning_rate": 5.318474824941747e-06, "loss": 0.0912, "step": 45393 }, { "epoch": 0.8096529090714515, "grad_norm": 0.4231487810611725, "learning_rate": 5.317515089528535e-06, "loss": 0.1551, "step": 45394 }, { "epoch": 0.8096707451931652, "grad_norm": 0.2898949980735779, "learning_rate": 5.31655543041143e-06, "loss": 0.1214, "step": 45395 }, { "epoch": 0.8096885813148789, "grad_norm": 0.29833000898361206, "learning_rate": 5.31559584759416e-06, "loss": 0.1317, "step": 45396 }, { "epoch": 0.8097064174365926, "grad_norm": 0.22038598358631134, "learning_rate": 5.314636341080431e-06, "loss": 0.1067, "step": 45397 }, { "epoch": 0.8097242535583062, "grad_norm": 0.38286957144737244, "learning_rate": 5.313676910873977e-06, "loss": 0.102, "step": 45398 }, { "epoch": 0.8097420896800199, "grad_norm": 0.37689468264579773, "learning_rate": 5.312717556978506e-06, "loss": 0.1172, "step": 45399 }, { "epoch": 0.8097599258017337, "grad_norm": 0.2364596724510193, "learning_rate": 5.311758279397747e-06, "loss": 0.0719, "step": 45400 }, { "epoch": 0.8097777619234474, "grad_norm": 0.297269731760025, "learning_rate": 5.310799078135415e-06, "loss": 0.0741, "step": 45401 }, { "epoch": 0.8097955980451611, "grad_norm": 0.19961655139923096, "learning_rate": 5.309839953195222e-06, "loss": 0.1392, "step": 45402 }, { "epoch": 0.8098134341668748, "grad_norm": 0.36211714148521423, "learning_rate": 5.308880904580887e-06, "loss": 0.137, "step": 45403 }, { "epoch": 0.8098312702885885, "grad_norm": 0.3193663954734802, "learning_rate": 5.307921932296136e-06, "loss": 0.1574, "step": 45404 }, { "epoch": 0.8098491064103022, "grad_norm": 0.2649797797203064, "learning_rate": 5.3069630363446835e-06, "loss": 0.1191, "step": 45405 }, { "epoch": 0.8098669425320159, "grad_norm": 0.21580186486244202, "learning_rate": 5.306004216730243e-06, "loss": 0.1256, "step": 45406 }, { "epoch": 0.8098847786537295, "grad_norm": 0.35072061419487, "learning_rate": 5.30504547345653e-06, "loss": 0.1801, "step": 45407 }, { "epoch": 0.8099026147754432, "grad_norm": 0.2494833618402481, "learning_rate": 5.304086806527259e-06, "loss": 0.1623, "step": 45408 }, { "epoch": 0.8099204508971569, "grad_norm": 0.3665267825126648, "learning_rate": 5.303128215946154e-06, "loss": 0.1888, "step": 45409 }, { "epoch": 0.8099382870188706, "grad_norm": 0.22827298939228058, "learning_rate": 5.30216970171693e-06, "loss": 0.1056, "step": 45410 }, { "epoch": 0.8099561231405843, "grad_norm": 0.28469589352607727, "learning_rate": 5.301211263843292e-06, "loss": 0.0809, "step": 45411 }, { "epoch": 0.809973959262298, "grad_norm": 0.24700549244880676, "learning_rate": 5.300252902328967e-06, "loss": 0.1034, "step": 45412 }, { "epoch": 0.8099917953840117, "grad_norm": 0.307685524225235, "learning_rate": 5.299294617177664e-06, "loss": 0.0926, "step": 45413 }, { "epoch": 0.8100096315057254, "grad_norm": 0.36144816875457764, "learning_rate": 5.298336408393101e-06, "loss": 0.1243, "step": 45414 }, { "epoch": 0.810027467627439, "grad_norm": 0.37847742438316345, "learning_rate": 5.297378275978995e-06, "loss": 0.1035, "step": 45415 }, { "epoch": 0.8100453037491527, "grad_norm": 0.19188730418682098, "learning_rate": 5.2964202199390465e-06, "loss": 0.0844, "step": 45416 }, { "epoch": 0.8100631398708665, "grad_norm": 0.30874645709991455, "learning_rate": 5.295462240276988e-06, "loss": 0.091, "step": 45417 }, { "epoch": 0.8100809759925802, "grad_norm": 0.3077942728996277, "learning_rate": 5.294504336996523e-06, "loss": 0.1251, "step": 45418 }, { "epoch": 0.8100988121142939, "grad_norm": 0.2177252471446991, "learning_rate": 5.293546510101363e-06, "loss": 0.0859, "step": 45419 }, { "epoch": 0.8101166482360076, "grad_norm": 0.266027569770813, "learning_rate": 5.292588759595224e-06, "loss": 0.1066, "step": 45420 }, { "epoch": 0.8101344843577213, "grad_norm": 0.3502812385559082, "learning_rate": 5.291631085481813e-06, "loss": 0.1464, "step": 45421 }, { "epoch": 0.810152320479435, "grad_norm": 0.2796567678451538, "learning_rate": 5.29067348776485e-06, "loss": 0.1476, "step": 45422 }, { "epoch": 0.8101701566011487, "grad_norm": 0.33431828022003174, "learning_rate": 5.2897159664480474e-06, "loss": 0.1369, "step": 45423 }, { "epoch": 0.8101879927228623, "grad_norm": 0.2717862129211426, "learning_rate": 5.288758521535106e-06, "loss": 0.1159, "step": 45424 }, { "epoch": 0.810205828844576, "grad_norm": 0.2470252364873886, "learning_rate": 5.28780115302975e-06, "loss": 0.0777, "step": 45425 }, { "epoch": 0.8102236649662897, "grad_norm": 0.4683379828929901, "learning_rate": 5.286843860935678e-06, "loss": 0.1295, "step": 45426 }, { "epoch": 0.8102415010880034, "grad_norm": 0.27637097239494324, "learning_rate": 5.285886645256616e-06, "loss": 0.139, "step": 45427 }, { "epoch": 0.8102593372097171, "grad_norm": 0.25384750962257385, "learning_rate": 5.284929505996266e-06, "loss": 0.0546, "step": 45428 }, { "epoch": 0.8102771733314308, "grad_norm": 0.2812712490558624, "learning_rate": 5.283972443158333e-06, "loss": 0.106, "step": 45429 }, { "epoch": 0.8102950094531445, "grad_norm": 0.2187226265668869, "learning_rate": 5.283015456746537e-06, "loss": 0.0937, "step": 45430 }, { "epoch": 0.8103128455748582, "grad_norm": 0.22594816982746124, "learning_rate": 5.2820585467645844e-06, "loss": 0.0994, "step": 45431 }, { "epoch": 0.8103306816965719, "grad_norm": 0.278167188167572, "learning_rate": 5.281101713216183e-06, "loss": 0.095, "step": 45432 }, { "epoch": 0.8103485178182855, "grad_norm": 0.23320238292217255, "learning_rate": 5.280144956105043e-06, "loss": 0.1177, "step": 45433 }, { "epoch": 0.8103663539399993, "grad_norm": 0.26970675587654114, "learning_rate": 5.279188275434865e-06, "loss": 0.0894, "step": 45434 }, { "epoch": 0.810384190061713, "grad_norm": 0.234837144613266, "learning_rate": 5.278231671209371e-06, "loss": 0.1126, "step": 45435 }, { "epoch": 0.8104020261834267, "grad_norm": 0.3310607373714447, "learning_rate": 5.277275143432262e-06, "loss": 0.1434, "step": 45436 }, { "epoch": 0.8104198623051404, "grad_norm": 0.24881210923194885, "learning_rate": 5.2763186921072465e-06, "loss": 0.0972, "step": 45437 }, { "epoch": 0.8104376984268541, "grad_norm": 0.378174751996994, "learning_rate": 5.275362317238028e-06, "loss": 0.1244, "step": 45438 }, { "epoch": 0.8104555345485678, "grad_norm": 0.3615255355834961, "learning_rate": 5.274406018828321e-06, "loss": 0.0749, "step": 45439 }, { "epoch": 0.8104733706702815, "grad_norm": 0.25198599696159363, "learning_rate": 5.273449796881824e-06, "loss": 0.1348, "step": 45440 }, { "epoch": 0.8104912067919952, "grad_norm": 0.28914254903793335, "learning_rate": 5.2724936514022575e-06, "loss": 0.2376, "step": 45441 }, { "epoch": 0.8105090429137088, "grad_norm": 0.21748897433280945, "learning_rate": 5.27153758239331e-06, "loss": 0.1057, "step": 45442 }, { "epoch": 0.8105268790354225, "grad_norm": 0.3717272877693176, "learning_rate": 5.270581589858703e-06, "loss": 0.1122, "step": 45443 }, { "epoch": 0.8105447151571362, "grad_norm": 0.200483039021492, "learning_rate": 5.269625673802139e-06, "loss": 0.0788, "step": 45444 }, { "epoch": 0.8105625512788499, "grad_norm": 0.2612393796443939, "learning_rate": 5.268669834227319e-06, "loss": 0.0993, "step": 45445 }, { "epoch": 0.8105803874005636, "grad_norm": 0.24470101296901703, "learning_rate": 5.26771407113795e-06, "loss": 0.12, "step": 45446 }, { "epoch": 0.8105982235222773, "grad_norm": 0.302722305059433, "learning_rate": 5.266758384537729e-06, "loss": 0.1116, "step": 45447 }, { "epoch": 0.810616059643991, "grad_norm": 0.24601230025291443, "learning_rate": 5.2658027744303755e-06, "loss": 0.1039, "step": 45448 }, { "epoch": 0.8106338957657047, "grad_norm": 0.3152858316898346, "learning_rate": 5.264847240819587e-06, "loss": 0.1576, "step": 45449 }, { "epoch": 0.8106517318874183, "grad_norm": 0.22193369269371033, "learning_rate": 5.263891783709066e-06, "loss": 0.0933, "step": 45450 }, { "epoch": 0.8106695680091321, "grad_norm": 0.2663132846355438, "learning_rate": 5.262936403102511e-06, "loss": 0.1239, "step": 45451 }, { "epoch": 0.8106874041308458, "grad_norm": 0.20785358548164368, "learning_rate": 5.261981099003638e-06, "loss": 0.0885, "step": 45452 }, { "epoch": 0.8107052402525595, "grad_norm": 0.26577529311180115, "learning_rate": 5.261025871416137e-06, "loss": 0.1332, "step": 45453 }, { "epoch": 0.8107230763742732, "grad_norm": 0.40488943457603455, "learning_rate": 5.260070720343724e-06, "loss": 0.1805, "step": 45454 }, { "epoch": 0.8107409124959869, "grad_norm": 0.2246762365102768, "learning_rate": 5.259115645790086e-06, "loss": 0.0791, "step": 45455 }, { "epoch": 0.8107587486177006, "grad_norm": 0.3010610044002533, "learning_rate": 5.2581606477589415e-06, "loss": 0.0714, "step": 45456 }, { "epoch": 0.8107765847394143, "grad_norm": 0.2886185050010681, "learning_rate": 5.257205726253989e-06, "loss": 0.0949, "step": 45457 }, { "epoch": 0.810794420861128, "grad_norm": 0.2818879783153534, "learning_rate": 5.25625088127892e-06, "loss": 0.1343, "step": 45458 }, { "epoch": 0.8108122569828417, "grad_norm": 0.3077107071876526, "learning_rate": 5.255296112837446e-06, "loss": 0.154, "step": 45459 }, { "epoch": 0.8108300931045553, "grad_norm": 0.3322410583496094, "learning_rate": 5.254341420933256e-06, "loss": 0.2221, "step": 45460 }, { "epoch": 0.810847929226269, "grad_norm": 0.3636000454425812, "learning_rate": 5.253386805570065e-06, "loss": 0.0897, "step": 45461 }, { "epoch": 0.8108657653479827, "grad_norm": 0.288411408662796, "learning_rate": 5.25243226675157e-06, "loss": 0.1188, "step": 45462 }, { "epoch": 0.8108836014696964, "grad_norm": 0.2190844714641571, "learning_rate": 5.251477804481464e-06, "loss": 0.097, "step": 45463 }, { "epoch": 0.8109014375914101, "grad_norm": 0.2701435983181, "learning_rate": 5.250523418763445e-06, "loss": 0.1339, "step": 45464 }, { "epoch": 0.8109192737131238, "grad_norm": 0.24628344178199768, "learning_rate": 5.249569109601227e-06, "loss": 0.1358, "step": 45465 }, { "epoch": 0.8109371098348375, "grad_norm": 0.22886212170124054, "learning_rate": 5.2486148769985004e-06, "loss": 0.1247, "step": 45466 }, { "epoch": 0.8109549459565512, "grad_norm": 0.3496599793434143, "learning_rate": 5.247660720958955e-06, "loss": 0.1343, "step": 45467 }, { "epoch": 0.810972782078265, "grad_norm": 0.27672773599624634, "learning_rate": 5.246706641486301e-06, "loss": 0.0802, "step": 45468 }, { "epoch": 0.8109906181999786, "grad_norm": 0.21181270480155945, "learning_rate": 5.245752638584242e-06, "loss": 0.064, "step": 45469 }, { "epoch": 0.8110084543216923, "grad_norm": 0.2689070701599121, "learning_rate": 5.244798712256469e-06, "loss": 0.1083, "step": 45470 }, { "epoch": 0.811026290443406, "grad_norm": 0.27168264985084534, "learning_rate": 5.243844862506677e-06, "loss": 0.111, "step": 45471 }, { "epoch": 0.8110441265651197, "grad_norm": 0.2746426463127136, "learning_rate": 5.242891089338567e-06, "loss": 0.1578, "step": 45472 }, { "epoch": 0.8110619626868334, "grad_norm": 0.29743823409080505, "learning_rate": 5.24193739275583e-06, "loss": 0.1282, "step": 45473 }, { "epoch": 0.8110797988085471, "grad_norm": 0.3167383372783661, "learning_rate": 5.240983772762173e-06, "loss": 0.1252, "step": 45474 }, { "epoch": 0.8110976349302608, "grad_norm": 0.2434619814157486, "learning_rate": 5.240030229361287e-06, "loss": 0.101, "step": 45475 }, { "epoch": 0.8111154710519745, "grad_norm": 0.41078394651412964, "learning_rate": 5.239076762556869e-06, "loss": 0.1384, "step": 45476 }, { "epoch": 0.8111333071736881, "grad_norm": 0.28844285011291504, "learning_rate": 5.2381233723526075e-06, "loss": 0.1179, "step": 45477 }, { "epoch": 0.8111511432954018, "grad_norm": 0.31216683983802795, "learning_rate": 5.2371700587522136e-06, "loss": 0.1591, "step": 45478 }, { "epoch": 0.8111689794171155, "grad_norm": 0.3527508080005646, "learning_rate": 5.236216821759373e-06, "loss": 0.1439, "step": 45479 }, { "epoch": 0.8111868155388292, "grad_norm": 0.24327363073825836, "learning_rate": 5.235263661377776e-06, "loss": 0.1285, "step": 45480 }, { "epoch": 0.8112046516605429, "grad_norm": 0.2778516113758087, "learning_rate": 5.2343105776111315e-06, "loss": 0.1402, "step": 45481 }, { "epoch": 0.8112224877822566, "grad_norm": 0.2080429047346115, "learning_rate": 5.233357570463118e-06, "loss": 0.1142, "step": 45482 }, { "epoch": 0.8112403239039703, "grad_norm": 0.34583908319473267, "learning_rate": 5.232404639937444e-06, "loss": 0.1003, "step": 45483 }, { "epoch": 0.811258160025684, "grad_norm": 0.30335095524787903, "learning_rate": 5.231451786037797e-06, "loss": 0.1015, "step": 45484 }, { "epoch": 0.8112759961473978, "grad_norm": 0.3012371361255646, "learning_rate": 5.2304990087678726e-06, "loss": 0.0951, "step": 45485 }, { "epoch": 0.8112938322691114, "grad_norm": 0.22829729318618774, "learning_rate": 5.229546308131353e-06, "loss": 0.0971, "step": 45486 }, { "epoch": 0.8113116683908251, "grad_norm": 0.2483467012643814, "learning_rate": 5.22859368413195e-06, "loss": 0.1393, "step": 45487 }, { "epoch": 0.8113295045125388, "grad_norm": 0.23309765756130219, "learning_rate": 5.227641136773345e-06, "loss": 0.1102, "step": 45488 }, { "epoch": 0.8113473406342525, "grad_norm": 0.409842848777771, "learning_rate": 5.226688666059232e-06, "loss": 0.1037, "step": 45489 }, { "epoch": 0.8113651767559662, "grad_norm": 0.295044481754303, "learning_rate": 5.225736271993295e-06, "loss": 0.0911, "step": 45490 }, { "epoch": 0.8113830128776799, "grad_norm": 0.30489203333854675, "learning_rate": 5.224783954579243e-06, "loss": 0.14, "step": 45491 }, { "epoch": 0.8114008489993936, "grad_norm": 0.22889581322669983, "learning_rate": 5.22383171382076e-06, "loss": 0.1037, "step": 45492 }, { "epoch": 0.8114186851211073, "grad_norm": 0.3222343921661377, "learning_rate": 5.222879549721532e-06, "loss": 0.0725, "step": 45493 }, { "epoch": 0.811436521242821, "grad_norm": 0.3243221342563629, "learning_rate": 5.22192746228525e-06, "loss": 0.158, "step": 45494 }, { "epoch": 0.8114543573645346, "grad_norm": 0.2591312825679779, "learning_rate": 5.220975451515614e-06, "loss": 0.1031, "step": 45495 }, { "epoch": 0.8114721934862483, "grad_norm": 0.42368006706237793, "learning_rate": 5.2200235174163e-06, "loss": 0.1648, "step": 45496 }, { "epoch": 0.811490029607962, "grad_norm": 0.20873235166072845, "learning_rate": 5.219071659991015e-06, "loss": 0.0738, "step": 45497 }, { "epoch": 0.8115078657296757, "grad_norm": 0.2601599097251892, "learning_rate": 5.218119879243441e-06, "loss": 0.086, "step": 45498 }, { "epoch": 0.8115257018513894, "grad_norm": 0.270443320274353, "learning_rate": 5.217168175177259e-06, "loss": 0.0952, "step": 45499 }, { "epoch": 0.8115435379731031, "grad_norm": 0.24155418574810028, "learning_rate": 5.216216547796174e-06, "loss": 0.1, "step": 45500 }, { "epoch": 0.8115613740948168, "grad_norm": 0.3387152850627899, "learning_rate": 5.215264997103866e-06, "loss": 0.1008, "step": 45501 }, { "epoch": 0.8115792102165306, "grad_norm": 0.24622072279453278, "learning_rate": 5.214313523104023e-06, "loss": 0.0912, "step": 45502 }, { "epoch": 0.8115970463382443, "grad_norm": 0.20400360226631165, "learning_rate": 5.213362125800328e-06, "loss": 0.1189, "step": 45503 }, { "epoch": 0.8116148824599579, "grad_norm": 0.35512739419937134, "learning_rate": 5.212410805196486e-06, "loss": 0.1417, "step": 45504 }, { "epoch": 0.8116327185816716, "grad_norm": 0.2782239317893982, "learning_rate": 5.2114595612961695e-06, "loss": 0.0987, "step": 45505 }, { "epoch": 0.8116505547033853, "grad_norm": 0.35960808396339417, "learning_rate": 5.210508394103072e-06, "loss": 0.0482, "step": 45506 }, { "epoch": 0.811668390825099, "grad_norm": 0.24351949989795685, "learning_rate": 5.209557303620874e-06, "loss": 0.1027, "step": 45507 }, { "epoch": 0.8116862269468127, "grad_norm": 0.20865245163440704, "learning_rate": 5.208606289853271e-06, "loss": 0.0702, "step": 45508 }, { "epoch": 0.8117040630685264, "grad_norm": 0.2898094356060028, "learning_rate": 5.207655352803942e-06, "loss": 0.1421, "step": 45509 }, { "epoch": 0.8117218991902401, "grad_norm": 0.2679760158061981, "learning_rate": 5.20670449247658e-06, "loss": 0.1076, "step": 45510 }, { "epoch": 0.8117397353119538, "grad_norm": 0.21027739346027374, "learning_rate": 5.205753708874872e-06, "loss": 0.111, "step": 45511 }, { "epoch": 0.8117575714336674, "grad_norm": 0.3852689266204834, "learning_rate": 5.20480300200249e-06, "loss": 0.1523, "step": 45512 }, { "epoch": 0.8117754075553811, "grad_norm": 0.3098362982273102, "learning_rate": 5.2038523718631354e-06, "loss": 0.0951, "step": 45513 }, { "epoch": 0.8117932436770948, "grad_norm": 0.26572349667549133, "learning_rate": 5.202901818460487e-06, "loss": 0.1287, "step": 45514 }, { "epoch": 0.8118110797988085, "grad_norm": 0.3147519528865814, "learning_rate": 5.201951341798228e-06, "loss": 0.1277, "step": 45515 }, { "epoch": 0.8118289159205222, "grad_norm": 0.18713118135929108, "learning_rate": 5.201000941880038e-06, "loss": 0.0824, "step": 45516 }, { "epoch": 0.8118467520422359, "grad_norm": 0.31820791959762573, "learning_rate": 5.200050618709615e-06, "loss": 0.1576, "step": 45517 }, { "epoch": 0.8118645881639497, "grad_norm": 0.2600247859954834, "learning_rate": 5.19910037229063e-06, "loss": 0.0805, "step": 45518 }, { "epoch": 0.8118824242856634, "grad_norm": 0.3205106854438782, "learning_rate": 5.198150202626776e-06, "loss": 0.1539, "step": 45519 }, { "epoch": 0.8119002604073771, "grad_norm": 0.28935134410858154, "learning_rate": 5.19720010972172e-06, "loss": 0.1462, "step": 45520 }, { "epoch": 0.8119180965290907, "grad_norm": 0.2988860607147217, "learning_rate": 5.196250093579166e-06, "loss": 0.1603, "step": 45521 }, { "epoch": 0.8119359326508044, "grad_norm": 0.2744669020175934, "learning_rate": 5.195300154202784e-06, "loss": 0.0897, "step": 45522 }, { "epoch": 0.8119537687725181, "grad_norm": 0.33278608322143555, "learning_rate": 5.1943502915962536e-06, "loss": 0.1208, "step": 45523 }, { "epoch": 0.8119716048942318, "grad_norm": 0.3321242332458496, "learning_rate": 5.193400505763269e-06, "loss": 0.1725, "step": 45524 }, { "epoch": 0.8119894410159455, "grad_norm": 0.2584283649921417, "learning_rate": 5.192450796707498e-06, "loss": 0.1263, "step": 45525 }, { "epoch": 0.8120072771376592, "grad_norm": 0.3969503343105316, "learning_rate": 5.191501164432635e-06, "loss": 0.1509, "step": 45526 }, { "epoch": 0.8120251132593729, "grad_norm": 0.3709224760532379, "learning_rate": 5.190551608942357e-06, "loss": 0.1851, "step": 45527 }, { "epoch": 0.8120429493810866, "grad_norm": 0.20300635695457458, "learning_rate": 5.189602130240342e-06, "loss": 0.1082, "step": 45528 }, { "epoch": 0.8120607855028003, "grad_norm": 0.2492382526397705, "learning_rate": 5.188652728330265e-06, "loss": 0.0996, "step": 45529 }, { "epoch": 0.8120786216245139, "grad_norm": 0.2658096253871918, "learning_rate": 5.187703403215818e-06, "loss": 0.0918, "step": 45530 }, { "epoch": 0.8120964577462276, "grad_norm": 0.28822076320648193, "learning_rate": 5.186754154900678e-06, "loss": 0.1254, "step": 45531 }, { "epoch": 0.8121142938679413, "grad_norm": 0.28774335980415344, "learning_rate": 5.18580498338852e-06, "loss": 0.1015, "step": 45532 }, { "epoch": 0.812132129989655, "grad_norm": 0.1896011084318161, "learning_rate": 5.184855888683019e-06, "loss": 0.0675, "step": 45533 }, { "epoch": 0.8121499661113687, "grad_norm": 0.27161189913749695, "learning_rate": 5.183906870787869e-06, "loss": 0.1315, "step": 45534 }, { "epoch": 0.8121678022330825, "grad_norm": 0.3012565076351166, "learning_rate": 5.182957929706738e-06, "loss": 0.1459, "step": 45535 }, { "epoch": 0.8121856383547962, "grad_norm": 0.2427787482738495, "learning_rate": 5.182009065443302e-06, "loss": 0.1037, "step": 45536 }, { "epoch": 0.8122034744765099, "grad_norm": 0.2582356035709381, "learning_rate": 5.181060278001249e-06, "loss": 0.0865, "step": 45537 }, { "epoch": 0.8122213105982236, "grad_norm": 0.23960572481155396, "learning_rate": 5.180111567384244e-06, "loss": 0.0821, "step": 45538 }, { "epoch": 0.8122391467199372, "grad_norm": 0.20808137953281403, "learning_rate": 5.17916293359598e-06, "loss": 0.0954, "step": 45539 }, { "epoch": 0.8122569828416509, "grad_norm": 0.21785911917686462, "learning_rate": 5.1782143766401285e-06, "loss": 0.0678, "step": 45540 }, { "epoch": 0.8122748189633646, "grad_norm": 0.2476358413696289, "learning_rate": 5.1772658965203615e-06, "loss": 0.1017, "step": 45541 }, { "epoch": 0.8122926550850783, "grad_norm": 0.27477723360061646, "learning_rate": 5.176317493240351e-06, "loss": 0.1249, "step": 45542 }, { "epoch": 0.812310491206792, "grad_norm": 0.2926079332828522, "learning_rate": 5.17536916680379e-06, "loss": 0.1193, "step": 45543 }, { "epoch": 0.8123283273285057, "grad_norm": 0.2808745205402374, "learning_rate": 5.174420917214345e-06, "loss": 0.1096, "step": 45544 }, { "epoch": 0.8123461634502194, "grad_norm": 0.32176074385643005, "learning_rate": 5.173472744475691e-06, "loss": 0.0952, "step": 45545 }, { "epoch": 0.8123639995719331, "grad_norm": 0.2728419303894043, "learning_rate": 5.172524648591498e-06, "loss": 0.1039, "step": 45546 }, { "epoch": 0.8123818356936467, "grad_norm": 0.2153676450252533, "learning_rate": 5.171576629565456e-06, "loss": 0.1234, "step": 45547 }, { "epoch": 0.8123996718153604, "grad_norm": 0.25571686029434204, "learning_rate": 5.170628687401227e-06, "loss": 0.1074, "step": 45548 }, { "epoch": 0.8124175079370741, "grad_norm": 0.5360467433929443, "learning_rate": 5.1696808221024934e-06, "loss": 0.1582, "step": 45549 }, { "epoch": 0.8124353440587878, "grad_norm": 0.24733681976795197, "learning_rate": 5.16873303367292e-06, "loss": 0.081, "step": 45550 }, { "epoch": 0.8124531801805015, "grad_norm": 0.24323980510234833, "learning_rate": 5.1677853221161865e-06, "loss": 0.1176, "step": 45551 }, { "epoch": 0.8124710163022153, "grad_norm": 0.22319646179676056, "learning_rate": 5.1668376874359715e-06, "loss": 0.1065, "step": 45552 }, { "epoch": 0.812488852423929, "grad_norm": 0.2035447210073471, "learning_rate": 5.165890129635947e-06, "loss": 0.0655, "step": 45553 }, { "epoch": 0.8125066885456427, "grad_norm": 0.22651202976703644, "learning_rate": 5.164942648719781e-06, "loss": 0.1002, "step": 45554 }, { "epoch": 0.8125245246673564, "grad_norm": 0.33447906374931335, "learning_rate": 5.163995244691142e-06, "loss": 0.0735, "step": 45555 }, { "epoch": 0.81254236078907, "grad_norm": 0.300897479057312, "learning_rate": 5.163047917553715e-06, "loss": 0.1373, "step": 45556 }, { "epoch": 0.8125601969107837, "grad_norm": 0.2702885568141937, "learning_rate": 5.162100667311165e-06, "loss": 0.1619, "step": 45557 }, { "epoch": 0.8125780330324974, "grad_norm": 0.5546003580093384, "learning_rate": 5.161153493967164e-06, "loss": 0.1464, "step": 45558 }, { "epoch": 0.8125958691542111, "grad_norm": 0.25141334533691406, "learning_rate": 5.16020639752538e-06, "loss": 0.121, "step": 45559 }, { "epoch": 0.8126137052759248, "grad_norm": 0.4828642010688782, "learning_rate": 5.159259377989492e-06, "loss": 0.1217, "step": 45560 }, { "epoch": 0.8126315413976385, "grad_norm": 0.3760753870010376, "learning_rate": 5.158312435363169e-06, "loss": 0.1516, "step": 45561 }, { "epoch": 0.8126493775193522, "grad_norm": 0.5467627048492432, "learning_rate": 5.15736556965008e-06, "loss": 0.1251, "step": 45562 }, { "epoch": 0.8126672136410659, "grad_norm": 0.32679885625839233, "learning_rate": 5.156418780853892e-06, "loss": 0.1064, "step": 45563 }, { "epoch": 0.8126850497627796, "grad_norm": 0.24771232903003693, "learning_rate": 5.1554720689782815e-06, "loss": 0.1102, "step": 45564 }, { "epoch": 0.8127028858844932, "grad_norm": 0.2394687682390213, "learning_rate": 5.154525434026908e-06, "loss": 0.1624, "step": 45565 }, { "epoch": 0.8127207220062069, "grad_norm": 0.261261522769928, "learning_rate": 5.153578876003457e-06, "loss": 0.1228, "step": 45566 }, { "epoch": 0.8127385581279206, "grad_norm": 0.25472745299339294, "learning_rate": 5.152632394911589e-06, "loss": 0.1455, "step": 45567 }, { "epoch": 0.8127563942496343, "grad_norm": 0.24779358506202698, "learning_rate": 5.151685990754965e-06, "loss": 0.1109, "step": 45568 }, { "epoch": 0.8127742303713481, "grad_norm": 0.21714620292186737, "learning_rate": 5.150739663537269e-06, "loss": 0.1037, "step": 45569 }, { "epoch": 0.8127920664930618, "grad_norm": 0.2893657684326172, "learning_rate": 5.149793413262163e-06, "loss": 0.1416, "step": 45570 }, { "epoch": 0.8128099026147755, "grad_norm": 0.22147493064403534, "learning_rate": 5.1488472399333105e-06, "loss": 0.0647, "step": 45571 }, { "epoch": 0.8128277387364892, "grad_norm": 0.27817443013191223, "learning_rate": 5.147901143554379e-06, "loss": 0.0669, "step": 45572 }, { "epoch": 0.8128455748582029, "grad_norm": 0.24891194701194763, "learning_rate": 5.146955124129044e-06, "loss": 0.1303, "step": 45573 }, { "epoch": 0.8128634109799165, "grad_norm": 0.28080177307128906, "learning_rate": 5.146009181660968e-06, "loss": 0.1156, "step": 45574 }, { "epoch": 0.8128812471016302, "grad_norm": 0.2582859396934509, "learning_rate": 5.14506331615382e-06, "loss": 0.135, "step": 45575 }, { "epoch": 0.8128990832233439, "grad_norm": 0.7043549418449402, "learning_rate": 5.1441175276112615e-06, "loss": 0.1255, "step": 45576 }, { "epoch": 0.8129169193450576, "grad_norm": 0.29657599329948425, "learning_rate": 5.1431718160369565e-06, "loss": 0.1221, "step": 45577 }, { "epoch": 0.8129347554667713, "grad_norm": 0.3936665654182434, "learning_rate": 5.142226181434576e-06, "loss": 0.1425, "step": 45578 }, { "epoch": 0.812952591588485, "grad_norm": 0.3372699022293091, "learning_rate": 5.141280623807792e-06, "loss": 0.1076, "step": 45579 }, { "epoch": 0.8129704277101987, "grad_norm": 0.27011269330978394, "learning_rate": 5.140335143160263e-06, "loss": 0.0886, "step": 45580 }, { "epoch": 0.8129882638319124, "grad_norm": 0.26672306656837463, "learning_rate": 5.139389739495645e-06, "loss": 0.1002, "step": 45581 }, { "epoch": 0.813006099953626, "grad_norm": 0.21756118535995483, "learning_rate": 5.138444412817623e-06, "loss": 0.1249, "step": 45582 }, { "epoch": 0.8130239360753397, "grad_norm": 0.33472296595573425, "learning_rate": 5.137499163129849e-06, "loss": 0.1548, "step": 45583 }, { "epoch": 0.8130417721970534, "grad_norm": 0.28582215309143066, "learning_rate": 5.1365539904359884e-06, "loss": 0.0851, "step": 45584 }, { "epoch": 0.8130596083187671, "grad_norm": 0.35146069526672363, "learning_rate": 5.135608894739696e-06, "loss": 0.1057, "step": 45585 }, { "epoch": 0.8130774444404809, "grad_norm": 0.25881344079971313, "learning_rate": 5.134663876044654e-06, "loss": 0.1455, "step": 45586 }, { "epoch": 0.8130952805621946, "grad_norm": 0.25682884454727173, "learning_rate": 5.1337189343545146e-06, "loss": 0.1213, "step": 45587 }, { "epoch": 0.8131131166839083, "grad_norm": 0.30452170968055725, "learning_rate": 5.132774069672944e-06, "loss": 0.1082, "step": 45588 }, { "epoch": 0.813130952805622, "grad_norm": 0.26689738035202026, "learning_rate": 5.131829282003603e-06, "loss": 0.0904, "step": 45589 }, { "epoch": 0.8131487889273357, "grad_norm": 0.32830995321273804, "learning_rate": 5.130884571350144e-06, "loss": 0.1182, "step": 45590 }, { "epoch": 0.8131666250490494, "grad_norm": 0.2156449407339096, "learning_rate": 5.1299399377162495e-06, "loss": 0.1106, "step": 45591 }, { "epoch": 0.813184461170763, "grad_norm": 0.3808838129043579, "learning_rate": 5.128995381105561e-06, "loss": 0.1609, "step": 45592 }, { "epoch": 0.8132022972924767, "grad_norm": 0.2852393090724945, "learning_rate": 5.1280509015217585e-06, "loss": 0.1308, "step": 45593 }, { "epoch": 0.8132201334141904, "grad_norm": 0.25662487745285034, "learning_rate": 5.1271064989684865e-06, "loss": 0.109, "step": 45594 }, { "epoch": 0.8132379695359041, "grad_norm": 0.2496776431798935, "learning_rate": 5.126162173449422e-06, "loss": 0.0821, "step": 45595 }, { "epoch": 0.8132558056576178, "grad_norm": 0.2867102324962616, "learning_rate": 5.1252179249682144e-06, "loss": 0.1423, "step": 45596 }, { "epoch": 0.8132736417793315, "grad_norm": 0.29804888367652893, "learning_rate": 5.12427375352853e-06, "loss": 0.1729, "step": 45597 }, { "epoch": 0.8132914779010452, "grad_norm": 0.3755727708339691, "learning_rate": 5.123329659134016e-06, "loss": 0.1786, "step": 45598 }, { "epoch": 0.8133093140227589, "grad_norm": 0.2618981897830963, "learning_rate": 5.122385641788349e-06, "loss": 0.1396, "step": 45599 }, { "epoch": 0.8133271501444725, "grad_norm": 0.19386625289916992, "learning_rate": 5.121441701495181e-06, "loss": 0.0789, "step": 45600 }, { "epoch": 0.8133449862661862, "grad_norm": 0.3044191002845764, "learning_rate": 5.12049783825817e-06, "loss": 0.077, "step": 45601 }, { "epoch": 0.8133628223878999, "grad_norm": 0.2679678201675415, "learning_rate": 5.1195540520809745e-06, "loss": 0.097, "step": 45602 }, { "epoch": 0.8133806585096137, "grad_norm": 0.24042688310146332, "learning_rate": 5.118610342967248e-06, "loss": 0.0959, "step": 45603 }, { "epoch": 0.8133984946313274, "grad_norm": 0.25474920868873596, "learning_rate": 5.117666710920663e-06, "loss": 0.1522, "step": 45604 }, { "epoch": 0.8134163307530411, "grad_norm": 0.3378788232803345, "learning_rate": 5.116723155944861e-06, "loss": 0.1436, "step": 45605 }, { "epoch": 0.8134341668747548, "grad_norm": 0.3056812882423401, "learning_rate": 5.115779678043514e-06, "loss": 0.1352, "step": 45606 }, { "epoch": 0.8134520029964685, "grad_norm": 0.2749176621437073, "learning_rate": 5.114836277220267e-06, "loss": 0.1294, "step": 45607 }, { "epoch": 0.8134698391181822, "grad_norm": 0.27322229743003845, "learning_rate": 5.113892953478788e-06, "loss": 0.0488, "step": 45608 }, { "epoch": 0.8134876752398958, "grad_norm": 0.2960701584815979, "learning_rate": 5.112949706822731e-06, "loss": 0.1122, "step": 45609 }, { "epoch": 0.8135055113616095, "grad_norm": 0.37147918343544006, "learning_rate": 5.112006537255748e-06, "loss": 0.1299, "step": 45610 }, { "epoch": 0.8135233474833232, "grad_norm": 0.2725314795970917, "learning_rate": 5.111063444781489e-06, "loss": 0.1314, "step": 45611 }, { "epoch": 0.8135411836050369, "grad_norm": 0.2502182424068451, "learning_rate": 5.1101204294036255e-06, "loss": 0.1237, "step": 45612 }, { "epoch": 0.8135590197267506, "grad_norm": 0.29174044728279114, "learning_rate": 5.109177491125805e-06, "loss": 0.111, "step": 45613 }, { "epoch": 0.8135768558484643, "grad_norm": 0.25557634234428406, "learning_rate": 5.108234629951683e-06, "loss": 0.1001, "step": 45614 }, { "epoch": 0.813594691970178, "grad_norm": 0.29499757289886475, "learning_rate": 5.1072918458849124e-06, "loss": 0.1387, "step": 45615 }, { "epoch": 0.8136125280918917, "grad_norm": 0.2681562304496765, "learning_rate": 5.1063491389291425e-06, "loss": 0.1106, "step": 45616 }, { "epoch": 0.8136303642136054, "grad_norm": 0.21198689937591553, "learning_rate": 5.105406509088042e-06, "loss": 0.095, "step": 45617 }, { "epoch": 0.813648200335319, "grad_norm": 0.2974684238433838, "learning_rate": 5.104463956365258e-06, "loss": 0.1522, "step": 45618 }, { "epoch": 0.8136660364570328, "grad_norm": 0.26667389273643494, "learning_rate": 5.1035214807644366e-06, "loss": 0.1414, "step": 45619 }, { "epoch": 0.8136838725787465, "grad_norm": 0.34514445066452026, "learning_rate": 5.102579082289244e-06, "loss": 0.1932, "step": 45620 }, { "epoch": 0.8137017087004602, "grad_norm": 0.2809849679470062, "learning_rate": 5.101636760943321e-06, "loss": 0.1031, "step": 45621 }, { "epoch": 0.8137195448221739, "grad_norm": 0.2639085650444031, "learning_rate": 5.1006945167303314e-06, "loss": 0.0896, "step": 45622 }, { "epoch": 0.8137373809438876, "grad_norm": 0.33750295639038086, "learning_rate": 5.099752349653924e-06, "loss": 0.1268, "step": 45623 }, { "epoch": 0.8137552170656013, "grad_norm": 0.28890225291252136, "learning_rate": 5.0988102597177455e-06, "loss": 0.0819, "step": 45624 }, { "epoch": 0.813773053187315, "grad_norm": 0.2128099948167801, "learning_rate": 5.097868246925455e-06, "loss": 0.1091, "step": 45625 }, { "epoch": 0.8137908893090287, "grad_norm": 0.32496771216392517, "learning_rate": 5.096926311280703e-06, "loss": 0.0741, "step": 45626 }, { "epoch": 0.8138087254307423, "grad_norm": 0.23552510142326355, "learning_rate": 5.095984452787139e-06, "loss": 0.0979, "step": 45627 }, { "epoch": 0.813826561552456, "grad_norm": 0.29348504543304443, "learning_rate": 5.095042671448413e-06, "loss": 0.1294, "step": 45628 }, { "epoch": 0.8138443976741697, "grad_norm": 0.2769838869571686, "learning_rate": 5.094100967268173e-06, "loss": 0.1222, "step": 45629 }, { "epoch": 0.8138622337958834, "grad_norm": 0.25167563557624817, "learning_rate": 5.0931593402500756e-06, "loss": 0.1268, "step": 45630 }, { "epoch": 0.8138800699175971, "grad_norm": 0.23509718477725983, "learning_rate": 5.092217790397771e-06, "loss": 0.1037, "step": 45631 }, { "epoch": 0.8138979060393108, "grad_norm": 0.24677452445030212, "learning_rate": 5.091276317714896e-06, "loss": 0.0859, "step": 45632 }, { "epoch": 0.8139157421610245, "grad_norm": 0.24758413434028625, "learning_rate": 5.09033492220512e-06, "loss": 0.103, "step": 45633 }, { "epoch": 0.8139335782827382, "grad_norm": 0.35802656412124634, "learning_rate": 5.089393603872075e-06, "loss": 0.113, "step": 45634 }, { "epoch": 0.8139514144044518, "grad_norm": 0.2309873104095459, "learning_rate": 5.088452362719426e-06, "loss": 0.1004, "step": 45635 }, { "epoch": 0.8139692505261656, "grad_norm": 0.21573497354984283, "learning_rate": 5.087511198750811e-06, "loss": 0.0821, "step": 45636 }, { "epoch": 0.8139870866478793, "grad_norm": 0.3483726382255554, "learning_rate": 5.086570111969871e-06, "loss": 0.2048, "step": 45637 }, { "epoch": 0.814004922769593, "grad_norm": 0.2416190803050995, "learning_rate": 5.085629102380274e-06, "loss": 0.094, "step": 45638 }, { "epoch": 0.8140227588913067, "grad_norm": 0.216502845287323, "learning_rate": 5.0846881699856545e-06, "loss": 0.0955, "step": 45639 }, { "epoch": 0.8140405950130204, "grad_norm": 0.17814892530441284, "learning_rate": 5.0837473147896605e-06, "loss": 0.0998, "step": 45640 }, { "epoch": 0.8140584311347341, "grad_norm": 0.2848738431930542, "learning_rate": 5.082806536795945e-06, "loss": 0.1058, "step": 45641 }, { "epoch": 0.8140762672564478, "grad_norm": 0.26333603262901306, "learning_rate": 5.08186583600814e-06, "loss": 0.0717, "step": 45642 }, { "epoch": 0.8140941033781615, "grad_norm": 0.1773180216550827, "learning_rate": 5.080925212429913e-06, "loss": 0.1241, "step": 45643 }, { "epoch": 0.8141119394998751, "grad_norm": 0.23399409651756287, "learning_rate": 5.079984666064897e-06, "loss": 0.129, "step": 45644 }, { "epoch": 0.8141297756215888, "grad_norm": 0.32898133993148804, "learning_rate": 5.079044196916741e-06, "loss": 0.1564, "step": 45645 }, { "epoch": 0.8141476117433025, "grad_norm": 0.25006625056266785, "learning_rate": 5.078103804989082e-06, "loss": 0.1198, "step": 45646 }, { "epoch": 0.8141654478650162, "grad_norm": 0.2780681550502777, "learning_rate": 5.077163490285583e-06, "loss": 0.1327, "step": 45647 }, { "epoch": 0.8141832839867299, "grad_norm": 0.2767432928085327, "learning_rate": 5.076223252809873e-06, "loss": 0.1238, "step": 45648 }, { "epoch": 0.8142011201084436, "grad_norm": 0.2799425721168518, "learning_rate": 5.075283092565605e-06, "loss": 0.1291, "step": 45649 }, { "epoch": 0.8142189562301573, "grad_norm": 0.2554751932621002, "learning_rate": 5.0743430095564205e-06, "loss": 0.132, "step": 45650 }, { "epoch": 0.814236792351871, "grad_norm": 0.28389936685562134, "learning_rate": 5.073403003785967e-06, "loss": 0.1562, "step": 45651 }, { "epoch": 0.8142546284735847, "grad_norm": 0.427094042301178, "learning_rate": 5.072463075257889e-06, "loss": 0.0977, "step": 45652 }, { "epoch": 0.8142724645952985, "grad_norm": 0.3419346213340759, "learning_rate": 5.071523223975824e-06, "loss": 0.118, "step": 45653 }, { "epoch": 0.8142903007170121, "grad_norm": 0.24643169343471527, "learning_rate": 5.0705834499434225e-06, "loss": 0.0629, "step": 45654 }, { "epoch": 0.8143081368387258, "grad_norm": 0.2812938392162323, "learning_rate": 5.069643753164313e-06, "loss": 0.1073, "step": 45655 }, { "epoch": 0.8143259729604395, "grad_norm": 0.2212173491716385, "learning_rate": 5.068704133642155e-06, "loss": 0.118, "step": 45656 }, { "epoch": 0.8143438090821532, "grad_norm": 0.2015131711959839, "learning_rate": 5.067764591380583e-06, "loss": 0.0895, "step": 45657 }, { "epoch": 0.8143616452038669, "grad_norm": 0.23904988169670105, "learning_rate": 5.066825126383243e-06, "loss": 0.1093, "step": 45658 }, { "epoch": 0.8143794813255806, "grad_norm": 0.41175538301467896, "learning_rate": 5.0658857386537635e-06, "loss": 0.0687, "step": 45659 }, { "epoch": 0.8143973174472943, "grad_norm": 0.39152294397354126, "learning_rate": 5.064946428195805e-06, "loss": 0.1213, "step": 45660 }, { "epoch": 0.814415153569008, "grad_norm": 0.26551494002342224, "learning_rate": 5.06400719501299e-06, "loss": 0.0754, "step": 45661 }, { "epoch": 0.8144329896907216, "grad_norm": 0.322030633687973, "learning_rate": 5.0630680391089755e-06, "loss": 0.0933, "step": 45662 }, { "epoch": 0.8144508258124353, "grad_norm": 0.23832233250141144, "learning_rate": 5.0621289604873915e-06, "loss": 0.0859, "step": 45663 }, { "epoch": 0.814468661934149, "grad_norm": 0.2087278962135315, "learning_rate": 5.061189959151888e-06, "loss": 0.1146, "step": 45664 }, { "epoch": 0.8144864980558627, "grad_norm": 0.2993241548538208, "learning_rate": 5.060251035106098e-06, "loss": 0.1284, "step": 45665 }, { "epoch": 0.8145043341775764, "grad_norm": 0.2602251172065735, "learning_rate": 5.059312188353662e-06, "loss": 0.1124, "step": 45666 }, { "epoch": 0.8145221702992901, "grad_norm": 0.20799730718135834, "learning_rate": 5.058373418898219e-06, "loss": 0.1142, "step": 45667 }, { "epoch": 0.8145400064210038, "grad_norm": 0.3496764004230499, "learning_rate": 5.057434726743401e-06, "loss": 0.1039, "step": 45668 }, { "epoch": 0.8145578425427175, "grad_norm": 0.25862595438957214, "learning_rate": 5.056496111892864e-06, "loss": 0.0815, "step": 45669 }, { "epoch": 0.8145756786644313, "grad_norm": 0.28510621190071106, "learning_rate": 5.055557574350234e-06, "loss": 0.1165, "step": 45670 }, { "epoch": 0.814593514786145, "grad_norm": 0.25111693143844604, "learning_rate": 5.05461911411915e-06, "loss": 0.1061, "step": 45671 }, { "epoch": 0.8146113509078586, "grad_norm": 0.2907634377479553, "learning_rate": 5.053680731203245e-06, "loss": 0.0933, "step": 45672 }, { "epoch": 0.8146291870295723, "grad_norm": 0.32620877027511597, "learning_rate": 5.0527424256061715e-06, "loss": 0.1024, "step": 45673 }, { "epoch": 0.814647023151286, "grad_norm": 0.29583412408828735, "learning_rate": 5.051804197331555e-06, "loss": 0.127, "step": 45674 }, { "epoch": 0.8146648592729997, "grad_norm": 0.23249945044517517, "learning_rate": 5.050866046383032e-06, "loss": 0.0914, "step": 45675 }, { "epoch": 0.8146826953947134, "grad_norm": 0.24768604338169098, "learning_rate": 5.049927972764246e-06, "loss": 0.0955, "step": 45676 }, { "epoch": 0.8147005315164271, "grad_norm": 0.4107540249824524, "learning_rate": 5.048989976478824e-06, "loss": 0.1162, "step": 45677 }, { "epoch": 0.8147183676381408, "grad_norm": 0.2767286002635956, "learning_rate": 5.048052057530417e-06, "loss": 0.0876, "step": 45678 }, { "epoch": 0.8147362037598544, "grad_norm": 0.3293555974960327, "learning_rate": 5.047114215922649e-06, "loss": 0.1332, "step": 45679 }, { "epoch": 0.8147540398815681, "grad_norm": 0.29267212748527527, "learning_rate": 5.046176451659157e-06, "loss": 0.1168, "step": 45680 }, { "epoch": 0.8147718760032818, "grad_norm": 0.38418206572532654, "learning_rate": 5.04523876474357e-06, "loss": 0.1405, "step": 45681 }, { "epoch": 0.8147897121249955, "grad_norm": 0.3118765950202942, "learning_rate": 5.04430115517954e-06, "loss": 0.1173, "step": 45682 }, { "epoch": 0.8148075482467092, "grad_norm": 0.3011217415332794, "learning_rate": 5.043363622970687e-06, "loss": 0.1225, "step": 45683 }, { "epoch": 0.8148253843684229, "grad_norm": 0.2403777688741684, "learning_rate": 5.042426168120653e-06, "loss": 0.1244, "step": 45684 }, { "epoch": 0.8148432204901366, "grad_norm": 0.2491941601037979, "learning_rate": 5.041488790633059e-06, "loss": 0.0733, "step": 45685 }, { "epoch": 0.8148610566118503, "grad_norm": 0.27331098914146423, "learning_rate": 5.040551490511555e-06, "loss": 0.1204, "step": 45686 }, { "epoch": 0.8148788927335641, "grad_norm": 0.27458685636520386, "learning_rate": 5.0396142677597676e-06, "loss": 0.1111, "step": 45687 }, { "epoch": 0.8148967288552778, "grad_norm": 0.2450343519449234, "learning_rate": 5.0386771223813244e-06, "loss": 0.1056, "step": 45688 }, { "epoch": 0.8149145649769914, "grad_norm": 0.2235519289970398, "learning_rate": 5.037740054379866e-06, "loss": 0.0772, "step": 45689 }, { "epoch": 0.8149324010987051, "grad_norm": 0.24457727372646332, "learning_rate": 5.036803063759018e-06, "loss": 0.0935, "step": 45690 }, { "epoch": 0.8149502372204188, "grad_norm": 0.19804850220680237, "learning_rate": 5.0358661505224226e-06, "loss": 0.12, "step": 45691 }, { "epoch": 0.8149680733421325, "grad_norm": 0.3335053622722626, "learning_rate": 5.0349293146737055e-06, "loss": 0.1271, "step": 45692 }, { "epoch": 0.8149859094638462, "grad_norm": 0.19297271966934204, "learning_rate": 5.033992556216499e-06, "loss": 0.0987, "step": 45693 }, { "epoch": 0.8150037455855599, "grad_norm": 0.26616060733795166, "learning_rate": 5.0330558751544245e-06, "loss": 0.1063, "step": 45694 }, { "epoch": 0.8150215817072736, "grad_norm": 0.29165148735046387, "learning_rate": 5.032119271491129e-06, "loss": 0.1173, "step": 45695 }, { "epoch": 0.8150394178289873, "grad_norm": 0.22301040589809418, "learning_rate": 5.031182745230237e-06, "loss": 0.1062, "step": 45696 }, { "epoch": 0.8150572539507009, "grad_norm": 0.2579430639743805, "learning_rate": 5.030246296375377e-06, "loss": 0.0845, "step": 45697 }, { "epoch": 0.8150750900724146, "grad_norm": 0.3751802444458008, "learning_rate": 5.029309924930173e-06, "loss": 0.1316, "step": 45698 }, { "epoch": 0.8150929261941283, "grad_norm": 0.2684709429740906, "learning_rate": 5.028373630898267e-06, "loss": 0.1166, "step": 45699 }, { "epoch": 0.815110762315842, "grad_norm": 0.22469931840896606, "learning_rate": 5.027437414283284e-06, "loss": 0.0979, "step": 45700 }, { "epoch": 0.8151285984375557, "grad_norm": 0.26601627469062805, "learning_rate": 5.026501275088852e-06, "loss": 0.1554, "step": 45701 }, { "epoch": 0.8151464345592694, "grad_norm": 0.3379831314086914, "learning_rate": 5.0255652133185925e-06, "loss": 0.1192, "step": 45702 }, { "epoch": 0.8151642706809831, "grad_norm": 0.24312078952789307, "learning_rate": 5.0246292289761494e-06, "loss": 0.088, "step": 45703 }, { "epoch": 0.8151821068026969, "grad_norm": 0.22759027779102325, "learning_rate": 5.023693322065134e-06, "loss": 0.1286, "step": 45704 }, { "epoch": 0.8151999429244106, "grad_norm": 0.2826850116252899, "learning_rate": 5.022757492589192e-06, "loss": 0.0865, "step": 45705 }, { "epoch": 0.8152177790461242, "grad_norm": 0.3726387917995453, "learning_rate": 5.021821740551938e-06, "loss": 0.1297, "step": 45706 }, { "epoch": 0.8152356151678379, "grad_norm": 0.3333369493484497, "learning_rate": 5.020886065957001e-06, "loss": 0.1793, "step": 45707 }, { "epoch": 0.8152534512895516, "grad_norm": 0.3990958333015442, "learning_rate": 5.019950468808013e-06, "loss": 0.1521, "step": 45708 }, { "epoch": 0.8152712874112653, "grad_norm": 0.2901363968849182, "learning_rate": 5.019014949108599e-06, "loss": 0.1265, "step": 45709 }, { "epoch": 0.815289123532979, "grad_norm": 0.4082321226596832, "learning_rate": 5.018079506862386e-06, "loss": 0.1229, "step": 45710 }, { "epoch": 0.8153069596546927, "grad_norm": 0.23928017914295197, "learning_rate": 5.017144142072988e-06, "loss": 0.1095, "step": 45711 }, { "epoch": 0.8153247957764064, "grad_norm": 0.29541197419166565, "learning_rate": 5.01620885474405e-06, "loss": 0.1406, "step": 45712 }, { "epoch": 0.8153426318981201, "grad_norm": 0.22327785193920135, "learning_rate": 5.0152736448791895e-06, "loss": 0.0964, "step": 45713 }, { "epoch": 0.8153604680198338, "grad_norm": 0.3019932806491852, "learning_rate": 5.014338512482031e-06, "loss": 0.1351, "step": 45714 }, { "epoch": 0.8153783041415474, "grad_norm": 0.23647254705429077, "learning_rate": 5.013403457556193e-06, "loss": 0.0657, "step": 45715 }, { "epoch": 0.8153961402632611, "grad_norm": 0.29700228571891785, "learning_rate": 5.0124684801053115e-06, "loss": 0.0881, "step": 45716 }, { "epoch": 0.8154139763849748, "grad_norm": 0.2415122091770172, "learning_rate": 5.011533580132999e-06, "loss": 0.0727, "step": 45717 }, { "epoch": 0.8154318125066885, "grad_norm": 0.22231937944889069, "learning_rate": 5.0105987576428925e-06, "loss": 0.0797, "step": 45718 }, { "epoch": 0.8154496486284022, "grad_norm": 0.3903101682662964, "learning_rate": 5.0096640126386095e-06, "loss": 0.1282, "step": 45719 }, { "epoch": 0.815467484750116, "grad_norm": 0.43967410922050476, "learning_rate": 5.008729345123767e-06, "loss": 0.1407, "step": 45720 }, { "epoch": 0.8154853208718297, "grad_norm": 0.22630712389945984, "learning_rate": 5.0077947551020025e-06, "loss": 0.0905, "step": 45721 }, { "epoch": 0.8155031569935434, "grad_norm": 0.25379830598831177, "learning_rate": 5.0068602425769275e-06, "loss": 0.1629, "step": 45722 }, { "epoch": 0.815520993115257, "grad_norm": 0.3421266973018646, "learning_rate": 5.00592580755217e-06, "loss": 0.1377, "step": 45723 }, { "epoch": 0.8155388292369707, "grad_norm": 0.260030597448349, "learning_rate": 5.004991450031341e-06, "loss": 0.0868, "step": 45724 }, { "epoch": 0.8155566653586844, "grad_norm": 0.3151165246963501, "learning_rate": 5.004057170018081e-06, "loss": 0.1177, "step": 45725 }, { "epoch": 0.8155745014803981, "grad_norm": 0.2416413575410843, "learning_rate": 5.003122967515999e-06, "loss": 0.1296, "step": 45726 }, { "epoch": 0.8155923376021118, "grad_norm": 0.28530260920524597, "learning_rate": 5.0021888425287215e-06, "loss": 0.0892, "step": 45727 }, { "epoch": 0.8156101737238255, "grad_norm": 0.23772463202476501, "learning_rate": 5.001254795059857e-06, "loss": 0.1304, "step": 45728 }, { "epoch": 0.8156280098455392, "grad_norm": 0.22185170650482178, "learning_rate": 5.000320825113045e-06, "loss": 0.1085, "step": 45729 }, { "epoch": 0.8156458459672529, "grad_norm": 0.26586902141571045, "learning_rate": 4.999386932691896e-06, "loss": 0.1079, "step": 45730 }, { "epoch": 0.8156636820889666, "grad_norm": 0.21446923911571503, "learning_rate": 4.998453117800026e-06, "loss": 0.0996, "step": 45731 }, { "epoch": 0.8156815182106802, "grad_norm": 0.30676373839378357, "learning_rate": 4.997519380441068e-06, "loss": 0.1463, "step": 45732 }, { "epoch": 0.8156993543323939, "grad_norm": 0.2974746823310852, "learning_rate": 4.996585720618624e-06, "loss": 0.1027, "step": 45733 }, { "epoch": 0.8157171904541076, "grad_norm": 0.30245092511177063, "learning_rate": 4.995652138336329e-06, "loss": 0.1176, "step": 45734 }, { "epoch": 0.8157350265758213, "grad_norm": 0.22816167771816254, "learning_rate": 4.994718633597798e-06, "loss": 0.0915, "step": 45735 }, { "epoch": 0.815752862697535, "grad_norm": 0.33669912815093994, "learning_rate": 4.993785206406648e-06, "loss": 0.1251, "step": 45736 }, { "epoch": 0.8157706988192488, "grad_norm": 0.3293575346469879, "learning_rate": 4.992851856766487e-06, "loss": 0.1637, "step": 45737 }, { "epoch": 0.8157885349409625, "grad_norm": 0.2731926143169403, "learning_rate": 4.991918584680949e-06, "loss": 0.1454, "step": 45738 }, { "epoch": 0.8158063710626762, "grad_norm": 0.2395857721567154, "learning_rate": 4.990985390153647e-06, "loss": 0.1157, "step": 45739 }, { "epoch": 0.8158242071843899, "grad_norm": 0.24014852941036224, "learning_rate": 4.9900522731881945e-06, "loss": 0.0914, "step": 45740 }, { "epoch": 0.8158420433061035, "grad_norm": 0.35486292839050293, "learning_rate": 4.989119233788206e-06, "loss": 0.181, "step": 45741 }, { "epoch": 0.8158598794278172, "grad_norm": 0.24551841616630554, "learning_rate": 4.98818627195731e-06, "loss": 0.1261, "step": 45742 }, { "epoch": 0.8158777155495309, "grad_norm": 0.23259590566158295, "learning_rate": 4.987253387699115e-06, "loss": 0.0665, "step": 45743 }, { "epoch": 0.8158955516712446, "grad_norm": 0.341943621635437, "learning_rate": 4.986320581017231e-06, "loss": 0.1485, "step": 45744 }, { "epoch": 0.8159133877929583, "grad_norm": 0.36857911944389343, "learning_rate": 4.985387851915288e-06, "loss": 0.122, "step": 45745 }, { "epoch": 0.815931223914672, "grad_norm": 0.265610933303833, "learning_rate": 4.984455200396887e-06, "loss": 0.1259, "step": 45746 }, { "epoch": 0.8159490600363857, "grad_norm": 0.2793954908847809, "learning_rate": 4.983522626465658e-06, "loss": 0.1138, "step": 45747 }, { "epoch": 0.8159668961580994, "grad_norm": 0.24487145245075226, "learning_rate": 4.982590130125208e-06, "loss": 0.1315, "step": 45748 }, { "epoch": 0.815984732279813, "grad_norm": 0.27068623900413513, "learning_rate": 4.981657711379154e-06, "loss": 0.0811, "step": 45749 }, { "epoch": 0.8160025684015267, "grad_norm": 0.24784843623638153, "learning_rate": 4.980725370231101e-06, "loss": 0.0863, "step": 45750 }, { "epoch": 0.8160204045232404, "grad_norm": 0.3247925937175751, "learning_rate": 4.979793106684677e-06, "loss": 0.0895, "step": 45751 }, { "epoch": 0.8160382406449541, "grad_norm": 0.2142827957868576, "learning_rate": 4.978860920743492e-06, "loss": 0.0794, "step": 45752 }, { "epoch": 0.8160560767666678, "grad_norm": 0.31806308031082153, "learning_rate": 4.977928812411156e-06, "loss": 0.1146, "step": 45753 }, { "epoch": 0.8160739128883816, "grad_norm": 0.25226280093193054, "learning_rate": 4.976996781691276e-06, "loss": 0.1248, "step": 45754 }, { "epoch": 0.8160917490100953, "grad_norm": 0.22675657272338867, "learning_rate": 4.976064828587479e-06, "loss": 0.1015, "step": 45755 }, { "epoch": 0.816109585131809, "grad_norm": 0.2481113225221634, "learning_rate": 4.975132953103371e-06, "loss": 0.0821, "step": 45756 }, { "epoch": 0.8161274212535227, "grad_norm": 0.1804397851228714, "learning_rate": 4.974201155242564e-06, "loss": 0.0781, "step": 45757 }, { "epoch": 0.8161452573752364, "grad_norm": 0.26124024391174316, "learning_rate": 4.973269435008662e-06, "loss": 0.1038, "step": 45758 }, { "epoch": 0.81616309349695, "grad_norm": 0.2906644344329834, "learning_rate": 4.972337792405288e-06, "loss": 0.1582, "step": 45759 }, { "epoch": 0.8161809296186637, "grad_norm": 0.25993266701698303, "learning_rate": 4.971406227436054e-06, "loss": 0.2159, "step": 45760 }, { "epoch": 0.8161987657403774, "grad_norm": 0.29438257217407227, "learning_rate": 4.970474740104569e-06, "loss": 0.1117, "step": 45761 }, { "epoch": 0.8162166018620911, "grad_norm": 0.27353140711784363, "learning_rate": 4.969543330414439e-06, "loss": 0.1076, "step": 45762 }, { "epoch": 0.8162344379838048, "grad_norm": 0.25626784563064575, "learning_rate": 4.968611998369274e-06, "loss": 0.087, "step": 45763 }, { "epoch": 0.8162522741055185, "grad_norm": 0.24238912761211395, "learning_rate": 4.967680743972691e-06, "loss": 0.0801, "step": 45764 }, { "epoch": 0.8162701102272322, "grad_norm": 0.37771111726760864, "learning_rate": 4.966749567228298e-06, "loss": 0.1855, "step": 45765 }, { "epoch": 0.8162879463489459, "grad_norm": 0.2881709039211273, "learning_rate": 4.965818468139705e-06, "loss": 0.0789, "step": 45766 }, { "epoch": 0.8163057824706595, "grad_norm": 0.28422465920448303, "learning_rate": 4.964887446710509e-06, "loss": 0.1139, "step": 45767 }, { "epoch": 0.8163236185923732, "grad_norm": 0.3086110055446625, "learning_rate": 4.9639565029443395e-06, "loss": 0.1484, "step": 45768 }, { "epoch": 0.8163414547140869, "grad_norm": 0.2513750493526459, "learning_rate": 4.96302563684479e-06, "loss": 0.1055, "step": 45769 }, { "epoch": 0.8163592908358006, "grad_norm": 0.2533184885978699, "learning_rate": 4.9620948484154775e-06, "loss": 0.1157, "step": 45770 }, { "epoch": 0.8163771269575144, "grad_norm": 0.232883483171463, "learning_rate": 4.961164137659999e-06, "loss": 0.106, "step": 45771 }, { "epoch": 0.8163949630792281, "grad_norm": 0.38878074288368225, "learning_rate": 4.960233504581973e-06, "loss": 0.1654, "step": 45772 }, { "epoch": 0.8164127992009418, "grad_norm": 0.2574453055858612, "learning_rate": 4.959302949185002e-06, "loss": 0.1214, "step": 45773 }, { "epoch": 0.8164306353226555, "grad_norm": 0.20646406710147858, "learning_rate": 4.958372471472697e-06, "loss": 0.091, "step": 45774 }, { "epoch": 0.8164484714443692, "grad_norm": 0.2746943235397339, "learning_rate": 4.957442071448665e-06, "loss": 0.1221, "step": 45775 }, { "epoch": 0.8164663075660828, "grad_norm": 0.21967127919197083, "learning_rate": 4.956511749116502e-06, "loss": 0.0915, "step": 45776 }, { "epoch": 0.8164841436877965, "grad_norm": 0.26342082023620605, "learning_rate": 4.955581504479831e-06, "loss": 0.1027, "step": 45777 }, { "epoch": 0.8165019798095102, "grad_norm": 0.25215309858322144, "learning_rate": 4.954651337542246e-06, "loss": 0.0958, "step": 45778 }, { "epoch": 0.8165198159312239, "grad_norm": 0.2854245901107788, "learning_rate": 4.953721248307358e-06, "loss": 0.1234, "step": 45779 }, { "epoch": 0.8165376520529376, "grad_norm": 0.3793368339538574, "learning_rate": 4.9527912367787615e-06, "loss": 0.1675, "step": 45780 }, { "epoch": 0.8165554881746513, "grad_norm": 0.2954586148262024, "learning_rate": 4.951861302960078e-06, "loss": 0.1387, "step": 45781 }, { "epoch": 0.816573324296365, "grad_norm": 0.2922453284263611, "learning_rate": 4.950931446854906e-06, "loss": 0.1192, "step": 45782 }, { "epoch": 0.8165911604180787, "grad_norm": 0.2825854420661926, "learning_rate": 4.950001668466847e-06, "loss": 0.142, "step": 45783 }, { "epoch": 0.8166089965397924, "grad_norm": 0.21359816193580627, "learning_rate": 4.949071967799501e-06, "loss": 0.0717, "step": 45784 }, { "epoch": 0.816626832661506, "grad_norm": 0.23116125166416168, "learning_rate": 4.948142344856482e-06, "loss": 0.06, "step": 45785 }, { "epoch": 0.8166446687832197, "grad_norm": 0.30504778027534485, "learning_rate": 4.9472127996413846e-06, "loss": 0.1091, "step": 45786 }, { "epoch": 0.8166625049049334, "grad_norm": 0.31183409690856934, "learning_rate": 4.946283332157822e-06, "loss": 0.128, "step": 45787 }, { "epoch": 0.8166803410266472, "grad_norm": 0.26938846707344055, "learning_rate": 4.945353942409392e-06, "loss": 0.1011, "step": 45788 }, { "epoch": 0.8166981771483609, "grad_norm": 0.36265090107917786, "learning_rate": 4.944424630399688e-06, "loss": 0.1167, "step": 45789 }, { "epoch": 0.8167160132700746, "grad_norm": 0.31251442432403564, "learning_rate": 4.94349539613233e-06, "loss": 0.0947, "step": 45790 }, { "epoch": 0.8167338493917883, "grad_norm": 0.2830224335193634, "learning_rate": 4.94256623961091e-06, "loss": 0.1542, "step": 45791 }, { "epoch": 0.816751685513502, "grad_norm": 0.21515852212905884, "learning_rate": 4.941637160839033e-06, "loss": 0.077, "step": 45792 }, { "epoch": 0.8167695216352157, "grad_norm": 0.23835642635822296, "learning_rate": 4.94070815982029e-06, "loss": 0.1479, "step": 45793 }, { "epoch": 0.8167873577569293, "grad_norm": 0.38214388489723206, "learning_rate": 4.939779236558298e-06, "loss": 0.1306, "step": 45794 }, { "epoch": 0.816805193878643, "grad_norm": 0.21305690705776215, "learning_rate": 4.938850391056648e-06, "loss": 0.0799, "step": 45795 }, { "epoch": 0.8168230300003567, "grad_norm": 0.4495021104812622, "learning_rate": 4.937921623318944e-06, "loss": 0.1168, "step": 45796 }, { "epoch": 0.8168408661220704, "grad_norm": 0.31790244579315186, "learning_rate": 4.93699293334878e-06, "loss": 0.1797, "step": 45797 }, { "epoch": 0.8168587022437841, "grad_norm": 0.3388820290565491, "learning_rate": 4.936064321149766e-06, "loss": 0.1488, "step": 45798 }, { "epoch": 0.8168765383654978, "grad_norm": 0.30388545989990234, "learning_rate": 4.935135786725498e-06, "loss": 0.1315, "step": 45799 }, { "epoch": 0.8168943744872115, "grad_norm": 0.20960137248039246, "learning_rate": 4.9342073300795654e-06, "loss": 0.0885, "step": 45800 }, { "epoch": 0.8169122106089252, "grad_norm": 0.22565042972564697, "learning_rate": 4.933278951215581e-06, "loss": 0.1111, "step": 45801 }, { "epoch": 0.8169300467306388, "grad_norm": 0.26642322540283203, "learning_rate": 4.932350650137135e-06, "loss": 0.1104, "step": 45802 }, { "epoch": 0.8169478828523525, "grad_norm": 0.2083117812871933, "learning_rate": 4.931422426847834e-06, "loss": 0.1007, "step": 45803 }, { "epoch": 0.8169657189740662, "grad_norm": 0.25963038206100464, "learning_rate": 4.930494281351269e-06, "loss": 0.1256, "step": 45804 }, { "epoch": 0.81698355509578, "grad_norm": 0.26066914200782776, "learning_rate": 4.9295662136510434e-06, "loss": 0.0713, "step": 45805 }, { "epoch": 0.8170013912174937, "grad_norm": 0.23632046580314636, "learning_rate": 4.928638223750745e-06, "loss": 0.1243, "step": 45806 }, { "epoch": 0.8170192273392074, "grad_norm": 0.2195056676864624, "learning_rate": 4.927710311653982e-06, "loss": 0.104, "step": 45807 }, { "epoch": 0.8170370634609211, "grad_norm": 0.19980207085609436, "learning_rate": 4.926782477364348e-06, "loss": 0.0877, "step": 45808 }, { "epoch": 0.8170548995826348, "grad_norm": 0.237391859292984, "learning_rate": 4.925854720885436e-06, "loss": 0.0976, "step": 45809 }, { "epoch": 0.8170727357043485, "grad_norm": 0.2724595069885254, "learning_rate": 4.924927042220839e-06, "loss": 0.0809, "step": 45810 }, { "epoch": 0.8170905718260622, "grad_norm": 0.21402789652347565, "learning_rate": 4.923999441374164e-06, "loss": 0.0892, "step": 45811 }, { "epoch": 0.8171084079477758, "grad_norm": 0.2948894202709198, "learning_rate": 4.923071918349003e-06, "loss": 0.0972, "step": 45812 }, { "epoch": 0.8171262440694895, "grad_norm": 0.2897847592830658, "learning_rate": 4.922144473148943e-06, "loss": 0.1289, "step": 45813 }, { "epoch": 0.8171440801912032, "grad_norm": 0.2478969246149063, "learning_rate": 4.92121710577759e-06, "loss": 0.1016, "step": 45814 }, { "epoch": 0.8171619163129169, "grad_norm": 0.21380673348903656, "learning_rate": 4.92028981623853e-06, "loss": 0.1043, "step": 45815 }, { "epoch": 0.8171797524346306, "grad_norm": 0.21537034213542938, "learning_rate": 4.919362604535368e-06, "loss": 0.0798, "step": 45816 }, { "epoch": 0.8171975885563443, "grad_norm": 0.2611224055290222, "learning_rate": 4.918435470671692e-06, "loss": 0.146, "step": 45817 }, { "epoch": 0.817215424678058, "grad_norm": 0.3332725167274475, "learning_rate": 4.917508414651095e-06, "loss": 0.1237, "step": 45818 }, { "epoch": 0.8172332607997717, "grad_norm": 0.4206140637397766, "learning_rate": 4.916581436477169e-06, "loss": 0.0917, "step": 45819 }, { "epoch": 0.8172510969214853, "grad_norm": 0.38903236389160156, "learning_rate": 4.915654536153513e-06, "loss": 0.1404, "step": 45820 }, { "epoch": 0.8172689330431991, "grad_norm": 0.26197049021720886, "learning_rate": 4.914727713683717e-06, "loss": 0.0757, "step": 45821 }, { "epoch": 0.8172867691649128, "grad_norm": 0.2817513048648834, "learning_rate": 4.913800969071375e-06, "loss": 0.0934, "step": 45822 }, { "epoch": 0.8173046052866265, "grad_norm": 0.28478750586509705, "learning_rate": 4.912874302320069e-06, "loss": 0.1077, "step": 45823 }, { "epoch": 0.8173224414083402, "grad_norm": 0.25764408707618713, "learning_rate": 4.91194771343341e-06, "loss": 0.1108, "step": 45824 }, { "epoch": 0.8173402775300539, "grad_norm": 0.29493528604507446, "learning_rate": 4.911021202414978e-06, "loss": 0.1475, "step": 45825 }, { "epoch": 0.8173581136517676, "grad_norm": 0.2621796131134033, "learning_rate": 4.910094769268367e-06, "loss": 0.1698, "step": 45826 }, { "epoch": 0.8173759497734813, "grad_norm": 0.3376453220844269, "learning_rate": 4.909168413997162e-06, "loss": 0.127, "step": 45827 }, { "epoch": 0.817393785895195, "grad_norm": 0.23872889578342438, "learning_rate": 4.908242136604962e-06, "loss": 0.1166, "step": 45828 }, { "epoch": 0.8174116220169086, "grad_norm": 0.21681855618953705, "learning_rate": 4.907315937095352e-06, "loss": 0.0882, "step": 45829 }, { "epoch": 0.8174294581386223, "grad_norm": 0.26990383863449097, "learning_rate": 4.906389815471929e-06, "loss": 0.0902, "step": 45830 }, { "epoch": 0.817447294260336, "grad_norm": 0.23880477249622345, "learning_rate": 4.905463771738283e-06, "loss": 0.0397, "step": 45831 }, { "epoch": 0.8174651303820497, "grad_norm": 0.21226686239242554, "learning_rate": 4.9045378058979905e-06, "loss": 0.1026, "step": 45832 }, { "epoch": 0.8174829665037634, "grad_norm": 0.2717452645301819, "learning_rate": 4.903611917954656e-06, "loss": 0.1114, "step": 45833 }, { "epoch": 0.8175008026254771, "grad_norm": 0.37122642993927, "learning_rate": 4.902686107911867e-06, "loss": 0.1324, "step": 45834 }, { "epoch": 0.8175186387471908, "grad_norm": 0.24966545403003693, "learning_rate": 4.901760375773204e-06, "loss": 0.0978, "step": 45835 }, { "epoch": 0.8175364748689045, "grad_norm": 0.2755209505558014, "learning_rate": 4.900834721542255e-06, "loss": 0.1006, "step": 45836 }, { "epoch": 0.8175543109906181, "grad_norm": 0.20548062026500702, "learning_rate": 4.899909145222617e-06, "loss": 0.0453, "step": 45837 }, { "epoch": 0.817572147112332, "grad_norm": 0.22613300383090973, "learning_rate": 4.898983646817876e-06, "loss": 0.136, "step": 45838 }, { "epoch": 0.8175899832340456, "grad_norm": 0.22451215982437134, "learning_rate": 4.898058226331615e-06, "loss": 0.1168, "step": 45839 }, { "epoch": 0.8176078193557593, "grad_norm": 0.2814333438873291, "learning_rate": 4.897132883767425e-06, "loss": 0.1011, "step": 45840 }, { "epoch": 0.817625655477473, "grad_norm": 0.26289355754852295, "learning_rate": 4.896207619128884e-06, "loss": 0.1122, "step": 45841 }, { "epoch": 0.8176434915991867, "grad_norm": 0.2840304374694824, "learning_rate": 4.895282432419585e-06, "loss": 0.0935, "step": 45842 }, { "epoch": 0.8176613277209004, "grad_norm": 0.27413636445999146, "learning_rate": 4.894357323643123e-06, "loss": 0.083, "step": 45843 }, { "epoch": 0.8176791638426141, "grad_norm": 0.27589163184165955, "learning_rate": 4.8934322928030764e-06, "loss": 0.1403, "step": 45844 }, { "epoch": 0.8176969999643278, "grad_norm": 0.3354912996292114, "learning_rate": 4.892507339903024e-06, "loss": 0.1162, "step": 45845 }, { "epoch": 0.8177148360860415, "grad_norm": 0.2817024886608124, "learning_rate": 4.891582464946562e-06, "loss": 0.1184, "step": 45846 }, { "epoch": 0.8177326722077551, "grad_norm": 0.2827165424823761, "learning_rate": 4.890657667937276e-06, "loss": 0.1107, "step": 45847 }, { "epoch": 0.8177505083294688, "grad_norm": 0.27988970279693604, "learning_rate": 4.889732948878745e-06, "loss": 0.0916, "step": 45848 }, { "epoch": 0.8177683444511825, "grad_norm": 0.3029497563838959, "learning_rate": 4.888808307774545e-06, "loss": 0.1428, "step": 45849 }, { "epoch": 0.8177861805728962, "grad_norm": 0.306348979473114, "learning_rate": 4.88788374462828e-06, "loss": 0.0998, "step": 45850 }, { "epoch": 0.8178040166946099, "grad_norm": 0.2668544054031372, "learning_rate": 4.886959259443524e-06, "loss": 0.1205, "step": 45851 }, { "epoch": 0.8178218528163236, "grad_norm": 0.2521647810935974, "learning_rate": 4.886034852223862e-06, "loss": 0.1194, "step": 45852 }, { "epoch": 0.8178396889380373, "grad_norm": 0.2065790295600891, "learning_rate": 4.885110522972875e-06, "loss": 0.0756, "step": 45853 }, { "epoch": 0.817857525059751, "grad_norm": 0.24508579075336456, "learning_rate": 4.884186271694138e-06, "loss": 0.0932, "step": 45854 }, { "epoch": 0.8178753611814648, "grad_norm": 0.2725170850753784, "learning_rate": 4.88326209839125e-06, "loss": 0.1723, "step": 45855 }, { "epoch": 0.8178931973031784, "grad_norm": 0.28779685497283936, "learning_rate": 4.882338003067783e-06, "loss": 0.1604, "step": 45856 }, { "epoch": 0.8179110334248921, "grad_norm": 0.2571767568588257, "learning_rate": 4.881413985727326e-06, "loss": 0.1409, "step": 45857 }, { "epoch": 0.8179288695466058, "grad_norm": 0.2688414752483368, "learning_rate": 4.880490046373451e-06, "loss": 0.1, "step": 45858 }, { "epoch": 0.8179467056683195, "grad_norm": 0.23567961156368256, "learning_rate": 4.879566185009754e-06, "loss": 0.1245, "step": 45859 }, { "epoch": 0.8179645417900332, "grad_norm": 0.2957104742527008, "learning_rate": 4.878642401639805e-06, "loss": 0.1138, "step": 45860 }, { "epoch": 0.8179823779117469, "grad_norm": 0.28867942094802856, "learning_rate": 4.877718696267189e-06, "loss": 0.0653, "step": 45861 }, { "epoch": 0.8180002140334606, "grad_norm": 0.35218754410743713, "learning_rate": 4.876795068895479e-06, "loss": 0.1095, "step": 45862 }, { "epoch": 0.8180180501551743, "grad_norm": 0.33524566888809204, "learning_rate": 4.875871519528269e-06, "loss": 0.1696, "step": 45863 }, { "epoch": 0.818035886276888, "grad_norm": 0.2608015537261963, "learning_rate": 4.874948048169131e-06, "loss": 0.0963, "step": 45864 }, { "epoch": 0.8180537223986016, "grad_norm": 0.2816460430622101, "learning_rate": 4.874024654821643e-06, "loss": 0.1386, "step": 45865 }, { "epoch": 0.8180715585203153, "grad_norm": 0.25960037112236023, "learning_rate": 4.873101339489392e-06, "loss": 0.1348, "step": 45866 }, { "epoch": 0.818089394642029, "grad_norm": 0.2543811500072479, "learning_rate": 4.872178102175939e-06, "loss": 0.0835, "step": 45867 }, { "epoch": 0.8181072307637427, "grad_norm": 0.27094703912734985, "learning_rate": 4.8712549428848865e-06, "loss": 0.1189, "step": 45868 }, { "epoch": 0.8181250668854564, "grad_norm": 0.27168506383895874, "learning_rate": 4.870331861619795e-06, "loss": 0.1252, "step": 45869 }, { "epoch": 0.8181429030071701, "grad_norm": 0.2607700526714325, "learning_rate": 4.869408858384256e-06, "loss": 0.0899, "step": 45870 }, { "epoch": 0.8181607391288838, "grad_norm": 0.2877371907234192, "learning_rate": 4.868485933181832e-06, "loss": 0.0862, "step": 45871 }, { "epoch": 0.8181785752505976, "grad_norm": 0.2727615237236023, "learning_rate": 4.867563086016119e-06, "loss": 0.1597, "step": 45872 }, { "epoch": 0.8181964113723112, "grad_norm": 0.2519082725048065, "learning_rate": 4.866640316890686e-06, "loss": 0.1317, "step": 45873 }, { "epoch": 0.8182142474940249, "grad_norm": 0.32453885674476624, "learning_rate": 4.865717625809108e-06, "loss": 0.1045, "step": 45874 }, { "epoch": 0.8182320836157386, "grad_norm": 0.2822349965572357, "learning_rate": 4.864795012774953e-06, "loss": 0.1303, "step": 45875 }, { "epoch": 0.8182499197374523, "grad_norm": 0.2806457579135895, "learning_rate": 4.863872477791817e-06, "loss": 0.1222, "step": 45876 }, { "epoch": 0.818267755859166, "grad_norm": 0.3050583600997925, "learning_rate": 4.862950020863266e-06, "loss": 0.136, "step": 45877 }, { "epoch": 0.8182855919808797, "grad_norm": 0.22583162784576416, "learning_rate": 4.862027641992875e-06, "loss": 0.1082, "step": 45878 }, { "epoch": 0.8183034281025934, "grad_norm": 0.3267974853515625, "learning_rate": 4.861105341184219e-06, "loss": 0.1137, "step": 45879 }, { "epoch": 0.8183212642243071, "grad_norm": 0.3673345446586609, "learning_rate": 4.86018311844087e-06, "loss": 0.1569, "step": 45880 }, { "epoch": 0.8183391003460208, "grad_norm": 0.2517510652542114, "learning_rate": 4.859260973766413e-06, "loss": 0.1102, "step": 45881 }, { "epoch": 0.8183569364677344, "grad_norm": 0.3128267824649811, "learning_rate": 4.858338907164417e-06, "loss": 0.0868, "step": 45882 }, { "epoch": 0.8183747725894481, "grad_norm": 0.2551311254501343, "learning_rate": 4.857416918638449e-06, "loss": 0.1136, "step": 45883 }, { "epoch": 0.8183926087111618, "grad_norm": 0.2428465038537979, "learning_rate": 4.856495008192097e-06, "loss": 0.1141, "step": 45884 }, { "epoch": 0.8184104448328755, "grad_norm": 0.3564857244491577, "learning_rate": 4.85557317582892e-06, "loss": 0.1493, "step": 45885 }, { "epoch": 0.8184282809545892, "grad_norm": 0.31332963705062866, "learning_rate": 4.854651421552509e-06, "loss": 0.1231, "step": 45886 }, { "epoch": 0.8184461170763029, "grad_norm": 0.27633076906204224, "learning_rate": 4.853729745366423e-06, "loss": 0.1179, "step": 45887 }, { "epoch": 0.8184639531980166, "grad_norm": 0.2887042164802551, "learning_rate": 4.852808147274235e-06, "loss": 0.0956, "step": 45888 }, { "epoch": 0.8184817893197304, "grad_norm": 0.3558990955352783, "learning_rate": 4.851886627279525e-06, "loss": 0.1406, "step": 45889 }, { "epoch": 0.8184996254414441, "grad_norm": 0.3414379954338074, "learning_rate": 4.850965185385864e-06, "loss": 0.0849, "step": 45890 }, { "epoch": 0.8185174615631577, "grad_norm": 0.2530554234981537, "learning_rate": 4.8500438215968226e-06, "loss": 0.1132, "step": 45891 }, { "epoch": 0.8185352976848714, "grad_norm": 0.347713828086853, "learning_rate": 4.849122535915968e-06, "loss": 0.1423, "step": 45892 }, { "epoch": 0.8185531338065851, "grad_norm": 0.2625904977321625, "learning_rate": 4.848201328346869e-06, "loss": 0.083, "step": 45893 }, { "epoch": 0.8185709699282988, "grad_norm": 0.2691967189311981, "learning_rate": 4.847280198893106e-06, "loss": 0.1555, "step": 45894 }, { "epoch": 0.8185888060500125, "grad_norm": 0.27361661195755005, "learning_rate": 4.846359147558249e-06, "loss": 0.0712, "step": 45895 }, { "epoch": 0.8186066421717262, "grad_norm": 0.2857093811035156, "learning_rate": 4.845438174345856e-06, "loss": 0.1275, "step": 45896 }, { "epoch": 0.8186244782934399, "grad_norm": 0.26945415139198303, "learning_rate": 4.844517279259514e-06, "loss": 0.11, "step": 45897 }, { "epoch": 0.8186423144151536, "grad_norm": 0.27251341938972473, "learning_rate": 4.843596462302777e-06, "loss": 0.1218, "step": 45898 }, { "epoch": 0.8186601505368672, "grad_norm": 0.2999931871891022, "learning_rate": 4.842675723479229e-06, "loss": 0.1198, "step": 45899 }, { "epoch": 0.8186779866585809, "grad_norm": 0.3390146493911743, "learning_rate": 4.8417550627924305e-06, "loss": 0.1462, "step": 45900 }, { "epoch": 0.8186958227802946, "grad_norm": 0.24994206428527832, "learning_rate": 4.840834480245945e-06, "loss": 0.1209, "step": 45901 }, { "epoch": 0.8187136589020083, "grad_norm": 0.3419128358364105, "learning_rate": 4.839913975843355e-06, "loss": 0.0972, "step": 45902 }, { "epoch": 0.818731495023722, "grad_norm": 0.25583016872406006, "learning_rate": 4.838993549588222e-06, "loss": 0.1276, "step": 45903 }, { "epoch": 0.8187493311454357, "grad_norm": 0.3033185303211212, "learning_rate": 4.838073201484114e-06, "loss": 0.1267, "step": 45904 }, { "epoch": 0.8187671672671494, "grad_norm": 0.2594734728336334, "learning_rate": 4.837152931534597e-06, "loss": 0.1089, "step": 45905 }, { "epoch": 0.8187850033888632, "grad_norm": 0.3624280095100403, "learning_rate": 4.836232739743235e-06, "loss": 0.1091, "step": 45906 }, { "epoch": 0.8188028395105769, "grad_norm": 0.26875096559524536, "learning_rate": 4.835312626113603e-06, "loss": 0.0885, "step": 45907 }, { "epoch": 0.8188206756322906, "grad_norm": 0.2868305742740631, "learning_rate": 4.8343925906492646e-06, "loss": 0.1384, "step": 45908 }, { "epoch": 0.8188385117540042, "grad_norm": 0.22457817196846008, "learning_rate": 4.833472633353786e-06, "loss": 0.105, "step": 45909 }, { "epoch": 0.8188563478757179, "grad_norm": 0.37026724219322205, "learning_rate": 4.832552754230726e-06, "loss": 0.0971, "step": 45910 }, { "epoch": 0.8188741839974316, "grad_norm": 0.2729056775569916, "learning_rate": 4.831632953283663e-06, "loss": 0.1161, "step": 45911 }, { "epoch": 0.8188920201191453, "grad_norm": 0.22457444667816162, "learning_rate": 4.8307132305161515e-06, "loss": 0.1265, "step": 45912 }, { "epoch": 0.818909856240859, "grad_norm": 0.22164063155651093, "learning_rate": 4.829793585931766e-06, "loss": 0.0669, "step": 45913 }, { "epoch": 0.8189276923625727, "grad_norm": 0.32471734285354614, "learning_rate": 4.828874019534063e-06, "loss": 0.0635, "step": 45914 }, { "epoch": 0.8189455284842864, "grad_norm": 0.22897087037563324, "learning_rate": 4.827954531326617e-06, "loss": 0.0971, "step": 45915 }, { "epoch": 0.818963364606, "grad_norm": 0.2785676419734955, "learning_rate": 4.827035121312987e-06, "loss": 0.1101, "step": 45916 }, { "epoch": 0.8189812007277137, "grad_norm": 0.1943778693675995, "learning_rate": 4.8261157894967355e-06, "loss": 0.052, "step": 45917 }, { "epoch": 0.8189990368494274, "grad_norm": 0.20942983031272888, "learning_rate": 4.8251965358814265e-06, "loss": 0.0987, "step": 45918 }, { "epoch": 0.8190168729711411, "grad_norm": 0.22504980862140656, "learning_rate": 4.824277360470619e-06, "loss": 0.1372, "step": 45919 }, { "epoch": 0.8190347090928548, "grad_norm": 0.34018179774284363, "learning_rate": 4.823358263267885e-06, "loss": 0.122, "step": 45920 }, { "epoch": 0.8190525452145685, "grad_norm": 0.2242036610841751, "learning_rate": 4.822439244276786e-06, "loss": 0.0938, "step": 45921 }, { "epoch": 0.8190703813362823, "grad_norm": 0.29010704159736633, "learning_rate": 4.8215203035008785e-06, "loss": 0.127, "step": 45922 }, { "epoch": 0.819088217457996, "grad_norm": 0.35392218828201294, "learning_rate": 4.820601440943723e-06, "loss": 0.1512, "step": 45923 }, { "epoch": 0.8191060535797097, "grad_norm": 0.2635032534599304, "learning_rate": 4.8196826566088926e-06, "loss": 0.1185, "step": 45924 }, { "epoch": 0.8191238897014234, "grad_norm": 0.2950946092605591, "learning_rate": 4.818763950499935e-06, "loss": 0.1308, "step": 45925 }, { "epoch": 0.819141725823137, "grad_norm": 0.2535664439201355, "learning_rate": 4.817845322620426e-06, "loss": 0.0633, "step": 45926 }, { "epoch": 0.8191595619448507, "grad_norm": 0.33581188321113586, "learning_rate": 4.816926772973915e-06, "loss": 0.0996, "step": 45927 }, { "epoch": 0.8191773980665644, "grad_norm": 0.2544093132019043, "learning_rate": 4.81600830156397e-06, "loss": 0.1417, "step": 45928 }, { "epoch": 0.8191952341882781, "grad_norm": 0.25684642791748047, "learning_rate": 4.815089908394149e-06, "loss": 0.1048, "step": 45929 }, { "epoch": 0.8192130703099918, "grad_norm": 0.39580997824668884, "learning_rate": 4.814171593468011e-06, "loss": 0.1266, "step": 45930 }, { "epoch": 0.8192309064317055, "grad_norm": 0.36412855982780457, "learning_rate": 4.813253356789116e-06, "loss": 0.1319, "step": 45931 }, { "epoch": 0.8192487425534192, "grad_norm": 0.2551431953907013, "learning_rate": 4.812335198361018e-06, "loss": 0.1167, "step": 45932 }, { "epoch": 0.8192665786751329, "grad_norm": 0.2499801218509674, "learning_rate": 4.8114171181872865e-06, "loss": 0.1393, "step": 45933 }, { "epoch": 0.8192844147968465, "grad_norm": 0.25826510787010193, "learning_rate": 4.810499116271475e-06, "loss": 0.1789, "step": 45934 }, { "epoch": 0.8193022509185602, "grad_norm": 0.27540749311447144, "learning_rate": 4.809581192617143e-06, "loss": 0.1421, "step": 45935 }, { "epoch": 0.8193200870402739, "grad_norm": 0.4772101640701294, "learning_rate": 4.80866334722784e-06, "loss": 0.153, "step": 45936 }, { "epoch": 0.8193379231619876, "grad_norm": 0.24885480105876923, "learning_rate": 4.80774558010714e-06, "loss": 0.1363, "step": 45937 }, { "epoch": 0.8193557592837013, "grad_norm": 0.2563723921775818, "learning_rate": 4.8068278912585915e-06, "loss": 0.0514, "step": 45938 }, { "epoch": 0.8193735954054151, "grad_norm": 0.46413376927375793, "learning_rate": 4.805910280685746e-06, "loss": 0.1271, "step": 45939 }, { "epoch": 0.8193914315271288, "grad_norm": 0.29271721839904785, "learning_rate": 4.804992748392168e-06, "loss": 0.0525, "step": 45940 }, { "epoch": 0.8194092676488425, "grad_norm": 0.2971426248550415, "learning_rate": 4.804075294381416e-06, "loss": 0.1054, "step": 45941 }, { "epoch": 0.8194271037705562, "grad_norm": 0.2233019918203354, "learning_rate": 4.803157918657048e-06, "loss": 0.1254, "step": 45942 }, { "epoch": 0.8194449398922699, "grad_norm": 0.3524329662322998, "learning_rate": 4.802240621222615e-06, "loss": 0.1251, "step": 45943 }, { "epoch": 0.8194627760139835, "grad_norm": 0.2646976113319397, "learning_rate": 4.80132340208167e-06, "loss": 0.1607, "step": 45944 }, { "epoch": 0.8194806121356972, "grad_norm": 0.23442672193050385, "learning_rate": 4.800406261237769e-06, "loss": 0.0622, "step": 45945 }, { "epoch": 0.8194984482574109, "grad_norm": 0.38687658309936523, "learning_rate": 4.799489198694476e-06, "loss": 0.1152, "step": 45946 }, { "epoch": 0.8195162843791246, "grad_norm": 0.3659440279006958, "learning_rate": 4.798572214455338e-06, "loss": 0.1753, "step": 45947 }, { "epoch": 0.8195341205008383, "grad_norm": 0.2823337912559509, "learning_rate": 4.797655308523913e-06, "loss": 0.0914, "step": 45948 }, { "epoch": 0.819551956622552, "grad_norm": 0.3602820336818695, "learning_rate": 4.796738480903748e-06, "loss": 0.112, "step": 45949 }, { "epoch": 0.8195697927442657, "grad_norm": 0.22025497257709503, "learning_rate": 4.795821731598407e-06, "loss": 0.0834, "step": 45950 }, { "epoch": 0.8195876288659794, "grad_norm": 0.20211149752140045, "learning_rate": 4.794905060611441e-06, "loss": 0.0685, "step": 45951 }, { "epoch": 0.819605464987693, "grad_norm": 0.4158110022544861, "learning_rate": 4.793988467946395e-06, "loss": 0.1474, "step": 45952 }, { "epoch": 0.8196233011094067, "grad_norm": 0.28092360496520996, "learning_rate": 4.793071953606834e-06, "loss": 0.1187, "step": 45953 }, { "epoch": 0.8196411372311204, "grad_norm": 0.19877710938453674, "learning_rate": 4.7921555175963e-06, "loss": 0.1029, "step": 45954 }, { "epoch": 0.8196589733528341, "grad_norm": 0.3380809724330902, "learning_rate": 4.791239159918357e-06, "loss": 0.1204, "step": 45955 }, { "epoch": 0.8196768094745479, "grad_norm": 0.2953563928604126, "learning_rate": 4.7903228805765504e-06, "loss": 0.1664, "step": 45956 }, { "epoch": 0.8196946455962616, "grad_norm": 0.3931449353694916, "learning_rate": 4.789406679574432e-06, "loss": 0.1175, "step": 45957 }, { "epoch": 0.8197124817179753, "grad_norm": 0.24556396901607513, "learning_rate": 4.788490556915548e-06, "loss": 0.1005, "step": 45958 }, { "epoch": 0.819730317839689, "grad_norm": 0.2688406705856323, "learning_rate": 4.787574512603462e-06, "loss": 0.1612, "step": 45959 }, { "epoch": 0.8197481539614027, "grad_norm": 0.23852618038654327, "learning_rate": 4.78665854664172e-06, "loss": 0.1307, "step": 45960 }, { "epoch": 0.8197659900831163, "grad_norm": 0.23295815289020538, "learning_rate": 4.785742659033867e-06, "loss": 0.0887, "step": 45961 }, { "epoch": 0.81978382620483, "grad_norm": 0.24690911173820496, "learning_rate": 4.784826849783455e-06, "loss": 0.106, "step": 45962 }, { "epoch": 0.8198016623265437, "grad_norm": 0.22683537006378174, "learning_rate": 4.783911118894041e-06, "loss": 0.1655, "step": 45963 }, { "epoch": 0.8198194984482574, "grad_norm": 0.3383115828037262, "learning_rate": 4.782995466369169e-06, "loss": 0.1226, "step": 45964 }, { "epoch": 0.8198373345699711, "grad_norm": 0.25798147916793823, "learning_rate": 4.782079892212391e-06, "loss": 0.157, "step": 45965 }, { "epoch": 0.8198551706916848, "grad_norm": 0.21800567209720612, "learning_rate": 4.781164396427246e-06, "loss": 0.1141, "step": 45966 }, { "epoch": 0.8198730068133985, "grad_norm": 0.2954353094100952, "learning_rate": 4.780248979017293e-06, "loss": 0.1138, "step": 45967 }, { "epoch": 0.8198908429351122, "grad_norm": 0.24430164694786072, "learning_rate": 4.7793336399860834e-06, "loss": 0.0797, "step": 45968 }, { "epoch": 0.8199086790568259, "grad_norm": 0.31240203976631165, "learning_rate": 4.778418379337163e-06, "loss": 0.13, "step": 45969 }, { "epoch": 0.8199265151785395, "grad_norm": 0.3194526433944702, "learning_rate": 4.777503197074074e-06, "loss": 0.1572, "step": 45970 }, { "epoch": 0.8199443513002532, "grad_norm": 0.3290162682533264, "learning_rate": 4.776588093200362e-06, "loss": 0.1071, "step": 45971 }, { "epoch": 0.8199621874219669, "grad_norm": 0.3180892765522003, "learning_rate": 4.775673067719588e-06, "loss": 0.1232, "step": 45972 }, { "epoch": 0.8199800235436807, "grad_norm": 0.36144909262657166, "learning_rate": 4.774758120635289e-06, "loss": 0.1918, "step": 45973 }, { "epoch": 0.8199978596653944, "grad_norm": 0.24784095585346222, "learning_rate": 4.773843251951013e-06, "loss": 0.1172, "step": 45974 }, { "epoch": 0.8200156957871081, "grad_norm": 0.2473553866147995, "learning_rate": 4.772928461670298e-06, "loss": 0.1039, "step": 45975 }, { "epoch": 0.8200335319088218, "grad_norm": 0.2709693908691406, "learning_rate": 4.772013749796708e-06, "loss": 0.0885, "step": 45976 }, { "epoch": 0.8200513680305355, "grad_norm": 0.2229028344154358, "learning_rate": 4.771099116333777e-06, "loss": 0.1504, "step": 45977 }, { "epoch": 0.8200692041522492, "grad_norm": 0.27017247676849365, "learning_rate": 4.770184561285054e-06, "loss": 0.1208, "step": 45978 }, { "epoch": 0.8200870402739628, "grad_norm": 0.37522339820861816, "learning_rate": 4.769270084654076e-06, "loss": 0.0739, "step": 45979 }, { "epoch": 0.8201048763956765, "grad_norm": 0.2650935649871826, "learning_rate": 4.7683556864444e-06, "loss": 0.1614, "step": 45980 }, { "epoch": 0.8201227125173902, "grad_norm": 0.40602293610572815, "learning_rate": 4.7674413666595615e-06, "loss": 0.1094, "step": 45981 }, { "epoch": 0.8201405486391039, "grad_norm": 0.24445992708206177, "learning_rate": 4.766527125303113e-06, "loss": 0.1026, "step": 45982 }, { "epoch": 0.8201583847608176, "grad_norm": 0.22367045283317566, "learning_rate": 4.765612962378593e-06, "loss": 0.1301, "step": 45983 }, { "epoch": 0.8201762208825313, "grad_norm": 0.2645864188671112, "learning_rate": 4.7646988778895425e-06, "loss": 0.099, "step": 45984 }, { "epoch": 0.820194057004245, "grad_norm": 0.28095898032188416, "learning_rate": 4.76378487183951e-06, "loss": 0.123, "step": 45985 }, { "epoch": 0.8202118931259587, "grad_norm": 0.276136577129364, "learning_rate": 4.76287094423204e-06, "loss": 0.1173, "step": 45986 }, { "epoch": 0.8202297292476723, "grad_norm": 0.2820354402065277, "learning_rate": 4.761957095070671e-06, "loss": 0.1207, "step": 45987 }, { "epoch": 0.820247565369386, "grad_norm": 0.2825775146484375, "learning_rate": 4.761043324358941e-06, "loss": 0.1193, "step": 45988 }, { "epoch": 0.8202654014910997, "grad_norm": 0.26971086859703064, "learning_rate": 4.760129632100402e-06, "loss": 0.1242, "step": 45989 }, { "epoch": 0.8202832376128135, "grad_norm": 0.2302245944738388, "learning_rate": 4.759216018298593e-06, "loss": 0.1128, "step": 45990 }, { "epoch": 0.8203010737345272, "grad_norm": 0.2266547679901123, "learning_rate": 4.7583024829570525e-06, "loss": 0.095, "step": 45991 }, { "epoch": 0.8203189098562409, "grad_norm": 0.30688589811325073, "learning_rate": 4.757389026079317e-06, "loss": 0.135, "step": 45992 }, { "epoch": 0.8203367459779546, "grad_norm": 0.24869315326213837, "learning_rate": 4.75647564766894e-06, "loss": 0.0882, "step": 45993 }, { "epoch": 0.8203545820996683, "grad_norm": 0.2837980389595032, "learning_rate": 4.7555623477294535e-06, "loss": 0.1092, "step": 45994 }, { "epoch": 0.820372418221382, "grad_norm": 0.6464115381240845, "learning_rate": 4.754649126264394e-06, "loss": 0.1296, "step": 45995 }, { "epoch": 0.8203902543430956, "grad_norm": 0.2385513186454773, "learning_rate": 4.753735983277313e-06, "loss": 0.083, "step": 45996 }, { "epoch": 0.8204080904648093, "grad_norm": 0.2110484391450882, "learning_rate": 4.752822918771738e-06, "loss": 0.1038, "step": 45997 }, { "epoch": 0.820425926586523, "grad_norm": 0.258468896150589, "learning_rate": 4.751909932751223e-06, "loss": 0.1293, "step": 45998 }, { "epoch": 0.8204437627082367, "grad_norm": 0.2708444893360138, "learning_rate": 4.750997025219295e-06, "loss": 0.1405, "step": 45999 }, { "epoch": 0.8204615988299504, "grad_norm": 0.3401334881782532, "learning_rate": 4.750084196179499e-06, "loss": 0.1088, "step": 46000 }, { "epoch": 0.8204615988299504, "eval_loss": 0.11009883880615234, "eval_runtime": 107.0669, "eval_samples_per_second": 9.564, "eval_steps_per_second": 1.597, "step": 46000 }, { "epoch": 0.8204794349516641, "grad_norm": 0.21945084631443024, "learning_rate": 4.749171445635362e-06, "loss": 0.1046, "step": 46001 }, { "epoch": 0.8204972710733778, "grad_norm": 0.266580194234848, "learning_rate": 4.748258773590439e-06, "loss": 0.0913, "step": 46002 }, { "epoch": 0.8205151071950915, "grad_norm": 0.34601345658302307, "learning_rate": 4.747346180048259e-06, "loss": 0.1417, "step": 46003 }, { "epoch": 0.8205329433168052, "grad_norm": 0.26941192150115967, "learning_rate": 4.746433665012362e-06, "loss": 0.0878, "step": 46004 }, { "epoch": 0.8205507794385188, "grad_norm": 0.21882638335227966, "learning_rate": 4.745521228486274e-06, "loss": 0.0973, "step": 46005 }, { "epoch": 0.8205686155602325, "grad_norm": 0.39612606167793274, "learning_rate": 4.744608870473552e-06, "loss": 0.1076, "step": 46006 }, { "epoch": 0.8205864516819463, "grad_norm": 0.24326537549495697, "learning_rate": 4.743696590977717e-06, "loss": 0.1012, "step": 46007 }, { "epoch": 0.82060428780366, "grad_norm": 0.23805387318134308, "learning_rate": 4.742784390002308e-06, "loss": 0.1202, "step": 46008 }, { "epoch": 0.8206221239253737, "grad_norm": 0.24482424557209015, "learning_rate": 4.741872267550868e-06, "loss": 0.0827, "step": 46009 }, { "epoch": 0.8206399600470874, "grad_norm": 0.27266693115234375, "learning_rate": 4.740960223626922e-06, "loss": 0.1414, "step": 46010 }, { "epoch": 0.8206577961688011, "grad_norm": 0.2890705168247223, "learning_rate": 4.740048258234017e-06, "loss": 0.1029, "step": 46011 }, { "epoch": 0.8206756322905148, "grad_norm": 0.23045755922794342, "learning_rate": 4.739136371375683e-06, "loss": 0.0576, "step": 46012 }, { "epoch": 0.8206934684122285, "grad_norm": 0.26319772005081177, "learning_rate": 4.7382245630554545e-06, "loss": 0.1277, "step": 46013 }, { "epoch": 0.8207113045339421, "grad_norm": 0.2503160536289215, "learning_rate": 4.737312833276861e-06, "loss": 0.0872, "step": 46014 }, { "epoch": 0.8207291406556558, "grad_norm": 0.2337036430835724, "learning_rate": 4.7364011820434455e-06, "loss": 0.1105, "step": 46015 }, { "epoch": 0.8207469767773695, "grad_norm": 0.27023717761039734, "learning_rate": 4.735489609358737e-06, "loss": 0.1145, "step": 46016 }, { "epoch": 0.8207648128990832, "grad_norm": 0.19023779034614563, "learning_rate": 4.734578115226271e-06, "loss": 0.0691, "step": 46017 }, { "epoch": 0.8207826490207969, "grad_norm": 0.21077920496463776, "learning_rate": 4.733666699649575e-06, "loss": 0.1108, "step": 46018 }, { "epoch": 0.8208004851425106, "grad_norm": 0.28194013237953186, "learning_rate": 4.732755362632191e-06, "loss": 0.1055, "step": 46019 }, { "epoch": 0.8208183212642243, "grad_norm": 0.27430495619773865, "learning_rate": 4.731844104177649e-06, "loss": 0.1107, "step": 46020 }, { "epoch": 0.820836157385938, "grad_norm": 0.29253271222114563, "learning_rate": 4.730932924289477e-06, "loss": 0.0811, "step": 46021 }, { "epoch": 0.8208539935076516, "grad_norm": 0.3712082505226135, "learning_rate": 4.7300218229712065e-06, "loss": 0.1573, "step": 46022 }, { "epoch": 0.8208718296293654, "grad_norm": 0.37444156408309937, "learning_rate": 4.729110800226372e-06, "loss": 0.1089, "step": 46023 }, { "epoch": 0.8208896657510791, "grad_norm": 0.22761695086956024, "learning_rate": 4.72819985605851e-06, "loss": 0.1267, "step": 46024 }, { "epoch": 0.8209075018727928, "grad_norm": 0.35790693759918213, "learning_rate": 4.727288990471146e-06, "loss": 0.1264, "step": 46025 }, { "epoch": 0.8209253379945065, "grad_norm": 0.24947425723075867, "learning_rate": 4.7263782034678154e-06, "loss": 0.1254, "step": 46026 }, { "epoch": 0.8209431741162202, "grad_norm": 0.26250845193862915, "learning_rate": 4.725467495052035e-06, "loss": 0.053, "step": 46027 }, { "epoch": 0.8209610102379339, "grad_norm": 0.3390832543373108, "learning_rate": 4.724556865227356e-06, "loss": 0.1046, "step": 46028 }, { "epoch": 0.8209788463596476, "grad_norm": 0.2783950865268707, "learning_rate": 4.723646313997293e-06, "loss": 0.1201, "step": 46029 }, { "epoch": 0.8209966824813613, "grad_norm": 0.19751502573490143, "learning_rate": 4.722735841365383e-06, "loss": 0.0877, "step": 46030 }, { "epoch": 0.821014518603075, "grad_norm": 0.28201955556869507, "learning_rate": 4.7218254473351455e-06, "loss": 0.168, "step": 46031 }, { "epoch": 0.8210323547247886, "grad_norm": 0.275782972574234, "learning_rate": 4.720915131910122e-06, "loss": 0.1407, "step": 46032 }, { "epoch": 0.8210501908465023, "grad_norm": 0.25187429785728455, "learning_rate": 4.720004895093835e-06, "loss": 0.166, "step": 46033 }, { "epoch": 0.821068026968216, "grad_norm": 0.2605955898761749, "learning_rate": 4.719094736889815e-06, "loss": 0.0994, "step": 46034 }, { "epoch": 0.8210858630899297, "grad_norm": 0.4518495500087738, "learning_rate": 4.718184657301583e-06, "loss": 0.1708, "step": 46035 }, { "epoch": 0.8211036992116434, "grad_norm": 0.23418988287448883, "learning_rate": 4.7172746563326765e-06, "loss": 0.0974, "step": 46036 }, { "epoch": 0.8211215353333571, "grad_norm": 0.19696015119552612, "learning_rate": 4.716364733986614e-06, "loss": 0.0911, "step": 46037 }, { "epoch": 0.8211393714550708, "grad_norm": 0.21459051966667175, "learning_rate": 4.715454890266932e-06, "loss": 0.0535, "step": 46038 }, { "epoch": 0.8211572075767845, "grad_norm": 0.40713441371917725, "learning_rate": 4.714545125177156e-06, "loss": 0.1223, "step": 46039 }, { "epoch": 0.8211750436984983, "grad_norm": 0.33374494314193726, "learning_rate": 4.7136354387208006e-06, "loss": 0.1311, "step": 46040 }, { "epoch": 0.8211928798202119, "grad_norm": 0.3240242600440979, "learning_rate": 4.71272583090141e-06, "loss": 0.1002, "step": 46041 }, { "epoch": 0.8212107159419256, "grad_norm": 0.2358447015285492, "learning_rate": 4.711816301722499e-06, "loss": 0.0964, "step": 46042 }, { "epoch": 0.8212285520636393, "grad_norm": 0.21508219838142395, "learning_rate": 4.710906851187594e-06, "loss": 0.1051, "step": 46043 }, { "epoch": 0.821246388185353, "grad_norm": 0.2570163905620575, "learning_rate": 4.709997479300219e-06, "loss": 0.1179, "step": 46044 }, { "epoch": 0.8212642243070667, "grad_norm": 0.31717947125434875, "learning_rate": 4.709088186063903e-06, "loss": 0.0704, "step": 46045 }, { "epoch": 0.8212820604287804, "grad_norm": 0.2874803841114044, "learning_rate": 4.7081789714821745e-06, "loss": 0.099, "step": 46046 }, { "epoch": 0.8212998965504941, "grad_norm": 0.19525286555290222, "learning_rate": 4.70726983555855e-06, "loss": 0.0587, "step": 46047 }, { "epoch": 0.8213177326722078, "grad_norm": 0.288347452878952, "learning_rate": 4.706360778296557e-06, "loss": 0.146, "step": 46048 }, { "epoch": 0.8213355687939214, "grad_norm": 0.23644642531871796, "learning_rate": 4.705451799699711e-06, "loss": 0.1047, "step": 46049 }, { "epoch": 0.8213534049156351, "grad_norm": 0.2333265244960785, "learning_rate": 4.7045428997715465e-06, "loss": 0.0921, "step": 46050 }, { "epoch": 0.8213712410373488, "grad_norm": 0.3645409643650055, "learning_rate": 4.703634078515589e-06, "loss": 0.0753, "step": 46051 }, { "epoch": 0.8213890771590625, "grad_norm": 0.2837405800819397, "learning_rate": 4.702725335935354e-06, "loss": 0.0862, "step": 46052 }, { "epoch": 0.8214069132807762, "grad_norm": 0.29357659816741943, "learning_rate": 4.701816672034362e-06, "loss": 0.0872, "step": 46053 }, { "epoch": 0.8214247494024899, "grad_norm": 0.3213426172733307, "learning_rate": 4.700908086816144e-06, "loss": 0.1171, "step": 46054 }, { "epoch": 0.8214425855242036, "grad_norm": 0.35706669092178345, "learning_rate": 4.69999958028422e-06, "loss": 0.1341, "step": 46055 }, { "epoch": 0.8214604216459173, "grad_norm": 0.28023555874824524, "learning_rate": 4.699091152442106e-06, "loss": 0.1708, "step": 46056 }, { "epoch": 0.8214782577676311, "grad_norm": 0.3517388105392456, "learning_rate": 4.698182803293321e-06, "loss": 0.0939, "step": 46057 }, { "epoch": 0.8214960938893447, "grad_norm": 0.30235356092453003, "learning_rate": 4.697274532841398e-06, "loss": 0.113, "step": 46058 }, { "epoch": 0.8215139300110584, "grad_norm": 0.29654595255851746, "learning_rate": 4.696366341089853e-06, "loss": 0.0862, "step": 46059 }, { "epoch": 0.8215317661327721, "grad_norm": 0.384662002325058, "learning_rate": 4.695458228042202e-06, "loss": 0.1445, "step": 46060 }, { "epoch": 0.8215496022544858, "grad_norm": 0.22031594812870026, "learning_rate": 4.694550193701968e-06, "loss": 0.11, "step": 46061 }, { "epoch": 0.8215674383761995, "grad_norm": 0.344844788312912, "learning_rate": 4.693642238072666e-06, "loss": 0.0608, "step": 46062 }, { "epoch": 0.8215852744979132, "grad_norm": 0.2458350956439972, "learning_rate": 4.692734361157827e-06, "loss": 0.0962, "step": 46063 }, { "epoch": 0.8216031106196269, "grad_norm": 0.39920634031295776, "learning_rate": 4.691826562960955e-06, "loss": 0.1046, "step": 46064 }, { "epoch": 0.8216209467413406, "grad_norm": 0.3210431635379791, "learning_rate": 4.690918843485584e-06, "loss": 0.1007, "step": 46065 }, { "epoch": 0.8216387828630543, "grad_norm": 0.3173997104167938, "learning_rate": 4.690011202735223e-06, "loss": 0.1173, "step": 46066 }, { "epoch": 0.8216566189847679, "grad_norm": 0.34847620129585266, "learning_rate": 4.689103640713397e-06, "loss": 0.1162, "step": 46067 }, { "epoch": 0.8216744551064816, "grad_norm": 0.21079584956169128, "learning_rate": 4.688196157423619e-06, "loss": 0.0804, "step": 46068 }, { "epoch": 0.8216922912281953, "grad_norm": 0.26405075192451477, "learning_rate": 4.687288752869412e-06, "loss": 0.1703, "step": 46069 }, { "epoch": 0.821710127349909, "grad_norm": 0.1882033795118332, "learning_rate": 4.686381427054279e-06, "loss": 0.0751, "step": 46070 }, { "epoch": 0.8217279634716227, "grad_norm": 0.2501179277896881, "learning_rate": 4.685474179981758e-06, "loss": 0.109, "step": 46071 }, { "epoch": 0.8217457995933364, "grad_norm": 0.2737066447734833, "learning_rate": 4.684567011655353e-06, "loss": 0.1259, "step": 46072 }, { "epoch": 0.8217636357150501, "grad_norm": 0.24505580961704254, "learning_rate": 4.683659922078584e-06, "loss": 0.0697, "step": 46073 }, { "epoch": 0.8217814718367639, "grad_norm": 0.42098796367645264, "learning_rate": 4.682752911254965e-06, "loss": 0.1164, "step": 46074 }, { "epoch": 0.8217993079584776, "grad_norm": 0.3601228892803192, "learning_rate": 4.681845979188007e-06, "loss": 0.1656, "step": 46075 }, { "epoch": 0.8218171440801912, "grad_norm": 0.23303711414337158, "learning_rate": 4.680939125881239e-06, "loss": 0.1239, "step": 46076 }, { "epoch": 0.8218349802019049, "grad_norm": 0.19640538096427917, "learning_rate": 4.680032351338162e-06, "loss": 0.082, "step": 46077 }, { "epoch": 0.8218528163236186, "grad_norm": 0.28218916058540344, "learning_rate": 4.679125655562306e-06, "loss": 0.1414, "step": 46078 }, { "epoch": 0.8218706524453323, "grad_norm": 0.3526959717273712, "learning_rate": 4.678219038557169e-06, "loss": 0.14, "step": 46079 }, { "epoch": 0.821888488567046, "grad_norm": 0.3054324686527252, "learning_rate": 4.677312500326281e-06, "loss": 0.1145, "step": 46080 }, { "epoch": 0.8219063246887597, "grad_norm": 0.21161223948001862, "learning_rate": 4.676406040873149e-06, "loss": 0.0559, "step": 46081 }, { "epoch": 0.8219241608104734, "grad_norm": 0.2855243682861328, "learning_rate": 4.675499660201288e-06, "loss": 0.1074, "step": 46082 }, { "epoch": 0.8219419969321871, "grad_norm": 0.3317650258541107, "learning_rate": 4.6745933583142e-06, "loss": 0.1019, "step": 46083 }, { "epoch": 0.8219598330539007, "grad_norm": 0.22176623344421387, "learning_rate": 4.6736871352154185e-06, "loss": 0.0769, "step": 46084 }, { "epoch": 0.8219776691756144, "grad_norm": 0.2839299142360687, "learning_rate": 4.672780990908446e-06, "loss": 0.1151, "step": 46085 }, { "epoch": 0.8219955052973281, "grad_norm": 0.2464563250541687, "learning_rate": 4.671874925396794e-06, "loss": 0.1275, "step": 46086 }, { "epoch": 0.8220133414190418, "grad_norm": 0.18365976214408875, "learning_rate": 4.670968938683975e-06, "loss": 0.089, "step": 46087 }, { "epoch": 0.8220311775407555, "grad_norm": 0.2941281199455261, "learning_rate": 4.670063030773497e-06, "loss": 0.1436, "step": 46088 }, { "epoch": 0.8220490136624692, "grad_norm": 0.24149273335933685, "learning_rate": 4.669157201668881e-06, "loss": 0.0949, "step": 46089 }, { "epoch": 0.8220668497841829, "grad_norm": 0.30592742562294006, "learning_rate": 4.668251451373634e-06, "loss": 0.1302, "step": 46090 }, { "epoch": 0.8220846859058967, "grad_norm": 0.27544450759887695, "learning_rate": 4.66734577989126e-06, "loss": 0.0993, "step": 46091 }, { "epoch": 0.8221025220276104, "grad_norm": 0.2548133134841919, "learning_rate": 4.666440187225285e-06, "loss": 0.1098, "step": 46092 }, { "epoch": 0.822120358149324, "grad_norm": 0.2702014744281769, "learning_rate": 4.665534673379204e-06, "loss": 0.1056, "step": 46093 }, { "epoch": 0.8221381942710377, "grad_norm": 0.3659670352935791, "learning_rate": 4.664629238356541e-06, "loss": 0.1448, "step": 46094 }, { "epoch": 0.8221560303927514, "grad_norm": 0.32081031799316406, "learning_rate": 4.663723882160797e-06, "loss": 0.0991, "step": 46095 }, { "epoch": 0.8221738665144651, "grad_norm": 0.2757716178894043, "learning_rate": 4.662818604795477e-06, "loss": 0.1485, "step": 46096 }, { "epoch": 0.8221917026361788, "grad_norm": 0.28562456369400024, "learning_rate": 4.661913406264101e-06, "loss": 0.1212, "step": 46097 }, { "epoch": 0.8222095387578925, "grad_norm": 0.26772695779800415, "learning_rate": 4.661008286570176e-06, "loss": 0.1072, "step": 46098 }, { "epoch": 0.8222273748796062, "grad_norm": 0.2566722631454468, "learning_rate": 4.660103245717207e-06, "loss": 0.1075, "step": 46099 }, { "epoch": 0.8222452110013199, "grad_norm": 0.32634469866752625, "learning_rate": 4.659198283708705e-06, "loss": 0.1117, "step": 46100 }, { "epoch": 0.8222630471230336, "grad_norm": 0.1959335058927536, "learning_rate": 4.658293400548166e-06, "loss": 0.0658, "step": 46101 }, { "epoch": 0.8222808832447472, "grad_norm": 0.2890969216823578, "learning_rate": 4.657388596239115e-06, "loss": 0.1041, "step": 46102 }, { "epoch": 0.8222987193664609, "grad_norm": 0.2475145161151886, "learning_rate": 4.6564838707850525e-06, "loss": 0.1031, "step": 46103 }, { "epoch": 0.8223165554881746, "grad_norm": 0.3062688410282135, "learning_rate": 4.655579224189477e-06, "loss": 0.1241, "step": 46104 }, { "epoch": 0.8223343916098883, "grad_norm": 0.2573624849319458, "learning_rate": 4.654674656455912e-06, "loss": 0.1172, "step": 46105 }, { "epoch": 0.822352227731602, "grad_norm": 0.2881820499897003, "learning_rate": 4.653770167587848e-06, "loss": 0.1329, "step": 46106 }, { "epoch": 0.8223700638533157, "grad_norm": 0.29084765911102295, "learning_rate": 4.652865757588803e-06, "loss": 0.0892, "step": 46107 }, { "epoch": 0.8223878999750295, "grad_norm": 0.3494262397289276, "learning_rate": 4.6519614264622785e-06, "loss": 0.1266, "step": 46108 }, { "epoch": 0.8224057360967432, "grad_norm": 0.34002685546875, "learning_rate": 4.651057174211776e-06, "loss": 0.0869, "step": 46109 }, { "epoch": 0.8224235722184569, "grad_norm": 0.24071046710014343, "learning_rate": 4.650153000840807e-06, "loss": 0.0915, "step": 46110 }, { "epoch": 0.8224414083401705, "grad_norm": 0.3147448003292084, "learning_rate": 4.6492489063528736e-06, "loss": 0.0709, "step": 46111 }, { "epoch": 0.8224592444618842, "grad_norm": 0.284736692905426, "learning_rate": 4.648344890751483e-06, "loss": 0.1317, "step": 46112 }, { "epoch": 0.8224770805835979, "grad_norm": 0.30460789799690247, "learning_rate": 4.647440954040136e-06, "loss": 0.0767, "step": 46113 }, { "epoch": 0.8224949167053116, "grad_norm": 0.28746193647384644, "learning_rate": 4.646537096222331e-06, "loss": 0.1052, "step": 46114 }, { "epoch": 0.8225127528270253, "grad_norm": 0.2532510459423065, "learning_rate": 4.645633317301584e-06, "loss": 0.0849, "step": 46115 }, { "epoch": 0.822530588948739, "grad_norm": 0.22827962040901184, "learning_rate": 4.64472961728139e-06, "loss": 0.044, "step": 46116 }, { "epoch": 0.8225484250704527, "grad_norm": 0.40525200963020325, "learning_rate": 4.643825996165257e-06, "loss": 0.1295, "step": 46117 }, { "epoch": 0.8225662611921664, "grad_norm": 0.25670236349105835, "learning_rate": 4.64292245395668e-06, "loss": 0.0794, "step": 46118 }, { "epoch": 0.82258409731388, "grad_norm": 0.2593821585178375, "learning_rate": 4.642018990659172e-06, "loss": 0.109, "step": 46119 }, { "epoch": 0.8226019334355937, "grad_norm": 0.26307791471481323, "learning_rate": 4.641115606276222e-06, "loss": 0.156, "step": 46120 }, { "epoch": 0.8226197695573074, "grad_norm": 0.4257407784461975, "learning_rate": 4.640212300811348e-06, "loss": 0.1378, "step": 46121 }, { "epoch": 0.8226376056790211, "grad_norm": 0.2438928633928299, "learning_rate": 4.639309074268036e-06, "loss": 0.1343, "step": 46122 }, { "epoch": 0.8226554418007348, "grad_norm": 0.3015868663787842, "learning_rate": 4.638405926649802e-06, "loss": 0.1374, "step": 46123 }, { "epoch": 0.8226732779224486, "grad_norm": 0.2525673508644104, "learning_rate": 4.637502857960138e-06, "loss": 0.0661, "step": 46124 }, { "epoch": 0.8226911140441623, "grad_norm": 0.2619667649269104, "learning_rate": 4.636599868202546e-06, "loss": 0.1491, "step": 46125 }, { "epoch": 0.822708950165876, "grad_norm": 0.30909499526023865, "learning_rate": 4.6356969573805255e-06, "loss": 0.0896, "step": 46126 }, { "epoch": 0.8227267862875897, "grad_norm": 0.3124559819698334, "learning_rate": 4.6347941254975716e-06, "loss": 0.141, "step": 46127 }, { "epoch": 0.8227446224093033, "grad_norm": 0.21617282927036285, "learning_rate": 4.633891372557195e-06, "loss": 0.0803, "step": 46128 }, { "epoch": 0.822762458531017, "grad_norm": 0.29223906993865967, "learning_rate": 4.632988698562893e-06, "loss": 0.1427, "step": 46129 }, { "epoch": 0.8227802946527307, "grad_norm": 0.27456992864608765, "learning_rate": 4.632086103518157e-06, "loss": 0.1054, "step": 46130 }, { "epoch": 0.8227981307744444, "grad_norm": 0.2903895080089569, "learning_rate": 4.631183587426485e-06, "loss": 0.1329, "step": 46131 }, { "epoch": 0.8228159668961581, "grad_norm": 0.26002299785614014, "learning_rate": 4.630281150291388e-06, "loss": 0.0917, "step": 46132 }, { "epoch": 0.8228338030178718, "grad_norm": 0.3393615186214447, "learning_rate": 4.629378792116351e-06, "loss": 0.1507, "step": 46133 }, { "epoch": 0.8228516391395855, "grad_norm": 0.26019203662872314, "learning_rate": 4.628476512904884e-06, "loss": 0.1179, "step": 46134 }, { "epoch": 0.8228694752612992, "grad_norm": 0.25363609194755554, "learning_rate": 4.627574312660468e-06, "loss": 0.0938, "step": 46135 }, { "epoch": 0.8228873113830129, "grad_norm": 0.3216564655303955, "learning_rate": 4.626672191386622e-06, "loss": 0.1426, "step": 46136 }, { "epoch": 0.8229051475047265, "grad_norm": 0.3058150112628937, "learning_rate": 4.6257701490868275e-06, "loss": 0.1369, "step": 46137 }, { "epoch": 0.8229229836264402, "grad_norm": 0.3566642999649048, "learning_rate": 4.624868185764586e-06, "loss": 0.142, "step": 46138 }, { "epoch": 0.8229408197481539, "grad_norm": 0.38568368554115295, "learning_rate": 4.623966301423393e-06, "loss": 0.1114, "step": 46139 }, { "epoch": 0.8229586558698676, "grad_norm": 0.2623521089553833, "learning_rate": 4.623064496066737e-06, "loss": 0.0951, "step": 46140 }, { "epoch": 0.8229764919915814, "grad_norm": 0.2472400814294815, "learning_rate": 4.62216276969813e-06, "loss": 0.0708, "step": 46141 }, { "epoch": 0.8229943281132951, "grad_norm": 0.2786538600921631, "learning_rate": 4.621261122321055e-06, "loss": 0.0978, "step": 46142 }, { "epoch": 0.8230121642350088, "grad_norm": 0.344588041305542, "learning_rate": 4.620359553939013e-06, "loss": 0.1347, "step": 46143 }, { "epoch": 0.8230300003567225, "grad_norm": 0.26819539070129395, "learning_rate": 4.6194580645554875e-06, "loss": 0.1014, "step": 46144 }, { "epoch": 0.8230478364784362, "grad_norm": 0.24067746102809906, "learning_rate": 4.618556654173991e-06, "loss": 0.0911, "step": 46145 }, { "epoch": 0.8230656726001498, "grad_norm": 0.31288135051727295, "learning_rate": 4.617655322798006e-06, "loss": 0.0821, "step": 46146 }, { "epoch": 0.8230835087218635, "grad_norm": 0.22926175594329834, "learning_rate": 4.616754070431023e-06, "loss": 0.0784, "step": 46147 }, { "epoch": 0.8231013448435772, "grad_norm": 0.29319366812705994, "learning_rate": 4.615852897076542e-06, "loss": 0.1395, "step": 46148 }, { "epoch": 0.8231191809652909, "grad_norm": 0.2678544521331787, "learning_rate": 4.614951802738063e-06, "loss": 0.1451, "step": 46149 }, { "epoch": 0.8231370170870046, "grad_norm": 0.4900416433811188, "learning_rate": 4.6140507874190694e-06, "loss": 0.1076, "step": 46150 }, { "epoch": 0.8231548532087183, "grad_norm": 0.2736718952655792, "learning_rate": 4.613149851123058e-06, "loss": 0.0868, "step": 46151 }, { "epoch": 0.823172689330432, "grad_norm": 0.25595220923423767, "learning_rate": 4.6122489938535185e-06, "loss": 0.1107, "step": 46152 }, { "epoch": 0.8231905254521457, "grad_norm": 0.26881736516952515, "learning_rate": 4.6113482156139366e-06, "loss": 0.1092, "step": 46153 }, { "epoch": 0.8232083615738593, "grad_norm": 0.2667938768863678, "learning_rate": 4.6104475164078175e-06, "loss": 0.136, "step": 46154 }, { "epoch": 0.823226197695573, "grad_norm": 0.2897258400917053, "learning_rate": 4.609546896238645e-06, "loss": 0.0871, "step": 46155 }, { "epoch": 0.8232440338172867, "grad_norm": 0.2286834865808487, "learning_rate": 4.6086463551099135e-06, "loss": 0.1065, "step": 46156 }, { "epoch": 0.8232618699390004, "grad_norm": 0.23880279064178467, "learning_rate": 4.607745893025106e-06, "loss": 0.1048, "step": 46157 }, { "epoch": 0.8232797060607142, "grad_norm": 0.3318907618522644, "learning_rate": 4.606845509987723e-06, "loss": 0.155, "step": 46158 }, { "epoch": 0.8232975421824279, "grad_norm": 0.31443554162979126, "learning_rate": 4.6059452060012495e-06, "loss": 0.147, "step": 46159 }, { "epoch": 0.8233153783041416, "grad_norm": 0.30736300349235535, "learning_rate": 4.605044981069173e-06, "loss": 0.1438, "step": 46160 }, { "epoch": 0.8233332144258553, "grad_norm": 0.4022292494773865, "learning_rate": 4.60414483519499e-06, "loss": 0.1387, "step": 46161 }, { "epoch": 0.823351050547569, "grad_norm": 0.27604565024375916, "learning_rate": 4.603244768382181e-06, "loss": 0.0981, "step": 46162 }, { "epoch": 0.8233688866692827, "grad_norm": 0.26068148016929626, "learning_rate": 4.602344780634247e-06, "loss": 0.0869, "step": 46163 }, { "epoch": 0.8233867227909963, "grad_norm": 0.24158501625061035, "learning_rate": 4.601444871954669e-06, "loss": 0.0925, "step": 46164 }, { "epoch": 0.82340455891271, "grad_norm": 0.2656979560852051, "learning_rate": 4.6005450423469345e-06, "loss": 0.1353, "step": 46165 }, { "epoch": 0.8234223950344237, "grad_norm": 0.2927142381668091, "learning_rate": 4.599645291814531e-06, "loss": 0.1221, "step": 46166 }, { "epoch": 0.8234402311561374, "grad_norm": 0.21333450078964233, "learning_rate": 4.59874562036095e-06, "loss": 0.1057, "step": 46167 }, { "epoch": 0.8234580672778511, "grad_norm": 0.3720265328884125, "learning_rate": 4.5978460279896805e-06, "loss": 0.1283, "step": 46168 }, { "epoch": 0.8234759033995648, "grad_norm": 0.21119123697280884, "learning_rate": 4.596946514704206e-06, "loss": 0.0924, "step": 46169 }, { "epoch": 0.8234937395212785, "grad_norm": 0.26303210854530334, "learning_rate": 4.5960470805080065e-06, "loss": 0.0894, "step": 46170 }, { "epoch": 0.8235115756429922, "grad_norm": 0.2832821011543274, "learning_rate": 4.595147725404583e-06, "loss": 0.1359, "step": 46171 }, { "epoch": 0.8235294117647058, "grad_norm": 0.20838138461112976, "learning_rate": 4.594248449397415e-06, "loss": 0.0952, "step": 46172 }, { "epoch": 0.8235472478864195, "grad_norm": 0.3012889325618744, "learning_rate": 4.593349252489987e-06, "loss": 0.139, "step": 46173 }, { "epoch": 0.8235650840081332, "grad_norm": 0.2740786671638489, "learning_rate": 4.592450134685778e-06, "loss": 0.1676, "step": 46174 }, { "epoch": 0.823582920129847, "grad_norm": 0.4201247990131378, "learning_rate": 4.591551095988289e-06, "loss": 0.1753, "step": 46175 }, { "epoch": 0.8236007562515607, "grad_norm": 0.2660380005836487, "learning_rate": 4.590652136400989e-06, "loss": 0.1123, "step": 46176 }, { "epoch": 0.8236185923732744, "grad_norm": 0.2980002462863922, "learning_rate": 4.589753255927378e-06, "loss": 0.1459, "step": 46177 }, { "epoch": 0.8236364284949881, "grad_norm": 0.24808001518249512, "learning_rate": 4.588854454570926e-06, "loss": 0.1427, "step": 46178 }, { "epoch": 0.8236542646167018, "grad_norm": 0.20960798859596252, "learning_rate": 4.587955732335131e-06, "loss": 0.0938, "step": 46179 }, { "epoch": 0.8236721007384155, "grad_norm": 0.3316652774810791, "learning_rate": 4.58705708922347e-06, "loss": 0.0954, "step": 46180 }, { "epoch": 0.8236899368601291, "grad_norm": 0.31050950288772583, "learning_rate": 4.586158525239425e-06, "loss": 0.091, "step": 46181 }, { "epoch": 0.8237077729818428, "grad_norm": 0.2365371286869049, "learning_rate": 4.58526004038648e-06, "loss": 0.111, "step": 46182 }, { "epoch": 0.8237256091035565, "grad_norm": 0.2544407546520233, "learning_rate": 4.584361634668113e-06, "loss": 0.0996, "step": 46183 }, { "epoch": 0.8237434452252702, "grad_norm": 0.28068748116493225, "learning_rate": 4.583463308087815e-06, "loss": 0.1494, "step": 46184 }, { "epoch": 0.8237612813469839, "grad_norm": 0.230164036154747, "learning_rate": 4.5825650606490685e-06, "loss": 0.0944, "step": 46185 }, { "epoch": 0.8237791174686976, "grad_norm": 0.38114142417907715, "learning_rate": 4.581666892355352e-06, "loss": 0.0768, "step": 46186 }, { "epoch": 0.8237969535904113, "grad_norm": 0.3075874447822571, "learning_rate": 4.580768803210137e-06, "loss": 0.1374, "step": 46187 }, { "epoch": 0.823814789712125, "grad_norm": 0.27401652932167053, "learning_rate": 4.579870793216923e-06, "loss": 0.0906, "step": 46188 }, { "epoch": 0.8238326258338386, "grad_norm": 0.30371198058128357, "learning_rate": 4.578972862379177e-06, "loss": 0.1769, "step": 46189 }, { "epoch": 0.8238504619555523, "grad_norm": 0.34023985266685486, "learning_rate": 4.578075010700392e-06, "loss": 0.1147, "step": 46190 }, { "epoch": 0.823868298077266, "grad_norm": 0.1968194991350174, "learning_rate": 4.577177238184033e-06, "loss": 0.043, "step": 46191 }, { "epoch": 0.8238861341989798, "grad_norm": 0.23457229137420654, "learning_rate": 4.576279544833597e-06, "loss": 0.0852, "step": 46192 }, { "epoch": 0.8239039703206935, "grad_norm": 0.2830899655818939, "learning_rate": 4.575381930652556e-06, "loss": 0.1041, "step": 46193 }, { "epoch": 0.8239218064424072, "grad_norm": 0.27478930354118347, "learning_rate": 4.574484395644388e-06, "loss": 0.1207, "step": 46194 }, { "epoch": 0.8239396425641209, "grad_norm": 0.32030749320983887, "learning_rate": 4.573586939812574e-06, "loss": 0.1171, "step": 46195 }, { "epoch": 0.8239574786858346, "grad_norm": 0.2744278609752655, "learning_rate": 4.5726895631605845e-06, "loss": 0.0863, "step": 46196 }, { "epoch": 0.8239753148075483, "grad_norm": 0.17130441963672638, "learning_rate": 4.571792265691913e-06, "loss": 0.0488, "step": 46197 }, { "epoch": 0.823993150929262, "grad_norm": 0.30343884229660034, "learning_rate": 4.570895047410032e-06, "loss": 0.0778, "step": 46198 }, { "epoch": 0.8240109870509756, "grad_norm": 0.2665086090564728, "learning_rate": 4.569997908318416e-06, "loss": 0.1172, "step": 46199 }, { "epoch": 0.8240288231726893, "grad_norm": 0.2738859951496124, "learning_rate": 4.569100848420538e-06, "loss": 0.1355, "step": 46200 }, { "epoch": 0.824046659294403, "grad_norm": 0.34839731454849243, "learning_rate": 4.568203867719886e-06, "loss": 0.137, "step": 46201 }, { "epoch": 0.8240644954161167, "grad_norm": 0.2935768663883209, "learning_rate": 4.567306966219937e-06, "loss": 0.0888, "step": 46202 }, { "epoch": 0.8240823315378304, "grad_norm": 0.2638930678367615, "learning_rate": 4.566410143924152e-06, "loss": 0.0843, "step": 46203 }, { "epoch": 0.8241001676595441, "grad_norm": 0.2797393798828125, "learning_rate": 4.565513400836022e-06, "loss": 0.0732, "step": 46204 }, { "epoch": 0.8241180037812578, "grad_norm": 0.35857200622558594, "learning_rate": 4.564616736959027e-06, "loss": 0.12, "step": 46205 }, { "epoch": 0.8241358399029715, "grad_norm": 0.24700576066970825, "learning_rate": 4.5637201522966355e-06, "loss": 0.1195, "step": 46206 }, { "epoch": 0.8241536760246851, "grad_norm": 0.21895363926887512, "learning_rate": 4.562823646852321e-06, "loss": 0.068, "step": 46207 }, { "epoch": 0.8241715121463988, "grad_norm": 0.2787195146083832, "learning_rate": 4.561927220629561e-06, "loss": 0.1097, "step": 46208 }, { "epoch": 0.8241893482681126, "grad_norm": 0.23904530704021454, "learning_rate": 4.561030873631828e-06, "loss": 0.0588, "step": 46209 }, { "epoch": 0.8242071843898263, "grad_norm": 0.2670755684375763, "learning_rate": 4.560134605862601e-06, "loss": 0.1081, "step": 46210 }, { "epoch": 0.82422502051154, "grad_norm": 0.22265464067459106, "learning_rate": 4.559238417325351e-06, "loss": 0.0867, "step": 46211 }, { "epoch": 0.8242428566332537, "grad_norm": 0.3074725270271301, "learning_rate": 4.558342308023556e-06, "loss": 0.1971, "step": 46212 }, { "epoch": 0.8242606927549674, "grad_norm": 0.2584645748138428, "learning_rate": 4.557446277960678e-06, "loss": 0.1079, "step": 46213 }, { "epoch": 0.8242785288766811, "grad_norm": 0.30519789457321167, "learning_rate": 4.556550327140207e-06, "loss": 0.1144, "step": 46214 }, { "epoch": 0.8242963649983948, "grad_norm": 0.19832868874073029, "learning_rate": 4.555654455565609e-06, "loss": 0.1403, "step": 46215 }, { "epoch": 0.8243142011201084, "grad_norm": 0.21667800843715668, "learning_rate": 4.554758663240347e-06, "loss": 0.0774, "step": 46216 }, { "epoch": 0.8243320372418221, "grad_norm": 0.2568855881690979, "learning_rate": 4.553862950167908e-06, "loss": 0.1546, "step": 46217 }, { "epoch": 0.8243498733635358, "grad_norm": 0.25933149456977844, "learning_rate": 4.552967316351753e-06, "loss": 0.2038, "step": 46218 }, { "epoch": 0.8243677094852495, "grad_norm": 0.3272765874862671, "learning_rate": 4.552071761795363e-06, "loss": 0.1183, "step": 46219 }, { "epoch": 0.8243855456069632, "grad_norm": 0.4056278467178345, "learning_rate": 4.551176286502207e-06, "loss": 0.1323, "step": 46220 }, { "epoch": 0.8244033817286769, "grad_norm": 0.24973776936531067, "learning_rate": 4.550280890475756e-06, "loss": 0.1307, "step": 46221 }, { "epoch": 0.8244212178503906, "grad_norm": 0.2523420453071594, "learning_rate": 4.54938557371947e-06, "loss": 0.0872, "step": 46222 }, { "epoch": 0.8244390539721043, "grad_norm": 0.3047141432762146, "learning_rate": 4.548490336236838e-06, "loss": 0.1639, "step": 46223 }, { "epoch": 0.824456890093818, "grad_norm": 0.28480643033981323, "learning_rate": 4.5475951780313194e-06, "loss": 0.1451, "step": 46224 }, { "epoch": 0.8244747262155318, "grad_norm": 0.2801607847213745, "learning_rate": 4.546700099106385e-06, "loss": 0.1047, "step": 46225 }, { "epoch": 0.8244925623372454, "grad_norm": 0.2784700393676758, "learning_rate": 4.545805099465503e-06, "loss": 0.109, "step": 46226 }, { "epoch": 0.8245103984589591, "grad_norm": 0.27022579312324524, "learning_rate": 4.544910179112146e-06, "loss": 0.0845, "step": 46227 }, { "epoch": 0.8245282345806728, "grad_norm": 0.30743691325187683, "learning_rate": 4.5440153380497865e-06, "loss": 0.099, "step": 46228 }, { "epoch": 0.8245460707023865, "grad_norm": 0.2754661440849304, "learning_rate": 4.5431205762818865e-06, "loss": 0.0459, "step": 46229 }, { "epoch": 0.8245639068241002, "grad_norm": 0.23122017085552216, "learning_rate": 4.542225893811913e-06, "loss": 0.1036, "step": 46230 }, { "epoch": 0.8245817429458139, "grad_norm": 0.23332935571670532, "learning_rate": 4.541331290643336e-06, "loss": 0.1177, "step": 46231 }, { "epoch": 0.8245995790675276, "grad_norm": 0.28356999158859253, "learning_rate": 4.540436766779632e-06, "loss": 0.147, "step": 46232 }, { "epoch": 0.8246174151892413, "grad_norm": 0.28929442167282104, "learning_rate": 4.539542322224261e-06, "loss": 0.1079, "step": 46233 }, { "epoch": 0.8246352513109549, "grad_norm": 0.3091000020503998, "learning_rate": 4.5386479569806905e-06, "loss": 0.1382, "step": 46234 }, { "epoch": 0.8246530874326686, "grad_norm": 0.31929466128349304, "learning_rate": 4.537753671052381e-06, "loss": 0.0648, "step": 46235 }, { "epoch": 0.8246709235543823, "grad_norm": 0.2694339156150818, "learning_rate": 4.536859464442814e-06, "loss": 0.104, "step": 46236 }, { "epoch": 0.824688759676096, "grad_norm": 0.3294632136821747, "learning_rate": 4.535965337155445e-06, "loss": 0.1256, "step": 46237 }, { "epoch": 0.8247065957978097, "grad_norm": 0.3106650114059448, "learning_rate": 4.535071289193743e-06, "loss": 0.0953, "step": 46238 }, { "epoch": 0.8247244319195234, "grad_norm": 0.30092278122901917, "learning_rate": 4.534177320561167e-06, "loss": 0.1142, "step": 46239 }, { "epoch": 0.8247422680412371, "grad_norm": 0.34116989374160767, "learning_rate": 4.533283431261195e-06, "loss": 0.1467, "step": 46240 }, { "epoch": 0.8247601041629508, "grad_norm": 0.30845415592193604, "learning_rate": 4.532389621297287e-06, "loss": 0.1024, "step": 46241 }, { "epoch": 0.8247779402846646, "grad_norm": 0.28233957290649414, "learning_rate": 4.531495890672904e-06, "loss": 0.1427, "step": 46242 }, { "epoch": 0.8247957764063782, "grad_norm": 0.5807080268859863, "learning_rate": 4.530602239391507e-06, "loss": 0.1022, "step": 46243 }, { "epoch": 0.8248136125280919, "grad_norm": 0.2583715319633484, "learning_rate": 4.529708667456572e-06, "loss": 0.1166, "step": 46244 }, { "epoch": 0.8248314486498056, "grad_norm": 0.2822147607803345, "learning_rate": 4.528815174871548e-06, "loss": 0.1254, "step": 46245 }, { "epoch": 0.8248492847715193, "grad_norm": 0.20499397814273834, "learning_rate": 4.527921761639916e-06, "loss": 0.0606, "step": 46246 }, { "epoch": 0.824867120893233, "grad_norm": 0.24845187366008759, "learning_rate": 4.527028427765129e-06, "loss": 0.0838, "step": 46247 }, { "epoch": 0.8248849570149467, "grad_norm": 0.2561642825603485, "learning_rate": 4.526135173250645e-06, "loss": 0.0948, "step": 46248 }, { "epoch": 0.8249027931366604, "grad_norm": 0.5120947360992432, "learning_rate": 4.5252419980999384e-06, "loss": 0.1016, "step": 46249 }, { "epoch": 0.8249206292583741, "grad_norm": 0.2862080931663513, "learning_rate": 4.5243489023164665e-06, "loss": 0.1459, "step": 46250 }, { "epoch": 0.8249384653800877, "grad_norm": 0.31652697920799255, "learning_rate": 4.523455885903688e-06, "loss": 0.0719, "step": 46251 }, { "epoch": 0.8249563015018014, "grad_norm": 0.3754929304122925, "learning_rate": 4.522562948865064e-06, "loss": 0.1352, "step": 46252 }, { "epoch": 0.8249741376235151, "grad_norm": 0.2790130078792572, "learning_rate": 4.521670091204061e-06, "loss": 0.0869, "step": 46253 }, { "epoch": 0.8249919737452288, "grad_norm": 0.28048959374427795, "learning_rate": 4.520777312924141e-06, "loss": 0.0873, "step": 46254 }, { "epoch": 0.8250098098669425, "grad_norm": 0.29519322514533997, "learning_rate": 4.51988461402876e-06, "loss": 0.1262, "step": 46255 }, { "epoch": 0.8250276459886562, "grad_norm": 0.2821556329727173, "learning_rate": 4.518991994521374e-06, "loss": 0.0849, "step": 46256 }, { "epoch": 0.8250454821103699, "grad_norm": 0.2634526491165161, "learning_rate": 4.518099454405456e-06, "loss": 0.0891, "step": 46257 }, { "epoch": 0.8250633182320836, "grad_norm": 0.28975245356559753, "learning_rate": 4.51720699368445e-06, "loss": 0.1095, "step": 46258 }, { "epoch": 0.8250811543537974, "grad_norm": 0.3463969826698303, "learning_rate": 4.5163146123618325e-06, "loss": 0.0975, "step": 46259 }, { "epoch": 0.825098990475511, "grad_norm": 0.17858460545539856, "learning_rate": 4.515422310441053e-06, "loss": 0.0596, "step": 46260 }, { "epoch": 0.8251168265972247, "grad_norm": 0.29375067353248596, "learning_rate": 4.514530087925567e-06, "loss": 0.1491, "step": 46261 }, { "epoch": 0.8251346627189384, "grad_norm": 0.28398069739341736, "learning_rate": 4.5136379448188436e-06, "loss": 0.1232, "step": 46262 }, { "epoch": 0.8251524988406521, "grad_norm": 0.2173001617193222, "learning_rate": 4.512745881124333e-06, "loss": 0.0766, "step": 46263 }, { "epoch": 0.8251703349623658, "grad_norm": 0.358110636472702, "learning_rate": 4.5118538968455e-06, "loss": 0.2114, "step": 46264 }, { "epoch": 0.8251881710840795, "grad_norm": 0.4235260486602783, "learning_rate": 4.5109619919857875e-06, "loss": 0.1333, "step": 46265 }, { "epoch": 0.8252060072057932, "grad_norm": 0.32002028822898865, "learning_rate": 4.510070166548671e-06, "loss": 0.1416, "step": 46266 }, { "epoch": 0.8252238433275069, "grad_norm": 0.3162918984889984, "learning_rate": 4.509178420537599e-06, "loss": 0.1469, "step": 46267 }, { "epoch": 0.8252416794492206, "grad_norm": 0.20543918013572693, "learning_rate": 4.5082867539560285e-06, "loss": 0.0878, "step": 46268 }, { "epoch": 0.8252595155709342, "grad_norm": 0.2594909071922302, "learning_rate": 4.507395166807413e-06, "loss": 0.0916, "step": 46269 }, { "epoch": 0.8252773516926479, "grad_norm": 0.22302532196044922, "learning_rate": 4.506503659095216e-06, "loss": 0.0878, "step": 46270 }, { "epoch": 0.8252951878143616, "grad_norm": 0.23516328632831573, "learning_rate": 4.505612230822887e-06, "loss": 0.1346, "step": 46271 }, { "epoch": 0.8253130239360753, "grad_norm": 0.2626056969165802, "learning_rate": 4.504720881993879e-06, "loss": 0.0833, "step": 46272 }, { "epoch": 0.825330860057789, "grad_norm": 0.23956821858882904, "learning_rate": 4.503829612611657e-06, "loss": 0.1006, "step": 46273 }, { "epoch": 0.8253486961795027, "grad_norm": 0.33588707447052, "learning_rate": 4.502938422679665e-06, "loss": 0.1494, "step": 46274 }, { "epoch": 0.8253665323012164, "grad_norm": 0.2861526906490326, "learning_rate": 4.5020473122013685e-06, "loss": 0.1241, "step": 46275 }, { "epoch": 0.8253843684229302, "grad_norm": 0.22129428386688232, "learning_rate": 4.5011562811802184e-06, "loss": 0.1017, "step": 46276 }, { "epoch": 0.8254022045446439, "grad_norm": 0.22234410047531128, "learning_rate": 4.500265329619665e-06, "loss": 0.137, "step": 46277 }, { "epoch": 0.8254200406663575, "grad_norm": 0.2388879954814911, "learning_rate": 4.4993744575231585e-06, "loss": 0.0911, "step": 46278 }, { "epoch": 0.8254378767880712, "grad_norm": 0.3475572168827057, "learning_rate": 4.498483664894162e-06, "loss": 0.1091, "step": 46279 }, { "epoch": 0.8254557129097849, "grad_norm": 0.21472719311714172, "learning_rate": 4.497592951736124e-06, "loss": 0.1065, "step": 46280 }, { "epoch": 0.8254735490314986, "grad_norm": 0.2855726480484009, "learning_rate": 4.496702318052498e-06, "loss": 0.1256, "step": 46281 }, { "epoch": 0.8254913851532123, "grad_norm": 0.2390160858631134, "learning_rate": 4.49581176384673e-06, "loss": 0.0907, "step": 46282 }, { "epoch": 0.825509221274926, "grad_norm": 0.2769002616405487, "learning_rate": 4.494921289122281e-06, "loss": 0.0521, "step": 46283 }, { "epoch": 0.8255270573966397, "grad_norm": 0.3119021952152252, "learning_rate": 4.494030893882603e-06, "loss": 0.1294, "step": 46284 }, { "epoch": 0.8255448935183534, "grad_norm": 0.27752071619033813, "learning_rate": 4.493140578131136e-06, "loss": 0.098, "step": 46285 }, { "epoch": 0.825562729640067, "grad_norm": 0.2968054711818695, "learning_rate": 4.492250341871343e-06, "loss": 0.1231, "step": 46286 }, { "epoch": 0.8255805657617807, "grad_norm": 0.259860634803772, "learning_rate": 4.491360185106666e-06, "loss": 0.1023, "step": 46287 }, { "epoch": 0.8255984018834944, "grad_norm": 0.3266686201095581, "learning_rate": 4.490470107840566e-06, "loss": 0.0819, "step": 46288 }, { "epoch": 0.8256162380052081, "grad_norm": 0.24233923852443695, "learning_rate": 4.4895801100764915e-06, "loss": 0.1191, "step": 46289 }, { "epoch": 0.8256340741269218, "grad_norm": 0.24481399357318878, "learning_rate": 4.4886901918178844e-06, "loss": 0.0793, "step": 46290 }, { "epoch": 0.8256519102486355, "grad_norm": 0.26234865188598633, "learning_rate": 4.487800353068192e-06, "loss": 0.1013, "step": 46291 }, { "epoch": 0.8256697463703492, "grad_norm": 0.21920229494571686, "learning_rate": 4.486910593830878e-06, "loss": 0.1008, "step": 46292 }, { "epoch": 0.825687582492063, "grad_norm": 0.215499609708786, "learning_rate": 4.486020914109384e-06, "loss": 0.1011, "step": 46293 }, { "epoch": 0.8257054186137767, "grad_norm": 0.31064313650131226, "learning_rate": 4.485131313907156e-06, "loss": 0.0958, "step": 46294 }, { "epoch": 0.8257232547354904, "grad_norm": 0.2392728477716446, "learning_rate": 4.484241793227642e-06, "loss": 0.0961, "step": 46295 }, { "epoch": 0.825741090857204, "grad_norm": 0.37710610032081604, "learning_rate": 4.483352352074297e-06, "loss": 0.114, "step": 46296 }, { "epoch": 0.8257589269789177, "grad_norm": 0.2994675636291504, "learning_rate": 4.4824629904505644e-06, "loss": 0.0897, "step": 46297 }, { "epoch": 0.8257767631006314, "grad_norm": 0.2258623093366623, "learning_rate": 4.48157370835989e-06, "loss": 0.0714, "step": 46298 }, { "epoch": 0.8257945992223451, "grad_norm": 0.23060540854930878, "learning_rate": 4.480684505805718e-06, "loss": 0.106, "step": 46299 }, { "epoch": 0.8258124353440588, "grad_norm": 0.3615747392177582, "learning_rate": 4.479795382791508e-06, "loss": 0.102, "step": 46300 }, { "epoch": 0.8258302714657725, "grad_norm": 0.3808033764362335, "learning_rate": 4.478906339320691e-06, "loss": 0.1548, "step": 46301 }, { "epoch": 0.8258481075874862, "grad_norm": 0.2565228343009949, "learning_rate": 4.478017375396726e-06, "loss": 0.0784, "step": 46302 }, { "epoch": 0.8258659437091999, "grad_norm": 0.24148888885974884, "learning_rate": 4.477128491023055e-06, "loss": 0.0957, "step": 46303 }, { "epoch": 0.8258837798309135, "grad_norm": 0.24815578758716583, "learning_rate": 4.476239686203115e-06, "loss": 0.083, "step": 46304 }, { "epoch": 0.8259016159526272, "grad_norm": 0.22411227226257324, "learning_rate": 4.475350960940366e-06, "loss": 0.0841, "step": 46305 }, { "epoch": 0.8259194520743409, "grad_norm": 0.27995002269744873, "learning_rate": 4.474462315238246e-06, "loss": 0.1576, "step": 46306 }, { "epoch": 0.8259372881960546, "grad_norm": 0.25976744294166565, "learning_rate": 4.473573749100199e-06, "loss": 0.0941, "step": 46307 }, { "epoch": 0.8259551243177683, "grad_norm": 0.2552463114261627, "learning_rate": 4.472685262529664e-06, "loss": 0.097, "step": 46308 }, { "epoch": 0.825972960439482, "grad_norm": 0.6652151346206665, "learning_rate": 4.471796855530095e-06, "loss": 0.1407, "step": 46309 }, { "epoch": 0.8259907965611958, "grad_norm": 0.24483048915863037, "learning_rate": 4.470908528104933e-06, "loss": 0.0697, "step": 46310 }, { "epoch": 0.8260086326829095, "grad_norm": 0.34648269414901733, "learning_rate": 4.470020280257619e-06, "loss": 0.1634, "step": 46311 }, { "epoch": 0.8260264688046232, "grad_norm": 0.2337142676115036, "learning_rate": 4.469132111991592e-06, "loss": 0.0973, "step": 46312 }, { "epoch": 0.8260443049263368, "grad_norm": 0.2229405641555786, "learning_rate": 4.468244023310305e-06, "loss": 0.1279, "step": 46313 }, { "epoch": 0.8260621410480505, "grad_norm": 0.3589451014995575, "learning_rate": 4.467356014217192e-06, "loss": 0.134, "step": 46314 }, { "epoch": 0.8260799771697642, "grad_norm": 0.23785343766212463, "learning_rate": 4.466468084715702e-06, "loss": 0.0762, "step": 46315 }, { "epoch": 0.8260978132914779, "grad_norm": 0.29191070795059204, "learning_rate": 4.4655802348092775e-06, "loss": 0.1351, "step": 46316 }, { "epoch": 0.8261156494131916, "grad_norm": 0.2532224953174591, "learning_rate": 4.4646924645013465e-06, "loss": 0.0793, "step": 46317 }, { "epoch": 0.8261334855349053, "grad_norm": 0.524084210395813, "learning_rate": 4.463804773795369e-06, "loss": 0.1477, "step": 46318 }, { "epoch": 0.826151321656619, "grad_norm": 0.21385720372200012, "learning_rate": 4.4629171626947765e-06, "loss": 0.1053, "step": 46319 }, { "epoch": 0.8261691577783327, "grad_norm": 0.35684239864349365, "learning_rate": 4.46202963120301e-06, "loss": 0.1692, "step": 46320 }, { "epoch": 0.8261869939000464, "grad_norm": 0.24282434582710266, "learning_rate": 4.461142179323502e-06, "loss": 0.0716, "step": 46321 }, { "epoch": 0.82620483002176, "grad_norm": 0.20248641073703766, "learning_rate": 4.46025480705971e-06, "loss": 0.0557, "step": 46322 }, { "epoch": 0.8262226661434737, "grad_norm": 0.2696966528892517, "learning_rate": 4.459367514415064e-06, "loss": 0.0908, "step": 46323 }, { "epoch": 0.8262405022651874, "grad_norm": 0.20231105387210846, "learning_rate": 4.458480301393003e-06, "loss": 0.0957, "step": 46324 }, { "epoch": 0.8262583383869011, "grad_norm": 0.3420659303665161, "learning_rate": 4.457593167996968e-06, "loss": 0.0811, "step": 46325 }, { "epoch": 0.8262761745086148, "grad_norm": 0.34777867794036865, "learning_rate": 4.456706114230391e-06, "loss": 0.1331, "step": 46326 }, { "epoch": 0.8262940106303286, "grad_norm": 0.2817397117614746, "learning_rate": 4.455819140096723e-06, "loss": 0.1117, "step": 46327 }, { "epoch": 0.8263118467520423, "grad_norm": 0.3114819824695587, "learning_rate": 4.454932245599389e-06, "loss": 0.0879, "step": 46328 }, { "epoch": 0.826329682873756, "grad_norm": 0.2996848523616791, "learning_rate": 4.454045430741838e-06, "loss": 0.129, "step": 46329 }, { "epoch": 0.8263475189954697, "grad_norm": 0.2899375557899475, "learning_rate": 4.453158695527499e-06, "loss": 0.1045, "step": 46330 }, { "epoch": 0.8263653551171833, "grad_norm": 0.22163324058055878, "learning_rate": 4.452272039959821e-06, "loss": 0.0987, "step": 46331 }, { "epoch": 0.826383191238897, "grad_norm": 0.31059330701828003, "learning_rate": 4.451385464042229e-06, "loss": 0.1088, "step": 46332 }, { "epoch": 0.8264010273606107, "grad_norm": 0.3378532826900482, "learning_rate": 4.4504989677781685e-06, "loss": 0.1295, "step": 46333 }, { "epoch": 0.8264188634823244, "grad_norm": 0.28912967443466187, "learning_rate": 4.449612551171064e-06, "loss": 0.0594, "step": 46334 }, { "epoch": 0.8264366996040381, "grad_norm": 0.2329649031162262, "learning_rate": 4.448726214224366e-06, "loss": 0.1071, "step": 46335 }, { "epoch": 0.8264545357257518, "grad_norm": 0.2518095076084137, "learning_rate": 4.447839956941502e-06, "loss": 0.0512, "step": 46336 }, { "epoch": 0.8264723718474655, "grad_norm": 0.27875518798828125, "learning_rate": 4.44695377932591e-06, "loss": 0.1064, "step": 46337 }, { "epoch": 0.8264902079691792, "grad_norm": 0.2622019648551941, "learning_rate": 4.446067681381022e-06, "loss": 0.0838, "step": 46338 }, { "epoch": 0.8265080440908928, "grad_norm": 0.318490207195282, "learning_rate": 4.4451816631102704e-06, "loss": 0.1198, "step": 46339 }, { "epoch": 0.8265258802126065, "grad_norm": 0.21269266307353973, "learning_rate": 4.4442957245171e-06, "loss": 0.1194, "step": 46340 }, { "epoch": 0.8265437163343202, "grad_norm": 0.23801380395889282, "learning_rate": 4.443409865604933e-06, "loss": 0.1064, "step": 46341 }, { "epoch": 0.8265615524560339, "grad_norm": 0.32886651158332825, "learning_rate": 4.442524086377217e-06, "loss": 0.106, "step": 46342 }, { "epoch": 0.8265793885777477, "grad_norm": 0.293696790933609, "learning_rate": 4.441638386837368e-06, "loss": 0.125, "step": 46343 }, { "epoch": 0.8265972246994614, "grad_norm": 0.32515859603881836, "learning_rate": 4.44075276698884e-06, "loss": 0.1691, "step": 46344 }, { "epoch": 0.8266150608211751, "grad_norm": 0.2994956970214844, "learning_rate": 4.439867226835051e-06, "loss": 0.1323, "step": 46345 }, { "epoch": 0.8266328969428888, "grad_norm": 0.2532998323440552, "learning_rate": 4.438981766379441e-06, "loss": 0.0928, "step": 46346 }, { "epoch": 0.8266507330646025, "grad_norm": 0.2636529803276062, "learning_rate": 4.438096385625431e-06, "loss": 0.0928, "step": 46347 }, { "epoch": 0.8266685691863161, "grad_norm": 0.21411831676959991, "learning_rate": 4.437211084576467e-06, "loss": 0.0853, "step": 46348 }, { "epoch": 0.8266864053080298, "grad_norm": 0.2518365979194641, "learning_rate": 4.436325863235976e-06, "loss": 0.0997, "step": 46349 }, { "epoch": 0.8267042414297435, "grad_norm": 0.24956312775611877, "learning_rate": 4.435440721607389e-06, "loss": 0.1168, "step": 46350 }, { "epoch": 0.8267220775514572, "grad_norm": 0.34560057520866394, "learning_rate": 4.434555659694137e-06, "loss": 0.1014, "step": 46351 }, { "epoch": 0.8267399136731709, "grad_norm": 0.3348969519138336, "learning_rate": 4.433670677499643e-06, "loss": 0.1598, "step": 46352 }, { "epoch": 0.8267577497948846, "grad_norm": 0.25283145904541016, "learning_rate": 4.4327857750273514e-06, "loss": 0.1496, "step": 46353 }, { "epoch": 0.8267755859165983, "grad_norm": 0.30321258306503296, "learning_rate": 4.431900952280685e-06, "loss": 0.1309, "step": 46354 }, { "epoch": 0.826793422038312, "grad_norm": 0.3203035891056061, "learning_rate": 4.43101620926307e-06, "loss": 0.1259, "step": 46355 }, { "epoch": 0.8268112581600257, "grad_norm": 0.26212987303733826, "learning_rate": 4.430131545977945e-06, "loss": 0.1284, "step": 46356 }, { "epoch": 0.8268290942817393, "grad_norm": 0.25806617736816406, "learning_rate": 4.429246962428729e-06, "loss": 0.0977, "step": 46357 }, { "epoch": 0.826846930403453, "grad_norm": 0.22036178410053253, "learning_rate": 4.428362458618865e-06, "loss": 0.1076, "step": 46358 }, { "epoch": 0.8268647665251667, "grad_norm": 0.31598806381225586, "learning_rate": 4.427478034551771e-06, "loss": 0.1135, "step": 46359 }, { "epoch": 0.8268826026468805, "grad_norm": 0.25243866443634033, "learning_rate": 4.4265936902308706e-06, "loss": 0.1123, "step": 46360 }, { "epoch": 0.8269004387685942, "grad_norm": 0.34924691915512085, "learning_rate": 4.425709425659608e-06, "loss": 0.1209, "step": 46361 }, { "epoch": 0.8269182748903079, "grad_norm": 0.293104350566864, "learning_rate": 4.424825240841399e-06, "loss": 0.1347, "step": 46362 }, { "epoch": 0.8269361110120216, "grad_norm": 0.2945515513420105, "learning_rate": 4.423941135779674e-06, "loss": 0.1256, "step": 46363 }, { "epoch": 0.8269539471337353, "grad_norm": 0.3119187355041504, "learning_rate": 4.423057110477863e-06, "loss": 0.0517, "step": 46364 }, { "epoch": 0.826971783255449, "grad_norm": 0.3108843266963959, "learning_rate": 4.42217316493938e-06, "loss": 0.1516, "step": 46365 }, { "epoch": 0.8269896193771626, "grad_norm": 0.26875820755958557, "learning_rate": 4.421289299167669e-06, "loss": 0.1423, "step": 46366 }, { "epoch": 0.8270074554988763, "grad_norm": 0.2618597745895386, "learning_rate": 4.420405513166148e-06, "loss": 0.1166, "step": 46367 }, { "epoch": 0.82702529162059, "grad_norm": 0.2817710041999817, "learning_rate": 4.4195218069382375e-06, "loss": 0.1372, "step": 46368 }, { "epoch": 0.8270431277423037, "grad_norm": 0.22582118213176727, "learning_rate": 4.418638180487375e-06, "loss": 0.0818, "step": 46369 }, { "epoch": 0.8270609638640174, "grad_norm": 0.25454017519950867, "learning_rate": 4.417754633816973e-06, "loss": 0.1304, "step": 46370 }, { "epoch": 0.8270787999857311, "grad_norm": 0.1895970106124878, "learning_rate": 4.416871166930467e-06, "loss": 0.1234, "step": 46371 }, { "epoch": 0.8270966361074448, "grad_norm": 0.2849210798740387, "learning_rate": 4.415987779831279e-06, "loss": 0.1014, "step": 46372 }, { "epoch": 0.8271144722291585, "grad_norm": 0.36823350191116333, "learning_rate": 4.415104472522827e-06, "loss": 0.1161, "step": 46373 }, { "epoch": 0.8271323083508721, "grad_norm": 0.2871535122394562, "learning_rate": 4.414221245008546e-06, "loss": 0.0987, "step": 46374 }, { "epoch": 0.8271501444725858, "grad_norm": 0.23042576014995575, "learning_rate": 4.413338097291852e-06, "loss": 0.0877, "step": 46375 }, { "epoch": 0.8271679805942995, "grad_norm": 0.2570904791355133, "learning_rate": 4.412455029376172e-06, "loss": 0.1335, "step": 46376 }, { "epoch": 0.8271858167160133, "grad_norm": 0.2820962071418762, "learning_rate": 4.411572041264925e-06, "loss": 0.1145, "step": 46377 }, { "epoch": 0.827203652837727, "grad_norm": 0.15524698793888092, "learning_rate": 4.410689132961529e-06, "loss": 0.0478, "step": 46378 }, { "epoch": 0.8272214889594407, "grad_norm": 0.23548926413059235, "learning_rate": 4.409806304469422e-06, "loss": 0.115, "step": 46379 }, { "epoch": 0.8272393250811544, "grad_norm": 0.3157966136932373, "learning_rate": 4.408923555792016e-06, "loss": 0.1111, "step": 46380 }, { "epoch": 0.8272571612028681, "grad_norm": 0.269752562046051, "learning_rate": 4.408040886932735e-06, "loss": 0.1242, "step": 46381 }, { "epoch": 0.8272749973245818, "grad_norm": 0.3442416489124298, "learning_rate": 4.4071582978949925e-06, "loss": 0.1069, "step": 46382 }, { "epoch": 0.8272928334462955, "grad_norm": 0.27273133397102356, "learning_rate": 4.4062757886822235e-06, "loss": 0.1114, "step": 46383 }, { "epoch": 0.8273106695680091, "grad_norm": 0.255541056394577, "learning_rate": 4.405393359297835e-06, "loss": 0.1315, "step": 46384 }, { "epoch": 0.8273285056897228, "grad_norm": 0.3046436011791229, "learning_rate": 4.404511009745263e-06, "loss": 0.0974, "step": 46385 }, { "epoch": 0.8273463418114365, "grad_norm": 0.33289432525634766, "learning_rate": 4.403628740027912e-06, "loss": 0.1784, "step": 46386 }, { "epoch": 0.8273641779331502, "grad_norm": 0.4371652603149414, "learning_rate": 4.4027465501492174e-06, "loss": 0.1443, "step": 46387 }, { "epoch": 0.8273820140548639, "grad_norm": 0.27950721979141235, "learning_rate": 4.401864440112591e-06, "loss": 0.109, "step": 46388 }, { "epoch": 0.8273998501765776, "grad_norm": 0.31119170784950256, "learning_rate": 4.400982409921453e-06, "loss": 0.1158, "step": 46389 }, { "epoch": 0.8274176862982913, "grad_norm": 0.29666703939437866, "learning_rate": 4.40010045957922e-06, "loss": 0.1124, "step": 46390 }, { "epoch": 0.827435522420005, "grad_norm": 0.35534292459487915, "learning_rate": 4.3992185890893075e-06, "loss": 0.1291, "step": 46391 }, { "epoch": 0.8274533585417186, "grad_norm": 0.2469429224729538, "learning_rate": 4.398336798455147e-06, "loss": 0.1092, "step": 46392 }, { "epoch": 0.8274711946634323, "grad_norm": 0.34191715717315674, "learning_rate": 4.397455087680147e-06, "loss": 0.1406, "step": 46393 }, { "epoch": 0.8274890307851461, "grad_norm": 0.5258873701095581, "learning_rate": 4.396573456767725e-06, "loss": 0.1693, "step": 46394 }, { "epoch": 0.8275068669068598, "grad_norm": 0.257668137550354, "learning_rate": 4.395691905721297e-06, "loss": 0.095, "step": 46395 }, { "epoch": 0.8275247030285735, "grad_norm": 0.33640992641448975, "learning_rate": 4.394810434544288e-06, "loss": 0.0933, "step": 46396 }, { "epoch": 0.8275425391502872, "grad_norm": 0.21505111455917358, "learning_rate": 4.393929043240105e-06, "loss": 0.1331, "step": 46397 }, { "epoch": 0.8275603752720009, "grad_norm": 0.23586580157279968, "learning_rate": 4.393047731812177e-06, "loss": 0.1049, "step": 46398 }, { "epoch": 0.8275782113937146, "grad_norm": 0.2532597482204437, "learning_rate": 4.392166500263906e-06, "loss": 0.1283, "step": 46399 }, { "epoch": 0.8275960475154283, "grad_norm": 0.2986949682235718, "learning_rate": 4.391285348598722e-06, "loss": 0.1165, "step": 46400 }, { "epoch": 0.827613883637142, "grad_norm": 0.23167075216770172, "learning_rate": 4.390404276820037e-06, "loss": 0.1167, "step": 46401 }, { "epoch": 0.8276317197588556, "grad_norm": 0.2582891881465912, "learning_rate": 4.3895232849312605e-06, "loss": 0.1255, "step": 46402 }, { "epoch": 0.8276495558805693, "grad_norm": 0.2820345461368561, "learning_rate": 4.388642372935811e-06, "loss": 0.1101, "step": 46403 }, { "epoch": 0.827667392002283, "grad_norm": 0.2451031506061554, "learning_rate": 4.387761540837096e-06, "loss": 0.1137, "step": 46404 }, { "epoch": 0.8276852281239967, "grad_norm": 0.30854934453964233, "learning_rate": 4.386880788638542e-06, "loss": 0.1129, "step": 46405 }, { "epoch": 0.8277030642457104, "grad_norm": 0.30441048741340637, "learning_rate": 4.386000116343558e-06, "loss": 0.1149, "step": 46406 }, { "epoch": 0.8277209003674241, "grad_norm": 0.280292272567749, "learning_rate": 4.3851195239555586e-06, "loss": 0.1061, "step": 46407 }, { "epoch": 0.8277387364891378, "grad_norm": 0.35672205686569214, "learning_rate": 4.384239011477947e-06, "loss": 0.1367, "step": 46408 }, { "epoch": 0.8277565726108514, "grad_norm": 0.23385368287563324, "learning_rate": 4.383358578914154e-06, "loss": 0.0763, "step": 46409 }, { "epoch": 0.8277744087325651, "grad_norm": 0.2788551449775696, "learning_rate": 4.382478226267583e-06, "loss": 0.0703, "step": 46410 }, { "epoch": 0.8277922448542789, "grad_norm": 0.29580509662628174, "learning_rate": 4.381597953541644e-06, "loss": 0.1328, "step": 46411 }, { "epoch": 0.8278100809759926, "grad_norm": 0.269625186920166, "learning_rate": 4.38071776073975e-06, "loss": 0.1018, "step": 46412 }, { "epoch": 0.8278279170977063, "grad_norm": 0.2543848752975464, "learning_rate": 4.379837647865323e-06, "loss": 0.162, "step": 46413 }, { "epoch": 0.82784575321942, "grad_norm": 0.21752704679965973, "learning_rate": 4.378957614921766e-06, "loss": 0.0679, "step": 46414 }, { "epoch": 0.8278635893411337, "grad_norm": 0.2891313433647156, "learning_rate": 4.3780776619124915e-06, "loss": 0.0784, "step": 46415 }, { "epoch": 0.8278814254628474, "grad_norm": 0.23833556473255157, "learning_rate": 4.377197788840912e-06, "loss": 0.1365, "step": 46416 }, { "epoch": 0.8278992615845611, "grad_norm": 0.2756827473640442, "learning_rate": 4.37631799571043e-06, "loss": 0.131, "step": 46417 }, { "epoch": 0.8279170977062748, "grad_norm": 0.1818789839744568, "learning_rate": 4.375438282524469e-06, "loss": 0.0905, "step": 46418 }, { "epoch": 0.8279349338279884, "grad_norm": 0.3355312645435333, "learning_rate": 4.374558649286431e-06, "loss": 0.1281, "step": 46419 }, { "epoch": 0.8279527699497021, "grad_norm": 0.29590240120887756, "learning_rate": 4.37367909599973e-06, "loss": 0.1002, "step": 46420 }, { "epoch": 0.8279706060714158, "grad_norm": 0.29002490639686584, "learning_rate": 4.372799622667764e-06, "loss": 0.1004, "step": 46421 }, { "epoch": 0.8279884421931295, "grad_norm": 0.2694943845272064, "learning_rate": 4.37192022929396e-06, "loss": 0.1225, "step": 46422 }, { "epoch": 0.8280062783148432, "grad_norm": 0.3024190664291382, "learning_rate": 4.371040915881716e-06, "loss": 0.0978, "step": 46423 }, { "epoch": 0.8280241144365569, "grad_norm": 0.30632027983665466, "learning_rate": 4.370161682434437e-06, "loss": 0.1643, "step": 46424 }, { "epoch": 0.8280419505582706, "grad_norm": 0.31171733140945435, "learning_rate": 4.369282528955543e-06, "loss": 0.1489, "step": 46425 }, { "epoch": 0.8280597866799843, "grad_norm": 0.31796911358833313, "learning_rate": 4.368403455448428e-06, "loss": 0.1015, "step": 46426 }, { "epoch": 0.8280776228016979, "grad_norm": 0.282086044549942, "learning_rate": 4.367524461916514e-06, "loss": 0.1115, "step": 46427 }, { "epoch": 0.8280954589234117, "grad_norm": 0.3228877782821655, "learning_rate": 4.366645548363202e-06, "loss": 0.1144, "step": 46428 }, { "epoch": 0.8281132950451254, "grad_norm": 0.5565622448921204, "learning_rate": 4.3657667147918995e-06, "loss": 0.0955, "step": 46429 }, { "epoch": 0.8281311311668391, "grad_norm": 0.2789391875267029, "learning_rate": 4.3648879612060045e-06, "loss": 0.0448, "step": 46430 }, { "epoch": 0.8281489672885528, "grad_norm": 0.26229822635650635, "learning_rate": 4.364009287608936e-06, "loss": 0.0762, "step": 46431 }, { "epoch": 0.8281668034102665, "grad_norm": 0.2685633599758148, "learning_rate": 4.3631306940040975e-06, "loss": 0.0933, "step": 46432 }, { "epoch": 0.8281846395319802, "grad_norm": 0.2916639745235443, "learning_rate": 4.3622521803948905e-06, "loss": 0.1458, "step": 46433 }, { "epoch": 0.8282024756536939, "grad_norm": 0.21343868970870972, "learning_rate": 4.3613737467847164e-06, "loss": 0.098, "step": 46434 }, { "epoch": 0.8282203117754076, "grad_norm": 0.1641271561384201, "learning_rate": 4.3604953931769915e-06, "loss": 0.0167, "step": 46435 }, { "epoch": 0.8282381478971212, "grad_norm": 0.29804444313049316, "learning_rate": 4.3596171195751156e-06, "loss": 0.1086, "step": 46436 }, { "epoch": 0.8282559840188349, "grad_norm": 0.23752933740615845, "learning_rate": 4.358738925982495e-06, "loss": 0.1133, "step": 46437 }, { "epoch": 0.8282738201405486, "grad_norm": 0.2890227138996124, "learning_rate": 4.35786081240252e-06, "loss": 0.093, "step": 46438 }, { "epoch": 0.8282916562622623, "grad_norm": 0.22662892937660217, "learning_rate": 4.356982778838612e-06, "loss": 0.0973, "step": 46439 }, { "epoch": 0.828309492383976, "grad_norm": 0.23539596796035767, "learning_rate": 4.356104825294172e-06, "loss": 0.1063, "step": 46440 }, { "epoch": 0.8283273285056897, "grad_norm": 0.25857070088386536, "learning_rate": 4.355226951772598e-06, "loss": 0.084, "step": 46441 }, { "epoch": 0.8283451646274034, "grad_norm": 0.26156559586524963, "learning_rate": 4.354349158277296e-06, "loss": 0.1011, "step": 46442 }, { "epoch": 0.8283630007491171, "grad_norm": 0.2422717958688736, "learning_rate": 4.3534714448116635e-06, "loss": 0.1304, "step": 46443 }, { "epoch": 0.8283808368708309, "grad_norm": 0.23752345144748688, "learning_rate": 4.35259381137911e-06, "loss": 0.0977, "step": 46444 }, { "epoch": 0.8283986729925445, "grad_norm": 0.33036893606185913, "learning_rate": 4.351716257983035e-06, "loss": 0.1468, "step": 46445 }, { "epoch": 0.8284165091142582, "grad_norm": 0.29562899470329285, "learning_rate": 4.3508387846268425e-06, "loss": 0.0903, "step": 46446 }, { "epoch": 0.8284343452359719, "grad_norm": 0.3533494174480438, "learning_rate": 4.349961391313922e-06, "loss": 0.1626, "step": 46447 }, { "epoch": 0.8284521813576856, "grad_norm": 0.24387799203395844, "learning_rate": 4.3490840780476884e-06, "loss": 0.1007, "step": 46448 }, { "epoch": 0.8284700174793993, "grad_norm": 0.3472498655319214, "learning_rate": 4.34820684483154e-06, "loss": 0.1368, "step": 46449 }, { "epoch": 0.828487853601113, "grad_norm": 0.21397210657596588, "learning_rate": 4.347329691668875e-06, "loss": 0.1037, "step": 46450 }, { "epoch": 0.8285056897228267, "grad_norm": 0.23745764791965485, "learning_rate": 4.346452618563085e-06, "loss": 0.1307, "step": 46451 }, { "epoch": 0.8285235258445404, "grad_norm": 0.4360826909542084, "learning_rate": 4.345575625517584e-06, "loss": 0.115, "step": 46452 }, { "epoch": 0.828541361966254, "grad_norm": 0.22171275317668915, "learning_rate": 4.344698712535761e-06, "loss": 0.0879, "step": 46453 }, { "epoch": 0.8285591980879677, "grad_norm": 0.21346944570541382, "learning_rate": 4.343821879621027e-06, "loss": 0.1111, "step": 46454 }, { "epoch": 0.8285770342096814, "grad_norm": 0.25692903995513916, "learning_rate": 4.342945126776773e-06, "loss": 0.1122, "step": 46455 }, { "epoch": 0.8285948703313951, "grad_norm": 0.3024471402168274, "learning_rate": 4.34206845400639e-06, "loss": 0.1272, "step": 46456 }, { "epoch": 0.8286127064531088, "grad_norm": 0.25844213366508484, "learning_rate": 4.341191861313293e-06, "loss": 0.1124, "step": 46457 }, { "epoch": 0.8286305425748225, "grad_norm": 0.3144952356815338, "learning_rate": 4.3403153487008714e-06, "loss": 0.0786, "step": 46458 }, { "epoch": 0.8286483786965362, "grad_norm": 0.20591102540493011, "learning_rate": 4.339438916172525e-06, "loss": 0.1376, "step": 46459 }, { "epoch": 0.8286662148182499, "grad_norm": 0.2556131184101105, "learning_rate": 4.338562563731641e-06, "loss": 0.0854, "step": 46460 }, { "epoch": 0.8286840509399637, "grad_norm": 0.21406857669353485, "learning_rate": 4.337686291381629e-06, "loss": 0.133, "step": 46461 }, { "epoch": 0.8287018870616774, "grad_norm": 0.23430363833904266, "learning_rate": 4.336810099125884e-06, "loss": 0.0808, "step": 46462 }, { "epoch": 0.828719723183391, "grad_norm": 0.24600858986377716, "learning_rate": 4.335933986967799e-06, "loss": 0.1123, "step": 46463 }, { "epoch": 0.8287375593051047, "grad_norm": 0.40453529357910156, "learning_rate": 4.335057954910768e-06, "loss": 0.1743, "step": 46464 }, { "epoch": 0.8287553954268184, "grad_norm": 0.24288958311080933, "learning_rate": 4.334182002958192e-06, "loss": 0.0964, "step": 46465 }, { "epoch": 0.8287732315485321, "grad_norm": 0.26050734519958496, "learning_rate": 4.33330613111346e-06, "loss": 0.133, "step": 46466 }, { "epoch": 0.8287910676702458, "grad_norm": 0.3225926458835602, "learning_rate": 4.332430339379978e-06, "loss": 0.1277, "step": 46467 }, { "epoch": 0.8288089037919595, "grad_norm": 0.268846720457077, "learning_rate": 4.331554627761134e-06, "loss": 0.116, "step": 46468 }, { "epoch": 0.8288267399136732, "grad_norm": 0.31195732951164246, "learning_rate": 4.330678996260315e-06, "loss": 0.1482, "step": 46469 }, { "epoch": 0.8288445760353869, "grad_norm": 0.24562333524227142, "learning_rate": 4.329803444880931e-06, "loss": 0.1483, "step": 46470 }, { "epoch": 0.8288624121571005, "grad_norm": 0.19142258167266846, "learning_rate": 4.328927973626368e-06, "loss": 0.0765, "step": 46471 }, { "epoch": 0.8288802482788142, "grad_norm": 0.2689981460571289, "learning_rate": 4.32805258250002e-06, "loss": 0.1365, "step": 46472 }, { "epoch": 0.8288980844005279, "grad_norm": 0.23739860951900482, "learning_rate": 4.327177271505273e-06, "loss": 0.1339, "step": 46473 }, { "epoch": 0.8289159205222416, "grad_norm": 0.4686424732208252, "learning_rate": 4.326302040645533e-06, "loss": 0.1034, "step": 46474 }, { "epoch": 0.8289337566439553, "grad_norm": 0.39115047454833984, "learning_rate": 4.325426889924186e-06, "loss": 0.1397, "step": 46475 }, { "epoch": 0.828951592765669, "grad_norm": 0.279804527759552, "learning_rate": 4.324551819344627e-06, "loss": 0.1332, "step": 46476 }, { "epoch": 0.8289694288873827, "grad_norm": 0.22898860275745392, "learning_rate": 4.323676828910239e-06, "loss": 0.0547, "step": 46477 }, { "epoch": 0.8289872650090965, "grad_norm": 0.36812683939933777, "learning_rate": 4.3228019186244275e-06, "loss": 0.1859, "step": 46478 }, { "epoch": 0.8290051011308102, "grad_norm": 0.437730073928833, "learning_rate": 4.321927088490577e-06, "loss": 0.1114, "step": 46479 }, { "epoch": 0.8290229372525239, "grad_norm": 0.23340614140033722, "learning_rate": 4.321052338512074e-06, "loss": 0.0591, "step": 46480 }, { "epoch": 0.8290407733742375, "grad_norm": 0.19981735944747925, "learning_rate": 4.320177668692321e-06, "loss": 0.1089, "step": 46481 }, { "epoch": 0.8290586094959512, "grad_norm": 0.27109524607658386, "learning_rate": 4.319303079034695e-06, "loss": 0.0906, "step": 46482 }, { "epoch": 0.8290764456176649, "grad_norm": 0.3076198995113373, "learning_rate": 4.318428569542599e-06, "loss": 0.0648, "step": 46483 }, { "epoch": 0.8290942817393786, "grad_norm": 0.3023016154766083, "learning_rate": 4.3175541402194155e-06, "loss": 0.1178, "step": 46484 }, { "epoch": 0.8291121178610923, "grad_norm": 0.3083108365535736, "learning_rate": 4.31667979106854e-06, "loss": 0.1241, "step": 46485 }, { "epoch": 0.829129953982806, "grad_norm": 0.1969596892595291, "learning_rate": 4.315805522093347e-06, "loss": 0.0916, "step": 46486 }, { "epoch": 0.8291477901045197, "grad_norm": 0.231434166431427, "learning_rate": 4.3149313332972425e-06, "loss": 0.0824, "step": 46487 }, { "epoch": 0.8291656262262334, "grad_norm": 0.22454939782619476, "learning_rate": 4.3140572246836095e-06, "loss": 0.0892, "step": 46488 }, { "epoch": 0.829183462347947, "grad_norm": 0.257914274930954, "learning_rate": 4.313183196255838e-06, "loss": 0.0983, "step": 46489 }, { "epoch": 0.8292012984696607, "grad_norm": 0.41265565156936646, "learning_rate": 4.312309248017305e-06, "loss": 0.1369, "step": 46490 }, { "epoch": 0.8292191345913744, "grad_norm": 0.18557557463645935, "learning_rate": 4.3114353799714126e-06, "loss": 0.0868, "step": 46491 }, { "epoch": 0.8292369707130881, "grad_norm": 0.2766319513320923, "learning_rate": 4.310561592121539e-06, "loss": 0.1607, "step": 46492 }, { "epoch": 0.8292548068348018, "grad_norm": 0.308193176984787, "learning_rate": 4.309687884471081e-06, "loss": 0.0952, "step": 46493 }, { "epoch": 0.8292726429565155, "grad_norm": 0.2726615369319916, "learning_rate": 4.308814257023408e-06, "loss": 0.2323, "step": 46494 }, { "epoch": 0.8292904790782293, "grad_norm": 0.4430800974369049, "learning_rate": 4.307940709781918e-06, "loss": 0.1004, "step": 46495 }, { "epoch": 0.829308315199943, "grad_norm": 0.2686435878276825, "learning_rate": 4.307067242750007e-06, "loss": 0.1287, "step": 46496 }, { "epoch": 0.8293261513216567, "grad_norm": 0.22180284559726715, "learning_rate": 4.306193855931046e-06, "loss": 0.1109, "step": 46497 }, { "epoch": 0.8293439874433703, "grad_norm": 0.2933272123336792, "learning_rate": 4.305320549328426e-06, "loss": 0.1394, "step": 46498 }, { "epoch": 0.829361823565084, "grad_norm": 0.2671579122543335, "learning_rate": 4.304447322945526e-06, "loss": 0.1148, "step": 46499 }, { "epoch": 0.8293796596867977, "grad_norm": 0.228309765458107, "learning_rate": 4.303574176785741e-06, "loss": 0.0652, "step": 46500 }, { "epoch": 0.8293974958085114, "grad_norm": 0.2643977999687195, "learning_rate": 4.302701110852453e-06, "loss": 0.1005, "step": 46501 }, { "epoch": 0.8294153319302251, "grad_norm": 0.2623671293258667, "learning_rate": 4.301828125149043e-06, "loss": 0.0935, "step": 46502 }, { "epoch": 0.8294331680519388, "grad_norm": 0.2562272250652313, "learning_rate": 4.3009552196788896e-06, "loss": 0.1088, "step": 46503 }, { "epoch": 0.8294510041736525, "grad_norm": 0.3042919337749481, "learning_rate": 4.30008239444539e-06, "loss": 0.1359, "step": 46504 }, { "epoch": 0.8294688402953662, "grad_norm": 0.25932884216308594, "learning_rate": 4.299209649451918e-06, "loss": 0.1304, "step": 46505 }, { "epoch": 0.8294866764170798, "grad_norm": 0.18472617864608765, "learning_rate": 4.298336984701862e-06, "loss": 0.0747, "step": 46506 }, { "epoch": 0.8295045125387935, "grad_norm": 0.2599087953567505, "learning_rate": 4.297464400198595e-06, "loss": 0.1407, "step": 46507 }, { "epoch": 0.8295223486605072, "grad_norm": 0.24152512848377228, "learning_rate": 4.296591895945512e-06, "loss": 0.1273, "step": 46508 }, { "epoch": 0.8295401847822209, "grad_norm": 0.40741512179374695, "learning_rate": 4.295719471945983e-06, "loss": 0.1459, "step": 46509 }, { "epoch": 0.8295580209039346, "grad_norm": 0.19509044289588928, "learning_rate": 4.2948471282034054e-06, "loss": 0.1089, "step": 46510 }, { "epoch": 0.8295758570256483, "grad_norm": 0.23058395087718964, "learning_rate": 4.293974864721148e-06, "loss": 0.079, "step": 46511 }, { "epoch": 0.8295936931473621, "grad_norm": 0.26388972997665405, "learning_rate": 4.293102681502592e-06, "loss": 0.1081, "step": 46512 }, { "epoch": 0.8296115292690758, "grad_norm": 0.28935322165489197, "learning_rate": 4.292230578551126e-06, "loss": 0.1199, "step": 46513 }, { "epoch": 0.8296293653907895, "grad_norm": 0.32966896891593933, "learning_rate": 4.291358555870129e-06, "loss": 0.1422, "step": 46514 }, { "epoch": 0.8296472015125032, "grad_norm": 0.2210475355386734, "learning_rate": 4.290486613462977e-06, "loss": 0.0911, "step": 46515 }, { "epoch": 0.8296650376342168, "grad_norm": 0.23138806223869324, "learning_rate": 4.289614751333046e-06, "loss": 0.0849, "step": 46516 }, { "epoch": 0.8296828737559305, "grad_norm": 0.34146055579185486, "learning_rate": 4.288742969483727e-06, "loss": 0.1037, "step": 46517 }, { "epoch": 0.8297007098776442, "grad_norm": 0.29106923937797546, "learning_rate": 4.287871267918395e-06, "loss": 0.0816, "step": 46518 }, { "epoch": 0.8297185459993579, "grad_norm": 0.24652424454689026, "learning_rate": 4.286999646640428e-06, "loss": 0.1399, "step": 46519 }, { "epoch": 0.8297363821210716, "grad_norm": 0.38204681873321533, "learning_rate": 4.286128105653203e-06, "loss": 0.1478, "step": 46520 }, { "epoch": 0.8297542182427853, "grad_norm": 0.18774308264255524, "learning_rate": 4.2852566449600925e-06, "loss": 0.0624, "step": 46521 }, { "epoch": 0.829772054364499, "grad_norm": 0.19842013716697693, "learning_rate": 4.284385264564483e-06, "loss": 0.1161, "step": 46522 }, { "epoch": 0.8297898904862127, "grad_norm": 0.2144027203321457, "learning_rate": 4.283513964469759e-06, "loss": 0.1017, "step": 46523 }, { "epoch": 0.8298077266079263, "grad_norm": 0.24248646199703217, "learning_rate": 4.282642744679289e-06, "loss": 0.1182, "step": 46524 }, { "epoch": 0.82982556272964, "grad_norm": 0.2572486102581024, "learning_rate": 4.281771605196444e-06, "loss": 0.1021, "step": 46525 }, { "epoch": 0.8298433988513537, "grad_norm": 0.25692108273506165, "learning_rate": 4.280900546024616e-06, "loss": 0.1206, "step": 46526 }, { "epoch": 0.8298612349730674, "grad_norm": 0.19257889688014984, "learning_rate": 4.280029567167174e-06, "loss": 0.0551, "step": 46527 }, { "epoch": 0.8298790710947811, "grad_norm": 0.20198261737823486, "learning_rate": 4.279158668627492e-06, "loss": 0.1652, "step": 46528 }, { "epoch": 0.8298969072164949, "grad_norm": 0.19261956214904785, "learning_rate": 4.278287850408941e-06, "loss": 0.0934, "step": 46529 }, { "epoch": 0.8299147433382086, "grad_norm": 0.22037526965141296, "learning_rate": 4.2774171125149115e-06, "loss": 0.108, "step": 46530 }, { "epoch": 0.8299325794599223, "grad_norm": 0.22742339968681335, "learning_rate": 4.276546454948768e-06, "loss": 0.1072, "step": 46531 }, { "epoch": 0.829950415581636, "grad_norm": 0.22602617740631104, "learning_rate": 4.275675877713891e-06, "loss": 0.071, "step": 46532 }, { "epoch": 0.8299682517033496, "grad_norm": 0.3259783387184143, "learning_rate": 4.274805380813651e-06, "loss": 0.123, "step": 46533 }, { "epoch": 0.8299860878250633, "grad_norm": 0.2571578621864319, "learning_rate": 4.273934964251419e-06, "loss": 0.1206, "step": 46534 }, { "epoch": 0.830003923946777, "grad_norm": 0.3353583514690399, "learning_rate": 4.273064628030576e-06, "loss": 0.0905, "step": 46535 }, { "epoch": 0.8300217600684907, "grad_norm": 0.2520037591457367, "learning_rate": 4.272194372154492e-06, "loss": 0.1502, "step": 46536 }, { "epoch": 0.8300395961902044, "grad_norm": 0.2779551148414612, "learning_rate": 4.2713241966265445e-06, "loss": 0.1721, "step": 46537 }, { "epoch": 0.8300574323119181, "grad_norm": 0.2768584191799164, "learning_rate": 4.2704541014501e-06, "loss": 0.138, "step": 46538 }, { "epoch": 0.8300752684336318, "grad_norm": 0.25665348768234253, "learning_rate": 4.269584086628539e-06, "loss": 0.0784, "step": 46539 }, { "epoch": 0.8300931045553455, "grad_norm": 0.34828171133995056, "learning_rate": 4.2687141521652315e-06, "loss": 0.1183, "step": 46540 }, { "epoch": 0.8301109406770592, "grad_norm": 0.26388639211654663, "learning_rate": 4.267844298063547e-06, "loss": 0.1164, "step": 46541 }, { "epoch": 0.8301287767987728, "grad_norm": 0.29912909865379333, "learning_rate": 4.266974524326856e-06, "loss": 0.1079, "step": 46542 }, { "epoch": 0.8301466129204865, "grad_norm": 0.25899645686149597, "learning_rate": 4.2661048309585335e-06, "loss": 0.1096, "step": 46543 }, { "epoch": 0.8301644490422002, "grad_norm": 0.3737677037715912, "learning_rate": 4.2652352179619546e-06, "loss": 0.1594, "step": 46544 }, { "epoch": 0.830182285163914, "grad_norm": 0.3143066465854645, "learning_rate": 4.264365685340482e-06, "loss": 0.1373, "step": 46545 }, { "epoch": 0.8302001212856277, "grad_norm": 0.27195504307746887, "learning_rate": 4.2634962330974945e-06, "loss": 0.088, "step": 46546 }, { "epoch": 0.8302179574073414, "grad_norm": 0.31021612882614136, "learning_rate": 4.262626861236346e-06, "loss": 0.1611, "step": 46547 }, { "epoch": 0.8302357935290551, "grad_norm": 0.2511056661605835, "learning_rate": 4.261757569760427e-06, "loss": 0.1415, "step": 46548 }, { "epoch": 0.8302536296507688, "grad_norm": 0.3210453987121582, "learning_rate": 4.260888358673093e-06, "loss": 0.137, "step": 46549 }, { "epoch": 0.8302714657724825, "grad_norm": 0.29189273715019226, "learning_rate": 4.260019227977724e-06, "loss": 0.1781, "step": 46550 }, { "epoch": 0.8302893018941961, "grad_norm": 0.2712114453315735, "learning_rate": 4.259150177677679e-06, "loss": 0.0818, "step": 46551 }, { "epoch": 0.8303071380159098, "grad_norm": 0.22185178101062775, "learning_rate": 4.2582812077763375e-06, "loss": 0.1278, "step": 46552 }, { "epoch": 0.8303249741376235, "grad_norm": 0.2672707140445709, "learning_rate": 4.2574123182770595e-06, "loss": 0.112, "step": 46553 }, { "epoch": 0.8303428102593372, "grad_norm": 0.1874978393316269, "learning_rate": 4.256543509183219e-06, "loss": 0.0852, "step": 46554 }, { "epoch": 0.8303606463810509, "grad_norm": 0.34851282835006714, "learning_rate": 4.2556747804981724e-06, "loss": 0.099, "step": 46555 }, { "epoch": 0.8303784825027646, "grad_norm": 0.33474960923194885, "learning_rate": 4.2548061322253e-06, "loss": 0.0762, "step": 46556 }, { "epoch": 0.8303963186244783, "grad_norm": 0.2960427403450012, "learning_rate": 4.253937564367968e-06, "loss": 0.1513, "step": 46557 }, { "epoch": 0.830414154746192, "grad_norm": 0.2283165454864502, "learning_rate": 4.2530690769295365e-06, "loss": 0.0627, "step": 46558 }, { "epoch": 0.8304319908679056, "grad_norm": 0.24846839904785156, "learning_rate": 4.2522006699133754e-06, "loss": 0.0777, "step": 46559 }, { "epoch": 0.8304498269896193, "grad_norm": 0.21080508828163147, "learning_rate": 4.2513323433228445e-06, "loss": 0.0649, "step": 46560 }, { "epoch": 0.830467663111333, "grad_norm": 0.24394956231117249, "learning_rate": 4.2504640971613216e-06, "loss": 0.1151, "step": 46561 }, { "epoch": 0.8304854992330468, "grad_norm": 0.2898518741130829, "learning_rate": 4.249595931432168e-06, "loss": 0.1126, "step": 46562 }, { "epoch": 0.8305033353547605, "grad_norm": 0.27317920327186584, "learning_rate": 4.248727846138742e-06, "loss": 0.0922, "step": 46563 }, { "epoch": 0.8305211714764742, "grad_norm": 0.2531094551086426, "learning_rate": 4.247859841284418e-06, "loss": 0.0733, "step": 46564 }, { "epoch": 0.8305390075981879, "grad_norm": 0.219723641872406, "learning_rate": 4.24699191687255e-06, "loss": 0.1047, "step": 46565 }, { "epoch": 0.8305568437199016, "grad_norm": 0.2982277274131775, "learning_rate": 4.246124072906518e-06, "loss": 0.1106, "step": 46566 }, { "epoch": 0.8305746798416153, "grad_norm": 0.23950234055519104, "learning_rate": 4.2452563093896754e-06, "loss": 0.1226, "step": 46567 }, { "epoch": 0.830592515963329, "grad_norm": 0.289177805185318, "learning_rate": 4.244388626325382e-06, "loss": 0.1303, "step": 46568 }, { "epoch": 0.8306103520850426, "grad_norm": 0.32485687732696533, "learning_rate": 4.243521023717015e-06, "loss": 0.1227, "step": 46569 }, { "epoch": 0.8306281882067563, "grad_norm": 0.3352840542793274, "learning_rate": 4.242653501567928e-06, "loss": 0.1133, "step": 46570 }, { "epoch": 0.83064602432847, "grad_norm": 0.3036191463470459, "learning_rate": 4.241786059881484e-06, "loss": 0.0958, "step": 46571 }, { "epoch": 0.8306638604501837, "grad_norm": 0.335553914308548, "learning_rate": 4.24091869866105e-06, "loss": 0.0876, "step": 46572 }, { "epoch": 0.8306816965718974, "grad_norm": 0.24407418072223663, "learning_rate": 4.2400514179099765e-06, "loss": 0.1022, "step": 46573 }, { "epoch": 0.8306995326936111, "grad_norm": 0.3260498046875, "learning_rate": 4.2391842176316425e-06, "loss": 0.1167, "step": 46574 }, { "epoch": 0.8307173688153248, "grad_norm": 0.27406740188598633, "learning_rate": 4.238317097829397e-06, "loss": 0.073, "step": 46575 }, { "epoch": 0.8307352049370385, "grad_norm": 0.23395362496376038, "learning_rate": 4.237450058506603e-06, "loss": 0.0834, "step": 46576 }, { "epoch": 0.8307530410587521, "grad_norm": 0.2595573365688324, "learning_rate": 4.236583099666628e-06, "loss": 0.0924, "step": 46577 }, { "epoch": 0.8307708771804658, "grad_norm": 0.22945165634155273, "learning_rate": 4.235716221312821e-06, "loss": 0.1095, "step": 46578 }, { "epoch": 0.8307887133021796, "grad_norm": 0.2776692509651184, "learning_rate": 4.234849423448559e-06, "loss": 0.0843, "step": 46579 }, { "epoch": 0.8308065494238933, "grad_norm": 0.3470524251461029, "learning_rate": 4.233982706077191e-06, "loss": 0.1758, "step": 46580 }, { "epoch": 0.830824385545607, "grad_norm": 0.29722994565963745, "learning_rate": 4.233116069202072e-06, "loss": 0.0907, "step": 46581 }, { "epoch": 0.8308422216673207, "grad_norm": 0.4577447772026062, "learning_rate": 4.2322495128265745e-06, "loss": 0.0909, "step": 46582 }, { "epoch": 0.8308600577890344, "grad_norm": 0.3017619252204895, "learning_rate": 4.231383036954051e-06, "loss": 0.0982, "step": 46583 }, { "epoch": 0.8308778939107481, "grad_norm": 0.21828629076480865, "learning_rate": 4.230516641587859e-06, "loss": 0.1159, "step": 46584 }, { "epoch": 0.8308957300324618, "grad_norm": 0.374336302280426, "learning_rate": 4.229650326731352e-06, "loss": 0.0989, "step": 46585 }, { "epoch": 0.8309135661541754, "grad_norm": 0.31565165519714355, "learning_rate": 4.2287840923879025e-06, "loss": 0.1038, "step": 46586 }, { "epoch": 0.8309314022758891, "grad_norm": 0.323011577129364, "learning_rate": 4.227917938560857e-06, "loss": 0.1221, "step": 46587 }, { "epoch": 0.8309492383976028, "grad_norm": 0.47113168239593506, "learning_rate": 4.227051865253579e-06, "loss": 0.1116, "step": 46588 }, { "epoch": 0.8309670745193165, "grad_norm": 0.3476635813713074, "learning_rate": 4.226185872469421e-06, "loss": 0.1423, "step": 46589 }, { "epoch": 0.8309849106410302, "grad_norm": 0.2949986755847931, "learning_rate": 4.225319960211735e-06, "loss": 0.1232, "step": 46590 }, { "epoch": 0.8310027467627439, "grad_norm": 0.30801641941070557, "learning_rate": 4.22445412848389e-06, "loss": 0.0947, "step": 46591 }, { "epoch": 0.8310205828844576, "grad_norm": 0.3262016475200653, "learning_rate": 4.223588377289231e-06, "loss": 0.0994, "step": 46592 }, { "epoch": 0.8310384190061713, "grad_norm": 0.292354017496109, "learning_rate": 4.222722706631127e-06, "loss": 0.1307, "step": 46593 }, { "epoch": 0.831056255127885, "grad_norm": 0.24663913249969482, "learning_rate": 4.22185711651292e-06, "loss": 0.0575, "step": 46594 }, { "epoch": 0.8310740912495986, "grad_norm": 0.2890048623085022, "learning_rate": 4.220991606937974e-06, "loss": 0.1745, "step": 46595 }, { "epoch": 0.8310919273713124, "grad_norm": 0.2675207257270813, "learning_rate": 4.220126177909645e-06, "loss": 0.0928, "step": 46596 }, { "epoch": 0.8311097634930261, "grad_norm": 0.21993179619312286, "learning_rate": 4.219260829431282e-06, "loss": 0.0773, "step": 46597 }, { "epoch": 0.8311275996147398, "grad_norm": 0.20542635023593903, "learning_rate": 4.218395561506236e-06, "loss": 0.1015, "step": 46598 }, { "epoch": 0.8311454357364535, "grad_norm": 0.2960578501224518, "learning_rate": 4.217530374137873e-06, "loss": 0.147, "step": 46599 }, { "epoch": 0.8311632718581672, "grad_norm": 0.2885647118091583, "learning_rate": 4.216665267329539e-06, "loss": 0.0968, "step": 46600 }, { "epoch": 0.8311811079798809, "grad_norm": 0.340796560049057, "learning_rate": 4.215800241084591e-06, "loss": 0.0823, "step": 46601 }, { "epoch": 0.8311989441015946, "grad_norm": 0.2635718584060669, "learning_rate": 4.2149352954063775e-06, "loss": 0.1112, "step": 46602 }, { "epoch": 0.8312167802233082, "grad_norm": 0.28207260370254517, "learning_rate": 4.21407043029825e-06, "loss": 0.1301, "step": 46603 }, { "epoch": 0.8312346163450219, "grad_norm": 0.31943976879119873, "learning_rate": 4.213205645763569e-06, "loss": 0.1206, "step": 46604 }, { "epoch": 0.8312524524667356, "grad_norm": 0.3963462710380554, "learning_rate": 4.212340941805676e-06, "loss": 0.1239, "step": 46605 }, { "epoch": 0.8312702885884493, "grad_norm": 0.23490403592586517, "learning_rate": 4.211476318427937e-06, "loss": 0.0965, "step": 46606 }, { "epoch": 0.831288124710163, "grad_norm": 0.2724912464618683, "learning_rate": 4.210611775633688e-06, "loss": 0.1058, "step": 46607 }, { "epoch": 0.8313059608318767, "grad_norm": 0.3340529203414917, "learning_rate": 4.209747313426296e-06, "loss": 0.0825, "step": 46608 }, { "epoch": 0.8313237969535904, "grad_norm": 0.21293847262859344, "learning_rate": 4.208882931809105e-06, "loss": 0.0949, "step": 46609 }, { "epoch": 0.8313416330753041, "grad_norm": 0.2156914323568344, "learning_rate": 4.208018630785462e-06, "loss": 0.1264, "step": 46610 }, { "epoch": 0.8313594691970178, "grad_norm": 0.32299378514289856, "learning_rate": 4.207154410358716e-06, "loss": 0.1143, "step": 46611 }, { "epoch": 0.8313773053187314, "grad_norm": 0.64911288022995, "learning_rate": 4.206290270532226e-06, "loss": 0.1278, "step": 46612 }, { "epoch": 0.8313951414404452, "grad_norm": 0.3406936526298523, "learning_rate": 4.205426211309338e-06, "loss": 0.1239, "step": 46613 }, { "epoch": 0.8314129775621589, "grad_norm": 0.3430907130241394, "learning_rate": 4.204562232693399e-06, "loss": 0.106, "step": 46614 }, { "epoch": 0.8314308136838726, "grad_norm": 0.2644566595554352, "learning_rate": 4.203698334687761e-06, "loss": 0.1303, "step": 46615 }, { "epoch": 0.8314486498055863, "grad_norm": 0.3030645251274109, "learning_rate": 4.2028345172957614e-06, "loss": 0.1029, "step": 46616 }, { "epoch": 0.8314664859273, "grad_norm": 0.24397756159305573, "learning_rate": 4.201970780520767e-06, "loss": 0.148, "step": 46617 }, { "epoch": 0.8314843220490137, "grad_norm": 0.24367259442806244, "learning_rate": 4.2011071243661185e-06, "loss": 0.1221, "step": 46618 }, { "epoch": 0.8315021581707274, "grad_norm": 0.3192799389362335, "learning_rate": 4.200243548835156e-06, "loss": 0.124, "step": 46619 }, { "epoch": 0.8315199942924411, "grad_norm": 0.2924949824810028, "learning_rate": 4.199380053931232e-06, "loss": 0.1145, "step": 46620 }, { "epoch": 0.8315378304141547, "grad_norm": 0.2935815453529358, "learning_rate": 4.198516639657701e-06, "loss": 0.0927, "step": 46621 }, { "epoch": 0.8315556665358684, "grad_norm": 0.2474581003189087, "learning_rate": 4.197653306017904e-06, "loss": 0.1291, "step": 46622 }, { "epoch": 0.8315735026575821, "grad_norm": 0.24212592840194702, "learning_rate": 4.19679005301519e-06, "loss": 0.0827, "step": 46623 }, { "epoch": 0.8315913387792958, "grad_norm": 0.2553001344203949, "learning_rate": 4.195926880652895e-06, "loss": 0.0955, "step": 46624 }, { "epoch": 0.8316091749010095, "grad_norm": 0.30935606360435486, "learning_rate": 4.195063788934381e-06, "loss": 0.1389, "step": 46625 }, { "epoch": 0.8316270110227232, "grad_norm": 0.2515186071395874, "learning_rate": 4.194200777862984e-06, "loss": 0.0863, "step": 46626 }, { "epoch": 0.8316448471444369, "grad_norm": 0.2340743988752365, "learning_rate": 4.1933378474420495e-06, "loss": 0.0684, "step": 46627 }, { "epoch": 0.8316626832661506, "grad_norm": 0.28276729583740234, "learning_rate": 4.192474997674925e-06, "loss": 0.1053, "step": 46628 }, { "epoch": 0.8316805193878642, "grad_norm": 0.2271561175584793, "learning_rate": 4.191612228564948e-06, "loss": 0.0783, "step": 46629 }, { "epoch": 0.831698355509578, "grad_norm": 0.2162674516439438, "learning_rate": 4.190749540115477e-06, "loss": 0.086, "step": 46630 }, { "epoch": 0.8317161916312917, "grad_norm": 0.2562078535556793, "learning_rate": 4.189886932329845e-06, "loss": 0.0698, "step": 46631 }, { "epoch": 0.8317340277530054, "grad_norm": 0.33728837966918945, "learning_rate": 4.189024405211394e-06, "loss": 0.1417, "step": 46632 }, { "epoch": 0.8317518638747191, "grad_norm": 0.23697872459888458, "learning_rate": 4.188161958763481e-06, "loss": 0.112, "step": 46633 }, { "epoch": 0.8317696999964328, "grad_norm": 0.2588939070701599, "learning_rate": 4.18729959298943e-06, "loss": 0.0676, "step": 46634 }, { "epoch": 0.8317875361181465, "grad_norm": 0.2359021157026291, "learning_rate": 4.186437307892604e-06, "loss": 0.121, "step": 46635 }, { "epoch": 0.8318053722398602, "grad_norm": 0.24883480370044708, "learning_rate": 4.185575103476333e-06, "loss": 0.0929, "step": 46636 }, { "epoch": 0.8318232083615739, "grad_norm": 0.25182873010635376, "learning_rate": 4.184712979743957e-06, "loss": 0.1416, "step": 46637 }, { "epoch": 0.8318410444832876, "grad_norm": 0.41528597474098206, "learning_rate": 4.18385093669883e-06, "loss": 0.2181, "step": 46638 }, { "epoch": 0.8318588806050012, "grad_norm": 0.2870693504810333, "learning_rate": 4.182988974344285e-06, "loss": 0.1018, "step": 46639 }, { "epoch": 0.8318767167267149, "grad_norm": 0.3066214323043823, "learning_rate": 4.182127092683663e-06, "loss": 0.1379, "step": 46640 }, { "epoch": 0.8318945528484286, "grad_norm": 0.22343546152114868, "learning_rate": 4.18126529172031e-06, "loss": 0.0724, "step": 46641 }, { "epoch": 0.8319123889701423, "grad_norm": 0.2248062938451767, "learning_rate": 4.1804035714575555e-06, "loss": 0.1227, "step": 46642 }, { "epoch": 0.831930225091856, "grad_norm": 0.22939786314964294, "learning_rate": 4.179541931898753e-06, "loss": 0.1078, "step": 46643 }, { "epoch": 0.8319480612135697, "grad_norm": 0.23790012300014496, "learning_rate": 4.178680373047239e-06, "loss": 0.0855, "step": 46644 }, { "epoch": 0.8319658973352834, "grad_norm": 0.3449121117591858, "learning_rate": 4.177818894906352e-06, "loss": 0.121, "step": 46645 }, { "epoch": 0.8319837334569972, "grad_norm": 0.24393446743488312, "learning_rate": 4.176957497479423e-06, "loss": 0.0791, "step": 46646 }, { "epoch": 0.8320015695787109, "grad_norm": 0.3197662830352783, "learning_rate": 4.176096180769798e-06, "loss": 0.0727, "step": 46647 }, { "epoch": 0.8320194057004245, "grad_norm": 0.24342453479766846, "learning_rate": 4.175234944780823e-06, "loss": 0.0936, "step": 46648 }, { "epoch": 0.8320372418221382, "grad_norm": 0.26099687814712524, "learning_rate": 4.174373789515831e-06, "loss": 0.1441, "step": 46649 }, { "epoch": 0.8320550779438519, "grad_norm": 0.17786675691604614, "learning_rate": 4.173512714978153e-06, "loss": 0.0614, "step": 46650 }, { "epoch": 0.8320729140655656, "grad_norm": 0.2534498870372772, "learning_rate": 4.172651721171139e-06, "loss": 0.1234, "step": 46651 }, { "epoch": 0.8320907501872793, "grad_norm": 0.29591962695121765, "learning_rate": 4.17179080809812e-06, "loss": 0.1142, "step": 46652 }, { "epoch": 0.832108586308993, "grad_norm": 0.36470091342926025, "learning_rate": 4.170929975762433e-06, "loss": 0.1495, "step": 46653 }, { "epoch": 0.8321264224307067, "grad_norm": 0.3425239324569702, "learning_rate": 4.170069224167416e-06, "loss": 0.1515, "step": 46654 }, { "epoch": 0.8321442585524204, "grad_norm": 0.3186365067958832, "learning_rate": 4.1692085533164005e-06, "loss": 0.086, "step": 46655 }, { "epoch": 0.832162094674134, "grad_norm": 0.38707998394966125, "learning_rate": 4.168347963212732e-06, "loss": 0.166, "step": 46656 }, { "epoch": 0.8321799307958477, "grad_norm": 0.306901216506958, "learning_rate": 4.167487453859742e-06, "loss": 0.1191, "step": 46657 }, { "epoch": 0.8321977669175614, "grad_norm": 0.34486865997314453, "learning_rate": 4.166627025260766e-06, "loss": 0.1032, "step": 46658 }, { "epoch": 0.8322156030392751, "grad_norm": 0.34938499331474304, "learning_rate": 4.165766677419133e-06, "loss": 0.1291, "step": 46659 }, { "epoch": 0.8322334391609888, "grad_norm": 0.21423789858818054, "learning_rate": 4.1649064103381905e-06, "loss": 0.1111, "step": 46660 }, { "epoch": 0.8322512752827025, "grad_norm": 0.29759204387664795, "learning_rate": 4.164046224021262e-06, "loss": 0.0982, "step": 46661 }, { "epoch": 0.8322691114044162, "grad_norm": 0.2146986573934555, "learning_rate": 4.163186118471691e-06, "loss": 0.1001, "step": 46662 }, { "epoch": 0.83228694752613, "grad_norm": 0.308400422334671, "learning_rate": 4.162326093692803e-06, "loss": 0.0859, "step": 46663 }, { "epoch": 0.8323047836478437, "grad_norm": 0.25380659103393555, "learning_rate": 4.16146614968794e-06, "loss": 0.0856, "step": 46664 }, { "epoch": 0.8323226197695573, "grad_norm": 0.25684964656829834, "learning_rate": 4.160606286460433e-06, "loss": 0.0846, "step": 46665 }, { "epoch": 0.832340455891271, "grad_norm": 0.2833910584449768, "learning_rate": 4.159746504013615e-06, "loss": 0.1369, "step": 46666 }, { "epoch": 0.8323582920129847, "grad_norm": 0.21197587251663208, "learning_rate": 4.158886802350817e-06, "loss": 0.1061, "step": 46667 }, { "epoch": 0.8323761281346984, "grad_norm": 0.25691279768943787, "learning_rate": 4.158027181475366e-06, "loss": 0.1277, "step": 46668 }, { "epoch": 0.8323939642564121, "grad_norm": 0.38181617856025696, "learning_rate": 4.1571676413906055e-06, "loss": 0.0966, "step": 46669 }, { "epoch": 0.8324118003781258, "grad_norm": 0.33105215430259705, "learning_rate": 4.156308182099861e-06, "loss": 0.1494, "step": 46670 }, { "epoch": 0.8324296364998395, "grad_norm": 0.2425295114517212, "learning_rate": 4.155448803606466e-06, "loss": 0.087, "step": 46671 }, { "epoch": 0.8324474726215532, "grad_norm": 0.30603736639022827, "learning_rate": 4.154589505913745e-06, "loss": 0.1143, "step": 46672 }, { "epoch": 0.8324653087432669, "grad_norm": 0.288798063993454, "learning_rate": 4.153730289025043e-06, "loss": 0.1005, "step": 46673 }, { "epoch": 0.8324831448649805, "grad_norm": 0.3551574945449829, "learning_rate": 4.1528711529436795e-06, "loss": 0.0958, "step": 46674 }, { "epoch": 0.8325009809866942, "grad_norm": 0.3450653851032257, "learning_rate": 4.152012097672983e-06, "loss": 0.0912, "step": 46675 }, { "epoch": 0.8325188171084079, "grad_norm": 0.2784174978733063, "learning_rate": 4.151153123216286e-06, "loss": 0.0918, "step": 46676 }, { "epoch": 0.8325366532301216, "grad_norm": 0.2502913177013397, "learning_rate": 4.150294229576931e-06, "loss": 0.1075, "step": 46677 }, { "epoch": 0.8325544893518353, "grad_norm": 0.25268688797950745, "learning_rate": 4.149435416758235e-06, "loss": 0.1509, "step": 46678 }, { "epoch": 0.832572325473549, "grad_norm": 0.30086496472358704, "learning_rate": 4.1485766847635275e-06, "loss": 0.1271, "step": 46679 }, { "epoch": 0.8325901615952628, "grad_norm": 0.5147128701210022, "learning_rate": 4.14771803359614e-06, "loss": 0.1055, "step": 46680 }, { "epoch": 0.8326079977169765, "grad_norm": 0.23225994408130646, "learning_rate": 4.146859463259392e-06, "loss": 0.1267, "step": 46681 }, { "epoch": 0.8326258338386902, "grad_norm": 0.3003656268119812, "learning_rate": 4.1460009737566255e-06, "loss": 0.1079, "step": 46682 }, { "epoch": 0.8326436699604038, "grad_norm": 0.24094876646995544, "learning_rate": 4.145142565091165e-06, "loss": 0.089, "step": 46683 }, { "epoch": 0.8326615060821175, "grad_norm": 0.2228277176618576, "learning_rate": 4.14428423726633e-06, "loss": 0.1076, "step": 46684 }, { "epoch": 0.8326793422038312, "grad_norm": 0.2801830470561981, "learning_rate": 4.143425990285449e-06, "loss": 0.1062, "step": 46685 }, { "epoch": 0.8326971783255449, "grad_norm": 0.2542019784450531, "learning_rate": 4.14256782415186e-06, "loss": 0.1087, "step": 46686 }, { "epoch": 0.8327150144472586, "grad_norm": 0.24306438863277435, "learning_rate": 4.141709738868879e-06, "loss": 0.1016, "step": 46687 }, { "epoch": 0.8327328505689723, "grad_norm": 0.24475422501564026, "learning_rate": 4.140851734439832e-06, "loss": 0.1119, "step": 46688 }, { "epoch": 0.832750686690686, "grad_norm": 0.22002215683460236, "learning_rate": 4.139993810868053e-06, "loss": 0.1236, "step": 46689 }, { "epoch": 0.8327685228123997, "grad_norm": 0.3026716709136963, "learning_rate": 4.139135968156857e-06, "loss": 0.1357, "step": 46690 }, { "epoch": 0.8327863589341133, "grad_norm": 0.23907902836799622, "learning_rate": 4.138278206309582e-06, "loss": 0.0865, "step": 46691 }, { "epoch": 0.832804195055827, "grad_norm": 0.27678099274635315, "learning_rate": 4.137420525329544e-06, "loss": 0.0992, "step": 46692 }, { "epoch": 0.8328220311775407, "grad_norm": 0.20918892323970795, "learning_rate": 4.136562925220072e-06, "loss": 0.0913, "step": 46693 }, { "epoch": 0.8328398672992544, "grad_norm": 0.28752925992012024, "learning_rate": 4.13570540598448e-06, "loss": 0.0961, "step": 46694 }, { "epoch": 0.8328577034209681, "grad_norm": 0.2905171811580658, "learning_rate": 4.134847967626107e-06, "loss": 0.1062, "step": 46695 }, { "epoch": 0.8328755395426818, "grad_norm": 0.34169983863830566, "learning_rate": 4.1339906101482724e-06, "loss": 0.1313, "step": 46696 }, { "epoch": 0.8328933756643956, "grad_norm": 0.2458985447883606, "learning_rate": 4.133133333554292e-06, "loss": 0.0977, "step": 46697 }, { "epoch": 0.8329112117861093, "grad_norm": 0.1946738064289093, "learning_rate": 4.132276137847491e-06, "loss": 0.0814, "step": 46698 }, { "epoch": 0.832929047907823, "grad_norm": 0.251022607088089, "learning_rate": 4.131419023031202e-06, "loss": 0.1238, "step": 46699 }, { "epoch": 0.8329468840295366, "grad_norm": 0.32583874464035034, "learning_rate": 4.130561989108739e-06, "loss": 0.1181, "step": 46700 }, { "epoch": 0.8329647201512503, "grad_norm": 0.23731474578380585, "learning_rate": 4.129705036083426e-06, "loss": 0.1067, "step": 46701 }, { "epoch": 0.832982556272964, "grad_norm": 0.29119330644607544, "learning_rate": 4.128848163958579e-06, "loss": 0.0569, "step": 46702 }, { "epoch": 0.8330003923946777, "grad_norm": 0.4712277352809906, "learning_rate": 4.127991372737525e-06, "loss": 0.1925, "step": 46703 }, { "epoch": 0.8330182285163914, "grad_norm": 0.4032341241836548, "learning_rate": 4.127134662423593e-06, "loss": 0.1282, "step": 46704 }, { "epoch": 0.8330360646381051, "grad_norm": 0.28733837604522705, "learning_rate": 4.126278033020095e-06, "loss": 0.1509, "step": 46705 }, { "epoch": 0.8330539007598188, "grad_norm": 0.3174244165420532, "learning_rate": 4.125421484530351e-06, "loss": 0.1459, "step": 46706 }, { "epoch": 0.8330717368815325, "grad_norm": 0.22439810633659363, "learning_rate": 4.12456501695768e-06, "loss": 0.0993, "step": 46707 }, { "epoch": 0.8330895730032462, "grad_norm": 0.3191872537136078, "learning_rate": 4.123708630305409e-06, "loss": 0.099, "step": 46708 }, { "epoch": 0.8331074091249598, "grad_norm": 0.31039050221443176, "learning_rate": 4.122852324576856e-06, "loss": 0.0942, "step": 46709 }, { "epoch": 0.8331252452466735, "grad_norm": 0.3705736994743347, "learning_rate": 4.121996099775335e-06, "loss": 0.1404, "step": 46710 }, { "epoch": 0.8331430813683872, "grad_norm": 0.20622700452804565, "learning_rate": 4.121139955904166e-06, "loss": 0.1335, "step": 46711 }, { "epoch": 0.8331609174901009, "grad_norm": 0.20084848999977112, "learning_rate": 4.120283892966673e-06, "loss": 0.1195, "step": 46712 }, { "epoch": 0.8331787536118146, "grad_norm": 0.24524636566638947, "learning_rate": 4.119427910966173e-06, "loss": 0.1232, "step": 46713 }, { "epoch": 0.8331965897335284, "grad_norm": 0.29304173588752747, "learning_rate": 4.1185720099059825e-06, "loss": 0.0549, "step": 46714 }, { "epoch": 0.8332144258552421, "grad_norm": 0.2700052559375763, "learning_rate": 4.1177161897894116e-06, "loss": 0.0938, "step": 46715 }, { "epoch": 0.8332322619769558, "grad_norm": 0.3403674066066742, "learning_rate": 4.116860450619792e-06, "loss": 0.1064, "step": 46716 }, { "epoch": 0.8332500980986695, "grad_norm": 0.2700015902519226, "learning_rate": 4.116004792400427e-06, "loss": 0.1134, "step": 46717 }, { "epoch": 0.8332679342203831, "grad_norm": 0.22742699086666107, "learning_rate": 4.115149215134648e-06, "loss": 0.1297, "step": 46718 }, { "epoch": 0.8332857703420968, "grad_norm": 0.2205462008714676, "learning_rate": 4.114293718825762e-06, "loss": 0.0961, "step": 46719 }, { "epoch": 0.8333036064638105, "grad_norm": 0.32220178842544556, "learning_rate": 4.113438303477085e-06, "loss": 0.0772, "step": 46720 }, { "epoch": 0.8333214425855242, "grad_norm": 0.24213223159313202, "learning_rate": 4.112582969091938e-06, "loss": 0.1005, "step": 46721 }, { "epoch": 0.8333392787072379, "grad_norm": 0.3152848184108734, "learning_rate": 4.111727715673632e-06, "loss": 0.1024, "step": 46722 }, { "epoch": 0.8333571148289516, "grad_norm": 0.2606961727142334, "learning_rate": 4.1108725432254886e-06, "loss": 0.1131, "step": 46723 }, { "epoch": 0.8333749509506653, "grad_norm": 0.16872140765190125, "learning_rate": 4.110017451750811e-06, "loss": 0.0776, "step": 46724 }, { "epoch": 0.833392787072379, "grad_norm": 0.27245399355888367, "learning_rate": 4.109162441252926e-06, "loss": 0.1081, "step": 46725 }, { "epoch": 0.8334106231940926, "grad_norm": 0.2449917048215866, "learning_rate": 4.108307511735141e-06, "loss": 0.115, "step": 46726 }, { "epoch": 0.8334284593158063, "grad_norm": 0.31169965863227844, "learning_rate": 4.107452663200776e-06, "loss": 0.1232, "step": 46727 }, { "epoch": 0.83344629543752, "grad_norm": 0.26030436158180237, "learning_rate": 4.1065978956531305e-06, "loss": 0.1558, "step": 46728 }, { "epoch": 0.8334641315592337, "grad_norm": 0.8080865740776062, "learning_rate": 4.105743209095536e-06, "loss": 0.1482, "step": 46729 }, { "epoch": 0.8334819676809474, "grad_norm": 0.3591577112674713, "learning_rate": 4.10488860353129e-06, "loss": 0.1426, "step": 46730 }, { "epoch": 0.8334998038026612, "grad_norm": 0.26323091983795166, "learning_rate": 4.1040340789637195e-06, "loss": 0.1314, "step": 46731 }, { "epoch": 0.8335176399243749, "grad_norm": 0.2543598413467407, "learning_rate": 4.103179635396132e-06, "loss": 0.0763, "step": 46732 }, { "epoch": 0.8335354760460886, "grad_norm": 0.29716119170188904, "learning_rate": 4.102325272831828e-06, "loss": 0.0851, "step": 46733 }, { "epoch": 0.8335533121678023, "grad_norm": 0.21345782279968262, "learning_rate": 4.10147099127414e-06, "loss": 0.0749, "step": 46734 }, { "epoch": 0.833571148289516, "grad_norm": 0.2515353858470917, "learning_rate": 4.100616790726364e-06, "loss": 0.0835, "step": 46735 }, { "epoch": 0.8335889844112296, "grad_norm": 0.25271734595298767, "learning_rate": 4.099762671191817e-06, "loss": 0.1085, "step": 46736 }, { "epoch": 0.8336068205329433, "grad_norm": 0.20195934176445007, "learning_rate": 4.0989086326738035e-06, "loss": 0.0617, "step": 46737 }, { "epoch": 0.833624656654657, "grad_norm": 0.2418886125087738, "learning_rate": 4.098054675175644e-06, "loss": 0.065, "step": 46738 }, { "epoch": 0.8336424927763707, "grad_norm": 0.3445945382118225, "learning_rate": 4.097200798700645e-06, "loss": 0.1462, "step": 46739 }, { "epoch": 0.8336603288980844, "grad_norm": 0.4444994032382965, "learning_rate": 4.0963470032521155e-06, "loss": 0.1428, "step": 46740 }, { "epoch": 0.8336781650197981, "grad_norm": 0.26164713501930237, "learning_rate": 4.095493288833358e-06, "loss": 0.0968, "step": 46741 }, { "epoch": 0.8336960011415118, "grad_norm": 0.3222672641277313, "learning_rate": 4.0946396554476934e-06, "loss": 0.1396, "step": 46742 }, { "epoch": 0.8337138372632255, "grad_norm": 0.2579457461833954, "learning_rate": 4.093786103098429e-06, "loss": 0.1307, "step": 46743 }, { "epoch": 0.8337316733849391, "grad_norm": 0.266775906085968, "learning_rate": 4.092932631788862e-06, "loss": 0.1326, "step": 46744 }, { "epoch": 0.8337495095066528, "grad_norm": 0.30796027183532715, "learning_rate": 4.0920792415223175e-06, "loss": 0.1359, "step": 46745 }, { "epoch": 0.8337673456283665, "grad_norm": 0.32440727949142456, "learning_rate": 4.091225932302087e-06, "loss": 0.1591, "step": 46746 }, { "epoch": 0.8337851817500803, "grad_norm": 0.2423679083585739, "learning_rate": 4.0903727041314925e-06, "loss": 0.1254, "step": 46747 }, { "epoch": 0.833803017871794, "grad_norm": 0.31258848309516907, "learning_rate": 4.089519557013838e-06, "loss": 0.1288, "step": 46748 }, { "epoch": 0.8338208539935077, "grad_norm": 0.2675144374370575, "learning_rate": 4.0886664909524256e-06, "loss": 0.11, "step": 46749 }, { "epoch": 0.8338386901152214, "grad_norm": 0.2953801155090332, "learning_rate": 4.0878135059505576e-06, "loss": 0.0988, "step": 46750 }, { "epoch": 0.8338565262369351, "grad_norm": 0.31908726692199707, "learning_rate": 4.086960602011555e-06, "loss": 0.1111, "step": 46751 }, { "epoch": 0.8338743623586488, "grad_norm": 0.27898141741752625, "learning_rate": 4.086107779138718e-06, "loss": 0.1202, "step": 46752 }, { "epoch": 0.8338921984803624, "grad_norm": 0.23802019655704498, "learning_rate": 4.085255037335348e-06, "loss": 0.08, "step": 46753 }, { "epoch": 0.8339100346020761, "grad_norm": 0.33159416913986206, "learning_rate": 4.084402376604749e-06, "loss": 0.1149, "step": 46754 }, { "epoch": 0.8339278707237898, "grad_norm": 0.29219233989715576, "learning_rate": 4.083549796950234e-06, "loss": 0.1394, "step": 46755 }, { "epoch": 0.8339457068455035, "grad_norm": 0.26689863204956055, "learning_rate": 4.082697298375104e-06, "loss": 0.0921, "step": 46756 }, { "epoch": 0.8339635429672172, "grad_norm": 0.34107667207717896, "learning_rate": 4.08184488088266e-06, "loss": 0.161, "step": 46757 }, { "epoch": 0.8339813790889309, "grad_norm": 0.2929271161556244, "learning_rate": 4.080992544476217e-06, "loss": 0.0746, "step": 46758 }, { "epoch": 0.8339992152106446, "grad_norm": 0.2556752562522888, "learning_rate": 4.080140289159063e-06, "loss": 0.1088, "step": 46759 }, { "epoch": 0.8340170513323583, "grad_norm": 0.2826586365699768, "learning_rate": 4.079288114934518e-06, "loss": 0.0692, "step": 46760 }, { "epoch": 0.834034887454072, "grad_norm": 0.30315613746643066, "learning_rate": 4.078436021805879e-06, "loss": 0.0843, "step": 46761 }, { "epoch": 0.8340527235757856, "grad_norm": 0.2464943826198578, "learning_rate": 4.077584009776448e-06, "loss": 0.0923, "step": 46762 }, { "epoch": 0.8340705596974993, "grad_norm": 0.2595449984073639, "learning_rate": 4.0767320788495215e-06, "loss": 0.1292, "step": 46763 }, { "epoch": 0.8340883958192131, "grad_norm": 0.3096935451030731, "learning_rate": 4.075880229028412e-06, "loss": 0.1329, "step": 46764 }, { "epoch": 0.8341062319409268, "grad_norm": 0.2075943797826767, "learning_rate": 4.075028460316421e-06, "loss": 0.063, "step": 46765 }, { "epoch": 0.8341240680626405, "grad_norm": 0.2840370535850525, "learning_rate": 4.074176772716845e-06, "loss": 0.1198, "step": 46766 }, { "epoch": 0.8341419041843542, "grad_norm": 0.33895233273506165, "learning_rate": 4.07332516623298e-06, "loss": 0.0955, "step": 46767 }, { "epoch": 0.8341597403060679, "grad_norm": 0.37132859230041504, "learning_rate": 4.072473640868143e-06, "loss": 0.1652, "step": 46768 }, { "epoch": 0.8341775764277816, "grad_norm": 0.3085973560810089, "learning_rate": 4.071622196625627e-06, "loss": 0.1625, "step": 46769 }, { "epoch": 0.8341954125494953, "grad_norm": 0.31743186712265015, "learning_rate": 4.07077083350873e-06, "loss": 0.1368, "step": 46770 }, { "epoch": 0.8342132486712089, "grad_norm": 0.2818658947944641, "learning_rate": 4.069919551520748e-06, "loss": 0.1741, "step": 46771 }, { "epoch": 0.8342310847929226, "grad_norm": 0.24712364375591278, "learning_rate": 4.069068350664992e-06, "loss": 0.1296, "step": 46772 }, { "epoch": 0.8342489209146363, "grad_norm": 0.25967642664909363, "learning_rate": 4.068217230944754e-06, "loss": 0.105, "step": 46773 }, { "epoch": 0.83426675703635, "grad_norm": 0.2966439425945282, "learning_rate": 4.067366192363339e-06, "loss": 0.1131, "step": 46774 }, { "epoch": 0.8342845931580637, "grad_norm": 0.2699515223503113, "learning_rate": 4.066515234924043e-06, "loss": 0.0998, "step": 46775 }, { "epoch": 0.8343024292797774, "grad_norm": 0.3081156015396118, "learning_rate": 4.065664358630156e-06, "loss": 0.1329, "step": 46776 }, { "epoch": 0.8343202654014911, "grad_norm": 0.2245841771364212, "learning_rate": 4.064813563484993e-06, "loss": 0.0649, "step": 46777 }, { "epoch": 0.8343381015232048, "grad_norm": 0.34844109416007996, "learning_rate": 4.063962849491842e-06, "loss": 0.1189, "step": 46778 }, { "epoch": 0.8343559376449184, "grad_norm": 0.5935043096542358, "learning_rate": 4.063112216654002e-06, "loss": 0.0917, "step": 46779 }, { "epoch": 0.8343737737666321, "grad_norm": 0.27321234345436096, "learning_rate": 4.062261664974767e-06, "loss": 0.059, "step": 46780 }, { "epoch": 0.8343916098883459, "grad_norm": 0.28098952770233154, "learning_rate": 4.0614111944574405e-06, "loss": 0.1446, "step": 46781 }, { "epoch": 0.8344094460100596, "grad_norm": 0.3820337951183319, "learning_rate": 4.060560805105318e-06, "loss": 0.1708, "step": 46782 }, { "epoch": 0.8344272821317733, "grad_norm": 0.22245419025421143, "learning_rate": 4.059710496921692e-06, "loss": 0.1091, "step": 46783 }, { "epoch": 0.834445118253487, "grad_norm": 0.2427702397108078, "learning_rate": 4.058860269909859e-06, "loss": 0.1004, "step": 46784 }, { "epoch": 0.8344629543752007, "grad_norm": 0.26275157928466797, "learning_rate": 4.0580101240731166e-06, "loss": 0.1138, "step": 46785 }, { "epoch": 0.8344807904969144, "grad_norm": 0.28306999802589417, "learning_rate": 4.057160059414758e-06, "loss": 0.0887, "step": 46786 }, { "epoch": 0.8344986266186281, "grad_norm": 0.3550335764884949, "learning_rate": 4.0563100759380854e-06, "loss": 0.109, "step": 46787 }, { "epoch": 0.8345164627403417, "grad_norm": 0.27959829568862915, "learning_rate": 4.0554601736463884e-06, "loss": 0.134, "step": 46788 }, { "epoch": 0.8345342988620554, "grad_norm": 0.23582394421100616, "learning_rate": 4.054610352542956e-06, "loss": 0.0994, "step": 46789 }, { "epoch": 0.8345521349837691, "grad_norm": 0.2507306933403015, "learning_rate": 4.053760612631094e-06, "loss": 0.0784, "step": 46790 }, { "epoch": 0.8345699711054828, "grad_norm": 0.26044243574142456, "learning_rate": 4.052910953914091e-06, "loss": 0.1328, "step": 46791 }, { "epoch": 0.8345878072271965, "grad_norm": 0.25004643201828003, "learning_rate": 4.052061376395241e-06, "loss": 0.0767, "step": 46792 }, { "epoch": 0.8346056433489102, "grad_norm": 0.26451361179351807, "learning_rate": 4.051211880077829e-06, "loss": 0.0802, "step": 46793 }, { "epoch": 0.8346234794706239, "grad_norm": 0.23932376503944397, "learning_rate": 4.05036246496516e-06, "loss": 0.1119, "step": 46794 }, { "epoch": 0.8346413155923376, "grad_norm": 0.3065779507160187, "learning_rate": 4.0495131310605224e-06, "loss": 0.1113, "step": 46795 }, { "epoch": 0.8346591517140513, "grad_norm": 0.31209102272987366, "learning_rate": 4.0486638783672095e-06, "loss": 0.1528, "step": 46796 }, { "epoch": 0.8346769878357649, "grad_norm": 0.2687644958496094, "learning_rate": 4.0478147068885115e-06, "loss": 0.0634, "step": 46797 }, { "epoch": 0.8346948239574787, "grad_norm": 0.24032819271087646, "learning_rate": 4.0469656166277126e-06, "loss": 0.1028, "step": 46798 }, { "epoch": 0.8347126600791924, "grad_norm": 0.23493680357933044, "learning_rate": 4.046116607588121e-06, "loss": 0.095, "step": 46799 }, { "epoch": 0.8347304962009061, "grad_norm": 0.39956793189048767, "learning_rate": 4.04526767977301e-06, "loss": 0.1102, "step": 46800 }, { "epoch": 0.8347483323226198, "grad_norm": 0.28877198696136475, "learning_rate": 4.044418833185684e-06, "loss": 0.0883, "step": 46801 }, { "epoch": 0.8347661684443335, "grad_norm": 0.30131542682647705, "learning_rate": 4.0435700678294245e-06, "loss": 0.1169, "step": 46802 }, { "epoch": 0.8347840045660472, "grad_norm": 0.3342926800251007, "learning_rate": 4.042721383707532e-06, "loss": 0.1382, "step": 46803 }, { "epoch": 0.8348018406877609, "grad_norm": 0.2899371087551117, "learning_rate": 4.041872780823289e-06, "loss": 0.1457, "step": 46804 }, { "epoch": 0.8348196768094746, "grad_norm": 0.4322609007358551, "learning_rate": 4.041024259179988e-06, "loss": 0.0975, "step": 46805 }, { "epoch": 0.8348375129311882, "grad_norm": 0.2646285593509674, "learning_rate": 4.040175818780906e-06, "loss": 0.0725, "step": 46806 }, { "epoch": 0.8348553490529019, "grad_norm": 0.25836241245269775, "learning_rate": 4.03932745962935e-06, "loss": 0.1129, "step": 46807 }, { "epoch": 0.8348731851746156, "grad_norm": 0.26384419202804565, "learning_rate": 4.038479181728599e-06, "loss": 0.0886, "step": 46808 }, { "epoch": 0.8348910212963293, "grad_norm": 0.25899654626846313, "learning_rate": 4.037630985081945e-06, "loss": 0.1188, "step": 46809 }, { "epoch": 0.834908857418043, "grad_norm": 0.25273481011390686, "learning_rate": 4.036782869692671e-06, "loss": 0.1639, "step": 46810 }, { "epoch": 0.8349266935397567, "grad_norm": 0.19507236778736115, "learning_rate": 4.035934835564062e-06, "loss": 0.0874, "step": 46811 }, { "epoch": 0.8349445296614704, "grad_norm": 0.2970743775367737, "learning_rate": 4.035086882699418e-06, "loss": 0.1612, "step": 46812 }, { "epoch": 0.8349623657831841, "grad_norm": 0.21159812808036804, "learning_rate": 4.034239011102009e-06, "loss": 0.1011, "step": 46813 }, { "epoch": 0.8349802019048977, "grad_norm": 0.3126099705696106, "learning_rate": 4.03339122077514e-06, "loss": 0.1217, "step": 46814 }, { "epoch": 0.8349980380266115, "grad_norm": 0.32722559571266174, "learning_rate": 4.032543511722081e-06, "loss": 0.1008, "step": 46815 }, { "epoch": 0.8350158741483252, "grad_norm": 0.6440924406051636, "learning_rate": 4.031695883946129e-06, "loss": 0.0991, "step": 46816 }, { "epoch": 0.8350337102700389, "grad_norm": 0.32656651735305786, "learning_rate": 4.030848337450568e-06, "loss": 0.084, "step": 46817 }, { "epoch": 0.8350515463917526, "grad_norm": 0.29383066296577454, "learning_rate": 4.030000872238681e-06, "loss": 0.0911, "step": 46818 }, { "epoch": 0.8350693825134663, "grad_norm": 0.30996939539909363, "learning_rate": 4.02915348831375e-06, "loss": 0.1044, "step": 46819 }, { "epoch": 0.83508721863518, "grad_norm": 0.17640818655490875, "learning_rate": 4.028306185679068e-06, "loss": 0.108, "step": 46820 }, { "epoch": 0.8351050547568937, "grad_norm": 0.22094972431659698, "learning_rate": 4.0274589643379126e-06, "loss": 0.0653, "step": 46821 }, { "epoch": 0.8351228908786074, "grad_norm": 0.26132553815841675, "learning_rate": 4.026611824293572e-06, "loss": 0.1169, "step": 46822 }, { "epoch": 0.835140727000321, "grad_norm": 0.2136087417602539, "learning_rate": 4.025764765549325e-06, "loss": 0.1159, "step": 46823 }, { "epoch": 0.8351585631220347, "grad_norm": 0.26849299669265747, "learning_rate": 4.0249177881084566e-06, "loss": 0.0753, "step": 46824 }, { "epoch": 0.8351763992437484, "grad_norm": 0.296371191740036, "learning_rate": 4.0240708919742544e-06, "loss": 0.1749, "step": 46825 }, { "epoch": 0.8351942353654621, "grad_norm": 0.24800507724285126, "learning_rate": 4.0232240771499975e-06, "loss": 0.1424, "step": 46826 }, { "epoch": 0.8352120714871758, "grad_norm": 0.22743044793605804, "learning_rate": 4.022377343638964e-06, "loss": 0.1074, "step": 46827 }, { "epoch": 0.8352299076088895, "grad_norm": 0.2506360113620758, "learning_rate": 4.02153069144445e-06, "loss": 0.1074, "step": 46828 }, { "epoch": 0.8352477437306032, "grad_norm": 0.2383185774087906, "learning_rate": 4.020684120569721e-06, "loss": 0.107, "step": 46829 }, { "epoch": 0.8352655798523169, "grad_norm": 0.22619059681892395, "learning_rate": 4.01983763101807e-06, "loss": 0.0842, "step": 46830 }, { "epoch": 0.8352834159740306, "grad_norm": 0.24542072415351868, "learning_rate": 4.018991222792776e-06, "loss": 0.1214, "step": 46831 }, { "epoch": 0.8353012520957444, "grad_norm": 0.46733415126800537, "learning_rate": 4.018144895897113e-06, "loss": 0.1242, "step": 46832 }, { "epoch": 0.835319088217458, "grad_norm": 0.2272469699382782, "learning_rate": 4.017298650334375e-06, "loss": 0.0692, "step": 46833 }, { "epoch": 0.8353369243391717, "grad_norm": 0.3833679258823395, "learning_rate": 4.016452486107833e-06, "loss": 0.1136, "step": 46834 }, { "epoch": 0.8353547604608854, "grad_norm": 0.32804858684539795, "learning_rate": 4.015606403220767e-06, "loss": 0.1552, "step": 46835 }, { "epoch": 0.8353725965825991, "grad_norm": 0.25129881501197815, "learning_rate": 4.0147604016764624e-06, "loss": 0.1243, "step": 46836 }, { "epoch": 0.8353904327043128, "grad_norm": 0.3041929006576538, "learning_rate": 4.013914481478187e-06, "loss": 0.0725, "step": 46837 }, { "epoch": 0.8354082688260265, "grad_norm": 0.2951223850250244, "learning_rate": 4.01306864262923e-06, "loss": 0.1092, "step": 46838 }, { "epoch": 0.8354261049477402, "grad_norm": 0.20860454440116882, "learning_rate": 4.012222885132872e-06, "loss": 0.0606, "step": 46839 }, { "epoch": 0.8354439410694539, "grad_norm": 0.2729339301586151, "learning_rate": 4.0113772089923785e-06, "loss": 0.0907, "step": 46840 }, { "epoch": 0.8354617771911675, "grad_norm": 0.24473010003566742, "learning_rate": 4.010531614211044e-06, "loss": 0.1177, "step": 46841 }, { "epoch": 0.8354796133128812, "grad_norm": 0.24658413231372833, "learning_rate": 4.0096861007921315e-06, "loss": 0.0767, "step": 46842 }, { "epoch": 0.8354974494345949, "grad_norm": 0.2385355681180954, "learning_rate": 4.0088406687389306e-06, "loss": 0.1395, "step": 46843 }, { "epoch": 0.8355152855563086, "grad_norm": 0.378973126411438, "learning_rate": 4.007995318054714e-06, "loss": 0.1173, "step": 46844 }, { "epoch": 0.8355331216780223, "grad_norm": 0.2509481608867645, "learning_rate": 4.007150048742753e-06, "loss": 0.1361, "step": 46845 }, { "epoch": 0.835550957799736, "grad_norm": 0.2565433382987976, "learning_rate": 4.006304860806334e-06, "loss": 0.1267, "step": 46846 }, { "epoch": 0.8355687939214497, "grad_norm": 0.2195865958929062, "learning_rate": 4.005459754248731e-06, "loss": 0.1106, "step": 46847 }, { "epoch": 0.8355866300431635, "grad_norm": 0.20758330821990967, "learning_rate": 4.004614729073214e-06, "loss": 0.0674, "step": 46848 }, { "epoch": 0.8356044661648772, "grad_norm": 0.3654841482639313, "learning_rate": 4.003769785283063e-06, "loss": 0.161, "step": 46849 }, { "epoch": 0.8356223022865908, "grad_norm": 0.2653590440750122, "learning_rate": 4.002924922881546e-06, "loss": 0.0824, "step": 46850 }, { "epoch": 0.8356401384083045, "grad_norm": 0.36507052183151245, "learning_rate": 4.00208014187195e-06, "loss": 0.119, "step": 46851 }, { "epoch": 0.8356579745300182, "grad_norm": 0.31233513355255127, "learning_rate": 4.001235442257545e-06, "loss": 0.1276, "step": 46852 }, { "epoch": 0.8356758106517319, "grad_norm": 0.2052009105682373, "learning_rate": 4.0003908240416025e-06, "loss": 0.1036, "step": 46853 }, { "epoch": 0.8356936467734456, "grad_norm": 0.2272307276725769, "learning_rate": 3.999546287227393e-06, "loss": 0.0952, "step": 46854 }, { "epoch": 0.8357114828951593, "grad_norm": 0.27024638652801514, "learning_rate": 3.9987018318182e-06, "loss": 0.115, "step": 46855 }, { "epoch": 0.835729319016873, "grad_norm": 0.2789422273635864, "learning_rate": 3.997857457817289e-06, "loss": 0.1592, "step": 46856 }, { "epoch": 0.8357471551385867, "grad_norm": 0.2676437199115753, "learning_rate": 3.997013165227939e-06, "loss": 0.1505, "step": 46857 }, { "epoch": 0.8357649912603003, "grad_norm": 0.3002021908760071, "learning_rate": 3.996168954053417e-06, "loss": 0.122, "step": 46858 }, { "epoch": 0.835782827382014, "grad_norm": 0.21754351258277893, "learning_rate": 3.9953248242970036e-06, "loss": 0.1182, "step": 46859 }, { "epoch": 0.8358006635037277, "grad_norm": 0.23948244750499725, "learning_rate": 3.9944807759619676e-06, "loss": 0.1128, "step": 46860 }, { "epoch": 0.8358184996254414, "grad_norm": 0.20021620392799377, "learning_rate": 3.993636809051576e-06, "loss": 0.072, "step": 46861 }, { "epoch": 0.8358363357471551, "grad_norm": 0.2331341952085495, "learning_rate": 3.992792923569105e-06, "loss": 0.0975, "step": 46862 }, { "epoch": 0.8358541718688688, "grad_norm": 0.24508824944496155, "learning_rate": 3.991949119517818e-06, "loss": 0.0909, "step": 46863 }, { "epoch": 0.8358720079905825, "grad_norm": 0.28833362460136414, "learning_rate": 3.991105396901001e-06, "loss": 0.0998, "step": 46864 }, { "epoch": 0.8358898441122963, "grad_norm": 0.3301422595977783, "learning_rate": 3.9902617557219135e-06, "loss": 0.1115, "step": 46865 }, { "epoch": 0.83590768023401, "grad_norm": 0.26082515716552734, "learning_rate": 3.9894181959838266e-06, "loss": 0.1509, "step": 46866 }, { "epoch": 0.8359255163557237, "grad_norm": 0.19411340355873108, "learning_rate": 3.988574717690008e-06, "loss": 0.1115, "step": 46867 }, { "epoch": 0.8359433524774373, "grad_norm": 0.24536892771720886, "learning_rate": 3.987731320843735e-06, "loss": 0.0862, "step": 46868 }, { "epoch": 0.835961188599151, "grad_norm": 0.23225244879722595, "learning_rate": 3.986888005448266e-06, "loss": 0.1198, "step": 46869 }, { "epoch": 0.8359790247208647, "grad_norm": 0.256185919046402, "learning_rate": 3.986044771506886e-06, "loss": 0.126, "step": 46870 }, { "epoch": 0.8359968608425784, "grad_norm": 0.35747939348220825, "learning_rate": 3.985201619022847e-06, "loss": 0.1258, "step": 46871 }, { "epoch": 0.8360146969642921, "grad_norm": 0.26201823353767395, "learning_rate": 3.984358547999431e-06, "loss": 0.1186, "step": 46872 }, { "epoch": 0.8360325330860058, "grad_norm": 0.29478102922439575, "learning_rate": 3.983515558439899e-06, "loss": 0.1143, "step": 46873 }, { "epoch": 0.8360503692077195, "grad_norm": 0.218222513794899, "learning_rate": 3.982672650347521e-06, "loss": 0.1088, "step": 46874 }, { "epoch": 0.8360682053294332, "grad_norm": 0.28917232155799866, "learning_rate": 3.981829823725561e-06, "loss": 0.1161, "step": 46875 }, { "epoch": 0.8360860414511468, "grad_norm": 0.214401513338089, "learning_rate": 3.980987078577284e-06, "loss": 0.1125, "step": 46876 }, { "epoch": 0.8361038775728605, "grad_norm": 0.3000587224960327, "learning_rate": 3.980144414905965e-06, "loss": 0.0852, "step": 46877 }, { "epoch": 0.8361217136945742, "grad_norm": 0.1898646503686905, "learning_rate": 3.979301832714869e-06, "loss": 0.0736, "step": 46878 }, { "epoch": 0.8361395498162879, "grad_norm": 0.23889465630054474, "learning_rate": 3.978459332007256e-06, "loss": 0.0719, "step": 46879 }, { "epoch": 0.8361573859380016, "grad_norm": 0.22260203957557678, "learning_rate": 3.9776169127863915e-06, "loss": 0.0732, "step": 46880 }, { "epoch": 0.8361752220597153, "grad_norm": 0.3586057722568512, "learning_rate": 3.9767745750555505e-06, "loss": 0.1349, "step": 46881 }, { "epoch": 0.8361930581814291, "grad_norm": 0.23854921758174896, "learning_rate": 3.97593231881799e-06, "loss": 0.0924, "step": 46882 }, { "epoch": 0.8362108943031428, "grad_norm": 0.3031052350997925, "learning_rate": 3.975090144076973e-06, "loss": 0.1192, "step": 46883 }, { "epoch": 0.8362287304248565, "grad_norm": 0.2748461961746216, "learning_rate": 3.974248050835769e-06, "loss": 0.0848, "step": 46884 }, { "epoch": 0.8362465665465701, "grad_norm": 0.21828046441078186, "learning_rate": 3.973406039097646e-06, "loss": 0.1026, "step": 46885 }, { "epoch": 0.8362644026682838, "grad_norm": 0.365934818983078, "learning_rate": 3.972564108865867e-06, "loss": 0.1624, "step": 46886 }, { "epoch": 0.8362822387899975, "grad_norm": 0.2756219506263733, "learning_rate": 3.97172226014369e-06, "loss": 0.1263, "step": 46887 }, { "epoch": 0.8363000749117112, "grad_norm": 0.24824893474578857, "learning_rate": 3.9708804929343785e-06, "loss": 0.0601, "step": 46888 }, { "epoch": 0.8363179110334249, "grad_norm": 0.36520570516586304, "learning_rate": 3.970038807241194e-06, "loss": 0.1233, "step": 46889 }, { "epoch": 0.8363357471551386, "grad_norm": 0.25544849038124084, "learning_rate": 3.96919720306741e-06, "loss": 0.0809, "step": 46890 }, { "epoch": 0.8363535832768523, "grad_norm": 0.31959447264671326, "learning_rate": 3.968355680416278e-06, "loss": 0.1209, "step": 46891 }, { "epoch": 0.836371419398566, "grad_norm": 0.26477327942848206, "learning_rate": 3.9675142392910645e-06, "loss": 0.1237, "step": 46892 }, { "epoch": 0.8363892555202797, "grad_norm": 0.22651013731956482, "learning_rate": 3.9666728796950245e-06, "loss": 0.1006, "step": 46893 }, { "epoch": 0.8364070916419933, "grad_norm": 0.3021063804626465, "learning_rate": 3.965831601631431e-06, "loss": 0.097, "step": 46894 }, { "epoch": 0.836424927763707, "grad_norm": 0.38781633973121643, "learning_rate": 3.964990405103539e-06, "loss": 0.1516, "step": 46895 }, { "epoch": 0.8364427638854207, "grad_norm": 0.37945935130119324, "learning_rate": 3.964149290114605e-06, "loss": 0.1364, "step": 46896 }, { "epoch": 0.8364606000071344, "grad_norm": 0.29736632108688354, "learning_rate": 3.963308256667897e-06, "loss": 0.104, "step": 46897 }, { "epoch": 0.8364784361288481, "grad_norm": 0.19665658473968506, "learning_rate": 3.962467304766668e-06, "loss": 0.0821, "step": 46898 }, { "epoch": 0.8364962722505619, "grad_norm": 0.23883381485939026, "learning_rate": 3.961626434414189e-06, "loss": 0.1174, "step": 46899 }, { "epoch": 0.8365141083722756, "grad_norm": 0.3611498773097992, "learning_rate": 3.96078564561371e-06, "loss": 0.151, "step": 46900 }, { "epoch": 0.8365319444939893, "grad_norm": 0.2536463439464569, "learning_rate": 3.9599449383684925e-06, "loss": 0.0697, "step": 46901 }, { "epoch": 0.836549780615703, "grad_norm": 0.2697376310825348, "learning_rate": 3.9591043126817915e-06, "loss": 0.0631, "step": 46902 }, { "epoch": 0.8365676167374166, "grad_norm": 0.23434416949748993, "learning_rate": 3.958263768556875e-06, "loss": 0.1144, "step": 46903 }, { "epoch": 0.8365854528591303, "grad_norm": 0.24046054482460022, "learning_rate": 3.9574233059969965e-06, "loss": 0.1427, "step": 46904 }, { "epoch": 0.836603288980844, "grad_norm": 0.25384941697120667, "learning_rate": 3.9565829250054085e-06, "loss": 0.0602, "step": 46905 }, { "epoch": 0.8366211251025577, "grad_norm": 0.3528127372264862, "learning_rate": 3.955742625585371e-06, "loss": 0.119, "step": 46906 }, { "epoch": 0.8366389612242714, "grad_norm": 0.2794913649559021, "learning_rate": 3.954902407740149e-06, "loss": 0.1026, "step": 46907 }, { "epoch": 0.8366567973459851, "grad_norm": 0.260948121547699, "learning_rate": 3.954062271472994e-06, "loss": 0.181, "step": 46908 }, { "epoch": 0.8366746334676988, "grad_norm": 0.254564106464386, "learning_rate": 3.9532222167871645e-06, "loss": 0.1028, "step": 46909 }, { "epoch": 0.8366924695894125, "grad_norm": 0.2630186080932617, "learning_rate": 3.952382243685907e-06, "loss": 0.0993, "step": 46910 }, { "epoch": 0.8367103057111261, "grad_norm": 0.23984605073928833, "learning_rate": 3.951542352172485e-06, "loss": 0.0932, "step": 46911 }, { "epoch": 0.8367281418328398, "grad_norm": 0.23631185293197632, "learning_rate": 3.950702542250162e-06, "loss": 0.1026, "step": 46912 }, { "epoch": 0.8367459779545535, "grad_norm": 0.2506595551967621, "learning_rate": 3.9498628139221865e-06, "loss": 0.0749, "step": 46913 }, { "epoch": 0.8367638140762672, "grad_norm": 0.22263506054878235, "learning_rate": 3.949023167191812e-06, "loss": 0.1047, "step": 46914 }, { "epoch": 0.8367816501979809, "grad_norm": 0.33422768115997314, "learning_rate": 3.9481836020622884e-06, "loss": 0.076, "step": 46915 }, { "epoch": 0.8367994863196947, "grad_norm": 0.26921719312667847, "learning_rate": 3.947344118536883e-06, "loss": 0.1326, "step": 46916 }, { "epoch": 0.8368173224414084, "grad_norm": 0.41757893562316895, "learning_rate": 3.946504716618843e-06, "loss": 0.1969, "step": 46917 }, { "epoch": 0.8368351585631221, "grad_norm": 0.24989396333694458, "learning_rate": 3.945665396311421e-06, "loss": 0.0699, "step": 46918 }, { "epoch": 0.8368529946848358, "grad_norm": 0.37077704071998596, "learning_rate": 3.944826157617867e-06, "loss": 0.1601, "step": 46919 }, { "epoch": 0.8368708308065494, "grad_norm": 0.2764621078968048, "learning_rate": 3.9439870005414465e-06, "loss": 0.0821, "step": 46920 }, { "epoch": 0.8368886669282631, "grad_norm": 0.2903653383255005, "learning_rate": 3.943147925085403e-06, "loss": 0.114, "step": 46921 }, { "epoch": 0.8369065030499768, "grad_norm": 0.2788127362728119, "learning_rate": 3.942308931252992e-06, "loss": 0.0937, "step": 46922 }, { "epoch": 0.8369243391716905, "grad_norm": 0.2614961266517639, "learning_rate": 3.941470019047458e-06, "loss": 0.0863, "step": 46923 }, { "epoch": 0.8369421752934042, "grad_norm": 0.2824731767177582, "learning_rate": 3.940631188472063e-06, "loss": 0.1104, "step": 46924 }, { "epoch": 0.8369600114151179, "grad_norm": 0.3636886179447174, "learning_rate": 3.939792439530055e-06, "loss": 0.1083, "step": 46925 }, { "epoch": 0.8369778475368316, "grad_norm": 0.338789701461792, "learning_rate": 3.938953772224688e-06, "loss": 0.115, "step": 46926 }, { "epoch": 0.8369956836585453, "grad_norm": 0.24709048867225647, "learning_rate": 3.938115186559211e-06, "loss": 0.1144, "step": 46927 }, { "epoch": 0.837013519780259, "grad_norm": 0.24254465103149414, "learning_rate": 3.937276682536867e-06, "loss": 0.1096, "step": 46928 }, { "epoch": 0.8370313559019726, "grad_norm": 0.21065448224544525, "learning_rate": 3.936438260160918e-06, "loss": 0.0692, "step": 46929 }, { "epoch": 0.8370491920236863, "grad_norm": 0.22972606122493744, "learning_rate": 3.935599919434613e-06, "loss": 0.098, "step": 46930 }, { "epoch": 0.8370670281454, "grad_norm": 0.26478683948516846, "learning_rate": 3.934761660361197e-06, "loss": 0.1162, "step": 46931 }, { "epoch": 0.8370848642671137, "grad_norm": 0.2467789649963379, "learning_rate": 3.933923482943913e-06, "loss": 0.1304, "step": 46932 }, { "epoch": 0.8371027003888275, "grad_norm": 0.3902963697910309, "learning_rate": 3.9330853871860255e-06, "loss": 0.1596, "step": 46933 }, { "epoch": 0.8371205365105412, "grad_norm": 0.20513826608657837, "learning_rate": 3.932247373090775e-06, "loss": 0.1199, "step": 46934 }, { "epoch": 0.8371383726322549, "grad_norm": 0.2696496546268463, "learning_rate": 3.93140944066141e-06, "loss": 0.0724, "step": 46935 }, { "epoch": 0.8371562087539686, "grad_norm": 0.26693907380104065, "learning_rate": 3.930571589901172e-06, "loss": 0.1264, "step": 46936 }, { "epoch": 0.8371740448756823, "grad_norm": 0.26409271359443665, "learning_rate": 3.929733820813322e-06, "loss": 0.076, "step": 46937 }, { "epoch": 0.8371918809973959, "grad_norm": 0.2790376543998718, "learning_rate": 3.928896133401097e-06, "loss": 0.0947, "step": 46938 }, { "epoch": 0.8372097171191096, "grad_norm": 0.4949314594268799, "learning_rate": 3.928058527667752e-06, "loss": 0.18, "step": 46939 }, { "epoch": 0.8372275532408233, "grad_norm": 0.22583194077014923, "learning_rate": 3.927221003616533e-06, "loss": 0.104, "step": 46940 }, { "epoch": 0.837245389362537, "grad_norm": 0.32609236240386963, "learning_rate": 3.926383561250674e-06, "loss": 0.0782, "step": 46941 }, { "epoch": 0.8372632254842507, "grad_norm": 0.334391713142395, "learning_rate": 3.925546200573438e-06, "loss": 0.1414, "step": 46942 }, { "epoch": 0.8372810616059644, "grad_norm": 0.2561906576156616, "learning_rate": 3.9247089215880665e-06, "loss": 0.1149, "step": 46943 }, { "epoch": 0.8372988977276781, "grad_norm": 0.28949081897735596, "learning_rate": 3.923871724297801e-06, "loss": 0.1039, "step": 46944 }, { "epoch": 0.8373167338493918, "grad_norm": 0.26782864332199097, "learning_rate": 3.9230346087058806e-06, "loss": 0.1093, "step": 46945 }, { "epoch": 0.8373345699711054, "grad_norm": 0.3119884729385376, "learning_rate": 3.922197574815564e-06, "loss": 0.1102, "step": 46946 }, { "epoch": 0.8373524060928191, "grad_norm": 0.30032435059547424, "learning_rate": 3.921360622630091e-06, "loss": 0.1161, "step": 46947 }, { "epoch": 0.8373702422145328, "grad_norm": 0.5193751454353333, "learning_rate": 3.920523752152702e-06, "loss": 0.187, "step": 46948 }, { "epoch": 0.8373880783362466, "grad_norm": 0.2769351005554199, "learning_rate": 3.91968696338664e-06, "loss": 0.152, "step": 46949 }, { "epoch": 0.8374059144579603, "grad_norm": 0.2023262232542038, "learning_rate": 3.918850256335157e-06, "loss": 0.09, "step": 46950 }, { "epoch": 0.837423750579674, "grad_norm": 0.27765601873397827, "learning_rate": 3.918013631001494e-06, "loss": 0.125, "step": 46951 }, { "epoch": 0.8374415867013877, "grad_norm": 0.21986523270606995, "learning_rate": 3.917177087388885e-06, "loss": 0.0647, "step": 46952 }, { "epoch": 0.8374594228231014, "grad_norm": 0.3394491374492645, "learning_rate": 3.916340625500583e-06, "loss": 0.0861, "step": 46953 }, { "epoch": 0.8374772589448151, "grad_norm": 0.31232213973999023, "learning_rate": 3.915504245339821e-06, "loss": 0.121, "step": 46954 }, { "epoch": 0.8374950950665287, "grad_norm": 0.3081868886947632, "learning_rate": 3.914667946909856e-06, "loss": 0.1276, "step": 46955 }, { "epoch": 0.8375129311882424, "grad_norm": 0.2707572877407074, "learning_rate": 3.913831730213918e-06, "loss": 0.0955, "step": 46956 }, { "epoch": 0.8375307673099561, "grad_norm": 0.28062984347343445, "learning_rate": 3.912995595255253e-06, "loss": 0.0909, "step": 46957 }, { "epoch": 0.8375486034316698, "grad_norm": 0.30293378233909607, "learning_rate": 3.912159542037092e-06, "loss": 0.1146, "step": 46958 }, { "epoch": 0.8375664395533835, "grad_norm": 0.2180204838514328, "learning_rate": 3.9113235705626915e-06, "loss": 0.0953, "step": 46959 }, { "epoch": 0.8375842756750972, "grad_norm": 0.314439982175827, "learning_rate": 3.910487680835284e-06, "loss": 0.0863, "step": 46960 }, { "epoch": 0.8376021117968109, "grad_norm": 0.22588887810707092, "learning_rate": 3.909651872858111e-06, "loss": 0.1089, "step": 46961 }, { "epoch": 0.8376199479185246, "grad_norm": 0.25089627504348755, "learning_rate": 3.908816146634406e-06, "loss": 0.0891, "step": 46962 }, { "epoch": 0.8376377840402383, "grad_norm": 0.28479036688804626, "learning_rate": 3.907980502167419e-06, "loss": 0.1245, "step": 46963 }, { "epoch": 0.8376556201619519, "grad_norm": 0.19454067945480347, "learning_rate": 3.907144939460386e-06, "loss": 0.054, "step": 46964 }, { "epoch": 0.8376734562836656, "grad_norm": 0.2349027693271637, "learning_rate": 3.906309458516538e-06, "loss": 0.0735, "step": 46965 }, { "epoch": 0.8376912924053794, "grad_norm": 0.2898387014865875, "learning_rate": 3.905474059339126e-06, "loss": 0.1199, "step": 46966 }, { "epoch": 0.8377091285270931, "grad_norm": 0.2563013434410095, "learning_rate": 3.904638741931374e-06, "loss": 0.1208, "step": 46967 }, { "epoch": 0.8377269646488068, "grad_norm": 0.26255422830581665, "learning_rate": 3.903803506296538e-06, "loss": 0.0536, "step": 46968 }, { "epoch": 0.8377448007705205, "grad_norm": 0.27324071526527405, "learning_rate": 3.902968352437844e-06, "loss": 0.1349, "step": 46969 }, { "epoch": 0.8377626368922342, "grad_norm": 0.30811983346939087, "learning_rate": 3.902133280358533e-06, "loss": 0.1424, "step": 46970 }, { "epoch": 0.8377804730139479, "grad_norm": 0.26903247833251953, "learning_rate": 3.901298290061831e-06, "loss": 0.1067, "step": 46971 }, { "epoch": 0.8377983091356616, "grad_norm": 0.27189138531684875, "learning_rate": 3.900463381550993e-06, "loss": 0.1248, "step": 46972 }, { "epoch": 0.8378161452573752, "grad_norm": 0.40170568227767944, "learning_rate": 3.899628554829246e-06, "loss": 0.1353, "step": 46973 }, { "epoch": 0.8378339813790889, "grad_norm": 0.3523317575454712, "learning_rate": 3.898793809899826e-06, "loss": 0.1398, "step": 46974 }, { "epoch": 0.8378518175008026, "grad_norm": 0.28895458579063416, "learning_rate": 3.897959146765965e-06, "loss": 0.1153, "step": 46975 }, { "epoch": 0.8378696536225163, "grad_norm": 0.41567447781562805, "learning_rate": 3.897124565430907e-06, "loss": 0.1426, "step": 46976 }, { "epoch": 0.83788748974423, "grad_norm": 0.1604563295841217, "learning_rate": 3.896290065897881e-06, "loss": 0.0535, "step": 46977 }, { "epoch": 0.8379053258659437, "grad_norm": 0.1963864117860794, "learning_rate": 3.895455648170127e-06, "loss": 0.0655, "step": 46978 }, { "epoch": 0.8379231619876574, "grad_norm": 0.17659622430801392, "learning_rate": 3.894621312250871e-06, "loss": 0.0883, "step": 46979 }, { "epoch": 0.8379409981093711, "grad_norm": 0.329047828912735, "learning_rate": 3.893787058143355e-06, "loss": 0.1082, "step": 46980 }, { "epoch": 0.8379588342310847, "grad_norm": 0.2841615080833435, "learning_rate": 3.892952885850806e-06, "loss": 0.1017, "step": 46981 }, { "epoch": 0.8379766703527984, "grad_norm": 0.2920970916748047, "learning_rate": 3.892118795376467e-06, "loss": 0.1252, "step": 46982 }, { "epoch": 0.8379945064745122, "grad_norm": 0.297720730304718, "learning_rate": 3.891284786723568e-06, "loss": 0.1574, "step": 46983 }, { "epoch": 0.8380123425962259, "grad_norm": 0.2133379429578781, "learning_rate": 3.890450859895331e-06, "loss": 0.0698, "step": 46984 }, { "epoch": 0.8380301787179396, "grad_norm": 0.28827035427093506, "learning_rate": 3.889617014895006e-06, "loss": 0.085, "step": 46985 }, { "epoch": 0.8380480148396533, "grad_norm": 0.19673387706279755, "learning_rate": 3.8887832517258135e-06, "loss": 0.0777, "step": 46986 }, { "epoch": 0.838065850961367, "grad_norm": 0.3254310190677643, "learning_rate": 3.887949570390992e-06, "loss": 0.0969, "step": 46987 }, { "epoch": 0.8380836870830807, "grad_norm": 0.2297883927822113, "learning_rate": 3.887115970893762e-06, "loss": 0.0653, "step": 46988 }, { "epoch": 0.8381015232047944, "grad_norm": 0.33748525381088257, "learning_rate": 3.886282453237369e-06, "loss": 0.0744, "step": 46989 }, { "epoch": 0.838119359326508, "grad_norm": 0.24912230670452118, "learning_rate": 3.885449017425039e-06, "loss": 0.0823, "step": 46990 }, { "epoch": 0.8381371954482217, "grad_norm": 0.2941327393054962, "learning_rate": 3.8846156634600005e-06, "loss": 0.1199, "step": 46991 }, { "epoch": 0.8381550315699354, "grad_norm": 0.24996671080589294, "learning_rate": 3.883782391345478e-06, "loss": 0.0876, "step": 46992 }, { "epoch": 0.8381728676916491, "grad_norm": 0.26333895325660706, "learning_rate": 3.882949201084715e-06, "loss": 0.1206, "step": 46993 }, { "epoch": 0.8381907038133628, "grad_norm": 0.34092408418655396, "learning_rate": 3.882116092680926e-06, "loss": 0.132, "step": 46994 }, { "epoch": 0.8382085399350765, "grad_norm": 0.26638486981391907, "learning_rate": 3.881283066137359e-06, "loss": 0.1176, "step": 46995 }, { "epoch": 0.8382263760567902, "grad_norm": 0.2559550106525421, "learning_rate": 3.880450121457229e-06, "loss": 0.1165, "step": 46996 }, { "epoch": 0.8382442121785039, "grad_norm": 0.20587700605392456, "learning_rate": 3.879617258643767e-06, "loss": 0.0732, "step": 46997 }, { "epoch": 0.8382620483002176, "grad_norm": 0.24560266733169556, "learning_rate": 3.878784477700206e-06, "loss": 0.0987, "step": 46998 }, { "epoch": 0.8382798844219312, "grad_norm": 0.21615678071975708, "learning_rate": 3.877951778629773e-06, "loss": 0.1035, "step": 46999 }, { "epoch": 0.838297720543645, "grad_norm": 0.3579657971858978, "learning_rate": 3.8771191614356935e-06, "loss": 0.1236, "step": 47000 }, { "epoch": 0.838297720543645, "eval_loss": 0.10923902690410614, "eval_runtime": 108.4643, "eval_samples_per_second": 9.441, "eval_steps_per_second": 1.577, "step": 47000 }, { "epoch": 0.8383155566653587, "grad_norm": 0.30751803517341614, "learning_rate": 3.876286626121189e-06, "loss": 0.1314, "step": 47001 }, { "epoch": 0.8383333927870724, "grad_norm": 0.3310813009738922, "learning_rate": 3.875454172689502e-06, "loss": 0.1384, "step": 47002 }, { "epoch": 0.8383512289087861, "grad_norm": 0.2768760621547699, "learning_rate": 3.874621801143849e-06, "loss": 0.1063, "step": 47003 }, { "epoch": 0.8383690650304998, "grad_norm": 0.41729825735092163, "learning_rate": 3.873789511487458e-06, "loss": 0.1367, "step": 47004 }, { "epoch": 0.8383869011522135, "grad_norm": 0.26368626952171326, "learning_rate": 3.8729573037235515e-06, "loss": 0.1624, "step": 47005 }, { "epoch": 0.8384047372739272, "grad_norm": 0.27840062975883484, "learning_rate": 3.8721251778553626e-06, "loss": 0.0701, "step": 47006 }, { "epoch": 0.8384225733956409, "grad_norm": 0.19710266590118408, "learning_rate": 3.871293133886117e-06, "loss": 0.0829, "step": 47007 }, { "epoch": 0.8384404095173545, "grad_norm": 0.3673233389854431, "learning_rate": 3.870461171819029e-06, "loss": 0.122, "step": 47008 }, { "epoch": 0.8384582456390682, "grad_norm": 0.28950875997543335, "learning_rate": 3.869629291657337e-06, "loss": 0.1261, "step": 47009 }, { "epoch": 0.8384760817607819, "grad_norm": 0.22746475040912628, "learning_rate": 3.868797493404255e-06, "loss": 0.0651, "step": 47010 }, { "epoch": 0.8384939178824956, "grad_norm": 0.2901719808578491, "learning_rate": 3.8679657770630175e-06, "loss": 0.1506, "step": 47011 }, { "epoch": 0.8385117540042093, "grad_norm": 0.25291070342063904, "learning_rate": 3.8671341426368444e-06, "loss": 0.18, "step": 47012 }, { "epoch": 0.838529590125923, "grad_norm": 0.40263795852661133, "learning_rate": 3.866302590128959e-06, "loss": 0.1225, "step": 47013 }, { "epoch": 0.8385474262476367, "grad_norm": 0.24514323472976685, "learning_rate": 3.865471119542577e-06, "loss": 0.1131, "step": 47014 }, { "epoch": 0.8385652623693504, "grad_norm": 0.2155514657497406, "learning_rate": 3.864639730880934e-06, "loss": 0.1073, "step": 47015 }, { "epoch": 0.838583098491064, "grad_norm": 0.323479026556015, "learning_rate": 3.86380842414725e-06, "loss": 0.1243, "step": 47016 }, { "epoch": 0.8386009346127778, "grad_norm": 0.24916298687458038, "learning_rate": 3.862977199344742e-06, "loss": 0.12, "step": 47017 }, { "epoch": 0.8386187707344915, "grad_norm": 0.49524566531181335, "learning_rate": 3.862146056476629e-06, "loss": 0.1144, "step": 47018 }, { "epoch": 0.8386366068562052, "grad_norm": 0.2047007977962494, "learning_rate": 3.861314995546145e-06, "loss": 0.1089, "step": 47019 }, { "epoch": 0.8386544429779189, "grad_norm": 0.321159303188324, "learning_rate": 3.860484016556507e-06, "loss": 0.14, "step": 47020 }, { "epoch": 0.8386722790996326, "grad_norm": 0.2594822645187378, "learning_rate": 3.8596531195109256e-06, "loss": 0.1322, "step": 47021 }, { "epoch": 0.8386901152213463, "grad_norm": 0.22090981900691986, "learning_rate": 3.858822304412638e-06, "loss": 0.107, "step": 47022 }, { "epoch": 0.83870795134306, "grad_norm": 0.3495054841041565, "learning_rate": 3.857991571264852e-06, "loss": 0.0941, "step": 47023 }, { "epoch": 0.8387257874647737, "grad_norm": 0.21863503754138947, "learning_rate": 3.857160920070796e-06, "loss": 0.0771, "step": 47024 }, { "epoch": 0.8387436235864874, "grad_norm": 0.3025311827659607, "learning_rate": 3.85633035083369e-06, "loss": 0.0721, "step": 47025 }, { "epoch": 0.838761459708201, "grad_norm": 0.19793066382408142, "learning_rate": 3.855499863556747e-06, "loss": 0.0827, "step": 47026 }, { "epoch": 0.8387792958299147, "grad_norm": 0.23742131888866425, "learning_rate": 3.8546694582431845e-06, "loss": 0.1133, "step": 47027 }, { "epoch": 0.8387971319516284, "grad_norm": 0.26667022705078125, "learning_rate": 3.853839134896234e-06, "loss": 0.1008, "step": 47028 }, { "epoch": 0.8388149680733421, "grad_norm": 0.31092989444732666, "learning_rate": 3.853008893519105e-06, "loss": 0.114, "step": 47029 }, { "epoch": 0.8388328041950558, "grad_norm": 0.19924920797348022, "learning_rate": 3.85217873411502e-06, "loss": 0.1028, "step": 47030 }, { "epoch": 0.8388506403167695, "grad_norm": 0.27402082085609436, "learning_rate": 3.851348656687187e-06, "loss": 0.1854, "step": 47031 }, { "epoch": 0.8388684764384832, "grad_norm": 0.2764783799648285, "learning_rate": 3.850518661238836e-06, "loss": 0.0718, "step": 47032 }, { "epoch": 0.8388863125601969, "grad_norm": 0.2881247401237488, "learning_rate": 3.849688747773181e-06, "loss": 0.1085, "step": 47033 }, { "epoch": 0.8389041486819107, "grad_norm": 0.2363775372505188, "learning_rate": 3.848858916293438e-06, "loss": 0.1064, "step": 47034 }, { "epoch": 0.8389219848036243, "grad_norm": 0.2641986310482025, "learning_rate": 3.8480291668028165e-06, "loss": 0.1458, "step": 47035 }, { "epoch": 0.838939820925338, "grad_norm": 0.2976537048816681, "learning_rate": 3.847199499304543e-06, "loss": 0.089, "step": 47036 }, { "epoch": 0.8389576570470517, "grad_norm": 0.3063163757324219, "learning_rate": 3.846369913801828e-06, "loss": 0.0853, "step": 47037 }, { "epoch": 0.8389754931687654, "grad_norm": 0.30863767862319946, "learning_rate": 3.845540410297893e-06, "loss": 0.1223, "step": 47038 }, { "epoch": 0.8389933292904791, "grad_norm": 0.37166640162467957, "learning_rate": 3.844710988795952e-06, "loss": 0.1261, "step": 47039 }, { "epoch": 0.8390111654121928, "grad_norm": 0.26229560375213623, "learning_rate": 3.8438816492992106e-06, "loss": 0.1092, "step": 47040 }, { "epoch": 0.8390290015339065, "grad_norm": 0.2673822343349457, "learning_rate": 3.843052391810897e-06, "loss": 0.0785, "step": 47041 }, { "epoch": 0.8390468376556202, "grad_norm": 0.31263089179992676, "learning_rate": 3.84222321633422e-06, "loss": 0.1425, "step": 47042 }, { "epoch": 0.8390646737773338, "grad_norm": 0.24355749785900116, "learning_rate": 3.841394122872394e-06, "loss": 0.1108, "step": 47043 }, { "epoch": 0.8390825098990475, "grad_norm": 0.24575401842594147, "learning_rate": 3.840565111428626e-06, "loss": 0.0815, "step": 47044 }, { "epoch": 0.8391003460207612, "grad_norm": 0.48401495814323425, "learning_rate": 3.83973618200614e-06, "loss": 0.087, "step": 47045 }, { "epoch": 0.8391181821424749, "grad_norm": 0.2347661852836609, "learning_rate": 3.838907334608146e-06, "loss": 0.0922, "step": 47046 }, { "epoch": 0.8391360182641886, "grad_norm": 0.31247958540916443, "learning_rate": 3.838078569237857e-06, "loss": 0.1571, "step": 47047 }, { "epoch": 0.8391538543859023, "grad_norm": 0.25748899579048157, "learning_rate": 3.837249885898481e-06, "loss": 0.1124, "step": 47048 }, { "epoch": 0.839171690507616, "grad_norm": 0.44267764687538147, "learning_rate": 3.836421284593236e-06, "loss": 0.1052, "step": 47049 }, { "epoch": 0.8391895266293298, "grad_norm": 0.31609952449798584, "learning_rate": 3.835592765325327e-06, "loss": 0.1443, "step": 47050 }, { "epoch": 0.8392073627510435, "grad_norm": 0.37156808376312256, "learning_rate": 3.834764328097976e-06, "loss": 0.1624, "step": 47051 }, { "epoch": 0.8392251988727571, "grad_norm": 0.3659432828426361, "learning_rate": 3.833935972914388e-06, "loss": 0.0868, "step": 47052 }, { "epoch": 0.8392430349944708, "grad_norm": 0.3445312976837158, "learning_rate": 3.833107699777768e-06, "loss": 0.1182, "step": 47053 }, { "epoch": 0.8392608711161845, "grad_norm": 0.25811076164245605, "learning_rate": 3.832279508691344e-06, "loss": 0.0863, "step": 47054 }, { "epoch": 0.8392787072378982, "grad_norm": 0.2872600555419922, "learning_rate": 3.8314513996583115e-06, "loss": 0.1272, "step": 47055 }, { "epoch": 0.8392965433596119, "grad_norm": 0.24091780185699463, "learning_rate": 3.830623372681885e-06, "loss": 0.1253, "step": 47056 }, { "epoch": 0.8393143794813256, "grad_norm": 0.250893235206604, "learning_rate": 3.82979542776527e-06, "loss": 0.0907, "step": 47057 }, { "epoch": 0.8393322156030393, "grad_norm": 0.24283510446548462, "learning_rate": 3.828967564911684e-06, "loss": 0.136, "step": 47058 }, { "epoch": 0.839350051724753, "grad_norm": 0.21466827392578125, "learning_rate": 3.828139784124335e-06, "loss": 0.1238, "step": 47059 }, { "epoch": 0.8393678878464667, "grad_norm": 0.26720157265663147, "learning_rate": 3.827312085406426e-06, "loss": 0.1198, "step": 47060 }, { "epoch": 0.8393857239681803, "grad_norm": 0.22254715859889984, "learning_rate": 3.826484468761168e-06, "loss": 0.1126, "step": 47061 }, { "epoch": 0.839403560089894, "grad_norm": 0.2646508514881134, "learning_rate": 3.825656934191763e-06, "loss": 0.0744, "step": 47062 }, { "epoch": 0.8394213962116077, "grad_norm": 0.22626057267189026, "learning_rate": 3.824829481701434e-06, "loss": 0.1143, "step": 47063 }, { "epoch": 0.8394392323333214, "grad_norm": 0.2767675220966339, "learning_rate": 3.8240021112933706e-06, "loss": 0.1091, "step": 47064 }, { "epoch": 0.8394570684550351, "grad_norm": 0.25615358352661133, "learning_rate": 3.823174822970796e-06, "loss": 0.1448, "step": 47065 }, { "epoch": 0.8394749045767488, "grad_norm": 0.2957111597061157, "learning_rate": 3.822347616736904e-06, "loss": 0.0643, "step": 47066 }, { "epoch": 0.8394927406984626, "grad_norm": 0.24870948493480682, "learning_rate": 3.821520492594913e-06, "loss": 0.1296, "step": 47067 }, { "epoch": 0.8395105768201763, "grad_norm": 0.29272136092185974, "learning_rate": 3.820693450548024e-06, "loss": 0.1187, "step": 47068 }, { "epoch": 0.83952841294189, "grad_norm": 0.25594907999038696, "learning_rate": 3.819866490599442e-06, "loss": 0.1481, "step": 47069 }, { "epoch": 0.8395462490636036, "grad_norm": 0.21848732233047485, "learning_rate": 3.819039612752367e-06, "loss": 0.0946, "step": 47070 }, { "epoch": 0.8395640851853173, "grad_norm": 0.2692481279373169, "learning_rate": 3.818212817010014e-06, "loss": 0.118, "step": 47071 }, { "epoch": 0.839581921307031, "grad_norm": 0.26383277773857117, "learning_rate": 3.8173861033755864e-06, "loss": 0.1263, "step": 47072 }, { "epoch": 0.8395997574287447, "grad_norm": 0.2758817672729492, "learning_rate": 3.8165594718522855e-06, "loss": 0.1059, "step": 47073 }, { "epoch": 0.8396175935504584, "grad_norm": 0.26964548230171204, "learning_rate": 3.815732922443319e-06, "loss": 0.1155, "step": 47074 }, { "epoch": 0.8396354296721721, "grad_norm": 0.34769174456596375, "learning_rate": 3.814906455151879e-06, "loss": 0.1203, "step": 47075 }, { "epoch": 0.8396532657938858, "grad_norm": 0.31437820196151733, "learning_rate": 3.8140800699811878e-06, "loss": 0.144, "step": 47076 }, { "epoch": 0.8396711019155995, "grad_norm": 0.46026819944381714, "learning_rate": 3.8132537669344325e-06, "loss": 0.1693, "step": 47077 }, { "epoch": 0.8396889380373131, "grad_norm": 0.29425153136253357, "learning_rate": 3.8124275460148268e-06, "loss": 0.0718, "step": 47078 }, { "epoch": 0.8397067741590268, "grad_norm": 0.31868693232536316, "learning_rate": 3.811601407225568e-06, "loss": 0.1218, "step": 47079 }, { "epoch": 0.8397246102807405, "grad_norm": 0.4028632640838623, "learning_rate": 3.8107753505698644e-06, "loss": 0.1006, "step": 47080 }, { "epoch": 0.8397424464024542, "grad_norm": 0.2891925275325775, "learning_rate": 3.809949376050914e-06, "loss": 0.0761, "step": 47081 }, { "epoch": 0.8397602825241679, "grad_norm": 0.24664293229579926, "learning_rate": 3.8091234836719168e-06, "loss": 0.1266, "step": 47082 }, { "epoch": 0.8397781186458816, "grad_norm": 0.25259220600128174, "learning_rate": 3.8082976734360733e-06, "loss": 0.1418, "step": 47083 }, { "epoch": 0.8397959547675954, "grad_norm": 0.24793516099452972, "learning_rate": 3.8074719453465912e-06, "loss": 0.0996, "step": 47084 }, { "epoch": 0.8398137908893091, "grad_norm": 0.31412291526794434, "learning_rate": 3.8066462994066686e-06, "loss": 0.1953, "step": 47085 }, { "epoch": 0.8398316270110228, "grad_norm": 0.3072883188724518, "learning_rate": 3.805820735619506e-06, "loss": 0.1475, "step": 47086 }, { "epoch": 0.8398494631327365, "grad_norm": 0.2971581518650055, "learning_rate": 3.8049952539883e-06, "loss": 0.057, "step": 47087 }, { "epoch": 0.8398672992544501, "grad_norm": 0.3154681622982025, "learning_rate": 3.8041698545162484e-06, "loss": 0.1271, "step": 47088 }, { "epoch": 0.8398851353761638, "grad_norm": 0.2794531583786011, "learning_rate": 3.80334453720656e-06, "loss": 0.1074, "step": 47089 }, { "epoch": 0.8399029714978775, "grad_norm": 0.250113844871521, "learning_rate": 3.8025193020624297e-06, "loss": 0.1052, "step": 47090 }, { "epoch": 0.8399208076195912, "grad_norm": 0.20722746849060059, "learning_rate": 3.801694149087051e-06, "loss": 0.0753, "step": 47091 }, { "epoch": 0.8399386437413049, "grad_norm": 0.3281230330467224, "learning_rate": 3.8008690782836283e-06, "loss": 0.1639, "step": 47092 }, { "epoch": 0.8399564798630186, "grad_norm": 0.18458016216754913, "learning_rate": 3.8000440896553636e-06, "loss": 0.0763, "step": 47093 }, { "epoch": 0.8399743159847323, "grad_norm": 0.304382860660553, "learning_rate": 3.7992191832054493e-06, "loss": 0.14, "step": 47094 }, { "epoch": 0.839992152106446, "grad_norm": 0.22286227345466614, "learning_rate": 3.7983943589370852e-06, "loss": 0.13, "step": 47095 }, { "epoch": 0.8400099882281596, "grad_norm": 0.2638026177883148, "learning_rate": 3.797569616853461e-06, "loss": 0.1371, "step": 47096 }, { "epoch": 0.8400278243498733, "grad_norm": 0.2293182909488678, "learning_rate": 3.7967449569577847e-06, "loss": 0.0941, "step": 47097 }, { "epoch": 0.840045660471587, "grad_norm": 0.2778116464614868, "learning_rate": 3.795920379253251e-06, "loss": 0.1189, "step": 47098 }, { "epoch": 0.8400634965933007, "grad_norm": 0.2454736977815628, "learning_rate": 3.7950958837430523e-06, "loss": 0.0714, "step": 47099 }, { "epoch": 0.8400813327150144, "grad_norm": 0.2624067962169647, "learning_rate": 3.7942714704303854e-06, "loss": 0.0935, "step": 47100 }, { "epoch": 0.8400991688367282, "grad_norm": 0.2683139145374298, "learning_rate": 3.79344713931844e-06, "loss": 0.1073, "step": 47101 }, { "epoch": 0.8401170049584419, "grad_norm": 0.22290875017642975, "learning_rate": 3.7926228904104245e-06, "loss": 0.1077, "step": 47102 }, { "epoch": 0.8401348410801556, "grad_norm": 0.3065320551395416, "learning_rate": 3.7917987237095277e-06, "loss": 0.1504, "step": 47103 }, { "epoch": 0.8401526772018693, "grad_norm": 0.3087966740131378, "learning_rate": 3.7909746392189388e-06, "loss": 0.1338, "step": 47104 }, { "epoch": 0.840170513323583, "grad_norm": 0.2408827394247055, "learning_rate": 3.7901506369418636e-06, "loss": 0.0934, "step": 47105 }, { "epoch": 0.8401883494452966, "grad_norm": 0.24439117312431335, "learning_rate": 3.7893267168814833e-06, "loss": 0.1007, "step": 47106 }, { "epoch": 0.8402061855670103, "grad_norm": 0.2937263250350952, "learning_rate": 3.788502879041006e-06, "loss": 0.1313, "step": 47107 }, { "epoch": 0.840224021688724, "grad_norm": 0.29831087589263916, "learning_rate": 3.7876791234236155e-06, "loss": 0.1037, "step": 47108 }, { "epoch": 0.8402418578104377, "grad_norm": 0.2955385744571686, "learning_rate": 3.7868554500325033e-06, "loss": 0.1233, "step": 47109 }, { "epoch": 0.8402596939321514, "grad_norm": 0.29125431180000305, "learning_rate": 3.7860318588708697e-06, "loss": 0.1003, "step": 47110 }, { "epoch": 0.8402775300538651, "grad_norm": 0.35177454352378845, "learning_rate": 3.7852083499419073e-06, "loss": 0.1228, "step": 47111 }, { "epoch": 0.8402953661755788, "grad_norm": 0.35921815037727356, "learning_rate": 3.7843849232488017e-06, "loss": 0.1427, "step": 47112 }, { "epoch": 0.8403132022972924, "grad_norm": 0.24147279560565948, "learning_rate": 3.7835615787947475e-06, "loss": 0.0934, "step": 47113 }, { "epoch": 0.8403310384190061, "grad_norm": 0.24416770040988922, "learning_rate": 3.7827383165829343e-06, "loss": 0.1241, "step": 47114 }, { "epoch": 0.8403488745407198, "grad_norm": 0.2558423578739166, "learning_rate": 3.7819151366165566e-06, "loss": 0.0878, "step": 47115 }, { "epoch": 0.8403667106624335, "grad_norm": 0.35122865438461304, "learning_rate": 3.7810920388988093e-06, "loss": 0.1217, "step": 47116 }, { "epoch": 0.8403845467841472, "grad_norm": 0.2535398006439209, "learning_rate": 3.7802690234328754e-06, "loss": 0.109, "step": 47117 }, { "epoch": 0.840402382905861, "grad_norm": 0.26111915707588196, "learning_rate": 3.7794460902219416e-06, "loss": 0.1035, "step": 47118 }, { "epoch": 0.8404202190275747, "grad_norm": 0.22617845237255096, "learning_rate": 3.778623239269202e-06, "loss": 0.0858, "step": 47119 }, { "epoch": 0.8404380551492884, "grad_norm": 0.4816853106021881, "learning_rate": 3.777800470577858e-06, "loss": 0.1381, "step": 47120 }, { "epoch": 0.8404558912710021, "grad_norm": 0.3486986756324768, "learning_rate": 3.7769777841510894e-06, "loss": 0.1224, "step": 47121 }, { "epoch": 0.8404737273927158, "grad_norm": 0.28654128313064575, "learning_rate": 3.7761551799920773e-06, "loss": 0.119, "step": 47122 }, { "epoch": 0.8404915635144294, "grad_norm": 0.25931915640830994, "learning_rate": 3.7753326581040247e-06, "loss": 0.1158, "step": 47123 }, { "epoch": 0.8405093996361431, "grad_norm": 0.22576895356178284, "learning_rate": 3.7745102184901128e-06, "loss": 0.1459, "step": 47124 }, { "epoch": 0.8405272357578568, "grad_norm": 0.22982607781887054, "learning_rate": 3.7736878611535297e-06, "loss": 0.1105, "step": 47125 }, { "epoch": 0.8405450718795705, "grad_norm": 0.3667536675930023, "learning_rate": 3.7728655860974653e-06, "loss": 0.1097, "step": 47126 }, { "epoch": 0.8405629080012842, "grad_norm": 0.23805628716945648, "learning_rate": 3.7720433933251004e-06, "loss": 0.1245, "step": 47127 }, { "epoch": 0.8405807441229979, "grad_norm": 0.26823189854621887, "learning_rate": 3.771221282839632e-06, "loss": 0.1295, "step": 47128 }, { "epoch": 0.8405985802447116, "grad_norm": 0.2834422290325165, "learning_rate": 3.7703992546442414e-06, "loss": 0.1266, "step": 47129 }, { "epoch": 0.8406164163664253, "grad_norm": 0.28688740730285645, "learning_rate": 3.769577308742117e-06, "loss": 0.0754, "step": 47130 }, { "epoch": 0.8406342524881389, "grad_norm": 0.2513481378555298, "learning_rate": 3.768755445136438e-06, "loss": 0.1497, "step": 47131 }, { "epoch": 0.8406520886098526, "grad_norm": 0.42138680815696716, "learning_rate": 3.7679336638304004e-06, "loss": 0.1684, "step": 47132 }, { "epoch": 0.8406699247315663, "grad_norm": 0.21968132257461548, "learning_rate": 3.7671119648271803e-06, "loss": 0.0901, "step": 47133 }, { "epoch": 0.84068776085328, "grad_norm": 0.2773551940917969, "learning_rate": 3.766290348129972e-06, "loss": 0.0878, "step": 47134 }, { "epoch": 0.8407055969749938, "grad_norm": 0.13072916865348816, "learning_rate": 3.7654688137419543e-06, "loss": 0.04, "step": 47135 }, { "epoch": 0.8407234330967075, "grad_norm": 0.35075798630714417, "learning_rate": 3.764647361666315e-06, "loss": 0.1314, "step": 47136 }, { "epoch": 0.8407412692184212, "grad_norm": 0.30211958289146423, "learning_rate": 3.763825991906239e-06, "loss": 0.1325, "step": 47137 }, { "epoch": 0.8407591053401349, "grad_norm": 0.2839547097682953, "learning_rate": 3.763004704464909e-06, "loss": 0.1234, "step": 47138 }, { "epoch": 0.8407769414618486, "grad_norm": 0.2972462475299835, "learning_rate": 3.762183499345506e-06, "loss": 0.1574, "step": 47139 }, { "epoch": 0.8407947775835622, "grad_norm": 0.3199516236782074, "learning_rate": 3.7613623765512113e-06, "loss": 0.1281, "step": 47140 }, { "epoch": 0.8408126137052759, "grad_norm": 0.25842055678367615, "learning_rate": 3.760541336085216e-06, "loss": 0.1084, "step": 47141 }, { "epoch": 0.8408304498269896, "grad_norm": 0.3160461485385895, "learning_rate": 3.7597203779506982e-06, "loss": 0.1834, "step": 47142 }, { "epoch": 0.8408482859487033, "grad_norm": 0.2349184900522232, "learning_rate": 3.7588995021508424e-06, "loss": 0.1533, "step": 47143 }, { "epoch": 0.840866122070417, "grad_norm": 0.22943779826164246, "learning_rate": 3.75807870868882e-06, "loss": 0.1094, "step": 47144 }, { "epoch": 0.8408839581921307, "grad_norm": 0.27372583746910095, "learning_rate": 3.7572579975678284e-06, "loss": 0.1123, "step": 47145 }, { "epoch": 0.8409017943138444, "grad_norm": 0.2682541012763977, "learning_rate": 3.7564373687910354e-06, "loss": 0.1846, "step": 47146 }, { "epoch": 0.8409196304355581, "grad_norm": 0.2469344586133957, "learning_rate": 3.755616822361635e-06, "loss": 0.1391, "step": 47147 }, { "epoch": 0.8409374665572718, "grad_norm": 0.21677681803703308, "learning_rate": 3.7547963582827974e-06, "loss": 0.1145, "step": 47148 }, { "epoch": 0.8409553026789854, "grad_norm": 0.2241763323545456, "learning_rate": 3.753975976557708e-06, "loss": 0.1464, "step": 47149 }, { "epoch": 0.8409731388006991, "grad_norm": 0.2170475572347641, "learning_rate": 3.7531556771895486e-06, "loss": 0.1003, "step": 47150 }, { "epoch": 0.8409909749224128, "grad_norm": 0.24082481861114502, "learning_rate": 3.752335460181497e-06, "loss": 0.1018, "step": 47151 }, { "epoch": 0.8410088110441266, "grad_norm": 0.21301297843456268, "learning_rate": 3.751515325536731e-06, "loss": 0.0804, "step": 47152 }, { "epoch": 0.8410266471658403, "grad_norm": 0.2911536991596222, "learning_rate": 3.7506952732584256e-06, "loss": 0.1019, "step": 47153 }, { "epoch": 0.841044483287554, "grad_norm": 0.35989171266555786, "learning_rate": 3.74987530334977e-06, "loss": 0.1255, "step": 47154 }, { "epoch": 0.8410623194092677, "grad_norm": 0.21408697962760925, "learning_rate": 3.7490554158139372e-06, "loss": 0.0817, "step": 47155 }, { "epoch": 0.8410801555309814, "grad_norm": 0.2765394151210785, "learning_rate": 3.748235610654105e-06, "loss": 0.1084, "step": 47156 }, { "epoch": 0.841097991652695, "grad_norm": 0.1714177131652832, "learning_rate": 3.7474158878734456e-06, "loss": 0.0657, "step": 47157 }, { "epoch": 0.8411158277744087, "grad_norm": 0.27308839559555054, "learning_rate": 3.746596247475148e-06, "loss": 0.1047, "step": 47158 }, { "epoch": 0.8411336638961224, "grad_norm": 0.306439071893692, "learning_rate": 3.7457766894623854e-06, "loss": 0.0981, "step": 47159 }, { "epoch": 0.8411515000178361, "grad_norm": 0.6013026237487793, "learning_rate": 3.7449572138383266e-06, "loss": 0.1126, "step": 47160 }, { "epoch": 0.8411693361395498, "grad_norm": 0.29482531547546387, "learning_rate": 3.744137820606161e-06, "loss": 0.1117, "step": 47161 }, { "epoch": 0.8411871722612635, "grad_norm": 0.269481360912323, "learning_rate": 3.7433185097690533e-06, "loss": 0.1343, "step": 47162 }, { "epoch": 0.8412050083829772, "grad_norm": 0.30088502168655396, "learning_rate": 3.7424992813301916e-06, "loss": 0.148, "step": 47163 }, { "epoch": 0.8412228445046909, "grad_norm": 0.3397041857242584, "learning_rate": 3.741680135292744e-06, "loss": 0.0479, "step": 47164 }, { "epoch": 0.8412406806264046, "grad_norm": 0.24093548953533173, "learning_rate": 3.740861071659885e-06, "loss": 0.1071, "step": 47165 }, { "epoch": 0.8412585167481182, "grad_norm": 0.32494139671325684, "learning_rate": 3.7400420904347864e-06, "loss": 0.17, "step": 47166 }, { "epoch": 0.8412763528698319, "grad_norm": 0.34829822182655334, "learning_rate": 3.7392231916206334e-06, "loss": 0.1609, "step": 47167 }, { "epoch": 0.8412941889915457, "grad_norm": 0.2576776146888733, "learning_rate": 3.7384043752205944e-06, "loss": 0.0842, "step": 47168 }, { "epoch": 0.8413120251132594, "grad_norm": 0.19019296765327454, "learning_rate": 3.737585641237845e-06, "loss": 0.114, "step": 47169 }, { "epoch": 0.8413298612349731, "grad_norm": 0.22499391436576843, "learning_rate": 3.73676698967555e-06, "loss": 0.0678, "step": 47170 }, { "epoch": 0.8413476973566868, "grad_norm": 0.2032153308391571, "learning_rate": 3.7359484205368973e-06, "loss": 0.1002, "step": 47171 }, { "epoch": 0.8413655334784005, "grad_norm": 0.2861258387565613, "learning_rate": 3.7351299338250517e-06, "loss": 0.0874, "step": 47172 }, { "epoch": 0.8413833696001142, "grad_norm": 0.28178420662879944, "learning_rate": 3.7343115295431884e-06, "loss": 0.1331, "step": 47173 }, { "epoch": 0.8414012057218279, "grad_norm": 0.2825622260570526, "learning_rate": 3.7334932076944717e-06, "loss": 0.1188, "step": 47174 }, { "epoch": 0.8414190418435415, "grad_norm": 0.2185794860124588, "learning_rate": 3.7326749682820794e-06, "loss": 0.0711, "step": 47175 }, { "epoch": 0.8414368779652552, "grad_norm": 0.22176779806613922, "learning_rate": 3.7318568113091924e-06, "loss": 0.1064, "step": 47176 }, { "epoch": 0.8414547140869689, "grad_norm": 0.28279492259025574, "learning_rate": 3.7310387367789745e-06, "loss": 0.0701, "step": 47177 }, { "epoch": 0.8414725502086826, "grad_norm": 0.36179807782173157, "learning_rate": 3.730220744694596e-06, "loss": 0.1244, "step": 47178 }, { "epoch": 0.8414903863303963, "grad_norm": 0.3489610552787781, "learning_rate": 3.7294028350592204e-06, "loss": 0.1266, "step": 47179 }, { "epoch": 0.84150822245211, "grad_norm": 0.32125207781791687, "learning_rate": 3.728585007876034e-06, "loss": 0.1113, "step": 47180 }, { "epoch": 0.8415260585738237, "grad_norm": 0.27386870980262756, "learning_rate": 3.7277672631481965e-06, "loss": 0.1386, "step": 47181 }, { "epoch": 0.8415438946955374, "grad_norm": 0.23990623652935028, "learning_rate": 3.7269496008788845e-06, "loss": 0.0609, "step": 47182 }, { "epoch": 0.841561730817251, "grad_norm": 0.3674491047859192, "learning_rate": 3.7261320210712547e-06, "loss": 0.0983, "step": 47183 }, { "epoch": 0.8415795669389647, "grad_norm": 0.3062206506729126, "learning_rate": 3.7253145237284903e-06, "loss": 0.1934, "step": 47184 }, { "epoch": 0.8415974030606785, "grad_norm": 0.23742343485355377, "learning_rate": 3.7244971088537556e-06, "loss": 0.1062, "step": 47185 }, { "epoch": 0.8416152391823922, "grad_norm": 0.24429085850715637, "learning_rate": 3.7236797764502174e-06, "loss": 0.0987, "step": 47186 }, { "epoch": 0.8416330753041059, "grad_norm": 0.24329285323619843, "learning_rate": 3.7228625265210428e-06, "loss": 0.1077, "step": 47187 }, { "epoch": 0.8416509114258196, "grad_norm": 0.25650450587272644, "learning_rate": 3.7220453590694038e-06, "loss": 0.0791, "step": 47188 }, { "epoch": 0.8416687475475333, "grad_norm": 0.26111355423927307, "learning_rate": 3.7212282740984624e-06, "loss": 0.0947, "step": 47189 }, { "epoch": 0.841686583669247, "grad_norm": 0.2582864463329315, "learning_rate": 3.720411271611393e-06, "loss": 0.0823, "step": 47190 }, { "epoch": 0.8417044197909607, "grad_norm": 0.3363458812236786, "learning_rate": 3.7195943516113637e-06, "loss": 0.1281, "step": 47191 }, { "epoch": 0.8417222559126744, "grad_norm": 0.30207955837249756, "learning_rate": 3.7187775141015265e-06, "loss": 0.1042, "step": 47192 }, { "epoch": 0.841740092034388, "grad_norm": 0.23146916925907135, "learning_rate": 3.7179607590850653e-06, "loss": 0.0852, "step": 47193 }, { "epoch": 0.8417579281561017, "grad_norm": 0.27197352051734924, "learning_rate": 3.7171440865651412e-06, "loss": 0.1207, "step": 47194 }, { "epoch": 0.8417757642778154, "grad_norm": 0.33557239174842834, "learning_rate": 3.7163274965449133e-06, "loss": 0.1045, "step": 47195 }, { "epoch": 0.8417936003995291, "grad_norm": 0.23681728541851044, "learning_rate": 3.7155109890275487e-06, "loss": 0.1272, "step": 47196 }, { "epoch": 0.8418114365212428, "grad_norm": 0.3109634518623352, "learning_rate": 3.714694564016219e-06, "loss": 0.1025, "step": 47197 }, { "epoch": 0.8418292726429565, "grad_norm": 0.2443607896566391, "learning_rate": 3.713878221514086e-06, "loss": 0.1103, "step": 47198 }, { "epoch": 0.8418471087646702, "grad_norm": 0.17061349749565125, "learning_rate": 3.7130619615243113e-06, "loss": 0.0491, "step": 47199 }, { "epoch": 0.8418649448863839, "grad_norm": 0.267388254404068, "learning_rate": 3.7122457840500557e-06, "loss": 0.1152, "step": 47200 }, { "epoch": 0.8418827810080975, "grad_norm": 0.21858885884284973, "learning_rate": 3.7114296890944947e-06, "loss": 0.0826, "step": 47201 }, { "epoch": 0.8419006171298113, "grad_norm": 0.1882113814353943, "learning_rate": 3.710613676660779e-06, "loss": 0.0611, "step": 47202 }, { "epoch": 0.841918453251525, "grad_norm": 0.19960112869739532, "learning_rate": 3.7097977467520832e-06, "loss": 0.0676, "step": 47203 }, { "epoch": 0.8419362893732387, "grad_norm": 0.24927645921707153, "learning_rate": 3.7089818993715638e-06, "loss": 0.1044, "step": 47204 }, { "epoch": 0.8419541254949524, "grad_norm": 0.25596579909324646, "learning_rate": 3.708166134522381e-06, "loss": 0.0917, "step": 47205 }, { "epoch": 0.8419719616166661, "grad_norm": 0.5513489246368408, "learning_rate": 3.7073504522077028e-06, "loss": 0.1379, "step": 47206 }, { "epoch": 0.8419897977383798, "grad_norm": 0.2549701929092407, "learning_rate": 3.7065348524306902e-06, "loss": 0.0674, "step": 47207 }, { "epoch": 0.8420076338600935, "grad_norm": 0.6302520036697388, "learning_rate": 3.705719335194502e-06, "loss": 0.1477, "step": 47208 }, { "epoch": 0.8420254699818072, "grad_norm": 0.2891847491264343, "learning_rate": 3.704903900502296e-06, "loss": 0.0834, "step": 47209 }, { "epoch": 0.8420433061035208, "grad_norm": 0.19554609060287476, "learning_rate": 3.704088548357243e-06, "loss": 0.0636, "step": 47210 }, { "epoch": 0.8420611422252345, "grad_norm": 0.3935930132865906, "learning_rate": 3.703273278762498e-06, "loss": 0.1661, "step": 47211 }, { "epoch": 0.8420789783469482, "grad_norm": 0.35310694575309753, "learning_rate": 3.7024580917212225e-06, "loss": 0.1535, "step": 47212 }, { "epoch": 0.8420968144686619, "grad_norm": 0.32745763659477234, "learning_rate": 3.701642987236567e-06, "loss": 0.0668, "step": 47213 }, { "epoch": 0.8421146505903756, "grad_norm": 0.2037838250398636, "learning_rate": 3.7008279653117093e-06, "loss": 0.0848, "step": 47214 }, { "epoch": 0.8421324867120893, "grad_norm": 0.24979925155639648, "learning_rate": 3.7000130259497944e-06, "loss": 0.1398, "step": 47215 }, { "epoch": 0.842150322833803, "grad_norm": 0.3978791832923889, "learning_rate": 3.6991981691539835e-06, "loss": 0.1498, "step": 47216 }, { "epoch": 0.8421681589555167, "grad_norm": 0.23118583858013153, "learning_rate": 3.6983833949274407e-06, "loss": 0.1009, "step": 47217 }, { "epoch": 0.8421859950772304, "grad_norm": 0.22183813154697418, "learning_rate": 3.6975687032733185e-06, "loss": 0.0854, "step": 47218 }, { "epoch": 0.8422038311989442, "grad_norm": 0.3228885531425476, "learning_rate": 3.696754094194782e-06, "loss": 0.1048, "step": 47219 }, { "epoch": 0.8422216673206578, "grad_norm": 0.2788088619709015, "learning_rate": 3.695939567694984e-06, "loss": 0.1152, "step": 47220 }, { "epoch": 0.8422395034423715, "grad_norm": 0.2885431945323944, "learning_rate": 3.695125123777082e-06, "loss": 0.0798, "step": 47221 }, { "epoch": 0.8422573395640852, "grad_norm": 0.22045758366584778, "learning_rate": 3.694310762444228e-06, "loss": 0.0895, "step": 47222 }, { "epoch": 0.8422751756857989, "grad_norm": 0.2593652606010437, "learning_rate": 3.6934964836995907e-06, "loss": 0.084, "step": 47223 }, { "epoch": 0.8422930118075126, "grad_norm": 0.2291940152645111, "learning_rate": 3.69268228754632e-06, "loss": 0.132, "step": 47224 }, { "epoch": 0.8423108479292263, "grad_norm": 0.38843873143196106, "learning_rate": 3.69186817398757e-06, "loss": 0.0973, "step": 47225 }, { "epoch": 0.84232868405094, "grad_norm": 0.2545664310455322, "learning_rate": 3.6910541430264955e-06, "loss": 0.1159, "step": 47226 }, { "epoch": 0.8423465201726537, "grad_norm": 0.23164300620555878, "learning_rate": 3.6902401946662585e-06, "loss": 0.0816, "step": 47227 }, { "epoch": 0.8423643562943673, "grad_norm": 0.22125941514968872, "learning_rate": 3.6894263289100115e-06, "loss": 0.0718, "step": 47228 }, { "epoch": 0.842382192416081, "grad_norm": 0.3010319471359253, "learning_rate": 3.6886125457609056e-06, "loss": 0.1211, "step": 47229 }, { "epoch": 0.8424000285377947, "grad_norm": 0.2706752419471741, "learning_rate": 3.6877988452221014e-06, "loss": 0.1108, "step": 47230 }, { "epoch": 0.8424178646595084, "grad_norm": 0.27833816409111023, "learning_rate": 3.6869852272967435e-06, "loss": 0.1409, "step": 47231 }, { "epoch": 0.8424357007812221, "grad_norm": 0.241594597697258, "learning_rate": 3.686171691987997e-06, "loss": 0.1379, "step": 47232 }, { "epoch": 0.8424535369029358, "grad_norm": 0.3220077455043793, "learning_rate": 3.6853582392990138e-06, "loss": 0.095, "step": 47233 }, { "epoch": 0.8424713730246495, "grad_norm": 0.3200768232345581, "learning_rate": 3.684544869232942e-06, "loss": 0.1218, "step": 47234 }, { "epoch": 0.8424892091463632, "grad_norm": 0.20809081196784973, "learning_rate": 3.6837315817929318e-06, "loss": 0.0845, "step": 47235 }, { "epoch": 0.842507045268077, "grad_norm": 0.2517867982387543, "learning_rate": 3.6829183769821445e-06, "loss": 0.1304, "step": 47236 }, { "epoch": 0.8425248813897906, "grad_norm": 0.332599014043808, "learning_rate": 3.6821052548037276e-06, "loss": 0.0906, "step": 47237 }, { "epoch": 0.8425427175115043, "grad_norm": 0.2423558384180069, "learning_rate": 3.681292215260837e-06, "loss": 0.1135, "step": 47238 }, { "epoch": 0.842560553633218, "grad_norm": 0.18209552764892578, "learning_rate": 3.6804792583566116e-06, "loss": 0.1141, "step": 47239 }, { "epoch": 0.8425783897549317, "grad_norm": 0.21611355245113373, "learning_rate": 3.6796663840942215e-06, "loss": 0.0859, "step": 47240 }, { "epoch": 0.8425962258766454, "grad_norm": 0.34535863995552063, "learning_rate": 3.6788535924768053e-06, "loss": 0.2071, "step": 47241 }, { "epoch": 0.8426140619983591, "grad_norm": 0.21731248497962952, "learning_rate": 3.6780408835075192e-06, "loss": 0.117, "step": 47242 }, { "epoch": 0.8426318981200728, "grad_norm": 0.28919926285743713, "learning_rate": 3.677228257189502e-06, "loss": 0.0837, "step": 47243 }, { "epoch": 0.8426497342417865, "grad_norm": 0.20098577439785004, "learning_rate": 3.6764157135259213e-06, "loss": 0.1228, "step": 47244 }, { "epoch": 0.8426675703635002, "grad_norm": 0.24654437601566315, "learning_rate": 3.67560325251991e-06, "loss": 0.1268, "step": 47245 }, { "epoch": 0.8426854064852138, "grad_norm": 0.24432142078876495, "learning_rate": 3.674790874174636e-06, "loss": 0.1164, "step": 47246 }, { "epoch": 0.8427032426069275, "grad_norm": 0.2780130207538605, "learning_rate": 3.673978578493234e-06, "loss": 0.0699, "step": 47247 }, { "epoch": 0.8427210787286412, "grad_norm": 0.2677513062953949, "learning_rate": 3.67316636547885e-06, "loss": 0.1012, "step": 47248 }, { "epoch": 0.8427389148503549, "grad_norm": 0.23376131057739258, "learning_rate": 3.672354235134648e-06, "loss": 0.068, "step": 47249 }, { "epoch": 0.8427567509720686, "grad_norm": 0.24621011316776276, "learning_rate": 3.671542187463767e-06, "loss": 0.1482, "step": 47250 }, { "epoch": 0.8427745870937823, "grad_norm": 0.18394404649734497, "learning_rate": 3.670730222469354e-06, "loss": 0.0717, "step": 47251 }, { "epoch": 0.842792423215496, "grad_norm": 0.3231929838657379, "learning_rate": 3.6699183401545516e-06, "loss": 0.1223, "step": 47252 }, { "epoch": 0.8428102593372098, "grad_norm": 0.25396332144737244, "learning_rate": 3.6691065405225212e-06, "loss": 0.0893, "step": 47253 }, { "epoch": 0.8428280954589235, "grad_norm": 0.265811562538147, "learning_rate": 3.6682948235763988e-06, "loss": 0.0967, "step": 47254 }, { "epoch": 0.8428459315806371, "grad_norm": 0.3177051842212677, "learning_rate": 3.6674831893193347e-06, "loss": 0.1091, "step": 47255 }, { "epoch": 0.8428637677023508, "grad_norm": 0.25777530670166016, "learning_rate": 3.6666716377544653e-06, "loss": 0.1545, "step": 47256 }, { "epoch": 0.8428816038240645, "grad_norm": 0.288001149892807, "learning_rate": 3.665860168884955e-06, "loss": 0.0827, "step": 47257 }, { "epoch": 0.8428994399457782, "grad_norm": 0.30498483777046204, "learning_rate": 3.6650487827139312e-06, "loss": 0.158, "step": 47258 }, { "epoch": 0.8429172760674919, "grad_norm": 0.31337258219718933, "learning_rate": 3.6642374792445504e-06, "loss": 0.1595, "step": 47259 }, { "epoch": 0.8429351121892056, "grad_norm": 0.2762129008769989, "learning_rate": 3.6634262584799574e-06, "loss": 0.0865, "step": 47260 }, { "epoch": 0.8429529483109193, "grad_norm": 0.254638135433197, "learning_rate": 3.662615120423288e-06, "loss": 0.0836, "step": 47261 }, { "epoch": 0.842970784432633, "grad_norm": 0.2897399663925171, "learning_rate": 3.661804065077698e-06, "loss": 0.1213, "step": 47262 }, { "epoch": 0.8429886205543466, "grad_norm": 0.2596624791622162, "learning_rate": 3.6609930924463242e-06, "loss": 0.1179, "step": 47263 }, { "epoch": 0.8430064566760603, "grad_norm": 0.27492207288742065, "learning_rate": 3.6601822025323112e-06, "loss": 0.0906, "step": 47264 }, { "epoch": 0.843024292797774, "grad_norm": 0.29992106556892395, "learning_rate": 3.659371395338798e-06, "loss": 0.1697, "step": 47265 }, { "epoch": 0.8430421289194877, "grad_norm": 0.2744263708591461, "learning_rate": 3.658560670868938e-06, "loss": 0.0537, "step": 47266 }, { "epoch": 0.8430599650412014, "grad_norm": 0.26597514748573303, "learning_rate": 3.6577500291258672e-06, "loss": 0.07, "step": 47267 }, { "epoch": 0.8430778011629151, "grad_norm": 0.28149116039276123, "learning_rate": 3.6569394701127273e-06, "loss": 0.0714, "step": 47268 }, { "epoch": 0.8430956372846289, "grad_norm": 0.29164019227027893, "learning_rate": 3.6561289938326635e-06, "loss": 0.1425, "step": 47269 }, { "epoch": 0.8431134734063426, "grad_norm": 0.2393653690814972, "learning_rate": 3.6553186002888095e-06, "loss": 0.0994, "step": 47270 }, { "epoch": 0.8431313095280563, "grad_norm": 0.3910019099712372, "learning_rate": 3.654508289484318e-06, "loss": 0.1224, "step": 47271 }, { "epoch": 0.84314914564977, "grad_norm": 0.3971771001815796, "learning_rate": 3.65369806142232e-06, "loss": 0.0783, "step": 47272 }, { "epoch": 0.8431669817714836, "grad_norm": 0.26440882682800293, "learning_rate": 3.6528879161059655e-06, "loss": 0.1391, "step": 47273 }, { "epoch": 0.8431848178931973, "grad_norm": 0.34841808676719666, "learning_rate": 3.6520778535383853e-06, "loss": 0.1167, "step": 47274 }, { "epoch": 0.843202654014911, "grad_norm": 0.3352072238922119, "learning_rate": 3.6512678737227304e-06, "loss": 0.1141, "step": 47275 }, { "epoch": 0.8432204901366247, "grad_norm": 0.29006505012512207, "learning_rate": 3.650457976662136e-06, "loss": 0.1092, "step": 47276 }, { "epoch": 0.8432383262583384, "grad_norm": 0.2419443279504776, "learning_rate": 3.6496481623597393e-06, "loss": 0.0855, "step": 47277 }, { "epoch": 0.8432561623800521, "grad_norm": 0.30216437578201294, "learning_rate": 3.6488384308186743e-06, "loss": 0.1055, "step": 47278 }, { "epoch": 0.8432739985017658, "grad_norm": 0.23101922869682312, "learning_rate": 3.648028782042093e-06, "loss": 0.1323, "step": 47279 }, { "epoch": 0.8432918346234795, "grad_norm": 0.39689844846725464, "learning_rate": 3.6472192160331264e-06, "loss": 0.1208, "step": 47280 }, { "epoch": 0.8433096707451931, "grad_norm": 0.21119514107704163, "learning_rate": 3.6464097327949142e-06, "loss": 0.0729, "step": 47281 }, { "epoch": 0.8433275068669068, "grad_norm": 0.2485380619764328, "learning_rate": 3.64560033233059e-06, "loss": 0.0981, "step": 47282 }, { "epoch": 0.8433453429886205, "grad_norm": 0.21566466987133026, "learning_rate": 3.6447910146432923e-06, "loss": 0.1072, "step": 47283 }, { "epoch": 0.8433631791103342, "grad_norm": 0.20807580649852753, "learning_rate": 3.643981779736164e-06, "loss": 0.0643, "step": 47284 }, { "epoch": 0.8433810152320479, "grad_norm": 0.28790777921676636, "learning_rate": 3.6431726276123345e-06, "loss": 0.128, "step": 47285 }, { "epoch": 0.8433988513537617, "grad_norm": 0.2902168333530426, "learning_rate": 3.642363558274947e-06, "loss": 0.1373, "step": 47286 }, { "epoch": 0.8434166874754754, "grad_norm": 0.24145163595676422, "learning_rate": 3.641554571727132e-06, "loss": 0.0932, "step": 47287 }, { "epoch": 0.8434345235971891, "grad_norm": 0.42242881655693054, "learning_rate": 3.640745667972034e-06, "loss": 0.1574, "step": 47288 }, { "epoch": 0.8434523597189028, "grad_norm": 0.3094054162502289, "learning_rate": 3.6399368470127813e-06, "loss": 0.1037, "step": 47289 }, { "epoch": 0.8434701958406164, "grad_norm": 0.2627965211868286, "learning_rate": 3.639128108852513e-06, "loss": 0.1186, "step": 47290 }, { "epoch": 0.8434880319623301, "grad_norm": 0.2837042212486267, "learning_rate": 3.6383194534943544e-06, "loss": 0.1556, "step": 47291 }, { "epoch": 0.8435058680840438, "grad_norm": 0.22625494003295898, "learning_rate": 3.6375108809414555e-06, "loss": 0.109, "step": 47292 }, { "epoch": 0.8435237042057575, "grad_norm": 0.20810389518737793, "learning_rate": 3.6367023911969417e-06, "loss": 0.0677, "step": 47293 }, { "epoch": 0.8435415403274712, "grad_norm": 0.2682685852050781, "learning_rate": 3.635893984263947e-06, "loss": 0.1439, "step": 47294 }, { "epoch": 0.8435593764491849, "grad_norm": 0.25742673873901367, "learning_rate": 3.635085660145607e-06, "loss": 0.1165, "step": 47295 }, { "epoch": 0.8435772125708986, "grad_norm": 0.37175577878952026, "learning_rate": 3.63427741884505e-06, "loss": 0.0821, "step": 47296 }, { "epoch": 0.8435950486926123, "grad_norm": 0.22788120806217194, "learning_rate": 3.6334692603654184e-06, "loss": 0.1169, "step": 47297 }, { "epoch": 0.843612884814326, "grad_norm": 0.2793791592121124, "learning_rate": 3.6326611847098375e-06, "loss": 0.1012, "step": 47298 }, { "epoch": 0.8436307209360396, "grad_norm": 0.3146321773529053, "learning_rate": 3.6318531918814373e-06, "loss": 0.1108, "step": 47299 }, { "epoch": 0.8436485570577533, "grad_norm": 0.29969334602355957, "learning_rate": 3.6310452818833553e-06, "loss": 0.1067, "step": 47300 }, { "epoch": 0.843666393179467, "grad_norm": 0.26239240169525146, "learning_rate": 3.6302374547187266e-06, "loss": 0.151, "step": 47301 }, { "epoch": 0.8436842293011807, "grad_norm": 0.2955058515071869, "learning_rate": 3.6294297103906803e-06, "loss": 0.0925, "step": 47302 }, { "epoch": 0.8437020654228945, "grad_norm": 0.2752370536327362, "learning_rate": 3.6286220489023444e-06, "loss": 0.0749, "step": 47303 }, { "epoch": 0.8437199015446082, "grad_norm": 0.2915973365306854, "learning_rate": 3.6278144702568462e-06, "loss": 0.1361, "step": 47304 }, { "epoch": 0.8437377376663219, "grad_norm": 0.26207631826400757, "learning_rate": 3.6270069744573252e-06, "loss": 0.0861, "step": 47305 }, { "epoch": 0.8437555737880356, "grad_norm": 0.2538292407989502, "learning_rate": 3.6261995615069074e-06, "loss": 0.1599, "step": 47306 }, { "epoch": 0.8437734099097493, "grad_norm": 0.27881550788879395, "learning_rate": 3.6253922314087225e-06, "loss": 0.0918, "step": 47307 }, { "epoch": 0.8437912460314629, "grad_norm": 0.32826316356658936, "learning_rate": 3.624584984165899e-06, "loss": 0.1032, "step": 47308 }, { "epoch": 0.8438090821531766, "grad_norm": 0.35892581939697266, "learning_rate": 3.6237778197815624e-06, "loss": 0.1221, "step": 47309 }, { "epoch": 0.8438269182748903, "grad_norm": 0.27105948328971863, "learning_rate": 3.622970738258852e-06, "loss": 0.1215, "step": 47310 }, { "epoch": 0.843844754396604, "grad_norm": 0.2932094931602478, "learning_rate": 3.6221637396008895e-06, "loss": 0.1253, "step": 47311 }, { "epoch": 0.8438625905183177, "grad_norm": 0.22844460606575012, "learning_rate": 3.621356823810798e-06, "loss": 0.0827, "step": 47312 }, { "epoch": 0.8438804266400314, "grad_norm": 0.2548949122428894, "learning_rate": 3.6205499908917196e-06, "loss": 0.0986, "step": 47313 }, { "epoch": 0.8438982627617451, "grad_norm": 0.24343626201152802, "learning_rate": 3.6197432408467653e-06, "loss": 0.0956, "step": 47314 }, { "epoch": 0.8439160988834588, "grad_norm": 0.3302106559276581, "learning_rate": 3.618936573679077e-06, "loss": 0.1021, "step": 47315 }, { "epoch": 0.8439339350051724, "grad_norm": 0.2927775979042053, "learning_rate": 3.6181299893917777e-06, "loss": 0.1189, "step": 47316 }, { "epoch": 0.8439517711268861, "grad_norm": 0.4156448543071747, "learning_rate": 3.6173234879879837e-06, "loss": 0.1202, "step": 47317 }, { "epoch": 0.8439696072485998, "grad_norm": 0.2190490961074829, "learning_rate": 3.6165170694708343e-06, "loss": 0.1087, "step": 47318 }, { "epoch": 0.8439874433703135, "grad_norm": 0.42909204959869385, "learning_rate": 3.61571073384345e-06, "loss": 0.059, "step": 47319 }, { "epoch": 0.8440052794920273, "grad_norm": 0.2620382606983185, "learning_rate": 3.6149044811089576e-06, "loss": 0.1272, "step": 47320 }, { "epoch": 0.844023115613741, "grad_norm": 0.3202836513519287, "learning_rate": 3.6140983112704836e-06, "loss": 0.0863, "step": 47321 }, { "epoch": 0.8440409517354547, "grad_norm": 0.24201911687850952, "learning_rate": 3.613292224331144e-06, "loss": 0.1144, "step": 47322 }, { "epoch": 0.8440587878571684, "grad_norm": 0.31293395161628723, "learning_rate": 3.6124862202940756e-06, "loss": 0.1436, "step": 47323 }, { "epoch": 0.8440766239788821, "grad_norm": 0.2554352581501007, "learning_rate": 3.611680299162398e-06, "loss": 0.0687, "step": 47324 }, { "epoch": 0.8440944601005957, "grad_norm": 0.2146880179643631, "learning_rate": 3.6108744609392343e-06, "loss": 0.1078, "step": 47325 }, { "epoch": 0.8441122962223094, "grad_norm": 0.2741975486278534, "learning_rate": 3.6100687056277033e-06, "loss": 0.1299, "step": 47326 }, { "epoch": 0.8441301323440231, "grad_norm": 0.3124517500400543, "learning_rate": 3.6092630332309392e-06, "loss": 0.1704, "step": 47327 }, { "epoch": 0.8441479684657368, "grad_norm": 0.22870393097400665, "learning_rate": 3.6084574437520557e-06, "loss": 0.0664, "step": 47328 }, { "epoch": 0.8441658045874505, "grad_norm": 0.3728640079498291, "learning_rate": 3.6076519371941842e-06, "loss": 0.1805, "step": 47329 }, { "epoch": 0.8441836407091642, "grad_norm": 0.3032281696796417, "learning_rate": 3.6068465135604355e-06, "loss": 0.0802, "step": 47330 }, { "epoch": 0.8442014768308779, "grad_norm": 0.3359457552433014, "learning_rate": 3.6060411728539466e-06, "loss": 0.1278, "step": 47331 }, { "epoch": 0.8442193129525916, "grad_norm": 0.3064267039299011, "learning_rate": 3.6052359150778285e-06, "loss": 0.1469, "step": 47332 }, { "epoch": 0.8442371490743052, "grad_norm": 0.23197786509990692, "learning_rate": 3.6044307402352095e-06, "loss": 0.0958, "step": 47333 }, { "epoch": 0.8442549851960189, "grad_norm": 0.2805977165699005, "learning_rate": 3.6036256483292036e-06, "loss": 0.0858, "step": 47334 }, { "epoch": 0.8442728213177326, "grad_norm": 0.29823020100593567, "learning_rate": 3.6028206393629304e-06, "loss": 0.1231, "step": 47335 }, { "epoch": 0.8442906574394463, "grad_norm": 0.22281208634376526, "learning_rate": 3.6020157133395185e-06, "loss": 0.1185, "step": 47336 }, { "epoch": 0.8443084935611601, "grad_norm": 0.2655372619628906, "learning_rate": 3.6012108702620866e-06, "loss": 0.1016, "step": 47337 }, { "epoch": 0.8443263296828738, "grad_norm": 0.2519085705280304, "learning_rate": 3.6004061101337527e-06, "loss": 0.109, "step": 47338 }, { "epoch": 0.8443441658045875, "grad_norm": 0.24964651465415955, "learning_rate": 3.599601432957628e-06, "loss": 0.125, "step": 47339 }, { "epoch": 0.8443620019263012, "grad_norm": 0.24692180752754211, "learning_rate": 3.5987968387368453e-06, "loss": 0.0633, "step": 47340 }, { "epoch": 0.8443798380480149, "grad_norm": 0.2330651432275772, "learning_rate": 3.597992327474514e-06, "loss": 0.1086, "step": 47341 }, { "epoch": 0.8443976741697286, "grad_norm": 0.24702002108097076, "learning_rate": 3.5971878991737623e-06, "loss": 0.1098, "step": 47342 }, { "epoch": 0.8444155102914422, "grad_norm": 0.4640333652496338, "learning_rate": 3.5963835538376956e-06, "loss": 0.1358, "step": 47343 }, { "epoch": 0.8444333464131559, "grad_norm": 0.30912286043167114, "learning_rate": 3.5955792914694446e-06, "loss": 0.1519, "step": 47344 }, { "epoch": 0.8444511825348696, "grad_norm": 0.28649622201919556, "learning_rate": 3.5947751120721213e-06, "loss": 0.1731, "step": 47345 }, { "epoch": 0.8444690186565833, "grad_norm": 0.29214009642601013, "learning_rate": 3.5939710156488423e-06, "loss": 0.0992, "step": 47346 }, { "epoch": 0.844486854778297, "grad_norm": 0.2939797043800354, "learning_rate": 3.5931670022027243e-06, "loss": 0.1076, "step": 47347 }, { "epoch": 0.8445046909000107, "grad_norm": 0.28265005350112915, "learning_rate": 3.5923630717368815e-06, "loss": 0.1495, "step": 47348 }, { "epoch": 0.8445225270217244, "grad_norm": 0.21351316571235657, "learning_rate": 3.591559224254437e-06, "loss": 0.0875, "step": 47349 }, { "epoch": 0.844540363143438, "grad_norm": 0.2238522619009018, "learning_rate": 3.5907554597585048e-06, "loss": 0.1065, "step": 47350 }, { "epoch": 0.8445581992651517, "grad_norm": 0.21403296291828156, "learning_rate": 3.5899517782521987e-06, "loss": 0.0839, "step": 47351 }, { "epoch": 0.8445760353868654, "grad_norm": 0.3860260844230652, "learning_rate": 3.589148179738627e-06, "loss": 0.1393, "step": 47352 }, { "epoch": 0.8445938715085791, "grad_norm": 0.3730800747871399, "learning_rate": 3.588344664220919e-06, "loss": 0.1159, "step": 47353 }, { "epoch": 0.8446117076302929, "grad_norm": 0.27137458324432373, "learning_rate": 3.5875412317021824e-06, "loss": 0.1235, "step": 47354 }, { "epoch": 0.8446295437520066, "grad_norm": 0.2496897280216217, "learning_rate": 3.586737882185526e-06, "loss": 0.0575, "step": 47355 }, { "epoch": 0.8446473798737203, "grad_norm": 0.29221904277801514, "learning_rate": 3.5859346156740694e-06, "loss": 0.1031, "step": 47356 }, { "epoch": 0.844665215995434, "grad_norm": 0.33788564801216125, "learning_rate": 3.5851314321709294e-06, "loss": 0.1222, "step": 47357 }, { "epoch": 0.8446830521171477, "grad_norm": 0.14239247143268585, "learning_rate": 3.584328331679221e-06, "loss": 0.0396, "step": 47358 }, { "epoch": 0.8447008882388614, "grad_norm": 0.20731514692306519, "learning_rate": 3.5835253142020493e-06, "loss": 0.137, "step": 47359 }, { "epoch": 0.844718724360575, "grad_norm": 0.2822842597961426, "learning_rate": 3.5827223797425315e-06, "loss": 0.0812, "step": 47360 }, { "epoch": 0.8447365604822887, "grad_norm": 0.3031470775604248, "learning_rate": 3.581919528303773e-06, "loss": 0.1061, "step": 47361 }, { "epoch": 0.8447543966040024, "grad_norm": 0.2415207028388977, "learning_rate": 3.5811167598889e-06, "loss": 0.1291, "step": 47362 }, { "epoch": 0.8447722327257161, "grad_norm": 0.28498753905296326, "learning_rate": 3.5803140745010148e-06, "loss": 0.0835, "step": 47363 }, { "epoch": 0.8447900688474298, "grad_norm": 0.28839969635009766, "learning_rate": 3.5795114721432293e-06, "loss": 0.0994, "step": 47364 }, { "epoch": 0.8448079049691435, "grad_norm": 0.26176631450653076, "learning_rate": 3.578708952818652e-06, "loss": 0.1081, "step": 47365 }, { "epoch": 0.8448257410908572, "grad_norm": 0.2957668900489807, "learning_rate": 3.5779065165304023e-06, "loss": 0.1612, "step": 47366 }, { "epoch": 0.8448435772125709, "grad_norm": 0.2040109932422638, "learning_rate": 3.577104163281586e-06, "loss": 0.1232, "step": 47367 }, { "epoch": 0.8448614133342846, "grad_norm": 0.2923802137374878, "learning_rate": 3.5763018930753067e-06, "loss": 0.107, "step": 47368 }, { "epoch": 0.8448792494559982, "grad_norm": 0.23593628406524658, "learning_rate": 3.5754997059146867e-06, "loss": 0.1288, "step": 47369 }, { "epoch": 0.844897085577712, "grad_norm": 0.2378631979227066, "learning_rate": 3.5746976018028233e-06, "loss": 0.0929, "step": 47370 }, { "epoch": 0.8449149216994257, "grad_norm": 0.27903860807418823, "learning_rate": 3.5738955807428393e-06, "loss": 0.1091, "step": 47371 }, { "epoch": 0.8449327578211394, "grad_norm": 0.26834574341773987, "learning_rate": 3.573093642737835e-06, "loss": 0.0719, "step": 47372 }, { "epoch": 0.8449505939428531, "grad_norm": 0.3107265830039978, "learning_rate": 3.5722917877909213e-06, "loss": 0.1485, "step": 47373 }, { "epoch": 0.8449684300645668, "grad_norm": 0.24711358547210693, "learning_rate": 3.571490015905199e-06, "loss": 0.1127, "step": 47374 }, { "epoch": 0.8449862661862805, "grad_norm": 0.28482112288475037, "learning_rate": 3.5706883270837903e-06, "loss": 0.118, "step": 47375 }, { "epoch": 0.8450041023079942, "grad_norm": 0.4752761125564575, "learning_rate": 3.569886721329793e-06, "loss": 0.0893, "step": 47376 }, { "epoch": 0.8450219384297079, "grad_norm": 0.27395132184028625, "learning_rate": 3.569085198646316e-06, "loss": 0.1118, "step": 47377 }, { "epoch": 0.8450397745514215, "grad_norm": 0.3193538784980774, "learning_rate": 3.568283759036464e-06, "loss": 0.1463, "step": 47378 }, { "epoch": 0.8450576106731352, "grad_norm": 0.3200143575668335, "learning_rate": 3.56748240250335e-06, "loss": 0.1255, "step": 47379 }, { "epoch": 0.8450754467948489, "grad_norm": 0.23412470519542694, "learning_rate": 3.566681129050076e-06, "loss": 0.1276, "step": 47380 }, { "epoch": 0.8450932829165626, "grad_norm": 0.25210949778556824, "learning_rate": 3.56587993867975e-06, "loss": 0.0611, "step": 47381 }, { "epoch": 0.8451111190382763, "grad_norm": 0.31062883138656616, "learning_rate": 3.5650788313954716e-06, "loss": 0.1343, "step": 47382 }, { "epoch": 0.84512895515999, "grad_norm": 0.20535065233707428, "learning_rate": 3.5642778072003504e-06, "loss": 0.1134, "step": 47383 }, { "epoch": 0.8451467912817037, "grad_norm": 0.19917286932468414, "learning_rate": 3.5634768660974956e-06, "loss": 0.061, "step": 47384 }, { "epoch": 0.8451646274034174, "grad_norm": 0.2603689730167389, "learning_rate": 3.56267600809001e-06, "loss": 0.0836, "step": 47385 }, { "epoch": 0.845182463525131, "grad_norm": 0.26594775915145874, "learning_rate": 3.5618752331809974e-06, "loss": 0.0993, "step": 47386 }, { "epoch": 0.8452002996468448, "grad_norm": 0.3551575243473053, "learning_rate": 3.5610745413735546e-06, "loss": 0.1809, "step": 47387 }, { "epoch": 0.8452181357685585, "grad_norm": 0.2946797013282776, "learning_rate": 3.5602739326707962e-06, "loss": 0.1626, "step": 47388 }, { "epoch": 0.8452359718902722, "grad_norm": 0.30070677399635315, "learning_rate": 3.5594734070758223e-06, "loss": 0.0839, "step": 47389 }, { "epoch": 0.8452538080119859, "grad_norm": 0.25644591450691223, "learning_rate": 3.558672964591736e-06, "loss": 0.1137, "step": 47390 }, { "epoch": 0.8452716441336996, "grad_norm": 0.43660518527030945, "learning_rate": 3.557872605221632e-06, "loss": 0.1705, "step": 47391 }, { "epoch": 0.8452894802554133, "grad_norm": 0.1884179711341858, "learning_rate": 3.5570723289686247e-06, "loss": 0.1029, "step": 47392 }, { "epoch": 0.845307316377127, "grad_norm": 0.3108867108821869, "learning_rate": 3.5562721358358114e-06, "loss": 0.1132, "step": 47393 }, { "epoch": 0.8453251524988407, "grad_norm": 0.30239301919937134, "learning_rate": 3.555472025826295e-06, "loss": 0.1641, "step": 47394 }, { "epoch": 0.8453429886205543, "grad_norm": 0.33123305439949036, "learning_rate": 3.554671998943168e-06, "loss": 0.0976, "step": 47395 }, { "epoch": 0.845360824742268, "grad_norm": 0.28957194089889526, "learning_rate": 3.5538720551895473e-06, "loss": 0.0864, "step": 47396 }, { "epoch": 0.8453786608639817, "grad_norm": 0.22637823224067688, "learning_rate": 3.5530721945685184e-06, "loss": 0.1022, "step": 47397 }, { "epoch": 0.8453964969856954, "grad_norm": 0.22780421376228333, "learning_rate": 3.552272417083194e-06, "loss": 0.0693, "step": 47398 }, { "epoch": 0.8454143331074091, "grad_norm": 0.3114381432533264, "learning_rate": 3.551472722736665e-06, "loss": 0.1105, "step": 47399 }, { "epoch": 0.8454321692291228, "grad_norm": 0.2825041711330414, "learning_rate": 3.5506731115320406e-06, "loss": 0.1362, "step": 47400 }, { "epoch": 0.8454500053508365, "grad_norm": 0.24676674604415894, "learning_rate": 3.5498735834724183e-06, "loss": 0.0988, "step": 47401 }, { "epoch": 0.8454678414725502, "grad_norm": 0.2726946175098419, "learning_rate": 3.549074138560893e-06, "loss": 0.107, "step": 47402 }, { "epoch": 0.8454856775942639, "grad_norm": 0.4542251527309418, "learning_rate": 3.5482747768005643e-06, "loss": 0.1515, "step": 47403 }, { "epoch": 0.8455035137159777, "grad_norm": 0.31879186630249023, "learning_rate": 3.5474754981945272e-06, "loss": 0.0973, "step": 47404 }, { "epoch": 0.8455213498376913, "grad_norm": 0.35587841272354126, "learning_rate": 3.5466763027458884e-06, "loss": 0.1494, "step": 47405 }, { "epoch": 0.845539185959405, "grad_norm": 0.2953929901123047, "learning_rate": 3.545877190457744e-06, "loss": 0.1112, "step": 47406 }, { "epoch": 0.8455570220811187, "grad_norm": 0.3713397979736328, "learning_rate": 3.54507816133319e-06, "loss": 0.1583, "step": 47407 }, { "epoch": 0.8455748582028324, "grad_norm": 0.2765306532382965, "learning_rate": 3.544279215375315e-06, "loss": 0.1225, "step": 47408 }, { "epoch": 0.8455926943245461, "grad_norm": 0.26851508021354675, "learning_rate": 3.5434803525872335e-06, "loss": 0.0814, "step": 47409 }, { "epoch": 0.8456105304462598, "grad_norm": 0.20504872500896454, "learning_rate": 3.542681572972026e-06, "loss": 0.0594, "step": 47410 }, { "epoch": 0.8456283665679735, "grad_norm": 0.3291880190372467, "learning_rate": 3.541882876532801e-06, "loss": 0.0756, "step": 47411 }, { "epoch": 0.8456462026896872, "grad_norm": 0.19813303649425507, "learning_rate": 3.5410842632726426e-06, "loss": 0.0903, "step": 47412 }, { "epoch": 0.8456640388114008, "grad_norm": 0.29659345746040344, "learning_rate": 3.540285733194662e-06, "loss": 0.1175, "step": 47413 }, { "epoch": 0.8456818749331145, "grad_norm": 0.25105470418930054, "learning_rate": 3.539487286301943e-06, "loss": 0.1092, "step": 47414 }, { "epoch": 0.8456997110548282, "grad_norm": 0.308626651763916, "learning_rate": 3.5386889225975855e-06, "loss": 0.1271, "step": 47415 }, { "epoch": 0.8457175471765419, "grad_norm": 0.19902539253234863, "learning_rate": 3.537890642084679e-06, "loss": 0.0701, "step": 47416 }, { "epoch": 0.8457353832982556, "grad_norm": 0.2225547879934311, "learning_rate": 3.5370924447663184e-06, "loss": 0.1215, "step": 47417 }, { "epoch": 0.8457532194199693, "grad_norm": 0.2490280419588089, "learning_rate": 3.5362943306456063e-06, "loss": 0.1084, "step": 47418 }, { "epoch": 0.845771055541683, "grad_norm": 0.35284045338630676, "learning_rate": 3.5354962997256295e-06, "loss": 0.0924, "step": 47419 }, { "epoch": 0.8457888916633967, "grad_norm": 0.2676253914833069, "learning_rate": 3.5346983520094827e-06, "loss": 0.0972, "step": 47420 }, { "epoch": 0.8458067277851105, "grad_norm": 0.2826889753341675, "learning_rate": 3.533900487500255e-06, "loss": 0.1009, "step": 47421 }, { "epoch": 0.8458245639068241, "grad_norm": 0.3616482615470886, "learning_rate": 3.533102706201047e-06, "loss": 0.1495, "step": 47422 }, { "epoch": 0.8458424000285378, "grad_norm": 0.3494814932346344, "learning_rate": 3.5323050081149475e-06, "loss": 0.1563, "step": 47423 }, { "epoch": 0.8458602361502515, "grad_norm": 0.26231318712234497, "learning_rate": 3.531507393245043e-06, "loss": 0.0961, "step": 47424 }, { "epoch": 0.8458780722719652, "grad_norm": 0.28493982553482056, "learning_rate": 3.5307098615944363e-06, "loss": 0.0861, "step": 47425 }, { "epoch": 0.8458959083936789, "grad_norm": 0.2542304992675781, "learning_rate": 3.5299124131662085e-06, "loss": 0.1067, "step": 47426 }, { "epoch": 0.8459137445153926, "grad_norm": 0.2479018270969391, "learning_rate": 3.5291150479634605e-06, "loss": 0.0851, "step": 47427 }, { "epoch": 0.8459315806371063, "grad_norm": 0.23945067822933197, "learning_rate": 3.528317765989278e-06, "loss": 0.124, "step": 47428 }, { "epoch": 0.84594941675882, "grad_norm": 0.2718231976032257, "learning_rate": 3.5275205672467533e-06, "loss": 0.112, "step": 47429 }, { "epoch": 0.8459672528805336, "grad_norm": 0.2712881565093994, "learning_rate": 3.52672345173897e-06, "loss": 0.1395, "step": 47430 }, { "epoch": 0.8459850890022473, "grad_norm": 0.26649418473243713, "learning_rate": 3.5259264194690278e-06, "loss": 0.1096, "step": 47431 }, { "epoch": 0.846002925123961, "grad_norm": 0.2700745761394501, "learning_rate": 3.525129470440011e-06, "loss": 0.0961, "step": 47432 }, { "epoch": 0.8460207612456747, "grad_norm": 0.3305078446865082, "learning_rate": 3.524332604655012e-06, "loss": 0.1, "step": 47433 }, { "epoch": 0.8460385973673884, "grad_norm": 0.24270285665988922, "learning_rate": 3.5235358221171078e-06, "loss": 0.1, "step": 47434 }, { "epoch": 0.8460564334891021, "grad_norm": 0.383864164352417, "learning_rate": 3.522739122829405e-06, "loss": 0.1013, "step": 47435 }, { "epoch": 0.8460742696108158, "grad_norm": 0.21216845512390137, "learning_rate": 3.521942506794984e-06, "loss": 0.1128, "step": 47436 }, { "epoch": 0.8460921057325295, "grad_norm": 0.23016326129436493, "learning_rate": 3.5211459740169235e-06, "loss": 0.0759, "step": 47437 }, { "epoch": 0.8461099418542433, "grad_norm": 0.3105771243572235, "learning_rate": 3.5203495244983286e-06, "loss": 0.0865, "step": 47438 }, { "epoch": 0.846127777975957, "grad_norm": 0.26920071244239807, "learning_rate": 3.51955315824227e-06, "loss": 0.1212, "step": 47439 }, { "epoch": 0.8461456140976706, "grad_norm": 0.21645879745483398, "learning_rate": 3.5187568752518496e-06, "loss": 0.092, "step": 47440 }, { "epoch": 0.8461634502193843, "grad_norm": 0.24848566949367523, "learning_rate": 3.5179606755301463e-06, "loss": 0.0917, "step": 47441 }, { "epoch": 0.846181286341098, "grad_norm": 0.2764410674571991, "learning_rate": 3.5171645590802465e-06, "loss": 0.0942, "step": 47442 }, { "epoch": 0.8461991224628117, "grad_norm": 0.28439468145370483, "learning_rate": 3.516368525905231e-06, "loss": 0.1564, "step": 47443 }, { "epoch": 0.8462169585845254, "grad_norm": 0.348168283700943, "learning_rate": 3.515572576008197e-06, "loss": 0.0812, "step": 47444 }, { "epoch": 0.8462347947062391, "grad_norm": 0.2516811490058899, "learning_rate": 3.5147767093922257e-06, "loss": 0.1298, "step": 47445 }, { "epoch": 0.8462526308279528, "grad_norm": 0.3369740843772888, "learning_rate": 3.513980926060398e-06, "loss": 0.1045, "step": 47446 }, { "epoch": 0.8462704669496665, "grad_norm": 0.2666391432285309, "learning_rate": 3.5131852260157972e-06, "loss": 0.1383, "step": 47447 }, { "epoch": 0.8462883030713801, "grad_norm": 0.2141275256872177, "learning_rate": 3.5123896092615184e-06, "loss": 0.0949, "step": 47448 }, { "epoch": 0.8463061391930938, "grad_norm": 0.26325419545173645, "learning_rate": 3.5115940758006394e-06, "loss": 0.1133, "step": 47449 }, { "epoch": 0.8463239753148075, "grad_norm": 0.23058579862117767, "learning_rate": 3.5107986256362415e-06, "loss": 0.1165, "step": 47450 }, { "epoch": 0.8463418114365212, "grad_norm": 0.2442142814397812, "learning_rate": 3.5100032587714048e-06, "loss": 0.1293, "step": 47451 }, { "epoch": 0.8463596475582349, "grad_norm": 0.24232317507266998, "learning_rate": 3.509207975209225e-06, "loss": 0.0895, "step": 47452 }, { "epoch": 0.8463774836799486, "grad_norm": 0.29069605469703674, "learning_rate": 3.508412774952771e-06, "loss": 0.1178, "step": 47453 }, { "epoch": 0.8463953198016623, "grad_norm": 0.2489462047815323, "learning_rate": 3.5076176580051385e-06, "loss": 0.1146, "step": 47454 }, { "epoch": 0.8464131559233761, "grad_norm": 0.27243614196777344, "learning_rate": 3.506822624369402e-06, "loss": 0.1372, "step": 47455 }, { "epoch": 0.8464309920450898, "grad_norm": 0.2851302921772003, "learning_rate": 3.50602767404864e-06, "loss": 0.1194, "step": 47456 }, { "epoch": 0.8464488281668034, "grad_norm": 0.40311288833618164, "learning_rate": 3.505232807045944e-06, "loss": 0.1388, "step": 47457 }, { "epoch": 0.8464666642885171, "grad_norm": 0.49015572667121887, "learning_rate": 3.504438023364387e-06, "loss": 0.1762, "step": 47458 }, { "epoch": 0.8464845004102308, "grad_norm": 0.31908130645751953, "learning_rate": 3.5036433230070553e-06, "loss": 0.0949, "step": 47459 }, { "epoch": 0.8465023365319445, "grad_norm": 0.29065850377082825, "learning_rate": 3.5028487059770217e-06, "loss": 0.0957, "step": 47460 }, { "epoch": 0.8465201726536582, "grad_norm": 0.27658164501190186, "learning_rate": 3.5020541722773746e-06, "loss": 0.0841, "step": 47461 }, { "epoch": 0.8465380087753719, "grad_norm": 0.30853772163391113, "learning_rate": 3.5012597219111933e-06, "loss": 0.1396, "step": 47462 }, { "epoch": 0.8465558448970856, "grad_norm": 0.29472896456718445, "learning_rate": 3.5004653548815547e-06, "loss": 0.1255, "step": 47463 }, { "epoch": 0.8465736810187993, "grad_norm": 0.2253725826740265, "learning_rate": 3.4996710711915293e-06, "loss": 0.1204, "step": 47464 }, { "epoch": 0.846591517140513, "grad_norm": 0.24394692480564117, "learning_rate": 3.4988768708442143e-06, "loss": 0.0891, "step": 47465 }, { "epoch": 0.8466093532622266, "grad_norm": 0.2033083587884903, "learning_rate": 3.4980827538426686e-06, "loss": 0.1318, "step": 47466 }, { "epoch": 0.8466271893839403, "grad_norm": 0.25815555453300476, "learning_rate": 3.4972887201899896e-06, "loss": 0.075, "step": 47467 }, { "epoch": 0.846645025505654, "grad_norm": 0.2514638304710388, "learning_rate": 3.496494769889247e-06, "loss": 0.0929, "step": 47468 }, { "epoch": 0.8466628616273677, "grad_norm": 0.4418571889400482, "learning_rate": 3.4957009029435108e-06, "loss": 0.0933, "step": 47469 }, { "epoch": 0.8466806977490814, "grad_norm": 0.2199210673570633, "learning_rate": 3.4949071193558698e-06, "loss": 0.1008, "step": 47470 }, { "epoch": 0.8466985338707952, "grad_norm": 0.33602094650268555, "learning_rate": 3.4941134191293968e-06, "loss": 0.1686, "step": 47471 }, { "epoch": 0.8467163699925089, "grad_norm": 0.22433800995349884, "learning_rate": 3.49331980226717e-06, "loss": 0.0765, "step": 47472 }, { "epoch": 0.8467342061142226, "grad_norm": 0.2530791759490967, "learning_rate": 3.492526268772256e-06, "loss": 0.0936, "step": 47473 }, { "epoch": 0.8467520422359363, "grad_norm": 0.3687301576137543, "learning_rate": 3.491732818647747e-06, "loss": 0.1595, "step": 47474 }, { "epoch": 0.8467698783576499, "grad_norm": 0.23837460577487946, "learning_rate": 3.4909394518967076e-06, "loss": 0.1027, "step": 47475 }, { "epoch": 0.8467877144793636, "grad_norm": 0.2569620609283447, "learning_rate": 3.4901461685222183e-06, "loss": 0.1236, "step": 47476 }, { "epoch": 0.8468055506010773, "grad_norm": 0.34468820691108704, "learning_rate": 3.489352968527343e-06, "loss": 0.1015, "step": 47477 }, { "epoch": 0.846823386722791, "grad_norm": 0.202947735786438, "learning_rate": 3.4885598519151713e-06, "loss": 0.0873, "step": 47478 }, { "epoch": 0.8468412228445047, "grad_norm": 0.3023819029331207, "learning_rate": 3.487766818688773e-06, "loss": 0.0683, "step": 47479 }, { "epoch": 0.8468590589662184, "grad_norm": 0.2978323698043823, "learning_rate": 3.486973868851215e-06, "loss": 0.1133, "step": 47480 }, { "epoch": 0.8468768950879321, "grad_norm": 0.19978269934654236, "learning_rate": 3.4861810024055806e-06, "loss": 0.1152, "step": 47481 }, { "epoch": 0.8468947312096458, "grad_norm": 0.2727530598640442, "learning_rate": 3.4853882193549346e-06, "loss": 0.113, "step": 47482 }, { "epoch": 0.8469125673313594, "grad_norm": 0.22486057877540588, "learning_rate": 3.4845955197023606e-06, "loss": 0.0565, "step": 47483 }, { "epoch": 0.8469304034530731, "grad_norm": 0.2608107030391693, "learning_rate": 3.4838029034509278e-06, "loss": 0.1, "step": 47484 }, { "epoch": 0.8469482395747868, "grad_norm": 0.24796272814273834, "learning_rate": 3.4830103706037038e-06, "loss": 0.1201, "step": 47485 }, { "epoch": 0.8469660756965005, "grad_norm": 0.22716866433620453, "learning_rate": 3.4822179211637577e-06, "loss": 0.1335, "step": 47486 }, { "epoch": 0.8469839118182142, "grad_norm": 0.2889477610588074, "learning_rate": 3.481425555134171e-06, "loss": 0.145, "step": 47487 }, { "epoch": 0.847001747939928, "grad_norm": 0.3446348309516907, "learning_rate": 3.4806332725180133e-06, "loss": 0.1498, "step": 47488 }, { "epoch": 0.8470195840616417, "grad_norm": 0.4340376555919647, "learning_rate": 3.4798410733183547e-06, "loss": 0.1223, "step": 47489 }, { "epoch": 0.8470374201833554, "grad_norm": 0.28792908787727356, "learning_rate": 3.4790489575382586e-06, "loss": 0.0703, "step": 47490 }, { "epoch": 0.8470552563050691, "grad_norm": 0.27349692583084106, "learning_rate": 3.478256925180806e-06, "loss": 0.1537, "step": 47491 }, { "epoch": 0.8470730924267827, "grad_norm": 0.2780435085296631, "learning_rate": 3.4774649762490617e-06, "loss": 0.0818, "step": 47492 }, { "epoch": 0.8470909285484964, "grad_norm": 0.2728854715824127, "learning_rate": 3.476673110746095e-06, "loss": 0.1346, "step": 47493 }, { "epoch": 0.8471087646702101, "grad_norm": 0.2203640341758728, "learning_rate": 3.475881328674982e-06, "loss": 0.1143, "step": 47494 }, { "epoch": 0.8471266007919238, "grad_norm": 0.24378696084022522, "learning_rate": 3.4750896300387804e-06, "loss": 0.0936, "step": 47495 }, { "epoch": 0.8471444369136375, "grad_norm": 0.4511106610298157, "learning_rate": 3.474298014840571e-06, "loss": 0.1096, "step": 47496 }, { "epoch": 0.8471622730353512, "grad_norm": 0.23231393098831177, "learning_rate": 3.4735064830834164e-06, "loss": 0.1398, "step": 47497 }, { "epoch": 0.8471801091570649, "grad_norm": 0.3215214014053345, "learning_rate": 3.472715034770388e-06, "loss": 0.0876, "step": 47498 }, { "epoch": 0.8471979452787786, "grad_norm": 0.17329655587673187, "learning_rate": 3.4719236699045447e-06, "loss": 0.0997, "step": 47499 }, { "epoch": 0.8472157814004923, "grad_norm": 0.2522377371788025, "learning_rate": 3.471132388488968e-06, "loss": 0.0822, "step": 47500 }, { "epoch": 0.8472336175222059, "grad_norm": 0.21335873007774353, "learning_rate": 3.4703411905267158e-06, "loss": 0.1188, "step": 47501 }, { "epoch": 0.8472514536439196, "grad_norm": 0.22173181176185608, "learning_rate": 3.469550076020858e-06, "loss": 0.1183, "step": 47502 }, { "epoch": 0.8472692897656333, "grad_norm": 0.2797526717185974, "learning_rate": 3.468759044974454e-06, "loss": 0.1624, "step": 47503 }, { "epoch": 0.847287125887347, "grad_norm": 0.22542330622673035, "learning_rate": 3.4679680973905837e-06, "loss": 0.0925, "step": 47504 }, { "epoch": 0.8473049620090608, "grad_norm": 0.23701652884483337, "learning_rate": 3.467177233272306e-06, "loss": 0.1184, "step": 47505 }, { "epoch": 0.8473227981307745, "grad_norm": 0.5262187719345093, "learning_rate": 3.4663864526226856e-06, "loss": 0.1524, "step": 47506 }, { "epoch": 0.8473406342524882, "grad_norm": 0.29847222566604614, "learning_rate": 3.4655957554447836e-06, "loss": 0.1051, "step": 47507 }, { "epoch": 0.8473584703742019, "grad_norm": 0.2641250789165497, "learning_rate": 3.4648051417416754e-06, "loss": 0.1126, "step": 47508 }, { "epoch": 0.8473763064959156, "grad_norm": 0.27309516072273254, "learning_rate": 3.464014611516417e-06, "loss": 0.1278, "step": 47509 }, { "epoch": 0.8473941426176292, "grad_norm": 0.24848908185958862, "learning_rate": 3.4632241647720774e-06, "loss": 0.092, "step": 47510 }, { "epoch": 0.8474119787393429, "grad_norm": 0.2899356782436371, "learning_rate": 3.4624338015117246e-06, "loss": 0.0799, "step": 47511 }, { "epoch": 0.8474298148610566, "grad_norm": 0.2715561091899872, "learning_rate": 3.4616435217384084e-06, "loss": 0.1176, "step": 47512 }, { "epoch": 0.8474476509827703, "grad_norm": 0.33382654190063477, "learning_rate": 3.460853325455207e-06, "loss": 0.0816, "step": 47513 }, { "epoch": 0.847465487104484, "grad_norm": 0.3166084587574005, "learning_rate": 3.4600632126651793e-06, "loss": 0.1558, "step": 47514 }, { "epoch": 0.8474833232261977, "grad_norm": 0.2971554100513458, "learning_rate": 3.4592731833713836e-06, "loss": 0.1521, "step": 47515 }, { "epoch": 0.8475011593479114, "grad_norm": 0.20303970575332642, "learning_rate": 3.4584832375768816e-06, "loss": 0.0728, "step": 47516 }, { "epoch": 0.8475189954696251, "grad_norm": 0.2333047240972519, "learning_rate": 3.457693375284743e-06, "loss": 0.0881, "step": 47517 }, { "epoch": 0.8475368315913387, "grad_norm": 0.30360716581344604, "learning_rate": 3.4569035964980264e-06, "loss": 0.1277, "step": 47518 }, { "epoch": 0.8475546677130524, "grad_norm": 0.322036474943161, "learning_rate": 3.45611390121979e-06, "loss": 0.1308, "step": 47519 }, { "epoch": 0.8475725038347661, "grad_norm": 0.31250110268592834, "learning_rate": 3.4553242894530937e-06, "loss": 0.1826, "step": 47520 }, { "epoch": 0.8475903399564798, "grad_norm": 0.2294568568468094, "learning_rate": 3.454534761201006e-06, "loss": 0.1206, "step": 47521 }, { "epoch": 0.8476081760781936, "grad_norm": 0.3135239779949188, "learning_rate": 3.4537453164665806e-06, "loss": 0.1583, "step": 47522 }, { "epoch": 0.8476260121999073, "grad_norm": 0.2727711498737335, "learning_rate": 3.452955955252882e-06, "loss": 0.1329, "step": 47523 }, { "epoch": 0.847643848321621, "grad_norm": 0.2505616545677185, "learning_rate": 3.4521666775629706e-06, "loss": 0.1155, "step": 47524 }, { "epoch": 0.8476616844433347, "grad_norm": 0.21993312239646912, "learning_rate": 3.451377483399895e-06, "loss": 0.0611, "step": 47525 }, { "epoch": 0.8476795205650484, "grad_norm": 0.23521654307842255, "learning_rate": 3.450588372766733e-06, "loss": 0.0856, "step": 47526 }, { "epoch": 0.847697356686762, "grad_norm": 0.2705284655094147, "learning_rate": 3.4497993456665294e-06, "loss": 0.1344, "step": 47527 }, { "epoch": 0.8477151928084757, "grad_norm": 0.26749953627586365, "learning_rate": 3.4490104021023484e-06, "loss": 0.1487, "step": 47528 }, { "epoch": 0.8477330289301894, "grad_norm": 0.2799796760082245, "learning_rate": 3.44822154207724e-06, "loss": 0.1504, "step": 47529 }, { "epoch": 0.8477508650519031, "grad_norm": 0.24230726063251495, "learning_rate": 3.4474327655942774e-06, "loss": 0.0937, "step": 47530 }, { "epoch": 0.8477687011736168, "grad_norm": 0.2881892919540405, "learning_rate": 3.4466440726565077e-06, "loss": 0.1073, "step": 47531 }, { "epoch": 0.8477865372953305, "grad_norm": 0.25630617141723633, "learning_rate": 3.4458554632669893e-06, "loss": 0.092, "step": 47532 }, { "epoch": 0.8478043734170442, "grad_norm": 0.22776642441749573, "learning_rate": 3.4450669374287787e-06, "loss": 0.0936, "step": 47533 }, { "epoch": 0.8478222095387579, "grad_norm": 0.25008949637413025, "learning_rate": 3.4442784951449284e-06, "loss": 0.1394, "step": 47534 }, { "epoch": 0.8478400456604716, "grad_norm": 0.34504738450050354, "learning_rate": 3.4434901364185062e-06, "loss": 0.128, "step": 47535 }, { "epoch": 0.8478578817821852, "grad_norm": 0.2894987463951111, "learning_rate": 3.4427018612525564e-06, "loss": 0.1002, "step": 47536 }, { "epoch": 0.8478757179038989, "grad_norm": 0.29916563630104065, "learning_rate": 3.441913669650143e-06, "loss": 0.0618, "step": 47537 }, { "epoch": 0.8478935540256126, "grad_norm": 0.30666837096214294, "learning_rate": 3.441125561614314e-06, "loss": 0.1237, "step": 47538 }, { "epoch": 0.8479113901473264, "grad_norm": 0.3498028516769409, "learning_rate": 3.440337537148136e-06, "loss": 0.1099, "step": 47539 }, { "epoch": 0.8479292262690401, "grad_norm": 0.3540576100349426, "learning_rate": 3.4395495962546543e-06, "loss": 0.1443, "step": 47540 }, { "epoch": 0.8479470623907538, "grad_norm": 0.2628134787082672, "learning_rate": 3.4387617389369242e-06, "loss": 0.0942, "step": 47541 }, { "epoch": 0.8479648985124675, "grad_norm": 0.5075153708457947, "learning_rate": 3.4379739651979963e-06, "loss": 0.1307, "step": 47542 }, { "epoch": 0.8479827346341812, "grad_norm": 0.24735112488269806, "learning_rate": 3.437186275040935e-06, "loss": 0.1075, "step": 47543 }, { "epoch": 0.8480005707558949, "grad_norm": 0.34918805956840515, "learning_rate": 3.436398668468785e-06, "loss": 0.1336, "step": 47544 }, { "epoch": 0.8480184068776085, "grad_norm": 0.23646296560764313, "learning_rate": 3.4356111454846048e-06, "loss": 0.1388, "step": 47545 }, { "epoch": 0.8480362429993222, "grad_norm": 0.27730274200439453, "learning_rate": 3.434823706091442e-06, "loss": 0.1163, "step": 47546 }, { "epoch": 0.8480540791210359, "grad_norm": 0.25620508193969727, "learning_rate": 3.434036350292344e-06, "loss": 0.0942, "step": 47547 }, { "epoch": 0.8480719152427496, "grad_norm": 0.295282244682312, "learning_rate": 3.4332490780903754e-06, "loss": 0.1269, "step": 47548 }, { "epoch": 0.8480897513644633, "grad_norm": 0.32848411798477173, "learning_rate": 3.432461889488578e-06, "loss": 0.1531, "step": 47549 }, { "epoch": 0.848107587486177, "grad_norm": 0.338253915309906, "learning_rate": 3.4316747844900133e-06, "loss": 0.1175, "step": 47550 }, { "epoch": 0.8481254236078907, "grad_norm": 0.2497081309556961, "learning_rate": 3.4308877630977203e-06, "loss": 0.1326, "step": 47551 }, { "epoch": 0.8481432597296044, "grad_norm": 0.2737867534160614, "learning_rate": 3.4301008253147637e-06, "loss": 0.1174, "step": 47552 }, { "epoch": 0.848161095851318, "grad_norm": 0.2538291811943054, "learning_rate": 3.4293139711441852e-06, "loss": 0.1166, "step": 47553 }, { "epoch": 0.8481789319730317, "grad_norm": 0.2532767951488495, "learning_rate": 3.428527200589035e-06, "loss": 0.1146, "step": 47554 }, { "epoch": 0.8481967680947454, "grad_norm": 0.2863558232784271, "learning_rate": 3.4277405136523582e-06, "loss": 0.0997, "step": 47555 }, { "epoch": 0.8482146042164592, "grad_norm": 0.2636297941207886, "learning_rate": 3.426953910337216e-06, "loss": 0.0772, "step": 47556 }, { "epoch": 0.8482324403381729, "grad_norm": 0.3280991315841675, "learning_rate": 3.4261673906466508e-06, "loss": 0.1403, "step": 47557 }, { "epoch": 0.8482502764598866, "grad_norm": 0.34249600768089294, "learning_rate": 3.425380954583715e-06, "loss": 0.079, "step": 47558 }, { "epoch": 0.8482681125816003, "grad_norm": 0.3236106336116791, "learning_rate": 3.4245946021514515e-06, "loss": 0.1064, "step": 47559 }, { "epoch": 0.848285948703314, "grad_norm": 0.1811663955450058, "learning_rate": 3.423808333352907e-06, "loss": 0.0791, "step": 47560 }, { "epoch": 0.8483037848250277, "grad_norm": 0.23325011134147644, "learning_rate": 3.423022148191138e-06, "loss": 0.0893, "step": 47561 }, { "epoch": 0.8483216209467414, "grad_norm": 0.33347317576408386, "learning_rate": 3.4222360466691888e-06, "loss": 0.153, "step": 47562 }, { "epoch": 0.848339457068455, "grad_norm": 0.2656535804271698, "learning_rate": 3.421450028790099e-06, "loss": 0.1191, "step": 47563 }, { "epoch": 0.8483572931901687, "grad_norm": 0.23828156292438507, "learning_rate": 3.4206640945569217e-06, "loss": 0.0908, "step": 47564 }, { "epoch": 0.8483751293118824, "grad_norm": 0.23775924742221832, "learning_rate": 3.4198782439727102e-06, "loss": 0.1254, "step": 47565 }, { "epoch": 0.8483929654335961, "grad_norm": 0.24574865400791168, "learning_rate": 3.419092477040503e-06, "loss": 0.1228, "step": 47566 }, { "epoch": 0.8484108015553098, "grad_norm": 0.20682393014431, "learning_rate": 3.4183067937633486e-06, "loss": 0.1044, "step": 47567 }, { "epoch": 0.8484286376770235, "grad_norm": 0.2883542478084564, "learning_rate": 3.417521194144285e-06, "loss": 0.1119, "step": 47568 }, { "epoch": 0.8484464737987372, "grad_norm": 0.2940611243247986, "learning_rate": 3.4167356781863696e-06, "loss": 0.0963, "step": 47569 }, { "epoch": 0.8484643099204509, "grad_norm": 0.25916025042533875, "learning_rate": 3.415950245892641e-06, "loss": 0.0745, "step": 47570 }, { "epoch": 0.8484821460421645, "grad_norm": 0.23165477812290192, "learning_rate": 3.4151648972661432e-06, "loss": 0.0807, "step": 47571 }, { "epoch": 0.8484999821638783, "grad_norm": 0.42337608337402344, "learning_rate": 3.414379632309922e-06, "loss": 0.1239, "step": 47572 }, { "epoch": 0.848517818285592, "grad_norm": 0.27986547350883484, "learning_rate": 3.4135944510270133e-06, "loss": 0.101, "step": 47573 }, { "epoch": 0.8485356544073057, "grad_norm": 0.5604416728019714, "learning_rate": 3.412809353420476e-06, "loss": 0.1333, "step": 47574 }, { "epoch": 0.8485534905290194, "grad_norm": 0.34631314873695374, "learning_rate": 3.412024339493347e-06, "loss": 0.1424, "step": 47575 }, { "epoch": 0.8485713266507331, "grad_norm": 0.2847054600715637, "learning_rate": 3.4112394092486594e-06, "loss": 0.1335, "step": 47576 }, { "epoch": 0.8485891627724468, "grad_norm": 0.23745013773441315, "learning_rate": 3.410454562689472e-06, "loss": 0.093, "step": 47577 }, { "epoch": 0.8486069988941605, "grad_norm": 0.4014798104763031, "learning_rate": 3.4096697998188127e-06, "loss": 0.1327, "step": 47578 }, { "epoch": 0.8486248350158742, "grad_norm": 0.2616578936576843, "learning_rate": 3.4088851206397347e-06, "loss": 0.084, "step": 47579 }, { "epoch": 0.8486426711375878, "grad_norm": 0.2699027359485626, "learning_rate": 3.408100525155278e-06, "loss": 0.0988, "step": 47580 }, { "epoch": 0.8486605072593015, "grad_norm": 0.2636522054672241, "learning_rate": 3.407316013368475e-06, "loss": 0.0472, "step": 47581 }, { "epoch": 0.8486783433810152, "grad_norm": 0.3269490897655487, "learning_rate": 3.406531585282377e-06, "loss": 0.089, "step": 47582 }, { "epoch": 0.8486961795027289, "grad_norm": 0.32814884185791016, "learning_rate": 3.405747240900023e-06, "loss": 0.112, "step": 47583 }, { "epoch": 0.8487140156244426, "grad_norm": 0.24202416837215424, "learning_rate": 3.4049629802244493e-06, "loss": 0.1023, "step": 47584 }, { "epoch": 0.8487318517461563, "grad_norm": 0.2033878117799759, "learning_rate": 3.4041788032587006e-06, "loss": 0.064, "step": 47585 }, { "epoch": 0.84874968786787, "grad_norm": 0.29180243611335754, "learning_rate": 3.4033947100058055e-06, "loss": 0.1218, "step": 47586 }, { "epoch": 0.8487675239895837, "grad_norm": 0.2479466199874878, "learning_rate": 3.4026107004688167e-06, "loss": 0.0972, "step": 47587 }, { "epoch": 0.8487853601112973, "grad_norm": 0.2755719721317291, "learning_rate": 3.401826774650771e-06, "loss": 0.1174, "step": 47588 }, { "epoch": 0.8488031962330111, "grad_norm": 0.27944543957710266, "learning_rate": 3.4010429325547016e-06, "loss": 0.1332, "step": 47589 }, { "epoch": 0.8488210323547248, "grad_norm": 0.32449841499328613, "learning_rate": 3.4002591741836454e-06, "loss": 0.0959, "step": 47590 }, { "epoch": 0.8488388684764385, "grad_norm": 0.2929539084434509, "learning_rate": 3.399475499540647e-06, "loss": 0.1349, "step": 47591 }, { "epoch": 0.8488567045981522, "grad_norm": 0.19197528064250946, "learning_rate": 3.3986919086287454e-06, "loss": 0.0771, "step": 47592 }, { "epoch": 0.8488745407198659, "grad_norm": 0.4546467661857605, "learning_rate": 3.3979084014509747e-06, "loss": 0.1159, "step": 47593 }, { "epoch": 0.8488923768415796, "grad_norm": 0.37847015261650085, "learning_rate": 3.3971249780103688e-06, "loss": 0.0879, "step": 47594 }, { "epoch": 0.8489102129632933, "grad_norm": 0.28531503677368164, "learning_rate": 3.3963416383099718e-06, "loss": 0.1276, "step": 47595 }, { "epoch": 0.848928049085007, "grad_norm": 0.35991430282592773, "learning_rate": 3.395558382352815e-06, "loss": 0.0973, "step": 47596 }, { "epoch": 0.8489458852067207, "grad_norm": 0.278708279132843, "learning_rate": 3.394775210141937e-06, "loss": 0.093, "step": 47597 }, { "epoch": 0.8489637213284343, "grad_norm": 0.30353036522865295, "learning_rate": 3.3939921216803726e-06, "loss": 0.181, "step": 47598 }, { "epoch": 0.848981557450148, "grad_norm": 0.26497504115104675, "learning_rate": 3.393209116971152e-06, "loss": 0.0823, "step": 47599 }, { "epoch": 0.8489993935718617, "grad_norm": 0.32025888562202454, "learning_rate": 3.392426196017323e-06, "loss": 0.1543, "step": 47600 }, { "epoch": 0.8490172296935754, "grad_norm": 0.26852986216545105, "learning_rate": 3.391643358821911e-06, "loss": 0.1028, "step": 47601 }, { "epoch": 0.8490350658152891, "grad_norm": 0.21738412976264954, "learning_rate": 3.3908606053879522e-06, "loss": 0.0829, "step": 47602 }, { "epoch": 0.8490529019370028, "grad_norm": 0.2731854021549225, "learning_rate": 3.3900779357184746e-06, "loss": 0.0593, "step": 47603 }, { "epoch": 0.8490707380587165, "grad_norm": 0.29218968749046326, "learning_rate": 3.3892953498165265e-06, "loss": 0.1307, "step": 47604 }, { "epoch": 0.8490885741804302, "grad_norm": 0.2593521475791931, "learning_rate": 3.3885128476851268e-06, "loss": 0.0864, "step": 47605 }, { "epoch": 0.849106410302144, "grad_norm": 0.4156484603881836, "learning_rate": 3.387730429327324e-06, "loss": 0.0817, "step": 47606 }, { "epoch": 0.8491242464238576, "grad_norm": 0.2704949975013733, "learning_rate": 3.386948094746134e-06, "loss": 0.1571, "step": 47607 }, { "epoch": 0.8491420825455713, "grad_norm": 0.27881699800491333, "learning_rate": 3.3861658439446054e-06, "loss": 0.1339, "step": 47608 }, { "epoch": 0.849159918667285, "grad_norm": 0.280504435300827, "learning_rate": 3.385383676925763e-06, "loss": 0.0979, "step": 47609 }, { "epoch": 0.8491777547889987, "grad_norm": 0.2182489037513733, "learning_rate": 3.384601593692638e-06, "loss": 0.1075, "step": 47610 }, { "epoch": 0.8491955909107124, "grad_norm": 0.2650125026702881, "learning_rate": 3.3838195942482635e-06, "loss": 0.1101, "step": 47611 }, { "epoch": 0.8492134270324261, "grad_norm": 0.21502986550331116, "learning_rate": 3.383037678595663e-06, "loss": 0.1209, "step": 47612 }, { "epoch": 0.8492312631541398, "grad_norm": 0.23931175470352173, "learning_rate": 3.3822558467378834e-06, "loss": 0.1281, "step": 47613 }, { "epoch": 0.8492490992758535, "grad_norm": 0.27963826060295105, "learning_rate": 3.381474098677945e-06, "loss": 0.1187, "step": 47614 }, { "epoch": 0.8492669353975671, "grad_norm": 0.32648712396621704, "learning_rate": 3.3806924344188806e-06, "loss": 0.0978, "step": 47615 }, { "epoch": 0.8492847715192808, "grad_norm": 0.3141188323497772, "learning_rate": 3.379910853963711e-06, "loss": 0.1415, "step": 47616 }, { "epoch": 0.8493026076409945, "grad_norm": 0.23769818246364594, "learning_rate": 3.379129357315483e-06, "loss": 0.0781, "step": 47617 }, { "epoch": 0.8493204437627082, "grad_norm": 0.2554595172405243, "learning_rate": 3.3783479444772113e-06, "loss": 0.1255, "step": 47618 }, { "epoch": 0.8493382798844219, "grad_norm": 0.2738015651702881, "learning_rate": 3.3775666154519355e-06, "loss": 0.111, "step": 47619 }, { "epoch": 0.8493561160061356, "grad_norm": 0.5938237905502319, "learning_rate": 3.376785370242674e-06, "loss": 0.1128, "step": 47620 }, { "epoch": 0.8493739521278493, "grad_norm": 0.3143916130065918, "learning_rate": 3.3760042088524654e-06, "loss": 0.1276, "step": 47621 }, { "epoch": 0.849391788249563, "grad_norm": 0.2929554283618927, "learning_rate": 3.375223131284336e-06, "loss": 0.1721, "step": 47622 }, { "epoch": 0.8494096243712768, "grad_norm": 0.30695393681526184, "learning_rate": 3.374442137541309e-06, "loss": 0.0733, "step": 47623 }, { "epoch": 0.8494274604929904, "grad_norm": 0.29187247157096863, "learning_rate": 3.3736612276264123e-06, "loss": 0.1015, "step": 47624 }, { "epoch": 0.8494452966147041, "grad_norm": 0.24599182605743408, "learning_rate": 3.372880401542672e-06, "loss": 0.0805, "step": 47625 }, { "epoch": 0.8494631327364178, "grad_norm": 0.2022215723991394, "learning_rate": 3.3720996592931183e-06, "loss": 0.1316, "step": 47626 }, { "epoch": 0.8494809688581315, "grad_norm": 0.28157946467399597, "learning_rate": 3.3713190008807793e-06, "loss": 0.1176, "step": 47627 }, { "epoch": 0.8494988049798452, "grad_norm": 0.24914538860321045, "learning_rate": 3.3705384263086783e-06, "loss": 0.1245, "step": 47628 }, { "epoch": 0.8495166411015589, "grad_norm": 0.2940198481082916, "learning_rate": 3.369757935579834e-06, "loss": 0.1294, "step": 47629 }, { "epoch": 0.8495344772232726, "grad_norm": 0.27949362993240356, "learning_rate": 3.368977528697284e-06, "loss": 0.1031, "step": 47630 }, { "epoch": 0.8495523133449863, "grad_norm": 0.3019295930862427, "learning_rate": 3.3681972056640502e-06, "loss": 0.1311, "step": 47631 }, { "epoch": 0.8495701494667, "grad_norm": 0.22180651128292084, "learning_rate": 3.36741696648315e-06, "loss": 0.1203, "step": 47632 }, { "epoch": 0.8495879855884136, "grad_norm": 0.2812873423099518, "learning_rate": 3.366636811157617e-06, "loss": 0.133, "step": 47633 }, { "epoch": 0.8496058217101273, "grad_norm": 0.2788574993610382, "learning_rate": 3.365856739690465e-06, "loss": 0.0779, "step": 47634 }, { "epoch": 0.849623657831841, "grad_norm": 0.2598767876625061, "learning_rate": 3.365076752084734e-06, "loss": 0.1323, "step": 47635 }, { "epoch": 0.8496414939535547, "grad_norm": 0.2924971580505371, "learning_rate": 3.3642968483434343e-06, "loss": 0.1306, "step": 47636 }, { "epoch": 0.8496593300752684, "grad_norm": 0.24843446910381317, "learning_rate": 3.3635170284695955e-06, "loss": 0.1004, "step": 47637 }, { "epoch": 0.8496771661969821, "grad_norm": 0.21223753690719604, "learning_rate": 3.362737292466231e-06, "loss": 0.1246, "step": 47638 }, { "epoch": 0.8496950023186958, "grad_norm": 0.23976342380046844, "learning_rate": 3.3619576403363744e-06, "loss": 0.1319, "step": 47639 }, { "epoch": 0.8497128384404096, "grad_norm": 0.28250566124916077, "learning_rate": 3.3611780720830433e-06, "loss": 0.1247, "step": 47640 }, { "epoch": 0.8497306745621233, "grad_norm": 0.28882476687431335, "learning_rate": 3.3603985877092627e-06, "loss": 0.1597, "step": 47641 }, { "epoch": 0.8497485106838369, "grad_norm": 0.23271915316581726, "learning_rate": 3.359619187218044e-06, "loss": 0.1114, "step": 47642 }, { "epoch": 0.8497663468055506, "grad_norm": 0.23010171949863434, "learning_rate": 3.358839870612421e-06, "loss": 0.077, "step": 47643 }, { "epoch": 0.8497841829272643, "grad_norm": 0.3984963297843933, "learning_rate": 3.3580606378954078e-06, "loss": 0.1015, "step": 47644 }, { "epoch": 0.849802019048978, "grad_norm": 0.24417617917060852, "learning_rate": 3.3572814890700243e-06, "loss": 0.0876, "step": 47645 }, { "epoch": 0.8498198551706917, "grad_norm": 0.2776118218898773, "learning_rate": 3.356502424139296e-06, "loss": 0.0763, "step": 47646 }, { "epoch": 0.8498376912924054, "grad_norm": 0.24080964922904968, "learning_rate": 3.3557234431062344e-06, "loss": 0.0836, "step": 47647 }, { "epoch": 0.8498555274141191, "grad_norm": 0.32639598846435547, "learning_rate": 3.3549445459738706e-06, "loss": 0.1107, "step": 47648 }, { "epoch": 0.8498733635358328, "grad_norm": 0.24550041556358337, "learning_rate": 3.354165732745218e-06, "loss": 0.0753, "step": 47649 }, { "epoch": 0.8498911996575464, "grad_norm": 0.20576785504817963, "learning_rate": 3.3533870034232945e-06, "loss": 0.0805, "step": 47650 }, { "epoch": 0.8499090357792601, "grad_norm": 0.1855613738298416, "learning_rate": 3.352608358011114e-06, "loss": 0.0864, "step": 47651 }, { "epoch": 0.8499268719009738, "grad_norm": 0.27259737253189087, "learning_rate": 3.3518297965117073e-06, "loss": 0.0771, "step": 47652 }, { "epoch": 0.8499447080226875, "grad_norm": 0.32468804717063904, "learning_rate": 3.3510513189280836e-06, "loss": 0.1401, "step": 47653 }, { "epoch": 0.8499625441444012, "grad_norm": 0.29628005623817444, "learning_rate": 3.350272925263265e-06, "loss": 0.1058, "step": 47654 }, { "epoch": 0.8499803802661149, "grad_norm": 0.23224864900112152, "learning_rate": 3.3494946155202573e-06, "loss": 0.0842, "step": 47655 }, { "epoch": 0.8499982163878286, "grad_norm": 0.26734307408332825, "learning_rate": 3.348716389702092e-06, "loss": 0.0539, "step": 47656 }, { "epoch": 0.8500160525095424, "grad_norm": 0.2762904167175293, "learning_rate": 3.3479382478117825e-06, "loss": 0.0931, "step": 47657 }, { "epoch": 0.8500338886312561, "grad_norm": 0.32132840156555176, "learning_rate": 3.3471601898523415e-06, "loss": 0.1487, "step": 47658 }, { "epoch": 0.8500517247529698, "grad_norm": 0.26144763827323914, "learning_rate": 3.3463822158267794e-06, "loss": 0.1556, "step": 47659 }, { "epoch": 0.8500695608746834, "grad_norm": 0.29807576537132263, "learning_rate": 3.345604325738125e-06, "loss": 0.1217, "step": 47660 }, { "epoch": 0.8500873969963971, "grad_norm": 0.21024297177791595, "learning_rate": 3.3448265195893836e-06, "loss": 0.0921, "step": 47661 }, { "epoch": 0.8501052331181108, "grad_norm": 0.2591642141342163, "learning_rate": 3.3440487973835784e-06, "loss": 0.1186, "step": 47662 }, { "epoch": 0.8501230692398245, "grad_norm": 0.26573431491851807, "learning_rate": 3.3432711591237175e-06, "loss": 0.1097, "step": 47663 }, { "epoch": 0.8501409053615382, "grad_norm": 0.27268293499946594, "learning_rate": 3.3424936048128156e-06, "loss": 0.1142, "step": 47664 }, { "epoch": 0.8501587414832519, "grad_norm": 0.3256499767303467, "learning_rate": 3.3417161344538928e-06, "loss": 0.1587, "step": 47665 }, { "epoch": 0.8501765776049656, "grad_norm": 0.29039502143859863, "learning_rate": 3.3409387480499597e-06, "loss": 0.0996, "step": 47666 }, { "epoch": 0.8501944137266793, "grad_norm": 0.29900890588760376, "learning_rate": 3.3401614456040285e-06, "loss": 0.132, "step": 47667 }, { "epoch": 0.8502122498483929, "grad_norm": 0.20569966733455658, "learning_rate": 3.339384227119105e-06, "loss": 0.0865, "step": 47668 }, { "epoch": 0.8502300859701066, "grad_norm": 0.42728641629219055, "learning_rate": 3.338607092598217e-06, "loss": 0.0957, "step": 47669 }, { "epoch": 0.8502479220918203, "grad_norm": 0.40185558795928955, "learning_rate": 3.337830042044368e-06, "loss": 0.1192, "step": 47670 }, { "epoch": 0.850265758213534, "grad_norm": 0.2814464569091797, "learning_rate": 3.337053075460575e-06, "loss": 0.0627, "step": 47671 }, { "epoch": 0.8502835943352477, "grad_norm": 0.2782944440841675, "learning_rate": 3.336276192849838e-06, "loss": 0.1495, "step": 47672 }, { "epoch": 0.8503014304569615, "grad_norm": 0.3199179768562317, "learning_rate": 3.335499394215183e-06, "loss": 0.096, "step": 47673 }, { "epoch": 0.8503192665786752, "grad_norm": 0.24381811916828156, "learning_rate": 3.33472267955961e-06, "loss": 0.1353, "step": 47674 }, { "epoch": 0.8503371027003889, "grad_norm": 0.26505932211875916, "learning_rate": 3.3339460488861385e-06, "loss": 0.1086, "step": 47675 }, { "epoch": 0.8503549388221026, "grad_norm": 0.23636607825756073, "learning_rate": 3.3331695021977778e-06, "loss": 0.0722, "step": 47676 }, { "epoch": 0.8503727749438162, "grad_norm": 0.2680937647819519, "learning_rate": 3.3323930394975305e-06, "loss": 0.1267, "step": 47677 }, { "epoch": 0.8503906110655299, "grad_norm": 0.20951932668685913, "learning_rate": 3.3316166607884143e-06, "loss": 0.0525, "step": 47678 }, { "epoch": 0.8504084471872436, "grad_norm": 0.24381721019744873, "learning_rate": 3.3308403660734374e-06, "loss": 0.0768, "step": 47679 }, { "epoch": 0.8504262833089573, "grad_norm": 0.27948907017707825, "learning_rate": 3.3300641553556083e-06, "loss": 0.0675, "step": 47680 }, { "epoch": 0.850444119430671, "grad_norm": 0.2873189449310303, "learning_rate": 3.329288028637928e-06, "loss": 0.0843, "step": 47681 }, { "epoch": 0.8504619555523847, "grad_norm": 0.25985538959503174, "learning_rate": 3.3285119859234185e-06, "loss": 0.1054, "step": 47682 }, { "epoch": 0.8504797916740984, "grad_norm": 0.29643651843070984, "learning_rate": 3.327736027215081e-06, "loss": 0.1169, "step": 47683 }, { "epoch": 0.8504976277958121, "grad_norm": 0.3242148756980896, "learning_rate": 3.326960152515926e-06, "loss": 0.1364, "step": 47684 }, { "epoch": 0.8505154639175257, "grad_norm": 0.29610541462898254, "learning_rate": 3.3261843618289517e-06, "loss": 0.1275, "step": 47685 }, { "epoch": 0.8505333000392394, "grad_norm": 0.29560166597366333, "learning_rate": 3.3254086551571777e-06, "loss": 0.0669, "step": 47686 }, { "epoch": 0.8505511361609531, "grad_norm": 0.25385379791259766, "learning_rate": 3.3246330325036075e-06, "loss": 0.0983, "step": 47687 }, { "epoch": 0.8505689722826668, "grad_norm": 0.2635827958583832, "learning_rate": 3.323857493871238e-06, "loss": 0.1566, "step": 47688 }, { "epoch": 0.8505868084043805, "grad_norm": 0.23801691830158234, "learning_rate": 3.3230820392630924e-06, "loss": 0.1026, "step": 47689 }, { "epoch": 0.8506046445260943, "grad_norm": 0.2759699821472168, "learning_rate": 3.3223066686821597e-06, "loss": 0.1222, "step": 47690 }, { "epoch": 0.850622480647808, "grad_norm": 0.2693772614002228, "learning_rate": 3.3215313821314625e-06, "loss": 0.1118, "step": 47691 }, { "epoch": 0.8506403167695217, "grad_norm": 0.22049814462661743, "learning_rate": 3.320756179613993e-06, "loss": 0.056, "step": 47692 }, { "epoch": 0.8506581528912354, "grad_norm": 0.21508820354938507, "learning_rate": 3.3199810611327625e-06, "loss": 0.091, "step": 47693 }, { "epoch": 0.850675989012949, "grad_norm": 0.2161596268415451, "learning_rate": 3.319206026690769e-06, "loss": 0.0974, "step": 47694 }, { "epoch": 0.8506938251346627, "grad_norm": 0.30514243245124817, "learning_rate": 3.318431076291023e-06, "loss": 0.0887, "step": 47695 }, { "epoch": 0.8507116612563764, "grad_norm": 0.26499712467193604, "learning_rate": 3.3176562099365316e-06, "loss": 0.123, "step": 47696 }, { "epoch": 0.8507294973780901, "grad_norm": 0.3640264868736267, "learning_rate": 3.316881427630289e-06, "loss": 0.1122, "step": 47697 }, { "epoch": 0.8507473334998038, "grad_norm": 0.3829297721385956, "learning_rate": 3.3161067293753013e-06, "loss": 0.195, "step": 47698 }, { "epoch": 0.8507651696215175, "grad_norm": 0.40436848998069763, "learning_rate": 3.315332115174577e-06, "loss": 0.1181, "step": 47699 }, { "epoch": 0.8507830057432312, "grad_norm": 0.3490535616874695, "learning_rate": 3.314557585031114e-06, "loss": 0.1378, "step": 47700 }, { "epoch": 0.8508008418649449, "grad_norm": 0.3003726005554199, "learning_rate": 3.3137831389479124e-06, "loss": 0.0864, "step": 47701 }, { "epoch": 0.8508186779866586, "grad_norm": 0.2347470074892044, "learning_rate": 3.313008776927981e-06, "loss": 0.1122, "step": 47702 }, { "epoch": 0.8508365141083722, "grad_norm": 0.24559372663497925, "learning_rate": 3.3122344989743147e-06, "loss": 0.0723, "step": 47703 }, { "epoch": 0.8508543502300859, "grad_norm": 0.24125739932060242, "learning_rate": 3.311460305089922e-06, "loss": 0.0973, "step": 47704 }, { "epoch": 0.8508721863517996, "grad_norm": 0.33484894037246704, "learning_rate": 3.3106861952778013e-06, "loss": 0.1782, "step": 47705 }, { "epoch": 0.8508900224735133, "grad_norm": 0.2980024516582489, "learning_rate": 3.309912169540952e-06, "loss": 0.1576, "step": 47706 }, { "epoch": 0.8509078585952271, "grad_norm": 0.28373217582702637, "learning_rate": 3.309138227882369e-06, "loss": 0.1352, "step": 47707 }, { "epoch": 0.8509256947169408, "grad_norm": 0.24331073462963104, "learning_rate": 3.3083643703050615e-06, "loss": 0.0757, "step": 47708 }, { "epoch": 0.8509435308386545, "grad_norm": 0.2487875521183014, "learning_rate": 3.30759059681203e-06, "loss": 0.1837, "step": 47709 }, { "epoch": 0.8509613669603682, "grad_norm": 0.3310057818889618, "learning_rate": 3.3068169074062657e-06, "loss": 0.1377, "step": 47710 }, { "epoch": 0.8509792030820819, "grad_norm": 0.32067668437957764, "learning_rate": 3.3060433020907667e-06, "loss": 0.1251, "step": 47711 }, { "epoch": 0.8509970392037955, "grad_norm": 0.21814998984336853, "learning_rate": 3.3052697808685446e-06, "loss": 0.1237, "step": 47712 }, { "epoch": 0.8510148753255092, "grad_norm": 0.3243829309940338, "learning_rate": 3.3044963437425885e-06, "loss": 0.1505, "step": 47713 }, { "epoch": 0.8510327114472229, "grad_norm": 0.334989994764328, "learning_rate": 3.303722990715896e-06, "loss": 0.1217, "step": 47714 }, { "epoch": 0.8510505475689366, "grad_norm": 0.37650513648986816, "learning_rate": 3.302949721791465e-06, "loss": 0.1212, "step": 47715 }, { "epoch": 0.8510683836906503, "grad_norm": 0.27502763271331787, "learning_rate": 3.3021765369722985e-06, "loss": 0.1398, "step": 47716 }, { "epoch": 0.851086219812364, "grad_norm": 0.2925994098186493, "learning_rate": 3.3014034362613825e-06, "loss": 0.1263, "step": 47717 }, { "epoch": 0.8511040559340777, "grad_norm": 0.2871912121772766, "learning_rate": 3.3006304196617294e-06, "loss": 0.11, "step": 47718 }, { "epoch": 0.8511218920557914, "grad_norm": 0.3749898672103882, "learning_rate": 3.2998574871763277e-06, "loss": 0.1471, "step": 47719 }, { "epoch": 0.851139728177505, "grad_norm": 0.23590585589408875, "learning_rate": 3.299084638808167e-06, "loss": 0.0963, "step": 47720 }, { "epoch": 0.8511575642992187, "grad_norm": 0.24640442430973053, "learning_rate": 3.298311874560256e-06, "loss": 0.1058, "step": 47721 }, { "epoch": 0.8511754004209324, "grad_norm": 0.25836181640625, "learning_rate": 3.297539194435581e-06, "loss": 0.0808, "step": 47722 }, { "epoch": 0.8511932365426461, "grad_norm": 0.2222335785627365, "learning_rate": 3.296766598437143e-06, "loss": 0.0854, "step": 47723 }, { "epoch": 0.8512110726643599, "grad_norm": 0.3288305401802063, "learning_rate": 3.2959940865679273e-06, "loss": 0.1156, "step": 47724 }, { "epoch": 0.8512289087860736, "grad_norm": 0.3277072608470917, "learning_rate": 3.2952216588309383e-06, "loss": 0.1571, "step": 47725 }, { "epoch": 0.8512467449077873, "grad_norm": 0.26358556747436523, "learning_rate": 3.29444931522917e-06, "loss": 0.0842, "step": 47726 }, { "epoch": 0.851264581029501, "grad_norm": 0.21426579356193542, "learning_rate": 3.2936770557656096e-06, "loss": 0.0892, "step": 47727 }, { "epoch": 0.8512824171512147, "grad_norm": 0.1993224024772644, "learning_rate": 3.2929048804432513e-06, "loss": 0.0823, "step": 47728 }, { "epoch": 0.8513002532729284, "grad_norm": 0.31235823035240173, "learning_rate": 3.2921327892650955e-06, "loss": 0.0738, "step": 47729 }, { "epoch": 0.851318089394642, "grad_norm": 0.2811439037322998, "learning_rate": 3.2913607822341235e-06, "loss": 0.1235, "step": 47730 }, { "epoch": 0.8513359255163557, "grad_norm": 0.3290042579174042, "learning_rate": 3.290588859353344e-06, "loss": 0.1163, "step": 47731 }, { "epoch": 0.8513537616380694, "grad_norm": 0.3252313733100891, "learning_rate": 3.2898170206257377e-06, "loss": 0.1069, "step": 47732 }, { "epoch": 0.8513715977597831, "grad_norm": 0.27864041924476624, "learning_rate": 3.2890452660542942e-06, "loss": 0.1041, "step": 47733 }, { "epoch": 0.8513894338814968, "grad_norm": 0.29188069701194763, "learning_rate": 3.288273595642016e-06, "loss": 0.1586, "step": 47734 }, { "epoch": 0.8514072700032105, "grad_norm": 0.30595695972442627, "learning_rate": 3.287502009391888e-06, "loss": 0.0856, "step": 47735 }, { "epoch": 0.8514251061249242, "grad_norm": 0.3293379545211792, "learning_rate": 3.286730507306901e-06, "loss": 0.1251, "step": 47736 }, { "epoch": 0.8514429422466379, "grad_norm": 0.40624716877937317, "learning_rate": 3.2859590893900423e-06, "loss": 0.1425, "step": 47737 }, { "epoch": 0.8514607783683515, "grad_norm": 0.3774282932281494, "learning_rate": 3.285187755644309e-06, "loss": 0.101, "step": 47738 }, { "epoch": 0.8514786144900652, "grad_norm": 0.2766306698322296, "learning_rate": 3.2844165060726905e-06, "loss": 0.1138, "step": 47739 }, { "epoch": 0.8514964506117789, "grad_norm": 0.3679528832435608, "learning_rate": 3.283645340678171e-06, "loss": 0.1442, "step": 47740 }, { "epoch": 0.8515142867334927, "grad_norm": 0.3985013961791992, "learning_rate": 3.2828742594637445e-06, "loss": 0.135, "step": 47741 }, { "epoch": 0.8515321228552064, "grad_norm": 0.28673189878463745, "learning_rate": 3.2821032624323926e-06, "loss": 0.0919, "step": 47742 }, { "epoch": 0.8515499589769201, "grad_norm": 0.2633587121963501, "learning_rate": 3.2813323495871156e-06, "loss": 0.1185, "step": 47743 }, { "epoch": 0.8515677950986338, "grad_norm": 0.28822004795074463, "learning_rate": 3.280561520930889e-06, "loss": 0.0884, "step": 47744 }, { "epoch": 0.8515856312203475, "grad_norm": 0.29646366834640503, "learning_rate": 3.279790776466715e-06, "loss": 0.1309, "step": 47745 }, { "epoch": 0.8516034673420612, "grad_norm": 0.3249569535255432, "learning_rate": 3.2790201161975675e-06, "loss": 0.0893, "step": 47746 }, { "epoch": 0.8516213034637748, "grad_norm": 0.32418540120124817, "learning_rate": 3.2782495401264463e-06, "loss": 0.1752, "step": 47747 }, { "epoch": 0.8516391395854885, "grad_norm": 0.33033403754234314, "learning_rate": 3.2774790482563293e-06, "loss": 0.1643, "step": 47748 }, { "epoch": 0.8516569757072022, "grad_norm": 0.24495819211006165, "learning_rate": 3.276708640590209e-06, "loss": 0.0898, "step": 47749 }, { "epoch": 0.8516748118289159, "grad_norm": 0.3480449318885803, "learning_rate": 3.2759383171310638e-06, "loss": 0.1162, "step": 47750 }, { "epoch": 0.8516926479506296, "grad_norm": 0.20499290525913239, "learning_rate": 3.2751680778818876e-06, "loss": 0.0775, "step": 47751 }, { "epoch": 0.8517104840723433, "grad_norm": 0.3261464238166809, "learning_rate": 3.2743979228456678e-06, "loss": 0.1723, "step": 47752 }, { "epoch": 0.851728320194057, "grad_norm": 0.27224817872047424, "learning_rate": 3.2736278520253823e-06, "loss": 0.0904, "step": 47753 }, { "epoch": 0.8517461563157707, "grad_norm": 0.31743139028549194, "learning_rate": 3.27285786542402e-06, "loss": 0.1329, "step": 47754 }, { "epoch": 0.8517639924374844, "grad_norm": 0.2498500943183899, "learning_rate": 3.2720879630445595e-06, "loss": 0.1189, "step": 47755 }, { "epoch": 0.851781828559198, "grad_norm": 0.3125590980052948, "learning_rate": 3.2713181448899954e-06, "loss": 0.1475, "step": 47756 }, { "epoch": 0.8517996646809117, "grad_norm": 0.2251376509666443, "learning_rate": 3.2705484109633034e-06, "loss": 0.0971, "step": 47757 }, { "epoch": 0.8518175008026255, "grad_norm": 0.2038601189851761, "learning_rate": 3.269778761267475e-06, "loss": 0.1233, "step": 47758 }, { "epoch": 0.8518353369243392, "grad_norm": 0.25741422176361084, "learning_rate": 3.2690091958054834e-06, "loss": 0.1228, "step": 47759 }, { "epoch": 0.8518531730460529, "grad_norm": 0.19874070584774017, "learning_rate": 3.268239714580323e-06, "loss": 0.076, "step": 47760 }, { "epoch": 0.8518710091677666, "grad_norm": 0.2819465398788452, "learning_rate": 3.2674703175949727e-06, "loss": 0.1078, "step": 47761 }, { "epoch": 0.8518888452894803, "grad_norm": 0.25806084275245667, "learning_rate": 3.2667010048524157e-06, "loss": 0.1111, "step": 47762 }, { "epoch": 0.851906681411194, "grad_norm": 0.28122764825820923, "learning_rate": 3.2659317763556242e-06, "loss": 0.126, "step": 47763 }, { "epoch": 0.8519245175329077, "grad_norm": 0.26505550742149353, "learning_rate": 3.2651626321075906e-06, "loss": 0.1356, "step": 47764 }, { "epoch": 0.8519423536546213, "grad_norm": 0.3219742178916931, "learning_rate": 3.264393572111299e-06, "loss": 0.125, "step": 47765 }, { "epoch": 0.851960189776335, "grad_norm": 0.3135806620121002, "learning_rate": 3.2636245963697217e-06, "loss": 0.1464, "step": 47766 }, { "epoch": 0.8519780258980487, "grad_norm": 0.3126005530357361, "learning_rate": 3.2628557048858423e-06, "loss": 0.1204, "step": 47767 }, { "epoch": 0.8519958620197624, "grad_norm": 0.4257054626941681, "learning_rate": 3.262086897662639e-06, "loss": 0.1017, "step": 47768 }, { "epoch": 0.8520136981414761, "grad_norm": 0.4266127645969391, "learning_rate": 3.2613181747030984e-06, "loss": 0.1627, "step": 47769 }, { "epoch": 0.8520315342631898, "grad_norm": 0.29566749930381775, "learning_rate": 3.2605495360101988e-06, "loss": 0.091, "step": 47770 }, { "epoch": 0.8520493703849035, "grad_norm": 0.26643410325050354, "learning_rate": 3.25978098158691e-06, "loss": 0.1108, "step": 47771 }, { "epoch": 0.8520672065066172, "grad_norm": 0.3051968216896057, "learning_rate": 3.2590125114362213e-06, "loss": 0.1001, "step": 47772 }, { "epoch": 0.8520850426283308, "grad_norm": 0.2870226204395294, "learning_rate": 3.2582441255611134e-06, "loss": 0.099, "step": 47773 }, { "epoch": 0.8521028787500446, "grad_norm": 0.191124826669693, "learning_rate": 3.257475823964562e-06, "loss": 0.09, "step": 47774 }, { "epoch": 0.8521207148717583, "grad_norm": 0.290499210357666, "learning_rate": 3.256707606649542e-06, "loss": 0.1516, "step": 47775 }, { "epoch": 0.852138550993472, "grad_norm": 0.2909443974494934, "learning_rate": 3.2559394736190294e-06, "loss": 0.1089, "step": 47776 }, { "epoch": 0.8521563871151857, "grad_norm": 0.262471079826355, "learning_rate": 3.2551714248760077e-06, "loss": 0.0987, "step": 47777 }, { "epoch": 0.8521742232368994, "grad_norm": 0.5307853817939758, "learning_rate": 3.254403460423455e-06, "loss": 0.0972, "step": 47778 }, { "epoch": 0.8521920593586131, "grad_norm": 0.43064555525779724, "learning_rate": 3.253635580264344e-06, "loss": 0.1416, "step": 47779 }, { "epoch": 0.8522098954803268, "grad_norm": 0.2886241376399994, "learning_rate": 3.2528677844016557e-06, "loss": 0.1132, "step": 47780 }, { "epoch": 0.8522277316020405, "grad_norm": 0.30424654483795166, "learning_rate": 3.2521000728383543e-06, "loss": 0.1101, "step": 47781 }, { "epoch": 0.8522455677237541, "grad_norm": 0.24711903929710388, "learning_rate": 3.251332445577429e-06, "loss": 0.16, "step": 47782 }, { "epoch": 0.8522634038454678, "grad_norm": 0.25032106041908264, "learning_rate": 3.250564902621853e-06, "loss": 0.0778, "step": 47783 }, { "epoch": 0.8522812399671815, "grad_norm": 0.19252462685108185, "learning_rate": 3.2497974439745948e-06, "loss": 0.0923, "step": 47784 }, { "epoch": 0.8522990760888952, "grad_norm": 0.29898732900619507, "learning_rate": 3.249030069638637e-06, "loss": 0.1264, "step": 47785 }, { "epoch": 0.8523169122106089, "grad_norm": 0.27894526720046997, "learning_rate": 3.248262779616948e-06, "loss": 0.1461, "step": 47786 }, { "epoch": 0.8523347483323226, "grad_norm": 0.3338276147842407, "learning_rate": 3.2474955739125096e-06, "loss": 0.1237, "step": 47787 }, { "epoch": 0.8523525844540363, "grad_norm": 0.24039319157600403, "learning_rate": 3.2467284525282915e-06, "loss": 0.1248, "step": 47788 }, { "epoch": 0.85237042057575, "grad_norm": 0.2494104653596878, "learning_rate": 3.245961415467261e-06, "loss": 0.0784, "step": 47789 }, { "epoch": 0.8523882566974637, "grad_norm": 0.2040245532989502, "learning_rate": 3.245194462732404e-06, "loss": 0.0987, "step": 47790 }, { "epoch": 0.8524060928191775, "grad_norm": 0.22220703959465027, "learning_rate": 3.2444275943266876e-06, "loss": 0.0756, "step": 47791 }, { "epoch": 0.8524239289408911, "grad_norm": 0.24491311609745026, "learning_rate": 3.2436608102530823e-06, "loss": 0.1064, "step": 47792 }, { "epoch": 0.8524417650626048, "grad_norm": 0.24906878173351288, "learning_rate": 3.2428941105145576e-06, "loss": 0.127, "step": 47793 }, { "epoch": 0.8524596011843185, "grad_norm": 0.24240249395370483, "learning_rate": 3.242127495114097e-06, "loss": 0.1572, "step": 47794 }, { "epoch": 0.8524774373060322, "grad_norm": 0.4117433726787567, "learning_rate": 3.2413609640546627e-06, "loss": 0.1485, "step": 47795 }, { "epoch": 0.8524952734277459, "grad_norm": 0.2782477140426636, "learning_rate": 3.2405945173392293e-06, "loss": 0.1061, "step": 47796 }, { "epoch": 0.8525131095494596, "grad_norm": 0.21209150552749634, "learning_rate": 3.2398281549707673e-06, "loss": 0.0869, "step": 47797 }, { "epoch": 0.8525309456711733, "grad_norm": 0.25119513273239136, "learning_rate": 3.239061876952243e-06, "loss": 0.0864, "step": 47798 }, { "epoch": 0.852548781792887, "grad_norm": 0.26370301842689514, "learning_rate": 3.2382956832866267e-06, "loss": 0.1258, "step": 47799 }, { "epoch": 0.8525666179146006, "grad_norm": 0.2579623758792877, "learning_rate": 3.2375295739769025e-06, "loss": 0.0939, "step": 47800 }, { "epoch": 0.8525844540363143, "grad_norm": 0.34552836418151855, "learning_rate": 3.2367635490260284e-06, "loss": 0.1276, "step": 47801 }, { "epoch": 0.852602290158028, "grad_norm": 0.384785532951355, "learning_rate": 3.2359976084369693e-06, "loss": 0.1169, "step": 47802 }, { "epoch": 0.8526201262797417, "grad_norm": 0.2821256220340729, "learning_rate": 3.2352317522127086e-06, "loss": 0.1238, "step": 47803 }, { "epoch": 0.8526379624014554, "grad_norm": 0.2784591019153595, "learning_rate": 3.234465980356205e-06, "loss": 0.117, "step": 47804 }, { "epoch": 0.8526557985231691, "grad_norm": 0.37206172943115234, "learning_rate": 3.2337002928704287e-06, "loss": 0.1608, "step": 47805 }, { "epoch": 0.8526736346448828, "grad_norm": 0.24293984472751617, "learning_rate": 3.2329346897583405e-06, "loss": 0.0822, "step": 47806 }, { "epoch": 0.8526914707665965, "grad_norm": 0.2291383594274521, "learning_rate": 3.232169171022925e-06, "loss": 0.1034, "step": 47807 }, { "epoch": 0.8527093068883103, "grad_norm": 0.29689157009124756, "learning_rate": 3.231403736667138e-06, "loss": 0.1082, "step": 47808 }, { "epoch": 0.852727143010024, "grad_norm": 0.19419559836387634, "learning_rate": 3.230638386693949e-06, "loss": 0.0995, "step": 47809 }, { "epoch": 0.8527449791317376, "grad_norm": 0.38792508840560913, "learning_rate": 3.229873121106325e-06, "loss": 0.085, "step": 47810 }, { "epoch": 0.8527628152534513, "grad_norm": 0.27517664432525635, "learning_rate": 3.229107939907225e-06, "loss": 0.1313, "step": 47811 }, { "epoch": 0.852780651375165, "grad_norm": 0.3076547682285309, "learning_rate": 3.22834284309963e-06, "loss": 0.132, "step": 47812 }, { "epoch": 0.8527984874968787, "grad_norm": 0.24541279673576355, "learning_rate": 3.22757783068649e-06, "loss": 0.1226, "step": 47813 }, { "epoch": 0.8528163236185924, "grad_norm": 0.24003036320209503, "learning_rate": 3.2268129026707838e-06, "loss": 0.0913, "step": 47814 }, { "epoch": 0.8528341597403061, "grad_norm": 0.20849540829658508, "learning_rate": 3.226048059055467e-06, "loss": 0.0956, "step": 47815 }, { "epoch": 0.8528519958620198, "grad_norm": 0.3137829005718231, "learning_rate": 3.225283299843515e-06, "loss": 0.097, "step": 47816 }, { "epoch": 0.8528698319837335, "grad_norm": 0.3306083083152771, "learning_rate": 3.224518625037884e-06, "loss": 0.1822, "step": 47817 }, { "epoch": 0.8528876681054471, "grad_norm": 0.2567809522151947, "learning_rate": 3.223754034641538e-06, "loss": 0.1184, "step": 47818 }, { "epoch": 0.8529055042271608, "grad_norm": 0.26851823925971985, "learning_rate": 3.222989528657441e-06, "loss": 0.0905, "step": 47819 }, { "epoch": 0.8529233403488745, "grad_norm": 0.41028735041618347, "learning_rate": 3.222225107088561e-06, "loss": 0.1234, "step": 47820 }, { "epoch": 0.8529411764705882, "grad_norm": 0.22635671496391296, "learning_rate": 3.221460769937859e-06, "loss": 0.0779, "step": 47821 }, { "epoch": 0.8529590125923019, "grad_norm": 0.3000737428665161, "learning_rate": 3.2206965172082963e-06, "loss": 0.1122, "step": 47822 }, { "epoch": 0.8529768487140156, "grad_norm": 0.24610120058059692, "learning_rate": 3.2199323489028375e-06, "loss": 0.1026, "step": 47823 }, { "epoch": 0.8529946848357293, "grad_norm": 0.2923082113265991, "learning_rate": 3.219168265024436e-06, "loss": 0.1404, "step": 47824 }, { "epoch": 0.8530125209574431, "grad_norm": 0.40196898579597473, "learning_rate": 3.2184042655760693e-06, "loss": 0.1803, "step": 47825 }, { "epoch": 0.8530303570791568, "grad_norm": 0.24313712120056152, "learning_rate": 3.2176403505606883e-06, "loss": 0.138, "step": 47826 }, { "epoch": 0.8530481932008704, "grad_norm": 0.30020010471343994, "learning_rate": 3.2168765199812543e-06, "loss": 0.1368, "step": 47827 }, { "epoch": 0.8530660293225841, "grad_norm": 0.31225764751434326, "learning_rate": 3.2161127738407294e-06, "loss": 0.1169, "step": 47828 }, { "epoch": 0.8530838654442978, "grad_norm": 0.3640735149383545, "learning_rate": 3.21534911214208e-06, "loss": 0.0992, "step": 47829 }, { "epoch": 0.8531017015660115, "grad_norm": 0.1927071213722229, "learning_rate": 3.2145855348882618e-06, "loss": 0.1082, "step": 47830 }, { "epoch": 0.8531195376877252, "grad_norm": 0.24293819069862366, "learning_rate": 3.2138220420822348e-06, "loss": 0.1612, "step": 47831 }, { "epoch": 0.8531373738094389, "grad_norm": 0.2787235379219055, "learning_rate": 3.2130586337269507e-06, "loss": 0.095, "step": 47832 }, { "epoch": 0.8531552099311526, "grad_norm": 0.3321904242038727, "learning_rate": 3.212295309825383e-06, "loss": 0.1456, "step": 47833 }, { "epoch": 0.8531730460528663, "grad_norm": 0.25644099712371826, "learning_rate": 3.2115320703804852e-06, "loss": 0.0679, "step": 47834 }, { "epoch": 0.85319088217458, "grad_norm": 0.2552539110183716, "learning_rate": 3.2107689153952152e-06, "loss": 0.1412, "step": 47835 }, { "epoch": 0.8532087182962936, "grad_norm": 0.29224133491516113, "learning_rate": 3.2100058448725267e-06, "loss": 0.0989, "step": 47836 }, { "epoch": 0.8532265544180073, "grad_norm": 0.19636109471321106, "learning_rate": 3.209242858815381e-06, "loss": 0.0658, "step": 47837 }, { "epoch": 0.853244390539721, "grad_norm": 0.29283997416496277, "learning_rate": 3.20847995722674e-06, "loss": 0.0877, "step": 47838 }, { "epoch": 0.8532622266614347, "grad_norm": 0.21656470000743866, "learning_rate": 3.2077171401095562e-06, "loss": 0.1431, "step": 47839 }, { "epoch": 0.8532800627831484, "grad_norm": 0.3101223111152649, "learning_rate": 3.2069544074667806e-06, "loss": 0.0634, "step": 47840 }, { "epoch": 0.8532978989048621, "grad_norm": 0.2811841070652008, "learning_rate": 3.2061917593013855e-06, "loss": 0.0901, "step": 47841 }, { "epoch": 0.8533157350265759, "grad_norm": 0.4255165755748749, "learning_rate": 3.205429195616311e-06, "loss": 0.1548, "step": 47842 }, { "epoch": 0.8533335711482896, "grad_norm": 0.2860971689224243, "learning_rate": 3.204666716414528e-06, "loss": 0.1354, "step": 47843 }, { "epoch": 0.8533514072700032, "grad_norm": 0.415427029132843, "learning_rate": 3.2039043216989833e-06, "loss": 0.1167, "step": 47844 }, { "epoch": 0.8533692433917169, "grad_norm": 0.3244549036026001, "learning_rate": 3.203142011472626e-06, "loss": 0.0872, "step": 47845 }, { "epoch": 0.8533870795134306, "grad_norm": 0.29142245650291443, "learning_rate": 3.202379785738427e-06, "loss": 0.1317, "step": 47846 }, { "epoch": 0.8534049156351443, "grad_norm": 0.2919248044490814, "learning_rate": 3.2016176444993327e-06, "loss": 0.1375, "step": 47847 }, { "epoch": 0.853422751756858, "grad_norm": 0.22123698890209198, "learning_rate": 3.2008555877582974e-06, "loss": 0.0868, "step": 47848 }, { "epoch": 0.8534405878785717, "grad_norm": 0.25368037819862366, "learning_rate": 3.2000936155182735e-06, "loss": 0.0894, "step": 47849 }, { "epoch": 0.8534584240002854, "grad_norm": 0.2924588918685913, "learning_rate": 3.199331727782212e-06, "loss": 0.1124, "step": 47850 }, { "epoch": 0.8534762601219991, "grad_norm": 0.37117618322372437, "learning_rate": 3.1985699245530743e-06, "loss": 0.0694, "step": 47851 }, { "epoch": 0.8534940962437128, "grad_norm": 0.1976107656955719, "learning_rate": 3.1978082058338105e-06, "loss": 0.1177, "step": 47852 }, { "epoch": 0.8535119323654264, "grad_norm": 0.3082253038883209, "learning_rate": 3.197046571627374e-06, "loss": 0.1707, "step": 47853 }, { "epoch": 0.8535297684871401, "grad_norm": 0.274643212556839, "learning_rate": 3.196285021936707e-06, "loss": 0.1209, "step": 47854 }, { "epoch": 0.8535476046088538, "grad_norm": 0.2921876311302185, "learning_rate": 3.195523556764771e-06, "loss": 0.0756, "step": 47855 }, { "epoch": 0.8535654407305675, "grad_norm": 0.2211625874042511, "learning_rate": 3.1947621761145243e-06, "loss": 0.116, "step": 47856 }, { "epoch": 0.8535832768522812, "grad_norm": 0.25704023241996765, "learning_rate": 3.1940008799889094e-06, "loss": 0.115, "step": 47857 }, { "epoch": 0.8536011129739949, "grad_norm": 0.28190675377845764, "learning_rate": 3.193239668390871e-06, "loss": 0.1425, "step": 47858 }, { "epoch": 0.8536189490957087, "grad_norm": 0.25090017914772034, "learning_rate": 3.1924785413233763e-06, "loss": 0.1444, "step": 47859 }, { "epoch": 0.8536367852174224, "grad_norm": 0.24649174511432648, "learning_rate": 3.191717498789365e-06, "loss": 0.1168, "step": 47860 }, { "epoch": 0.853654621339136, "grad_norm": 0.23139570653438568, "learning_rate": 3.19095654079179e-06, "loss": 0.1014, "step": 47861 }, { "epoch": 0.8536724574608497, "grad_norm": 0.32303106784820557, "learning_rate": 3.190195667333598e-06, "loss": 0.082, "step": 47862 }, { "epoch": 0.8536902935825634, "grad_norm": 0.2713688910007477, "learning_rate": 3.189434878417735e-06, "loss": 0.0901, "step": 47863 }, { "epoch": 0.8537081297042771, "grad_norm": 0.20291899144649506, "learning_rate": 3.1886741740471626e-06, "loss": 0.0884, "step": 47864 }, { "epoch": 0.8537259658259908, "grad_norm": 0.3431152105331421, "learning_rate": 3.1879135542248223e-06, "loss": 0.1535, "step": 47865 }, { "epoch": 0.8537438019477045, "grad_norm": 0.3150855302810669, "learning_rate": 3.187153018953665e-06, "loss": 0.105, "step": 47866 }, { "epoch": 0.8537616380694182, "grad_norm": 0.327764630317688, "learning_rate": 3.1863925682366265e-06, "loss": 0.0711, "step": 47867 }, { "epoch": 0.8537794741911319, "grad_norm": 0.33379560708999634, "learning_rate": 3.1856322020766715e-06, "loss": 0.1646, "step": 47868 }, { "epoch": 0.8537973103128456, "grad_norm": 0.22928795218467712, "learning_rate": 3.184871920476737e-06, "loss": 0.1212, "step": 47869 }, { "epoch": 0.8538151464345592, "grad_norm": 0.4106377065181732, "learning_rate": 3.1841117234397782e-06, "loss": 0.1119, "step": 47870 }, { "epoch": 0.8538329825562729, "grad_norm": 0.21120992302894592, "learning_rate": 3.1833516109687324e-06, "loss": 0.1117, "step": 47871 }, { "epoch": 0.8538508186779866, "grad_norm": 0.2546467185020447, "learning_rate": 3.1825915830665547e-06, "loss": 0.1265, "step": 47872 }, { "epoch": 0.8538686547997003, "grad_norm": 0.2843717634677887, "learning_rate": 3.181831639736188e-06, "loss": 0.1439, "step": 47873 }, { "epoch": 0.853886490921414, "grad_norm": 0.30135223269462585, "learning_rate": 3.1810717809805796e-06, "loss": 0.1229, "step": 47874 }, { "epoch": 0.8539043270431278, "grad_norm": 0.15905214846134186, "learning_rate": 3.1803120068026713e-06, "loss": 0.0656, "step": 47875 }, { "epoch": 0.8539221631648415, "grad_norm": 0.2464081197977066, "learning_rate": 3.179552317205403e-06, "loss": 0.1209, "step": 47876 }, { "epoch": 0.8539399992865552, "grad_norm": 0.27057844400405884, "learning_rate": 3.1787927121917333e-06, "loss": 0.0732, "step": 47877 }, { "epoch": 0.8539578354082689, "grad_norm": 0.26727089285850525, "learning_rate": 3.178033191764601e-06, "loss": 0.073, "step": 47878 }, { "epoch": 0.8539756715299825, "grad_norm": 0.3358921408653259, "learning_rate": 3.177273755926949e-06, "loss": 0.1594, "step": 47879 }, { "epoch": 0.8539935076516962, "grad_norm": 0.2509421706199646, "learning_rate": 3.176514404681713e-06, "loss": 0.0877, "step": 47880 }, { "epoch": 0.8540113437734099, "grad_norm": 0.18894881010055542, "learning_rate": 3.1757551380318525e-06, "loss": 0.0774, "step": 47881 }, { "epoch": 0.8540291798951236, "grad_norm": 0.2663378417491913, "learning_rate": 3.174995955980298e-06, "loss": 0.1289, "step": 47882 }, { "epoch": 0.8540470160168373, "grad_norm": 0.24263593554496765, "learning_rate": 3.1742368585300026e-06, "loss": 0.1251, "step": 47883 }, { "epoch": 0.854064852138551, "grad_norm": 0.2619529068470001, "learning_rate": 3.1734778456838976e-06, "loss": 0.1001, "step": 47884 }, { "epoch": 0.8540826882602647, "grad_norm": 0.2646733224391937, "learning_rate": 3.1727189174449363e-06, "loss": 0.094, "step": 47885 }, { "epoch": 0.8541005243819784, "grad_norm": 0.26845771074295044, "learning_rate": 3.171960073816055e-06, "loss": 0.1508, "step": 47886 }, { "epoch": 0.854118360503692, "grad_norm": 0.2413213849067688, "learning_rate": 3.1712013148001955e-06, "loss": 0.0723, "step": 47887 }, { "epoch": 0.8541361966254057, "grad_norm": 0.2584402859210968, "learning_rate": 3.170442640400301e-06, "loss": 0.088, "step": 47888 }, { "epoch": 0.8541540327471194, "grad_norm": 0.3772064745426178, "learning_rate": 3.169684050619301e-06, "loss": 0.1367, "step": 47889 }, { "epoch": 0.8541718688688331, "grad_norm": 0.2587524354457855, "learning_rate": 3.1689255454601554e-06, "loss": 0.0915, "step": 47890 }, { "epoch": 0.8541897049905468, "grad_norm": 0.21464362740516663, "learning_rate": 3.1681671249257915e-06, "loss": 0.1097, "step": 47891 }, { "epoch": 0.8542075411122606, "grad_norm": 0.26134949922561646, "learning_rate": 3.167408789019155e-06, "loss": 0.1095, "step": 47892 }, { "epoch": 0.8542253772339743, "grad_norm": 0.27121084928512573, "learning_rate": 3.1666505377431767e-06, "loss": 0.0892, "step": 47893 }, { "epoch": 0.854243213355688, "grad_norm": 0.21713709831237793, "learning_rate": 3.1658923711008067e-06, "loss": 0.081, "step": 47894 }, { "epoch": 0.8542610494774017, "grad_norm": 0.2829188406467438, "learning_rate": 3.165134289094979e-06, "loss": 0.1446, "step": 47895 }, { "epoch": 0.8542788855991154, "grad_norm": 0.2501837909221649, "learning_rate": 3.164376291728627e-06, "loss": 0.1042, "step": 47896 }, { "epoch": 0.854296721720829, "grad_norm": 0.3427073657512665, "learning_rate": 3.1636183790046984e-06, "loss": 0.17, "step": 47897 }, { "epoch": 0.8543145578425427, "grad_norm": 0.22886112332344055, "learning_rate": 3.162860550926122e-06, "loss": 0.0775, "step": 47898 }, { "epoch": 0.8543323939642564, "grad_norm": 0.26632001996040344, "learning_rate": 3.162102807495848e-06, "loss": 0.1177, "step": 47899 }, { "epoch": 0.8543502300859701, "grad_norm": 0.2824237644672394, "learning_rate": 3.1613451487168046e-06, "loss": 0.1391, "step": 47900 }, { "epoch": 0.8543680662076838, "grad_norm": 0.2267133891582489, "learning_rate": 3.1605875745919307e-06, "loss": 0.0758, "step": 47901 }, { "epoch": 0.8543859023293975, "grad_norm": 0.2908259332180023, "learning_rate": 3.1598300851241576e-06, "loss": 0.0792, "step": 47902 }, { "epoch": 0.8544037384511112, "grad_norm": 0.24099712073802948, "learning_rate": 3.15907268031643e-06, "loss": 0.0626, "step": 47903 }, { "epoch": 0.8544215745728249, "grad_norm": 0.29692861437797546, "learning_rate": 3.158315360171682e-06, "loss": 0.1614, "step": 47904 }, { "epoch": 0.8544394106945385, "grad_norm": 0.28205573558807373, "learning_rate": 3.157558124692847e-06, "loss": 0.1156, "step": 47905 }, { "epoch": 0.8544572468162522, "grad_norm": 0.3561142683029175, "learning_rate": 3.1568009738828565e-06, "loss": 0.1219, "step": 47906 }, { "epoch": 0.8544750829379659, "grad_norm": 0.21445907652378082, "learning_rate": 3.1560439077446517e-06, "loss": 0.0393, "step": 47907 }, { "epoch": 0.8544929190596796, "grad_norm": 0.2777465879917145, "learning_rate": 3.1552869262811674e-06, "loss": 0.0881, "step": 47908 }, { "epoch": 0.8545107551813934, "grad_norm": 0.23240768909454346, "learning_rate": 3.1545300294953313e-06, "loss": 0.0849, "step": 47909 }, { "epoch": 0.8545285913031071, "grad_norm": 0.34284019470214844, "learning_rate": 3.153773217390088e-06, "loss": 0.1167, "step": 47910 }, { "epoch": 0.8545464274248208, "grad_norm": 0.23494671285152435, "learning_rate": 3.153016489968358e-06, "loss": 0.0832, "step": 47911 }, { "epoch": 0.8545642635465345, "grad_norm": 0.2619295120239258, "learning_rate": 3.1522598472330882e-06, "loss": 0.1206, "step": 47912 }, { "epoch": 0.8545820996682482, "grad_norm": 0.2884621322154999, "learning_rate": 3.1515032891872046e-06, "loss": 0.1073, "step": 47913 }, { "epoch": 0.8545999357899619, "grad_norm": 0.23793882131576538, "learning_rate": 3.150746815833641e-06, "loss": 0.0557, "step": 47914 }, { "epoch": 0.8546177719116755, "grad_norm": 0.20460525155067444, "learning_rate": 3.1499904271753227e-06, "loss": 0.1007, "step": 47915 }, { "epoch": 0.8546356080333892, "grad_norm": 0.2746855914592743, "learning_rate": 3.1492341232151947e-06, "loss": 0.121, "step": 47916 }, { "epoch": 0.8546534441551029, "grad_norm": 0.2409539520740509, "learning_rate": 3.1484779039561816e-06, "loss": 0.0939, "step": 47917 }, { "epoch": 0.8546712802768166, "grad_norm": 0.36384958028793335, "learning_rate": 3.147721769401216e-06, "loss": 0.1548, "step": 47918 }, { "epoch": 0.8546891163985303, "grad_norm": 0.3874285817146301, "learning_rate": 3.1469657195532243e-06, "loss": 0.1456, "step": 47919 }, { "epoch": 0.854706952520244, "grad_norm": 0.35579147934913635, "learning_rate": 3.146209754415144e-06, "loss": 0.082, "step": 47920 }, { "epoch": 0.8547247886419577, "grad_norm": 0.1876312643289566, "learning_rate": 3.1454538739899038e-06, "loss": 0.082, "step": 47921 }, { "epoch": 0.8547426247636714, "grad_norm": 0.20282939076423645, "learning_rate": 3.1446980782804337e-06, "loss": 0.035, "step": 47922 }, { "epoch": 0.854760460885385, "grad_norm": 0.26944705843925476, "learning_rate": 3.1439423672896566e-06, "loss": 0.127, "step": 47923 }, { "epoch": 0.8547782970070987, "grad_norm": 0.40221691131591797, "learning_rate": 3.1431867410205125e-06, "loss": 0.1595, "step": 47924 }, { "epoch": 0.8547961331288124, "grad_norm": 0.2583920359611511, "learning_rate": 3.1424311994759202e-06, "loss": 0.0894, "step": 47925 }, { "epoch": 0.8548139692505262, "grad_norm": 0.29089394211769104, "learning_rate": 3.14167574265882e-06, "loss": 0.1111, "step": 47926 }, { "epoch": 0.8548318053722399, "grad_norm": 0.32535338401794434, "learning_rate": 3.140920370572134e-06, "loss": 0.1281, "step": 47927 }, { "epoch": 0.8548496414939536, "grad_norm": 0.282058447599411, "learning_rate": 3.1401650832187853e-06, "loss": 0.1501, "step": 47928 }, { "epoch": 0.8548674776156673, "grad_norm": 0.2828871011734009, "learning_rate": 3.1394098806017125e-06, "loss": 0.1162, "step": 47929 }, { "epoch": 0.854885313737381, "grad_norm": 0.28532809019088745, "learning_rate": 3.138654762723836e-06, "loss": 0.1211, "step": 47930 }, { "epoch": 0.8549031498590947, "grad_norm": 0.289338618516922, "learning_rate": 3.137899729588084e-06, "loss": 0.1349, "step": 47931 }, { "epoch": 0.8549209859808083, "grad_norm": 0.26051798462867737, "learning_rate": 3.137144781197379e-06, "loss": 0.1127, "step": 47932 }, { "epoch": 0.854938822102522, "grad_norm": 0.26813679933547974, "learning_rate": 3.136389917554658e-06, "loss": 0.1463, "step": 47933 }, { "epoch": 0.8549566582242357, "grad_norm": 0.49779587984085083, "learning_rate": 3.135635138662843e-06, "loss": 0.0917, "step": 47934 }, { "epoch": 0.8549744943459494, "grad_norm": 0.260145366191864, "learning_rate": 3.1348804445248543e-06, "loss": 0.0898, "step": 47935 }, { "epoch": 0.8549923304676631, "grad_norm": 0.21998216211795807, "learning_rate": 3.134125835143617e-06, "loss": 0.1239, "step": 47936 }, { "epoch": 0.8550101665893768, "grad_norm": 0.2710752785205841, "learning_rate": 3.133371310522065e-06, "loss": 0.1019, "step": 47937 }, { "epoch": 0.8550280027110905, "grad_norm": 0.24698343873023987, "learning_rate": 3.132616870663116e-06, "loss": 0.1185, "step": 47938 }, { "epoch": 0.8550458388328042, "grad_norm": 0.2897292673587799, "learning_rate": 3.1318625155696975e-06, "loss": 0.16, "step": 47939 }, { "epoch": 0.8550636749545178, "grad_norm": 0.2503184974193573, "learning_rate": 3.1311082452447376e-06, "loss": 0.0858, "step": 47940 }, { "epoch": 0.8550815110762315, "grad_norm": 0.2755941152572632, "learning_rate": 3.130354059691146e-06, "loss": 0.1141, "step": 47941 }, { "epoch": 0.8550993471979452, "grad_norm": 0.33380693197250366, "learning_rate": 3.1295999589118637e-06, "loss": 0.1254, "step": 47942 }, { "epoch": 0.855117183319659, "grad_norm": 0.264660120010376, "learning_rate": 3.1288459429098028e-06, "loss": 0.1104, "step": 47943 }, { "epoch": 0.8551350194413727, "grad_norm": 0.278068870306015, "learning_rate": 3.1280920116878916e-06, "loss": 0.1325, "step": 47944 }, { "epoch": 0.8551528555630864, "grad_norm": 0.40675032138824463, "learning_rate": 3.127338165249044e-06, "loss": 0.1353, "step": 47945 }, { "epoch": 0.8551706916848001, "grad_norm": 0.2137865573167801, "learning_rate": 3.1265844035961944e-06, "loss": 0.0282, "step": 47946 }, { "epoch": 0.8551885278065138, "grad_norm": 0.34834906458854675, "learning_rate": 3.1258307267322567e-06, "loss": 0.1414, "step": 47947 }, { "epoch": 0.8552063639282275, "grad_norm": 0.21771539747714996, "learning_rate": 3.1250771346601565e-06, "loss": 0.1351, "step": 47948 }, { "epoch": 0.8552242000499412, "grad_norm": 0.3137732446193695, "learning_rate": 3.1243236273828053e-06, "loss": 0.0977, "step": 47949 }, { "epoch": 0.8552420361716548, "grad_norm": 0.3677910566329956, "learning_rate": 3.123570204903137e-06, "loss": 0.1325, "step": 47950 }, { "epoch": 0.8552598722933685, "grad_norm": 0.2628314793109894, "learning_rate": 3.122816867224068e-06, "loss": 0.1073, "step": 47951 }, { "epoch": 0.8552777084150822, "grad_norm": 0.33903229236602783, "learning_rate": 3.1220636143485084e-06, "loss": 0.1474, "step": 47952 }, { "epoch": 0.8552955445367959, "grad_norm": 0.23210380971431732, "learning_rate": 3.1213104462793964e-06, "loss": 0.1121, "step": 47953 }, { "epoch": 0.8553133806585096, "grad_norm": 0.28530988097190857, "learning_rate": 3.120557363019633e-06, "loss": 0.1303, "step": 47954 }, { "epoch": 0.8553312167802233, "grad_norm": 0.23036034405231476, "learning_rate": 3.1198043645721516e-06, "loss": 0.0833, "step": 47955 }, { "epoch": 0.855349052901937, "grad_norm": 0.2138405740261078, "learning_rate": 3.1190514509398667e-06, "loss": 0.102, "step": 47956 }, { "epoch": 0.8553668890236507, "grad_norm": 0.28536635637283325, "learning_rate": 3.1182986221256956e-06, "loss": 0.0733, "step": 47957 }, { "epoch": 0.8553847251453643, "grad_norm": 0.33033108711242676, "learning_rate": 3.117545878132552e-06, "loss": 0.1123, "step": 47958 }, { "epoch": 0.855402561267078, "grad_norm": 0.31263771653175354, "learning_rate": 3.1167932189633625e-06, "loss": 0.1399, "step": 47959 }, { "epoch": 0.8554203973887918, "grad_norm": 0.2632431983947754, "learning_rate": 3.1160406446210432e-06, "loss": 0.0485, "step": 47960 }, { "epoch": 0.8554382335105055, "grad_norm": 0.29876360297203064, "learning_rate": 3.115288155108506e-06, "loss": 0.0651, "step": 47961 }, { "epoch": 0.8554560696322192, "grad_norm": 0.26709580421447754, "learning_rate": 3.114535750428668e-06, "loss": 0.1415, "step": 47962 }, { "epoch": 0.8554739057539329, "grad_norm": 0.2062436044216156, "learning_rate": 3.1137834305844523e-06, "loss": 0.0859, "step": 47963 }, { "epoch": 0.8554917418756466, "grad_norm": 0.2934791147708893, "learning_rate": 3.1130311955787724e-06, "loss": 0.164, "step": 47964 }, { "epoch": 0.8555095779973603, "grad_norm": 0.23830966651439667, "learning_rate": 3.1122790454145345e-06, "loss": 0.1073, "step": 47965 }, { "epoch": 0.855527414119074, "grad_norm": 0.25940176844596863, "learning_rate": 3.11152698009467e-06, "loss": 0.0966, "step": 47966 }, { "epoch": 0.8555452502407876, "grad_norm": 0.30525127053260803, "learning_rate": 3.110774999622082e-06, "loss": 0.1309, "step": 47967 }, { "epoch": 0.8555630863625013, "grad_norm": 0.32313302159309387, "learning_rate": 3.1100231039996957e-06, "loss": 0.1435, "step": 47968 }, { "epoch": 0.855580922484215, "grad_norm": 0.26141560077667236, "learning_rate": 3.10927129323042e-06, "loss": 0.1013, "step": 47969 }, { "epoch": 0.8555987586059287, "grad_norm": 0.3228697180747986, "learning_rate": 3.1085195673171722e-06, "loss": 0.1385, "step": 47970 }, { "epoch": 0.8556165947276424, "grad_norm": 0.2343200147151947, "learning_rate": 3.1077679262628555e-06, "loss": 0.1534, "step": 47971 }, { "epoch": 0.8556344308493561, "grad_norm": 0.18854576349258423, "learning_rate": 3.107016370070398e-06, "loss": 0.0751, "step": 47972 }, { "epoch": 0.8556522669710698, "grad_norm": 0.2713789939880371, "learning_rate": 3.1062648987427058e-06, "loss": 0.0681, "step": 47973 }, { "epoch": 0.8556701030927835, "grad_norm": 0.33594071865081787, "learning_rate": 3.1055135122826926e-06, "loss": 0.1413, "step": 47974 }, { "epoch": 0.8556879392144972, "grad_norm": 0.20694705843925476, "learning_rate": 3.1047622106932654e-06, "loss": 0.1003, "step": 47975 }, { "epoch": 0.855705775336211, "grad_norm": 0.1782122403383255, "learning_rate": 3.104010993977349e-06, "loss": 0.1007, "step": 47976 }, { "epoch": 0.8557236114579246, "grad_norm": 0.2469550371170044, "learning_rate": 3.1032598621378473e-06, "loss": 0.0853, "step": 47977 }, { "epoch": 0.8557414475796383, "grad_norm": 0.32729366421699524, "learning_rate": 3.1025088151776764e-06, "loss": 0.1541, "step": 47978 }, { "epoch": 0.855759283701352, "grad_norm": 0.2528505325317383, "learning_rate": 3.1017578530997347e-06, "loss": 0.1325, "step": 47979 }, { "epoch": 0.8557771198230657, "grad_norm": 0.21704956889152527, "learning_rate": 3.101006975906945e-06, "loss": 0.1037, "step": 47980 }, { "epoch": 0.8557949559447794, "grad_norm": 0.6580010652542114, "learning_rate": 3.100256183602221e-06, "loss": 0.219, "step": 47981 }, { "epoch": 0.8558127920664931, "grad_norm": 0.21591049432754517, "learning_rate": 3.099505476188469e-06, "loss": 0.0886, "step": 47982 }, { "epoch": 0.8558306281882068, "grad_norm": 0.18405179679393768, "learning_rate": 3.0987548536685977e-06, "loss": 0.0499, "step": 47983 }, { "epoch": 0.8558484643099205, "grad_norm": 0.2931574285030365, "learning_rate": 3.098004316045511e-06, "loss": 0.1338, "step": 47984 }, { "epoch": 0.8558663004316341, "grad_norm": 0.2667537033557892, "learning_rate": 3.0972538633221304e-06, "loss": 0.0954, "step": 47985 }, { "epoch": 0.8558841365533478, "grad_norm": 0.23011080920696259, "learning_rate": 3.096503495501357e-06, "loss": 0.07, "step": 47986 }, { "epoch": 0.8559019726750615, "grad_norm": 0.3122994601726532, "learning_rate": 3.095753212586103e-06, "loss": 0.1312, "step": 47987 }, { "epoch": 0.8559198087967752, "grad_norm": 0.22048909962177277, "learning_rate": 3.0950030145792703e-06, "loss": 0.0871, "step": 47988 }, { "epoch": 0.8559376449184889, "grad_norm": 0.3142906427383423, "learning_rate": 3.094252901483777e-06, "loss": 0.0991, "step": 47989 }, { "epoch": 0.8559554810402026, "grad_norm": 0.43747803568840027, "learning_rate": 3.0935028733025227e-06, "loss": 0.1841, "step": 47990 }, { "epoch": 0.8559733171619163, "grad_norm": 0.3561456799507141, "learning_rate": 3.09275293003842e-06, "loss": 0.1813, "step": 47991 }, { "epoch": 0.85599115328363, "grad_norm": 0.30997058749198914, "learning_rate": 3.092003071694366e-06, "loss": 0.1952, "step": 47992 }, { "epoch": 0.8560089894053438, "grad_norm": 0.251335084438324, "learning_rate": 3.0912532982732835e-06, "loss": 0.0929, "step": 47993 }, { "epoch": 0.8560268255270574, "grad_norm": 0.31679704785346985, "learning_rate": 3.0905036097780613e-06, "loss": 0.1004, "step": 47994 }, { "epoch": 0.8560446616487711, "grad_norm": 0.25929996371269226, "learning_rate": 3.089754006211623e-06, "loss": 0.077, "step": 47995 }, { "epoch": 0.8560624977704848, "grad_norm": 0.340159147977829, "learning_rate": 3.089004487576863e-06, "loss": 0.1604, "step": 47996 }, { "epoch": 0.8560803338921985, "grad_norm": 0.3275354504585266, "learning_rate": 3.0882550538766847e-06, "loss": 0.1095, "step": 47997 }, { "epoch": 0.8560981700139122, "grad_norm": 0.41673585772514343, "learning_rate": 3.087505705114005e-06, "loss": 0.1338, "step": 47998 }, { "epoch": 0.8561160061356259, "grad_norm": 0.24002967774868011, "learning_rate": 3.0867564412917187e-06, "loss": 0.1215, "step": 47999 }, { "epoch": 0.8561338422573396, "grad_norm": 0.20746806263923645, "learning_rate": 3.086007262412735e-06, "loss": 0.1069, "step": 48000 }, { "epoch": 0.8561338422573396, "eval_loss": 0.10846409201622009, "eval_runtime": 107.4048, "eval_samples_per_second": 9.534, "eval_steps_per_second": 1.592, "step": 48000 }, { "epoch": 0.8561516783790533, "grad_norm": 0.31325647234916687, "learning_rate": 3.0852581684799515e-06, "loss": 0.1354, "step": 48001 }, { "epoch": 0.856169514500767, "grad_norm": 0.2544112205505371, "learning_rate": 3.0845091594962793e-06, "loss": 0.126, "step": 48002 }, { "epoch": 0.8561873506224806, "grad_norm": 0.18850870430469513, "learning_rate": 3.0837602354646195e-06, "loss": 0.0908, "step": 48003 }, { "epoch": 0.8562051867441943, "grad_norm": 0.362602561712265, "learning_rate": 3.0830113963878778e-06, "loss": 0.1251, "step": 48004 }, { "epoch": 0.856223022865908, "grad_norm": 0.31313273310661316, "learning_rate": 3.082262642268949e-06, "loss": 0.1418, "step": 48005 }, { "epoch": 0.8562408589876217, "grad_norm": 0.3185657262802124, "learning_rate": 3.0815139731107388e-06, "loss": 0.1281, "step": 48006 }, { "epoch": 0.8562586951093354, "grad_norm": 0.4028942584991455, "learning_rate": 3.0807653889161542e-06, "loss": 0.2, "step": 48007 }, { "epoch": 0.8562765312310491, "grad_norm": 0.2397243231534958, "learning_rate": 3.0800168896880892e-06, "loss": 0.0951, "step": 48008 }, { "epoch": 0.8562943673527628, "grad_norm": 0.22202181816101074, "learning_rate": 3.0792684754294533e-06, "loss": 0.1391, "step": 48009 }, { "epoch": 0.8563122034744766, "grad_norm": 0.2454749196767807, "learning_rate": 3.078520146143141e-06, "loss": 0.0895, "step": 48010 }, { "epoch": 0.8563300395961903, "grad_norm": 0.2699190676212311, "learning_rate": 3.0777719018320604e-06, "loss": 0.0834, "step": 48011 }, { "epoch": 0.8563478757179039, "grad_norm": 0.275518536567688, "learning_rate": 3.0770237424991077e-06, "loss": 0.103, "step": 48012 }, { "epoch": 0.8563657118396176, "grad_norm": 0.22893822193145752, "learning_rate": 3.076275668147183e-06, "loss": 0.1009, "step": 48013 }, { "epoch": 0.8563835479613313, "grad_norm": 0.2821848690509796, "learning_rate": 3.0755276787791804e-06, "loss": 0.097, "step": 48014 }, { "epoch": 0.856401384083045, "grad_norm": 0.27042025327682495, "learning_rate": 3.0747797743980096e-06, "loss": 0.0876, "step": 48015 }, { "epoch": 0.8564192202047587, "grad_norm": 0.30033254623413086, "learning_rate": 3.074031955006565e-06, "loss": 0.1297, "step": 48016 }, { "epoch": 0.8564370563264724, "grad_norm": 0.23884430527687073, "learning_rate": 3.073284220607747e-06, "loss": 0.1034, "step": 48017 }, { "epoch": 0.8564548924481861, "grad_norm": 0.35646435618400574, "learning_rate": 3.0725365712044513e-06, "loss": 0.0793, "step": 48018 }, { "epoch": 0.8564727285698998, "grad_norm": 0.30801934003829956, "learning_rate": 3.0717890067995746e-06, "loss": 0.1364, "step": 48019 }, { "epoch": 0.8564905646916134, "grad_norm": 0.21928814053535461, "learning_rate": 3.071041527396021e-06, "loss": 0.1053, "step": 48020 }, { "epoch": 0.8565084008133271, "grad_norm": 0.30654457211494446, "learning_rate": 3.070294132996679e-06, "loss": 0.1449, "step": 48021 }, { "epoch": 0.8565262369350408, "grad_norm": 0.23428907990455627, "learning_rate": 3.069546823604455e-06, "loss": 0.1216, "step": 48022 }, { "epoch": 0.8565440730567545, "grad_norm": 0.29217803478240967, "learning_rate": 3.0687995992222387e-06, "loss": 0.0812, "step": 48023 }, { "epoch": 0.8565619091784682, "grad_norm": 0.19916154444217682, "learning_rate": 3.0680524598529354e-06, "loss": 0.0847, "step": 48024 }, { "epoch": 0.8565797453001819, "grad_norm": 0.23355744779109955, "learning_rate": 3.067305405499435e-06, "loss": 0.0834, "step": 48025 }, { "epoch": 0.8565975814218956, "grad_norm": 0.26400062441825867, "learning_rate": 3.066558436164635e-06, "loss": 0.1072, "step": 48026 }, { "epoch": 0.8566154175436094, "grad_norm": 0.26424655318260193, "learning_rate": 3.065811551851425e-06, "loss": 0.1266, "step": 48027 }, { "epoch": 0.8566332536653231, "grad_norm": 0.25961050391197205, "learning_rate": 3.0650647525627074e-06, "loss": 0.0948, "step": 48028 }, { "epoch": 0.8566510897870367, "grad_norm": 0.2581084668636322, "learning_rate": 3.064318038301378e-06, "loss": 0.1038, "step": 48029 }, { "epoch": 0.8566689259087504, "grad_norm": 0.29015183448791504, "learning_rate": 3.063571409070329e-06, "loss": 0.0953, "step": 48030 }, { "epoch": 0.8566867620304641, "grad_norm": 0.35230720043182373, "learning_rate": 3.0628248648724517e-06, "loss": 0.0737, "step": 48031 }, { "epoch": 0.8567045981521778, "grad_norm": 0.29183217883110046, "learning_rate": 3.0620784057106363e-06, "loss": 0.107, "step": 48032 }, { "epoch": 0.8567224342738915, "grad_norm": 0.289079874753952, "learning_rate": 3.061332031587791e-06, "loss": 0.0863, "step": 48033 }, { "epoch": 0.8567402703956052, "grad_norm": 0.2917121648788452, "learning_rate": 3.060585742506797e-06, "loss": 0.1415, "step": 48034 }, { "epoch": 0.8567581065173189, "grad_norm": 0.2507006824016571, "learning_rate": 3.0598395384705437e-06, "loss": 0.1157, "step": 48035 }, { "epoch": 0.8567759426390326, "grad_norm": 0.35034796595573425, "learning_rate": 3.0590934194819342e-06, "loss": 0.1561, "step": 48036 }, { "epoch": 0.8567937787607462, "grad_norm": 0.3205254375934601, "learning_rate": 3.058347385543858e-06, "loss": 0.1142, "step": 48037 }, { "epoch": 0.8568116148824599, "grad_norm": 0.2830381393432617, "learning_rate": 3.0576014366592075e-06, "loss": 0.1057, "step": 48038 }, { "epoch": 0.8568294510041736, "grad_norm": 0.2835957705974579, "learning_rate": 3.0568555728308746e-06, "loss": 0.1481, "step": 48039 }, { "epoch": 0.8568472871258873, "grad_norm": 0.2971174716949463, "learning_rate": 3.0561097940617402e-06, "loss": 0.1223, "step": 48040 }, { "epoch": 0.856865123247601, "grad_norm": 0.3129143714904785, "learning_rate": 3.05536410035471e-06, "loss": 0.0785, "step": 48041 }, { "epoch": 0.8568829593693147, "grad_norm": 0.16936978697776794, "learning_rate": 3.0546184917126687e-06, "loss": 0.0896, "step": 48042 }, { "epoch": 0.8569007954910284, "grad_norm": 0.291711688041687, "learning_rate": 3.0538729681385046e-06, "loss": 0.1192, "step": 48043 }, { "epoch": 0.8569186316127422, "grad_norm": 0.32878220081329346, "learning_rate": 3.053127529635108e-06, "loss": 0.0833, "step": 48044 }, { "epoch": 0.8569364677344559, "grad_norm": 0.2906176447868347, "learning_rate": 3.052382176205365e-06, "loss": 0.0563, "step": 48045 }, { "epoch": 0.8569543038561696, "grad_norm": 0.3261968195438385, "learning_rate": 3.051636907852176e-06, "loss": 0.1238, "step": 48046 }, { "epoch": 0.8569721399778832, "grad_norm": 0.26035913825035095, "learning_rate": 3.0508917245784197e-06, "loss": 0.1004, "step": 48047 }, { "epoch": 0.8569899760995969, "grad_norm": 0.29857370257377625, "learning_rate": 3.050146626386985e-06, "loss": 0.102, "step": 48048 }, { "epoch": 0.8570078122213106, "grad_norm": 0.29833894968032837, "learning_rate": 3.0494016132807696e-06, "loss": 0.0959, "step": 48049 }, { "epoch": 0.8570256483430243, "grad_norm": 0.23905020952224731, "learning_rate": 3.0486566852626493e-06, "loss": 0.0961, "step": 48050 }, { "epoch": 0.857043484464738, "grad_norm": 0.3541562855243683, "learning_rate": 3.0479118423355212e-06, "loss": 0.1217, "step": 48051 }, { "epoch": 0.8570613205864517, "grad_norm": 0.17636990547180176, "learning_rate": 3.047167084502267e-06, "loss": 0.0874, "step": 48052 }, { "epoch": 0.8570791567081654, "grad_norm": 0.300947368144989, "learning_rate": 3.046422411765773e-06, "loss": 0.1094, "step": 48053 }, { "epoch": 0.8570969928298791, "grad_norm": 0.270541250705719, "learning_rate": 3.045677824128934e-06, "loss": 0.1258, "step": 48054 }, { "epoch": 0.8571148289515927, "grad_norm": 0.24894101917743683, "learning_rate": 3.0449333215946285e-06, "loss": 0.1002, "step": 48055 }, { "epoch": 0.8571326650733064, "grad_norm": 0.27555519342422485, "learning_rate": 3.0441889041657457e-06, "loss": 0.1189, "step": 48056 }, { "epoch": 0.8571505011950201, "grad_norm": 0.21403095126152039, "learning_rate": 3.043444571845172e-06, "loss": 0.0634, "step": 48057 }, { "epoch": 0.8571683373167338, "grad_norm": 0.3495869040489197, "learning_rate": 3.0427003246357834e-06, "loss": 0.0899, "step": 48058 }, { "epoch": 0.8571861734384475, "grad_norm": 0.2624898850917816, "learning_rate": 3.0419561625404768e-06, "loss": 0.1385, "step": 48059 }, { "epoch": 0.8572040095601612, "grad_norm": 0.279317706823349, "learning_rate": 3.041212085562131e-06, "loss": 0.1241, "step": 48060 }, { "epoch": 0.857221845681875, "grad_norm": 0.21027113497257233, "learning_rate": 3.040468093703633e-06, "loss": 0.1002, "step": 48061 }, { "epoch": 0.8572396818035887, "grad_norm": 0.2977741062641144, "learning_rate": 3.03972418696786e-06, "loss": 0.1633, "step": 48062 }, { "epoch": 0.8572575179253024, "grad_norm": 0.22352856397628784, "learning_rate": 3.038980365357702e-06, "loss": 0.0865, "step": 48063 }, { "epoch": 0.857275354047016, "grad_norm": 0.23762989044189453, "learning_rate": 3.0382366288760454e-06, "loss": 0.0846, "step": 48064 }, { "epoch": 0.8572931901687297, "grad_norm": 0.3085680603981018, "learning_rate": 3.037492977525769e-06, "loss": 0.1363, "step": 48065 }, { "epoch": 0.8573110262904434, "grad_norm": 0.2413221299648285, "learning_rate": 3.03674941130975e-06, "loss": 0.0942, "step": 48066 }, { "epoch": 0.8573288624121571, "grad_norm": 0.2554912269115448, "learning_rate": 3.0360059302308812e-06, "loss": 0.076, "step": 48067 }, { "epoch": 0.8573466985338708, "grad_norm": 0.3013289272785187, "learning_rate": 3.035262534292041e-06, "loss": 0.0649, "step": 48068 }, { "epoch": 0.8573645346555845, "grad_norm": 0.356425017118454, "learning_rate": 3.03451922349611e-06, "loss": 0.1375, "step": 48069 }, { "epoch": 0.8573823707772982, "grad_norm": 0.26607567071914673, "learning_rate": 3.0337759978459667e-06, "loss": 0.1224, "step": 48070 }, { "epoch": 0.8574002068990119, "grad_norm": 0.2775510251522064, "learning_rate": 3.0330328573444892e-06, "loss": 0.1502, "step": 48071 }, { "epoch": 0.8574180430207256, "grad_norm": 0.32446298003196716, "learning_rate": 3.03228980199457e-06, "loss": 0.1031, "step": 48072 }, { "epoch": 0.8574358791424392, "grad_norm": 0.29804527759552, "learning_rate": 3.0315468317990844e-06, "loss": 0.1374, "step": 48073 }, { "epoch": 0.8574537152641529, "grad_norm": 0.297566682100296, "learning_rate": 3.0308039467609075e-06, "loss": 0.0869, "step": 48074 }, { "epoch": 0.8574715513858666, "grad_norm": 0.27729323506355286, "learning_rate": 3.0300611468829203e-06, "loss": 0.1453, "step": 48075 }, { "epoch": 0.8574893875075803, "grad_norm": 0.3541051745414734, "learning_rate": 3.029318432168007e-06, "loss": 0.1838, "step": 48076 }, { "epoch": 0.857507223629294, "grad_norm": 0.2368578016757965, "learning_rate": 3.0285758026190407e-06, "loss": 0.1023, "step": 48077 }, { "epoch": 0.8575250597510078, "grad_norm": 0.23215742409229279, "learning_rate": 3.0278332582389075e-06, "loss": 0.0908, "step": 48078 }, { "epoch": 0.8575428958727215, "grad_norm": 0.2790238559246063, "learning_rate": 3.027090799030477e-06, "loss": 0.1254, "step": 48079 }, { "epoch": 0.8575607319944352, "grad_norm": 0.2865852117538452, "learning_rate": 3.0263484249966364e-06, "loss": 0.092, "step": 48080 }, { "epoch": 0.8575785681161489, "grad_norm": 0.23618711531162262, "learning_rate": 3.0256061361402578e-06, "loss": 0.1363, "step": 48081 }, { "epoch": 0.8575964042378625, "grad_norm": 0.2805115878582001, "learning_rate": 3.02486393246422e-06, "loss": 0.1544, "step": 48082 }, { "epoch": 0.8576142403595762, "grad_norm": 0.24137824773788452, "learning_rate": 3.0241218139714005e-06, "loss": 0.1278, "step": 48083 }, { "epoch": 0.8576320764812899, "grad_norm": 0.4007607102394104, "learning_rate": 3.02337978066467e-06, "loss": 0.1262, "step": 48084 }, { "epoch": 0.8576499126030036, "grad_norm": 0.42904749512672424, "learning_rate": 3.0226378325469153e-06, "loss": 0.1514, "step": 48085 }, { "epoch": 0.8576677487247173, "grad_norm": 0.17265328764915466, "learning_rate": 3.0218959696210055e-06, "loss": 0.0668, "step": 48086 }, { "epoch": 0.857685584846431, "grad_norm": 0.37499865889549255, "learning_rate": 3.021154191889819e-06, "loss": 0.0888, "step": 48087 }, { "epoch": 0.8577034209681447, "grad_norm": 0.2551610767841339, "learning_rate": 3.020412499356223e-06, "loss": 0.1238, "step": 48088 }, { "epoch": 0.8577212570898584, "grad_norm": 0.3171907663345337, "learning_rate": 3.0196708920231074e-06, "loss": 0.1169, "step": 48089 }, { "epoch": 0.857739093211572, "grad_norm": 0.32601386308670044, "learning_rate": 3.0189293698933336e-06, "loss": 0.1702, "step": 48090 }, { "epoch": 0.8577569293332857, "grad_norm": 0.24266552925109863, "learning_rate": 3.018187932969785e-06, "loss": 0.0678, "step": 48091 }, { "epoch": 0.8577747654549994, "grad_norm": 0.2656879723072052, "learning_rate": 3.0174465812553288e-06, "loss": 0.1218, "step": 48092 }, { "epoch": 0.8577926015767131, "grad_norm": 0.20636171102523804, "learning_rate": 3.016705314752846e-06, "loss": 0.1272, "step": 48093 }, { "epoch": 0.8578104376984269, "grad_norm": 0.2429744452238083, "learning_rate": 3.0159641334652102e-06, "loss": 0.1082, "step": 48094 }, { "epoch": 0.8578282738201406, "grad_norm": 0.24921710789203644, "learning_rate": 3.0152230373952874e-06, "loss": 0.1322, "step": 48095 }, { "epoch": 0.8578461099418543, "grad_norm": 0.28033968806266785, "learning_rate": 3.0144820265459508e-06, "loss": 0.0985, "step": 48096 }, { "epoch": 0.857863946063568, "grad_norm": 0.3121553957462311, "learning_rate": 3.0137411009200727e-06, "loss": 0.0912, "step": 48097 }, { "epoch": 0.8578817821852817, "grad_norm": 0.2222258597612381, "learning_rate": 3.0130002605205347e-06, "loss": 0.1123, "step": 48098 }, { "epoch": 0.8578996183069953, "grad_norm": 0.3027598559856415, "learning_rate": 3.012259505350201e-06, "loss": 0.1442, "step": 48099 }, { "epoch": 0.857917454428709, "grad_norm": 0.320352166891098, "learning_rate": 3.011518835411942e-06, "loss": 0.1373, "step": 48100 }, { "epoch": 0.8579352905504227, "grad_norm": 0.3319607973098755, "learning_rate": 3.010778250708626e-06, "loss": 0.122, "step": 48101 }, { "epoch": 0.8579531266721364, "grad_norm": 0.23992297053337097, "learning_rate": 3.0100377512431334e-06, "loss": 0.1179, "step": 48102 }, { "epoch": 0.8579709627938501, "grad_norm": 0.23372259736061096, "learning_rate": 3.0092973370183324e-06, "loss": 0.0933, "step": 48103 }, { "epoch": 0.8579887989155638, "grad_norm": 0.25746750831604004, "learning_rate": 3.00855700803708e-06, "loss": 0.1043, "step": 48104 }, { "epoch": 0.8580066350372775, "grad_norm": 0.22758682072162628, "learning_rate": 3.0078167643022655e-06, "loss": 0.1198, "step": 48105 }, { "epoch": 0.8580244711589912, "grad_norm": 0.34161216020584106, "learning_rate": 3.0070766058167415e-06, "loss": 0.1302, "step": 48106 }, { "epoch": 0.8580423072807049, "grad_norm": 0.2670750319957733, "learning_rate": 3.0063365325833925e-06, "loss": 0.1021, "step": 48107 }, { "epoch": 0.8580601434024185, "grad_norm": 0.25317394733428955, "learning_rate": 3.005596544605077e-06, "loss": 0.1139, "step": 48108 }, { "epoch": 0.8580779795241322, "grad_norm": 0.26877349615097046, "learning_rate": 3.004856641884668e-06, "loss": 0.0459, "step": 48109 }, { "epoch": 0.8580958156458459, "grad_norm": 0.2264004945755005, "learning_rate": 3.004116824425024e-06, "loss": 0.0978, "step": 48110 }, { "epoch": 0.8581136517675597, "grad_norm": 0.23884214460849762, "learning_rate": 3.003377092229026e-06, "loss": 0.0846, "step": 48111 }, { "epoch": 0.8581314878892734, "grad_norm": 0.25644227862358093, "learning_rate": 3.002637445299539e-06, "loss": 0.0964, "step": 48112 }, { "epoch": 0.8581493240109871, "grad_norm": 0.29159095883369446, "learning_rate": 3.0018978836394235e-06, "loss": 0.1273, "step": 48113 }, { "epoch": 0.8581671601327008, "grad_norm": 0.25391098856925964, "learning_rate": 3.001158407251545e-06, "loss": 0.1292, "step": 48114 }, { "epoch": 0.8581849962544145, "grad_norm": 0.22363534569740295, "learning_rate": 3.000419016138781e-06, "loss": 0.0863, "step": 48115 }, { "epoch": 0.8582028323761282, "grad_norm": 0.27557283639907837, "learning_rate": 2.99967971030399e-06, "loss": 0.1611, "step": 48116 }, { "epoch": 0.8582206684978418, "grad_norm": 0.3189485967159271, "learning_rate": 2.998940489750035e-06, "loss": 0.1497, "step": 48117 }, { "epoch": 0.8582385046195555, "grad_norm": 0.2348957061767578, "learning_rate": 2.9982013544797904e-06, "loss": 0.1009, "step": 48118 }, { "epoch": 0.8582563407412692, "grad_norm": 0.17953412234783173, "learning_rate": 2.99746230449611e-06, "loss": 0.0502, "step": 48119 }, { "epoch": 0.8582741768629829, "grad_norm": 0.24287880957126617, "learning_rate": 2.9967233398018714e-06, "loss": 0.1195, "step": 48120 }, { "epoch": 0.8582920129846966, "grad_norm": 0.6242601871490479, "learning_rate": 2.9959844603999335e-06, "loss": 0.142, "step": 48121 }, { "epoch": 0.8583098491064103, "grad_norm": 0.3184574246406555, "learning_rate": 2.995245666293159e-06, "loss": 0.1495, "step": 48122 }, { "epoch": 0.858327685228124, "grad_norm": 0.2637436091899872, "learning_rate": 2.994506957484408e-06, "loss": 0.087, "step": 48123 }, { "epoch": 0.8583455213498377, "grad_norm": 0.21575382351875305, "learning_rate": 2.9937683339765543e-06, "loss": 0.1153, "step": 48124 }, { "epoch": 0.8583633574715513, "grad_norm": 0.33434629440307617, "learning_rate": 2.9930297957724536e-06, "loss": 0.1456, "step": 48125 }, { "epoch": 0.858381193593265, "grad_norm": 0.27986589074134827, "learning_rate": 2.99229134287497e-06, "loss": 0.1498, "step": 48126 }, { "epoch": 0.8583990297149787, "grad_norm": 0.2441047877073288, "learning_rate": 2.9915529752869625e-06, "loss": 0.1096, "step": 48127 }, { "epoch": 0.8584168658366925, "grad_norm": 0.2196342647075653, "learning_rate": 2.990814693011301e-06, "loss": 0.1106, "step": 48128 }, { "epoch": 0.8584347019584062, "grad_norm": 0.1972104012966156, "learning_rate": 2.9900764960508447e-06, "loss": 0.0821, "step": 48129 }, { "epoch": 0.8584525380801199, "grad_norm": 0.3377305567264557, "learning_rate": 2.9893383844084548e-06, "loss": 0.1371, "step": 48130 }, { "epoch": 0.8584703742018336, "grad_norm": 0.26327067613601685, "learning_rate": 2.9886003580869847e-06, "loss": 0.1618, "step": 48131 }, { "epoch": 0.8584882103235473, "grad_norm": 0.24589316546916962, "learning_rate": 2.987862417089307e-06, "loss": 0.0802, "step": 48132 }, { "epoch": 0.858506046445261, "grad_norm": 0.29374364018440247, "learning_rate": 2.9871245614182723e-06, "loss": 0.0716, "step": 48133 }, { "epoch": 0.8585238825669746, "grad_norm": 0.3158067762851715, "learning_rate": 2.9863867910767534e-06, "loss": 0.155, "step": 48134 }, { "epoch": 0.8585417186886883, "grad_norm": 0.3153727352619171, "learning_rate": 2.9856491060676007e-06, "loss": 0.1376, "step": 48135 }, { "epoch": 0.858559554810402, "grad_norm": 0.21420489251613617, "learning_rate": 2.98491150639367e-06, "loss": 0.0757, "step": 48136 }, { "epoch": 0.8585773909321157, "grad_norm": 0.2597881853580475, "learning_rate": 2.9841739920578314e-06, "loss": 0.0884, "step": 48137 }, { "epoch": 0.8585952270538294, "grad_norm": 0.25425538420677185, "learning_rate": 2.9834365630629386e-06, "loss": 0.122, "step": 48138 }, { "epoch": 0.8586130631755431, "grad_norm": 0.2531696856021881, "learning_rate": 2.9826992194118495e-06, "loss": 0.0883, "step": 48139 }, { "epoch": 0.8586308992972568, "grad_norm": 0.9652997255325317, "learning_rate": 2.9819619611074186e-06, "loss": 0.1574, "step": 48140 }, { "epoch": 0.8586487354189705, "grad_norm": 0.2712922990322113, "learning_rate": 2.9812247881525147e-06, "loss": 0.0923, "step": 48141 }, { "epoch": 0.8586665715406842, "grad_norm": 0.3017910122871399, "learning_rate": 2.980487700549986e-06, "loss": 0.1505, "step": 48142 }, { "epoch": 0.8586844076623978, "grad_norm": 0.26514509320259094, "learning_rate": 2.9797506983026914e-06, "loss": 0.1003, "step": 48143 }, { "epoch": 0.8587022437841115, "grad_norm": 0.17610181868076324, "learning_rate": 2.9790137814134864e-06, "loss": 0.0589, "step": 48144 }, { "epoch": 0.8587200799058253, "grad_norm": 0.29654136300086975, "learning_rate": 2.9782769498852338e-06, "loss": 0.139, "step": 48145 }, { "epoch": 0.858737916027539, "grad_norm": 0.3049066662788391, "learning_rate": 2.97754020372078e-06, "loss": 0.1078, "step": 48146 }, { "epoch": 0.8587557521492527, "grad_norm": 0.30414435267448425, "learning_rate": 2.97680354292299e-06, "loss": 0.0998, "step": 48147 }, { "epoch": 0.8587735882709664, "grad_norm": 0.3989607095718384, "learning_rate": 2.97606696749472e-06, "loss": 0.1082, "step": 48148 }, { "epoch": 0.8587914243926801, "grad_norm": 0.32944050431251526, "learning_rate": 2.9753304774388148e-06, "loss": 0.1258, "step": 48149 }, { "epoch": 0.8588092605143938, "grad_norm": 0.3494466245174408, "learning_rate": 2.9745940727581385e-06, "loss": 0.101, "step": 48150 }, { "epoch": 0.8588270966361075, "grad_norm": 0.27578550577163696, "learning_rate": 2.9738577534555445e-06, "loss": 0.1053, "step": 48151 }, { "epoch": 0.8588449327578211, "grad_norm": 0.23384353518486023, "learning_rate": 2.9731215195338863e-06, "loss": 0.0971, "step": 48152 }, { "epoch": 0.8588627688795348, "grad_norm": 0.26223936676979065, "learning_rate": 2.9723853709960114e-06, "loss": 0.0903, "step": 48153 }, { "epoch": 0.8588806050012485, "grad_norm": 0.2957221269607544, "learning_rate": 2.9716493078447844e-06, "loss": 0.1664, "step": 48154 }, { "epoch": 0.8588984411229622, "grad_norm": 0.3796936273574829, "learning_rate": 2.970913330083053e-06, "loss": 0.1355, "step": 48155 }, { "epoch": 0.8589162772446759, "grad_norm": 0.45391565561294556, "learning_rate": 2.970177437713667e-06, "loss": 0.1153, "step": 48156 }, { "epoch": 0.8589341133663896, "grad_norm": 0.3027273118495941, "learning_rate": 2.969441630739481e-06, "loss": 0.1854, "step": 48157 }, { "epoch": 0.8589519494881033, "grad_norm": 0.26276105642318726, "learning_rate": 2.968705909163352e-06, "loss": 0.1005, "step": 48158 }, { "epoch": 0.858969785609817, "grad_norm": 0.2261965423822403, "learning_rate": 2.967970272988127e-06, "loss": 0.0689, "step": 48159 }, { "epoch": 0.8589876217315306, "grad_norm": 0.27368614077568054, "learning_rate": 2.967234722216655e-06, "loss": 0.0948, "step": 48160 }, { "epoch": 0.8590054578532443, "grad_norm": 0.25188788771629333, "learning_rate": 2.9664992568517985e-06, "loss": 0.1044, "step": 48161 }, { "epoch": 0.8590232939749581, "grad_norm": 0.2663467228412628, "learning_rate": 2.9657638768963907e-06, "loss": 0.0983, "step": 48162 }, { "epoch": 0.8590411300966718, "grad_norm": 0.32396388053894043, "learning_rate": 2.965028582353302e-06, "loss": 0.1088, "step": 48163 }, { "epoch": 0.8590589662183855, "grad_norm": 0.33476510643959045, "learning_rate": 2.9642933732253715e-06, "loss": 0.1344, "step": 48164 }, { "epoch": 0.8590768023400992, "grad_norm": 0.27767929434776306, "learning_rate": 2.9635582495154525e-06, "loss": 0.111, "step": 48165 }, { "epoch": 0.8590946384618129, "grad_norm": 0.27167049050331116, "learning_rate": 2.9628232112263842e-06, "loss": 0.1217, "step": 48166 }, { "epoch": 0.8591124745835266, "grad_norm": 0.21474626660346985, "learning_rate": 2.9620882583610317e-06, "loss": 0.0978, "step": 48167 }, { "epoch": 0.8591303107052403, "grad_norm": 0.2251388281583786, "learning_rate": 2.961353390922236e-06, "loss": 0.0818, "step": 48168 }, { "epoch": 0.859148146826954, "grad_norm": 0.3174282908439636, "learning_rate": 2.960618608912849e-06, "loss": 0.1045, "step": 48169 }, { "epoch": 0.8591659829486676, "grad_norm": 0.3171255588531494, "learning_rate": 2.959883912335709e-06, "loss": 0.0715, "step": 48170 }, { "epoch": 0.8591838190703813, "grad_norm": 0.33975762128829956, "learning_rate": 2.9591493011936755e-06, "loss": 0.1548, "step": 48171 }, { "epoch": 0.859201655192095, "grad_norm": 0.2754475772380829, "learning_rate": 2.958414775489596e-06, "loss": 0.1127, "step": 48172 }, { "epoch": 0.8592194913138087, "grad_norm": 0.27586838603019714, "learning_rate": 2.957680335226304e-06, "loss": 0.0801, "step": 48173 }, { "epoch": 0.8592373274355224, "grad_norm": 0.23614056408405304, "learning_rate": 2.956945980406664e-06, "loss": 0.1058, "step": 48174 }, { "epoch": 0.8592551635572361, "grad_norm": 0.23251068592071533, "learning_rate": 2.9562117110335103e-06, "loss": 0.1084, "step": 48175 }, { "epoch": 0.8592729996789498, "grad_norm": 0.27740004658699036, "learning_rate": 2.9554775271096957e-06, "loss": 0.0906, "step": 48176 }, { "epoch": 0.8592908358006635, "grad_norm": 0.35315370559692383, "learning_rate": 2.9547434286380655e-06, "loss": 0.1538, "step": 48177 }, { "epoch": 0.8593086719223771, "grad_norm": 0.2677737772464752, "learning_rate": 2.9540094156214642e-06, "loss": 0.0833, "step": 48178 }, { "epoch": 0.8593265080440909, "grad_norm": 0.23308239877223969, "learning_rate": 2.9532754880627344e-06, "loss": 0.1066, "step": 48179 }, { "epoch": 0.8593443441658046, "grad_norm": 0.24560680985450745, "learning_rate": 2.952541645964724e-06, "loss": 0.091, "step": 48180 }, { "epoch": 0.8593621802875183, "grad_norm": 0.38948020339012146, "learning_rate": 2.9518078893302797e-06, "loss": 0.138, "step": 48181 }, { "epoch": 0.859380016409232, "grad_norm": 0.296694815158844, "learning_rate": 2.951074218162245e-06, "loss": 0.1152, "step": 48182 }, { "epoch": 0.8593978525309457, "grad_norm": 0.2746095061302185, "learning_rate": 2.9503406324634523e-06, "loss": 0.1077, "step": 48183 }, { "epoch": 0.8594156886526594, "grad_norm": 0.3277067542076111, "learning_rate": 2.9496071322367645e-06, "loss": 0.1484, "step": 48184 }, { "epoch": 0.8594335247743731, "grad_norm": 0.2806954085826874, "learning_rate": 2.9488737174850124e-06, "loss": 0.1126, "step": 48185 }, { "epoch": 0.8594513608960868, "grad_norm": 0.20883244276046753, "learning_rate": 2.948140388211043e-06, "loss": 0.0793, "step": 48186 }, { "epoch": 0.8594691970178004, "grad_norm": 0.24078921973705292, "learning_rate": 2.9474071444176936e-06, "loss": 0.1169, "step": 48187 }, { "epoch": 0.8594870331395141, "grad_norm": 0.2193739414215088, "learning_rate": 2.9466739861078146e-06, "loss": 0.0839, "step": 48188 }, { "epoch": 0.8595048692612278, "grad_norm": 0.2722387909889221, "learning_rate": 2.94594091328424e-06, "loss": 0.1082, "step": 48189 }, { "epoch": 0.8595227053829415, "grad_norm": 0.26688340306282043, "learning_rate": 2.9452079259498196e-06, "loss": 0.1041, "step": 48190 }, { "epoch": 0.8595405415046552, "grad_norm": 0.3343719244003296, "learning_rate": 2.944475024107393e-06, "loss": 0.1232, "step": 48191 }, { "epoch": 0.8595583776263689, "grad_norm": 0.33834537863731384, "learning_rate": 2.943742207759792e-06, "loss": 0.1154, "step": 48192 }, { "epoch": 0.8595762137480826, "grad_norm": 0.2993813157081604, "learning_rate": 2.9430094769098693e-06, "loss": 0.1131, "step": 48193 }, { "epoch": 0.8595940498697963, "grad_norm": 0.25525423884391785, "learning_rate": 2.9422768315604615e-06, "loss": 0.0857, "step": 48194 }, { "epoch": 0.8596118859915101, "grad_norm": 0.22723130881786346, "learning_rate": 2.9415442717144053e-06, "loss": 0.0817, "step": 48195 }, { "epoch": 0.8596297221132237, "grad_norm": 0.25193265080451965, "learning_rate": 2.9408117973745404e-06, "loss": 0.1243, "step": 48196 }, { "epoch": 0.8596475582349374, "grad_norm": 0.23626454174518585, "learning_rate": 2.940079408543708e-06, "loss": 0.1159, "step": 48197 }, { "epoch": 0.8596653943566511, "grad_norm": 0.3407726287841797, "learning_rate": 2.9393471052247518e-06, "loss": 0.1472, "step": 48198 }, { "epoch": 0.8596832304783648, "grad_norm": 0.23359082639217377, "learning_rate": 2.9386148874205044e-06, "loss": 0.0737, "step": 48199 }, { "epoch": 0.8597010666000785, "grad_norm": 0.5486839413642883, "learning_rate": 2.9378827551338e-06, "loss": 0.1126, "step": 48200 }, { "epoch": 0.8597189027217922, "grad_norm": 0.2252131998538971, "learning_rate": 2.9371507083674864e-06, "loss": 0.082, "step": 48201 }, { "epoch": 0.8597367388435059, "grad_norm": 0.29695749282836914, "learning_rate": 2.936418747124392e-06, "loss": 0.116, "step": 48202 }, { "epoch": 0.8597545749652196, "grad_norm": 0.27731817960739136, "learning_rate": 2.9356868714073644e-06, "loss": 0.1503, "step": 48203 }, { "epoch": 0.8597724110869333, "grad_norm": 0.3915056586265564, "learning_rate": 2.9349550812192344e-06, "loss": 0.1512, "step": 48204 }, { "epoch": 0.8597902472086469, "grad_norm": 0.28447309136390686, "learning_rate": 2.9342233765628363e-06, "loss": 0.1028, "step": 48205 }, { "epoch": 0.8598080833303606, "grad_norm": 0.2695890963077545, "learning_rate": 2.933491757441015e-06, "loss": 0.1166, "step": 48206 }, { "epoch": 0.8598259194520743, "grad_norm": 0.2625380754470825, "learning_rate": 2.9327602238565984e-06, "loss": 0.1371, "step": 48207 }, { "epoch": 0.859843755573788, "grad_norm": 0.2839668095111847, "learning_rate": 2.932028775812426e-06, "loss": 0.1171, "step": 48208 }, { "epoch": 0.8598615916955017, "grad_norm": 0.28277912735939026, "learning_rate": 2.931297413311329e-06, "loss": 0.07, "step": 48209 }, { "epoch": 0.8598794278172154, "grad_norm": 0.4427390396595001, "learning_rate": 2.9305661363561497e-06, "loss": 0.1108, "step": 48210 }, { "epoch": 0.8598972639389291, "grad_norm": 0.2555272579193115, "learning_rate": 2.9298349449497166e-06, "loss": 0.1262, "step": 48211 }, { "epoch": 0.8599151000606429, "grad_norm": 0.2379920482635498, "learning_rate": 2.9291038390948682e-06, "loss": 0.0541, "step": 48212 }, { "epoch": 0.8599329361823566, "grad_norm": 0.2259339839220047, "learning_rate": 2.928372818794431e-06, "loss": 0.1078, "step": 48213 }, { "epoch": 0.8599507723040702, "grad_norm": 0.24760255217552185, "learning_rate": 2.9276418840512493e-06, "loss": 0.1112, "step": 48214 }, { "epoch": 0.8599686084257839, "grad_norm": 0.1892232745885849, "learning_rate": 2.926911034868149e-06, "loss": 0.0663, "step": 48215 }, { "epoch": 0.8599864445474976, "grad_norm": 0.2524602711200714, "learning_rate": 2.9261802712479635e-06, "loss": 0.0822, "step": 48216 }, { "epoch": 0.8600042806692113, "grad_norm": 0.2967257797718048, "learning_rate": 2.92544959319353e-06, "loss": 0.0769, "step": 48217 }, { "epoch": 0.860022116790925, "grad_norm": 0.3313281536102295, "learning_rate": 2.9247190007076736e-06, "loss": 0.066, "step": 48218 }, { "epoch": 0.8600399529126387, "grad_norm": 0.3659050166606903, "learning_rate": 2.9239884937932366e-06, "loss": 0.1361, "step": 48219 }, { "epoch": 0.8600577890343524, "grad_norm": 0.33097416162490845, "learning_rate": 2.9232580724530444e-06, "loss": 0.1609, "step": 48220 }, { "epoch": 0.8600756251560661, "grad_norm": 0.22080326080322266, "learning_rate": 2.9225277366899285e-06, "loss": 0.0848, "step": 48221 }, { "epoch": 0.8600934612777797, "grad_norm": 0.25807568430900574, "learning_rate": 2.9217974865067164e-06, "loss": 0.1547, "step": 48222 }, { "epoch": 0.8601112973994934, "grad_norm": 0.2913421094417572, "learning_rate": 2.9210673219062484e-06, "loss": 0.0989, "step": 48223 }, { "epoch": 0.8601291335212071, "grad_norm": 0.22137191891670227, "learning_rate": 2.9203372428913493e-06, "loss": 0.1097, "step": 48224 }, { "epoch": 0.8601469696429208, "grad_norm": 0.37268900871276855, "learning_rate": 2.919607249464848e-06, "loss": 0.1271, "step": 48225 }, { "epoch": 0.8601648057646345, "grad_norm": 0.2568928301334381, "learning_rate": 2.9188773416295695e-06, "loss": 0.1258, "step": 48226 }, { "epoch": 0.8601826418863482, "grad_norm": 0.25159287452697754, "learning_rate": 2.9181475193883563e-06, "loss": 0.064, "step": 48227 }, { "epoch": 0.8602004780080619, "grad_norm": 0.213983952999115, "learning_rate": 2.917417782744028e-06, "loss": 0.0917, "step": 48228 }, { "epoch": 0.8602183141297757, "grad_norm": 0.21965080499649048, "learning_rate": 2.9166881316994106e-06, "loss": 0.1035, "step": 48229 }, { "epoch": 0.8602361502514894, "grad_norm": 0.22449922561645508, "learning_rate": 2.915958566257343e-06, "loss": 0.1138, "step": 48230 }, { "epoch": 0.860253986373203, "grad_norm": 0.27136072516441345, "learning_rate": 2.915229086420643e-06, "loss": 0.087, "step": 48231 }, { "epoch": 0.8602718224949167, "grad_norm": 0.3956851065158844, "learning_rate": 2.914499692192149e-06, "loss": 0.1499, "step": 48232 }, { "epoch": 0.8602896586166304, "grad_norm": 0.2291734665632248, "learning_rate": 2.9137703835746794e-06, "loss": 0.1058, "step": 48233 }, { "epoch": 0.8603074947383441, "grad_norm": 0.3187664747238159, "learning_rate": 2.913041160571067e-06, "loss": 0.142, "step": 48234 }, { "epoch": 0.8603253308600578, "grad_norm": 0.2660762369632721, "learning_rate": 2.9123120231841273e-06, "loss": 0.1175, "step": 48235 }, { "epoch": 0.8603431669817715, "grad_norm": 0.33615776896476746, "learning_rate": 2.9115829714167016e-06, "loss": 0.1174, "step": 48236 }, { "epoch": 0.8603610031034852, "grad_norm": 0.34933218359947205, "learning_rate": 2.91085400527161e-06, "loss": 0.1424, "step": 48237 }, { "epoch": 0.8603788392251989, "grad_norm": 0.23214490711688995, "learning_rate": 2.910125124751678e-06, "loss": 0.0859, "step": 48238 }, { "epoch": 0.8603966753469126, "grad_norm": 0.27387022972106934, "learning_rate": 2.909396329859726e-06, "loss": 0.148, "step": 48239 }, { "epoch": 0.8604145114686262, "grad_norm": 0.2868475317955017, "learning_rate": 2.9086676205985872e-06, "loss": 0.1329, "step": 48240 }, { "epoch": 0.8604323475903399, "grad_norm": 0.3276348412036896, "learning_rate": 2.9079389969710823e-06, "loss": 0.0915, "step": 48241 }, { "epoch": 0.8604501837120536, "grad_norm": 0.2806035578250885, "learning_rate": 2.907210458980039e-06, "loss": 0.0903, "step": 48242 }, { "epoch": 0.8604680198337673, "grad_norm": 0.26185864210128784, "learning_rate": 2.9064820066282695e-06, "loss": 0.1487, "step": 48243 }, { "epoch": 0.860485855955481, "grad_norm": 0.24755623936653137, "learning_rate": 2.9057536399186096e-06, "loss": 0.1161, "step": 48244 }, { "epoch": 0.8605036920771947, "grad_norm": 0.2634468078613281, "learning_rate": 2.9050253588538855e-06, "loss": 0.0996, "step": 48245 }, { "epoch": 0.8605215281989085, "grad_norm": 0.2416146695613861, "learning_rate": 2.9042971634369113e-06, "loss": 0.0962, "step": 48246 }, { "epoch": 0.8605393643206222, "grad_norm": 0.30893415212631226, "learning_rate": 2.903569053670516e-06, "loss": 0.163, "step": 48247 }, { "epoch": 0.8605572004423359, "grad_norm": 0.3862980604171753, "learning_rate": 2.9028410295575125e-06, "loss": 0.1409, "step": 48248 }, { "epoch": 0.8605750365640495, "grad_norm": 0.29877278208732605, "learning_rate": 2.9021130911007333e-06, "loss": 0.0812, "step": 48249 }, { "epoch": 0.8605928726857632, "grad_norm": 0.29208359122276306, "learning_rate": 2.9013852383029976e-06, "loss": 0.1073, "step": 48250 }, { "epoch": 0.8606107088074769, "grad_norm": 0.31787821650505066, "learning_rate": 2.900657471167123e-06, "loss": 0.1235, "step": 48251 }, { "epoch": 0.8606285449291906, "grad_norm": 0.25469255447387695, "learning_rate": 2.8999297896959294e-06, "loss": 0.106, "step": 48252 }, { "epoch": 0.8606463810509043, "grad_norm": 0.29802897572517395, "learning_rate": 2.8992021938922475e-06, "loss": 0.1344, "step": 48253 }, { "epoch": 0.860664217172618, "grad_norm": 0.2553252875804901, "learning_rate": 2.8984746837588895e-06, "loss": 0.1493, "step": 48254 }, { "epoch": 0.8606820532943317, "grad_norm": 0.2799240052700043, "learning_rate": 2.8977472592986776e-06, "loss": 0.1269, "step": 48255 }, { "epoch": 0.8606998894160454, "grad_norm": 0.26853373646736145, "learning_rate": 2.8970199205144266e-06, "loss": 0.1647, "step": 48256 }, { "epoch": 0.860717725537759, "grad_norm": 0.3006648123264313, "learning_rate": 2.8962926674089673e-06, "loss": 0.101, "step": 48257 }, { "epoch": 0.8607355616594727, "grad_norm": 0.32147276401519775, "learning_rate": 2.8955654999851033e-06, "loss": 0.1185, "step": 48258 }, { "epoch": 0.8607533977811864, "grad_norm": 0.3249829113483429, "learning_rate": 2.8948384182456684e-06, "loss": 0.1015, "step": 48259 }, { "epoch": 0.8607712339029001, "grad_norm": 0.19432179629802704, "learning_rate": 2.894111422193477e-06, "loss": 0.1341, "step": 48260 }, { "epoch": 0.8607890700246138, "grad_norm": 0.23653775453567505, "learning_rate": 2.8933845118313374e-06, "loss": 0.0743, "step": 48261 }, { "epoch": 0.8608069061463275, "grad_norm": 0.3151514232158661, "learning_rate": 2.8926576871620787e-06, "loss": 0.1215, "step": 48262 }, { "epoch": 0.8608247422680413, "grad_norm": 0.2512984871864319, "learning_rate": 2.8919309481885176e-06, "loss": 0.1018, "step": 48263 }, { "epoch": 0.860842578389755, "grad_norm": 0.24243074655532837, "learning_rate": 2.891204294913466e-06, "loss": 0.0969, "step": 48264 }, { "epoch": 0.8608604145114687, "grad_norm": 0.2831850051879883, "learning_rate": 2.890477727339738e-06, "loss": 0.1195, "step": 48265 }, { "epoch": 0.8608782506331824, "grad_norm": 0.36379262804985046, "learning_rate": 2.88975124547016e-06, "loss": 0.117, "step": 48266 }, { "epoch": 0.860896086754896, "grad_norm": 0.25840455293655396, "learning_rate": 2.8890248493075424e-06, "loss": 0.0917, "step": 48267 }, { "epoch": 0.8609139228766097, "grad_norm": 0.3034076690673828, "learning_rate": 2.8882985388547028e-06, "loss": 0.12, "step": 48268 }, { "epoch": 0.8609317589983234, "grad_norm": 0.2777332663536072, "learning_rate": 2.8875723141144535e-06, "loss": 0.1366, "step": 48269 }, { "epoch": 0.8609495951200371, "grad_norm": 0.2748354971408844, "learning_rate": 2.8868461750896054e-06, "loss": 0.12, "step": 48270 }, { "epoch": 0.8609674312417508, "grad_norm": 0.27870067954063416, "learning_rate": 2.8861201217829814e-06, "loss": 0.1128, "step": 48271 }, { "epoch": 0.8609852673634645, "grad_norm": 0.39468151330947876, "learning_rate": 2.885394154197399e-06, "loss": 0.1263, "step": 48272 }, { "epoch": 0.8610031034851782, "grad_norm": 0.2181810885667801, "learning_rate": 2.8846682723356667e-06, "loss": 0.1091, "step": 48273 }, { "epoch": 0.8610209396068919, "grad_norm": 0.3015720546245575, "learning_rate": 2.8839424762005905e-06, "loss": 0.1019, "step": 48274 }, { "epoch": 0.8610387757286055, "grad_norm": 0.29595452547073364, "learning_rate": 2.883216765794999e-06, "loss": 0.0942, "step": 48275 }, { "epoch": 0.8610566118503192, "grad_norm": 0.25546884536743164, "learning_rate": 2.8824911411216986e-06, "loss": 0.1056, "step": 48276 }, { "epoch": 0.8610744479720329, "grad_norm": 0.20199479162693024, "learning_rate": 2.8817656021835034e-06, "loss": 0.0789, "step": 48277 }, { "epoch": 0.8610922840937466, "grad_norm": 0.34259548783302307, "learning_rate": 2.8810401489832163e-06, "loss": 0.1157, "step": 48278 }, { "epoch": 0.8611101202154603, "grad_norm": 0.26792624592781067, "learning_rate": 2.880314781523663e-06, "loss": 0.1085, "step": 48279 }, { "epoch": 0.8611279563371741, "grad_norm": 0.22999395430088043, "learning_rate": 2.87958949980765e-06, "loss": 0.0876, "step": 48280 }, { "epoch": 0.8611457924588878, "grad_norm": 0.3116852045059204, "learning_rate": 2.8788643038379886e-06, "loss": 0.0934, "step": 48281 }, { "epoch": 0.8611636285806015, "grad_norm": 0.19808802008628845, "learning_rate": 2.8781391936174877e-06, "loss": 0.1092, "step": 48282 }, { "epoch": 0.8611814647023152, "grad_norm": 0.416028767824173, "learning_rate": 2.8774141691489565e-06, "loss": 0.1197, "step": 48283 }, { "epoch": 0.8611993008240288, "grad_norm": 0.2539530098438263, "learning_rate": 2.8766892304352115e-06, "loss": 0.0962, "step": 48284 }, { "epoch": 0.8612171369457425, "grad_norm": 0.22953978180885315, "learning_rate": 2.8759643774790597e-06, "loss": 0.0922, "step": 48285 }, { "epoch": 0.8612349730674562, "grad_norm": 0.31616419553756714, "learning_rate": 2.8752396102833117e-06, "loss": 0.0962, "step": 48286 }, { "epoch": 0.8612528091891699, "grad_norm": 0.33783987164497375, "learning_rate": 2.8745149288507744e-06, "loss": 0.1035, "step": 48287 }, { "epoch": 0.8612706453108836, "grad_norm": 0.28543201088905334, "learning_rate": 2.8737903331842647e-06, "loss": 0.1822, "step": 48288 }, { "epoch": 0.8612884814325973, "grad_norm": 0.27050888538360596, "learning_rate": 2.8730658232865833e-06, "loss": 0.1122, "step": 48289 }, { "epoch": 0.861306317554311, "grad_norm": 0.1776435375213623, "learning_rate": 2.8723413991605415e-06, "loss": 0.0964, "step": 48290 }, { "epoch": 0.8613241536760247, "grad_norm": 0.2499695122241974, "learning_rate": 2.8716170608089425e-06, "loss": 0.0777, "step": 48291 }, { "epoch": 0.8613419897977383, "grad_norm": 0.36284545063972473, "learning_rate": 2.870892808234604e-06, "loss": 0.1336, "step": 48292 }, { "epoch": 0.861359825919452, "grad_norm": 0.40035828948020935, "learning_rate": 2.8701686414403294e-06, "loss": 0.1128, "step": 48293 }, { "epoch": 0.8613776620411657, "grad_norm": 0.20022915303707123, "learning_rate": 2.8694445604289242e-06, "loss": 0.075, "step": 48294 }, { "epoch": 0.8613954981628794, "grad_norm": 0.24053792655467987, "learning_rate": 2.8687205652031953e-06, "loss": 0.1005, "step": 48295 }, { "epoch": 0.8614133342845932, "grad_norm": 0.38681307435035706, "learning_rate": 2.867996655765942e-06, "loss": 0.1181, "step": 48296 }, { "epoch": 0.8614311704063069, "grad_norm": 0.2893669307231903, "learning_rate": 2.8672728321199854e-06, "loss": 0.1156, "step": 48297 }, { "epoch": 0.8614490065280206, "grad_norm": 0.22892136871814728, "learning_rate": 2.8665490942681173e-06, "loss": 0.1183, "step": 48298 }, { "epoch": 0.8614668426497343, "grad_norm": 0.26217445731163025, "learning_rate": 2.865825442213155e-06, "loss": 0.1105, "step": 48299 }, { "epoch": 0.861484678771448, "grad_norm": 0.2384805530309677, "learning_rate": 2.8651018759578956e-06, "loss": 0.0929, "step": 48300 }, { "epoch": 0.8615025148931617, "grad_norm": 0.3178929388523102, "learning_rate": 2.864378395505149e-06, "loss": 0.1182, "step": 48301 }, { "epoch": 0.8615203510148753, "grad_norm": 0.2603487968444824, "learning_rate": 2.863655000857718e-06, "loss": 0.1418, "step": 48302 }, { "epoch": 0.861538187136589, "grad_norm": 0.33509957790374756, "learning_rate": 2.862931692018403e-06, "loss": 0.1692, "step": 48303 }, { "epoch": 0.8615560232583027, "grad_norm": 0.38891443610191345, "learning_rate": 2.8622084689900076e-06, "loss": 0.1718, "step": 48304 }, { "epoch": 0.8615738593800164, "grad_norm": 0.2718924880027771, "learning_rate": 2.861485331775346e-06, "loss": 0.0925, "step": 48305 }, { "epoch": 0.8615916955017301, "grad_norm": 0.3322236239910126, "learning_rate": 2.8607622803772106e-06, "loss": 0.1895, "step": 48306 }, { "epoch": 0.8616095316234438, "grad_norm": 0.23104922473430634, "learning_rate": 2.860039314798407e-06, "loss": 0.1109, "step": 48307 }, { "epoch": 0.8616273677451575, "grad_norm": 0.30988621711730957, "learning_rate": 2.8593164350417396e-06, "loss": 0.1278, "step": 48308 }, { "epoch": 0.8616452038668712, "grad_norm": 0.3167951703071594, "learning_rate": 2.8585936411100022e-06, "loss": 0.1265, "step": 48309 }, { "epoch": 0.8616630399885848, "grad_norm": 0.2360583394765854, "learning_rate": 2.85787093300601e-06, "loss": 0.1104, "step": 48310 }, { "epoch": 0.8616808761102985, "grad_norm": 0.376623272895813, "learning_rate": 2.8571483107325576e-06, "loss": 0.082, "step": 48311 }, { "epoch": 0.8616987122320122, "grad_norm": 0.2665676474571228, "learning_rate": 2.85642577429244e-06, "loss": 0.1452, "step": 48312 }, { "epoch": 0.861716548353726, "grad_norm": 0.28887245059013367, "learning_rate": 2.855703323688469e-06, "loss": 0.1181, "step": 48313 }, { "epoch": 0.8617343844754397, "grad_norm": 0.3180290758609772, "learning_rate": 2.8549809589234365e-06, "loss": 0.1466, "step": 48314 }, { "epoch": 0.8617522205971534, "grad_norm": 0.2863643765449524, "learning_rate": 2.854258680000152e-06, "loss": 0.1142, "step": 48315 }, { "epoch": 0.8617700567188671, "grad_norm": 0.2878120243549347, "learning_rate": 2.853536486921407e-06, "loss": 0.1292, "step": 48316 }, { "epoch": 0.8617878928405808, "grad_norm": 0.23969271779060364, "learning_rate": 2.8528143796899996e-06, "loss": 0.1097, "step": 48317 }, { "epoch": 0.8618057289622945, "grad_norm": 0.1940971463918686, "learning_rate": 2.8520923583087416e-06, "loss": 0.1109, "step": 48318 }, { "epoch": 0.8618235650840081, "grad_norm": 0.25445297360420227, "learning_rate": 2.8513704227804193e-06, "loss": 0.1095, "step": 48319 }, { "epoch": 0.8618414012057218, "grad_norm": 0.3247835040092468, "learning_rate": 2.8506485731078364e-06, "loss": 0.1358, "step": 48320 }, { "epoch": 0.8618592373274355, "grad_norm": 0.29928672313690186, "learning_rate": 2.84992680929379e-06, "loss": 0.1234, "step": 48321 }, { "epoch": 0.8618770734491492, "grad_norm": 0.27817854285240173, "learning_rate": 2.8492051313410733e-06, "loss": 0.0946, "step": 48322 }, { "epoch": 0.8618949095708629, "grad_norm": 0.24670279026031494, "learning_rate": 2.8484835392524917e-06, "loss": 0.0816, "step": 48323 }, { "epoch": 0.8619127456925766, "grad_norm": 0.28345784544944763, "learning_rate": 2.8477620330308404e-06, "loss": 0.1221, "step": 48324 }, { "epoch": 0.8619305818142903, "grad_norm": 0.27013736963272095, "learning_rate": 2.8470406126789083e-06, "loss": 0.1284, "step": 48325 }, { "epoch": 0.861948417936004, "grad_norm": 0.28505557775497437, "learning_rate": 2.8463192781995025e-06, "loss": 0.0967, "step": 48326 }, { "epoch": 0.8619662540577177, "grad_norm": 0.2967435419559479, "learning_rate": 2.8455980295954115e-06, "loss": 0.1106, "step": 48327 }, { "epoch": 0.8619840901794313, "grad_norm": 0.3118458688259125, "learning_rate": 2.8448768668694393e-06, "loss": 0.1181, "step": 48328 }, { "epoch": 0.862001926301145, "grad_norm": 0.2458171844482422, "learning_rate": 2.844155790024375e-06, "loss": 0.0724, "step": 48329 }, { "epoch": 0.8620197624228588, "grad_norm": 0.24572700262069702, "learning_rate": 2.8434347990630107e-06, "loss": 0.0963, "step": 48330 }, { "epoch": 0.8620375985445725, "grad_norm": 0.2939291000366211, "learning_rate": 2.8427138939881527e-06, "loss": 0.1018, "step": 48331 }, { "epoch": 0.8620554346662862, "grad_norm": 0.20687086880207062, "learning_rate": 2.8419930748025846e-06, "loss": 0.0415, "step": 48332 }, { "epoch": 0.8620732707879999, "grad_norm": 0.32366690039634705, "learning_rate": 2.8412723415091076e-06, "loss": 0.1345, "step": 48333 }, { "epoch": 0.8620911069097136, "grad_norm": 0.24365437030792236, "learning_rate": 2.840551694110513e-06, "loss": 0.1158, "step": 48334 }, { "epoch": 0.8621089430314273, "grad_norm": 0.20163924992084503, "learning_rate": 2.8398311326095856e-06, "loss": 0.074, "step": 48335 }, { "epoch": 0.862126779153141, "grad_norm": 0.3035610318183899, "learning_rate": 2.8391106570091336e-06, "loss": 0.1334, "step": 48336 }, { "epoch": 0.8621446152748546, "grad_norm": 0.398946076631546, "learning_rate": 2.8383902673119438e-06, "loss": 0.1628, "step": 48337 }, { "epoch": 0.8621624513965683, "grad_norm": 0.2273399829864502, "learning_rate": 2.8376699635208058e-06, "loss": 0.1027, "step": 48338 }, { "epoch": 0.862180287518282, "grad_norm": 0.2744402289390564, "learning_rate": 2.836949745638509e-06, "loss": 0.1102, "step": 48339 }, { "epoch": 0.8621981236399957, "grad_norm": 0.19958682358264923, "learning_rate": 2.836229613667857e-06, "loss": 0.0732, "step": 48340 }, { "epoch": 0.8622159597617094, "grad_norm": 0.250897079706192, "learning_rate": 2.8355095676116246e-06, "loss": 0.1526, "step": 48341 }, { "epoch": 0.8622337958834231, "grad_norm": 0.2800920605659485, "learning_rate": 2.834789607472621e-06, "loss": 0.0889, "step": 48342 }, { "epoch": 0.8622516320051368, "grad_norm": 0.22744520008563995, "learning_rate": 2.834069733253622e-06, "loss": 0.0808, "step": 48343 }, { "epoch": 0.8622694681268505, "grad_norm": 0.31362220644950867, "learning_rate": 2.833349944957431e-06, "loss": 0.1239, "step": 48344 }, { "epoch": 0.8622873042485641, "grad_norm": 0.29813653230667114, "learning_rate": 2.832630242586831e-06, "loss": 0.1599, "step": 48345 }, { "epoch": 0.8623051403702778, "grad_norm": 0.21249863505363464, "learning_rate": 2.831910626144613e-06, "loss": 0.0663, "step": 48346 }, { "epoch": 0.8623229764919916, "grad_norm": 0.2554519772529602, "learning_rate": 2.8311910956335646e-06, "loss": 0.1316, "step": 48347 }, { "epoch": 0.8623408126137053, "grad_norm": 0.24317017197608948, "learning_rate": 2.830471651056474e-06, "loss": 0.0945, "step": 48348 }, { "epoch": 0.862358648735419, "grad_norm": 0.2667730152606964, "learning_rate": 2.829752292416135e-06, "loss": 0.1065, "step": 48349 }, { "epoch": 0.8623764848571327, "grad_norm": 0.334600567817688, "learning_rate": 2.8290330197153354e-06, "loss": 0.1277, "step": 48350 }, { "epoch": 0.8623943209788464, "grad_norm": 0.30880317091941833, "learning_rate": 2.8283138329568583e-06, "loss": 0.1176, "step": 48351 }, { "epoch": 0.8624121571005601, "grad_norm": 0.21727901697158813, "learning_rate": 2.827594732143493e-06, "loss": 0.0957, "step": 48352 }, { "epoch": 0.8624299932222738, "grad_norm": 0.30124136805534363, "learning_rate": 2.8268757172780325e-06, "loss": 0.164, "step": 48353 }, { "epoch": 0.8624478293439874, "grad_norm": 0.24251937866210938, "learning_rate": 2.8261567883632544e-06, "loss": 0.0749, "step": 48354 }, { "epoch": 0.8624656654657011, "grad_norm": 0.2853952646255493, "learning_rate": 2.825437945401957e-06, "loss": 0.0988, "step": 48355 }, { "epoch": 0.8624835015874148, "grad_norm": 0.3074513375759125, "learning_rate": 2.8247191883969154e-06, "loss": 0.0968, "step": 48356 }, { "epoch": 0.8625013377091285, "grad_norm": 0.3496426045894623, "learning_rate": 2.824000517350925e-06, "loss": 0.1407, "step": 48357 }, { "epoch": 0.8625191738308422, "grad_norm": 0.24741581082344055, "learning_rate": 2.823281932266769e-06, "loss": 0.0707, "step": 48358 }, { "epoch": 0.8625370099525559, "grad_norm": 0.24328818917274475, "learning_rate": 2.8225634331472322e-06, "loss": 0.0763, "step": 48359 }, { "epoch": 0.8625548460742696, "grad_norm": 0.19807767868041992, "learning_rate": 2.821845019995101e-06, "loss": 0.0752, "step": 48360 }, { "epoch": 0.8625726821959833, "grad_norm": 0.2815285325050354, "learning_rate": 2.82112669281315e-06, "loss": 0.1064, "step": 48361 }, { "epoch": 0.862590518317697, "grad_norm": 0.27651694416999817, "learning_rate": 2.8204084516041814e-06, "loss": 0.099, "step": 48362 }, { "epoch": 0.8626083544394106, "grad_norm": 0.3131183087825775, "learning_rate": 2.8196902963709664e-06, "loss": 0.1145, "step": 48363 }, { "epoch": 0.8626261905611244, "grad_norm": 0.2532101571559906, "learning_rate": 2.8189722271162957e-06, "loss": 0.1532, "step": 48364 }, { "epoch": 0.8626440266828381, "grad_norm": 0.29842883348464966, "learning_rate": 2.818254243842941e-06, "loss": 0.0889, "step": 48365 }, { "epoch": 0.8626618628045518, "grad_norm": 0.2780974805355072, "learning_rate": 2.8175363465537034e-06, "loss": 0.1647, "step": 48366 }, { "epoch": 0.8626796989262655, "grad_norm": 0.2839653491973877, "learning_rate": 2.816818535251353e-06, "loss": 0.1322, "step": 48367 }, { "epoch": 0.8626975350479792, "grad_norm": 0.26783478260040283, "learning_rate": 2.8161008099386738e-06, "loss": 0.0682, "step": 48368 }, { "epoch": 0.8627153711696929, "grad_norm": 0.345745712518692, "learning_rate": 2.815383170618452e-06, "loss": 0.1277, "step": 48369 }, { "epoch": 0.8627332072914066, "grad_norm": 0.190636545419693, "learning_rate": 2.814665617293466e-06, "loss": 0.103, "step": 48370 }, { "epoch": 0.8627510434131203, "grad_norm": 0.2475663274526596, "learning_rate": 2.8139481499665e-06, "loss": 0.1413, "step": 48371 }, { "epoch": 0.8627688795348339, "grad_norm": 0.25812238454818726, "learning_rate": 2.8132307686403347e-06, "loss": 0.1233, "step": 48372 }, { "epoch": 0.8627867156565476, "grad_norm": 0.37430524826049805, "learning_rate": 2.8125134733177515e-06, "loss": 0.1394, "step": 48373 }, { "epoch": 0.8628045517782613, "grad_norm": 0.2203422635793686, "learning_rate": 2.8117962640015233e-06, "loss": 0.0869, "step": 48374 }, { "epoch": 0.862822387899975, "grad_norm": 0.46110695600509644, "learning_rate": 2.811079140694442e-06, "loss": 0.1356, "step": 48375 }, { "epoch": 0.8628402240216887, "grad_norm": 0.3198714852333069, "learning_rate": 2.8103621033992804e-06, "loss": 0.0882, "step": 48376 }, { "epoch": 0.8628580601434024, "grad_norm": 0.3065090775489807, "learning_rate": 2.80964515211882e-06, "loss": 0.1979, "step": 48377 }, { "epoch": 0.8628758962651161, "grad_norm": 0.19372273981571198, "learning_rate": 2.808928286855836e-06, "loss": 0.0812, "step": 48378 }, { "epoch": 0.8628937323868298, "grad_norm": 0.21410366892814636, "learning_rate": 2.808211507613115e-06, "loss": 0.0807, "step": 48379 }, { "epoch": 0.8629115685085434, "grad_norm": 0.3122394382953644, "learning_rate": 2.8074948143934297e-06, "loss": 0.0912, "step": 48380 }, { "epoch": 0.8629294046302572, "grad_norm": 0.43264979124069214, "learning_rate": 2.8067782071995535e-06, "loss": 0.1388, "step": 48381 }, { "epoch": 0.8629472407519709, "grad_norm": 0.3185844421386719, "learning_rate": 2.806061686034278e-06, "loss": 0.1489, "step": 48382 }, { "epoch": 0.8629650768736846, "grad_norm": 0.2764148712158203, "learning_rate": 2.805345250900365e-06, "loss": 0.1076, "step": 48383 }, { "epoch": 0.8629829129953983, "grad_norm": 0.25965332984924316, "learning_rate": 2.8046289018006074e-06, "loss": 0.1351, "step": 48384 }, { "epoch": 0.863000749117112, "grad_norm": 0.34573742747306824, "learning_rate": 2.803912638737771e-06, "loss": 0.091, "step": 48385 }, { "epoch": 0.8630185852388257, "grad_norm": 0.24794262647628784, "learning_rate": 2.803196461714638e-06, "loss": 0.1257, "step": 48386 }, { "epoch": 0.8630364213605394, "grad_norm": 0.26193276047706604, "learning_rate": 2.8024803707339757e-06, "loss": 0.0935, "step": 48387 }, { "epoch": 0.8630542574822531, "grad_norm": 0.2464940994977951, "learning_rate": 2.801764365798573e-06, "loss": 0.0866, "step": 48388 }, { "epoch": 0.8630720936039668, "grad_norm": 0.2569274306297302, "learning_rate": 2.8010484469111975e-06, "loss": 0.0922, "step": 48389 }, { "epoch": 0.8630899297256804, "grad_norm": 0.21546293795108795, "learning_rate": 2.800332614074627e-06, "loss": 0.0602, "step": 48390 }, { "epoch": 0.8631077658473941, "grad_norm": 0.2204519361257553, "learning_rate": 2.799616867291627e-06, "loss": 0.0717, "step": 48391 }, { "epoch": 0.8631256019691078, "grad_norm": 0.3106570541858673, "learning_rate": 2.7989012065649883e-06, "loss": 0.1245, "step": 48392 }, { "epoch": 0.8631434380908215, "grad_norm": 0.29296547174453735, "learning_rate": 2.7981856318974736e-06, "loss": 0.0851, "step": 48393 }, { "epoch": 0.8631612742125352, "grad_norm": 0.22861339151859283, "learning_rate": 2.797470143291861e-06, "loss": 0.076, "step": 48394 }, { "epoch": 0.8631791103342489, "grad_norm": 0.2863302528858185, "learning_rate": 2.7967547407509178e-06, "loss": 0.1474, "step": 48395 }, { "epoch": 0.8631969464559626, "grad_norm": 0.33442822098731995, "learning_rate": 2.7960394242774247e-06, "loss": 0.0907, "step": 48396 }, { "epoch": 0.8632147825776764, "grad_norm": 0.31222379207611084, "learning_rate": 2.795324193874149e-06, "loss": 0.1591, "step": 48397 }, { "epoch": 0.86323261869939, "grad_norm": 0.3535809814929962, "learning_rate": 2.7946090495438697e-06, "loss": 0.0976, "step": 48398 }, { "epoch": 0.8632504548211037, "grad_norm": 0.6215869188308716, "learning_rate": 2.793893991289356e-06, "loss": 0.133, "step": 48399 }, { "epoch": 0.8632682909428174, "grad_norm": 0.2511143684387207, "learning_rate": 2.7931790191133762e-06, "loss": 0.1246, "step": 48400 }, { "epoch": 0.8632861270645311, "grad_norm": 0.26210227608680725, "learning_rate": 2.7924641330187048e-06, "loss": 0.1251, "step": 48401 }, { "epoch": 0.8633039631862448, "grad_norm": 0.2091110348701477, "learning_rate": 2.7917493330081176e-06, "loss": 0.0669, "step": 48402 }, { "epoch": 0.8633217993079585, "grad_norm": 0.35500049591064453, "learning_rate": 2.7910346190843763e-06, "loss": 0.1703, "step": 48403 }, { "epoch": 0.8633396354296722, "grad_norm": 0.21513564884662628, "learning_rate": 2.7903199912502537e-06, "loss": 0.1317, "step": 48404 }, { "epoch": 0.8633574715513859, "grad_norm": 0.38147884607315063, "learning_rate": 2.7896054495085227e-06, "loss": 0.0944, "step": 48405 }, { "epoch": 0.8633753076730996, "grad_norm": 0.256989449262619, "learning_rate": 2.7888909938619563e-06, "loss": 0.1338, "step": 48406 }, { "epoch": 0.8633931437948132, "grad_norm": 0.29034656286239624, "learning_rate": 2.7881766243133185e-06, "loss": 0.1176, "step": 48407 }, { "epoch": 0.8634109799165269, "grad_norm": 0.273830771446228, "learning_rate": 2.787462340865374e-06, "loss": 0.1079, "step": 48408 }, { "epoch": 0.8634288160382406, "grad_norm": 0.2569306492805481, "learning_rate": 2.7867481435209034e-06, "loss": 0.1279, "step": 48409 }, { "epoch": 0.8634466521599543, "grad_norm": 0.33249443769454956, "learning_rate": 2.786034032282664e-06, "loss": 0.1765, "step": 48410 }, { "epoch": 0.863464488281668, "grad_norm": 0.31615397334098816, "learning_rate": 2.785320007153433e-06, "loss": 0.1507, "step": 48411 }, { "epoch": 0.8634823244033817, "grad_norm": 0.2986748516559601, "learning_rate": 2.7846060681359754e-06, "loss": 0.0743, "step": 48412 }, { "epoch": 0.8635001605250954, "grad_norm": 0.39069390296936035, "learning_rate": 2.7838922152330526e-06, "loss": 0.1779, "step": 48413 }, { "epoch": 0.8635179966468092, "grad_norm": 0.25558531284332275, "learning_rate": 2.783178448447443e-06, "loss": 0.0794, "step": 48414 }, { "epoch": 0.8635358327685229, "grad_norm": 0.28744328022003174, "learning_rate": 2.782464767781906e-06, "loss": 0.1352, "step": 48415 }, { "epoch": 0.8635536688902365, "grad_norm": 0.30753999948501587, "learning_rate": 2.781751173239208e-06, "loss": 0.1039, "step": 48416 }, { "epoch": 0.8635715050119502, "grad_norm": 0.36474648118019104, "learning_rate": 2.781037664822114e-06, "loss": 0.1591, "step": 48417 }, { "epoch": 0.8635893411336639, "grad_norm": 0.21047815680503845, "learning_rate": 2.780324242533397e-06, "loss": 0.065, "step": 48418 }, { "epoch": 0.8636071772553776, "grad_norm": 0.27666041254997253, "learning_rate": 2.7796109063758154e-06, "loss": 0.1202, "step": 48419 }, { "epoch": 0.8636250133770913, "grad_norm": 0.24565847218036652, "learning_rate": 2.7788976563521363e-06, "loss": 0.1485, "step": 48420 }, { "epoch": 0.863642849498805, "grad_norm": 0.24123437702655792, "learning_rate": 2.778184492465122e-06, "loss": 0.1296, "step": 48421 }, { "epoch": 0.8636606856205187, "grad_norm": 0.3007345199584961, "learning_rate": 2.7774714147175444e-06, "loss": 0.1257, "step": 48422 }, { "epoch": 0.8636785217422324, "grad_norm": 0.17736253142356873, "learning_rate": 2.7767584231121636e-06, "loss": 0.0482, "step": 48423 }, { "epoch": 0.863696357863946, "grad_norm": 0.3148968815803528, "learning_rate": 2.7760455176517347e-06, "loss": 0.1458, "step": 48424 }, { "epoch": 0.8637141939856597, "grad_norm": 0.27487996220588684, "learning_rate": 2.7753326983390365e-06, "loss": 0.0927, "step": 48425 }, { "epoch": 0.8637320301073734, "grad_norm": 0.24090854823589325, "learning_rate": 2.7746199651768194e-06, "loss": 0.104, "step": 48426 }, { "epoch": 0.8637498662290871, "grad_norm": 0.2680315673351288, "learning_rate": 2.7739073181678594e-06, "loss": 0.1032, "step": 48427 }, { "epoch": 0.8637677023508008, "grad_norm": 0.24864837527275085, "learning_rate": 2.7731947573149094e-06, "loss": 0.0909, "step": 48428 }, { "epoch": 0.8637855384725145, "grad_norm": 0.27116402983665466, "learning_rate": 2.7724822826207308e-06, "loss": 0.1319, "step": 48429 }, { "epoch": 0.8638033745942282, "grad_norm": 0.2195434421300888, "learning_rate": 2.771769894088086e-06, "loss": 0.0947, "step": 48430 }, { "epoch": 0.863821210715942, "grad_norm": 0.39954325556755066, "learning_rate": 2.7710575917197417e-06, "loss": 0.1038, "step": 48431 }, { "epoch": 0.8638390468376557, "grad_norm": 0.39382103085517883, "learning_rate": 2.7703453755184574e-06, "loss": 0.1526, "step": 48432 }, { "epoch": 0.8638568829593694, "grad_norm": 0.30479347705841064, "learning_rate": 2.769633245486991e-06, "loss": 0.1316, "step": 48433 }, { "epoch": 0.863874719081083, "grad_norm": 0.2860756516456604, "learning_rate": 2.7689212016281025e-06, "loss": 0.1291, "step": 48434 }, { "epoch": 0.8638925552027967, "grad_norm": 0.3144417405128479, "learning_rate": 2.7682092439445554e-06, "loss": 0.1609, "step": 48435 }, { "epoch": 0.8639103913245104, "grad_norm": 0.31592923402786255, "learning_rate": 2.7674973724391095e-06, "loss": 0.097, "step": 48436 }, { "epoch": 0.8639282274462241, "grad_norm": 0.30795443058013916, "learning_rate": 2.766785587114515e-06, "loss": 0.1386, "step": 48437 }, { "epoch": 0.8639460635679378, "grad_norm": 0.31524521112442017, "learning_rate": 2.766073887973547e-06, "loss": 0.1338, "step": 48438 }, { "epoch": 0.8639638996896515, "grad_norm": 0.35194703936576843, "learning_rate": 2.7653622750189484e-06, "loss": 0.0999, "step": 48439 }, { "epoch": 0.8639817358113652, "grad_norm": 0.3119635283946991, "learning_rate": 2.7646507482534916e-06, "loss": 0.1378, "step": 48440 }, { "epoch": 0.8639995719330789, "grad_norm": 0.268070787191391, "learning_rate": 2.7639393076799276e-06, "loss": 0.0825, "step": 48441 }, { "epoch": 0.8640174080547925, "grad_norm": 0.24179629981517792, "learning_rate": 2.7632279533010174e-06, "loss": 0.1153, "step": 48442 }, { "epoch": 0.8640352441765062, "grad_norm": 0.19377778470516205, "learning_rate": 2.7625166851195067e-06, "loss": 0.098, "step": 48443 }, { "epoch": 0.8640530802982199, "grad_norm": 0.35624709725379944, "learning_rate": 2.7618055031381707e-06, "loss": 0.1018, "step": 48444 }, { "epoch": 0.8640709164199336, "grad_norm": 0.35638681054115295, "learning_rate": 2.7610944073597546e-06, "loss": 0.1087, "step": 48445 }, { "epoch": 0.8640887525416473, "grad_norm": 0.20993869006633759, "learning_rate": 2.76038339778702e-06, "loss": 0.1168, "step": 48446 }, { "epoch": 0.864106588663361, "grad_norm": 0.26464006304740906, "learning_rate": 2.7596724744227142e-06, "loss": 0.1776, "step": 48447 }, { "epoch": 0.8641244247850748, "grad_norm": 0.2416379451751709, "learning_rate": 2.7589616372696053e-06, "loss": 0.1229, "step": 48448 }, { "epoch": 0.8641422609067885, "grad_norm": 0.22152984142303467, "learning_rate": 2.7582508863304435e-06, "loss": 0.0919, "step": 48449 }, { "epoch": 0.8641600970285022, "grad_norm": 0.2515111267566681, "learning_rate": 2.757540221607982e-06, "loss": 0.0861, "step": 48450 }, { "epoch": 0.8641779331502158, "grad_norm": 0.2078169733285904, "learning_rate": 2.7568296431049716e-06, "loss": 0.0881, "step": 48451 }, { "epoch": 0.8641957692719295, "grad_norm": 0.3556353449821472, "learning_rate": 2.756119150824171e-06, "loss": 0.1091, "step": 48452 }, { "epoch": 0.8642136053936432, "grad_norm": 0.40062204003334045, "learning_rate": 2.7554087447683426e-06, "loss": 0.1476, "step": 48453 }, { "epoch": 0.8642314415153569, "grad_norm": 0.2830500602722168, "learning_rate": 2.754698424940233e-06, "loss": 0.1096, "step": 48454 }, { "epoch": 0.8642492776370706, "grad_norm": 0.2188422530889511, "learning_rate": 2.7539881913425938e-06, "loss": 0.0481, "step": 48455 }, { "epoch": 0.8642671137587843, "grad_norm": 0.28958621621131897, "learning_rate": 2.7532780439781774e-06, "loss": 0.1333, "step": 48456 }, { "epoch": 0.864284949880498, "grad_norm": 0.195339173078537, "learning_rate": 2.7525679828497437e-06, "loss": 0.0939, "step": 48457 }, { "epoch": 0.8643027860022117, "grad_norm": 0.24720843136310577, "learning_rate": 2.751858007960037e-06, "loss": 0.1188, "step": 48458 }, { "epoch": 0.8643206221239254, "grad_norm": 0.2433614730834961, "learning_rate": 2.7511481193118165e-06, "loss": 0.1001, "step": 48459 }, { "epoch": 0.864338458245639, "grad_norm": 0.25807812809944153, "learning_rate": 2.7504383169078245e-06, "loss": 0.116, "step": 48460 }, { "epoch": 0.8643562943673527, "grad_norm": 0.18046694993972778, "learning_rate": 2.749728600750823e-06, "loss": 0.097, "step": 48461 }, { "epoch": 0.8643741304890664, "grad_norm": 0.48721593618392944, "learning_rate": 2.7490189708435587e-06, "loss": 0.1569, "step": 48462 }, { "epoch": 0.8643919666107801, "grad_norm": 0.22629275918006897, "learning_rate": 2.7483094271887833e-06, "loss": 0.1188, "step": 48463 }, { "epoch": 0.8644098027324938, "grad_norm": 0.22580598294734955, "learning_rate": 2.7475999697892386e-06, "loss": 0.1254, "step": 48464 }, { "epoch": 0.8644276388542076, "grad_norm": 0.310934841632843, "learning_rate": 2.746890598647689e-06, "loss": 0.1284, "step": 48465 }, { "epoch": 0.8644454749759213, "grad_norm": 0.2310737818479538, "learning_rate": 2.7461813137668714e-06, "loss": 0.1152, "step": 48466 }, { "epoch": 0.864463311097635, "grad_norm": 0.3070048987865448, "learning_rate": 2.745472115149547e-06, "loss": 0.1175, "step": 48467 }, { "epoch": 0.8644811472193487, "grad_norm": 0.2577265799045563, "learning_rate": 2.744763002798459e-06, "loss": 0.109, "step": 48468 }, { "epoch": 0.8644989833410623, "grad_norm": 0.25178617238998413, "learning_rate": 2.7440539767163485e-06, "loss": 0.0519, "step": 48469 }, { "epoch": 0.864516819462776, "grad_norm": 0.2439354807138443, "learning_rate": 2.743345036905981e-06, "loss": 0.1209, "step": 48470 }, { "epoch": 0.8645346555844897, "grad_norm": 0.3122340440750122, "learning_rate": 2.7426361833700927e-06, "loss": 0.1789, "step": 48471 }, { "epoch": 0.8645524917062034, "grad_norm": 0.21167029440402985, "learning_rate": 2.741927416111434e-06, "loss": 0.0635, "step": 48472 }, { "epoch": 0.8645703278279171, "grad_norm": 0.2811305820941925, "learning_rate": 2.74121873513275e-06, "loss": 0.1061, "step": 48473 }, { "epoch": 0.8645881639496308, "grad_norm": 0.355853408575058, "learning_rate": 2.740510140436792e-06, "loss": 0.1083, "step": 48474 }, { "epoch": 0.8646060000713445, "grad_norm": 0.22062143683433533, "learning_rate": 2.7398016320263063e-06, "loss": 0.099, "step": 48475 }, { "epoch": 0.8646238361930582, "grad_norm": 0.29981282353401184, "learning_rate": 2.7390932099040395e-06, "loss": 0.1235, "step": 48476 }, { "epoch": 0.8646416723147718, "grad_norm": 0.2699138820171356, "learning_rate": 2.738384874072733e-06, "loss": 0.0822, "step": 48477 }, { "epoch": 0.8646595084364855, "grad_norm": 0.27444812655448914, "learning_rate": 2.737676624535132e-06, "loss": 0.0957, "step": 48478 }, { "epoch": 0.8646773445581992, "grad_norm": 0.24969972670078278, "learning_rate": 2.7369684612939874e-06, "loss": 0.1039, "step": 48479 }, { "epoch": 0.8646951806799129, "grad_norm": 0.24319885671138763, "learning_rate": 2.7362603843520437e-06, "loss": 0.0857, "step": 48480 }, { "epoch": 0.8647130168016266, "grad_norm": 0.24497871100902557, "learning_rate": 2.7355523937120457e-06, "loss": 0.1476, "step": 48481 }, { "epoch": 0.8647308529233404, "grad_norm": 0.2588385045528412, "learning_rate": 2.734844489376734e-06, "loss": 0.1043, "step": 48482 }, { "epoch": 0.8647486890450541, "grad_norm": 0.26025617122650146, "learning_rate": 2.734136671348858e-06, "loss": 0.0751, "step": 48483 }, { "epoch": 0.8647665251667678, "grad_norm": 0.2640632688999176, "learning_rate": 2.733428939631158e-06, "loss": 0.0789, "step": 48484 }, { "epoch": 0.8647843612884815, "grad_norm": 0.3763984739780426, "learning_rate": 2.7327212942263815e-06, "loss": 0.0394, "step": 48485 }, { "epoch": 0.8648021974101952, "grad_norm": 0.4510432481765747, "learning_rate": 2.7320137351372594e-06, "loss": 0.128, "step": 48486 }, { "epoch": 0.8648200335319088, "grad_norm": 0.24882939457893372, "learning_rate": 2.731306262366548e-06, "loss": 0.1178, "step": 48487 }, { "epoch": 0.8648378696536225, "grad_norm": 0.2520524561405182, "learning_rate": 2.730598875916987e-06, "loss": 0.1034, "step": 48488 }, { "epoch": 0.8648557057753362, "grad_norm": 0.2537497878074646, "learning_rate": 2.729891575791316e-06, "loss": 0.1232, "step": 48489 }, { "epoch": 0.8648735418970499, "grad_norm": 0.2315804660320282, "learning_rate": 2.729184361992276e-06, "loss": 0.1028, "step": 48490 }, { "epoch": 0.8648913780187636, "grad_norm": 0.2543641924858093, "learning_rate": 2.7284772345226058e-06, "loss": 0.1083, "step": 48491 }, { "epoch": 0.8649092141404773, "grad_norm": 0.2539544105529785, "learning_rate": 2.727770193385057e-06, "loss": 0.1616, "step": 48492 }, { "epoch": 0.864927050262191, "grad_norm": 0.2560049295425415, "learning_rate": 2.727063238582356e-06, "loss": 0.0908, "step": 48493 }, { "epoch": 0.8649448863839047, "grad_norm": 0.2839914858341217, "learning_rate": 2.726356370117256e-06, "loss": 0.1434, "step": 48494 }, { "epoch": 0.8649627225056183, "grad_norm": 0.31279420852661133, "learning_rate": 2.725649587992485e-06, "loss": 0.1245, "step": 48495 }, { "epoch": 0.864980558627332, "grad_norm": 0.37675201892852783, "learning_rate": 2.7249428922107965e-06, "loss": 0.1395, "step": 48496 }, { "epoch": 0.8649983947490457, "grad_norm": 0.41199633479118347, "learning_rate": 2.7242362827749223e-06, "loss": 0.1555, "step": 48497 }, { "epoch": 0.8650162308707595, "grad_norm": 0.24773286283016205, "learning_rate": 2.7235297596876043e-06, "loss": 0.1554, "step": 48498 }, { "epoch": 0.8650340669924732, "grad_norm": 0.22875894606113434, "learning_rate": 2.7228233229515705e-06, "loss": 0.1052, "step": 48499 }, { "epoch": 0.8650519031141869, "grad_norm": 0.18755696713924408, "learning_rate": 2.7221169725695745e-06, "loss": 0.11, "step": 48500 }, { "epoch": 0.8650697392359006, "grad_norm": 0.2680853605270386, "learning_rate": 2.7214107085443476e-06, "loss": 0.1193, "step": 48501 }, { "epoch": 0.8650875753576143, "grad_norm": 0.34382468461990356, "learning_rate": 2.7207045308786287e-06, "loss": 0.104, "step": 48502 }, { "epoch": 0.865105411479328, "grad_norm": 0.2973938584327698, "learning_rate": 2.7199984395751525e-06, "loss": 0.1129, "step": 48503 }, { "epoch": 0.8651232476010416, "grad_norm": 0.23787064850330353, "learning_rate": 2.7192924346366527e-06, "loss": 0.1061, "step": 48504 }, { "epoch": 0.8651410837227553, "grad_norm": 0.3175320029258728, "learning_rate": 2.718586516065877e-06, "loss": 0.1146, "step": 48505 }, { "epoch": 0.865158919844469, "grad_norm": 0.31334465742111206, "learning_rate": 2.7178806838655535e-06, "loss": 0.1215, "step": 48506 }, { "epoch": 0.8651767559661827, "grad_norm": 0.2948490083217621, "learning_rate": 2.7171749380384167e-06, "loss": 0.1569, "step": 48507 }, { "epoch": 0.8651945920878964, "grad_norm": 0.28500592708587646, "learning_rate": 2.7164692785872055e-06, "loss": 0.11, "step": 48508 }, { "epoch": 0.8652124282096101, "grad_norm": 0.2451358586549759, "learning_rate": 2.715763705514662e-06, "loss": 0.1261, "step": 48509 }, { "epoch": 0.8652302643313238, "grad_norm": 0.3460720181465149, "learning_rate": 2.715058218823516e-06, "loss": 0.1697, "step": 48510 }, { "epoch": 0.8652481004530375, "grad_norm": 0.3354368805885315, "learning_rate": 2.7143528185164995e-06, "loss": 0.1596, "step": 48511 }, { "epoch": 0.8652659365747511, "grad_norm": 0.22452349960803986, "learning_rate": 2.713647504596345e-06, "loss": 0.1259, "step": 48512 }, { "epoch": 0.8652837726964648, "grad_norm": 0.31383538246154785, "learning_rate": 2.7129422770657944e-06, "loss": 0.1157, "step": 48513 }, { "epoch": 0.8653016088181785, "grad_norm": 0.236113503575325, "learning_rate": 2.7122371359275754e-06, "loss": 0.1056, "step": 48514 }, { "epoch": 0.8653194449398923, "grad_norm": 0.22444918751716614, "learning_rate": 2.7115320811844258e-06, "loss": 0.0606, "step": 48515 }, { "epoch": 0.865337281061606, "grad_norm": 0.29247623682022095, "learning_rate": 2.710827112839076e-06, "loss": 0.1104, "step": 48516 }, { "epoch": 0.8653551171833197, "grad_norm": 0.2545079290866852, "learning_rate": 2.710122230894252e-06, "loss": 0.1136, "step": 48517 }, { "epoch": 0.8653729533050334, "grad_norm": 0.32490989565849304, "learning_rate": 2.7094174353527012e-06, "loss": 0.0841, "step": 48518 }, { "epoch": 0.8653907894267471, "grad_norm": 0.3200085163116455, "learning_rate": 2.7087127262171443e-06, "loss": 0.1315, "step": 48519 }, { "epoch": 0.8654086255484608, "grad_norm": 0.2946043610572815, "learning_rate": 2.7080081034903114e-06, "loss": 0.1581, "step": 48520 }, { "epoch": 0.8654264616701745, "grad_norm": 0.19989019632339478, "learning_rate": 2.707303567174943e-06, "loss": 0.0785, "step": 48521 }, { "epoch": 0.8654442977918881, "grad_norm": 0.3084639012813568, "learning_rate": 2.706599117273764e-06, "loss": 0.1461, "step": 48522 }, { "epoch": 0.8654621339136018, "grad_norm": 0.24902957677841187, "learning_rate": 2.705894753789509e-06, "loss": 0.0618, "step": 48523 }, { "epoch": 0.8654799700353155, "grad_norm": 0.27955931425094604, "learning_rate": 2.7051904767249058e-06, "loss": 0.1022, "step": 48524 }, { "epoch": 0.8654978061570292, "grad_norm": 0.26947861909866333, "learning_rate": 2.7044862860826803e-06, "loss": 0.0832, "step": 48525 }, { "epoch": 0.8655156422787429, "grad_norm": 0.4156228005886078, "learning_rate": 2.703782181865572e-06, "loss": 0.1303, "step": 48526 }, { "epoch": 0.8655334784004566, "grad_norm": 0.34134963154792786, "learning_rate": 2.7030781640763033e-06, "loss": 0.0961, "step": 48527 }, { "epoch": 0.8655513145221703, "grad_norm": 0.28540530800819397, "learning_rate": 2.7023742327176053e-06, "loss": 0.1747, "step": 48528 }, { "epoch": 0.865569150643884, "grad_norm": 0.2902222275733948, "learning_rate": 2.701670387792207e-06, "loss": 0.109, "step": 48529 }, { "epoch": 0.8655869867655976, "grad_norm": 0.317419171333313, "learning_rate": 2.7009666293028312e-06, "loss": 0.1398, "step": 48530 }, { "epoch": 0.8656048228873113, "grad_norm": 0.2997209131717682, "learning_rate": 2.7002629572522142e-06, "loss": 0.0891, "step": 48531 }, { "epoch": 0.8656226590090251, "grad_norm": 0.26505598425865173, "learning_rate": 2.699559371643082e-06, "loss": 0.1258, "step": 48532 }, { "epoch": 0.8656404951307388, "grad_norm": 0.30268096923828125, "learning_rate": 2.698855872478159e-06, "loss": 0.1225, "step": 48533 }, { "epoch": 0.8656583312524525, "grad_norm": 0.2430025041103363, "learning_rate": 2.698152459760167e-06, "loss": 0.0908, "step": 48534 }, { "epoch": 0.8656761673741662, "grad_norm": 0.22666838765144348, "learning_rate": 2.697449133491839e-06, "loss": 0.1618, "step": 48535 }, { "epoch": 0.8656940034958799, "grad_norm": 0.2456955760717392, "learning_rate": 2.696745893675909e-06, "loss": 0.1154, "step": 48536 }, { "epoch": 0.8657118396175936, "grad_norm": 0.4044748544692993, "learning_rate": 2.696042740315094e-06, "loss": 0.1663, "step": 48537 }, { "epoch": 0.8657296757393073, "grad_norm": 0.3034094572067261, "learning_rate": 2.6953396734121146e-06, "loss": 0.0714, "step": 48538 }, { "epoch": 0.865747511861021, "grad_norm": 0.2581062912940979, "learning_rate": 2.694636692969707e-06, "loss": 0.1104, "step": 48539 }, { "epoch": 0.8657653479827346, "grad_norm": 0.20388630032539368, "learning_rate": 2.6939337989905916e-06, "loss": 0.1082, "step": 48540 }, { "epoch": 0.8657831841044483, "grad_norm": 0.24164210259914398, "learning_rate": 2.693230991477494e-06, "loss": 0.0889, "step": 48541 }, { "epoch": 0.865801020226162, "grad_norm": 0.24522261321544647, "learning_rate": 2.6925282704331395e-06, "loss": 0.1196, "step": 48542 }, { "epoch": 0.8658188563478757, "grad_norm": 0.20962579548358917, "learning_rate": 2.691825635860243e-06, "loss": 0.0904, "step": 48543 }, { "epoch": 0.8658366924695894, "grad_norm": 0.3322601020336151, "learning_rate": 2.6911230877615406e-06, "loss": 0.1619, "step": 48544 }, { "epoch": 0.8658545285913031, "grad_norm": 0.27501949667930603, "learning_rate": 2.69042062613975e-06, "loss": 0.1422, "step": 48545 }, { "epoch": 0.8658723647130168, "grad_norm": 0.43415945768356323, "learning_rate": 2.6897182509975945e-06, "loss": 0.156, "step": 48546 }, { "epoch": 0.8658902008347305, "grad_norm": 0.25982412695884705, "learning_rate": 2.689015962337793e-06, "loss": 0.0784, "step": 48547 }, { "epoch": 0.8659080369564441, "grad_norm": 0.2423533797264099, "learning_rate": 2.6883137601630747e-06, "loss": 0.0904, "step": 48548 }, { "epoch": 0.8659258730781579, "grad_norm": 0.3680278956890106, "learning_rate": 2.687611644476157e-06, "loss": 0.0938, "step": 48549 }, { "epoch": 0.8659437091998716, "grad_norm": 0.3058181405067444, "learning_rate": 2.6869096152797647e-06, "loss": 0.1178, "step": 48550 }, { "epoch": 0.8659615453215853, "grad_norm": 0.33312928676605225, "learning_rate": 2.6862076725766127e-06, "loss": 0.1328, "step": 48551 }, { "epoch": 0.865979381443299, "grad_norm": 0.3377118706703186, "learning_rate": 2.6855058163694324e-06, "loss": 0.1286, "step": 48552 }, { "epoch": 0.8659972175650127, "grad_norm": 0.3017858564853668, "learning_rate": 2.6848040466609383e-06, "loss": 0.1582, "step": 48553 }, { "epoch": 0.8660150536867264, "grad_norm": 0.27093982696533203, "learning_rate": 2.6841023634538526e-06, "loss": 0.0815, "step": 48554 }, { "epoch": 0.8660328898084401, "grad_norm": 0.26729387044906616, "learning_rate": 2.683400766750893e-06, "loss": 0.0957, "step": 48555 }, { "epoch": 0.8660507259301538, "grad_norm": 0.3356668949127197, "learning_rate": 2.6826992565547744e-06, "loss": 0.1247, "step": 48556 }, { "epoch": 0.8660685620518674, "grad_norm": 0.24405401945114136, "learning_rate": 2.681997832868227e-06, "loss": 0.1384, "step": 48557 }, { "epoch": 0.8660863981735811, "grad_norm": 0.27856531739234924, "learning_rate": 2.6812964956939637e-06, "loss": 0.1073, "step": 48558 }, { "epoch": 0.8661042342952948, "grad_norm": 0.27748432755470276, "learning_rate": 2.680595245034703e-06, "loss": 0.1182, "step": 48559 }, { "epoch": 0.8661220704170085, "grad_norm": 0.24369068443775177, "learning_rate": 2.679894080893161e-06, "loss": 0.0638, "step": 48560 }, { "epoch": 0.8661399065387222, "grad_norm": 0.2648668587207794, "learning_rate": 2.6791930032720626e-06, "loss": 0.0687, "step": 48561 }, { "epoch": 0.8661577426604359, "grad_norm": 0.3763103187084198, "learning_rate": 2.678492012174116e-06, "loss": 0.1584, "step": 48562 }, { "epoch": 0.8661755787821496, "grad_norm": 0.31233611702919006, "learning_rate": 2.677791107602051e-06, "loss": 0.1383, "step": 48563 }, { "epoch": 0.8661934149038633, "grad_norm": 0.24152925610542297, "learning_rate": 2.6770902895585704e-06, "loss": 0.1152, "step": 48564 }, { "epoch": 0.866211251025577, "grad_norm": 0.22455281019210815, "learning_rate": 2.676389558046405e-06, "loss": 0.1299, "step": 48565 }, { "epoch": 0.8662290871472907, "grad_norm": 0.29154765605926514, "learning_rate": 2.675688913068264e-06, "loss": 0.0844, "step": 48566 }, { "epoch": 0.8662469232690044, "grad_norm": 0.2528809607028961, "learning_rate": 2.6749883546268617e-06, "loss": 0.0634, "step": 48567 }, { "epoch": 0.8662647593907181, "grad_norm": 0.2875394821166992, "learning_rate": 2.6742878827249156e-06, "loss": 0.0757, "step": 48568 }, { "epoch": 0.8662825955124318, "grad_norm": 0.34754088521003723, "learning_rate": 2.6735874973651347e-06, "loss": 0.1158, "step": 48569 }, { "epoch": 0.8663004316341455, "grad_norm": 0.3402722477912903, "learning_rate": 2.6728871985502472e-06, "loss": 0.2099, "step": 48570 }, { "epoch": 0.8663182677558592, "grad_norm": 0.5429072976112366, "learning_rate": 2.6721869862829594e-06, "loss": 0.1119, "step": 48571 }, { "epoch": 0.8663361038775729, "grad_norm": 0.367783784866333, "learning_rate": 2.671486860565986e-06, "loss": 0.1955, "step": 48572 }, { "epoch": 0.8663539399992866, "grad_norm": 0.2709261178970337, "learning_rate": 2.670786821402038e-06, "loss": 0.0964, "step": 48573 }, { "epoch": 0.8663717761210002, "grad_norm": 0.2615801990032196, "learning_rate": 2.670086868793836e-06, "loss": 0.0738, "step": 48574 }, { "epoch": 0.8663896122427139, "grad_norm": 0.3666120767593384, "learning_rate": 2.669387002744092e-06, "loss": 0.1188, "step": 48575 }, { "epoch": 0.8664074483644276, "grad_norm": 0.21841605007648468, "learning_rate": 2.6686872232555086e-06, "loss": 0.091, "step": 48576 }, { "epoch": 0.8664252844861413, "grad_norm": 0.3070674538612366, "learning_rate": 2.667987530330815e-06, "loss": 0.1493, "step": 48577 }, { "epoch": 0.866443120607855, "grad_norm": 0.261738121509552, "learning_rate": 2.6672879239727083e-06, "loss": 0.1323, "step": 48578 }, { "epoch": 0.8664609567295687, "grad_norm": 0.31118008494377136, "learning_rate": 2.666588404183912e-06, "loss": 0.0707, "step": 48579 }, { "epoch": 0.8664787928512824, "grad_norm": 0.23535367846488953, "learning_rate": 2.6658889709671314e-06, "loss": 0.1061, "step": 48580 }, { "epoch": 0.8664966289729961, "grad_norm": 0.19728673994541168, "learning_rate": 2.665189624325082e-06, "loss": 0.0942, "step": 48581 }, { "epoch": 0.8665144650947098, "grad_norm": 0.33237403631210327, "learning_rate": 2.6644903642604633e-06, "loss": 0.1288, "step": 48582 }, { "epoch": 0.8665323012164236, "grad_norm": 0.3385293781757355, "learning_rate": 2.6637911907760023e-06, "loss": 0.164, "step": 48583 }, { "epoch": 0.8665501373381372, "grad_norm": 0.28505703806877136, "learning_rate": 2.6630921038744012e-06, "loss": 0.1322, "step": 48584 }, { "epoch": 0.8665679734598509, "grad_norm": 0.30644890666007996, "learning_rate": 2.6623931035583694e-06, "loss": 0.1292, "step": 48585 }, { "epoch": 0.8665858095815646, "grad_norm": 0.3161781132221222, "learning_rate": 2.66169418983061e-06, "loss": 0.1165, "step": 48586 }, { "epoch": 0.8666036457032783, "grad_norm": 0.299847811460495, "learning_rate": 2.6609953626938465e-06, "loss": 0.1066, "step": 48587 }, { "epoch": 0.866621481824992, "grad_norm": 0.35688701272010803, "learning_rate": 2.660296622150782e-06, "loss": 0.0563, "step": 48588 }, { "epoch": 0.8666393179467057, "grad_norm": 0.1942257136106491, "learning_rate": 2.6595979682041165e-06, "loss": 0.1075, "step": 48589 }, { "epoch": 0.8666571540684194, "grad_norm": 0.26867055892944336, "learning_rate": 2.658899400856571e-06, "loss": 0.1151, "step": 48590 }, { "epoch": 0.866674990190133, "grad_norm": 0.353449285030365, "learning_rate": 2.6582009201108425e-06, "loss": 0.1168, "step": 48591 }, { "epoch": 0.8666928263118467, "grad_norm": 0.3380625545978546, "learning_rate": 2.657502525969649e-06, "loss": 0.1242, "step": 48592 }, { "epoch": 0.8667106624335604, "grad_norm": 0.3841957151889801, "learning_rate": 2.6568042184356934e-06, "loss": 0.1064, "step": 48593 }, { "epoch": 0.8667284985552741, "grad_norm": 0.22774510085582733, "learning_rate": 2.6561059975116794e-06, "loss": 0.1059, "step": 48594 }, { "epoch": 0.8667463346769878, "grad_norm": 0.27195748686790466, "learning_rate": 2.6554078632003126e-06, "loss": 0.078, "step": 48595 }, { "epoch": 0.8667641707987015, "grad_norm": 0.21842284500598907, "learning_rate": 2.654709815504308e-06, "loss": 0.086, "step": 48596 }, { "epoch": 0.8667820069204152, "grad_norm": 0.2673019766807556, "learning_rate": 2.6540118544263667e-06, "loss": 0.0693, "step": 48597 }, { "epoch": 0.8667998430421289, "grad_norm": 0.2516014277935028, "learning_rate": 2.653313979969191e-06, "loss": 0.0835, "step": 48598 }, { "epoch": 0.8668176791638427, "grad_norm": 0.24400992691516876, "learning_rate": 2.6526161921354847e-06, "loss": 0.1013, "step": 48599 }, { "epoch": 0.8668355152855564, "grad_norm": 0.3259427547454834, "learning_rate": 2.6519184909279626e-06, "loss": 0.1158, "step": 48600 }, { "epoch": 0.86685335140727, "grad_norm": 0.4715590476989746, "learning_rate": 2.6512208763493194e-06, "loss": 0.0897, "step": 48601 }, { "epoch": 0.8668711875289837, "grad_norm": 0.2877531945705414, "learning_rate": 2.6505233484022672e-06, "loss": 0.1246, "step": 48602 }, { "epoch": 0.8668890236506974, "grad_norm": 0.30854853987693787, "learning_rate": 2.649825907089501e-06, "loss": 0.1108, "step": 48603 }, { "epoch": 0.8669068597724111, "grad_norm": 0.2034696489572525, "learning_rate": 2.649128552413732e-06, "loss": 0.0514, "step": 48604 }, { "epoch": 0.8669246958941248, "grad_norm": 0.30569523572921753, "learning_rate": 2.648431284377656e-06, "loss": 0.0796, "step": 48605 }, { "epoch": 0.8669425320158385, "grad_norm": 0.1643456369638443, "learning_rate": 2.647734102983984e-06, "loss": 0.0634, "step": 48606 }, { "epoch": 0.8669603681375522, "grad_norm": 0.5523666143417358, "learning_rate": 2.647037008235412e-06, "loss": 0.1585, "step": 48607 }, { "epoch": 0.8669782042592659, "grad_norm": 0.2524758577346802, "learning_rate": 2.646340000134648e-06, "loss": 0.1074, "step": 48608 }, { "epoch": 0.8669960403809795, "grad_norm": 0.27780383825302124, "learning_rate": 2.6456430786843928e-06, "loss": 0.0689, "step": 48609 }, { "epoch": 0.8670138765026932, "grad_norm": 0.2856437563896179, "learning_rate": 2.6449462438873445e-06, "loss": 0.1066, "step": 48610 }, { "epoch": 0.8670317126244069, "grad_norm": 0.31625935435295105, "learning_rate": 2.6442494957462065e-06, "loss": 0.1075, "step": 48611 }, { "epoch": 0.8670495487461206, "grad_norm": 0.3473754823207855, "learning_rate": 2.643552834263674e-06, "loss": 0.1585, "step": 48612 }, { "epoch": 0.8670673848678343, "grad_norm": 0.28323686122894287, "learning_rate": 2.6428562594424582e-06, "loss": 0.0954, "step": 48613 }, { "epoch": 0.867085220989548, "grad_norm": 0.23895299434661865, "learning_rate": 2.642159771285252e-06, "loss": 0.1284, "step": 48614 }, { "epoch": 0.8671030571112617, "grad_norm": 0.2907993793487549, "learning_rate": 2.6414633697947583e-06, "loss": 0.1617, "step": 48615 }, { "epoch": 0.8671208932329755, "grad_norm": 0.3060983717441559, "learning_rate": 2.6407670549736698e-06, "loss": 0.1099, "step": 48616 }, { "epoch": 0.8671387293546892, "grad_norm": 0.2411821186542511, "learning_rate": 2.6400708268246922e-06, "loss": 0.1476, "step": 48617 }, { "epoch": 0.8671565654764029, "grad_norm": 0.2727629244327545, "learning_rate": 2.639374685350521e-06, "loss": 0.1182, "step": 48618 }, { "epoch": 0.8671744015981165, "grad_norm": 0.28775155544281006, "learning_rate": 2.6386786305538617e-06, "loss": 0.0816, "step": 48619 }, { "epoch": 0.8671922377198302, "grad_norm": 0.2429989129304886, "learning_rate": 2.6379826624374023e-06, "loss": 0.1054, "step": 48620 }, { "epoch": 0.8672100738415439, "grad_norm": 0.27986016869544983, "learning_rate": 2.6372867810038506e-06, "loss": 0.112, "step": 48621 }, { "epoch": 0.8672279099632576, "grad_norm": 0.28324660658836365, "learning_rate": 2.6365909862558992e-06, "loss": 0.0941, "step": 48622 }, { "epoch": 0.8672457460849713, "grad_norm": 0.2582818567752838, "learning_rate": 2.635895278196243e-06, "loss": 0.076, "step": 48623 }, { "epoch": 0.867263582206685, "grad_norm": 0.35883527994155884, "learning_rate": 2.635199656827583e-06, "loss": 0.1522, "step": 48624 }, { "epoch": 0.8672814183283987, "grad_norm": 0.3026193082332611, "learning_rate": 2.634504122152609e-06, "loss": 0.0766, "step": 48625 }, { "epoch": 0.8672992544501124, "grad_norm": 0.2771889567375183, "learning_rate": 2.6338086741740263e-06, "loss": 0.0903, "step": 48626 }, { "epoch": 0.867317090571826, "grad_norm": 0.2743991017341614, "learning_rate": 2.633113312894528e-06, "loss": 0.1474, "step": 48627 }, { "epoch": 0.8673349266935397, "grad_norm": 0.24703501164913177, "learning_rate": 2.6324180383168056e-06, "loss": 0.1242, "step": 48628 }, { "epoch": 0.8673527628152534, "grad_norm": 0.2984876036643982, "learning_rate": 2.631722850443552e-06, "loss": 0.1244, "step": 48629 }, { "epoch": 0.8673705989369671, "grad_norm": 0.30970442295074463, "learning_rate": 2.6310277492774703e-06, "loss": 0.1398, "step": 48630 }, { "epoch": 0.8673884350586808, "grad_norm": 0.25778430700302124, "learning_rate": 2.630332734821253e-06, "loss": 0.1032, "step": 48631 }, { "epoch": 0.8674062711803945, "grad_norm": 0.2707928717136383, "learning_rate": 2.6296378070775868e-06, "loss": 0.0858, "step": 48632 }, { "epoch": 0.8674241073021083, "grad_norm": 0.2565414011478424, "learning_rate": 2.628942966049169e-06, "loss": 0.0576, "step": 48633 }, { "epoch": 0.867441943423822, "grad_norm": 0.3035796284675598, "learning_rate": 2.6282482117387014e-06, "loss": 0.1018, "step": 48634 }, { "epoch": 0.8674597795455357, "grad_norm": 0.26896676421165466, "learning_rate": 2.6275535441488725e-06, "loss": 0.1079, "step": 48635 }, { "epoch": 0.8674776156672493, "grad_norm": 0.29652872681617737, "learning_rate": 2.626858963282369e-06, "loss": 0.1392, "step": 48636 }, { "epoch": 0.867495451788963, "grad_norm": 0.23057928681373596, "learning_rate": 2.6261644691418925e-06, "loss": 0.1115, "step": 48637 }, { "epoch": 0.8675132879106767, "grad_norm": 0.23853172361850739, "learning_rate": 2.62547006173012e-06, "loss": 0.1052, "step": 48638 }, { "epoch": 0.8675311240323904, "grad_norm": 0.2926994264125824, "learning_rate": 2.624775741049762e-06, "loss": 0.1339, "step": 48639 }, { "epoch": 0.8675489601541041, "grad_norm": 0.38823041319847107, "learning_rate": 2.6240815071035014e-06, "loss": 0.1565, "step": 48640 }, { "epoch": 0.8675667962758178, "grad_norm": 0.30450350046157837, "learning_rate": 2.6233873598940278e-06, "loss": 0.1304, "step": 48641 }, { "epoch": 0.8675846323975315, "grad_norm": 0.22317253053188324, "learning_rate": 2.622693299424031e-06, "loss": 0.0638, "step": 48642 }, { "epoch": 0.8676024685192452, "grad_norm": 0.39300301671028137, "learning_rate": 2.621999325696206e-06, "loss": 0.1083, "step": 48643 }, { "epoch": 0.8676203046409589, "grad_norm": 0.23108522593975067, "learning_rate": 2.6213054387132422e-06, "loss": 0.0876, "step": 48644 }, { "epoch": 0.8676381407626725, "grad_norm": 0.2547154128551483, "learning_rate": 2.620611638477821e-06, "loss": 0.1305, "step": 48645 }, { "epoch": 0.8676559768843862, "grad_norm": 0.3216720223426819, "learning_rate": 2.619917924992646e-06, "loss": 0.1142, "step": 48646 }, { "epoch": 0.8676738130060999, "grad_norm": 0.3700425624847412, "learning_rate": 2.619224298260392e-06, "loss": 0.1144, "step": 48647 }, { "epoch": 0.8676916491278136, "grad_norm": 0.3872331976890564, "learning_rate": 2.6185307582837603e-06, "loss": 0.1414, "step": 48648 }, { "epoch": 0.8677094852495273, "grad_norm": 0.2810850143432617, "learning_rate": 2.617837305065435e-06, "loss": 0.1126, "step": 48649 }, { "epoch": 0.8677273213712411, "grad_norm": 0.2455441951751709, "learning_rate": 2.617143938608102e-06, "loss": 0.1109, "step": 48650 }, { "epoch": 0.8677451574929548, "grad_norm": 0.33465322852134705, "learning_rate": 2.6164506589144434e-06, "loss": 0.1077, "step": 48651 }, { "epoch": 0.8677629936146685, "grad_norm": 0.21493053436279297, "learning_rate": 2.615757465987159e-06, "loss": 0.0911, "step": 48652 }, { "epoch": 0.8677808297363822, "grad_norm": 0.2472672164440155, "learning_rate": 2.6150643598289307e-06, "loss": 0.1115, "step": 48653 }, { "epoch": 0.8677986658580958, "grad_norm": 0.3944537937641144, "learning_rate": 2.6143713404424452e-06, "loss": 0.1455, "step": 48654 }, { "epoch": 0.8678165019798095, "grad_norm": 0.28953656554222107, "learning_rate": 2.6136784078303834e-06, "loss": 0.1138, "step": 48655 }, { "epoch": 0.8678343381015232, "grad_norm": 0.21833036839962006, "learning_rate": 2.6129855619954376e-06, "loss": 0.0964, "step": 48656 }, { "epoch": 0.8678521742232369, "grad_norm": 0.29130542278289795, "learning_rate": 2.612292802940294e-06, "loss": 0.1072, "step": 48657 }, { "epoch": 0.8678700103449506, "grad_norm": 0.26437899470329285, "learning_rate": 2.611600130667638e-06, "loss": 0.1228, "step": 48658 }, { "epoch": 0.8678878464666643, "grad_norm": 0.292957067489624, "learning_rate": 2.610907545180144e-06, "loss": 0.1086, "step": 48659 }, { "epoch": 0.867905682588378, "grad_norm": 0.6413722634315491, "learning_rate": 2.610215046480513e-06, "loss": 0.1327, "step": 48660 }, { "epoch": 0.8679235187100917, "grad_norm": 0.313829630613327, "learning_rate": 2.609522634571415e-06, "loss": 0.126, "step": 48661 }, { "epoch": 0.8679413548318053, "grad_norm": 0.2830031216144562, "learning_rate": 2.6088303094555454e-06, "loss": 0.1269, "step": 48662 }, { "epoch": 0.867959190953519, "grad_norm": 0.2789459824562073, "learning_rate": 2.608138071135585e-06, "loss": 0.1007, "step": 48663 }, { "epoch": 0.8679770270752327, "grad_norm": 0.24733521044254303, "learning_rate": 2.60744591961421e-06, "loss": 0.0625, "step": 48664 }, { "epoch": 0.8679948631969464, "grad_norm": 0.3072355091571808, "learning_rate": 2.6067538548941117e-06, "loss": 0.0879, "step": 48665 }, { "epoch": 0.8680126993186601, "grad_norm": 0.2519523799419403, "learning_rate": 2.606061876977972e-06, "loss": 0.121, "step": 48666 }, { "epoch": 0.8680305354403739, "grad_norm": 0.3460981845855713, "learning_rate": 2.605369985868472e-06, "loss": 0.1553, "step": 48667 }, { "epoch": 0.8680483715620876, "grad_norm": 0.2794781029224396, "learning_rate": 2.6046781815682847e-06, "loss": 0.1131, "step": 48668 }, { "epoch": 0.8680662076838013, "grad_norm": 0.23102281987667084, "learning_rate": 2.6039864640801077e-06, "loss": 0.0947, "step": 48669 }, { "epoch": 0.868084043805515, "grad_norm": 0.3197089433670044, "learning_rate": 2.603294833406614e-06, "loss": 0.1272, "step": 48670 }, { "epoch": 0.8681018799272286, "grad_norm": 0.27525627613067627, "learning_rate": 2.6026032895504815e-06, "loss": 0.1501, "step": 48671 }, { "epoch": 0.8681197160489423, "grad_norm": 0.2974686324596405, "learning_rate": 2.601911832514395e-06, "loss": 0.0969, "step": 48672 }, { "epoch": 0.868137552170656, "grad_norm": 0.2813066840171814, "learning_rate": 2.601220462301035e-06, "loss": 0.1074, "step": 48673 }, { "epoch": 0.8681553882923697, "grad_norm": 0.30259424448013306, "learning_rate": 2.6005291789130775e-06, "loss": 0.1514, "step": 48674 }, { "epoch": 0.8681732244140834, "grad_norm": 0.2650390863418579, "learning_rate": 2.5998379823532094e-06, "loss": 0.0979, "step": 48675 }, { "epoch": 0.8681910605357971, "grad_norm": 0.31853342056274414, "learning_rate": 2.5991468726241087e-06, "loss": 0.1149, "step": 48676 }, { "epoch": 0.8682088966575108, "grad_norm": 0.2759436368942261, "learning_rate": 2.598455849728443e-06, "loss": 0.077, "step": 48677 }, { "epoch": 0.8682267327792245, "grad_norm": 0.2520129084587097, "learning_rate": 2.597764913668907e-06, "loss": 0.1098, "step": 48678 }, { "epoch": 0.8682445689009382, "grad_norm": 0.30461743474006653, "learning_rate": 2.597074064448171e-06, "loss": 0.0924, "step": 48679 }, { "epoch": 0.8682624050226518, "grad_norm": 0.2827073335647583, "learning_rate": 2.5963833020689138e-06, "loss": 0.1288, "step": 48680 }, { "epoch": 0.8682802411443655, "grad_norm": 0.2870013117790222, "learning_rate": 2.595692626533808e-06, "loss": 0.1142, "step": 48681 }, { "epoch": 0.8682980772660792, "grad_norm": 0.2983800768852234, "learning_rate": 2.5950020378455424e-06, "loss": 0.1286, "step": 48682 }, { "epoch": 0.8683159133877929, "grad_norm": 0.20682698488235474, "learning_rate": 2.5943115360067854e-06, "loss": 0.106, "step": 48683 }, { "epoch": 0.8683337495095067, "grad_norm": 0.2710472047328949, "learning_rate": 2.5936211210202148e-06, "loss": 0.1159, "step": 48684 }, { "epoch": 0.8683515856312204, "grad_norm": 0.2740075886249542, "learning_rate": 2.5929307928885038e-06, "loss": 0.0875, "step": 48685 }, { "epoch": 0.8683694217529341, "grad_norm": 0.2933413088321686, "learning_rate": 2.5922405516143362e-06, "loss": 0.0795, "step": 48686 }, { "epoch": 0.8683872578746478, "grad_norm": 0.24890920519828796, "learning_rate": 2.5915503972003845e-06, "loss": 0.0933, "step": 48687 }, { "epoch": 0.8684050939963615, "grad_norm": 0.23078541457653046, "learning_rate": 2.5908603296493194e-06, "loss": 0.1557, "step": 48688 }, { "epoch": 0.8684229301180751, "grad_norm": 0.3492889702320099, "learning_rate": 2.5901703489638246e-06, "loss": 0.0919, "step": 48689 }, { "epoch": 0.8684407662397888, "grad_norm": 0.2752855122089386, "learning_rate": 2.589480455146562e-06, "loss": 0.1118, "step": 48690 }, { "epoch": 0.8684586023615025, "grad_norm": 0.3009353280067444, "learning_rate": 2.5887906482002204e-06, "loss": 0.117, "step": 48691 }, { "epoch": 0.8684764384832162, "grad_norm": 0.30130213499069214, "learning_rate": 2.5881009281274656e-06, "loss": 0.0712, "step": 48692 }, { "epoch": 0.8684942746049299, "grad_norm": 0.26302117109298706, "learning_rate": 2.587411294930972e-06, "loss": 0.0986, "step": 48693 }, { "epoch": 0.8685121107266436, "grad_norm": 0.31603842973709106, "learning_rate": 2.5867217486134105e-06, "loss": 0.1128, "step": 48694 }, { "epoch": 0.8685299468483573, "grad_norm": 0.22961276769638062, "learning_rate": 2.586032289177459e-06, "loss": 0.0776, "step": 48695 }, { "epoch": 0.868547782970071, "grad_norm": 0.24750012159347534, "learning_rate": 2.5853429166257908e-06, "loss": 0.1123, "step": 48696 }, { "epoch": 0.8685656190917846, "grad_norm": 0.2424774020910263, "learning_rate": 2.584653630961073e-06, "loss": 0.0904, "step": 48697 }, { "epoch": 0.8685834552134983, "grad_norm": 0.22761297225952148, "learning_rate": 2.5839644321859757e-06, "loss": 0.102, "step": 48698 }, { "epoch": 0.868601291335212, "grad_norm": 0.31616827845573425, "learning_rate": 2.58327532030318e-06, "loss": 0.1356, "step": 48699 }, { "epoch": 0.8686191274569258, "grad_norm": 0.4137493669986725, "learning_rate": 2.582586295315351e-06, "loss": 0.1182, "step": 48700 }, { "epoch": 0.8686369635786395, "grad_norm": 0.30806007981300354, "learning_rate": 2.5818973572251548e-06, "loss": 0.0981, "step": 48701 }, { "epoch": 0.8686547997003532, "grad_norm": 0.25863999128341675, "learning_rate": 2.5812085060352735e-06, "loss": 0.1276, "step": 48702 }, { "epoch": 0.8686726358220669, "grad_norm": 0.2708914875984192, "learning_rate": 2.5805197417483664e-06, "loss": 0.1072, "step": 48703 }, { "epoch": 0.8686904719437806, "grad_norm": 0.28094902634620667, "learning_rate": 2.5798310643671137e-06, "loss": 0.138, "step": 48704 }, { "epoch": 0.8687083080654943, "grad_norm": 0.22386638820171356, "learning_rate": 2.5791424738941778e-06, "loss": 0.0961, "step": 48705 }, { "epoch": 0.868726144187208, "grad_norm": 0.3509886860847473, "learning_rate": 2.5784539703322284e-06, "loss": 0.0926, "step": 48706 }, { "epoch": 0.8687439803089216, "grad_norm": 0.35328739881515503, "learning_rate": 2.5777655536839334e-06, "loss": 0.1481, "step": 48707 }, { "epoch": 0.8687618164306353, "grad_norm": 0.2598971426486969, "learning_rate": 2.577077223951968e-06, "loss": 0.1364, "step": 48708 }, { "epoch": 0.868779652552349, "grad_norm": 0.3480691611766815, "learning_rate": 2.576388981138994e-06, "loss": 0.1422, "step": 48709 }, { "epoch": 0.8687974886740627, "grad_norm": 0.30219706892967224, "learning_rate": 2.575700825247682e-06, "loss": 0.142, "step": 48710 }, { "epoch": 0.8688153247957764, "grad_norm": 0.21827824413776398, "learning_rate": 2.5750127562806953e-06, "loss": 0.0651, "step": 48711 }, { "epoch": 0.8688331609174901, "grad_norm": 0.17950038611888885, "learning_rate": 2.5743247742407076e-06, "loss": 0.0732, "step": 48712 }, { "epoch": 0.8688509970392038, "grad_norm": 0.27343860268592834, "learning_rate": 2.573636879130384e-06, "loss": 0.1475, "step": 48713 }, { "epoch": 0.8688688331609175, "grad_norm": 0.27780279517173767, "learning_rate": 2.572949070952388e-06, "loss": 0.0583, "step": 48714 }, { "epoch": 0.8688866692826311, "grad_norm": 0.32142001390457153, "learning_rate": 2.5722613497093844e-06, "loss": 0.1337, "step": 48715 }, { "epoch": 0.8689045054043448, "grad_norm": 0.1972419023513794, "learning_rate": 2.5715737154040408e-06, "loss": 0.1249, "step": 48716 }, { "epoch": 0.8689223415260586, "grad_norm": 0.24779509007930756, "learning_rate": 2.57088616803903e-06, "loss": 0.1288, "step": 48717 }, { "epoch": 0.8689401776477723, "grad_norm": 0.251101016998291, "learning_rate": 2.570198707617011e-06, "loss": 0.091, "step": 48718 }, { "epoch": 0.868958013769486, "grad_norm": 0.21175187826156616, "learning_rate": 2.569511334140648e-06, "loss": 0.0849, "step": 48719 }, { "epoch": 0.8689758498911997, "grad_norm": 0.3272428512573242, "learning_rate": 2.568824047612603e-06, "loss": 0.1007, "step": 48720 }, { "epoch": 0.8689936860129134, "grad_norm": 0.23609288036823273, "learning_rate": 2.568136848035546e-06, "loss": 0.1011, "step": 48721 }, { "epoch": 0.8690115221346271, "grad_norm": 0.3013632893562317, "learning_rate": 2.567449735412139e-06, "loss": 0.162, "step": 48722 }, { "epoch": 0.8690293582563408, "grad_norm": 0.281926691532135, "learning_rate": 2.5667627097450462e-06, "loss": 0.1039, "step": 48723 }, { "epoch": 0.8690471943780544, "grad_norm": 0.16842341423034668, "learning_rate": 2.5660757710369245e-06, "loss": 0.094, "step": 48724 }, { "epoch": 0.8690650304997681, "grad_norm": 0.28846049308776855, "learning_rate": 2.5653889192904427e-06, "loss": 0.1213, "step": 48725 }, { "epoch": 0.8690828666214818, "grad_norm": 0.2769353985786438, "learning_rate": 2.5647021545082665e-06, "loss": 0.1366, "step": 48726 }, { "epoch": 0.8691007027431955, "grad_norm": 0.2867237329483032, "learning_rate": 2.564015476693052e-06, "loss": 0.0833, "step": 48727 }, { "epoch": 0.8691185388649092, "grad_norm": 0.3349776566028595, "learning_rate": 2.5633288858474575e-06, "loss": 0.1565, "step": 48728 }, { "epoch": 0.8691363749866229, "grad_norm": 0.3175070285797119, "learning_rate": 2.5626423819741537e-06, "loss": 0.141, "step": 48729 }, { "epoch": 0.8691542111083366, "grad_norm": 0.2893778085708618, "learning_rate": 2.5619559650757934e-06, "loss": 0.0919, "step": 48730 }, { "epoch": 0.8691720472300503, "grad_norm": 0.2615737318992615, "learning_rate": 2.5612696351550473e-06, "loss": 0.0889, "step": 48731 }, { "epoch": 0.869189883351764, "grad_norm": 0.23890773952007294, "learning_rate": 2.5605833922145716e-06, "loss": 0.1038, "step": 48732 }, { "epoch": 0.8692077194734776, "grad_norm": 0.31569021940231323, "learning_rate": 2.559897236257017e-06, "loss": 0.1821, "step": 48733 }, { "epoch": 0.8692255555951914, "grad_norm": 0.3673979640007019, "learning_rate": 2.5592111672850583e-06, "loss": 0.1395, "step": 48734 }, { "epoch": 0.8692433917169051, "grad_norm": 0.238789364695549, "learning_rate": 2.558525185301347e-06, "loss": 0.0862, "step": 48735 }, { "epoch": 0.8692612278386188, "grad_norm": 0.3171417713165283, "learning_rate": 2.557839290308542e-06, "loss": 0.1062, "step": 48736 }, { "epoch": 0.8692790639603325, "grad_norm": 0.30866560339927673, "learning_rate": 2.557153482309299e-06, "loss": 0.0986, "step": 48737 }, { "epoch": 0.8692969000820462, "grad_norm": 0.30295780301094055, "learning_rate": 2.5564677613062886e-06, "loss": 0.1479, "step": 48738 }, { "epoch": 0.8693147362037599, "grad_norm": 0.30899497866630554, "learning_rate": 2.555782127302156e-06, "loss": 0.1288, "step": 48739 }, { "epoch": 0.8693325723254736, "grad_norm": 0.23120485246181488, "learning_rate": 2.555096580299568e-06, "loss": 0.1183, "step": 48740 }, { "epoch": 0.8693504084471873, "grad_norm": 0.27584919333457947, "learning_rate": 2.5544111203011754e-06, "loss": 0.134, "step": 48741 }, { "epoch": 0.8693682445689009, "grad_norm": 0.1963358372449875, "learning_rate": 2.553725747309632e-06, "loss": 0.1086, "step": 48742 }, { "epoch": 0.8693860806906146, "grad_norm": 0.29744452238082886, "learning_rate": 2.553040461327602e-06, "loss": 0.1038, "step": 48743 }, { "epoch": 0.8694039168123283, "grad_norm": 0.2964674234390259, "learning_rate": 2.5523552623577474e-06, "loss": 0.1134, "step": 48744 }, { "epoch": 0.869421752934042, "grad_norm": 0.541043221950531, "learning_rate": 2.551670150402713e-06, "loss": 0.1316, "step": 48745 }, { "epoch": 0.8694395890557557, "grad_norm": 0.30327659845352173, "learning_rate": 2.5509851254651553e-06, "loss": 0.1515, "step": 48746 }, { "epoch": 0.8694574251774694, "grad_norm": 0.2531784772872925, "learning_rate": 2.5503001875477384e-06, "loss": 0.1132, "step": 48747 }, { "epoch": 0.8694752612991831, "grad_norm": 0.29986971616744995, "learning_rate": 2.5496153366531105e-06, "loss": 0.12, "step": 48748 }, { "epoch": 0.8694930974208968, "grad_norm": 0.3939153552055359, "learning_rate": 2.548930572783928e-06, "loss": 0.1373, "step": 48749 }, { "epoch": 0.8695109335426104, "grad_norm": 0.34003064036369324, "learning_rate": 2.5482458959428385e-06, "loss": 0.1011, "step": 48750 }, { "epoch": 0.8695287696643242, "grad_norm": 0.23042713105678558, "learning_rate": 2.5475613061325094e-06, "loss": 0.1468, "step": 48751 }, { "epoch": 0.8695466057860379, "grad_norm": 0.2887931168079376, "learning_rate": 2.5468768033555886e-06, "loss": 0.1072, "step": 48752 }, { "epoch": 0.8695644419077516, "grad_norm": 0.31251659989356995, "learning_rate": 2.5461923876147264e-06, "loss": 0.0777, "step": 48753 }, { "epoch": 0.8695822780294653, "grad_norm": 0.29049623012542725, "learning_rate": 2.545508058912577e-06, "loss": 0.0735, "step": 48754 }, { "epoch": 0.869600114151179, "grad_norm": 0.359990656375885, "learning_rate": 2.5448238172517904e-06, "loss": 0.1644, "step": 48755 }, { "epoch": 0.8696179502728927, "grad_norm": 0.31109046936035156, "learning_rate": 2.5441396626350282e-06, "loss": 0.08, "step": 48756 }, { "epoch": 0.8696357863946064, "grad_norm": 0.36081916093826294, "learning_rate": 2.543455595064931e-06, "loss": 0.1417, "step": 48757 }, { "epoch": 0.8696536225163201, "grad_norm": 0.1988876909017563, "learning_rate": 2.5427716145441596e-06, "loss": 0.1203, "step": 48758 }, { "epoch": 0.8696714586380337, "grad_norm": 0.3619775176048279, "learning_rate": 2.5420877210753593e-06, "loss": 0.138, "step": 48759 }, { "epoch": 0.8696892947597474, "grad_norm": 0.3323259949684143, "learning_rate": 2.5414039146611862e-06, "loss": 0.0828, "step": 48760 }, { "epoch": 0.8697071308814611, "grad_norm": 0.2197135090827942, "learning_rate": 2.5407201953042913e-06, "loss": 0.1348, "step": 48761 }, { "epoch": 0.8697249670031748, "grad_norm": 0.2875751852989197, "learning_rate": 2.5400365630073196e-06, "loss": 0.1242, "step": 48762 }, { "epoch": 0.8697428031248885, "grad_norm": 0.22646726667881012, "learning_rate": 2.5393530177729217e-06, "loss": 0.1498, "step": 48763 }, { "epoch": 0.8697606392466022, "grad_norm": 0.24403299391269684, "learning_rate": 2.5386695596037513e-06, "loss": 0.0876, "step": 48764 }, { "epoch": 0.8697784753683159, "grad_norm": 0.2947208881378174, "learning_rate": 2.5379861885024557e-06, "loss": 0.1097, "step": 48765 }, { "epoch": 0.8697963114900296, "grad_norm": 0.20807866752147675, "learning_rate": 2.537302904471686e-06, "loss": 0.0785, "step": 48766 }, { "epoch": 0.8698141476117432, "grad_norm": 0.27427777647972107, "learning_rate": 2.5366197075140846e-06, "loss": 0.0814, "step": 48767 }, { "epoch": 0.869831983733457, "grad_norm": 0.25727513432502747, "learning_rate": 2.535936597632302e-06, "loss": 0.0655, "step": 48768 }, { "epoch": 0.8698498198551707, "grad_norm": 0.3162531554698944, "learning_rate": 2.535253574828994e-06, "loss": 0.1014, "step": 48769 }, { "epoch": 0.8698676559768844, "grad_norm": 0.30200427770614624, "learning_rate": 2.5345706391067953e-06, "loss": 0.1372, "step": 48770 }, { "epoch": 0.8698854920985981, "grad_norm": 0.32227572798728943, "learning_rate": 2.5338877904683643e-06, "loss": 0.1346, "step": 48771 }, { "epoch": 0.8699033282203118, "grad_norm": 0.1963113695383072, "learning_rate": 2.533205028916341e-06, "loss": 0.0868, "step": 48772 }, { "epoch": 0.8699211643420255, "grad_norm": 0.3146909475326538, "learning_rate": 2.532522354453379e-06, "loss": 0.1085, "step": 48773 }, { "epoch": 0.8699390004637392, "grad_norm": 0.373526930809021, "learning_rate": 2.53183976708212e-06, "loss": 0.1005, "step": 48774 }, { "epoch": 0.8699568365854529, "grad_norm": 0.23058359324932098, "learning_rate": 2.531157266805209e-06, "loss": 0.1223, "step": 48775 }, { "epoch": 0.8699746727071666, "grad_norm": 0.2550305128097534, "learning_rate": 2.5304748536252894e-06, "loss": 0.0701, "step": 48776 }, { "epoch": 0.8699925088288802, "grad_norm": 0.26112306118011475, "learning_rate": 2.5297925275450136e-06, "loss": 0.1386, "step": 48777 }, { "epoch": 0.8700103449505939, "grad_norm": 0.26633623242378235, "learning_rate": 2.5291102885670243e-06, "loss": 0.1525, "step": 48778 }, { "epoch": 0.8700281810723076, "grad_norm": 0.21439997851848602, "learning_rate": 2.5284281366939667e-06, "loss": 0.0486, "step": 48779 }, { "epoch": 0.8700460171940213, "grad_norm": 0.24864917993545532, "learning_rate": 2.52774607192848e-06, "loss": 0.0895, "step": 48780 }, { "epoch": 0.870063853315735, "grad_norm": 0.2315618097782135, "learning_rate": 2.527064094273207e-06, "loss": 0.0755, "step": 48781 }, { "epoch": 0.8700816894374487, "grad_norm": 0.26426947116851807, "learning_rate": 2.526382203730801e-06, "loss": 0.1035, "step": 48782 }, { "epoch": 0.8700995255591624, "grad_norm": 0.3090909421443939, "learning_rate": 2.5257004003038985e-06, "loss": 0.1401, "step": 48783 }, { "epoch": 0.8701173616808761, "grad_norm": 0.27203044295310974, "learning_rate": 2.5250186839951397e-06, "loss": 0.116, "step": 48784 }, { "epoch": 0.8701351978025899, "grad_norm": 0.2559853494167328, "learning_rate": 2.524337054807177e-06, "loss": 0.1235, "step": 48785 }, { "epoch": 0.8701530339243035, "grad_norm": 0.42401984333992004, "learning_rate": 2.5236555127426396e-06, "loss": 0.1111, "step": 48786 }, { "epoch": 0.8701708700460172, "grad_norm": 0.23002992570400238, "learning_rate": 2.522974057804181e-06, "loss": 0.0721, "step": 48787 }, { "epoch": 0.8701887061677309, "grad_norm": 0.2991747558116913, "learning_rate": 2.5222926899944404e-06, "loss": 0.0899, "step": 48788 }, { "epoch": 0.8702065422894446, "grad_norm": 0.23942267894744873, "learning_rate": 2.521611409316052e-06, "loss": 0.0783, "step": 48789 }, { "epoch": 0.8702243784111583, "grad_norm": 0.21381866931915283, "learning_rate": 2.5209302157716664e-06, "loss": 0.0755, "step": 48790 }, { "epoch": 0.870242214532872, "grad_norm": 0.3925561010837555, "learning_rate": 2.5202491093639173e-06, "loss": 0.0893, "step": 48791 }, { "epoch": 0.8702600506545857, "grad_norm": 0.33114877343177795, "learning_rate": 2.5195680900954475e-06, "loss": 0.1621, "step": 48792 }, { "epoch": 0.8702778867762994, "grad_norm": 0.2122526615858078, "learning_rate": 2.5188871579688994e-06, "loss": 0.1047, "step": 48793 }, { "epoch": 0.870295722898013, "grad_norm": 0.33374249935150146, "learning_rate": 2.518206312986901e-06, "loss": 0.1152, "step": 48794 }, { "epoch": 0.8703135590197267, "grad_norm": 0.25987598299980164, "learning_rate": 2.517525555152106e-06, "loss": 0.1362, "step": 48795 }, { "epoch": 0.8703313951414404, "grad_norm": 0.2597131133079529, "learning_rate": 2.5168448844671454e-06, "loss": 0.1716, "step": 48796 }, { "epoch": 0.8703492312631541, "grad_norm": 0.25804784893989563, "learning_rate": 2.516164300934656e-06, "loss": 0.0906, "step": 48797 }, { "epoch": 0.8703670673848678, "grad_norm": 0.3604278564453125, "learning_rate": 2.5154838045572867e-06, "loss": 0.1364, "step": 48798 }, { "epoch": 0.8703849035065815, "grad_norm": 0.36034461855888367, "learning_rate": 2.5148033953376614e-06, "loss": 0.129, "step": 48799 }, { "epoch": 0.8704027396282952, "grad_norm": 0.29679515957832336, "learning_rate": 2.5141230732784264e-06, "loss": 0.07, "step": 48800 }, { "epoch": 0.870420575750009, "grad_norm": 0.37985461950302124, "learning_rate": 2.513442838382221e-06, "loss": 0.1411, "step": 48801 }, { "epoch": 0.8704384118717227, "grad_norm": 0.3420778214931488, "learning_rate": 2.512762690651671e-06, "loss": 0.0865, "step": 48802 }, { "epoch": 0.8704562479934363, "grad_norm": 0.30549710988998413, "learning_rate": 2.512082630089424e-06, "loss": 0.1345, "step": 48803 }, { "epoch": 0.87047408411515, "grad_norm": 0.2198539823293686, "learning_rate": 2.5114026566981115e-06, "loss": 0.0845, "step": 48804 }, { "epoch": 0.8704919202368637, "grad_norm": 0.24978187680244446, "learning_rate": 2.5107227704803725e-06, "loss": 0.0964, "step": 48805 }, { "epoch": 0.8705097563585774, "grad_norm": 0.23699213564395905, "learning_rate": 2.510042971438836e-06, "loss": 0.1328, "step": 48806 }, { "epoch": 0.8705275924802911, "grad_norm": 0.298720121383667, "learning_rate": 2.509363259576139e-06, "loss": 0.1476, "step": 48807 }, { "epoch": 0.8705454286020048, "grad_norm": 0.2284708321094513, "learning_rate": 2.5086836348949207e-06, "loss": 0.1065, "step": 48808 }, { "epoch": 0.8705632647237185, "grad_norm": 0.3133024275302887, "learning_rate": 2.508004097397812e-06, "loss": 0.1589, "step": 48809 }, { "epoch": 0.8705811008454322, "grad_norm": 0.27239471673965454, "learning_rate": 2.50732464708745e-06, "loss": 0.1036, "step": 48810 }, { "epoch": 0.8705989369671459, "grad_norm": 0.23195764422416687, "learning_rate": 2.5066452839664607e-06, "loss": 0.0908, "step": 48811 }, { "epoch": 0.8706167730888595, "grad_norm": 0.27285176515579224, "learning_rate": 2.5059660080374886e-06, "loss": 0.0901, "step": 48812 }, { "epoch": 0.8706346092105732, "grad_norm": 0.24582798779010773, "learning_rate": 2.505286819303157e-06, "loss": 0.1186, "step": 48813 }, { "epoch": 0.8706524453322869, "grad_norm": 0.20475958287715912, "learning_rate": 2.5046077177661083e-06, "loss": 0.0596, "step": 48814 }, { "epoch": 0.8706702814540006, "grad_norm": 0.2312474399805069, "learning_rate": 2.503928703428962e-06, "loss": 0.0861, "step": 48815 }, { "epoch": 0.8706881175757143, "grad_norm": 0.28702399134635925, "learning_rate": 2.503249776294364e-06, "loss": 0.1168, "step": 48816 }, { "epoch": 0.870705953697428, "grad_norm": 0.27082788944244385, "learning_rate": 2.5025709363649425e-06, "loss": 0.1066, "step": 48817 }, { "epoch": 0.8707237898191418, "grad_norm": 0.3364080488681793, "learning_rate": 2.501892183643323e-06, "loss": 0.0984, "step": 48818 }, { "epoch": 0.8707416259408555, "grad_norm": 0.3233310878276825, "learning_rate": 2.5012135181321422e-06, "loss": 0.1104, "step": 48819 }, { "epoch": 0.8707594620625692, "grad_norm": 0.29159247875213623, "learning_rate": 2.500534939834023e-06, "loss": 0.1002, "step": 48820 }, { "epoch": 0.8707772981842828, "grad_norm": 0.4067905843257904, "learning_rate": 2.4998564487516058e-06, "loss": 0.1001, "step": 48821 }, { "epoch": 0.8707951343059965, "grad_norm": 0.38580071926116943, "learning_rate": 2.499178044887518e-06, "loss": 0.1584, "step": 48822 }, { "epoch": 0.8708129704277102, "grad_norm": 0.2752784192562103, "learning_rate": 2.4984997282443857e-06, "loss": 0.1186, "step": 48823 }, { "epoch": 0.8708308065494239, "grad_norm": 0.2208663374185562, "learning_rate": 2.497821498824837e-06, "loss": 0.0913, "step": 48824 }, { "epoch": 0.8708486426711376, "grad_norm": 0.3801005184650421, "learning_rate": 2.4971433566315096e-06, "loss": 0.1155, "step": 48825 }, { "epoch": 0.8708664787928513, "grad_norm": 0.2505093812942505, "learning_rate": 2.4964653016670198e-06, "loss": 0.1263, "step": 48826 }, { "epoch": 0.870884314914565, "grad_norm": 0.2930073142051697, "learning_rate": 2.4957873339340076e-06, "loss": 0.1143, "step": 48827 }, { "epoch": 0.8709021510362787, "grad_norm": 0.2723417282104492, "learning_rate": 2.4951094534350932e-06, "loss": 0.0798, "step": 48828 }, { "epoch": 0.8709199871579923, "grad_norm": 0.2004810869693756, "learning_rate": 2.4944316601729106e-06, "loss": 0.0602, "step": 48829 }, { "epoch": 0.870937823279706, "grad_norm": 0.233836829662323, "learning_rate": 2.4937539541500856e-06, "loss": 0.0626, "step": 48830 }, { "epoch": 0.8709556594014197, "grad_norm": 0.2493063360452652, "learning_rate": 2.4930763353692434e-06, "loss": 0.066, "step": 48831 }, { "epoch": 0.8709734955231334, "grad_norm": 0.24943096935749054, "learning_rate": 2.49239880383301e-06, "loss": 0.0573, "step": 48832 }, { "epoch": 0.8709913316448471, "grad_norm": 0.2869252562522888, "learning_rate": 2.491721359544011e-06, "loss": 0.112, "step": 48833 }, { "epoch": 0.8710091677665608, "grad_norm": 0.35033470392227173, "learning_rate": 2.491044002504875e-06, "loss": 0.0918, "step": 48834 }, { "epoch": 0.8710270038882746, "grad_norm": 0.2965717911720276, "learning_rate": 2.490366732718227e-06, "loss": 0.1151, "step": 48835 }, { "epoch": 0.8710448400099883, "grad_norm": 0.42473161220550537, "learning_rate": 2.489689550186694e-06, "loss": 0.1728, "step": 48836 }, { "epoch": 0.871062676131702, "grad_norm": 0.3192669153213501, "learning_rate": 2.489012454912895e-06, "loss": 0.0876, "step": 48837 }, { "epoch": 0.8710805122534157, "grad_norm": 0.21786446869373322, "learning_rate": 2.4883354468994617e-06, "loss": 0.1027, "step": 48838 }, { "epoch": 0.8710983483751293, "grad_norm": 0.21870316565036774, "learning_rate": 2.4876585261490173e-06, "loss": 0.1483, "step": 48839 }, { "epoch": 0.871116184496843, "grad_norm": 0.30879902839660645, "learning_rate": 2.486981692664178e-06, "loss": 0.1302, "step": 48840 }, { "epoch": 0.8711340206185567, "grad_norm": 0.236866757273674, "learning_rate": 2.486304946447579e-06, "loss": 0.125, "step": 48841 }, { "epoch": 0.8711518567402704, "grad_norm": 0.2195359319448471, "learning_rate": 2.4856282875018316e-06, "loss": 0.1079, "step": 48842 }, { "epoch": 0.8711696928619841, "grad_norm": 0.2912219166755676, "learning_rate": 2.48495171582957e-06, "loss": 0.0924, "step": 48843 }, { "epoch": 0.8711875289836978, "grad_norm": 0.24497102200984955, "learning_rate": 2.4842752314334138e-06, "loss": 0.112, "step": 48844 }, { "epoch": 0.8712053651054115, "grad_norm": 0.2763284146785736, "learning_rate": 2.483598834315981e-06, "loss": 0.0926, "step": 48845 }, { "epoch": 0.8712232012271252, "grad_norm": 0.24086697399616241, "learning_rate": 2.482922524479894e-06, "loss": 0.1008, "step": 48846 }, { "epoch": 0.8712410373488388, "grad_norm": 0.4496169686317444, "learning_rate": 2.482246301927779e-06, "loss": 0.1778, "step": 48847 }, { "epoch": 0.8712588734705525, "grad_norm": 0.18537591397762299, "learning_rate": 2.4815701666622558e-06, "loss": 0.0662, "step": 48848 }, { "epoch": 0.8712767095922662, "grad_norm": 0.3441908359527588, "learning_rate": 2.4808941186859445e-06, "loss": 0.1191, "step": 48849 }, { "epoch": 0.8712945457139799, "grad_norm": 0.2697514295578003, "learning_rate": 2.48021815800146e-06, "loss": 0.0942, "step": 48850 }, { "epoch": 0.8713123818356936, "grad_norm": 0.2659436762332916, "learning_rate": 2.479542284611433e-06, "loss": 0.0829, "step": 48851 }, { "epoch": 0.8713302179574074, "grad_norm": 0.2795272171497345, "learning_rate": 2.4788664985184785e-06, "loss": 0.0847, "step": 48852 }, { "epoch": 0.8713480540791211, "grad_norm": 0.2340693175792694, "learning_rate": 2.4781907997252135e-06, "loss": 0.0918, "step": 48853 }, { "epoch": 0.8713658902008348, "grad_norm": 0.3255627155303955, "learning_rate": 2.4775151882342614e-06, "loss": 0.162, "step": 48854 }, { "epoch": 0.8713837263225485, "grad_norm": 0.27811694145202637, "learning_rate": 2.476839664048236e-06, "loss": 0.1014, "step": 48855 }, { "epoch": 0.8714015624442621, "grad_norm": 0.28932762145996094, "learning_rate": 2.4761642271697637e-06, "loss": 0.1376, "step": 48856 }, { "epoch": 0.8714193985659758, "grad_norm": 0.3712722063064575, "learning_rate": 2.475488877601456e-06, "loss": 0.148, "step": 48857 }, { "epoch": 0.8714372346876895, "grad_norm": 0.3367063105106354, "learning_rate": 2.4748136153459363e-06, "loss": 0.1513, "step": 48858 }, { "epoch": 0.8714550708094032, "grad_norm": 0.29865390062332153, "learning_rate": 2.4741384404058125e-06, "loss": 0.1395, "step": 48859 }, { "epoch": 0.8714729069311169, "grad_norm": 0.30798327922821045, "learning_rate": 2.473463352783714e-06, "loss": 0.1546, "step": 48860 }, { "epoch": 0.8714907430528306, "grad_norm": 0.18185406923294067, "learning_rate": 2.4727883524822527e-06, "loss": 0.0424, "step": 48861 }, { "epoch": 0.8715085791745443, "grad_norm": 0.20749178528785706, "learning_rate": 2.4721134395040453e-06, "loss": 0.1407, "step": 48862 }, { "epoch": 0.871526415296258, "grad_norm": 0.27528512477874756, "learning_rate": 2.4714386138516984e-06, "loss": 0.1137, "step": 48863 }, { "epoch": 0.8715442514179716, "grad_norm": 0.37108317017555237, "learning_rate": 2.470763875527843e-06, "loss": 0.1868, "step": 48864 }, { "epoch": 0.8715620875396853, "grad_norm": 0.35055819153785706, "learning_rate": 2.4700892245350908e-06, "loss": 0.1351, "step": 48865 }, { "epoch": 0.871579923661399, "grad_norm": 0.24428337812423706, "learning_rate": 2.4694146608760517e-06, "loss": 0.1231, "step": 48866 }, { "epoch": 0.8715977597831127, "grad_norm": 0.26510217785835266, "learning_rate": 2.4687401845533394e-06, "loss": 0.1212, "step": 48867 }, { "epoch": 0.8716155959048264, "grad_norm": 0.2758074402809143, "learning_rate": 2.468065795569574e-06, "loss": 0.1674, "step": 48868 }, { "epoch": 0.8716334320265402, "grad_norm": 0.34988608956336975, "learning_rate": 2.467391493927368e-06, "loss": 0.1569, "step": 48869 }, { "epoch": 0.8716512681482539, "grad_norm": 0.2557702362537384, "learning_rate": 2.4667172796293355e-06, "loss": 0.0627, "step": 48870 }, { "epoch": 0.8716691042699676, "grad_norm": 0.18752634525299072, "learning_rate": 2.466043152678091e-06, "loss": 0.0924, "step": 48871 }, { "epoch": 0.8716869403916813, "grad_norm": 0.3367287814617157, "learning_rate": 2.4653691130762434e-06, "loss": 0.1704, "step": 48872 }, { "epoch": 0.871704776513395, "grad_norm": 0.22540201246738434, "learning_rate": 2.4646951608264108e-06, "loss": 0.0865, "step": 48873 }, { "epoch": 0.8717226126351086, "grad_norm": 0.2627692222595215, "learning_rate": 2.464021295931204e-06, "loss": 0.1153, "step": 48874 }, { "epoch": 0.8717404487568223, "grad_norm": 0.3276059329509735, "learning_rate": 2.463347518393236e-06, "loss": 0.1203, "step": 48875 }, { "epoch": 0.871758284878536, "grad_norm": 0.3505856394767761, "learning_rate": 2.462673828215109e-06, "loss": 0.1014, "step": 48876 }, { "epoch": 0.8717761210002497, "grad_norm": 0.2076411098241806, "learning_rate": 2.4620002253994495e-06, "loss": 0.0968, "step": 48877 }, { "epoch": 0.8717939571219634, "grad_norm": 0.29111114144325256, "learning_rate": 2.4613267099488606e-06, "loss": 0.1243, "step": 48878 }, { "epoch": 0.8718117932436771, "grad_norm": 0.2613781690597534, "learning_rate": 2.4606532818659546e-06, "loss": 0.1579, "step": 48879 }, { "epoch": 0.8718296293653908, "grad_norm": 0.24378587305545807, "learning_rate": 2.4599799411533343e-06, "loss": 0.103, "step": 48880 }, { "epoch": 0.8718474654871045, "grad_norm": 0.22437183558940887, "learning_rate": 2.4593066878136227e-06, "loss": 0.1208, "step": 48881 }, { "epoch": 0.8718653016088181, "grad_norm": 0.28232720494270325, "learning_rate": 2.458633521849421e-06, "loss": 0.1582, "step": 48882 }, { "epoch": 0.8718831377305318, "grad_norm": 0.2162172496318817, "learning_rate": 2.4579604432633436e-06, "loss": 0.095, "step": 48883 }, { "epoch": 0.8719009738522455, "grad_norm": 0.3094240128993988, "learning_rate": 2.4572874520579993e-06, "loss": 0.1088, "step": 48884 }, { "epoch": 0.8719188099739592, "grad_norm": 0.4338151812553406, "learning_rate": 2.4566145482359886e-06, "loss": 0.1051, "step": 48885 }, { "epoch": 0.871936646095673, "grad_norm": 0.2873224914073944, "learning_rate": 2.4559417317999323e-06, "loss": 0.118, "step": 48886 }, { "epoch": 0.8719544822173867, "grad_norm": 0.23062574863433838, "learning_rate": 2.4552690027524304e-06, "loss": 0.1386, "step": 48887 }, { "epoch": 0.8719723183391004, "grad_norm": 0.2627892792224884, "learning_rate": 2.4545963610960925e-06, "loss": 0.0981, "step": 48888 }, { "epoch": 0.8719901544608141, "grad_norm": 0.24387618899345398, "learning_rate": 2.453923806833522e-06, "loss": 0.0828, "step": 48889 }, { "epoch": 0.8720079905825278, "grad_norm": 0.4822535812854767, "learning_rate": 2.453251339967336e-06, "loss": 0.0914, "step": 48890 }, { "epoch": 0.8720258267042414, "grad_norm": 0.28246212005615234, "learning_rate": 2.4525789605001348e-06, "loss": 0.088, "step": 48891 }, { "epoch": 0.8720436628259551, "grad_norm": 0.27207309007644653, "learning_rate": 2.4519066684345256e-06, "loss": 0.1223, "step": 48892 }, { "epoch": 0.8720614989476688, "grad_norm": 0.21045000851154327, "learning_rate": 2.4512344637731057e-06, "loss": 0.1302, "step": 48893 }, { "epoch": 0.8720793350693825, "grad_norm": 0.2550332844257355, "learning_rate": 2.450562346518498e-06, "loss": 0.0927, "step": 48894 }, { "epoch": 0.8720971711910962, "grad_norm": 0.27137085795402527, "learning_rate": 2.4498903166732977e-06, "loss": 0.0843, "step": 48895 }, { "epoch": 0.8721150073128099, "grad_norm": 0.31101417541503906, "learning_rate": 2.449218374240106e-06, "loss": 0.1185, "step": 48896 }, { "epoch": 0.8721328434345236, "grad_norm": 0.2749179005622864, "learning_rate": 2.4485465192215366e-06, "loss": 0.0771, "step": 48897 }, { "epoch": 0.8721506795562373, "grad_norm": 0.29282280802726746, "learning_rate": 2.4478747516201856e-06, "loss": 0.08, "step": 48898 }, { "epoch": 0.872168515677951, "grad_norm": 0.26182353496551514, "learning_rate": 2.447203071438667e-06, "loss": 0.0967, "step": 48899 }, { "epoch": 0.8721863517996646, "grad_norm": 0.27749302983283997, "learning_rate": 2.4465314786795786e-06, "loss": 0.1269, "step": 48900 }, { "epoch": 0.8722041879213783, "grad_norm": 0.26597028970718384, "learning_rate": 2.445859973345524e-06, "loss": 0.0693, "step": 48901 }, { "epoch": 0.872222024043092, "grad_norm": 0.3577239513397217, "learning_rate": 2.4451885554391014e-06, "loss": 0.1586, "step": 48902 }, { "epoch": 0.8722398601648058, "grad_norm": 0.25598835945129395, "learning_rate": 2.444517224962922e-06, "loss": 0.1135, "step": 48903 }, { "epoch": 0.8722576962865195, "grad_norm": 0.2673466205596924, "learning_rate": 2.4438459819195847e-06, "loss": 0.0779, "step": 48904 }, { "epoch": 0.8722755324082332, "grad_norm": 0.3307526707649231, "learning_rate": 2.443174826311689e-06, "loss": 0.1623, "step": 48905 }, { "epoch": 0.8722933685299469, "grad_norm": 0.2783094048500061, "learning_rate": 2.442503758141837e-06, "loss": 0.0935, "step": 48906 }, { "epoch": 0.8723112046516606, "grad_norm": 0.31175684928894043, "learning_rate": 2.4418327774126336e-06, "loss": 0.1338, "step": 48907 }, { "epoch": 0.8723290407733743, "grad_norm": 0.19709528982639313, "learning_rate": 2.4411618841266804e-06, "loss": 0.0392, "step": 48908 }, { "epoch": 0.8723468768950879, "grad_norm": 0.2973250448703766, "learning_rate": 2.4404910782865692e-06, "loss": 0.1405, "step": 48909 }, { "epoch": 0.8723647130168016, "grad_norm": 0.27532443404197693, "learning_rate": 2.439820359894912e-06, "loss": 0.1161, "step": 48910 }, { "epoch": 0.8723825491385153, "grad_norm": 0.281662255525589, "learning_rate": 2.4391497289542957e-06, "loss": 0.1078, "step": 48911 }, { "epoch": 0.872400385260229, "grad_norm": 0.3302733600139618, "learning_rate": 2.438479185467335e-06, "loss": 0.1324, "step": 48912 }, { "epoch": 0.8724182213819427, "grad_norm": 0.26644861698150635, "learning_rate": 2.437808729436622e-06, "loss": 0.1334, "step": 48913 }, { "epoch": 0.8724360575036564, "grad_norm": 0.2155432552099228, "learning_rate": 2.4371383608647518e-06, "loss": 0.0912, "step": 48914 }, { "epoch": 0.8724538936253701, "grad_norm": 0.357900470495224, "learning_rate": 2.4364680797543225e-06, "loss": 0.1436, "step": 48915 }, { "epoch": 0.8724717297470838, "grad_norm": 0.24283993244171143, "learning_rate": 2.4357978861079435e-06, "loss": 0.0873, "step": 48916 }, { "epoch": 0.8724895658687974, "grad_norm": 0.2726002335548401, "learning_rate": 2.4351277799282035e-06, "loss": 0.1176, "step": 48917 }, { "epoch": 0.8725074019905111, "grad_norm": 0.27351316809654236, "learning_rate": 2.434457761217701e-06, "loss": 0.1011, "step": 48918 }, { "epoch": 0.8725252381122249, "grad_norm": 0.4146108627319336, "learning_rate": 2.4337878299790313e-06, "loss": 0.1302, "step": 48919 }, { "epoch": 0.8725430742339386, "grad_norm": 0.376118004322052, "learning_rate": 2.4331179862147973e-06, "loss": 0.117, "step": 48920 }, { "epoch": 0.8725609103556523, "grad_norm": 0.280676931142807, "learning_rate": 2.432448229927592e-06, "loss": 0.0795, "step": 48921 }, { "epoch": 0.872578746477366, "grad_norm": 0.25002437829971313, "learning_rate": 2.431778561120013e-06, "loss": 0.0938, "step": 48922 }, { "epoch": 0.8725965825990797, "grad_norm": 0.2694808840751648, "learning_rate": 2.4311089797946494e-06, "loss": 0.1192, "step": 48923 }, { "epoch": 0.8726144187207934, "grad_norm": 0.3451583683490753, "learning_rate": 2.430439485954103e-06, "loss": 0.101, "step": 48924 }, { "epoch": 0.8726322548425071, "grad_norm": 0.2184550166130066, "learning_rate": 2.4297700796009737e-06, "loss": 0.0981, "step": 48925 }, { "epoch": 0.8726500909642207, "grad_norm": 0.30546683073043823, "learning_rate": 2.429100760737851e-06, "loss": 0.1447, "step": 48926 }, { "epoch": 0.8726679270859344, "grad_norm": 0.4720088243484497, "learning_rate": 2.4284315293673284e-06, "loss": 0.1132, "step": 48927 }, { "epoch": 0.8726857632076481, "grad_norm": 0.24959751963615417, "learning_rate": 2.4277623854919967e-06, "loss": 0.0965, "step": 48928 }, { "epoch": 0.8727035993293618, "grad_norm": 0.30644840002059937, "learning_rate": 2.4270933291144603e-06, "loss": 0.1163, "step": 48929 }, { "epoch": 0.8727214354510755, "grad_norm": 0.26736292243003845, "learning_rate": 2.426424360237306e-06, "loss": 0.1178, "step": 48930 }, { "epoch": 0.8727392715727892, "grad_norm": 0.1963820904493332, "learning_rate": 2.425755478863129e-06, "loss": 0.1354, "step": 48931 }, { "epoch": 0.8727571076945029, "grad_norm": 0.23830799758434296, "learning_rate": 2.4250866849945127e-06, "loss": 0.0868, "step": 48932 }, { "epoch": 0.8727749438162166, "grad_norm": 0.23102404177188873, "learning_rate": 2.4244179786340637e-06, "loss": 0.0935, "step": 48933 }, { "epoch": 0.8727927799379303, "grad_norm": 0.2513000965118408, "learning_rate": 2.4237493597843692e-06, "loss": 0.1133, "step": 48934 }, { "epoch": 0.8728106160596439, "grad_norm": 0.23170988261699677, "learning_rate": 2.4230808284480182e-06, "loss": 0.1203, "step": 48935 }, { "epoch": 0.8728284521813577, "grad_norm": 0.32318252325057983, "learning_rate": 2.422412384627598e-06, "loss": 0.1322, "step": 48936 }, { "epoch": 0.8728462883030714, "grad_norm": 0.2838955223560333, "learning_rate": 2.421744028325712e-06, "loss": 0.0757, "step": 48937 }, { "epoch": 0.8728641244247851, "grad_norm": 0.2863087058067322, "learning_rate": 2.4210757595449385e-06, "loss": 0.0912, "step": 48938 }, { "epoch": 0.8728819605464988, "grad_norm": 0.27006933093070984, "learning_rate": 2.420407578287881e-06, "loss": 0.1109, "step": 48939 }, { "epoch": 0.8728997966682125, "grad_norm": 0.2834243178367615, "learning_rate": 2.4197394845571206e-06, "loss": 0.1692, "step": 48940 }, { "epoch": 0.8729176327899262, "grad_norm": 0.358725905418396, "learning_rate": 2.4190714783552447e-06, "loss": 0.101, "step": 48941 }, { "epoch": 0.8729354689116399, "grad_norm": 0.21277566254138947, "learning_rate": 2.418403559684851e-06, "loss": 0.0625, "step": 48942 }, { "epoch": 0.8729533050333536, "grad_norm": 0.25441327691078186, "learning_rate": 2.417735728548523e-06, "loss": 0.1167, "step": 48943 }, { "epoch": 0.8729711411550672, "grad_norm": 0.3345637023448944, "learning_rate": 2.4170679849488535e-06, "loss": 0.1525, "step": 48944 }, { "epoch": 0.8729889772767809, "grad_norm": 0.2666950523853302, "learning_rate": 2.4164003288884243e-06, "loss": 0.0924, "step": 48945 }, { "epoch": 0.8730068133984946, "grad_norm": 0.25407615303993225, "learning_rate": 2.4157327603698292e-06, "loss": 0.1002, "step": 48946 }, { "epoch": 0.8730246495202083, "grad_norm": 0.3852671980857849, "learning_rate": 2.415065279395656e-06, "loss": 0.1719, "step": 48947 }, { "epoch": 0.873042485641922, "grad_norm": 0.19352583587169647, "learning_rate": 2.414397885968492e-06, "loss": 0.0433, "step": 48948 }, { "epoch": 0.8730603217636357, "grad_norm": 0.28823792934417725, "learning_rate": 2.41373058009092e-06, "loss": 0.0987, "step": 48949 }, { "epoch": 0.8730781578853494, "grad_norm": 0.3840271532535553, "learning_rate": 2.4130633617655274e-06, "loss": 0.1327, "step": 48950 }, { "epoch": 0.8730959940070631, "grad_norm": 0.22522905468940735, "learning_rate": 2.4123962309949012e-06, "loss": 0.1194, "step": 48951 }, { "epoch": 0.8731138301287767, "grad_norm": 0.24343860149383545, "learning_rate": 2.411729187781631e-06, "loss": 0.0979, "step": 48952 }, { "epoch": 0.8731316662504905, "grad_norm": 0.2513010501861572, "learning_rate": 2.411062232128303e-06, "loss": 0.1642, "step": 48953 }, { "epoch": 0.8731495023722042, "grad_norm": 0.2829985022544861, "learning_rate": 2.410395364037493e-06, "loss": 0.1529, "step": 48954 }, { "epoch": 0.8731673384939179, "grad_norm": 0.32416999340057373, "learning_rate": 2.4097285835117998e-06, "loss": 0.0727, "step": 48955 }, { "epoch": 0.8731851746156316, "grad_norm": 0.31245869398117065, "learning_rate": 2.4090618905537986e-06, "loss": 0.1162, "step": 48956 }, { "epoch": 0.8732030107373453, "grad_norm": 0.2820016145706177, "learning_rate": 2.408395285166079e-06, "loss": 0.0776, "step": 48957 }, { "epoch": 0.873220846859059, "grad_norm": 0.22606006264686584, "learning_rate": 2.407728767351217e-06, "loss": 0.0756, "step": 48958 }, { "epoch": 0.8732386829807727, "grad_norm": 0.2523399591445923, "learning_rate": 2.407062337111804e-06, "loss": 0.0923, "step": 48959 }, { "epoch": 0.8732565191024864, "grad_norm": 0.1685180813074112, "learning_rate": 2.406395994450422e-06, "loss": 0.0585, "step": 48960 }, { "epoch": 0.8732743552242, "grad_norm": 0.22345389425754547, "learning_rate": 2.4057297393696525e-06, "loss": 0.0706, "step": 48961 }, { "epoch": 0.8732921913459137, "grad_norm": 0.2905835509300232, "learning_rate": 2.4050635718720794e-06, "loss": 0.0636, "step": 48962 }, { "epoch": 0.8733100274676274, "grad_norm": 0.33429697155952454, "learning_rate": 2.404397491960278e-06, "loss": 0.1072, "step": 48963 }, { "epoch": 0.8733278635893411, "grad_norm": 0.32072851061820984, "learning_rate": 2.403731499636841e-06, "loss": 0.1541, "step": 48964 }, { "epoch": 0.8733456997110548, "grad_norm": 0.245486319065094, "learning_rate": 2.4030655949043413e-06, "loss": 0.0792, "step": 48965 }, { "epoch": 0.8733635358327685, "grad_norm": 0.2666672170162201, "learning_rate": 2.4023997777653654e-06, "loss": 0.0818, "step": 48966 }, { "epoch": 0.8733813719544822, "grad_norm": 0.23721128702163696, "learning_rate": 2.4017340482224928e-06, "loss": 0.0862, "step": 48967 }, { "epoch": 0.8733992080761959, "grad_norm": 0.2651722729206085, "learning_rate": 2.4010684062783064e-06, "loss": 0.0805, "step": 48968 }, { "epoch": 0.8734170441979096, "grad_norm": 0.2025548219680786, "learning_rate": 2.400402851935385e-06, "loss": 0.0994, "step": 48969 }, { "epoch": 0.8734348803196234, "grad_norm": 0.2940991222858429, "learning_rate": 2.399737385196307e-06, "loss": 0.2086, "step": 48970 }, { "epoch": 0.873452716441337, "grad_norm": 0.30726146697998047, "learning_rate": 2.399072006063649e-06, "loss": 0.0957, "step": 48971 }, { "epoch": 0.8734705525630507, "grad_norm": 0.2609618306159973, "learning_rate": 2.39840671454e-06, "loss": 0.0895, "step": 48972 }, { "epoch": 0.8734883886847644, "grad_norm": 0.2944090664386749, "learning_rate": 2.397741510627929e-06, "loss": 0.1365, "step": 48973 }, { "epoch": 0.8735062248064781, "grad_norm": 0.2327173352241516, "learning_rate": 2.3970763943300223e-06, "loss": 0.0718, "step": 48974 }, { "epoch": 0.8735240609281918, "grad_norm": 0.31209731101989746, "learning_rate": 2.396411365648854e-06, "loss": 0.1147, "step": 48975 }, { "epoch": 0.8735418970499055, "grad_norm": 0.24604645371437073, "learning_rate": 2.3957464245869947e-06, "loss": 0.1014, "step": 48976 }, { "epoch": 0.8735597331716192, "grad_norm": 0.24622175097465515, "learning_rate": 2.395081571147037e-06, "loss": 0.1158, "step": 48977 }, { "epoch": 0.8735775692933329, "grad_norm": 0.2905532121658325, "learning_rate": 2.3944168053315454e-06, "loss": 0.0596, "step": 48978 }, { "epoch": 0.8735954054150465, "grad_norm": 0.3453649878501892, "learning_rate": 2.393752127143106e-06, "loss": 0.085, "step": 48979 }, { "epoch": 0.8736132415367602, "grad_norm": 0.25328147411346436, "learning_rate": 2.3930875365842877e-06, "loss": 0.1388, "step": 48980 }, { "epoch": 0.8736310776584739, "grad_norm": 0.22761796414852142, "learning_rate": 2.392423033657673e-06, "loss": 0.1264, "step": 48981 }, { "epoch": 0.8736489137801876, "grad_norm": 0.20159262418746948, "learning_rate": 2.391758618365836e-06, "loss": 0.0956, "step": 48982 }, { "epoch": 0.8736667499019013, "grad_norm": 0.29003745317459106, "learning_rate": 2.3910942907113515e-06, "loss": 0.1828, "step": 48983 }, { "epoch": 0.873684586023615, "grad_norm": 0.24854856729507446, "learning_rate": 2.3904300506967904e-06, "loss": 0.0682, "step": 48984 }, { "epoch": 0.8737024221453287, "grad_norm": 0.30418720841407776, "learning_rate": 2.3897658983247335e-06, "loss": 0.2264, "step": 48985 }, { "epoch": 0.8737202582670424, "grad_norm": 0.27203935384750366, "learning_rate": 2.3891018335977566e-06, "loss": 0.1226, "step": 48986 }, { "epoch": 0.8737380943887562, "grad_norm": 0.2965022027492523, "learning_rate": 2.3884378565184273e-06, "loss": 0.0947, "step": 48987 }, { "epoch": 0.8737559305104698, "grad_norm": 0.30365481972694397, "learning_rate": 2.387773967089324e-06, "loss": 0.1401, "step": 48988 }, { "epoch": 0.8737737666321835, "grad_norm": 0.255367636680603, "learning_rate": 2.3871101653130167e-06, "loss": 0.1138, "step": 48989 }, { "epoch": 0.8737916027538972, "grad_norm": 0.2791960835456848, "learning_rate": 2.3864464511920814e-06, "loss": 0.1193, "step": 48990 }, { "epoch": 0.8738094388756109, "grad_norm": 0.25066882371902466, "learning_rate": 2.385782824729094e-06, "loss": 0.1053, "step": 48991 }, { "epoch": 0.8738272749973246, "grad_norm": 0.2879200279712677, "learning_rate": 2.3851192859266153e-06, "loss": 0.0994, "step": 48992 }, { "epoch": 0.8738451111190383, "grad_norm": 0.2680374085903168, "learning_rate": 2.38445583478723e-06, "loss": 0.1341, "step": 48993 }, { "epoch": 0.873862947240752, "grad_norm": 0.26426082849502563, "learning_rate": 2.3837924713135007e-06, "loss": 0.1275, "step": 48994 }, { "epoch": 0.8738807833624657, "grad_norm": 0.29085299372673035, "learning_rate": 2.3831291955080104e-06, "loss": 0.1682, "step": 48995 }, { "epoch": 0.8738986194841794, "grad_norm": 0.2757631242275238, "learning_rate": 2.3824660073733214e-06, "loss": 0.0881, "step": 48996 }, { "epoch": 0.873916455605893, "grad_norm": 0.3565802574157715, "learning_rate": 2.3818029069120008e-06, "loss": 0.1095, "step": 48997 }, { "epoch": 0.8739342917276067, "grad_norm": 0.4203716516494751, "learning_rate": 2.3811398941266272e-06, "loss": 0.1211, "step": 48998 }, { "epoch": 0.8739521278493204, "grad_norm": 0.29902541637420654, "learning_rate": 2.3804769690197707e-06, "loss": 0.1126, "step": 48999 }, { "epoch": 0.8739699639710341, "grad_norm": 0.2832728624343872, "learning_rate": 2.3798141315939964e-06, "loss": 0.1263, "step": 49000 }, { "epoch": 0.8739699639710341, "eval_loss": 0.10793811827898026, "eval_runtime": 107.3556, "eval_samples_per_second": 9.538, "eval_steps_per_second": 1.593, "step": 49000 }, { "epoch": 0.8739878000927478, "grad_norm": 0.34250015020370483, "learning_rate": 2.3791513818518714e-06, "loss": 0.1478, "step": 49001 }, { "epoch": 0.8740056362144615, "grad_norm": 0.3086620569229126, "learning_rate": 2.378488719795971e-06, "loss": 0.0904, "step": 49002 }, { "epoch": 0.8740234723361752, "grad_norm": 0.3199694752693176, "learning_rate": 2.377826145428863e-06, "loss": 0.1209, "step": 49003 }, { "epoch": 0.874041308457889, "grad_norm": 0.32794883847236633, "learning_rate": 2.3771636587531147e-06, "loss": 0.1098, "step": 49004 }, { "epoch": 0.8740591445796027, "grad_norm": 0.21602557599544525, "learning_rate": 2.376501259771294e-06, "loss": 0.1042, "step": 49005 }, { "epoch": 0.8740769807013163, "grad_norm": 0.19758708775043488, "learning_rate": 2.3758389484859644e-06, "loss": 0.1144, "step": 49006 }, { "epoch": 0.87409481682303, "grad_norm": 0.26631733775138855, "learning_rate": 2.3751767248996972e-06, "loss": 0.0913, "step": 49007 }, { "epoch": 0.8741126529447437, "grad_norm": 0.30449196696281433, "learning_rate": 2.3745145890150617e-06, "loss": 0.1081, "step": 49008 }, { "epoch": 0.8741304890664574, "grad_norm": 0.2515089809894562, "learning_rate": 2.373852540834623e-06, "loss": 0.0965, "step": 49009 }, { "epoch": 0.8741483251881711, "grad_norm": 0.2104242742061615, "learning_rate": 2.373190580360943e-06, "loss": 0.0941, "step": 49010 }, { "epoch": 0.8741661613098848, "grad_norm": 0.29072099924087524, "learning_rate": 2.372528707596594e-06, "loss": 0.0954, "step": 49011 }, { "epoch": 0.8741839974315985, "grad_norm": 0.2656518220901489, "learning_rate": 2.3718669225441414e-06, "loss": 0.1452, "step": 49012 }, { "epoch": 0.8742018335533122, "grad_norm": 0.30386754870414734, "learning_rate": 2.3712052252061467e-06, "loss": 0.117, "step": 49013 }, { "epoch": 0.8742196696750258, "grad_norm": 0.46391886472702026, "learning_rate": 2.3705436155851748e-06, "loss": 0.1065, "step": 49014 }, { "epoch": 0.8742375057967395, "grad_norm": 0.23648697137832642, "learning_rate": 2.3698820936837925e-06, "loss": 0.0706, "step": 49015 }, { "epoch": 0.8742553419184532, "grad_norm": 0.25519511103630066, "learning_rate": 2.3692206595045646e-06, "loss": 0.0876, "step": 49016 }, { "epoch": 0.8742731780401669, "grad_norm": 0.228533074259758, "learning_rate": 2.3685593130500535e-06, "loss": 0.123, "step": 49017 }, { "epoch": 0.8742910141618806, "grad_norm": 0.19661208987236023, "learning_rate": 2.367898054322823e-06, "loss": 0.0872, "step": 49018 }, { "epoch": 0.8743088502835943, "grad_norm": 0.2891220450401306, "learning_rate": 2.3672368833254326e-06, "loss": 0.1572, "step": 49019 }, { "epoch": 0.8743266864053081, "grad_norm": 0.35472527146339417, "learning_rate": 2.3665758000604555e-06, "loss": 0.1262, "step": 49020 }, { "epoch": 0.8743445225270218, "grad_norm": 0.3111489713191986, "learning_rate": 2.365914804530442e-06, "loss": 0.1517, "step": 49021 }, { "epoch": 0.8743623586487355, "grad_norm": 0.28271231055259705, "learning_rate": 2.3652538967379623e-06, "loss": 0.0666, "step": 49022 }, { "epoch": 0.8743801947704491, "grad_norm": 0.34630849957466125, "learning_rate": 2.3645930766855754e-06, "loss": 0.1397, "step": 49023 }, { "epoch": 0.8743980308921628, "grad_norm": 0.29569879174232483, "learning_rate": 2.3639323443758467e-06, "loss": 0.0947, "step": 49024 }, { "epoch": 0.8744158670138765, "grad_norm": 0.24688155949115753, "learning_rate": 2.363271699811334e-06, "loss": 0.1362, "step": 49025 }, { "epoch": 0.8744337031355902, "grad_norm": 0.26550984382629395, "learning_rate": 2.3626111429946e-06, "loss": 0.0869, "step": 49026 }, { "epoch": 0.8744515392573039, "grad_norm": 0.329212486743927, "learning_rate": 2.3619506739281983e-06, "loss": 0.1571, "step": 49027 }, { "epoch": 0.8744693753790176, "grad_norm": 0.29569125175476074, "learning_rate": 2.3612902926146987e-06, "loss": 0.1208, "step": 49028 }, { "epoch": 0.8744872115007313, "grad_norm": 0.2973051965236664, "learning_rate": 2.360629999056657e-06, "loss": 0.1681, "step": 49029 }, { "epoch": 0.874505047622445, "grad_norm": 0.3216267228126526, "learning_rate": 2.3599697932566335e-06, "loss": 0.1139, "step": 49030 }, { "epoch": 0.8745228837441587, "grad_norm": 0.2688002586364746, "learning_rate": 2.359309675217189e-06, "loss": 0.0648, "step": 49031 }, { "epoch": 0.8745407198658723, "grad_norm": 0.25336775183677673, "learning_rate": 2.3586496449408718e-06, "loss": 0.1014, "step": 49032 }, { "epoch": 0.874558555987586, "grad_norm": 0.23914194107055664, "learning_rate": 2.357989702430255e-06, "loss": 0.0985, "step": 49033 }, { "epoch": 0.8745763921092997, "grad_norm": 0.2207818478345871, "learning_rate": 2.3573298476878863e-06, "loss": 0.0791, "step": 49034 }, { "epoch": 0.8745942282310134, "grad_norm": 0.23462559282779694, "learning_rate": 2.3566700807163304e-06, "loss": 0.0742, "step": 49035 }, { "epoch": 0.8746120643527271, "grad_norm": 0.3224104940891266, "learning_rate": 2.3560104015181384e-06, "loss": 0.113, "step": 49036 }, { "epoch": 0.8746299004744409, "grad_norm": 0.2706161439418793, "learning_rate": 2.355350810095877e-06, "loss": 0.107, "step": 49037 }, { "epoch": 0.8746477365961546, "grad_norm": 0.29104551672935486, "learning_rate": 2.3546913064520946e-06, "loss": 0.1048, "step": 49038 }, { "epoch": 0.8746655727178683, "grad_norm": 0.30357736349105835, "learning_rate": 2.3540318905893533e-06, "loss": 0.1239, "step": 49039 }, { "epoch": 0.874683408839582, "grad_norm": 0.20416851341724396, "learning_rate": 2.3533725625101976e-06, "loss": 0.0879, "step": 49040 }, { "epoch": 0.8747012449612956, "grad_norm": 0.21284979581832886, "learning_rate": 2.352713322217201e-06, "loss": 0.0856, "step": 49041 }, { "epoch": 0.8747190810830093, "grad_norm": 0.26216670870780945, "learning_rate": 2.3520541697129057e-06, "loss": 0.0979, "step": 49042 }, { "epoch": 0.874736917204723, "grad_norm": 0.27146539092063904, "learning_rate": 2.3513951049998733e-06, "loss": 0.0885, "step": 49043 }, { "epoch": 0.8747547533264367, "grad_norm": 0.35666701197624207, "learning_rate": 2.350736128080655e-06, "loss": 0.1283, "step": 49044 }, { "epoch": 0.8747725894481504, "grad_norm": 0.31937843561172485, "learning_rate": 2.350077238957801e-06, "loss": 0.1195, "step": 49045 }, { "epoch": 0.8747904255698641, "grad_norm": 0.3077686131000519, "learning_rate": 2.349418437633874e-06, "loss": 0.1224, "step": 49046 }, { "epoch": 0.8748082616915778, "grad_norm": 0.32812246680259705, "learning_rate": 2.3487597241114266e-06, "loss": 0.0883, "step": 49047 }, { "epoch": 0.8748260978132915, "grad_norm": 0.24321341514587402, "learning_rate": 2.3481010983930046e-06, "loss": 0.1547, "step": 49048 }, { "epoch": 0.8748439339350051, "grad_norm": 0.3040071725845337, "learning_rate": 2.3474425604811723e-06, "loss": 0.088, "step": 49049 }, { "epoch": 0.8748617700567188, "grad_norm": 0.21051782369613647, "learning_rate": 2.346784110378472e-06, "loss": 0.0804, "step": 49050 }, { "epoch": 0.8748796061784325, "grad_norm": 0.3067069947719574, "learning_rate": 2.346125748087463e-06, "loss": 0.1624, "step": 49051 }, { "epoch": 0.8748974423001462, "grad_norm": 0.3249727785587311, "learning_rate": 2.3454674736106962e-06, "loss": 0.0839, "step": 49052 }, { "epoch": 0.8749152784218599, "grad_norm": 0.2631492614746094, "learning_rate": 2.3448092869507195e-06, "loss": 0.0942, "step": 49053 }, { "epoch": 0.8749331145435737, "grad_norm": 0.20457443594932556, "learning_rate": 2.3441511881100888e-06, "loss": 0.1043, "step": 49054 }, { "epoch": 0.8749509506652874, "grad_norm": 0.2152179330587387, "learning_rate": 2.343493177091355e-06, "loss": 0.1316, "step": 49055 }, { "epoch": 0.8749687867870011, "grad_norm": 0.2957698106765747, "learning_rate": 2.342835253897063e-06, "loss": 0.1275, "step": 49056 }, { "epoch": 0.8749866229087148, "grad_norm": 0.2898455858230591, "learning_rate": 2.3421774185297728e-06, "loss": 0.1532, "step": 49057 }, { "epoch": 0.8750044590304284, "grad_norm": 0.25608012080192566, "learning_rate": 2.3415196709920208e-06, "loss": 0.1139, "step": 49058 }, { "epoch": 0.8750222951521421, "grad_norm": 0.43150776624679565, "learning_rate": 2.340862011286368e-06, "loss": 0.1279, "step": 49059 }, { "epoch": 0.8750401312738558, "grad_norm": 0.188703715801239, "learning_rate": 2.340204439415364e-06, "loss": 0.0693, "step": 49060 }, { "epoch": 0.8750579673955695, "grad_norm": 0.22968249022960663, "learning_rate": 2.339546955381547e-06, "loss": 0.0775, "step": 49061 }, { "epoch": 0.8750758035172832, "grad_norm": 0.36129093170166016, "learning_rate": 2.3388895591874764e-06, "loss": 0.0828, "step": 49062 }, { "epoch": 0.8750936396389969, "grad_norm": 0.27361932396888733, "learning_rate": 2.3382322508356922e-06, "loss": 0.1575, "step": 49063 }, { "epoch": 0.8751114757607106, "grad_norm": 0.2412867695093155, "learning_rate": 2.3375750303287535e-06, "loss": 0.0973, "step": 49064 }, { "epoch": 0.8751293118824243, "grad_norm": 0.38039711117744446, "learning_rate": 2.3369178976692022e-06, "loss": 0.121, "step": 49065 }, { "epoch": 0.875147148004138, "grad_norm": 0.3710176348686218, "learning_rate": 2.3362608528595786e-06, "loss": 0.1401, "step": 49066 }, { "epoch": 0.8751649841258516, "grad_norm": 0.25758734345436096, "learning_rate": 2.3356038959024386e-06, "loss": 0.0936, "step": 49067 }, { "epoch": 0.8751828202475653, "grad_norm": 0.23830674588680267, "learning_rate": 2.334947026800327e-06, "loss": 0.088, "step": 49068 }, { "epoch": 0.875200656369279, "grad_norm": 0.3027196228504181, "learning_rate": 2.3342902455557895e-06, "loss": 0.1726, "step": 49069 }, { "epoch": 0.8752184924909927, "grad_norm": 0.26158955693244934, "learning_rate": 2.3336335521713714e-06, "loss": 0.1129, "step": 49070 }, { "epoch": 0.8752363286127065, "grad_norm": 0.2567870020866394, "learning_rate": 2.332976946649615e-06, "loss": 0.1151, "step": 49071 }, { "epoch": 0.8752541647344202, "grad_norm": 0.2848890721797943, "learning_rate": 2.3323204289930734e-06, "loss": 0.1008, "step": 49072 }, { "epoch": 0.8752720008561339, "grad_norm": 0.36752718687057495, "learning_rate": 2.3316639992042836e-06, "loss": 0.1255, "step": 49073 }, { "epoch": 0.8752898369778476, "grad_norm": 0.23002707958221436, "learning_rate": 2.331007657285797e-06, "loss": 0.0957, "step": 49074 }, { "epoch": 0.8753076730995613, "grad_norm": 0.32130834460258484, "learning_rate": 2.3303514032401497e-06, "loss": 0.1199, "step": 49075 }, { "epoch": 0.8753255092212749, "grad_norm": 0.32750561833381653, "learning_rate": 2.329695237069893e-06, "loss": 0.1342, "step": 49076 }, { "epoch": 0.8753433453429886, "grad_norm": 0.21657831966876984, "learning_rate": 2.3290391587775627e-06, "loss": 0.1037, "step": 49077 }, { "epoch": 0.8753611814647023, "grad_norm": 0.19653582572937012, "learning_rate": 2.3283831683657136e-06, "loss": 0.0896, "step": 49078 }, { "epoch": 0.875379017586416, "grad_norm": 0.4856494665145874, "learning_rate": 2.3277272658368765e-06, "loss": 0.1917, "step": 49079 }, { "epoch": 0.8753968537081297, "grad_norm": 0.2232699692249298, "learning_rate": 2.327071451193602e-06, "loss": 0.0754, "step": 49080 }, { "epoch": 0.8754146898298434, "grad_norm": 0.2399214655160904, "learning_rate": 2.326415724438433e-06, "loss": 0.0771, "step": 49081 }, { "epoch": 0.8754325259515571, "grad_norm": 0.31210198998451233, "learning_rate": 2.3257600855739052e-06, "loss": 0.1291, "step": 49082 }, { "epoch": 0.8754503620732708, "grad_norm": 0.3355759382247925, "learning_rate": 2.3251045346025624e-06, "loss": 0.1158, "step": 49083 }, { "epoch": 0.8754681981949844, "grad_norm": 0.18690429627895355, "learning_rate": 2.3244490715269434e-06, "loss": 0.0689, "step": 49084 }, { "epoch": 0.8754860343166981, "grad_norm": 0.3659724295139313, "learning_rate": 2.3237936963495964e-06, "loss": 0.0868, "step": 49085 }, { "epoch": 0.8755038704384118, "grad_norm": 0.2935331165790558, "learning_rate": 2.323138409073056e-06, "loss": 0.141, "step": 49086 }, { "epoch": 0.8755217065601255, "grad_norm": 0.27686309814453125, "learning_rate": 2.3224832096998633e-06, "loss": 0.1436, "step": 49087 }, { "epoch": 0.8755395426818393, "grad_norm": 0.19992637634277344, "learning_rate": 2.3218280982325533e-06, "loss": 0.0871, "step": 49088 }, { "epoch": 0.875557378803553, "grad_norm": 0.2729877233505249, "learning_rate": 2.321173074673677e-06, "loss": 0.1022, "step": 49089 }, { "epoch": 0.8755752149252667, "grad_norm": 0.5481345653533936, "learning_rate": 2.32051813902576e-06, "loss": 0.0951, "step": 49090 }, { "epoch": 0.8755930510469804, "grad_norm": 0.28571000695228577, "learning_rate": 2.3198632912913547e-06, "loss": 0.1532, "step": 49091 }, { "epoch": 0.8756108871686941, "grad_norm": 0.23745673894882202, "learning_rate": 2.3192085314729856e-06, "loss": 0.1044, "step": 49092 }, { "epoch": 0.8756287232904078, "grad_norm": 0.3053630292415619, "learning_rate": 2.3185538595732056e-06, "loss": 0.1161, "step": 49093 }, { "epoch": 0.8756465594121214, "grad_norm": 0.32495880126953125, "learning_rate": 2.317899275594543e-06, "loss": 0.1077, "step": 49094 }, { "epoch": 0.8756643955338351, "grad_norm": 0.20112845301628113, "learning_rate": 2.3172447795395345e-06, "loss": 0.1073, "step": 49095 }, { "epoch": 0.8756822316555488, "grad_norm": 0.27047351002693176, "learning_rate": 2.3165903714107236e-06, "loss": 0.1456, "step": 49096 }, { "epoch": 0.8757000677772625, "grad_norm": 0.3040211796760559, "learning_rate": 2.3159360512106378e-06, "loss": 0.0785, "step": 49097 }, { "epoch": 0.8757179038989762, "grad_norm": 0.21334435045719147, "learning_rate": 2.3152818189418223e-06, "loss": 0.0979, "step": 49098 }, { "epoch": 0.8757357400206899, "grad_norm": 0.29952272772789, "learning_rate": 2.314627674606809e-06, "loss": 0.0598, "step": 49099 }, { "epoch": 0.8757535761424036, "grad_norm": 0.29360049962997437, "learning_rate": 2.3139736182081346e-06, "loss": 0.141, "step": 49100 }, { "epoch": 0.8757714122641173, "grad_norm": 0.2605254352092743, "learning_rate": 2.31331964974833e-06, "loss": 0.0846, "step": 49101 }, { "epoch": 0.8757892483858309, "grad_norm": 0.28678998351097107, "learning_rate": 2.3126657692299382e-06, "loss": 0.1371, "step": 49102 }, { "epoch": 0.8758070845075446, "grad_norm": 0.2714792788028717, "learning_rate": 2.31201197665549e-06, "loss": 0.0675, "step": 49103 }, { "epoch": 0.8758249206292583, "grad_norm": 0.33933860063552856, "learning_rate": 2.311358272027514e-06, "loss": 0.1232, "step": 49104 }, { "epoch": 0.8758427567509721, "grad_norm": 0.2021796852350235, "learning_rate": 2.31070465534855e-06, "loss": 0.0953, "step": 49105 }, { "epoch": 0.8758605928726858, "grad_norm": 0.2256878912448883, "learning_rate": 2.310051126621138e-06, "loss": 0.1043, "step": 49106 }, { "epoch": 0.8758784289943995, "grad_norm": 0.25990548729896545, "learning_rate": 2.3093976858478027e-06, "loss": 0.0932, "step": 49107 }, { "epoch": 0.8758962651161132, "grad_norm": 0.26381561160087585, "learning_rate": 2.308744333031079e-06, "loss": 0.1041, "step": 49108 }, { "epoch": 0.8759141012378269, "grad_norm": 0.2565864622592926, "learning_rate": 2.3080910681735013e-06, "loss": 0.1022, "step": 49109 }, { "epoch": 0.8759319373595406, "grad_norm": 0.27001383900642395, "learning_rate": 2.3074378912775948e-06, "loss": 0.1301, "step": 49110 }, { "epoch": 0.8759497734812542, "grad_norm": 0.2518083453178406, "learning_rate": 2.306784802345899e-06, "loss": 0.1182, "step": 49111 }, { "epoch": 0.8759676096029679, "grad_norm": 0.29305702447891235, "learning_rate": 2.3061318013809453e-06, "loss": 0.0941, "step": 49112 }, { "epoch": 0.8759854457246816, "grad_norm": 0.29134494066238403, "learning_rate": 2.305478888385265e-06, "loss": 0.0533, "step": 49113 }, { "epoch": 0.8760032818463953, "grad_norm": 0.3118290305137634, "learning_rate": 2.3048260633613787e-06, "loss": 0.1092, "step": 49114 }, { "epoch": 0.876021117968109, "grad_norm": 0.25105753540992737, "learning_rate": 2.3041733263118313e-06, "loss": 0.0994, "step": 49115 }, { "epoch": 0.8760389540898227, "grad_norm": 0.23561029136180878, "learning_rate": 2.3035206772391487e-06, "loss": 0.0962, "step": 49116 }, { "epoch": 0.8760567902115364, "grad_norm": 0.2948676645755768, "learning_rate": 2.302868116145851e-06, "loss": 0.1096, "step": 49117 }, { "epoch": 0.8760746263332501, "grad_norm": 0.3280108571052551, "learning_rate": 2.3022156430344834e-06, "loss": 0.1247, "step": 49118 }, { "epoch": 0.8760924624549637, "grad_norm": 0.2578265070915222, "learning_rate": 2.301563257907563e-06, "loss": 0.1007, "step": 49119 }, { "epoch": 0.8761102985766774, "grad_norm": 0.25525856018066406, "learning_rate": 2.3009109607676243e-06, "loss": 0.1064, "step": 49120 }, { "epoch": 0.8761281346983912, "grad_norm": 0.3216416835784912, "learning_rate": 2.300258751617199e-06, "loss": 0.1401, "step": 49121 }, { "epoch": 0.8761459708201049, "grad_norm": 0.2494664341211319, "learning_rate": 2.2996066304588086e-06, "loss": 0.1033, "step": 49122 }, { "epoch": 0.8761638069418186, "grad_norm": 0.31407660245895386, "learning_rate": 2.2989545972949804e-06, "loss": 0.1143, "step": 49123 }, { "epoch": 0.8761816430635323, "grad_norm": 0.2560386657714844, "learning_rate": 2.298302652128248e-06, "loss": 0.1326, "step": 49124 }, { "epoch": 0.876199479185246, "grad_norm": 0.6525815725326538, "learning_rate": 2.2976507949611343e-06, "loss": 0.1205, "step": 49125 }, { "epoch": 0.8762173153069597, "grad_norm": 0.3841899037361145, "learning_rate": 2.2969990257961703e-06, "loss": 0.1238, "step": 49126 }, { "epoch": 0.8762351514286734, "grad_norm": 0.20940366387367249, "learning_rate": 2.296347344635871e-06, "loss": 0.112, "step": 49127 }, { "epoch": 0.876252987550387, "grad_norm": 0.29055291414260864, "learning_rate": 2.2956957514827794e-06, "loss": 0.1231, "step": 49128 }, { "epoch": 0.8762708236721007, "grad_norm": 0.29986679553985596, "learning_rate": 2.295044246339412e-06, "loss": 0.0965, "step": 49129 }, { "epoch": 0.8762886597938144, "grad_norm": 0.3281868100166321, "learning_rate": 2.2943928292082944e-06, "loss": 0.1549, "step": 49130 }, { "epoch": 0.8763064959155281, "grad_norm": 0.3696381449699402, "learning_rate": 2.2937415000919475e-06, "loss": 0.135, "step": 49131 }, { "epoch": 0.8763243320372418, "grad_norm": 0.2898821234703064, "learning_rate": 2.2930902589928998e-06, "loss": 0.0965, "step": 49132 }, { "epoch": 0.8763421681589555, "grad_norm": 0.3090604245662689, "learning_rate": 2.2924391059136853e-06, "loss": 0.1015, "step": 49133 }, { "epoch": 0.8763600042806692, "grad_norm": 0.2524409592151642, "learning_rate": 2.2917880408568183e-06, "loss": 0.1463, "step": 49134 }, { "epoch": 0.8763778404023829, "grad_norm": 0.22474107146263123, "learning_rate": 2.291137063824822e-06, "loss": 0.1468, "step": 49135 }, { "epoch": 0.8763956765240966, "grad_norm": 0.28334102034568787, "learning_rate": 2.2904861748202192e-06, "loss": 0.1511, "step": 49136 }, { "epoch": 0.8764135126458102, "grad_norm": 0.24980977177619934, "learning_rate": 2.289835373845539e-06, "loss": 0.1377, "step": 49137 }, { "epoch": 0.876431348767524, "grad_norm": 0.3001442551612854, "learning_rate": 2.2891846609033012e-06, "loss": 0.1466, "step": 49138 }, { "epoch": 0.8764491848892377, "grad_norm": 0.28811752796173096, "learning_rate": 2.2885340359960287e-06, "loss": 0.0944, "step": 49139 }, { "epoch": 0.8764670210109514, "grad_norm": 0.25675734877586365, "learning_rate": 2.2878834991262364e-06, "loss": 0.0984, "step": 49140 }, { "epoch": 0.8764848571326651, "grad_norm": 0.244055837392807, "learning_rate": 2.2872330502964583e-06, "loss": 0.1363, "step": 49141 }, { "epoch": 0.8765026932543788, "grad_norm": 0.1616104692220688, "learning_rate": 2.286582689509209e-06, "loss": 0.0979, "step": 49142 }, { "epoch": 0.8765205293760925, "grad_norm": 0.2777489125728607, "learning_rate": 2.285932416767009e-06, "loss": 0.082, "step": 49143 }, { "epoch": 0.8765383654978062, "grad_norm": 0.32997822761535645, "learning_rate": 2.2852822320723776e-06, "loss": 0.1054, "step": 49144 }, { "epoch": 0.8765562016195199, "grad_norm": 0.30911630392074585, "learning_rate": 2.2846321354278417e-06, "loss": 0.1258, "step": 49145 }, { "epoch": 0.8765740377412335, "grad_norm": 0.26052671670913696, "learning_rate": 2.2839821268359125e-06, "loss": 0.1143, "step": 49146 }, { "epoch": 0.8765918738629472, "grad_norm": 0.32524868845939636, "learning_rate": 2.2833322062991186e-06, "loss": 0.1055, "step": 49147 }, { "epoch": 0.8766097099846609, "grad_norm": 0.23927100002765656, "learning_rate": 2.282682373819975e-06, "loss": 0.1017, "step": 49148 }, { "epoch": 0.8766275461063746, "grad_norm": 0.3291065990924835, "learning_rate": 2.282032629400996e-06, "loss": 0.1465, "step": 49149 }, { "epoch": 0.8766453822280883, "grad_norm": 0.3337452709674835, "learning_rate": 2.28138297304471e-06, "loss": 0.1317, "step": 49150 }, { "epoch": 0.876663218349802, "grad_norm": 0.27962666749954224, "learning_rate": 2.280733404753632e-06, "loss": 0.1315, "step": 49151 }, { "epoch": 0.8766810544715157, "grad_norm": 0.29704219102859497, "learning_rate": 2.280083924530277e-06, "loss": 0.1401, "step": 49152 }, { "epoch": 0.8766988905932294, "grad_norm": 0.25297796726226807, "learning_rate": 2.279434532377159e-06, "loss": 0.0742, "step": 49153 }, { "epoch": 0.876716726714943, "grad_norm": 0.18221089243888855, "learning_rate": 2.2787852282968036e-06, "loss": 0.0629, "step": 49154 }, { "epoch": 0.8767345628366568, "grad_norm": 0.2791878581047058, "learning_rate": 2.2781360122917262e-06, "loss": 0.0758, "step": 49155 }, { "epoch": 0.8767523989583705, "grad_norm": 0.24226456880569458, "learning_rate": 2.277486884364441e-06, "loss": 0.1054, "step": 49156 }, { "epoch": 0.8767702350800842, "grad_norm": 0.3039802312850952, "learning_rate": 2.2768378445174596e-06, "loss": 0.0965, "step": 49157 }, { "epoch": 0.8767880712017979, "grad_norm": 0.26751869916915894, "learning_rate": 2.2761888927533086e-06, "loss": 0.1124, "step": 49158 }, { "epoch": 0.8768059073235116, "grad_norm": 0.29262450337409973, "learning_rate": 2.2755400290744964e-06, "loss": 0.1309, "step": 49159 }, { "epoch": 0.8768237434452253, "grad_norm": 0.2677420973777771, "learning_rate": 2.274891253483538e-06, "loss": 0.1008, "step": 49160 }, { "epoch": 0.876841579566939, "grad_norm": 0.2991850674152374, "learning_rate": 2.2742425659829537e-06, "loss": 0.1178, "step": 49161 }, { "epoch": 0.8768594156886527, "grad_norm": 0.2580156922340393, "learning_rate": 2.2735939665752494e-06, "loss": 0.1514, "step": 49162 }, { "epoch": 0.8768772518103664, "grad_norm": 0.21000432968139648, "learning_rate": 2.272945455262948e-06, "loss": 0.0647, "step": 49163 }, { "epoch": 0.87689508793208, "grad_norm": 0.24749843776226044, "learning_rate": 2.272297032048559e-06, "loss": 0.0923, "step": 49164 }, { "epoch": 0.8769129240537937, "grad_norm": 0.2307685762643814, "learning_rate": 2.2716486969346e-06, "loss": 0.1076, "step": 49165 }, { "epoch": 0.8769307601755074, "grad_norm": 0.2505471408367157, "learning_rate": 2.2710004499235744e-06, "loss": 0.1082, "step": 49166 }, { "epoch": 0.8769485962972211, "grad_norm": 0.24869133532047272, "learning_rate": 2.2703522910180047e-06, "loss": 0.0755, "step": 49167 }, { "epoch": 0.8769664324189348, "grad_norm": 0.29524222016334534, "learning_rate": 2.2697042202204006e-06, "loss": 0.094, "step": 49168 }, { "epoch": 0.8769842685406485, "grad_norm": 0.3042192757129669, "learning_rate": 2.2690562375332737e-06, "loss": 0.1433, "step": 49169 }, { "epoch": 0.8770021046623622, "grad_norm": 0.2601214349269867, "learning_rate": 2.268408342959133e-06, "loss": 0.0948, "step": 49170 }, { "epoch": 0.8770199407840759, "grad_norm": 0.2645348310470581, "learning_rate": 2.2677605365004962e-06, "loss": 0.0531, "step": 49171 }, { "epoch": 0.8770377769057897, "grad_norm": 0.33378586173057556, "learning_rate": 2.2671128181598724e-06, "loss": 0.1244, "step": 49172 }, { "epoch": 0.8770556130275033, "grad_norm": 0.2772667706012726, "learning_rate": 2.2664651879397647e-06, "loss": 0.0984, "step": 49173 }, { "epoch": 0.877073449149217, "grad_norm": 0.29864782094955444, "learning_rate": 2.265817645842694e-06, "loss": 0.0958, "step": 49174 }, { "epoch": 0.8770912852709307, "grad_norm": 0.3517008125782013, "learning_rate": 2.265170191871163e-06, "loss": 0.0628, "step": 49175 }, { "epoch": 0.8771091213926444, "grad_norm": 0.25018373131752014, "learning_rate": 2.2645228260276875e-06, "loss": 0.0983, "step": 49176 }, { "epoch": 0.8771269575143581, "grad_norm": 0.2971194386482239, "learning_rate": 2.2638755483147755e-06, "loss": 0.1277, "step": 49177 }, { "epoch": 0.8771447936360718, "grad_norm": 0.318308562040329, "learning_rate": 2.263228358734934e-06, "loss": 0.1172, "step": 49178 }, { "epoch": 0.8771626297577855, "grad_norm": 0.21772401034832, "learning_rate": 2.262581257290669e-06, "loss": 0.0828, "step": 49179 }, { "epoch": 0.8771804658794992, "grad_norm": 0.2133631557226181, "learning_rate": 2.261934243984498e-06, "loss": 0.0827, "step": 49180 }, { "epoch": 0.8771983020012128, "grad_norm": 0.3687916100025177, "learning_rate": 2.2612873188189193e-06, "loss": 0.0749, "step": 49181 }, { "epoch": 0.8772161381229265, "grad_norm": 0.30929720401763916, "learning_rate": 2.260640481796447e-06, "loss": 0.0985, "step": 49182 }, { "epoch": 0.8772339742446402, "grad_norm": 0.2361096292734146, "learning_rate": 2.2599937329195827e-06, "loss": 0.077, "step": 49183 }, { "epoch": 0.8772518103663539, "grad_norm": 0.2850714921951294, "learning_rate": 2.25934707219084e-06, "loss": 0.1102, "step": 49184 }, { "epoch": 0.8772696464880676, "grad_norm": 0.26227477192878723, "learning_rate": 2.258700499612723e-06, "loss": 0.0633, "step": 49185 }, { "epoch": 0.8772874826097813, "grad_norm": 0.3133750557899475, "learning_rate": 2.2580540151877383e-06, "loss": 0.1128, "step": 49186 }, { "epoch": 0.877305318731495, "grad_norm": 0.24264498054981232, "learning_rate": 2.2574076189183833e-06, "loss": 0.0844, "step": 49187 }, { "epoch": 0.8773231548532087, "grad_norm": 0.24139946699142456, "learning_rate": 2.2567613108071735e-06, "loss": 0.0923, "step": 49188 }, { "epoch": 0.8773409909749225, "grad_norm": 0.2859754264354706, "learning_rate": 2.256115090856617e-06, "loss": 0.0877, "step": 49189 }, { "epoch": 0.8773588270966362, "grad_norm": 0.2596757113933563, "learning_rate": 2.2554689590692123e-06, "loss": 0.1293, "step": 49190 }, { "epoch": 0.8773766632183498, "grad_norm": 0.28929105401039124, "learning_rate": 2.2548229154474687e-06, "loss": 0.1235, "step": 49191 }, { "epoch": 0.8773944993400635, "grad_norm": 0.3321802020072937, "learning_rate": 2.2541769599938813e-06, "loss": 0.1362, "step": 49192 }, { "epoch": 0.8774123354617772, "grad_norm": 0.2534790337085724, "learning_rate": 2.253531092710964e-06, "loss": 0.1197, "step": 49193 }, { "epoch": 0.8774301715834909, "grad_norm": 0.23860037326812744, "learning_rate": 2.2528853136012164e-06, "loss": 0.083, "step": 49194 }, { "epoch": 0.8774480077052046, "grad_norm": 0.3414667546749115, "learning_rate": 2.2522396226671432e-06, "loss": 0.1303, "step": 49195 }, { "epoch": 0.8774658438269183, "grad_norm": 0.19862067699432373, "learning_rate": 2.2515940199112408e-06, "loss": 0.0688, "step": 49196 }, { "epoch": 0.877483679948632, "grad_norm": 0.30365604162216187, "learning_rate": 2.25094850533602e-06, "loss": 0.1066, "step": 49197 }, { "epoch": 0.8775015160703457, "grad_norm": 0.21867285668849945, "learning_rate": 2.250303078943983e-06, "loss": 0.0978, "step": 49198 }, { "epoch": 0.8775193521920593, "grad_norm": 0.21739085018634796, "learning_rate": 2.249657740737626e-06, "loss": 0.079, "step": 49199 }, { "epoch": 0.877537188313773, "grad_norm": 0.2717151641845703, "learning_rate": 2.2490124907194513e-06, "loss": 0.0733, "step": 49200 }, { "epoch": 0.8775550244354867, "grad_norm": 0.28030017018318176, "learning_rate": 2.248367328891965e-06, "loss": 0.1617, "step": 49201 }, { "epoch": 0.8775728605572004, "grad_norm": 0.2272324562072754, "learning_rate": 2.2477222552576615e-06, "loss": 0.1178, "step": 49202 }, { "epoch": 0.8775906966789141, "grad_norm": 0.2215447574853897, "learning_rate": 2.247077269819048e-06, "loss": 0.115, "step": 49203 }, { "epoch": 0.8776085328006278, "grad_norm": 0.26245278120040894, "learning_rate": 2.2464323725786217e-06, "loss": 0.0922, "step": 49204 }, { "epoch": 0.8776263689223415, "grad_norm": 0.3499809205532074, "learning_rate": 2.2457875635388786e-06, "loss": 0.1072, "step": 49205 }, { "epoch": 0.8776442050440553, "grad_norm": 0.44295820593833923, "learning_rate": 2.2451428427023276e-06, "loss": 0.1676, "step": 49206 }, { "epoch": 0.877662041165769, "grad_norm": 0.34075355529785156, "learning_rate": 2.2444982100714607e-06, "loss": 0.0812, "step": 49207 }, { "epoch": 0.8776798772874826, "grad_norm": 0.31146571040153503, "learning_rate": 2.243853665648779e-06, "loss": 0.1255, "step": 49208 }, { "epoch": 0.8776977134091963, "grad_norm": 0.44378989934921265, "learning_rate": 2.2432092094367753e-06, "loss": 0.1322, "step": 49209 }, { "epoch": 0.87771554953091, "grad_norm": 0.20723335444927216, "learning_rate": 2.242564841437958e-06, "loss": 0.0893, "step": 49210 }, { "epoch": 0.8777333856526237, "grad_norm": 0.39754387736320496, "learning_rate": 2.24192056165482e-06, "loss": 0.0954, "step": 49211 }, { "epoch": 0.8777512217743374, "grad_norm": 0.27974045276641846, "learning_rate": 2.2412763700898563e-06, "loss": 0.1233, "step": 49212 }, { "epoch": 0.8777690578960511, "grad_norm": 0.2563789188861847, "learning_rate": 2.2406322667455653e-06, "loss": 0.1564, "step": 49213 }, { "epoch": 0.8777868940177648, "grad_norm": 0.4163587987422943, "learning_rate": 2.239988251624442e-06, "loss": 0.1092, "step": 49214 }, { "epoch": 0.8778047301394785, "grad_norm": 0.22461192309856415, "learning_rate": 2.2393443247289842e-06, "loss": 0.1045, "step": 49215 }, { "epoch": 0.8778225662611921, "grad_norm": 0.2711215615272522, "learning_rate": 2.2387004860616955e-06, "loss": 0.1264, "step": 49216 }, { "epoch": 0.8778404023829058, "grad_norm": 0.2895231544971466, "learning_rate": 2.238056735625063e-06, "loss": 0.1195, "step": 49217 }, { "epoch": 0.8778582385046195, "grad_norm": 0.19543756544589996, "learning_rate": 2.237413073421582e-06, "loss": 0.0809, "step": 49218 }, { "epoch": 0.8778760746263332, "grad_norm": 0.30299803614616394, "learning_rate": 2.236769499453753e-06, "loss": 0.1072, "step": 49219 }, { "epoch": 0.8778939107480469, "grad_norm": 0.26403459906578064, "learning_rate": 2.2361260137240657e-06, "loss": 0.1134, "step": 49220 }, { "epoch": 0.8779117468697606, "grad_norm": 0.24129967391490936, "learning_rate": 2.2354826162350212e-06, "loss": 0.1092, "step": 49221 }, { "epoch": 0.8779295829914744, "grad_norm": 0.22784292697906494, "learning_rate": 2.2348393069891005e-06, "loss": 0.064, "step": 49222 }, { "epoch": 0.8779474191131881, "grad_norm": 0.31371980905532837, "learning_rate": 2.23419608598881e-06, "loss": 0.1511, "step": 49223 }, { "epoch": 0.8779652552349018, "grad_norm": 0.31481418013572693, "learning_rate": 2.2335529532366394e-06, "loss": 0.1166, "step": 49224 }, { "epoch": 0.8779830913566155, "grad_norm": 0.3452967405319214, "learning_rate": 2.2329099087350812e-06, "loss": 0.1224, "step": 49225 }, { "epoch": 0.8780009274783291, "grad_norm": 0.3746187686920166, "learning_rate": 2.232266952486628e-06, "loss": 0.0847, "step": 49226 }, { "epoch": 0.8780187636000428, "grad_norm": 0.2701776325702667, "learning_rate": 2.2316240844937665e-06, "loss": 0.1073, "step": 49227 }, { "epoch": 0.8780365997217565, "grad_norm": 0.3587571084499359, "learning_rate": 2.2309813047589973e-06, "loss": 0.11, "step": 49228 }, { "epoch": 0.8780544358434702, "grad_norm": 0.2687029540538788, "learning_rate": 2.230338613284805e-06, "loss": 0.1121, "step": 49229 }, { "epoch": 0.8780722719651839, "grad_norm": 0.32443615794181824, "learning_rate": 2.2296960100736895e-06, "loss": 0.1115, "step": 49230 }, { "epoch": 0.8780901080868976, "grad_norm": 0.24746912717819214, "learning_rate": 2.229053495128133e-06, "loss": 0.1013, "step": 49231 }, { "epoch": 0.8781079442086113, "grad_norm": 0.26213327050209045, "learning_rate": 2.228411068450634e-06, "loss": 0.1387, "step": 49232 }, { "epoch": 0.878125780330325, "grad_norm": 0.29794061183929443, "learning_rate": 2.2277687300436776e-06, "loss": 0.1189, "step": 49233 }, { "epoch": 0.8781436164520386, "grad_norm": 0.2491191178560257, "learning_rate": 2.227126479909758e-06, "loss": 0.1081, "step": 49234 }, { "epoch": 0.8781614525737523, "grad_norm": 0.20252706110477448, "learning_rate": 2.2264843180513555e-06, "loss": 0.097, "step": 49235 }, { "epoch": 0.878179288695466, "grad_norm": 0.2804051637649536, "learning_rate": 2.225842244470969e-06, "loss": 0.1277, "step": 49236 }, { "epoch": 0.8781971248171797, "grad_norm": 0.29793640971183777, "learning_rate": 2.2252002591710842e-06, "loss": 0.1449, "step": 49237 }, { "epoch": 0.8782149609388934, "grad_norm": 0.28213897347450256, "learning_rate": 2.2245583621541886e-06, "loss": 0.1098, "step": 49238 }, { "epoch": 0.8782327970606072, "grad_norm": 0.26342231035232544, "learning_rate": 2.2239165534227726e-06, "loss": 0.1301, "step": 49239 }, { "epoch": 0.8782506331823209, "grad_norm": 0.24856138229370117, "learning_rate": 2.223274832979319e-06, "loss": 0.1059, "step": 49240 }, { "epoch": 0.8782684693040346, "grad_norm": 0.28711897134780884, "learning_rate": 2.222633200826321e-06, "loss": 0.0778, "step": 49241 }, { "epoch": 0.8782863054257483, "grad_norm": 0.23394736647605896, "learning_rate": 2.2219916569662603e-06, "loss": 0.0891, "step": 49242 }, { "epoch": 0.878304141547462, "grad_norm": 0.35065212845802307, "learning_rate": 2.221350201401634e-06, "loss": 0.1434, "step": 49243 }, { "epoch": 0.8783219776691756, "grad_norm": 0.24713405966758728, "learning_rate": 2.220708834134913e-06, "loss": 0.1105, "step": 49244 }, { "epoch": 0.8783398137908893, "grad_norm": 0.40670716762542725, "learning_rate": 2.2200675551686005e-06, "loss": 0.0873, "step": 49245 }, { "epoch": 0.878357649912603, "grad_norm": 0.3071151673793793, "learning_rate": 2.2194263645051728e-06, "loss": 0.191, "step": 49246 }, { "epoch": 0.8783754860343167, "grad_norm": 0.3479996919631958, "learning_rate": 2.218785262147116e-06, "loss": 0.1203, "step": 49247 }, { "epoch": 0.8783933221560304, "grad_norm": 0.21634791791439056, "learning_rate": 2.2181442480969124e-06, "loss": 0.0968, "step": 49248 }, { "epoch": 0.8784111582777441, "grad_norm": 0.3879588842391968, "learning_rate": 2.2175033223570535e-06, "loss": 0.0646, "step": 49249 }, { "epoch": 0.8784289943994578, "grad_norm": 0.2578437030315399, "learning_rate": 2.216862484930021e-06, "loss": 0.1113, "step": 49250 }, { "epoch": 0.8784468305211715, "grad_norm": 0.3589254915714264, "learning_rate": 2.2162217358182997e-06, "loss": 0.1103, "step": 49251 }, { "epoch": 0.8784646666428851, "grad_norm": 0.40773728489875793, "learning_rate": 2.2155810750243726e-06, "loss": 0.0884, "step": 49252 }, { "epoch": 0.8784825027645988, "grad_norm": 0.17489473521709442, "learning_rate": 2.214940502550716e-06, "loss": 0.1007, "step": 49253 }, { "epoch": 0.8785003388863125, "grad_norm": 0.2856663763523102, "learning_rate": 2.214300018399826e-06, "loss": 0.1178, "step": 49254 }, { "epoch": 0.8785181750080262, "grad_norm": 0.30280280113220215, "learning_rate": 2.213659622574177e-06, "loss": 0.0938, "step": 49255 }, { "epoch": 0.87853601112974, "grad_norm": 0.244004026055336, "learning_rate": 2.213019315076251e-06, "loss": 0.115, "step": 49256 }, { "epoch": 0.8785538472514537, "grad_norm": 0.2830543518066406, "learning_rate": 2.212379095908537e-06, "loss": 0.1213, "step": 49257 }, { "epoch": 0.8785716833731674, "grad_norm": 0.22903460264205933, "learning_rate": 2.2117389650735065e-06, "loss": 0.1131, "step": 49258 }, { "epoch": 0.8785895194948811, "grad_norm": 0.3859170973300934, "learning_rate": 2.211098922573651e-06, "loss": 0.1788, "step": 49259 }, { "epoch": 0.8786073556165948, "grad_norm": 0.18580417335033417, "learning_rate": 2.2104589684114497e-06, "loss": 0.0822, "step": 49260 }, { "epoch": 0.8786251917383084, "grad_norm": 0.24412298202514648, "learning_rate": 2.2098191025893723e-06, "loss": 0.0813, "step": 49261 }, { "epoch": 0.8786430278600221, "grad_norm": 0.2092595398426056, "learning_rate": 2.209179325109914e-06, "loss": 0.0807, "step": 49262 }, { "epoch": 0.8786608639817358, "grad_norm": 0.23754996061325073, "learning_rate": 2.208539635975548e-06, "loss": 0.1068, "step": 49263 }, { "epoch": 0.8786787001034495, "grad_norm": 0.22136349976062775, "learning_rate": 2.2079000351887565e-06, "loss": 0.0951, "step": 49264 }, { "epoch": 0.8786965362251632, "grad_norm": 0.2848581075668335, "learning_rate": 2.207260522752014e-06, "loss": 0.1106, "step": 49265 }, { "epoch": 0.8787143723468769, "grad_norm": 0.2509171962738037, "learning_rate": 2.2066210986677964e-06, "loss": 0.0657, "step": 49266 }, { "epoch": 0.8787322084685906, "grad_norm": 0.26343268156051636, "learning_rate": 2.205981762938594e-06, "loss": 0.0839, "step": 49267 }, { "epoch": 0.8787500445903043, "grad_norm": 0.25738710165023804, "learning_rate": 2.20534251556688e-06, "loss": 0.1268, "step": 49268 }, { "epoch": 0.878767880712018, "grad_norm": 0.25700485706329346, "learning_rate": 2.204703356555127e-06, "loss": 0.0922, "step": 49269 }, { "epoch": 0.8787857168337316, "grad_norm": 0.23591314256191254, "learning_rate": 2.2040642859058214e-06, "loss": 0.0881, "step": 49270 }, { "epoch": 0.8788035529554453, "grad_norm": 0.29068422317504883, "learning_rate": 2.2034253036214316e-06, "loss": 0.1331, "step": 49271 }, { "epoch": 0.878821389077159, "grad_norm": 0.2947634160518646, "learning_rate": 2.202786409704441e-06, "loss": 0.075, "step": 49272 }, { "epoch": 0.8788392251988728, "grad_norm": 0.30926308035850525, "learning_rate": 2.2021476041573287e-06, "loss": 0.1341, "step": 49273 }, { "epoch": 0.8788570613205865, "grad_norm": 0.1835303157567978, "learning_rate": 2.201508886982559e-06, "loss": 0.0682, "step": 49274 }, { "epoch": 0.8788748974423002, "grad_norm": 0.26731470227241516, "learning_rate": 2.200870258182619e-06, "loss": 0.1124, "step": 49275 }, { "epoch": 0.8788927335640139, "grad_norm": 0.2902938425540924, "learning_rate": 2.200231717759982e-06, "loss": 0.1386, "step": 49276 }, { "epoch": 0.8789105696857276, "grad_norm": 0.24245618283748627, "learning_rate": 2.1995932657171202e-06, "loss": 0.1325, "step": 49277 }, { "epoch": 0.8789284058074412, "grad_norm": 0.204793319106102, "learning_rate": 2.1989549020565104e-06, "loss": 0.1252, "step": 49278 }, { "epoch": 0.8789462419291549, "grad_norm": 0.2994351387023926, "learning_rate": 2.198316626780625e-06, "loss": 0.1284, "step": 49279 }, { "epoch": 0.8789640780508686, "grad_norm": 0.337112694978714, "learning_rate": 2.1976784398919396e-06, "loss": 0.1566, "step": 49280 }, { "epoch": 0.8789819141725823, "grad_norm": 0.26452934741973877, "learning_rate": 2.197040341392931e-06, "loss": 0.1339, "step": 49281 }, { "epoch": 0.878999750294296, "grad_norm": 0.3279476761817932, "learning_rate": 2.196402331286071e-06, "loss": 0.0903, "step": 49282 }, { "epoch": 0.8790175864160097, "grad_norm": 0.21207202970981598, "learning_rate": 2.195764409573825e-06, "loss": 0.0949, "step": 49283 }, { "epoch": 0.8790354225377234, "grad_norm": 0.2903973460197449, "learning_rate": 2.195126576258677e-06, "loss": 0.1153, "step": 49284 }, { "epoch": 0.8790532586594371, "grad_norm": 0.6664719581604004, "learning_rate": 2.1944888313430917e-06, "loss": 0.1925, "step": 49285 }, { "epoch": 0.8790710947811508, "grad_norm": 0.17960165441036224, "learning_rate": 2.1938511748295476e-06, "loss": 0.0982, "step": 49286 }, { "epoch": 0.8790889309028644, "grad_norm": 0.2270062267780304, "learning_rate": 2.1932136067205096e-06, "loss": 0.1009, "step": 49287 }, { "epoch": 0.8791067670245781, "grad_norm": 0.34451597929000854, "learning_rate": 2.1925761270184584e-06, "loss": 0.108, "step": 49288 }, { "epoch": 0.8791246031462918, "grad_norm": 0.24558772146701813, "learning_rate": 2.191938735725857e-06, "loss": 0.0984, "step": 49289 }, { "epoch": 0.8791424392680056, "grad_norm": 0.28395959734916687, "learning_rate": 2.1913014328451803e-06, "loss": 0.1147, "step": 49290 }, { "epoch": 0.8791602753897193, "grad_norm": 0.3853025734424591, "learning_rate": 2.190664218378899e-06, "loss": 0.1593, "step": 49291 }, { "epoch": 0.879178111511433, "grad_norm": 0.3152863681316376, "learning_rate": 2.190027092329475e-06, "loss": 0.1532, "step": 49292 }, { "epoch": 0.8791959476331467, "grad_norm": 0.2937759459018707, "learning_rate": 2.189390054699386e-06, "loss": 0.0992, "step": 49293 }, { "epoch": 0.8792137837548604, "grad_norm": 0.22882647812366486, "learning_rate": 2.1887531054911035e-06, "loss": 0.123, "step": 49294 }, { "epoch": 0.879231619876574, "grad_norm": 0.23416295647621155, "learning_rate": 2.188116244707092e-06, "loss": 0.0773, "step": 49295 }, { "epoch": 0.8792494559982877, "grad_norm": 0.2864985466003418, "learning_rate": 2.1874794723498156e-06, "loss": 0.0959, "step": 49296 }, { "epoch": 0.8792672921200014, "grad_norm": 0.2612197697162628, "learning_rate": 2.186842788421753e-06, "loss": 0.1317, "step": 49297 }, { "epoch": 0.8792851282417151, "grad_norm": 0.2376280128955841, "learning_rate": 2.186206192925361e-06, "loss": 0.0839, "step": 49298 }, { "epoch": 0.8793029643634288, "grad_norm": 0.2539444863796234, "learning_rate": 2.18556968586312e-06, "loss": 0.1146, "step": 49299 }, { "epoch": 0.8793208004851425, "grad_norm": 0.22710853815078735, "learning_rate": 2.184933267237485e-06, "loss": 0.1086, "step": 49300 }, { "epoch": 0.8793386366068562, "grad_norm": 0.30184367299079895, "learning_rate": 2.1842969370509336e-06, "loss": 0.1491, "step": 49301 }, { "epoch": 0.8793564727285699, "grad_norm": 0.24350878596305847, "learning_rate": 2.183660695305928e-06, "loss": 0.1095, "step": 49302 }, { "epoch": 0.8793743088502836, "grad_norm": 0.2632853388786316, "learning_rate": 2.1830245420049356e-06, "loss": 0.1342, "step": 49303 }, { "epoch": 0.8793921449719972, "grad_norm": 0.19945082068443298, "learning_rate": 2.1823884771504183e-06, "loss": 0.0918, "step": 49304 }, { "epoch": 0.8794099810937109, "grad_norm": 0.31310898065567017, "learning_rate": 2.181752500744841e-06, "loss": 0.1196, "step": 49305 }, { "epoch": 0.8794278172154246, "grad_norm": 0.3122529983520508, "learning_rate": 2.1811166127906763e-06, "loss": 0.1056, "step": 49306 }, { "epoch": 0.8794456533371384, "grad_norm": 0.3728542625904083, "learning_rate": 2.1804808132903835e-06, "loss": 0.1174, "step": 49307 }, { "epoch": 0.8794634894588521, "grad_norm": 0.21311181783676147, "learning_rate": 2.1798451022464305e-06, "loss": 0.126, "step": 49308 }, { "epoch": 0.8794813255805658, "grad_norm": 0.27999240159988403, "learning_rate": 2.179209479661273e-06, "loss": 0.1089, "step": 49309 }, { "epoch": 0.8794991617022795, "grad_norm": 0.2537831962108612, "learning_rate": 2.1785739455373903e-06, "loss": 0.1235, "step": 49310 }, { "epoch": 0.8795169978239932, "grad_norm": 0.22349326312541962, "learning_rate": 2.177938499877233e-06, "loss": 0.1066, "step": 49311 }, { "epoch": 0.8795348339457069, "grad_norm": 0.3453611135482788, "learning_rate": 2.177303142683265e-06, "loss": 0.1112, "step": 49312 }, { "epoch": 0.8795526700674206, "grad_norm": 0.2920825779438019, "learning_rate": 2.176667873957955e-06, "loss": 0.108, "step": 49313 }, { "epoch": 0.8795705061891342, "grad_norm": 0.2659646272659302, "learning_rate": 2.1760326937037665e-06, "loss": 0.0969, "step": 49314 }, { "epoch": 0.8795883423108479, "grad_norm": 0.29027992486953735, "learning_rate": 2.175397601923157e-06, "loss": 0.1476, "step": 49315 }, { "epoch": 0.8796061784325616, "grad_norm": 0.3211362957954407, "learning_rate": 2.1747625986185932e-06, "loss": 0.117, "step": 49316 }, { "epoch": 0.8796240145542753, "grad_norm": 0.5697280168533325, "learning_rate": 2.174127683792529e-06, "loss": 0.1293, "step": 49317 }, { "epoch": 0.879641850675989, "grad_norm": 0.26624172925949097, "learning_rate": 2.173492857447429e-06, "loss": 0.1019, "step": 49318 }, { "epoch": 0.8796596867977027, "grad_norm": 0.4039880335330963, "learning_rate": 2.172858119585755e-06, "loss": 0.1098, "step": 49319 }, { "epoch": 0.8796775229194164, "grad_norm": 0.3455815017223358, "learning_rate": 2.1722234702099718e-06, "loss": 0.0609, "step": 49320 }, { "epoch": 0.87969535904113, "grad_norm": 0.4179365932941437, "learning_rate": 2.1715889093225307e-06, "loss": 0.1329, "step": 49321 }, { "epoch": 0.8797131951628437, "grad_norm": 0.2731788754463196, "learning_rate": 2.170954436925893e-06, "loss": 0.1196, "step": 49322 }, { "epoch": 0.8797310312845575, "grad_norm": 0.2133973389863968, "learning_rate": 2.170320053022526e-06, "loss": 0.0921, "step": 49323 }, { "epoch": 0.8797488674062712, "grad_norm": 0.25417691469192505, "learning_rate": 2.1696857576148837e-06, "loss": 0.0912, "step": 49324 }, { "epoch": 0.8797667035279849, "grad_norm": 0.23578456044197083, "learning_rate": 2.16905155070542e-06, "loss": 0.1089, "step": 49325 }, { "epoch": 0.8797845396496986, "grad_norm": 0.3673192262649536, "learning_rate": 2.1684174322966017e-06, "loss": 0.0986, "step": 49326 }, { "epoch": 0.8798023757714123, "grad_norm": 0.2576500475406647, "learning_rate": 2.16778340239088e-06, "loss": 0.0778, "step": 49327 }, { "epoch": 0.879820211893126, "grad_norm": 0.2585069239139557, "learning_rate": 2.1671494609907167e-06, "loss": 0.122, "step": 49328 }, { "epoch": 0.8798380480148397, "grad_norm": 0.22544099390506744, "learning_rate": 2.166515608098571e-06, "loss": 0.1253, "step": 49329 }, { "epoch": 0.8798558841365534, "grad_norm": 0.3013015687465668, "learning_rate": 2.1658818437168992e-06, "loss": 0.1107, "step": 49330 }, { "epoch": 0.879873720258267, "grad_norm": 0.24815087020397186, "learning_rate": 2.1652481678481497e-06, "loss": 0.1403, "step": 49331 }, { "epoch": 0.8798915563799807, "grad_norm": 0.297088086605072, "learning_rate": 2.164614580494789e-06, "loss": 0.1426, "step": 49332 }, { "epoch": 0.8799093925016944, "grad_norm": 0.25410720705986023, "learning_rate": 2.1639810816592693e-06, "loss": 0.1499, "step": 49333 }, { "epoch": 0.8799272286234081, "grad_norm": 0.4218274652957916, "learning_rate": 2.163347671344046e-06, "loss": 0.0907, "step": 49334 }, { "epoch": 0.8799450647451218, "grad_norm": 0.32722097635269165, "learning_rate": 2.162714349551573e-06, "loss": 0.1314, "step": 49335 }, { "epoch": 0.8799629008668355, "grad_norm": 0.30587467551231384, "learning_rate": 2.1620811162843093e-06, "loss": 0.1159, "step": 49336 }, { "epoch": 0.8799807369885492, "grad_norm": 0.27871185541152954, "learning_rate": 2.1614479715447055e-06, "loss": 0.1104, "step": 49337 }, { "epoch": 0.8799985731102629, "grad_norm": 0.3603885769844055, "learning_rate": 2.1608149153352185e-06, "loss": 0.1654, "step": 49338 }, { "epoch": 0.8800164092319765, "grad_norm": 0.29584890604019165, "learning_rate": 2.160181947658299e-06, "loss": 0.1242, "step": 49339 }, { "epoch": 0.8800342453536903, "grad_norm": 0.2352956384420395, "learning_rate": 2.159549068516406e-06, "loss": 0.1289, "step": 49340 }, { "epoch": 0.880052081475404, "grad_norm": 0.2944217026233673, "learning_rate": 2.158916277911985e-06, "loss": 0.1061, "step": 49341 }, { "epoch": 0.8800699175971177, "grad_norm": 0.25249195098876953, "learning_rate": 2.158283575847497e-06, "loss": 0.0729, "step": 49342 }, { "epoch": 0.8800877537188314, "grad_norm": 0.27610111236572266, "learning_rate": 2.1576509623253944e-06, "loss": 0.1174, "step": 49343 }, { "epoch": 0.8801055898405451, "grad_norm": 0.19652724266052246, "learning_rate": 2.1570184373481184e-06, "loss": 0.0654, "step": 49344 }, { "epoch": 0.8801234259622588, "grad_norm": 0.29927709698677063, "learning_rate": 2.156386000918134e-06, "loss": 0.1493, "step": 49345 }, { "epoch": 0.8801412620839725, "grad_norm": 0.214554563164711, "learning_rate": 2.155753653037887e-06, "loss": 0.1486, "step": 49346 }, { "epoch": 0.8801590982056862, "grad_norm": 0.294188529253006, "learning_rate": 2.15512139370983e-06, "loss": 0.1248, "step": 49347 }, { "epoch": 0.8801769343273999, "grad_norm": 0.2982638478279114, "learning_rate": 2.1544892229364093e-06, "loss": 0.1657, "step": 49348 }, { "epoch": 0.8801947704491135, "grad_norm": 0.2377844899892807, "learning_rate": 2.1538571407200807e-06, "loss": 0.1073, "step": 49349 }, { "epoch": 0.8802126065708272, "grad_norm": 0.2099645882844925, "learning_rate": 2.1532251470632952e-06, "loss": 0.0666, "step": 49350 }, { "epoch": 0.8802304426925409, "grad_norm": 0.2671528458595276, "learning_rate": 2.152593241968498e-06, "loss": 0.1101, "step": 49351 }, { "epoch": 0.8802482788142546, "grad_norm": 0.2318814992904663, "learning_rate": 2.151961425438137e-06, "loss": 0.0547, "step": 49352 }, { "epoch": 0.8802661149359683, "grad_norm": 0.2701147198677063, "learning_rate": 2.151329697474669e-06, "loss": 0.1063, "step": 49353 }, { "epoch": 0.880283951057682, "grad_norm": 0.2808949053287506, "learning_rate": 2.150698058080536e-06, "loss": 0.1298, "step": 49354 }, { "epoch": 0.8803017871793957, "grad_norm": 0.23980826139450073, "learning_rate": 2.1500665072581914e-06, "loss": 0.0619, "step": 49355 }, { "epoch": 0.8803196233011094, "grad_norm": 0.3192655146121979, "learning_rate": 2.1494350450100815e-06, "loss": 0.0892, "step": 49356 }, { "epoch": 0.8803374594228232, "grad_norm": 0.20160838961601257, "learning_rate": 2.1488036713386505e-06, "loss": 0.1016, "step": 49357 }, { "epoch": 0.8803552955445368, "grad_norm": 0.24458405375480652, "learning_rate": 2.1481723862463528e-06, "loss": 0.1748, "step": 49358 }, { "epoch": 0.8803731316662505, "grad_norm": 0.3356369435787201, "learning_rate": 2.147541189735633e-06, "loss": 0.1232, "step": 49359 }, { "epoch": 0.8803909677879642, "grad_norm": 0.18355625867843628, "learning_rate": 2.146910081808934e-06, "loss": 0.1513, "step": 49360 }, { "epoch": 0.8804088039096779, "grad_norm": 0.31734001636505127, "learning_rate": 2.1462790624687033e-06, "loss": 0.1398, "step": 49361 }, { "epoch": 0.8804266400313916, "grad_norm": 0.23904845118522644, "learning_rate": 2.14564813171739e-06, "loss": 0.0939, "step": 49362 }, { "epoch": 0.8804444761531053, "grad_norm": 0.25867345929145813, "learning_rate": 2.145017289557441e-06, "loss": 0.0998, "step": 49363 }, { "epoch": 0.880462312274819, "grad_norm": 0.3008505702018738, "learning_rate": 2.1443865359912968e-06, "loss": 0.1234, "step": 49364 }, { "epoch": 0.8804801483965327, "grad_norm": 0.25963446497917175, "learning_rate": 2.1437558710213997e-06, "loss": 0.1308, "step": 49365 }, { "epoch": 0.8804979845182463, "grad_norm": 0.21107198297977448, "learning_rate": 2.1431252946502057e-06, "loss": 0.0556, "step": 49366 }, { "epoch": 0.88051582063996, "grad_norm": 0.2771542966365814, "learning_rate": 2.1424948068801492e-06, "loss": 0.1011, "step": 49367 }, { "epoch": 0.8805336567616737, "grad_norm": 0.2519318163394928, "learning_rate": 2.1418644077136756e-06, "loss": 0.1044, "step": 49368 }, { "epoch": 0.8805514928833874, "grad_norm": 0.2567456364631653, "learning_rate": 2.1412340971532357e-06, "loss": 0.134, "step": 49369 }, { "epoch": 0.8805693290051011, "grad_norm": 0.2275715470314026, "learning_rate": 2.140603875201261e-06, "loss": 0.11, "step": 49370 }, { "epoch": 0.8805871651268148, "grad_norm": 0.35440659523010254, "learning_rate": 2.1399737418602075e-06, "loss": 0.0986, "step": 49371 }, { "epoch": 0.8806050012485285, "grad_norm": 0.39519476890563965, "learning_rate": 2.1393436971325094e-06, "loss": 0.1262, "step": 49372 }, { "epoch": 0.8806228373702422, "grad_norm": 0.31076329946517944, "learning_rate": 2.1387137410206123e-06, "loss": 0.1457, "step": 49373 }, { "epoch": 0.880640673491956, "grad_norm": 0.2845359444618225, "learning_rate": 2.13808387352695e-06, "loss": 0.1429, "step": 49374 }, { "epoch": 0.8806585096136696, "grad_norm": 0.22091351449489594, "learning_rate": 2.137454094653979e-06, "loss": 0.1167, "step": 49375 }, { "epoch": 0.8806763457353833, "grad_norm": 0.2092379629611969, "learning_rate": 2.136824404404131e-06, "loss": 0.1063, "step": 49376 }, { "epoch": 0.880694181857097, "grad_norm": 0.3307375907897949, "learning_rate": 2.136194802779845e-06, "loss": 0.1201, "step": 49377 }, { "epoch": 0.8807120179788107, "grad_norm": 0.36467018723487854, "learning_rate": 2.1355652897835645e-06, "loss": 0.1028, "step": 49378 }, { "epoch": 0.8807298541005244, "grad_norm": 0.25610044598579407, "learning_rate": 2.134935865417734e-06, "loss": 0.1103, "step": 49379 }, { "epoch": 0.8807476902222381, "grad_norm": 0.35355618596076965, "learning_rate": 2.1343065296847875e-06, "loss": 0.1943, "step": 49380 }, { "epoch": 0.8807655263439518, "grad_norm": 0.2638136148452759, "learning_rate": 2.1336772825871628e-06, "loss": 0.1262, "step": 49381 }, { "epoch": 0.8807833624656655, "grad_norm": 0.31387999653816223, "learning_rate": 2.133048124127307e-06, "loss": 0.105, "step": 49382 }, { "epoch": 0.8808011985873792, "grad_norm": 0.25916004180908203, "learning_rate": 2.132419054307652e-06, "loss": 0.1289, "step": 49383 }, { "epoch": 0.8808190347090928, "grad_norm": 0.2242920696735382, "learning_rate": 2.1317900731306434e-06, "loss": 0.0723, "step": 49384 }, { "epoch": 0.8808368708308065, "grad_norm": 0.20153427124023438, "learning_rate": 2.131161180598712e-06, "loss": 0.1049, "step": 49385 }, { "epoch": 0.8808547069525202, "grad_norm": 0.27675026655197144, "learning_rate": 2.1305323767143002e-06, "loss": 0.1339, "step": 49386 }, { "epoch": 0.8808725430742339, "grad_norm": 0.3084661364555359, "learning_rate": 2.12990366147984e-06, "loss": 0.1133, "step": 49387 }, { "epoch": 0.8808903791959476, "grad_norm": 0.5250598788261414, "learning_rate": 2.1292750348977765e-06, "loss": 0.0935, "step": 49388 }, { "epoch": 0.8809082153176613, "grad_norm": 0.2611176073551178, "learning_rate": 2.1286464969705404e-06, "loss": 0.1012, "step": 49389 }, { "epoch": 0.880926051439375, "grad_norm": 0.24788115918636322, "learning_rate": 2.128018047700572e-06, "loss": 0.1016, "step": 49390 }, { "epoch": 0.8809438875610888, "grad_norm": 0.2953091561794281, "learning_rate": 2.1273896870903e-06, "loss": 0.1377, "step": 49391 }, { "epoch": 0.8809617236828025, "grad_norm": 0.25488021969795227, "learning_rate": 2.12676141514217e-06, "loss": 0.0818, "step": 49392 }, { "epoch": 0.8809795598045161, "grad_norm": 0.24367518723011017, "learning_rate": 2.1261332318586125e-06, "loss": 0.0903, "step": 49393 }, { "epoch": 0.8809973959262298, "grad_norm": 0.5073939561843872, "learning_rate": 2.1255051372420648e-06, "loss": 0.1471, "step": 49394 }, { "epoch": 0.8810152320479435, "grad_norm": 0.3212815225124359, "learning_rate": 2.1248771312949554e-06, "loss": 0.1424, "step": 49395 }, { "epoch": 0.8810330681696572, "grad_norm": 0.31436392664909363, "learning_rate": 2.124249214019722e-06, "loss": 0.1464, "step": 49396 }, { "epoch": 0.8810509042913709, "grad_norm": 0.22638578712940216, "learning_rate": 2.123621385418803e-06, "loss": 0.179, "step": 49397 }, { "epoch": 0.8810687404130846, "grad_norm": 0.21386833488941193, "learning_rate": 2.1229936454946282e-06, "loss": 0.0734, "step": 49398 }, { "epoch": 0.8810865765347983, "grad_norm": 0.3712170720100403, "learning_rate": 2.1223659942496336e-06, "loss": 0.1465, "step": 49399 }, { "epoch": 0.881104412656512, "grad_norm": 0.2534027397632599, "learning_rate": 2.121738431686246e-06, "loss": 0.1107, "step": 49400 }, { "epoch": 0.8811222487782256, "grad_norm": 0.2919429838657379, "learning_rate": 2.1211109578069045e-06, "loss": 0.1253, "step": 49401 }, { "epoch": 0.8811400848999393, "grad_norm": 0.24583253264427185, "learning_rate": 2.1204835726140404e-06, "loss": 0.1118, "step": 49402 }, { "epoch": 0.881157921021653, "grad_norm": 0.2129170149564743, "learning_rate": 2.1198562761100855e-06, "loss": 0.0787, "step": 49403 }, { "epoch": 0.8811757571433667, "grad_norm": 0.21634620428085327, "learning_rate": 2.1192290682974627e-06, "loss": 0.1392, "step": 49404 }, { "epoch": 0.8811935932650804, "grad_norm": 0.28531765937805176, "learning_rate": 2.118601949178617e-06, "loss": 0.1217, "step": 49405 }, { "epoch": 0.8812114293867941, "grad_norm": 0.44769516587257385, "learning_rate": 2.1179749187559745e-06, "loss": 0.1299, "step": 49406 }, { "epoch": 0.8812292655085078, "grad_norm": 0.30657604336738586, "learning_rate": 2.117347977031961e-06, "loss": 0.1055, "step": 49407 }, { "epoch": 0.8812471016302216, "grad_norm": 0.34153756499290466, "learning_rate": 2.1167211240090077e-06, "loss": 0.147, "step": 49408 }, { "epoch": 0.8812649377519353, "grad_norm": 0.3517022728919983, "learning_rate": 2.116094359689552e-06, "loss": 0.0976, "step": 49409 }, { "epoch": 0.881282773873649, "grad_norm": 0.20783376693725586, "learning_rate": 2.115467684076014e-06, "loss": 0.0778, "step": 49410 }, { "epoch": 0.8813006099953626, "grad_norm": 0.2701862156391144, "learning_rate": 2.1148410971708304e-06, "loss": 0.1265, "step": 49411 }, { "epoch": 0.8813184461170763, "grad_norm": 0.30719611048698425, "learning_rate": 2.1142145989764272e-06, "loss": 0.073, "step": 49412 }, { "epoch": 0.88133628223879, "grad_norm": 0.28177008032798767, "learning_rate": 2.1135881894952303e-06, "loss": 0.1314, "step": 49413 }, { "epoch": 0.8813541183605037, "grad_norm": 0.2552705407142639, "learning_rate": 2.1129618687296737e-06, "loss": 0.0844, "step": 49414 }, { "epoch": 0.8813719544822174, "grad_norm": 0.2846696674823761, "learning_rate": 2.1123356366821806e-06, "loss": 0.1406, "step": 49415 }, { "epoch": 0.8813897906039311, "grad_norm": 0.28623223304748535, "learning_rate": 2.1117094933551796e-06, "loss": 0.1724, "step": 49416 }, { "epoch": 0.8814076267256448, "grad_norm": 0.2970600426197052, "learning_rate": 2.1110834387510936e-06, "loss": 0.1188, "step": 49417 }, { "epoch": 0.8814254628473585, "grad_norm": 0.31855344772338867, "learning_rate": 2.11045747287236e-06, "loss": 0.1181, "step": 49418 }, { "epoch": 0.8814432989690721, "grad_norm": 0.31087368726730347, "learning_rate": 2.109831595721398e-06, "loss": 0.121, "step": 49419 }, { "epoch": 0.8814611350907858, "grad_norm": 0.3066253662109375, "learning_rate": 2.109205807300635e-06, "loss": 0.0951, "step": 49420 }, { "epoch": 0.8814789712124995, "grad_norm": 0.23050422966480255, "learning_rate": 2.108580107612493e-06, "loss": 0.1112, "step": 49421 }, { "epoch": 0.8814968073342132, "grad_norm": 0.22933296859264374, "learning_rate": 2.1079544966594034e-06, "loss": 0.0912, "step": 49422 }, { "epoch": 0.8815146434559269, "grad_norm": 0.34030359983444214, "learning_rate": 2.1073289744437843e-06, "loss": 0.0791, "step": 49423 }, { "epoch": 0.8815324795776407, "grad_norm": 0.3031795620918274, "learning_rate": 2.106703540968069e-06, "loss": 0.0987, "step": 49424 }, { "epoch": 0.8815503156993544, "grad_norm": 0.25037652254104614, "learning_rate": 2.1060781962346816e-06, "loss": 0.0897, "step": 49425 }, { "epoch": 0.8815681518210681, "grad_norm": 0.26562973856925964, "learning_rate": 2.1054529402460334e-06, "loss": 0.1082, "step": 49426 }, { "epoch": 0.8815859879427818, "grad_norm": 0.30815649032592773, "learning_rate": 2.104827773004564e-06, "loss": 0.1078, "step": 49427 }, { "epoch": 0.8816038240644954, "grad_norm": 0.3072338402271271, "learning_rate": 2.104202694512691e-06, "loss": 0.097, "step": 49428 }, { "epoch": 0.8816216601862091, "grad_norm": 0.2984631061553955, "learning_rate": 2.1035777047728355e-06, "loss": 0.1413, "step": 49429 }, { "epoch": 0.8816394963079228, "grad_norm": 0.4053855240345001, "learning_rate": 2.102952803787414e-06, "loss": 0.1769, "step": 49430 }, { "epoch": 0.8816573324296365, "grad_norm": 0.25490662455558777, "learning_rate": 2.102327991558864e-06, "loss": 0.0976, "step": 49431 }, { "epoch": 0.8816751685513502, "grad_norm": 0.2718149423599243, "learning_rate": 2.101703268089597e-06, "loss": 0.1327, "step": 49432 }, { "epoch": 0.8816930046730639, "grad_norm": 0.31242114305496216, "learning_rate": 2.1010786333820364e-06, "loss": 0.0999, "step": 49433 }, { "epoch": 0.8817108407947776, "grad_norm": 0.2962825894355774, "learning_rate": 2.1004540874385996e-06, "loss": 0.1432, "step": 49434 }, { "epoch": 0.8817286769164913, "grad_norm": 0.2486339509487152, "learning_rate": 2.0998296302617183e-06, "loss": 0.103, "step": 49435 }, { "epoch": 0.881746513038205, "grad_norm": 0.20273301005363464, "learning_rate": 2.0992052618538068e-06, "loss": 0.0826, "step": 49436 }, { "epoch": 0.8817643491599186, "grad_norm": 0.3166324496269226, "learning_rate": 2.0985809822172796e-06, "loss": 0.1258, "step": 49437 }, { "epoch": 0.8817821852816323, "grad_norm": 0.3244864046573639, "learning_rate": 2.097956791354569e-06, "loss": 0.1144, "step": 49438 }, { "epoch": 0.881800021403346, "grad_norm": 0.3394557535648346, "learning_rate": 2.0973326892680805e-06, "loss": 0.1067, "step": 49439 }, { "epoch": 0.8818178575250597, "grad_norm": 0.31407296657562256, "learning_rate": 2.096708675960246e-06, "loss": 0.1084, "step": 49440 }, { "epoch": 0.8818356936467735, "grad_norm": 0.18862786889076233, "learning_rate": 2.09608475143348e-06, "loss": 0.08, "step": 49441 }, { "epoch": 0.8818535297684872, "grad_norm": 0.22936619818210602, "learning_rate": 2.0954609156902004e-06, "loss": 0.0946, "step": 49442 }, { "epoch": 0.8818713658902009, "grad_norm": 0.25299686193466187, "learning_rate": 2.0948371687328215e-06, "loss": 0.0993, "step": 49443 }, { "epoch": 0.8818892020119146, "grad_norm": 0.3145373463630676, "learning_rate": 2.0942135105637693e-06, "loss": 0.0836, "step": 49444 }, { "epoch": 0.8819070381336283, "grad_norm": 0.5313447713851929, "learning_rate": 2.0935899411854557e-06, "loss": 0.1366, "step": 49445 }, { "epoch": 0.8819248742553419, "grad_norm": 0.2950810194015503, "learning_rate": 2.0929664606002986e-06, "loss": 0.0944, "step": 49446 }, { "epoch": 0.8819427103770556, "grad_norm": 0.2714419662952423, "learning_rate": 2.092343068810712e-06, "loss": 0.0795, "step": 49447 }, { "epoch": 0.8819605464987693, "grad_norm": 0.3711424171924591, "learning_rate": 2.0917197658191197e-06, "loss": 0.1234, "step": 49448 }, { "epoch": 0.881978382620483, "grad_norm": 0.20156267285346985, "learning_rate": 2.0910965516279357e-06, "loss": 0.0736, "step": 49449 }, { "epoch": 0.8819962187421967, "grad_norm": 0.3054533898830414, "learning_rate": 2.090473426239567e-06, "loss": 0.1526, "step": 49450 }, { "epoch": 0.8820140548639104, "grad_norm": 0.2628709673881531, "learning_rate": 2.089850389656442e-06, "loss": 0.103, "step": 49451 }, { "epoch": 0.8820318909856241, "grad_norm": 0.22848740220069885, "learning_rate": 2.0892274418809644e-06, "loss": 0.081, "step": 49452 }, { "epoch": 0.8820497271073378, "grad_norm": 0.3265763819217682, "learning_rate": 2.0886045829155597e-06, "loss": 0.1007, "step": 49453 }, { "epoch": 0.8820675632290514, "grad_norm": 0.20340044796466827, "learning_rate": 2.087981812762635e-06, "loss": 0.0781, "step": 49454 }, { "epoch": 0.8820853993507651, "grad_norm": 0.25925788283348083, "learning_rate": 2.0873591314246072e-06, "loss": 0.1295, "step": 49455 }, { "epoch": 0.8821032354724788, "grad_norm": 0.2397794872522354, "learning_rate": 2.086736538903886e-06, "loss": 0.1294, "step": 49456 }, { "epoch": 0.8821210715941925, "grad_norm": 0.244323268532753, "learning_rate": 2.0861140352028907e-06, "loss": 0.1475, "step": 49457 }, { "epoch": 0.8821389077159063, "grad_norm": 0.27704474329948425, "learning_rate": 2.0854916203240344e-06, "loss": 0.1179, "step": 49458 }, { "epoch": 0.88215674383762, "grad_norm": 0.26258665323257446, "learning_rate": 2.0848692942697257e-06, "loss": 0.1227, "step": 49459 }, { "epoch": 0.8821745799593337, "grad_norm": 0.2461012750864029, "learning_rate": 2.084247057042371e-06, "loss": 0.1225, "step": 49460 }, { "epoch": 0.8821924160810474, "grad_norm": 0.30213266611099243, "learning_rate": 2.0836249086443964e-06, "loss": 0.1202, "step": 49461 }, { "epoch": 0.8822102522027611, "grad_norm": 0.2420114129781723, "learning_rate": 2.083002849078208e-06, "loss": 0.1481, "step": 49462 }, { "epoch": 0.8822280883244747, "grad_norm": 0.22456768155097961, "learning_rate": 2.0823808783462155e-06, "loss": 0.0568, "step": 49463 }, { "epoch": 0.8822459244461884, "grad_norm": 0.42278358340263367, "learning_rate": 2.0817589964508223e-06, "loss": 0.14, "step": 49464 }, { "epoch": 0.8822637605679021, "grad_norm": 0.2925378978252411, "learning_rate": 2.0811372033944537e-06, "loss": 0.1064, "step": 49465 }, { "epoch": 0.8822815966896158, "grad_norm": 0.244202122092247, "learning_rate": 2.080515499179511e-06, "loss": 0.1005, "step": 49466 }, { "epoch": 0.8822994328113295, "grad_norm": 0.359566330909729, "learning_rate": 2.0798938838084092e-06, "loss": 0.0872, "step": 49467 }, { "epoch": 0.8823172689330432, "grad_norm": 0.22111792862415314, "learning_rate": 2.079272357283554e-06, "loss": 0.1242, "step": 49468 }, { "epoch": 0.8823351050547569, "grad_norm": 0.26042407751083374, "learning_rate": 2.07865091960735e-06, "loss": 0.1077, "step": 49469 }, { "epoch": 0.8823529411764706, "grad_norm": 0.2323639690876007, "learning_rate": 2.0780295707822194e-06, "loss": 0.0826, "step": 49470 }, { "epoch": 0.8823707772981843, "grad_norm": 0.29817500710487366, "learning_rate": 2.0774083108105606e-06, "loss": 0.1213, "step": 49471 }, { "epoch": 0.8823886134198979, "grad_norm": 0.20066680014133453, "learning_rate": 2.0767871396947858e-06, "loss": 0.1043, "step": 49472 }, { "epoch": 0.8824064495416116, "grad_norm": 0.3479580283164978, "learning_rate": 2.0761660574372983e-06, "loss": 0.1079, "step": 49473 }, { "epoch": 0.8824242856633253, "grad_norm": 0.3746626079082489, "learning_rate": 2.07554506404051e-06, "loss": 0.0962, "step": 49474 }, { "epoch": 0.8824421217850391, "grad_norm": 0.26637962460517883, "learning_rate": 2.07492415950683e-06, "loss": 0.1045, "step": 49475 }, { "epoch": 0.8824599579067528, "grad_norm": 0.3373264670372009, "learning_rate": 2.07430334383866e-06, "loss": 0.1441, "step": 49476 }, { "epoch": 0.8824777940284665, "grad_norm": 0.2721731960773468, "learning_rate": 2.0736826170384054e-06, "loss": 0.1577, "step": 49477 }, { "epoch": 0.8824956301501802, "grad_norm": 0.28891292214393616, "learning_rate": 2.0730619791084786e-06, "loss": 0.1527, "step": 49478 }, { "epoch": 0.8825134662718939, "grad_norm": 0.2672925889492035, "learning_rate": 2.072441430051278e-06, "loss": 0.0882, "step": 49479 }, { "epoch": 0.8825313023936076, "grad_norm": 0.24898603558540344, "learning_rate": 2.0718209698692205e-06, "loss": 0.0689, "step": 49480 }, { "epoch": 0.8825491385153212, "grad_norm": 0.34353986382484436, "learning_rate": 2.0712005985647016e-06, "loss": 0.1379, "step": 49481 }, { "epoch": 0.8825669746370349, "grad_norm": 0.34004947543144226, "learning_rate": 2.070580316140122e-06, "loss": 0.1058, "step": 49482 }, { "epoch": 0.8825848107587486, "grad_norm": 0.218428373336792, "learning_rate": 2.0699601225979e-06, "loss": 0.0962, "step": 49483 }, { "epoch": 0.8826026468804623, "grad_norm": 0.28842514753341675, "learning_rate": 2.0693400179404328e-06, "loss": 0.15, "step": 49484 }, { "epoch": 0.882620483002176, "grad_norm": 0.26158249378204346, "learning_rate": 2.0687200021701216e-06, "loss": 0.1032, "step": 49485 }, { "epoch": 0.8826383191238897, "grad_norm": 0.17603634297847748, "learning_rate": 2.06810007528937e-06, "loss": 0.0923, "step": 49486 }, { "epoch": 0.8826561552456034, "grad_norm": 0.22819095849990845, "learning_rate": 2.0674802373005844e-06, "loss": 0.0979, "step": 49487 }, { "epoch": 0.8826739913673171, "grad_norm": 0.24512246251106262, "learning_rate": 2.066860488206168e-06, "loss": 0.087, "step": 49488 }, { "epoch": 0.8826918274890307, "grad_norm": 0.2636117935180664, "learning_rate": 2.0662408280085225e-06, "loss": 0.1072, "step": 49489 }, { "epoch": 0.8827096636107444, "grad_norm": 0.22856329381465912, "learning_rate": 2.065621256710046e-06, "loss": 0.1242, "step": 49490 }, { "epoch": 0.8827274997324581, "grad_norm": 0.2186232954263687, "learning_rate": 2.0650017743131383e-06, "loss": 0.0914, "step": 49491 }, { "epoch": 0.8827453358541719, "grad_norm": 0.32454976439476013, "learning_rate": 2.064382380820212e-06, "loss": 0.1028, "step": 49492 }, { "epoch": 0.8827631719758856, "grad_norm": 0.23620378971099854, "learning_rate": 2.063763076233655e-06, "loss": 0.1094, "step": 49493 }, { "epoch": 0.8827810080975993, "grad_norm": 0.38535383343696594, "learning_rate": 2.0631438605558803e-06, "loss": 0.1626, "step": 49494 }, { "epoch": 0.882798844219313, "grad_norm": 0.3389303684234619, "learning_rate": 2.0625247337892755e-06, "loss": 0.1582, "step": 49495 }, { "epoch": 0.8828166803410267, "grad_norm": 0.31598159670829773, "learning_rate": 2.061905695936253e-06, "loss": 0.1216, "step": 49496 }, { "epoch": 0.8828345164627404, "grad_norm": 0.35806748270988464, "learning_rate": 2.0612867469992054e-06, "loss": 0.1796, "step": 49497 }, { "epoch": 0.882852352584454, "grad_norm": 0.3328573703765869, "learning_rate": 2.0606678869805327e-06, "loss": 0.1496, "step": 49498 }, { "epoch": 0.8828701887061677, "grad_norm": 0.1679372787475586, "learning_rate": 2.0600491158826314e-06, "loss": 0.0739, "step": 49499 }, { "epoch": 0.8828880248278814, "grad_norm": 0.24111592769622803, "learning_rate": 2.059430433707907e-06, "loss": 0.0926, "step": 49500 }, { "epoch": 0.8829058609495951, "grad_norm": 0.31532177329063416, "learning_rate": 2.0588118404587546e-06, "loss": 0.116, "step": 49501 }, { "epoch": 0.8829236970713088, "grad_norm": 0.25310298800468445, "learning_rate": 2.0581933361375704e-06, "loss": 0.1443, "step": 49502 }, { "epoch": 0.8829415331930225, "grad_norm": 0.27139073610305786, "learning_rate": 2.057574920746752e-06, "loss": 0.0792, "step": 49503 }, { "epoch": 0.8829593693147362, "grad_norm": 0.3119518756866455, "learning_rate": 2.056956594288695e-06, "loss": 0.1194, "step": 49504 }, { "epoch": 0.8829772054364499, "grad_norm": 0.9270125031471252, "learning_rate": 2.0563383567657994e-06, "loss": 0.1143, "step": 49505 }, { "epoch": 0.8829950415581636, "grad_norm": 0.44426754117012024, "learning_rate": 2.0557202081804588e-06, "loss": 0.1215, "step": 49506 }, { "epoch": 0.8830128776798772, "grad_norm": 0.3076658546924591, "learning_rate": 2.0551021485350764e-06, "loss": 0.0882, "step": 49507 }, { "epoch": 0.8830307138015909, "grad_norm": 0.2759837508201599, "learning_rate": 2.0544841778320363e-06, "loss": 0.0909, "step": 49508 }, { "epoch": 0.8830485499233047, "grad_norm": 0.23088671267032623, "learning_rate": 2.0538662960737474e-06, "loss": 0.0986, "step": 49509 }, { "epoch": 0.8830663860450184, "grad_norm": 0.2549494206905365, "learning_rate": 2.0532485032625946e-06, "loss": 0.0998, "step": 49510 }, { "epoch": 0.8830842221667321, "grad_norm": 0.23078858852386475, "learning_rate": 2.0526307994009787e-06, "loss": 0.0885, "step": 49511 }, { "epoch": 0.8831020582884458, "grad_norm": 0.26283302903175354, "learning_rate": 2.052013184491289e-06, "loss": 0.0693, "step": 49512 }, { "epoch": 0.8831198944101595, "grad_norm": 0.27256253361701965, "learning_rate": 2.0513956585359233e-06, "loss": 0.0975, "step": 49513 }, { "epoch": 0.8831377305318732, "grad_norm": 0.2075197845697403, "learning_rate": 2.050778221537275e-06, "loss": 0.1439, "step": 49514 }, { "epoch": 0.8831555666535869, "grad_norm": 0.25969287753105164, "learning_rate": 2.050160873497739e-06, "loss": 0.0918, "step": 49515 }, { "epoch": 0.8831734027753005, "grad_norm": 0.24521677196025848, "learning_rate": 2.0495436144197023e-06, "loss": 0.0839, "step": 49516 }, { "epoch": 0.8831912388970142, "grad_norm": 0.2796004116535187, "learning_rate": 2.04892644430556e-06, "loss": 0.0819, "step": 49517 }, { "epoch": 0.8832090750187279, "grad_norm": 0.22466719150543213, "learning_rate": 2.048309363157708e-06, "loss": 0.0863, "step": 49518 }, { "epoch": 0.8832269111404416, "grad_norm": 0.27003031969070435, "learning_rate": 2.0476923709785377e-06, "loss": 0.1237, "step": 49519 }, { "epoch": 0.8832447472621553, "grad_norm": 0.21677754819393158, "learning_rate": 2.0470754677704344e-06, "loss": 0.1244, "step": 49520 }, { "epoch": 0.883262583383869, "grad_norm": 0.2606586217880249, "learning_rate": 2.046458653535799e-06, "loss": 0.1053, "step": 49521 }, { "epoch": 0.8832804195055827, "grad_norm": 0.1929212063550949, "learning_rate": 2.045841928277012e-06, "loss": 0.0606, "step": 49522 }, { "epoch": 0.8832982556272964, "grad_norm": 0.2580725848674774, "learning_rate": 2.045225291996478e-06, "loss": 0.1077, "step": 49523 }, { "epoch": 0.88331609174901, "grad_norm": 0.23047815263271332, "learning_rate": 2.0446087446965743e-06, "loss": 0.1069, "step": 49524 }, { "epoch": 0.8833339278707238, "grad_norm": 0.24394388496875763, "learning_rate": 2.0439922863796952e-06, "loss": 0.1337, "step": 49525 }, { "epoch": 0.8833517639924375, "grad_norm": 0.2879149317741394, "learning_rate": 2.043375917048232e-06, "loss": 0.1193, "step": 49526 }, { "epoch": 0.8833696001141512, "grad_norm": 0.333153635263443, "learning_rate": 2.042759636704575e-06, "loss": 0.1045, "step": 49527 }, { "epoch": 0.8833874362358649, "grad_norm": 0.2467833161354065, "learning_rate": 2.0421434453511077e-06, "loss": 0.1296, "step": 49528 }, { "epoch": 0.8834052723575786, "grad_norm": 0.3096359968185425, "learning_rate": 2.0415273429902236e-06, "loss": 0.1642, "step": 49529 }, { "epoch": 0.8834231084792923, "grad_norm": 0.26842281222343445, "learning_rate": 2.0409113296243066e-06, "loss": 0.142, "step": 49530 }, { "epoch": 0.883440944601006, "grad_norm": 0.19256141781806946, "learning_rate": 2.040295405255749e-06, "loss": 0.0953, "step": 49531 }, { "epoch": 0.8834587807227197, "grad_norm": 0.26695820689201355, "learning_rate": 2.0396795698869376e-06, "loss": 0.1047, "step": 49532 }, { "epoch": 0.8834766168444333, "grad_norm": 0.22599327564239502, "learning_rate": 2.0390638235202542e-06, "loss": 0.0652, "step": 49533 }, { "epoch": 0.883494452966147, "grad_norm": 0.2299235314130783, "learning_rate": 2.038448166158094e-06, "loss": 0.0965, "step": 49534 }, { "epoch": 0.8835122890878607, "grad_norm": 0.22735629975795746, "learning_rate": 2.037832597802836e-06, "loss": 0.086, "step": 49535 }, { "epoch": 0.8835301252095744, "grad_norm": 0.29025423526763916, "learning_rate": 2.0372171184568716e-06, "loss": 0.0961, "step": 49536 }, { "epoch": 0.8835479613312881, "grad_norm": 0.21940742433071136, "learning_rate": 2.0366017281225865e-06, "loss": 0.0505, "step": 49537 }, { "epoch": 0.8835657974530018, "grad_norm": 0.31663379073143005, "learning_rate": 2.035986426802358e-06, "loss": 0.1026, "step": 49538 }, { "epoch": 0.8835836335747155, "grad_norm": 0.28553298115730286, "learning_rate": 2.035371214498585e-06, "loss": 0.1204, "step": 49539 }, { "epoch": 0.8836014696964292, "grad_norm": 0.3004331588745117, "learning_rate": 2.0347560912136438e-06, "loss": 0.0882, "step": 49540 }, { "epoch": 0.8836193058181429, "grad_norm": 0.34469443559646606, "learning_rate": 2.0341410569499203e-06, "loss": 0.1538, "step": 49541 }, { "epoch": 0.8836371419398567, "grad_norm": 0.2539229989051819, "learning_rate": 2.0335261117097963e-06, "loss": 0.1072, "step": 49542 }, { "epoch": 0.8836549780615703, "grad_norm": 0.19817131757736206, "learning_rate": 2.0329112554956535e-06, "loss": 0.056, "step": 49543 }, { "epoch": 0.883672814183284, "grad_norm": 0.260511189699173, "learning_rate": 2.0322964883098844e-06, "loss": 0.1199, "step": 49544 }, { "epoch": 0.8836906503049977, "grad_norm": 0.2513865828514099, "learning_rate": 2.0316818101548675e-06, "loss": 0.0709, "step": 49545 }, { "epoch": 0.8837084864267114, "grad_norm": 0.29783475399017334, "learning_rate": 2.031067221032984e-06, "loss": 0.1564, "step": 49546 }, { "epoch": 0.8837263225484251, "grad_norm": 0.3489333987236023, "learning_rate": 2.030452720946613e-06, "loss": 0.0718, "step": 49547 }, { "epoch": 0.8837441586701388, "grad_norm": 0.18202170729637146, "learning_rate": 2.0298383098981467e-06, "loss": 0.0651, "step": 49548 }, { "epoch": 0.8837619947918525, "grad_norm": 0.28724801540374756, "learning_rate": 2.0292239878899554e-06, "loss": 0.0536, "step": 49549 }, { "epoch": 0.8837798309135662, "grad_norm": 0.2434171587228775, "learning_rate": 2.0286097549244293e-06, "loss": 0.0821, "step": 49550 }, { "epoch": 0.8837976670352798, "grad_norm": 0.2876301109790802, "learning_rate": 2.027995611003941e-06, "loss": 0.1101, "step": 49551 }, { "epoch": 0.8838155031569935, "grad_norm": 0.2827867865562439, "learning_rate": 2.027381556130881e-06, "loss": 0.1221, "step": 49552 }, { "epoch": 0.8838333392787072, "grad_norm": 0.3787000775337219, "learning_rate": 2.026767590307624e-06, "loss": 0.0967, "step": 49553 }, { "epoch": 0.8838511754004209, "grad_norm": 0.26926618814468384, "learning_rate": 2.026153713536552e-06, "loss": 0.1284, "step": 49554 }, { "epoch": 0.8838690115221346, "grad_norm": 0.40540242195129395, "learning_rate": 2.025539925820041e-06, "loss": 0.1276, "step": 49555 }, { "epoch": 0.8838868476438483, "grad_norm": 0.2444566935300827, "learning_rate": 2.0249262271604702e-06, "loss": 0.1183, "step": 49556 }, { "epoch": 0.883904683765562, "grad_norm": 0.26007384061813354, "learning_rate": 2.0243126175602256e-06, "loss": 0.1141, "step": 49557 }, { "epoch": 0.8839225198872757, "grad_norm": 0.3018735349178314, "learning_rate": 2.023699097021678e-06, "loss": 0.1161, "step": 49558 }, { "epoch": 0.8839403560089895, "grad_norm": 0.25285229086875916, "learning_rate": 2.0230856655472114e-06, "loss": 0.0698, "step": 49559 }, { "epoch": 0.8839581921307031, "grad_norm": 0.3195061981678009, "learning_rate": 2.0224723231391935e-06, "loss": 0.1111, "step": 49560 }, { "epoch": 0.8839760282524168, "grad_norm": 0.29395386576652527, "learning_rate": 2.021859069800017e-06, "loss": 0.1067, "step": 49561 }, { "epoch": 0.8839938643741305, "grad_norm": 0.37777024507522583, "learning_rate": 2.021245905532043e-06, "loss": 0.1095, "step": 49562 }, { "epoch": 0.8840117004958442, "grad_norm": 0.3774457573890686, "learning_rate": 2.0206328303376625e-06, "loss": 0.0861, "step": 49563 }, { "epoch": 0.8840295366175579, "grad_norm": 0.3174244165420532, "learning_rate": 2.0200198442192423e-06, "loss": 0.1127, "step": 49564 }, { "epoch": 0.8840473727392716, "grad_norm": 0.31690070033073425, "learning_rate": 2.0194069471791644e-06, "loss": 0.0829, "step": 49565 }, { "epoch": 0.8840652088609853, "grad_norm": 0.31819257140159607, "learning_rate": 2.018794139219804e-06, "loss": 0.1355, "step": 49566 }, { "epoch": 0.884083044982699, "grad_norm": 0.27593687176704407, "learning_rate": 2.0181814203435345e-06, "loss": 0.0952, "step": 49567 }, { "epoch": 0.8841008811044127, "grad_norm": 0.2703091502189636, "learning_rate": 2.017568790552732e-06, "loss": 0.1269, "step": 49568 }, { "epoch": 0.8841187172261263, "grad_norm": 0.2493617683649063, "learning_rate": 2.0169562498497662e-06, "loss": 0.0915, "step": 49569 }, { "epoch": 0.88413655334784, "grad_norm": 0.24354255199432373, "learning_rate": 2.016343798237022e-06, "loss": 0.1144, "step": 49570 }, { "epoch": 0.8841543894695537, "grad_norm": 0.26051583886146545, "learning_rate": 2.0157314357168637e-06, "loss": 0.0988, "step": 49571 }, { "epoch": 0.8841722255912674, "grad_norm": 0.25750574469566345, "learning_rate": 2.015119162291673e-06, "loss": 0.1034, "step": 49572 }, { "epoch": 0.8841900617129811, "grad_norm": 0.21313445270061493, "learning_rate": 2.014506977963812e-06, "loss": 0.0918, "step": 49573 }, { "epoch": 0.8842078978346948, "grad_norm": 0.24788883328437805, "learning_rate": 2.0138948827356673e-06, "loss": 0.1134, "step": 49574 }, { "epoch": 0.8842257339564085, "grad_norm": 0.24662795662879944, "learning_rate": 2.013282876609604e-06, "loss": 0.1408, "step": 49575 }, { "epoch": 0.8842435700781223, "grad_norm": 0.2726486325263977, "learning_rate": 2.0126709595879895e-06, "loss": 0.137, "step": 49576 }, { "epoch": 0.884261406199836, "grad_norm": 0.2955690622329712, "learning_rate": 2.012059131673205e-06, "loss": 0.1025, "step": 49577 }, { "epoch": 0.8842792423215496, "grad_norm": 0.3030845820903778, "learning_rate": 2.0114473928676212e-06, "loss": 0.1053, "step": 49578 }, { "epoch": 0.8842970784432633, "grad_norm": 0.31055718660354614, "learning_rate": 2.0108357431736088e-06, "loss": 0.1263, "step": 49579 }, { "epoch": 0.884314914564977, "grad_norm": 0.30920547246932983, "learning_rate": 2.010224182593537e-06, "loss": 0.1461, "step": 49580 }, { "epoch": 0.8843327506866907, "grad_norm": 0.24259616434574127, "learning_rate": 2.0096127111297767e-06, "loss": 0.1173, "step": 49581 }, { "epoch": 0.8843505868084044, "grad_norm": 0.24070823192596436, "learning_rate": 2.0090013287846933e-06, "loss": 0.0754, "step": 49582 }, { "epoch": 0.8843684229301181, "grad_norm": 0.5232437252998352, "learning_rate": 2.0083900355606673e-06, "loss": 0.1225, "step": 49583 }, { "epoch": 0.8843862590518318, "grad_norm": 0.24039065837860107, "learning_rate": 2.007778831460061e-06, "loss": 0.0843, "step": 49584 }, { "epoch": 0.8844040951735455, "grad_norm": 0.2232964187860489, "learning_rate": 2.0071677164852447e-06, "loss": 0.0677, "step": 49585 }, { "epoch": 0.8844219312952591, "grad_norm": 0.29047688841819763, "learning_rate": 2.0065566906385834e-06, "loss": 0.1175, "step": 49586 }, { "epoch": 0.8844397674169728, "grad_norm": 0.35257411003112793, "learning_rate": 2.0059457539224557e-06, "loss": 0.0974, "step": 49587 }, { "epoch": 0.8844576035386865, "grad_norm": 0.24554544687271118, "learning_rate": 2.0053349063392234e-06, "loss": 0.084, "step": 49588 }, { "epoch": 0.8844754396604002, "grad_norm": 0.2537404000759125, "learning_rate": 2.0047241478912515e-06, "loss": 0.1458, "step": 49589 }, { "epoch": 0.8844932757821139, "grad_norm": 0.21217772364616394, "learning_rate": 2.0041134785809155e-06, "loss": 0.0471, "step": 49590 }, { "epoch": 0.8845111119038276, "grad_norm": 0.33039751648902893, "learning_rate": 2.0035028984105726e-06, "loss": 0.1272, "step": 49591 }, { "epoch": 0.8845289480255413, "grad_norm": 0.23602797091007233, "learning_rate": 2.0028924073826006e-06, "loss": 0.0948, "step": 49592 }, { "epoch": 0.8845467841472551, "grad_norm": 0.2822903096675873, "learning_rate": 2.002282005499362e-06, "loss": 0.1908, "step": 49593 }, { "epoch": 0.8845646202689688, "grad_norm": 0.5448985695838928, "learning_rate": 2.0016716927632182e-06, "loss": 0.1655, "step": 49594 }, { "epoch": 0.8845824563906824, "grad_norm": 0.5512979626655579, "learning_rate": 2.001061469176538e-06, "loss": 0.1292, "step": 49595 }, { "epoch": 0.8846002925123961, "grad_norm": 0.25886738300323486, "learning_rate": 2.0004513347416875e-06, "loss": 0.135, "step": 49596 }, { "epoch": 0.8846181286341098, "grad_norm": 0.2702408730983734, "learning_rate": 1.9998412894610325e-06, "loss": 0.1394, "step": 49597 }, { "epoch": 0.8846359647558235, "grad_norm": 0.3085061311721802, "learning_rate": 1.9992313333369373e-06, "loss": 0.0831, "step": 49598 }, { "epoch": 0.8846538008775372, "grad_norm": 0.25254586338996887, "learning_rate": 1.998621466371761e-06, "loss": 0.0769, "step": 49599 }, { "epoch": 0.8846716369992509, "grad_norm": 0.326857328414917, "learning_rate": 1.9980116885678774e-06, "loss": 0.0902, "step": 49600 }, { "epoch": 0.8846894731209646, "grad_norm": 0.23103033006191254, "learning_rate": 1.997401999927645e-06, "loss": 0.1017, "step": 49601 }, { "epoch": 0.8847073092426783, "grad_norm": 0.31632867455482483, "learning_rate": 1.996792400453426e-06, "loss": 0.1181, "step": 49602 }, { "epoch": 0.884725145364392, "grad_norm": 0.23101124167442322, "learning_rate": 1.996182890147583e-06, "loss": 0.147, "step": 49603 }, { "epoch": 0.8847429814861056, "grad_norm": 0.25761860609054565, "learning_rate": 1.9955734690124766e-06, "loss": 0.1129, "step": 49604 }, { "epoch": 0.8847608176078193, "grad_norm": 0.3172912895679474, "learning_rate": 1.9949641370504812e-06, "loss": 0.1162, "step": 49605 }, { "epoch": 0.884778653729533, "grad_norm": 0.3307587504386902, "learning_rate": 1.9943548942639473e-06, "loss": 0.1402, "step": 49606 }, { "epoch": 0.8847964898512467, "grad_norm": 0.2567841410636902, "learning_rate": 1.9937457406552422e-06, "loss": 0.1215, "step": 49607 }, { "epoch": 0.8848143259729604, "grad_norm": 0.3045521378517151, "learning_rate": 1.9931366762267173e-06, "loss": 0.1255, "step": 49608 }, { "epoch": 0.8848321620946741, "grad_norm": 0.41305452585220337, "learning_rate": 1.992527700980748e-06, "loss": 0.1301, "step": 49609 }, { "epoch": 0.8848499982163879, "grad_norm": 0.3504682183265686, "learning_rate": 1.9919188149196883e-06, "loss": 0.1215, "step": 49610 }, { "epoch": 0.8848678343381016, "grad_norm": 0.2765824794769287, "learning_rate": 1.9913100180458944e-06, "loss": 0.0972, "step": 49611 }, { "epoch": 0.8848856704598153, "grad_norm": 0.2925173044204712, "learning_rate": 1.9907013103617285e-06, "loss": 0.1484, "step": 49612 }, { "epoch": 0.8849035065815289, "grad_norm": 0.3033662736415863, "learning_rate": 1.990092691869555e-06, "loss": 0.0994, "step": 49613 }, { "epoch": 0.8849213427032426, "grad_norm": 0.2038131058216095, "learning_rate": 1.989484162571731e-06, "loss": 0.0651, "step": 49614 }, { "epoch": 0.8849391788249563, "grad_norm": 0.2634255290031433, "learning_rate": 1.9888757224706123e-06, "loss": 0.0717, "step": 49615 }, { "epoch": 0.88495701494667, "grad_norm": 0.26705339550971985, "learning_rate": 1.9882673715685556e-06, "loss": 0.1119, "step": 49616 }, { "epoch": 0.8849748510683837, "grad_norm": 0.2884557247161865, "learning_rate": 1.987659109867926e-06, "loss": 0.1366, "step": 49617 }, { "epoch": 0.8849926871900974, "grad_norm": 0.2735777795314789, "learning_rate": 1.987050937371074e-06, "loss": 0.1289, "step": 49618 }, { "epoch": 0.8850105233118111, "grad_norm": 0.3681984543800354, "learning_rate": 1.9864428540803643e-06, "loss": 0.1294, "step": 49619 }, { "epoch": 0.8850283594335248, "grad_norm": 0.3934820890426636, "learning_rate": 1.985834859998151e-06, "loss": 0.0922, "step": 49620 }, { "epoch": 0.8850461955552384, "grad_norm": 0.211423859000206, "learning_rate": 1.985226955126787e-06, "loss": 0.1487, "step": 49621 }, { "epoch": 0.8850640316769521, "grad_norm": 0.25320059061050415, "learning_rate": 1.984619139468638e-06, "loss": 0.1318, "step": 49622 }, { "epoch": 0.8850818677986658, "grad_norm": 0.29037126898765564, "learning_rate": 1.984011413026052e-06, "loss": 0.0775, "step": 49623 }, { "epoch": 0.8850997039203795, "grad_norm": 0.2292759120464325, "learning_rate": 1.9834037758013903e-06, "loss": 0.0659, "step": 49624 }, { "epoch": 0.8851175400420932, "grad_norm": 0.28474709391593933, "learning_rate": 1.9827962277969986e-06, "loss": 0.1239, "step": 49625 }, { "epoch": 0.885135376163807, "grad_norm": 0.25914838910102844, "learning_rate": 1.982188769015242e-06, "loss": 0.079, "step": 49626 }, { "epoch": 0.8851532122855207, "grad_norm": 0.24624231457710266, "learning_rate": 1.9815813994584737e-06, "loss": 0.0898, "step": 49627 }, { "epoch": 0.8851710484072344, "grad_norm": 0.321963906288147, "learning_rate": 1.980974119129045e-06, "loss": 0.1025, "step": 49628 }, { "epoch": 0.8851888845289481, "grad_norm": 0.19781802594661713, "learning_rate": 1.980366928029309e-06, "loss": 0.1001, "step": 49629 }, { "epoch": 0.8852067206506617, "grad_norm": 0.2166374772787094, "learning_rate": 1.979759826161623e-06, "loss": 0.0802, "step": 49630 }, { "epoch": 0.8852245567723754, "grad_norm": 0.37161746621131897, "learning_rate": 1.979152813528337e-06, "loss": 0.1814, "step": 49631 }, { "epoch": 0.8852423928940891, "grad_norm": 0.25688329339027405, "learning_rate": 1.978545890131808e-06, "loss": 0.1009, "step": 49632 }, { "epoch": 0.8852602290158028, "grad_norm": 0.3030528128147125, "learning_rate": 1.977939055974387e-06, "loss": 0.1254, "step": 49633 }, { "epoch": 0.8852780651375165, "grad_norm": 0.22229984402656555, "learning_rate": 1.9773323110584217e-06, "loss": 0.1024, "step": 49634 }, { "epoch": 0.8852959012592302, "grad_norm": 0.22781424224376678, "learning_rate": 1.9767256553862746e-06, "loss": 0.0853, "step": 49635 }, { "epoch": 0.8853137373809439, "grad_norm": 0.3471640944480896, "learning_rate": 1.976119088960288e-06, "loss": 0.1532, "step": 49636 }, { "epoch": 0.8853315735026576, "grad_norm": 0.28439220786094666, "learning_rate": 1.9755126117828178e-06, "loss": 0.0758, "step": 49637 }, { "epoch": 0.8853494096243713, "grad_norm": 0.3464740812778473, "learning_rate": 1.9749062238562073e-06, "loss": 0.1156, "step": 49638 }, { "epoch": 0.8853672457460849, "grad_norm": 0.320024698972702, "learning_rate": 1.974299925182818e-06, "loss": 0.0901, "step": 49639 }, { "epoch": 0.8853850818677986, "grad_norm": 0.265421062707901, "learning_rate": 1.973693715764996e-06, "loss": 0.136, "step": 49640 }, { "epoch": 0.8854029179895123, "grad_norm": 0.2583046555519104, "learning_rate": 1.973087595605089e-06, "loss": 0.0745, "step": 49641 }, { "epoch": 0.885420754111226, "grad_norm": 0.3214467465877533, "learning_rate": 1.972481564705442e-06, "loss": 0.0806, "step": 49642 }, { "epoch": 0.8854385902329398, "grad_norm": 0.19599206745624542, "learning_rate": 1.9718756230684173e-06, "loss": 0.0936, "step": 49643 }, { "epoch": 0.8854564263546535, "grad_norm": 0.28757527470588684, "learning_rate": 1.9712697706963546e-06, "loss": 0.1146, "step": 49644 }, { "epoch": 0.8854742624763672, "grad_norm": 0.36969393491744995, "learning_rate": 1.9706640075915996e-06, "loss": 0.1379, "step": 49645 }, { "epoch": 0.8854920985980809, "grad_norm": 0.2446933537721634, "learning_rate": 1.9700583337565084e-06, "loss": 0.0949, "step": 49646 }, { "epoch": 0.8855099347197946, "grad_norm": 0.28313106298446655, "learning_rate": 1.9694527491934233e-06, "loss": 0.1198, "step": 49647 }, { "epoch": 0.8855277708415082, "grad_norm": 0.2282789945602417, "learning_rate": 1.968847253904696e-06, "loss": 0.087, "step": 49648 }, { "epoch": 0.8855456069632219, "grad_norm": 0.1931321620941162, "learning_rate": 1.968241847892674e-06, "loss": 0.0743, "step": 49649 }, { "epoch": 0.8855634430849356, "grad_norm": 0.34312137961387634, "learning_rate": 1.9676365311597e-06, "loss": 0.1206, "step": 49650 }, { "epoch": 0.8855812792066493, "grad_norm": 0.2022121548652649, "learning_rate": 1.9670313037081167e-06, "loss": 0.0591, "step": 49651 }, { "epoch": 0.885599115328363, "grad_norm": 0.26006537675857544, "learning_rate": 1.9664261655402807e-06, "loss": 0.0965, "step": 49652 }, { "epoch": 0.8856169514500767, "grad_norm": 0.318836510181427, "learning_rate": 1.965821116658534e-06, "loss": 0.1165, "step": 49653 }, { "epoch": 0.8856347875717904, "grad_norm": 0.3420856297016144, "learning_rate": 1.9652161570652168e-06, "loss": 0.1039, "step": 49654 }, { "epoch": 0.8856526236935041, "grad_norm": 0.2644520699977875, "learning_rate": 1.9646112867626772e-06, "loss": 0.1108, "step": 49655 }, { "epoch": 0.8856704598152177, "grad_norm": 0.2115037888288498, "learning_rate": 1.964006505753263e-06, "loss": 0.0865, "step": 49656 }, { "epoch": 0.8856882959369314, "grad_norm": 0.2404254823923111, "learning_rate": 1.9634018140393148e-06, "loss": 0.1002, "step": 49657 }, { "epoch": 0.8857061320586451, "grad_norm": 0.3011566996574402, "learning_rate": 1.9627972116231797e-06, "loss": 0.0731, "step": 49658 }, { "epoch": 0.8857239681803588, "grad_norm": 0.35265228152275085, "learning_rate": 1.9621926985071953e-06, "loss": 0.0777, "step": 49659 }, { "epoch": 0.8857418043020726, "grad_norm": 0.29091161489486694, "learning_rate": 1.961588274693707e-06, "loss": 0.2177, "step": 49660 }, { "epoch": 0.8857596404237863, "grad_norm": 0.22855977714061737, "learning_rate": 1.9609839401850654e-06, "loss": 0.1001, "step": 49661 }, { "epoch": 0.8857774765455, "grad_norm": 0.2923247516155243, "learning_rate": 1.960379694983608e-06, "loss": 0.1337, "step": 49662 }, { "epoch": 0.8857953126672137, "grad_norm": 0.2918730080127716, "learning_rate": 1.9597755390916737e-06, "loss": 0.1189, "step": 49663 }, { "epoch": 0.8858131487889274, "grad_norm": 0.27331048250198364, "learning_rate": 1.9591714725116063e-06, "loss": 0.1208, "step": 49664 }, { "epoch": 0.885830984910641, "grad_norm": 0.2466292381286621, "learning_rate": 1.95856749524575e-06, "loss": 0.121, "step": 49665 }, { "epoch": 0.8858488210323547, "grad_norm": 0.26272016763687134, "learning_rate": 1.9579636072964454e-06, "loss": 0.118, "step": 49666 }, { "epoch": 0.8858666571540684, "grad_norm": 0.2634202241897583, "learning_rate": 1.9573598086660343e-06, "loss": 0.142, "step": 49667 }, { "epoch": 0.8858844932757821, "grad_norm": 0.24630270898342133, "learning_rate": 1.956756099356849e-06, "loss": 0.0998, "step": 49668 }, { "epoch": 0.8859023293974958, "grad_norm": 0.2468818575143814, "learning_rate": 1.95615247937124e-06, "loss": 0.107, "step": 49669 }, { "epoch": 0.8859201655192095, "grad_norm": 0.2564699053764343, "learning_rate": 1.955548948711544e-06, "loss": 0.0979, "step": 49670 }, { "epoch": 0.8859380016409232, "grad_norm": 0.32016000151634216, "learning_rate": 1.9549455073800985e-06, "loss": 0.185, "step": 49671 }, { "epoch": 0.8859558377626369, "grad_norm": 0.3354667127132416, "learning_rate": 1.9543421553792406e-06, "loss": 0.124, "step": 49672 }, { "epoch": 0.8859736738843506, "grad_norm": 0.27343663573265076, "learning_rate": 1.9537388927113155e-06, "loss": 0.1356, "step": 49673 }, { "epoch": 0.8859915100060642, "grad_norm": 0.2568040192127228, "learning_rate": 1.9531357193786543e-06, "loss": 0.0816, "step": 49674 }, { "epoch": 0.8860093461277779, "grad_norm": 0.2814258635044098, "learning_rate": 1.952532635383603e-06, "loss": 0.1203, "step": 49675 }, { "epoch": 0.8860271822494916, "grad_norm": 0.33536139130592346, "learning_rate": 1.9519296407284977e-06, "loss": 0.1241, "step": 49676 }, { "epoch": 0.8860450183712054, "grad_norm": 0.27339211106300354, "learning_rate": 1.951326735415668e-06, "loss": 0.1398, "step": 49677 }, { "epoch": 0.8860628544929191, "grad_norm": 0.25292903184890747, "learning_rate": 1.9507239194474586e-06, "loss": 0.1206, "step": 49678 }, { "epoch": 0.8860806906146328, "grad_norm": 0.23713791370391846, "learning_rate": 1.950121192826207e-06, "loss": 0.1253, "step": 49679 }, { "epoch": 0.8860985267363465, "grad_norm": 0.2483091503381729, "learning_rate": 1.9495185555542473e-06, "loss": 0.1199, "step": 49680 }, { "epoch": 0.8861163628580602, "grad_norm": 0.28200775384902954, "learning_rate": 1.948916007633911e-06, "loss": 0.1303, "step": 49681 }, { "epoch": 0.8861341989797739, "grad_norm": 0.2500542104244232, "learning_rate": 1.9483135490675403e-06, "loss": 0.0846, "step": 49682 }, { "epoch": 0.8861520351014875, "grad_norm": 0.28934359550476074, "learning_rate": 1.947711179857467e-06, "loss": 0.1507, "step": 49683 }, { "epoch": 0.8861698712232012, "grad_norm": 0.2448359727859497, "learning_rate": 1.9471089000060284e-06, "loss": 0.0759, "step": 49684 }, { "epoch": 0.8861877073449149, "grad_norm": 0.23450268805027008, "learning_rate": 1.946506709515561e-06, "loss": 0.1099, "step": 49685 }, { "epoch": 0.8862055434666286, "grad_norm": 0.28258439898490906, "learning_rate": 1.945904608388388e-06, "loss": 0.1198, "step": 49686 }, { "epoch": 0.8862233795883423, "grad_norm": 0.2995196580886841, "learning_rate": 1.945302596626852e-06, "loss": 0.0979, "step": 49687 }, { "epoch": 0.886241215710056, "grad_norm": 0.3189823627471924, "learning_rate": 1.9447006742332906e-06, "loss": 0.1327, "step": 49688 }, { "epoch": 0.8862590518317697, "grad_norm": 0.279433012008667, "learning_rate": 1.944098841210032e-06, "loss": 0.1084, "step": 49689 }, { "epoch": 0.8862768879534834, "grad_norm": 0.24563564360141754, "learning_rate": 1.9434970975594073e-06, "loss": 0.1216, "step": 49690 }, { "epoch": 0.886294724075197, "grad_norm": 0.22916372120380402, "learning_rate": 1.942895443283754e-06, "loss": 0.1335, "step": 49691 }, { "epoch": 0.8863125601969107, "grad_norm": 0.27623093128204346, "learning_rate": 1.9422938783854013e-06, "loss": 0.1254, "step": 49692 }, { "epoch": 0.8863303963186244, "grad_norm": 0.26458650827407837, "learning_rate": 1.941692402866682e-06, "loss": 0.1103, "step": 49693 }, { "epoch": 0.8863482324403382, "grad_norm": 0.2947564125061035, "learning_rate": 1.9410910167299234e-06, "loss": 0.127, "step": 49694 }, { "epoch": 0.8863660685620519, "grad_norm": 0.27672770619392395, "learning_rate": 1.940489719977462e-06, "loss": 0.0956, "step": 49695 }, { "epoch": 0.8863839046837656, "grad_norm": 0.29894351959228516, "learning_rate": 1.939888512611629e-06, "loss": 0.129, "step": 49696 }, { "epoch": 0.8864017408054793, "grad_norm": 0.33805254101753235, "learning_rate": 1.9392873946347534e-06, "loss": 0.1153, "step": 49697 }, { "epoch": 0.886419576927193, "grad_norm": 0.1979510486125946, "learning_rate": 1.938686366049164e-06, "loss": 0.0568, "step": 49698 }, { "epoch": 0.8864374130489067, "grad_norm": 0.32126984000205994, "learning_rate": 1.9380854268571863e-06, "loss": 0.0763, "step": 49699 }, { "epoch": 0.8864552491706204, "grad_norm": 0.262399286031723, "learning_rate": 1.9374845770611604e-06, "loss": 0.0993, "step": 49700 }, { "epoch": 0.886473085292334, "grad_norm": 0.34093043208122253, "learning_rate": 1.936883816663404e-06, "loss": 0.0907, "step": 49701 }, { "epoch": 0.8864909214140477, "grad_norm": 0.2571010887622833, "learning_rate": 1.936283145666257e-06, "loss": 0.1206, "step": 49702 }, { "epoch": 0.8865087575357614, "grad_norm": 0.24466572701931, "learning_rate": 1.9356825640720387e-06, "loss": 0.1031, "step": 49703 }, { "epoch": 0.8865265936574751, "grad_norm": 0.28962230682373047, "learning_rate": 1.9350820718830846e-06, "loss": 0.174, "step": 49704 }, { "epoch": 0.8865444297791888, "grad_norm": 0.25840824842453003, "learning_rate": 1.9344816691017174e-06, "loss": 0.1122, "step": 49705 }, { "epoch": 0.8865622659009025, "grad_norm": 0.46787112951278687, "learning_rate": 1.9338813557302687e-06, "loss": 0.1541, "step": 49706 }, { "epoch": 0.8865801020226162, "grad_norm": 0.2818623483181, "learning_rate": 1.933281131771056e-06, "loss": 0.0582, "step": 49707 }, { "epoch": 0.8865979381443299, "grad_norm": 0.20159101486206055, "learning_rate": 1.932680997226419e-06, "loss": 0.0972, "step": 49708 }, { "epoch": 0.8866157742660435, "grad_norm": 0.23572859168052673, "learning_rate": 1.9320809520986747e-06, "loss": 0.0931, "step": 49709 }, { "epoch": 0.8866336103877572, "grad_norm": 0.2375248372554779, "learning_rate": 1.9314809963901533e-06, "loss": 0.1032, "step": 49710 }, { "epoch": 0.886651446509471, "grad_norm": 0.35562703013420105, "learning_rate": 1.930881130103182e-06, "loss": 0.1757, "step": 49711 }, { "epoch": 0.8866692826311847, "grad_norm": 0.24101747572422028, "learning_rate": 1.9302813532400766e-06, "loss": 0.0856, "step": 49712 }, { "epoch": 0.8866871187528984, "grad_norm": 0.24090975522994995, "learning_rate": 1.929681665803171e-06, "loss": 0.0547, "step": 49713 }, { "epoch": 0.8867049548746121, "grad_norm": 0.43578094244003296, "learning_rate": 1.929082067794785e-06, "loss": 0.1828, "step": 49714 }, { "epoch": 0.8867227909963258, "grad_norm": 0.26033157110214233, "learning_rate": 1.928482559217251e-06, "loss": 0.0763, "step": 49715 }, { "epoch": 0.8867406271180395, "grad_norm": 0.2312462329864502, "learning_rate": 1.927883140072881e-06, "loss": 0.1303, "step": 49716 }, { "epoch": 0.8867584632397532, "grad_norm": 0.2216435670852661, "learning_rate": 1.9272838103640112e-06, "loss": 0.1238, "step": 49717 }, { "epoch": 0.8867762993614668, "grad_norm": 0.3196179270744324, "learning_rate": 1.926684570092957e-06, "loss": 0.1292, "step": 49718 }, { "epoch": 0.8867941354831805, "grad_norm": 0.28491735458374023, "learning_rate": 1.9260854192620413e-06, "loss": 0.1087, "step": 49719 }, { "epoch": 0.8868119716048942, "grad_norm": 0.31764113903045654, "learning_rate": 1.925486357873585e-06, "loss": 0.1288, "step": 49720 }, { "epoch": 0.8868298077266079, "grad_norm": 0.32463398575782776, "learning_rate": 1.924887385929919e-06, "loss": 0.1125, "step": 49721 }, { "epoch": 0.8868476438483216, "grad_norm": 0.20111265778541565, "learning_rate": 1.924288503433358e-06, "loss": 0.0487, "step": 49722 }, { "epoch": 0.8868654799700353, "grad_norm": 0.3146669268608093, "learning_rate": 1.9236897103862255e-06, "loss": 0.0787, "step": 49723 }, { "epoch": 0.886883316091749, "grad_norm": 0.2350408136844635, "learning_rate": 1.923091006790839e-06, "loss": 0.1128, "step": 49724 }, { "epoch": 0.8869011522134627, "grad_norm": 0.2963438630104065, "learning_rate": 1.922492392649522e-06, "loss": 0.1082, "step": 49725 }, { "epoch": 0.8869189883351764, "grad_norm": 0.28236159682273865, "learning_rate": 1.921893867964597e-06, "loss": 0.1624, "step": 49726 }, { "epoch": 0.88693682445689, "grad_norm": 0.2680119276046753, "learning_rate": 1.9212954327383845e-06, "loss": 0.0966, "step": 49727 }, { "epoch": 0.8869546605786038, "grad_norm": 0.32395491003990173, "learning_rate": 1.9206970869731946e-06, "loss": 0.163, "step": 49728 }, { "epoch": 0.8869724967003175, "grad_norm": 0.32477495074272156, "learning_rate": 1.9200988306713603e-06, "loss": 0.0899, "step": 49729 }, { "epoch": 0.8869903328220312, "grad_norm": 0.31545814871788025, "learning_rate": 1.9195006638351914e-06, "loss": 0.129, "step": 49730 }, { "epoch": 0.8870081689437449, "grad_norm": 0.2848271429538727, "learning_rate": 1.9189025864670114e-06, "loss": 0.1387, "step": 49731 }, { "epoch": 0.8870260050654586, "grad_norm": 0.2928328514099121, "learning_rate": 1.91830459856914e-06, "loss": 0.137, "step": 49732 }, { "epoch": 0.8870438411871723, "grad_norm": 0.3611222207546234, "learning_rate": 1.9177067001438842e-06, "loss": 0.1216, "step": 49733 }, { "epoch": 0.887061677308886, "grad_norm": 0.22243250906467438, "learning_rate": 1.9171088911935754e-06, "loss": 0.1185, "step": 49734 }, { "epoch": 0.8870795134305997, "grad_norm": 0.28262144327163696, "learning_rate": 1.9165111717205254e-06, "loss": 0.0904, "step": 49735 }, { "epoch": 0.8870973495523133, "grad_norm": 0.3102903664112091, "learning_rate": 1.915913541727052e-06, "loss": 0.1132, "step": 49736 }, { "epoch": 0.887115185674027, "grad_norm": 0.28757914900779724, "learning_rate": 1.9153160012154695e-06, "loss": 0.1222, "step": 49737 }, { "epoch": 0.8871330217957407, "grad_norm": 0.26365897059440613, "learning_rate": 1.914718550188091e-06, "loss": 0.1, "step": 49738 }, { "epoch": 0.8871508579174544, "grad_norm": 0.26044753193855286, "learning_rate": 1.9141211886472414e-06, "loss": 0.0804, "step": 49739 }, { "epoch": 0.8871686940391681, "grad_norm": 0.3047908544540405, "learning_rate": 1.9135239165952307e-06, "loss": 0.1395, "step": 49740 }, { "epoch": 0.8871865301608818, "grad_norm": 0.19373618066310883, "learning_rate": 1.912926734034373e-06, "loss": 0.0867, "step": 49741 }, { "epoch": 0.8872043662825955, "grad_norm": 0.27001887559890747, "learning_rate": 1.9123296409669896e-06, "loss": 0.0945, "step": 49742 }, { "epoch": 0.8872222024043092, "grad_norm": 0.2431207150220871, "learning_rate": 1.911732637395383e-06, "loss": 0.112, "step": 49743 }, { "epoch": 0.887240038526023, "grad_norm": 0.24677345156669617, "learning_rate": 1.911135723321883e-06, "loss": 0.0821, "step": 49744 }, { "epoch": 0.8872578746477366, "grad_norm": 0.19661147892475128, "learning_rate": 1.910538898748793e-06, "loss": 0.0723, "step": 49745 }, { "epoch": 0.8872757107694503, "grad_norm": 0.32297438383102417, "learning_rate": 1.909942163678427e-06, "loss": 0.1247, "step": 49746 }, { "epoch": 0.887293546891164, "grad_norm": 0.24456030130386353, "learning_rate": 1.9093455181131037e-06, "loss": 0.0957, "step": 49747 }, { "epoch": 0.8873113830128777, "grad_norm": 0.23744532465934753, "learning_rate": 1.9087489620551317e-06, "loss": 0.0984, "step": 49748 }, { "epoch": 0.8873292191345914, "grad_norm": 0.3174133598804474, "learning_rate": 1.908152495506824e-06, "loss": 0.0752, "step": 49749 }, { "epoch": 0.8873470552563051, "grad_norm": 0.2902401387691498, "learning_rate": 1.9075561184704938e-06, "loss": 0.0904, "step": 49750 }, { "epoch": 0.8873648913780188, "grad_norm": 0.24518780410289764, "learning_rate": 1.906959830948446e-06, "loss": 0.061, "step": 49751 }, { "epoch": 0.8873827274997325, "grad_norm": 0.33072373270988464, "learning_rate": 1.9063636329430036e-06, "loss": 0.1131, "step": 49752 }, { "epoch": 0.8874005636214461, "grad_norm": 0.2066785991191864, "learning_rate": 1.9057675244564726e-06, "loss": 0.0564, "step": 49753 }, { "epoch": 0.8874183997431598, "grad_norm": 0.2651154398918152, "learning_rate": 1.905171505491163e-06, "loss": 0.122, "step": 49754 }, { "epoch": 0.8874362358648735, "grad_norm": 0.29715126752853394, "learning_rate": 1.9045755760493806e-06, "loss": 0.1041, "step": 49755 }, { "epoch": 0.8874540719865872, "grad_norm": 0.2227640002965927, "learning_rate": 1.903979736133446e-06, "loss": 0.0977, "step": 49756 }, { "epoch": 0.8874719081083009, "grad_norm": 0.22558589279651642, "learning_rate": 1.9033839857456576e-06, "loss": 0.0361, "step": 49757 }, { "epoch": 0.8874897442300146, "grad_norm": 0.23243005573749542, "learning_rate": 1.9027883248883326e-06, "loss": 0.1216, "step": 49758 }, { "epoch": 0.8875075803517283, "grad_norm": 0.2683306336402893, "learning_rate": 1.9021927535637752e-06, "loss": 0.1068, "step": 49759 }, { "epoch": 0.887525416473442, "grad_norm": 0.243260458111763, "learning_rate": 1.9015972717742998e-06, "loss": 0.0803, "step": 49760 }, { "epoch": 0.8875432525951558, "grad_norm": 0.1841120570898056, "learning_rate": 1.9010018795222128e-06, "loss": 0.0897, "step": 49761 }, { "epoch": 0.8875610887168695, "grad_norm": 0.2340327501296997, "learning_rate": 1.9004065768098183e-06, "loss": 0.1119, "step": 49762 }, { "epoch": 0.8875789248385831, "grad_norm": 0.2835519313812256, "learning_rate": 1.8998113636394282e-06, "loss": 0.1577, "step": 49763 }, { "epoch": 0.8875967609602968, "grad_norm": 0.23356936872005463, "learning_rate": 1.8992162400133435e-06, "loss": 0.1539, "step": 49764 }, { "epoch": 0.8876145970820105, "grad_norm": 0.2503810524940491, "learning_rate": 1.8986212059338787e-06, "loss": 0.1276, "step": 49765 }, { "epoch": 0.8876324332037242, "grad_norm": 0.2631121873855591, "learning_rate": 1.8980262614033378e-06, "loss": 0.1181, "step": 49766 }, { "epoch": 0.8876502693254379, "grad_norm": 0.27552226185798645, "learning_rate": 1.8974314064240273e-06, "loss": 0.1611, "step": 49767 }, { "epoch": 0.8876681054471516, "grad_norm": 0.2293064147233963, "learning_rate": 1.8968366409982452e-06, "loss": 0.1058, "step": 49768 }, { "epoch": 0.8876859415688653, "grad_norm": 0.3883008658885956, "learning_rate": 1.8962419651283092e-06, "loss": 0.1064, "step": 49769 }, { "epoch": 0.887703777690579, "grad_norm": 0.27328699827194214, "learning_rate": 1.8956473788165146e-06, "loss": 0.1098, "step": 49770 }, { "epoch": 0.8877216138122926, "grad_norm": 0.23133574426174164, "learning_rate": 1.8950528820651765e-06, "loss": 0.1086, "step": 49771 }, { "epoch": 0.8877394499340063, "grad_norm": 0.21188603341579437, "learning_rate": 1.8944584748765897e-06, "loss": 0.1182, "step": 49772 }, { "epoch": 0.88775728605572, "grad_norm": 0.29569345712661743, "learning_rate": 1.893864157253064e-06, "loss": 0.0967, "step": 49773 }, { "epoch": 0.8877751221774337, "grad_norm": 0.23778888583183289, "learning_rate": 1.893269929196903e-06, "loss": 0.0838, "step": 49774 }, { "epoch": 0.8877929582991474, "grad_norm": 0.2736237645149231, "learning_rate": 1.8926757907104075e-06, "loss": 0.133, "step": 49775 }, { "epoch": 0.8878107944208611, "grad_norm": 0.3804624676704407, "learning_rate": 1.8920817417958815e-06, "loss": 0.0919, "step": 49776 }, { "epoch": 0.8878286305425748, "grad_norm": 0.36116495728492737, "learning_rate": 1.8914877824556253e-06, "loss": 0.1129, "step": 49777 }, { "epoch": 0.8878464666642886, "grad_norm": 0.3025190830230713, "learning_rate": 1.890893912691949e-06, "loss": 0.1434, "step": 49778 }, { "epoch": 0.8878643027860023, "grad_norm": 0.24519546329975128, "learning_rate": 1.8903001325071474e-06, "loss": 0.074, "step": 49779 }, { "epoch": 0.887882138907716, "grad_norm": 0.2681308686733246, "learning_rate": 1.8897064419035244e-06, "loss": 0.129, "step": 49780 }, { "epoch": 0.8878999750294296, "grad_norm": 0.29937997460365295, "learning_rate": 1.8891128408833781e-06, "loss": 0.119, "step": 49781 }, { "epoch": 0.8879178111511433, "grad_norm": 0.26641565561294556, "learning_rate": 1.888519329449015e-06, "loss": 0.0603, "step": 49782 }, { "epoch": 0.887935647272857, "grad_norm": 0.3053593039512634, "learning_rate": 1.8879259076027334e-06, "loss": 0.0512, "step": 49783 }, { "epoch": 0.8879534833945707, "grad_norm": 0.28969714045524597, "learning_rate": 1.8873325753468312e-06, "loss": 0.1614, "step": 49784 }, { "epoch": 0.8879713195162844, "grad_norm": 0.18694236874580383, "learning_rate": 1.8867393326836096e-06, "loss": 0.136, "step": 49785 }, { "epoch": 0.8879891556379981, "grad_norm": 0.34125542640686035, "learning_rate": 1.886146179615375e-06, "loss": 0.1472, "step": 49786 }, { "epoch": 0.8880069917597118, "grad_norm": 0.29271045327186584, "learning_rate": 1.8855531161444201e-06, "loss": 0.1105, "step": 49787 }, { "epoch": 0.8880248278814254, "grad_norm": 0.31370100378990173, "learning_rate": 1.8849601422730457e-06, "loss": 0.1024, "step": 49788 }, { "epoch": 0.8880426640031391, "grad_norm": 0.29369911551475525, "learning_rate": 1.8843672580035498e-06, "loss": 0.0819, "step": 49789 }, { "epoch": 0.8880605001248528, "grad_norm": 0.22341814637184143, "learning_rate": 1.8837744633382227e-06, "loss": 0.0858, "step": 49790 }, { "epoch": 0.8880783362465665, "grad_norm": 0.3277416229248047, "learning_rate": 1.8831817582793787e-06, "loss": 0.1244, "step": 49791 }, { "epoch": 0.8880961723682802, "grad_norm": 0.20853938162326813, "learning_rate": 1.8825891428293024e-06, "loss": 0.0943, "step": 49792 }, { "epoch": 0.8881140084899939, "grad_norm": 0.2934702932834625, "learning_rate": 1.8819966169902975e-06, "loss": 0.08, "step": 49793 }, { "epoch": 0.8881318446117076, "grad_norm": 0.3217245936393738, "learning_rate": 1.8814041807646537e-06, "loss": 0.1128, "step": 49794 }, { "epoch": 0.8881496807334214, "grad_norm": 0.2448878288269043, "learning_rate": 1.8808118341546748e-06, "loss": 0.1118, "step": 49795 }, { "epoch": 0.8881675168551351, "grad_norm": 0.3173161745071411, "learning_rate": 1.8802195771626564e-06, "loss": 0.0936, "step": 49796 }, { "epoch": 0.8881853529768488, "grad_norm": 0.26010164618492126, "learning_rate": 1.8796274097908878e-06, "loss": 0.1112, "step": 49797 }, { "epoch": 0.8882031890985624, "grad_norm": 0.29174935817718506, "learning_rate": 1.8790353320416732e-06, "loss": 0.1345, "step": 49798 }, { "epoch": 0.8882210252202761, "grad_norm": 0.23360399901866913, "learning_rate": 1.8784433439172993e-06, "loss": 0.1442, "step": 49799 }, { "epoch": 0.8882388613419898, "grad_norm": 0.28030991554260254, "learning_rate": 1.8778514454200674e-06, "loss": 0.1404, "step": 49800 }, { "epoch": 0.8882566974637035, "grad_norm": 0.3899186849594116, "learning_rate": 1.8772596365522726e-06, "loss": 0.0906, "step": 49801 }, { "epoch": 0.8882745335854172, "grad_norm": 0.195840984582901, "learning_rate": 1.8766679173162022e-06, "loss": 0.1261, "step": 49802 }, { "epoch": 0.8882923697071309, "grad_norm": 0.3473118245601654, "learning_rate": 1.8760762877141512e-06, "loss": 0.085, "step": 49803 }, { "epoch": 0.8883102058288446, "grad_norm": 0.29280713200569153, "learning_rate": 1.8754847477484183e-06, "loss": 0.1048, "step": 49804 }, { "epoch": 0.8883280419505583, "grad_norm": 0.33813855051994324, "learning_rate": 1.8748932974212957e-06, "loss": 0.1208, "step": 49805 }, { "epoch": 0.8883458780722719, "grad_norm": 0.3542816638946533, "learning_rate": 1.8743019367350707e-06, "loss": 0.1151, "step": 49806 }, { "epoch": 0.8883637141939856, "grad_norm": 0.2478209286928177, "learning_rate": 1.8737106656920384e-06, "loss": 0.0671, "step": 49807 }, { "epoch": 0.8883815503156993, "grad_norm": 0.3592206835746765, "learning_rate": 1.8731194842944916e-06, "loss": 0.1244, "step": 49808 }, { "epoch": 0.888399386437413, "grad_norm": 0.2791474759578705, "learning_rate": 1.8725283925447228e-06, "loss": 0.1256, "step": 49809 }, { "epoch": 0.8884172225591267, "grad_norm": 0.22975216805934906, "learning_rate": 1.8719373904450221e-06, "loss": 0.0943, "step": 49810 }, { "epoch": 0.8884350586808404, "grad_norm": 0.22240890562534332, "learning_rate": 1.8713464779976764e-06, "loss": 0.1485, "step": 49811 }, { "epoch": 0.8884528948025542, "grad_norm": 0.18733976781368256, "learning_rate": 1.870755655204981e-06, "loss": 0.0463, "step": 49812 }, { "epoch": 0.8884707309242679, "grad_norm": 0.2414817214012146, "learning_rate": 1.8701649220692286e-06, "loss": 0.0548, "step": 49813 }, { "epoch": 0.8884885670459816, "grad_norm": 0.2640984356403351, "learning_rate": 1.8695742785927062e-06, "loss": 0.1102, "step": 49814 }, { "epoch": 0.8885064031676952, "grad_norm": 0.2612643539905548, "learning_rate": 1.8689837247776953e-06, "loss": 0.1425, "step": 49815 }, { "epoch": 0.8885242392894089, "grad_norm": 0.29173120856285095, "learning_rate": 1.8683932606264998e-06, "loss": 0.0877, "step": 49816 }, { "epoch": 0.8885420754111226, "grad_norm": 0.23039419949054718, "learning_rate": 1.867802886141401e-06, "loss": 0.0504, "step": 49817 }, { "epoch": 0.8885599115328363, "grad_norm": 0.2941480875015259, "learning_rate": 1.8672126013246889e-06, "loss": 0.0936, "step": 49818 }, { "epoch": 0.88857774765455, "grad_norm": 0.2458086758852005, "learning_rate": 1.8666224061786503e-06, "loss": 0.0806, "step": 49819 }, { "epoch": 0.8885955837762637, "grad_norm": 0.22181279957294464, "learning_rate": 1.8660323007055669e-06, "loss": 0.0974, "step": 49820 }, { "epoch": 0.8886134198979774, "grad_norm": 0.32111454010009766, "learning_rate": 1.8654422849077396e-06, "loss": 0.113, "step": 49821 }, { "epoch": 0.8886312560196911, "grad_norm": 0.31020745635032654, "learning_rate": 1.8648523587874473e-06, "loss": 0.0952, "step": 49822 }, { "epoch": 0.8886490921414048, "grad_norm": 0.2366929054260254, "learning_rate": 1.8642625223469768e-06, "loss": 0.0791, "step": 49823 }, { "epoch": 0.8886669282631184, "grad_norm": 0.26705101132392883, "learning_rate": 1.8636727755886124e-06, "loss": 0.1114, "step": 49824 }, { "epoch": 0.8886847643848321, "grad_norm": 0.6369178295135498, "learning_rate": 1.8630831185146468e-06, "loss": 0.1415, "step": 49825 }, { "epoch": 0.8887026005065458, "grad_norm": 0.273028165102005, "learning_rate": 1.862493551127359e-06, "loss": 0.0813, "step": 49826 }, { "epoch": 0.8887204366282595, "grad_norm": 0.25852999091148376, "learning_rate": 1.861904073429041e-06, "loss": 0.1233, "step": 49827 }, { "epoch": 0.8887382727499732, "grad_norm": 0.28221389651298523, "learning_rate": 1.861314685421972e-06, "loss": 0.0647, "step": 49828 }, { "epoch": 0.888756108871687, "grad_norm": 0.3221103847026825, "learning_rate": 1.8607253871084417e-06, "loss": 0.0769, "step": 49829 }, { "epoch": 0.8887739449934007, "grad_norm": 0.2946473956108093, "learning_rate": 1.8601361784907316e-06, "loss": 0.1326, "step": 49830 }, { "epoch": 0.8887917811151144, "grad_norm": 0.20750118792057037, "learning_rate": 1.8595470595711262e-06, "loss": 0.1045, "step": 49831 }, { "epoch": 0.888809617236828, "grad_norm": 0.3170025944709778, "learning_rate": 1.8589580303519095e-06, "loss": 0.1472, "step": 49832 }, { "epoch": 0.8888274533585417, "grad_norm": 0.3039397597312927, "learning_rate": 1.8583690908353602e-06, "loss": 0.1286, "step": 49833 }, { "epoch": 0.8888452894802554, "grad_norm": 0.33231019973754883, "learning_rate": 1.8577802410237682e-06, "loss": 0.1122, "step": 49834 }, { "epoch": 0.8888631256019691, "grad_norm": 0.3095668852329254, "learning_rate": 1.8571914809194153e-06, "loss": 0.0919, "step": 49835 }, { "epoch": 0.8888809617236828, "grad_norm": 0.33183181285858154, "learning_rate": 1.8566028105245798e-06, "loss": 0.1291, "step": 49836 }, { "epoch": 0.8888987978453965, "grad_norm": 0.2880662679672241, "learning_rate": 1.8560142298415405e-06, "loss": 0.1152, "step": 49837 }, { "epoch": 0.8889166339671102, "grad_norm": 0.31454232335090637, "learning_rate": 1.8554257388725875e-06, "loss": 0.0971, "step": 49838 }, { "epoch": 0.8889344700888239, "grad_norm": 0.677558422088623, "learning_rate": 1.8548373376199996e-06, "loss": 0.1097, "step": 49839 }, { "epoch": 0.8889523062105376, "grad_norm": 0.30070415139198303, "learning_rate": 1.8542490260860523e-06, "loss": 0.1119, "step": 49840 }, { "epoch": 0.8889701423322512, "grad_norm": 0.2502022385597229, "learning_rate": 1.8536608042730303e-06, "loss": 0.0986, "step": 49841 }, { "epoch": 0.8889879784539649, "grad_norm": 0.2670758068561554, "learning_rate": 1.8530726721832148e-06, "loss": 0.1181, "step": 49842 }, { "epoch": 0.8890058145756786, "grad_norm": 0.25057753920555115, "learning_rate": 1.8524846298188874e-06, "loss": 0.1037, "step": 49843 }, { "epoch": 0.8890236506973923, "grad_norm": 0.2412441223859787, "learning_rate": 1.8518966771823244e-06, "loss": 0.0971, "step": 49844 }, { "epoch": 0.8890414868191061, "grad_norm": 0.2951279282569885, "learning_rate": 1.8513088142758038e-06, "loss": 0.1331, "step": 49845 }, { "epoch": 0.8890593229408198, "grad_norm": 0.3844563364982605, "learning_rate": 1.8507210411015996e-06, "loss": 0.0934, "step": 49846 }, { "epoch": 0.8890771590625335, "grad_norm": 0.3238573670387268, "learning_rate": 1.850133357662004e-06, "loss": 0.1305, "step": 49847 }, { "epoch": 0.8890949951842472, "grad_norm": 0.23506927490234375, "learning_rate": 1.8495457639592844e-06, "loss": 0.0945, "step": 49848 }, { "epoch": 0.8891128313059609, "grad_norm": 0.2517795264720917, "learning_rate": 1.848958259995723e-06, "loss": 0.0922, "step": 49849 }, { "epoch": 0.8891306674276745, "grad_norm": 0.23196156322956085, "learning_rate": 1.8483708457735922e-06, "loss": 0.0794, "step": 49850 }, { "epoch": 0.8891485035493882, "grad_norm": 0.2365451604127884, "learning_rate": 1.8477835212951738e-06, "loss": 0.0858, "step": 49851 }, { "epoch": 0.8891663396711019, "grad_norm": 0.22328735888004303, "learning_rate": 1.847196286562744e-06, "loss": 0.0905, "step": 49852 }, { "epoch": 0.8891841757928156, "grad_norm": 0.30784499645233154, "learning_rate": 1.8466091415785759e-06, "loss": 0.0945, "step": 49853 }, { "epoch": 0.8892020119145293, "grad_norm": 0.24009576439857483, "learning_rate": 1.8460220863449479e-06, "loss": 0.1149, "step": 49854 }, { "epoch": 0.889219848036243, "grad_norm": 0.2380632907152176, "learning_rate": 1.8454351208641336e-06, "loss": 0.116, "step": 49855 }, { "epoch": 0.8892376841579567, "grad_norm": 0.25152263045310974, "learning_rate": 1.8448482451384142e-06, "loss": 0.0903, "step": 49856 }, { "epoch": 0.8892555202796704, "grad_norm": 0.20498567819595337, "learning_rate": 1.8442614591700602e-06, "loss": 0.1189, "step": 49857 }, { "epoch": 0.889273356401384, "grad_norm": 0.31050199270248413, "learning_rate": 1.8436747629613476e-06, "loss": 0.1073, "step": 49858 }, { "epoch": 0.8892911925230977, "grad_norm": 0.32119742035865784, "learning_rate": 1.8430881565145441e-06, "loss": 0.1129, "step": 49859 }, { "epoch": 0.8893090286448114, "grad_norm": 0.19084841012954712, "learning_rate": 1.842501639831934e-06, "loss": 0.0428, "step": 49860 }, { "epoch": 0.8893268647665251, "grad_norm": 0.31924012303352356, "learning_rate": 1.8419152129157875e-06, "loss": 0.1309, "step": 49861 }, { "epoch": 0.8893447008882389, "grad_norm": 0.35856541991233826, "learning_rate": 1.8413288757683723e-06, "loss": 0.1448, "step": 49862 }, { "epoch": 0.8893625370099526, "grad_norm": 0.3894062042236328, "learning_rate": 1.8407426283919643e-06, "loss": 0.0714, "step": 49863 }, { "epoch": 0.8893803731316663, "grad_norm": 0.25119802355766296, "learning_rate": 1.8401564707888397e-06, "loss": 0.1016, "step": 49864 }, { "epoch": 0.88939820925338, "grad_norm": 0.2892312705516815, "learning_rate": 1.8395704029612659e-06, "loss": 0.1262, "step": 49865 }, { "epoch": 0.8894160453750937, "grad_norm": 0.25144341588020325, "learning_rate": 1.838984424911519e-06, "loss": 0.1062, "step": 49866 }, { "epoch": 0.8894338814968074, "grad_norm": 0.36705195903778076, "learning_rate": 1.8383985366418638e-06, "loss": 0.1769, "step": 49867 }, { "epoch": 0.889451717618521, "grad_norm": 0.30127978324890137, "learning_rate": 1.8378127381545762e-06, "loss": 0.1458, "step": 49868 }, { "epoch": 0.8894695537402347, "grad_norm": 0.30124983191490173, "learning_rate": 1.8372270294519294e-06, "loss": 0.1055, "step": 49869 }, { "epoch": 0.8894873898619484, "grad_norm": 0.3250449001789093, "learning_rate": 1.8366414105361884e-06, "loss": 0.1468, "step": 49870 }, { "epoch": 0.8895052259836621, "grad_norm": 0.32466554641723633, "learning_rate": 1.836055881409629e-06, "loss": 0.1372, "step": 49871 }, { "epoch": 0.8895230621053758, "grad_norm": 0.21098098158836365, "learning_rate": 1.8354704420745132e-06, "loss": 0.0389, "step": 49872 }, { "epoch": 0.8895408982270895, "grad_norm": 0.3244823217391968, "learning_rate": 1.8348850925331174e-06, "loss": 0.1178, "step": 49873 }, { "epoch": 0.8895587343488032, "grad_norm": 0.18866463005542755, "learning_rate": 1.8342998327877086e-06, "loss": 0.0592, "step": 49874 }, { "epoch": 0.8895765704705169, "grad_norm": 0.37507542967796326, "learning_rate": 1.833714662840555e-06, "loss": 0.0919, "step": 49875 }, { "epoch": 0.8895944065922305, "grad_norm": 0.20332205295562744, "learning_rate": 1.833129582693921e-06, "loss": 0.1195, "step": 49876 }, { "epoch": 0.8896122427139442, "grad_norm": 0.26294079422950745, "learning_rate": 1.8325445923500827e-06, "loss": 0.0911, "step": 49877 }, { "epoch": 0.8896300788356579, "grad_norm": 0.25406116247177124, "learning_rate": 1.8319596918113025e-06, "loss": 0.0931, "step": 49878 }, { "epoch": 0.8896479149573717, "grad_norm": 0.38951027393341064, "learning_rate": 1.8313748810798475e-06, "loss": 0.0945, "step": 49879 }, { "epoch": 0.8896657510790854, "grad_norm": 0.2512340843677521, "learning_rate": 1.830790160157983e-06, "loss": 0.1159, "step": 49880 }, { "epoch": 0.8896835872007991, "grad_norm": 0.2336951196193695, "learning_rate": 1.830205529047982e-06, "loss": 0.1023, "step": 49881 }, { "epoch": 0.8897014233225128, "grad_norm": 0.25117599964141846, "learning_rate": 1.8296209877521037e-06, "loss": 0.1121, "step": 49882 }, { "epoch": 0.8897192594442265, "grad_norm": 0.26584678888320923, "learning_rate": 1.8290365362726213e-06, "loss": 0.1199, "step": 49883 }, { "epoch": 0.8897370955659402, "grad_norm": 0.29908284544944763, "learning_rate": 1.8284521746117944e-06, "loss": 0.0703, "step": 49884 }, { "epoch": 0.8897549316876538, "grad_norm": 0.4404044449329376, "learning_rate": 1.8278679027718875e-06, "loss": 0.1961, "step": 49885 }, { "epoch": 0.8897727678093675, "grad_norm": 0.269890159368515, "learning_rate": 1.8272837207551713e-06, "loss": 0.1226, "step": 49886 }, { "epoch": 0.8897906039310812, "grad_norm": 0.22089998424053192, "learning_rate": 1.8266996285639077e-06, "loss": 0.0863, "step": 49887 }, { "epoch": 0.8898084400527949, "grad_norm": 0.18101929128170013, "learning_rate": 1.8261156262003588e-06, "loss": 0.0716, "step": 49888 }, { "epoch": 0.8898262761745086, "grad_norm": 0.2906607985496521, "learning_rate": 1.8255317136667865e-06, "loss": 0.1181, "step": 49889 }, { "epoch": 0.8898441122962223, "grad_norm": 0.2994353175163269, "learning_rate": 1.8249478909654644e-06, "loss": 0.1198, "step": 49890 }, { "epoch": 0.889861948417936, "grad_norm": 0.2071688175201416, "learning_rate": 1.824364158098646e-06, "loss": 0.0596, "step": 49891 }, { "epoch": 0.8898797845396497, "grad_norm": 0.33059561252593994, "learning_rate": 1.823780515068596e-06, "loss": 0.0835, "step": 49892 }, { "epoch": 0.8898976206613634, "grad_norm": 0.22787611186504364, "learning_rate": 1.823196961877574e-06, "loss": 0.1168, "step": 49893 }, { "epoch": 0.889915456783077, "grad_norm": 0.3035147488117218, "learning_rate": 1.8226134985278504e-06, "loss": 0.1045, "step": 49894 }, { "epoch": 0.8899332929047907, "grad_norm": 0.3333717882633209, "learning_rate": 1.822030125021676e-06, "loss": 0.1248, "step": 49895 }, { "epoch": 0.8899511290265045, "grad_norm": 0.25385555624961853, "learning_rate": 1.821446841361324e-06, "loss": 0.1445, "step": 49896 }, { "epoch": 0.8899689651482182, "grad_norm": 0.3475976884365082, "learning_rate": 1.820863647549051e-06, "loss": 0.102, "step": 49897 }, { "epoch": 0.8899868012699319, "grad_norm": 0.26944491267204285, "learning_rate": 1.8202805435871107e-06, "loss": 0.0928, "step": 49898 }, { "epoch": 0.8900046373916456, "grad_norm": 0.297126442193985, "learning_rate": 1.8196975294777735e-06, "loss": 0.1095, "step": 49899 }, { "epoch": 0.8900224735133593, "grad_norm": 0.2458484023809433, "learning_rate": 1.8191146052232931e-06, "loss": 0.0867, "step": 49900 }, { "epoch": 0.890040309635073, "grad_norm": 0.2071489542722702, "learning_rate": 1.818531770825932e-06, "loss": 0.0507, "step": 49901 }, { "epoch": 0.8900581457567867, "grad_norm": 0.22817501425743103, "learning_rate": 1.8179490262879463e-06, "loss": 0.113, "step": 49902 }, { "epoch": 0.8900759818785003, "grad_norm": 0.3017560839653015, "learning_rate": 1.8173663716115979e-06, "loss": 0.1001, "step": 49903 }, { "epoch": 0.890093818000214, "grad_norm": 0.3293018341064453, "learning_rate": 1.8167838067991466e-06, "loss": 0.1466, "step": 49904 }, { "epoch": 0.8901116541219277, "grad_norm": 0.34821516275405884, "learning_rate": 1.8162013318528486e-06, "loss": 0.0573, "step": 49905 }, { "epoch": 0.8901294902436414, "grad_norm": 0.21277935802936554, "learning_rate": 1.815618946774955e-06, "loss": 0.0773, "step": 49906 }, { "epoch": 0.8901473263653551, "grad_norm": 0.30802449584007263, "learning_rate": 1.8150366515677364e-06, "loss": 0.1957, "step": 49907 }, { "epoch": 0.8901651624870688, "grad_norm": 0.24075330793857574, "learning_rate": 1.8144544462334434e-06, "loss": 0.073, "step": 49908 }, { "epoch": 0.8901829986087825, "grad_norm": 0.3012112081050873, "learning_rate": 1.81387233077433e-06, "loss": 0.1567, "step": 49909 }, { "epoch": 0.8902008347304962, "grad_norm": 0.24156977236270905, "learning_rate": 1.813290305192658e-06, "loss": 0.1052, "step": 49910 }, { "epoch": 0.8902186708522098, "grad_norm": 0.2963894307613373, "learning_rate": 1.812708369490676e-06, "loss": 0.1078, "step": 49911 }, { "epoch": 0.8902365069739235, "grad_norm": 0.3894694149494171, "learning_rate": 1.8121265236706514e-06, "loss": 0.092, "step": 49912 }, { "epoch": 0.8902543430956373, "grad_norm": 0.19741907715797424, "learning_rate": 1.8115447677348324e-06, "loss": 0.0931, "step": 49913 }, { "epoch": 0.890272179217351, "grad_norm": 0.3187587857246399, "learning_rate": 1.8109631016854756e-06, "loss": 0.1086, "step": 49914 }, { "epoch": 0.8902900153390647, "grad_norm": 0.5088554620742798, "learning_rate": 1.810381525524829e-06, "loss": 0.0885, "step": 49915 }, { "epoch": 0.8903078514607784, "grad_norm": 0.3323962092399597, "learning_rate": 1.809800039255158e-06, "loss": 0.1236, "step": 49916 }, { "epoch": 0.8903256875824921, "grad_norm": 0.35256895422935486, "learning_rate": 1.8092186428787129e-06, "loss": 0.1568, "step": 49917 }, { "epoch": 0.8903435237042058, "grad_norm": 0.19838082790374756, "learning_rate": 1.808637336397745e-06, "loss": 0.0789, "step": 49918 }, { "epoch": 0.8903613598259195, "grad_norm": 0.258879691362381, "learning_rate": 1.8080561198145052e-06, "loss": 0.1361, "step": 49919 }, { "epoch": 0.8903791959476332, "grad_norm": 0.2989346385002136, "learning_rate": 1.8074749931312557e-06, "loss": 0.0797, "step": 49920 }, { "epoch": 0.8903970320693468, "grad_norm": 0.3387611508369446, "learning_rate": 1.8068939563502419e-06, "loss": 0.1395, "step": 49921 }, { "epoch": 0.8904148681910605, "grad_norm": 0.27298688888549805, "learning_rate": 1.806313009473712e-06, "loss": 0.1292, "step": 49922 }, { "epoch": 0.8904327043127742, "grad_norm": 0.21711039543151855, "learning_rate": 1.8057321525039305e-06, "loss": 0.112, "step": 49923 }, { "epoch": 0.8904505404344879, "grad_norm": 0.31412264704704285, "learning_rate": 1.8051513854431378e-06, "loss": 0.0902, "step": 49924 }, { "epoch": 0.8904683765562016, "grad_norm": 0.2695717215538025, "learning_rate": 1.8045707082935932e-06, "loss": 0.0905, "step": 49925 }, { "epoch": 0.8904862126779153, "grad_norm": 0.2534194588661194, "learning_rate": 1.8039901210575416e-06, "loss": 0.1317, "step": 49926 }, { "epoch": 0.890504048799629, "grad_norm": 0.29785025119781494, "learning_rate": 1.80340962373724e-06, "loss": 0.1171, "step": 49927 }, { "epoch": 0.8905218849213427, "grad_norm": 0.4032098352909088, "learning_rate": 1.802829216334928e-06, "loss": 0.1417, "step": 49928 }, { "epoch": 0.8905397210430563, "grad_norm": 0.4395829737186432, "learning_rate": 1.802248898852868e-06, "loss": 0.132, "step": 49929 }, { "epoch": 0.8905575571647701, "grad_norm": 0.5400553345680237, "learning_rate": 1.8016686712933023e-06, "loss": 0.1466, "step": 49930 }, { "epoch": 0.8905753932864838, "grad_norm": 0.2580517530441284, "learning_rate": 1.8010885336584792e-06, "loss": 0.1138, "step": 49931 }, { "epoch": 0.8905932294081975, "grad_norm": 0.22351336479187012, "learning_rate": 1.8005084859506472e-06, "loss": 0.1268, "step": 49932 }, { "epoch": 0.8906110655299112, "grad_norm": 0.324955552816391, "learning_rate": 1.7999285281720623e-06, "loss": 0.164, "step": 49933 }, { "epoch": 0.8906289016516249, "grad_norm": 0.36037677526474, "learning_rate": 1.7993486603249676e-06, "loss": 0.1106, "step": 49934 }, { "epoch": 0.8906467377733386, "grad_norm": 0.3040764331817627, "learning_rate": 1.7987688824116083e-06, "loss": 0.1804, "step": 49935 }, { "epoch": 0.8906645738950523, "grad_norm": 0.3858616054058075, "learning_rate": 1.7981891944342327e-06, "loss": 0.1353, "step": 49936 }, { "epoch": 0.890682410016766, "grad_norm": 0.22974008321762085, "learning_rate": 1.7976095963950918e-06, "loss": 0.1406, "step": 49937 }, { "epoch": 0.8907002461384796, "grad_norm": 0.26076796650886536, "learning_rate": 1.797030088296428e-06, "loss": 0.0808, "step": 49938 }, { "epoch": 0.8907180822601933, "grad_norm": 0.2533327341079712, "learning_rate": 1.7964506701404926e-06, "loss": 0.07, "step": 49939 }, { "epoch": 0.890735918381907, "grad_norm": 0.2939600348472595, "learning_rate": 1.7958713419295282e-06, "loss": 0.1484, "step": 49940 }, { "epoch": 0.8907537545036207, "grad_norm": 0.3236691355705261, "learning_rate": 1.7952921036657772e-06, "loss": 0.1249, "step": 49941 }, { "epoch": 0.8907715906253344, "grad_norm": 0.2464517056941986, "learning_rate": 1.7947129553514908e-06, "loss": 0.0974, "step": 49942 }, { "epoch": 0.8907894267470481, "grad_norm": 0.3569663166999817, "learning_rate": 1.7941338969889144e-06, "loss": 0.1317, "step": 49943 }, { "epoch": 0.8908072628687618, "grad_norm": 0.26709234714508057, "learning_rate": 1.7935549285802905e-06, "loss": 0.0556, "step": 49944 }, { "epoch": 0.8908250989904755, "grad_norm": 0.33218643069267273, "learning_rate": 1.7929760501278564e-06, "loss": 0.1134, "step": 49945 }, { "epoch": 0.8908429351121893, "grad_norm": 0.3023509383201599, "learning_rate": 1.7923972616338685e-06, "loss": 0.1595, "step": 49946 }, { "epoch": 0.890860771233903, "grad_norm": 0.32520627975463867, "learning_rate": 1.791818563100564e-06, "loss": 0.145, "step": 49947 }, { "epoch": 0.8908786073556166, "grad_norm": 0.350848913192749, "learning_rate": 1.7912399545301856e-06, "loss": 0.0995, "step": 49948 }, { "epoch": 0.8908964434773303, "grad_norm": 0.3119257092475891, "learning_rate": 1.790661435924973e-06, "loss": 0.1538, "step": 49949 }, { "epoch": 0.890914279599044, "grad_norm": 0.3208456039428711, "learning_rate": 1.7900830072871799e-06, "loss": 0.1561, "step": 49950 }, { "epoch": 0.8909321157207577, "grad_norm": 0.4336583614349365, "learning_rate": 1.7895046686190354e-06, "loss": 0.1785, "step": 49951 }, { "epoch": 0.8909499518424714, "grad_norm": 0.22980165481567383, "learning_rate": 1.78892641992279e-06, "loss": 0.1098, "step": 49952 }, { "epoch": 0.8909677879641851, "grad_norm": 0.2909132242202759, "learning_rate": 1.788348261200684e-06, "loss": 0.106, "step": 49953 }, { "epoch": 0.8909856240858988, "grad_norm": 0.3052906394004822, "learning_rate": 1.7877701924549545e-06, "loss": 0.1197, "step": 49954 }, { "epoch": 0.8910034602076125, "grad_norm": 0.2439405471086502, "learning_rate": 1.7871922136878494e-06, "loss": 0.0912, "step": 49955 }, { "epoch": 0.8910212963293261, "grad_norm": 0.2927035987377167, "learning_rate": 1.786614324901603e-06, "loss": 0.1116, "step": 49956 }, { "epoch": 0.8910391324510398, "grad_norm": 0.32149219512939453, "learning_rate": 1.7860365260984558e-06, "loss": 0.1151, "step": 49957 }, { "epoch": 0.8910569685727535, "grad_norm": 0.2089749276638031, "learning_rate": 1.785458817280647e-06, "loss": 0.0899, "step": 49958 }, { "epoch": 0.8910748046944672, "grad_norm": 0.25410905480384827, "learning_rate": 1.784881198450422e-06, "loss": 0.1568, "step": 49959 }, { "epoch": 0.8910926408161809, "grad_norm": 0.20608460903167725, "learning_rate": 1.7843036696100129e-06, "loss": 0.0881, "step": 49960 }, { "epoch": 0.8911104769378946, "grad_norm": 0.28066226840019226, "learning_rate": 1.7837262307616648e-06, "loss": 0.0931, "step": 49961 }, { "epoch": 0.8911283130596083, "grad_norm": 0.196009561419487, "learning_rate": 1.783148881907612e-06, "loss": 0.0791, "step": 49962 }, { "epoch": 0.8911461491813221, "grad_norm": 0.22484147548675537, "learning_rate": 1.782571623050086e-06, "loss": 0.0996, "step": 49963 }, { "epoch": 0.8911639853030358, "grad_norm": 0.2638413608074188, "learning_rate": 1.781994454191338e-06, "loss": 0.1335, "step": 49964 }, { "epoch": 0.8911818214247494, "grad_norm": 0.2932688891887665, "learning_rate": 1.7814173753335938e-06, "loss": 0.1008, "step": 49965 }, { "epoch": 0.8911996575464631, "grad_norm": 0.3263104259967804, "learning_rate": 1.780840386479099e-06, "loss": 0.0803, "step": 49966 }, { "epoch": 0.8912174936681768, "grad_norm": 0.22115647792816162, "learning_rate": 1.780263487630085e-06, "loss": 0.0736, "step": 49967 }, { "epoch": 0.8912353297898905, "grad_norm": 0.2797151803970337, "learning_rate": 1.7796866787887916e-06, "loss": 0.1128, "step": 49968 }, { "epoch": 0.8912531659116042, "grad_norm": 0.30198103189468384, "learning_rate": 1.7791099599574535e-06, "loss": 0.1672, "step": 49969 }, { "epoch": 0.8912710020333179, "grad_norm": 0.26369860768318176, "learning_rate": 1.7785333311383045e-06, "loss": 0.1382, "step": 49970 }, { "epoch": 0.8912888381550316, "grad_norm": 0.23830004036426544, "learning_rate": 1.7779567923335765e-06, "loss": 0.11, "step": 49971 }, { "epoch": 0.8913066742767453, "grad_norm": 0.22044409811496735, "learning_rate": 1.7773803435455122e-06, "loss": 0.1023, "step": 49972 }, { "epoch": 0.891324510398459, "grad_norm": 0.2943316400051117, "learning_rate": 1.7768039847763429e-06, "loss": 0.123, "step": 49973 }, { "epoch": 0.8913423465201726, "grad_norm": 0.2572459876537323, "learning_rate": 1.7762277160283032e-06, "loss": 0.0977, "step": 49974 }, { "epoch": 0.8913601826418863, "grad_norm": 0.4160288870334625, "learning_rate": 1.775651537303627e-06, "loss": 0.1195, "step": 49975 }, { "epoch": 0.8913780187636, "grad_norm": 0.2470136284828186, "learning_rate": 1.7750754486045406e-06, "loss": 0.1084, "step": 49976 }, { "epoch": 0.8913958548853137, "grad_norm": 0.317086786031723, "learning_rate": 1.7744994499332896e-06, "loss": 0.096, "step": 49977 }, { "epoch": 0.8914136910070274, "grad_norm": 0.2757277190685272, "learning_rate": 1.773923541292094e-06, "loss": 0.1219, "step": 49978 }, { "epoch": 0.8914315271287411, "grad_norm": 0.2629562020301819, "learning_rate": 1.7733477226831995e-06, "loss": 0.0568, "step": 49979 }, { "epoch": 0.8914493632504549, "grad_norm": 0.38874703645706177, "learning_rate": 1.7727719941088266e-06, "loss": 0.0966, "step": 49980 }, { "epoch": 0.8914671993721686, "grad_norm": 0.26062703132629395, "learning_rate": 1.772196355571215e-06, "loss": 0.1244, "step": 49981 }, { "epoch": 0.8914850354938822, "grad_norm": 0.24988922476768494, "learning_rate": 1.7716208070725938e-06, "loss": 0.0875, "step": 49982 }, { "epoch": 0.8915028716155959, "grad_norm": 0.2589786648750305, "learning_rate": 1.7710453486151912e-06, "loss": 0.0893, "step": 49983 }, { "epoch": 0.8915207077373096, "grad_norm": 0.2575794458389282, "learning_rate": 1.7704699802012364e-06, "loss": 0.1147, "step": 49984 }, { "epoch": 0.8915385438590233, "grad_norm": 0.35063958168029785, "learning_rate": 1.7698947018329692e-06, "loss": 0.1057, "step": 49985 }, { "epoch": 0.891556379980737, "grad_norm": 0.35326242446899414, "learning_rate": 1.7693195135126128e-06, "loss": 0.1312, "step": 49986 }, { "epoch": 0.8915742161024507, "grad_norm": 0.4064968228340149, "learning_rate": 1.7687444152423988e-06, "loss": 0.202, "step": 49987 }, { "epoch": 0.8915920522241644, "grad_norm": 0.2466762512922287, "learning_rate": 1.768169407024553e-06, "loss": 0.141, "step": 49988 }, { "epoch": 0.8916098883458781, "grad_norm": 0.34842124581336975, "learning_rate": 1.7675944888613016e-06, "loss": 0.1611, "step": 49989 }, { "epoch": 0.8916277244675918, "grad_norm": 0.3802046775817871, "learning_rate": 1.7670196607548844e-06, "loss": 0.1123, "step": 49990 }, { "epoch": 0.8916455605893054, "grad_norm": 0.23128633201122284, "learning_rate": 1.7664449227075248e-06, "loss": 0.1096, "step": 49991 }, { "epoch": 0.8916633967110191, "grad_norm": 0.23446892201900482, "learning_rate": 1.7658702747214428e-06, "loss": 0.1072, "step": 49992 }, { "epoch": 0.8916812328327328, "grad_norm": 0.2435496300458908, "learning_rate": 1.765295716798876e-06, "loss": 0.1058, "step": 49993 }, { "epoch": 0.8916990689544465, "grad_norm": 0.2381276935338974, "learning_rate": 1.7647212489420444e-06, "loss": 0.1049, "step": 49994 }, { "epoch": 0.8917169050761602, "grad_norm": 0.25989797711372375, "learning_rate": 1.7641468711531855e-06, "loss": 0.1035, "step": 49995 }, { "epoch": 0.8917347411978739, "grad_norm": 0.3566727936267853, "learning_rate": 1.763572583434514e-06, "loss": 0.1091, "step": 49996 }, { "epoch": 0.8917525773195877, "grad_norm": 0.23091700673103333, "learning_rate": 1.7629983857882614e-06, "loss": 0.0919, "step": 49997 }, { "epoch": 0.8917704134413014, "grad_norm": 0.4175768196582794, "learning_rate": 1.762424278216654e-06, "loss": 0.1211, "step": 49998 }, { "epoch": 0.8917882495630151, "grad_norm": 0.28768599033355713, "learning_rate": 1.7618502607219173e-06, "loss": 0.1095, "step": 49999 }, { "epoch": 0.8918060856847287, "grad_norm": 0.2869391441345215, "learning_rate": 1.7612763333062748e-06, "loss": 0.0857, "step": 50000 }, { "epoch": 0.8918060856847287, "eval_loss": 0.10779532790184021, "eval_runtime": 107.9169, "eval_samples_per_second": 9.489, "eval_steps_per_second": 1.585, "step": 50000 } ], "logging_steps": 1, "max_steps": 56066, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.7691159822336e+21, "train_batch_size": 6, "trial_name": null, "trial_params": null }